diff options
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/bit_spinlock.h | 4 | ||||
-rw-r--r-- | include/linux/coda_linux.h | 2 | ||||
-rw-r--r-- | include/linux/dcache.h | 243 | ||||
-rw-r--r-- | include/linux/fs.h | 63 | ||||
-rw-r--r-- | include/linux/fs_struct.h | 3 | ||||
-rw-r--r-- | include/linux/fsnotify.h | 2 | ||||
-rw-r--r-- | include/linux/fsnotify_backend.h | 11 | ||||
-rw-r--r-- | include/linux/generic_acl.h | 2 | ||||
-rw-r--r-- | include/linux/list_bl.h | 144 | ||||
-rw-r--r-- | include/linux/mount.h | 53 | ||||
-rw-r--r-- | include/linux/namei.h | 16 | ||||
-rw-r--r-- | include/linux/ncp_fs.h | 4 | ||||
-rw-r--r-- | include/linux/nfs_fs.h | 2 | ||||
-rw-r--r-- | include/linux/path.h | 2 | ||||
-rw-r--r-- | include/linux/posix_acl.h | 19 | ||||
-rw-r--r-- | include/linux/rculist_bl.h | 127 | ||||
-rw-r--r-- | include/linux/reiserfs_xattr.h | 2 | ||||
-rw-r--r-- | include/linux/security.h | 8 | ||||
-rw-r--r-- | include/linux/seqlock.h | 80 | ||||
-rw-r--r-- | include/linux/slab.h | 2 |
20 files changed, 586 insertions, 203 deletions
diff --git a/include/linux/bit_spinlock.h b/include/linux/bit_spinlock.h index 7113a32a86e..e612575a259 100644 --- a/include/linux/bit_spinlock.h +++ b/include/linux/bit_spinlock.h @@ -1,6 +1,10 @@ #ifndef __LINUX_BIT_SPINLOCK_H #define __LINUX_BIT_SPINLOCK_H +#include <linux/kernel.h> +#include <linux/preempt.h> +#include <asm/atomic.h> + /* * bit-based spin_lock() * diff --git a/include/linux/coda_linux.h b/include/linux/coda_linux.h index 2e914d0771b..4ccc59c1ea8 100644 --- a/include/linux/coda_linux.h +++ b/include/linux/coda_linux.h @@ -37,7 +37,7 @@ extern const struct file_operations coda_ioctl_operations; /* operations shared over more than one file */ int coda_open(struct inode *i, struct file *f); int coda_release(struct inode *i, struct file *f); -int coda_permission(struct inode *inode, int mask); +int coda_permission(struct inode *inode, int mask, unsigned int flags); int coda_revalidate_inode(struct dentry *); int coda_getattr(struct vfsmount *, struct dentry *, struct kstat *); int coda_setattr(struct dentry *, struct iattr *); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 6a4aea30aa0..bd07758943e 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -4,7 +4,9 @@ #include <asm/atomic.h> #include <linux/list.h> #include <linux/rculist.h> +#include <linux/rculist_bl.h> #include <linux/spinlock.h> +#include <linux/seqlock.h> #include <linux/cache.h> #include <linux/rcupdate.h> @@ -45,6 +47,27 @@ struct dentry_stat_t { }; extern struct dentry_stat_t dentry_stat; +/* + * Compare 2 name strings, return 0 if they match, otherwise non-zero. + * The strings are both count bytes long, and count is non-zero. + */ +static inline int dentry_cmp(const unsigned char *cs, size_t scount, + const unsigned char *ct, size_t tcount) +{ + int ret; + if (scount != tcount) + return 1; + do { + ret = (*cs != *ct); + if (ret) + break; + cs++; + ct++; + tcount--; + } while (tcount); + return ret; +} + /* Name hashing routines. Initial hash value */ /* Hash courtesy of the R5 hash in reiserfs modulo sign bits */ #define init_name_hash() 0 @@ -81,25 +104,33 @@ full_name_hash(const unsigned char *name, unsigned int len) * large memory footprint increase). */ #ifdef CONFIG_64BIT -#define DNAME_INLINE_LEN_MIN 32 /* 192 bytes */ +# define DNAME_INLINE_LEN 32 /* 192 bytes */ #else -#define DNAME_INLINE_LEN_MIN 40 /* 128 bytes */ +# ifdef CONFIG_SMP +# define DNAME_INLINE_LEN 36 /* 128 bytes */ +# else +# define DNAME_INLINE_LEN 40 /* 128 bytes */ +# endif #endif struct dentry { - atomic_t d_count; + /* RCU lookup touched fields */ unsigned int d_flags; /* protected by d_lock */ - spinlock_t d_lock; /* per dentry lock */ - int d_mounted; - struct inode *d_inode; /* Where the name belongs to - NULL is - * negative */ - /* - * The next three fields are touched by __d_lookup. Place them here - * so they all fit in a cache line. - */ - struct hlist_node d_hash; /* lookup hash list */ + seqcount_t d_seq; /* per dentry seqlock */ + struct hlist_bl_node d_hash; /* lookup hash list */ struct dentry *d_parent; /* parent directory */ struct qstr d_name; + struct inode *d_inode; /* Where the name belongs to - NULL is + * negative */ + unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */ + + /* Ref lookup also touches following */ + unsigned int d_count; /* protected by d_lock */ + spinlock_t d_lock; /* per dentry lock */ + const struct dentry_operations *d_op; + struct super_block *d_sb; /* The root of the dentry tree */ + unsigned long d_time; /* used by d_revalidate */ + void *d_fsdata; /* fs-specific data */ struct list_head d_lru; /* LRU list */ /* @@ -111,12 +142,6 @@ struct dentry { } d_u; struct list_head d_subdirs; /* our children */ struct list_head d_alias; /* inode alias list */ - unsigned long d_time; /* used by d_revalidate */ - const struct dentry_operations *d_op; - struct super_block *d_sb; /* The root of the dentry tree */ - void *d_fsdata; /* fs-specific data */ - - unsigned char d_iname[DNAME_INLINE_LEN_MIN]; /* small names */ }; /* @@ -133,96 +158,61 @@ enum dentry_d_lock_class struct dentry_operations { int (*d_revalidate)(struct dentry *, struct nameidata *); - int (*d_hash) (struct dentry *, struct qstr *); - int (*d_compare) (struct dentry *, struct qstr *, struct qstr *); - int (*d_delete)(struct dentry *); + int (*d_hash)(const struct dentry *, const struct inode *, + struct qstr *); + int (*d_compare)(const struct dentry *, const struct inode *, + const struct dentry *, const struct inode *, + unsigned int, const char *, const struct qstr *); + int (*d_delete)(const struct dentry *); void (*d_release)(struct dentry *); void (*d_iput)(struct dentry *, struct inode *); char *(*d_dname)(struct dentry *, char *, int); -}; - -/* the dentry parameter passed to d_hash and d_compare is the parent - * directory of the entries to be compared. It is used in case these - * functions need any directory specific information for determining - * equivalency classes. Using the dentry itself might not work, as it - * might be a negative dentry which has no information associated with - * it */ +} ____cacheline_aligned; /* -locking rules: - big lock dcache_lock d_lock may block -d_revalidate: no no no yes -d_hash no no no yes -d_compare: no yes yes no -d_delete: no yes no no -d_release: no no no yes -d_iput: no no no yes + * Locking rules for dentry_operations callbacks are to be found in + * Documentation/filesystems/Locking. Keep it updated! + * + * FUrther descriptions are found in Documentation/filesystems/vfs.txt. + * Keep it updated too! */ /* d_flags entries */ #define DCACHE_AUTOFS_PENDING 0x0001 /* autofs: "under construction" */ -#define DCACHE_NFSFS_RENAMED 0x0002 /* this dentry has been "silly - * renamed" and has to be - * deleted on the last dput() - */ -#define DCACHE_DISCONNECTED 0x0004 - /* This dentry is possibly not currently connected to the dcache tree, - * in which case its parent will either be itself, or will have this - * flag as well. nfsd will not use a dentry with this bit set, but will - * first endeavour to clear the bit either by discovering that it is - * connected, or by performing lookup operations. Any filesystem which - * supports nfsd_operations MUST have a lookup function which, if it finds - * a directory inode with a DCACHE_DISCONNECTED dentry, will d_move - * that dentry into place and return that dentry rather than the passed one, - * typically using d_splice_alias. - */ +#define DCACHE_NFSFS_RENAMED 0x0002 + /* this dentry has been "silly renamed" and has to be deleted on the last + * dput() */ + +#define DCACHE_DISCONNECTED 0x0004 + /* This dentry is possibly not currently connected to the dcache tree, in + * which case its parent will either be itself, or will have this flag as + * well. nfsd will not use a dentry with this bit set, but will first + * endeavour to clear the bit either by discovering that it is connected, + * or by performing lookup operations. Any filesystem which supports + * nfsd_operations MUST have a lookup function which, if it finds a + * directory inode with a DCACHE_DISCONNECTED dentry, will d_move that + * dentry into place and return that dentry rather than the passed one, + * typically using d_splice_alias. */ #define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */ #define DCACHE_UNHASHED 0x0010 - -#define DCACHE_INOTIFY_PARENT_WATCHED 0x0020 /* Parent inode is watched by inotify */ +#define DCACHE_INOTIFY_PARENT_WATCHED 0x0020 + /* Parent inode is watched by inotify */ #define DCACHE_COOKIE 0x0040 /* For use by dcookie subsystem */ - -#define DCACHE_FSNOTIFY_PARENT_WATCHED 0x0080 /* Parent inode is watched by some fsnotify listener */ +#define DCACHE_FSNOTIFY_PARENT_WATCHED 0x0080 + /* Parent inode is watched by some fsnotify listener */ #define DCACHE_CANT_MOUNT 0x0100 +#define DCACHE_GENOCIDE 0x0200 +#define DCACHE_MOUNTED 0x0400 /* is a mountpoint */ -extern spinlock_t dcache_lock; -extern seqlock_t rename_lock; - -/** - * d_drop - drop a dentry - * @dentry: dentry to drop - * - * d_drop() unhashes the entry from the parent dentry hashes, so that it won't - * be found through a VFS lookup any more. Note that this is different from - * deleting the dentry - d_delete will try to mark the dentry negative if - * possible, giving a successful _negative_ lookup, while d_drop will - * just make the cache lookup fail. - * - * d_drop() is used mainly for stuff that wants to invalidate a dentry for some - * reason (NFS timeouts or autofs deletes). - * - * __d_drop requires dentry->d_lock. - */ - -static inline void __d_drop(struct dentry *dentry) -{ - if (!(dentry->d_flags & DCACHE_UNHASHED)) { - dentry->d_flags |= DCACHE_UNHASHED; - hlist_del_rcu(&dentry->d_hash); - } -} +#define DCACHE_OP_HASH 0x1000 +#define DCACHE_OP_COMPARE 0x2000 +#define DCACHE_OP_REVALIDATE 0x4000 +#define DCACHE_OP_DELETE 0x8000 -static inline void d_drop(struct dentry *dentry) -{ - spin_lock(&dcache_lock); - spin_lock(&dentry->d_lock); - __d_drop(dentry); - spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); -} +extern seqlock_t rename_lock; static inline int dname_external(struct dentry *dentry) { @@ -235,10 +225,14 @@ static inline int dname_external(struct dentry *dentry) extern void d_instantiate(struct dentry *, struct inode *); extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *); extern struct dentry * d_materialise_unique(struct dentry *, struct inode *); +extern void __d_drop(struct dentry *dentry); +extern void d_drop(struct dentry *dentry); extern void d_delete(struct dentry *); +extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op); /* allocate/de-allocate */ extern struct dentry * d_alloc(struct dentry *, const struct qstr *); +extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *); extern struct dentry * d_splice_alias(struct inode *, struct dentry *); extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *); extern struct dentry * d_obtain_alias(struct inode *); @@ -296,14 +290,40 @@ static inline struct dentry *d_add_unique(struct dentry *entry, struct inode *in return res; } +extern void dentry_update_name_case(struct dentry *, struct qstr *); + /* used for rename() and baskets */ extern void d_move(struct dentry *, struct dentry *); extern struct dentry *d_ancestor(struct dentry *, struct dentry *); /* appendix may either be NULL or be used for transname suffixes */ -extern struct dentry * d_lookup(struct dentry *, struct qstr *); -extern struct dentry * __d_lookup(struct dentry *, struct qstr *); -extern struct dentry * d_hash_and_lookup(struct dentry *, struct qstr *); +extern struct dentry *d_lookup(struct dentry *, struct qstr *); +extern struct dentry *d_hash_and_lookup(struct dentry *, struct qstr *); +extern struct dentry *__d_lookup(struct dentry *, struct qstr *); +extern struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name, + unsigned *seq, struct inode **inode); + +/** + * __d_rcu_to_refcount - take a refcount on dentry if sequence check is ok + * @dentry: dentry to take a ref on + * @seq: seqcount to verify against + * @Returns: 0 on failure, else 1. + * + * __d_rcu_to_refcount operates on a dentry,seq pair that was returned + * by __d_lookup_rcu, to get a reference on an rcu-walk dentry. + */ +static inline int __d_rcu_to_refcount(struct dentry *dentry, unsigned seq) +{ + int ret = 0; + + assert_spin_locked(&dentry->d_lock); + if (!read_seqcount_retry(&dentry->d_seq, seq)) { + ret = 1; + dentry->d_count++; + } + + return ret; +} /* validate "insecure" dentry pointer */ extern int d_validate(struct dentry *, struct dentry *); @@ -316,34 +336,37 @@ extern char *dynamic_dname(struct dentry *, char *, int, const char *, ...); extern char *__d_path(const struct path *path, struct path *root, char *, int); extern char *d_path(const struct path *, char *, int); extern char *d_path_with_unreachable(const struct path *, char *, int); -extern char *__dentry_path(struct dentry *, char *, int); +extern char *dentry_path_raw(struct dentry *, char *, int); extern char *dentry_path(struct dentry *, char *, int); /* Allocation counts.. */ /** - * dget, dget_locked - get a reference to a dentry + * dget, dget_dlock - get a reference to a dentry * @dentry: dentry to get a reference to * * Given a dentry or %NULL pointer increment the reference count * if appropriate and return the dentry. A dentry will not be - * destroyed when it has references. dget() should never be - * called for dentries with zero reference counter. For these cases - * (preferably none, functions in dcache.c are sufficient for normal - * needs and they take necessary precautions) you should hold dcache_lock - * and call dget_locked() instead of dget(). + * destroyed when it has references. */ - +static inline struct dentry *dget_dlock(struct dentry *dentry) +{ + if (dentry) + dentry->d_count++; + return dentry; +} + static inline struct dentry *dget(struct dentry *dentry) { if (dentry) { - BUG_ON(!atomic_read(&dentry->d_count)); - atomic_inc(&dentry->d_count); + spin_lock(&dentry->d_lock); + dget_dlock(dentry); + spin_unlock(&dentry->d_lock); } return dentry; } -extern struct dentry * dget_locked(struct dentry *); +extern struct dentry *dget_parent(struct dentry *dentry); /** * d_unhashed - is dentry hashed @@ -374,21 +397,11 @@ static inline void dont_mount(struct dentry *dentry) spin_unlock(&dentry->d_lock); } -static inline struct dentry *dget_parent(struct dentry *dentry) -{ - struct dentry *ret; - - spin_lock(&dentry->d_lock); - ret = dget(dentry->d_parent); - spin_unlock(&dentry->d_lock); - return ret; -} - extern void dput(struct dentry *); static inline int d_mountpoint(struct dentry *dentry) { - return dentry->d_mounted; + return dentry->d_flags & DCACHE_MOUNTED; } extern struct vfsmount *lookup_mnt(struct path *); diff --git a/include/linux/fs.h b/include/linux/fs.h index 090f0eacde2..baf3e556ff0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -392,6 +392,7 @@ struct inodes_stat_t { #include <linux/capability.h> #include <linux/semaphore.h> #include <linux/fiemap.h> +#include <linux/rculist_bl.h> #include <asm/atomic.h> #include <asm/byteorder.h> @@ -733,16 +734,31 @@ struct posix_acl; #define ACL_NOT_CACHED ((void *)(-1)) struct inode { + /* RCU path lookup touches following: */ + umode_t i_mode; + uid_t i_uid; + gid_t i_gid; + const struct inode_operations *i_op; + struct super_block *i_sb; + + spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ + unsigned int i_flags; + struct mutex i_mutex; + + unsigned long i_state; + unsigned long dirtied_when; /* jiffies of first dirtying */ + struct hlist_node i_hash; struct list_head i_wb_list; /* backing dev IO list */ struct list_head i_lru; /* inode LRU list */ struct list_head i_sb_list; - struct list_head i_dentry; + union { + struct list_head i_dentry; + struct rcu_head i_rcu; + }; unsigned long i_ino; atomic_t i_count; unsigned int i_nlink; - uid_t i_uid; - gid_t i_gid; dev_t i_rdev; unsigned int i_blkbits; u64 i_version; @@ -755,13 +771,8 @@ struct inode { struct timespec i_ctime; blkcnt_t i_blocks; unsigned short i_bytes; - umode_t i_mode; - spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ - struct mutex i_mutex; struct rw_semaphore i_alloc_sem; - const struct inode_operations *i_op; const struct file_operations *i_fop; /* former ->i_op->default_file_ops */ - struct super_block *i_sb; struct file_lock *i_flock; struct address_space *i_mapping; struct address_space i_data; @@ -782,11 +793,6 @@ struct inode { struct hlist_head i_fsnotify_marks; #endif - unsigned long i_state; - unsigned long dirtied_when; /* jiffies of first dirtying */ - - unsigned int i_flags; - #ifdef CONFIG_IMA /* protected by i_lock */ unsigned int i_readcount; /* struct files open RO */ @@ -1372,13 +1378,13 @@ struct super_block { const struct xattr_handler **s_xattr; struct list_head s_inodes; /* all inodes */ - struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */ + struct hlist_bl_head s_anon; /* anonymous dentries for (nfs) exporting */ #ifdef CONFIG_SMP struct list_head __percpu *s_files; #else struct list_head s_files; #endif - /* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */ + /* s_dentry_lru, s_nr_dentry_unused protected by dcache.c lru locks */ struct list_head s_dentry_lru; /* unused dentry lru */ int s_nr_dentry_unused; /* # of dentry on lru */ @@ -1545,9 +1551,18 @@ struct file_operations { int (*setlease)(struct file *, long, struct file_lock **); }; +#define IPERM_FLAG_RCU 0x0001 + struct inode_operations { - int (*create) (struct inode *,struct dentry *,int, struct nameidata *); struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *); + void * (*follow_link) (struct dentry *, struct nameidata *); + int (*permission) (struct inode *, int, unsigned int); + int (*check_acl)(struct inode *, int, unsigned int); + + int (*readlink) (struct dentry *, char __user *,int); + void (*put_link) (struct dentry *, struct nameidata *, void *); + + int (*create) (struct inode *,struct dentry *,int, struct nameidata *); int (*link) (struct dentry *,struct inode *,struct dentry *); int (*unlink) (struct inode *,struct dentry *); int (*symlink) (struct inode *,struct dentry *,const char *); @@ -1556,12 +1571,7 @@ struct inode_operations { int (*mknod) (struct inode *,struct dentry *,int,dev_t); int (*rename) (struct inode *, struct dentry *, struct inode *, struct dentry *); - int (*readlink) (struct dentry *, char __user *,int); - void * (*follow_link) (struct dentry *, struct nameidata *); - void (*put_link) (struct dentry *, struct nameidata *, void *); void (*truncate) (struct inode *); - int (*permission) (struct inode *, int); - int (*check_acl)(struct inode *, int); int (*setattr) (struct dentry *, struct iattr *); int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); @@ -1573,7 +1583,7 @@ struct inode_operations { loff_t len); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); -}; +} ____cacheline_aligned; struct seq_file; @@ -2158,8 +2168,8 @@ extern sector_t bmap(struct inode *, sector_t); #endif extern int notify_change(struct dentry *, struct iattr *); extern int inode_permission(struct inode *, int); -extern int generic_permission(struct inode *, int, - int (*check_acl)(struct inode *, int)); +extern int generic_permission(struct inode *, int, unsigned int, + int (*check_acl)(struct inode *, int, unsigned int)); static inline bool execute_ok(struct inode *inode) { @@ -2230,6 +2240,7 @@ extern void iget_failed(struct inode *); extern void end_writeback(struct inode *); extern void __destroy_inode(struct inode *); extern struct inode *new_inode(struct super_block *); +extern void free_inode_nonrcu(struct inode *inode); extern int should_remove_suid(struct dentry *); extern int file_remove_suid(struct file *); @@ -2446,6 +2457,10 @@ static inline ino_t parent_ino(struct dentry *dentry) { ino_t res; + /* + * Don't strictly need d_lock here? If the parent ino could change + * then surely we'd have a deeper race in the caller? + */ spin_lock(&dentry->d_lock); res = dentry->d_parent->d_inode->i_ino; spin_unlock(&dentry->d_lock); diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h index a42b5bf02f8..003dc0fd734 100644 --- a/include/linux/fs_struct.h +++ b/include/linux/fs_struct.h @@ -2,10 +2,13 @@ #define _LINUX_FS_STRUCT_H #include <linux/path.h> +#include <linux/spinlock.h> +#include <linux/seqlock.h> struct fs_struct { int users; spinlock_t lock; + seqcount_t seq; int umask; int in_exec; struct path root, pwd; diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index b10bcdeaef7..2a53f10712b 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -17,7 +17,6 @@ /* * fsnotify_d_instantiate - instantiate a dentry for inode - * Called with dcache_lock held. */ static inline void fsnotify_d_instantiate(struct dentry *dentry, struct inode *inode) @@ -62,7 +61,6 @@ static inline int fsnotify_perm(struct file *file, int mask) /* * fsnotify_d_move - dentry has been moved - * Called with dcache_lock and dentry->d_lock held. */ static inline void fsnotify_d_move(struct dentry *dentry) { diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 7380763595d..69ad89b5048 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -329,9 +329,15 @@ static inline void __fsnotify_update_dcache_flags(struct dentry *dentry) { struct dentry *parent; - assert_spin_locked(&dcache_lock); assert_spin_locked(&dentry->d_lock); + /* + * Serialisation of setting PARENT_WATCHED on the dentries is provided + * by d_lock. If inotify_inode_watched changes after we have taken + * d_lock, the following __fsnotify_update_child_dentry_flags call will + * find our entry, so it will spin until we complete here, and update + * us with the new state. + */ parent = dentry->d_parent; if (parent->d_inode && fsnotify_inode_watches_children(parent->d_inode)) dentry->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED; @@ -341,15 +347,12 @@ static inline void __fsnotify_update_dcache_flags(struct dentry *dentry) /* * fsnotify_d_instantiate - instantiate a dentry for inode - * Called with dcache_lock held. */ static inline void __fsnotify_d_instantiate(struct dentry *dentry, struct inode *inode) { if (!inode) return; - assert_spin_locked(&dcache_lock); - spin_lock(&dentry->d_lock); __fsnotify_update_dcache_flags(dentry); spin_unlock(&dentry->d_lock); diff --git a/include/linux/generic_acl.h b/include/linux/generic_acl.h index 574bea4013b..0437e377b55 100644 --- a/include/linux/generic_acl.h +++ b/include/linux/generic_acl.h @@ -10,6 +10,6 @@ extern const struct xattr_handler generic_acl_default_handler; int generic_acl_init(struct inode *, struct inode *); int generic_acl_chmod(struct inode *); -int generic_check_acl(struct inode *inode, int mask); +int generic_check_acl(struct inode *inode, int mask, unsigned int flags); #endif /* LINUX_GENERIC_ACL_H */ diff --git a/include/linux/list_bl.h b/include/linux/list_bl.h new file mode 100644 index 00000000000..9ee97e7f2be --- /dev/null +++ b/include/linux/list_bl.h @@ -0,0 +1,144 @@ +#ifndef _LINUX_LIST_BL_H +#define _LINUX_LIST_BL_H + +#include <linux/list.h> + +/* + * Special version of lists, where head of the list has a lock in the lowest + * bit. This is useful for scalable hash tables without increasing memory + * footprint overhead. + * + * For modification operations, the 0 bit of hlist_bl_head->first + * pointer must be set. + * + * With some small modifications, this can easily be adapted to store several + * arbitrary bits (not just a single lock bit), if the need arises to store + * some fast and compact auxiliary data. + */ + +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) +#define LIST_BL_LOCKMASK 1UL +#else +#define LIST_BL_LOCKMASK 0UL +#endif + +#ifdef CONFIG_DEBUG_LIST +#define LIST_BL_BUG_ON(x) BUG_ON(x) +#else +#define LIST_BL_BUG_ON(x) +#endif + + +struct hlist_bl_head { + struct hlist_bl_node *first; +}; + +struct hlist_bl_node { + struct hlist_bl_node *next, **pprev; +}; +#define INIT_HLIST_BL_HEAD(ptr) \ + ((ptr)->first = NULL) + +static inline void INIT_HLIST_BL_NODE(struct hlist_bl_node *h) +{ + h->next = NULL; + h->pprev = NULL; +} + +#define hlist_bl_entry(ptr, type, member) container_of(ptr,type,member) + +static inline int hlist_bl_unhashed(const struct hlist_bl_node *h) +{ + return !h->pprev; +} + +static inline struct hlist_bl_node *hlist_bl_first(struct hlist_bl_head *h) +{ + return (struct hlist_bl_node *) + ((unsigned long)h->first & ~LIST_BL_LOCKMASK); +} + +static inline void hlist_bl_set_first(struct hlist_bl_head *h, + struct hlist_bl_node *n) +{ + LIST_BL_BUG_ON((unsigned long)n & LIST_BL_LOCKMASK); + LIST_BL_BUG_ON(!((unsigned long)h->first & LIST_BL_LOCKMASK)); + h->first = (struct hlist_bl_node *)((unsigned long)n | LIST_BL_LOCKMASK); +} + +static inline int hlist_bl_empty(const struct hlist_bl_head *h) +{ + return !((unsigned long)h->first & ~LIST_BL_LOCKMASK); +} + +static inline void hlist_bl_add_head(struct hlist_bl_node *n, + struct hlist_bl_head *h) +{ + struct hlist_bl_node *first = hlist_bl_first(h); + + n->next = first; + if (first) + first->pprev = &n->next; + n->pprev = &h->first; + hlist_bl_set_first(h, n); +} + +static inline void __hlist_bl_del(struct hlist_bl_node *n) +{ + struct hlist_bl_node *next = n->next; + struct hlist_bl_node **pprev = n->pprev; + + LIST_BL_BUG_ON((unsigned long)n & LIST_BL_LOCKMASK); + + /* pprev may be `first`, so be careful not to lose the lock bit */ + *pprev = (struct hlist_bl_node *) + ((unsigned long)next | + ((unsigned long)*pprev & LIST_BL_LOCKMASK)); + if (next) + next->pprev = pprev; +} + +static inline void hlist_bl_del(struct hlist_bl_node *n) +{ + __hlist_bl_del(n); + n->next = LIST_POISON1; + n->pprev = LIST_POISON2; +} + +static inline void hlist_bl_del_init(struct hlist_bl_node *n) +{ + if (!hlist_bl_unhashed(n)) { + __hlist_bl_del(n); + INIT_HLIST_BL_NODE(n); + } +} + +/** + * hlist_bl_for_each_entry - iterate over list of given type + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct hlist_node to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the hlist_node within the struct. + * + */ +#define hlist_bl_for_each_entry(tpos, pos, head, member) \ + for (pos = hlist_bl_first(head); \ + pos && \ + ({ tpos = hlist_bl_entry(pos, typeof(*tpos), member); 1;}); \ + pos = pos->next) + +/** + * hlist_bl_for_each_entry_safe - iterate over list of given type safe against removal of list entry + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct hlist_node to use as a loop cursor. + * @n: another &struct hlist_node to use as temporary storage + * @head: the head for your list. + * @member: the name of the hlist_node within the struct. + */ +#define hlist_bl_for_each_entry_safe(tpos, pos, n, head, member) \ + for (pos = hlist_bl_first(head); \ + pos && ({ n = pos->next; 1; }) && \ + ({ tpos = hlist_bl_entry(pos, typeof(*tpos), member); 1;}); \ + pos = n) + +#endif diff --git a/include/linux/mount.h b/include/linux/mount.h index 5e7a59408dd..1869ea24a73 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -13,6 +13,7 @@ #include <linux/list.h> #include <linux/nodemask.h> #include <linux/spinlock.h> +#include <linux/seqlock.h> #include <asm/atomic.h> struct super_block; @@ -46,12 +47,24 @@ struct mnt_namespace; #define MNT_INTERNAL 0x4000 +struct mnt_pcp { + int mnt_count; + int mnt_writers; +}; + struct vfsmount { struct list_head mnt_hash; struct vfsmount *mnt_parent; /* fs we are mounted on */ struct dentry *mnt_mountpoint; /* dentry of mountpoint */ struct dentry *mnt_root; /* root of the mounted tree */ struct super_block *mnt_sb; /* pointer to superblock */ +#ifdef CONFIG_SMP + struct mnt_pcp __percpu *mnt_pcp; + atomic_t mnt_longrefs; +#else + int mnt_count; + int mnt_writers; +#endif struct list_head mnt_mounts; /* list of children, anchored here */ struct list_head mnt_child; /* and going through their mnt_child */ int mnt_flags; @@ -70,57 +83,25 @@ struct vfsmount { struct mnt_namespace *mnt_ns; /* containing namespace */ int mnt_id; /* mount identifier */ int mnt_group_id; /* peer group identifier */ - /* - * We put mnt_count & mnt_expiry_mark at the end of struct vfsmount - * to let these frequently modified fields in a separate cache line - * (so that reads of mnt_flags wont ping-pong on SMP machines) - */ - atomic_t mnt_count; int mnt_expiry_mark; /* true if marked for expiry */ int mnt_pinned; int mnt_ghosts; -#ifdef CONFIG_SMP - int __percpu *mnt_writers; -#else - int mnt_writers; -#endif }; -static inline int *get_mnt_writers_ptr(struct vfsmount *mnt) -{ -#ifdef CONFIG_SMP - return mnt->mnt_writers; -#else - return &mnt->mnt_writers; -#endif -} - -static inline struct vfsmount *mntget(struct vfsmount *mnt) -{ - if (mnt) - atomic_inc(&mnt->mnt_count); - return mnt; -} - struct file; /* forward dec */ extern int mnt_want_write(struct vfsmount *mnt); extern int mnt_want_write_file(struct file *file); extern int mnt_clone_write(struct vfsmount *mnt); extern void mnt_drop_write(struct vfsmount *mnt); -extern void mntput_no_expire(struct vfsmount *mnt); +extern void mntput(struct vfsmount *mnt); +extern struct vfsmount *mntget(struct vfsmount *mnt); +extern void mntput_long(struct vfsmount *mnt); +extern struct vfsmount *mntget_long(struct vfsmount *mnt); extern void mnt_pin(struct vfsmount *mnt); extern void mnt_unpin(struct vfsmount *mnt); extern int __mnt_is_readonly(struct vfsmount *mnt); -static inline void mntput(struct vfsmount *mnt) -{ - if (mnt) { - mnt->mnt_expiry_mark = 0; - mntput_no_expire(mnt); - } -} - extern struct vfsmount *do_kern_mount(const char *fstype, int flags, const char *name, void *data); diff --git a/include/linux/namei.h b/include/linux/namei.h index 05b441d9364..18d06add0a4 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -19,7 +19,10 @@ struct nameidata { struct path path; struct qstr last; struct path root; + struct file *file; + struct inode *inode; /* path.dentry.d_inode */ unsigned int flags; + unsigned seq; int last_type; unsigned depth; char *saved_names[MAX_NESTED_LINKS + 1]; @@ -41,14 +44,15 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND}; * - require a directory * - ending slashes ok even for nonexistent files * - internal "there are more path components" flag - * - locked when lookup done with dcache_lock held * - dentry cache is untrusted; force a real lookup */ -#define LOOKUP_FOLLOW 1 -#define LOOKUP_DIRECTORY 2 -#define LOOKUP_CONTINUE 4 -#define LOOKUP_PARENT 16 -#define LOOKUP_REVAL 64 +#define LOOKUP_FOLLOW 0x0001 +#define LOOKUP_DIRECTORY 0x0002 +#define LOOKUP_CONTINUE 0x0004 + +#define LOOKUP_PARENT 0x0010 +#define LOOKUP_REVAL 0x0020 +#define LOOKUP_RCU 0x0040 /* * Intent data */ diff --git a/include/linux/ncp_fs.h b/include/linux/ncp_fs.h index ef663061d5a..1c27f201c85 100644 --- a/include/linux/ncp_fs.h +++ b/include/linux/ncp_fs.h @@ -184,13 +184,13 @@ struct ncp_entry_info { __u8 file_handle[6]; }; -static inline struct ncp_server *NCP_SBP(struct super_block *sb) +static inline struct ncp_server *NCP_SBP(const struct super_block *sb) { return sb->s_fs_info; } #define NCP_SERVER(inode) NCP_SBP((inode)->i_sb) -static inline struct ncp_inode_info *NCP_FINFO(struct inode *inode) +static inline struct ncp_inode_info *NCP_FINFO(const struct inode *inode) { return container_of(inode, struct ncp_inode_info, vfs_inode); } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 29d504d5d1c..0779bb8f95b 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -351,7 +351,7 @@ extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *); extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr); extern int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr); extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *); -extern int nfs_permission(struct inode *, int); +extern int nfs_permission(struct inode *, int, unsigned int); extern int nfs_open(struct inode *, struct file *); extern int nfs_release(struct inode *, struct file *); extern int nfs_attribute_timeout(struct inode *inode); diff --git a/include/linux/path.h b/include/linux/path.h index edc98dec626..a581e8c0653 100644 --- a/include/linux/path.h +++ b/include/linux/path.h @@ -10,7 +10,9 @@ struct path { }; extern void path_get(struct path *); +extern void path_get_long(struct path *); extern void path_put(struct path *); +extern void path_put_long(struct path *); static inline int path_equal(const struct path *path1, const struct path *path2) { diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index 67608161df6..d68283a898b 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -108,6 +108,25 @@ static inline struct posix_acl *get_cached_acl(struct inode *inode, int type) return acl; } +static inline int negative_cached_acl(struct inode *inode, int type) +{ + struct posix_acl **p, *acl; + switch (type) { + case ACL_TYPE_ACCESS: + p = &inode->i_acl; + break; + case ACL_TYPE_DEFAULT: + p = &inode->i_default_acl; + break; + default: + BUG(); + } + acl = ACCESS_ONCE(*p); + if (acl) + return 0; + return 1; +} + static inline void set_cached_acl(struct inode *inode, int type, struct posix_acl *acl) diff --git a/include/linux/rculist_bl.h b/include/linux/rculist_bl.h new file mode 100644 index 00000000000..b872b493724 --- /dev/null +++ b/include/linux/rculist_bl.h @@ -0,0 +1,127 @@ +#ifndef _LINUX_RCULIST_BL_H +#define _LINUX_RCULIST_BL_H + +/* + * RCU-protected bl list version. See include/linux/list_bl.h. + */ +#include <linux/list_bl.h> +#include <linux/rcupdate.h> + +static inline void hlist_bl_set_first_rcu(struct hlist_bl_head *h, + struct hlist_bl_node *n) +{ + LIST_BL_BUG_ON((unsigned long)n & LIST_BL_LOCKMASK); + LIST_BL_BUG_ON(!((unsigned long)h->first & LIST_BL_LOCKMASK)); + rcu_assign_pointer(h->first, + (struct hlist_bl_node *)((unsigned long)n | LIST_BL_LOCKMASK)); +} + +static inline struct hlist_bl_node *hlist_bl_first_rcu(struct hlist_bl_head *h) +{ + return (struct hlist_bl_node *) + ((unsigned long)rcu_dereference(h->first) & ~LIST_BL_LOCKMASK); +} + +/** + * hlist_bl_del_init_rcu - deletes entry from hash list with re-initialization + * @n: the element to delete from the hash list. + * + * Note: hlist_bl_unhashed() on the node returns true after this. It is + * useful for RCU based read lockfree traversal if the writer side + * must know if the list entry is still hashed or already unhashed. + * + * In particular, it means that we can not poison the forward pointers + * that may still be used for walking the hash list and we can only + * zero the pprev pointer so list_unhashed() will return true after + * this. + * + * The caller must take whatever precautions are necessary (such as + * holding appropriate locks) to avoid racing with another + * list-mutation primitive, such as hlist_bl_add_head_rcu() or + * hlist_bl_del_rcu(), running on this same list. However, it is + * perfectly legal to run concurrently with the _rcu list-traversal + * primitives, such as hlist_bl_for_each_entry_rcu(). + */ +static inline void hlist_bl_del_init_rcu(struct hlist_bl_node *n) +{ + if (!hlist_bl_unhashed(n)) { + __hlist_bl_del(n); + n->pprev = NULL; + } +} + +/** + * hlist_bl_del_rcu - deletes entry from hash list without re-initialization + * @n: the element to delete from the hash list. + * + * Note: hlist_bl_unhashed() on entry does not return true after this, + * the entry is in an undefined state. It is useful for RCU based + * lockfree traversal. + * + * In particular, it means that we can not poison the forward + * pointers that may still be used for walking the hash list. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_bl_add_head_rcu() + * or hlist_bl_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_bl_for_each_entry(). + */ +static inline void hlist_bl_del_rcu(struct hlist_bl_node *n) +{ + __hlist_bl_del(n); + n->pprev = LIST_POISON2; +} + +/** + * hlist_bl_add_head_rcu + * @n: the element to add to the hash list. + * @h: the list to add to. + * + * Description: + * Adds the specified element to the specified hlist_bl, + * while permitting racing traversals. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_bl_add_head_rcu() + * or hlist_bl_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_bl_for_each_entry_rcu(), used to prevent memory-consistency + * problems on Alpha CPUs. Regardless of the type of CPU, the + * list-traversal primitive must be guarded by rcu_read_lock(). + */ +static inline void hlist_bl_add_head_rcu(struct hlist_bl_node *n, + struct hlist_bl_head *h) +{ + struct hlist_bl_node *first; + + /* don't need hlist_bl_first_rcu because we're under lock */ + first = hlist_bl_first(h); + + n->next = first; + if (first) + first->pprev = &n->next; + n->pprev = &h->first; + + /* need _rcu because we can have concurrent lock free readers */ + hlist_bl_set_first_rcu(h, n); +} +/** + * hlist_bl_for_each_entry_rcu - iterate over rcu list of given type + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct hlist_bl_node to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the hlist_bl_node within the struct. + * + */ +#define hlist_bl_for_each_entry_rcu(tpos, pos, head, member) \ + for (pos = hlist_bl_first_rcu(head); \ + pos && \ + ({ tpos = hlist_bl_entry(pos, typeof(*tpos), member); 1; }); \ + pos = rcu_dereference_raw(pos->next)) + +#endif diff --git a/include/linux/reiserfs_xattr.h b/include/linux/reiserfs_xattr.h index b2cf2089769..3b94c91f20a 100644 --- a/include/linux/reiserfs_xattr.h +++ b/include/linux/reiserfs_xattr.h @@ -41,7 +41,7 @@ int reiserfs_xattr_init(struct super_block *sb, int mount_flags); int reiserfs_lookup_privroot(struct super_block *sb); int reiserfs_delete_xattrs(struct inode *inode); int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs); -int reiserfs_permission(struct inode *inode, int mask); +int reiserfs_permission(struct inode *inode, int mask, unsigned int flags); #ifdef CONFIG_REISERFS_FS_XATTR #define has_xattr_dir(inode) (REISERFS_I(inode)->i_flags & i_has_xattr_dir) diff --git a/include/linux/security.h b/include/linux/security.h index d47a4c24b3e..1ac42475ea0 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -457,7 +457,6 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * called when the actual read/write operations are performed. * @inode contains the inode structure to check. * @mask contains the permission mask. - * @nd contains the nameidata (may be NULL). * Return 0 if permission is granted. * @inode_setattr: * Check permission before setting file attributes. Note that the kernel @@ -1713,6 +1712,7 @@ int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry, int security_inode_readlink(struct dentry *dentry); int security_inode_follow_link(struct dentry *dentry, struct nameidata *nd); int security_inode_permission(struct inode *inode, int mask); +int security_inode_exec_permission(struct inode *inode, unsigned int flags); int security_inode_setattr(struct dentry *dentry, struct iattr *attr); int security_inode_getattr(struct vfsmount *mnt, struct dentry *dentry); int security_inode_setxattr(struct dentry *dentry, const char *name, @@ -2102,6 +2102,12 @@ static inline int security_inode_permission(struct inode *inode, int mask) return 0; } +static inline int security_inode_exec_permission(struct inode *inode, + unsigned int flags) +{ + return 0; +} + static inline int security_inode_setattr(struct dentry *dentry, struct iattr *attr) { diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 632205ccc25..e98cd2e5719 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -107,7 +107,7 @@ static __always_inline int read_seqretry(const seqlock_t *sl, unsigned start) { smp_rmb(); - return (sl->sequence != start); + return unlikely(sl->sequence != start); } @@ -125,14 +125,25 @@ typedef struct seqcount { #define SEQCNT_ZERO { 0 } #define seqcount_init(x) do { *(x) = (seqcount_t) SEQCNT_ZERO; } while (0) -/* Start of read using pointer to a sequence counter only. */ -static inline unsigned read_seqcount_begin(const seqcount_t *s) +/** + * __read_seqcount_begin - begin a seq-read critical section (without barrier) + * @s: pointer to seqcount_t + * Returns: count to be passed to read_seqcount_retry + * + * __read_seqcount_begin is like read_seqcount_begin, but has no smp_rmb() + * barrier. Callers should ensure that smp_rmb() or equivalent ordering is + * provided before actually loading any of the variables that are to be + * protected in this critical section. + * + * Use carefully, only in critical code, and comment how the barrier is + * provided. + */ +static inline unsigned __read_seqcount_begin(const seqcount_t *s) { unsigned ret; repeat: ret = s->sequence; - smp_rmb(); if (unlikely(ret & 1)) { cpu_relax(); goto repeat; @@ -140,14 +151,56 @@ repeat: return ret; } -/* - * Test if reader processed invalid data because sequence number has changed. +/** + * read_seqcount_begin - begin a seq-read critical section + * @s: pointer to seqcount_t + * Returns: count to be passed to read_seqcount_retry + * + * read_seqcount_begin opens a read critical section of the given seqcount. + * Validity of the critical section is tested by checking read_seqcount_retry + * function. + */ +static inline unsigned read_seqcount_begin(const seqcount_t *s) +{ + unsigned ret = __read_seqcount_begin(s); + smp_rmb(); + return ret; +} + +/** + * __read_seqcount_retry - end a seq-read critical section (without barrier) + * @s: pointer to seqcount_t + * @start: count, from read_seqcount_begin + * Returns: 1 if retry is required, else 0 + * + * __read_seqcount_retry is like read_seqcount_retry, but has no smp_rmb() + * barrier. Callers should ensure that smp_rmb() or equivalent ordering is + * provided before actually loading any of the variables that are to be + * protected in this critical section. + * + * Use carefully, only in critical code, and comment how the barrier is + * provided. + */ +static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start) +{ + return unlikely(s->sequence != start); +} + +/** + * read_seqcount_retry - end a seq-read critical section + * @s: pointer to seqcount_t + * @start: count, from read_seqcount_begin + * Returns: 1 if retry is required, else 0 + * + * read_seqcount_retry closes a read critical section of the given seqcount. + * If the critical section was invalid, it must be ignored (and typically + * retried). */ static inline int read_seqcount_retry(const seqcount_t *s, unsigned start) { smp_rmb(); - return s->sequence != start; + return __read_seqcount_retry(s, start); } @@ -167,6 +220,19 @@ static inline void write_seqcount_end(seqcount_t *s) s->sequence++; } +/** + * write_seqcount_barrier - invalidate in-progress read-side seq operations + * @s: pointer to seqcount_t + * + * After write_seqcount_barrier, no read-side seq operations will complete + * successfully and see data older than this. + */ +static inline void write_seqcount_barrier(seqcount_t *s) +{ + smp_wmb(); + s->sequence+=2; +} + /* * Possible sw/hw IRQ protected versions of the interfaces. */ diff --git a/include/linux/slab.h b/include/linux/slab.h index 59260e21bdf..fa9086647eb 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -106,8 +106,6 @@ int kmem_cache_shrink(struct kmem_cache *); void kmem_cache_free(struct kmem_cache *, void *); unsigned int kmem_cache_size(struct kmem_cache *); const char *kmem_cache_name(struct kmem_cache *); -int kern_ptr_validate(const void *ptr, unsigned long size); -int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr); /* * Please use this macro to create slab caches. Simply specify the |