diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-01-07 08:56:33 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-01-07 08:56:33 -0800 |
commit | b4a45f5fe8078bfc10837dbd5b98735058bc4698 (patch) | |
tree | df6f13a27610a3ec7eb4a661448cd779a8f84c79 /include | |
parent | 01539ba2a706ab7d35fc0667dff919ade7f87d63 (diff) | |
parent | b3e19d924b6eaf2ca7d22cba99a517c5171007b6 (diff) |
Merge branch 'vfs-scale-working' of git://git.kernel.org/pub/scm/linux/kernel/git/npiggin/linux-npiggin
* 'vfs-scale-working' of git://git.kernel.org/pub/scm/linux/kernel/git/npiggin/linux-npiggin: (57 commits)
fs: scale mntget/mntput
fs: rename vfsmount counter helpers
fs: implement faster dentry memcmp
fs: prefetch inode data in dcache lookup
fs: improve scalability of pseudo filesystems
fs: dcache per-inode inode alias locking
fs: dcache per-bucket dcache hash locking
bit_spinlock: add required includes
kernel: add bl_list
xfs: provide simple rcu-walk ACL implementation
btrfs: provide simple rcu-walk ACL implementation
ext2,3,4: provide simple rcu-walk ACL implementation
fs: provide simple rcu-walk generic_check_acl implementation
fs: provide rcu-walk aware permission i_ops
fs: rcu-walk aware d_revalidate method
fs: cache optimise dentry and inode for rcu-walk
fs: dcache reduce branches in lookup path
fs: dcache remove d_mounted
fs: fs_struct use seqlock
fs: rcu-walk for path lookup
...
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/bit_spinlock.h | 4 | ||||
-rw-r--r-- | include/linux/coda_linux.h | 2 | ||||
-rw-r--r-- | include/linux/dcache.h | 243 | ||||
-rw-r--r-- | include/linux/fs.h | 63 | ||||
-rw-r--r-- | include/linux/fs_struct.h | 3 | ||||
-rw-r--r-- | include/linux/fsnotify.h | 2 | ||||
-rw-r--r-- | include/linux/fsnotify_backend.h | 11 | ||||
-rw-r--r-- | include/linux/generic_acl.h | 2 | ||||
-rw-r--r-- | include/linux/list_bl.h | 144 | ||||
-rw-r--r-- | include/linux/mount.h | 53 | ||||
-rw-r--r-- | include/linux/namei.h | 16 | ||||
-rw-r--r-- | include/linux/ncp_fs.h | 4 | ||||
-rw-r--r-- | include/linux/nfs_fs.h | 2 | ||||
-rw-r--r-- | include/linux/path.h | 2 | ||||
-rw-r--r-- | include/linux/posix_acl.h | 19 | ||||
-rw-r--r-- | include/linux/rculist_bl.h | 127 | ||||
-rw-r--r-- | include/linux/reiserfs_xattr.h | 2 | ||||
-rw-r--r-- | include/linux/security.h | 8 | ||||
-rw-r--r-- | include/linux/seqlock.h | 80 | ||||
-rw-r--r-- | include/linux/slab.h | 2 |
20 files changed, 586 insertions, 203 deletions
diff --git a/include/linux/bit_spinlock.h b/include/linux/bit_spinlock.h index 7113a32a86e..e612575a259 100644 --- a/include/linux/bit_spinlock.h +++ b/include/linux/bit_spinlock.h @@ -1,6 +1,10 @@ #ifndef __LINUX_BIT_SPINLOCK_H #define __LINUX_BIT_SPINLOCK_H +#include <linux/kernel.h> +#include <linux/preempt.h> +#include <asm/atomic.h> + /* * bit-based spin_lock() * diff --git a/include/linux/coda_linux.h b/include/linux/coda_linux.h index 2e914d0771b..4ccc59c1ea8 100644 --- a/include/linux/coda_linux.h +++ b/include/linux/coda_linux.h @@ -37,7 +37,7 @@ extern const struct file_operations coda_ioctl_operations; /* operations shared over more than one file */ int coda_open(struct inode *i, struct file *f); int coda_release(struct inode *i, struct file *f); -int coda_permission(struct inode *inode, int mask); +int coda_permission(struct inode *inode, int mask, unsigned int flags); int coda_revalidate_inode(struct dentry *); int coda_getattr(struct vfsmount *, struct dentry *, struct kstat *); int coda_setattr(struct dentry *, struct iattr *); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 6a4aea30aa0..bd07758943e 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -4,7 +4,9 @@ #include <asm/atomic.h> #include <linux/list.h> #include <linux/rculist.h> +#include <linux/rculist_bl.h> #include <linux/spinlock.h> +#include <linux/seqlock.h> #include <linux/cache.h> #include <linux/rcupdate.h> @@ -45,6 +47,27 @@ struct dentry_stat_t { }; extern struct dentry_stat_t dentry_stat; +/* + * Compare 2 name strings, return 0 if they match, otherwise non-zero. + * The strings are both count bytes long, and count is non-zero. + */ +static inline int dentry_cmp(const unsigned char *cs, size_t scount, + const unsigned char *ct, size_t tcount) +{ + int ret; + if (scount != tcount) + return 1; + do { + ret = (*cs != *ct); + if (ret) + break; + cs++; + ct++; + tcount--; + } while (tcount); + return ret; +} + /* Name hashing routines. Initial hash value */ /* Hash courtesy of the R5 hash in reiserfs modulo sign bits */ #define init_name_hash() 0 @@ -81,25 +104,33 @@ full_name_hash(const unsigned char *name, unsigned int len) * large memory footprint increase). */ #ifdef CONFIG_64BIT -#define DNAME_INLINE_LEN_MIN 32 /* 192 bytes */ +# define DNAME_INLINE_LEN 32 /* 192 bytes */ #else -#define DNAME_INLINE_LEN_MIN 40 /* 128 bytes */ +# ifdef CONFIG_SMP +# define DNAME_INLINE_LEN 36 /* 128 bytes */ +# else +# define DNAME_INLINE_LEN 40 /* 128 bytes */ +# endif #endif struct dentry { - atomic_t d_count; + /* RCU lookup touched fields */ unsigned int d_flags; /* protected by d_lock */ - spinlock_t d_lock; /* per dentry lock */ - int d_mounted; - struct inode *d_inode; /* Where the name belongs to - NULL is - * negative */ - /* - * The next three fields are touched by __d_lookup. Place them here - * so they all fit in a cache line. - */ - struct hlist_node d_hash; /* lookup hash list */ + seqcount_t d_seq; /* per dentry seqlock */ + struct hlist_bl_node d_hash; /* lookup hash list */ struct dentry *d_parent; /* parent directory */ struct qstr d_name; + struct inode *d_inode; /* Where the name belongs to - NULL is + * negative */ + unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */ + + /* Ref lookup also touches following */ + unsigned int d_count; /* protected by d_lock */ + spinlock_t d_lock; /* per dentry lock */ + const struct dentry_operations *d_op; + struct super_block *d_sb; /* The root of the dentry tree */ + unsigned long d_time; /* used by d_revalidate */ + void *d_fsdata; /* fs-specific data */ struct list_head d_lru; /* LRU list */ /* @@ -111,12 +142,6 @@ struct dentry { } d_u; struct list_head d_subdirs; /* our children */ struct list_head d_alias; /* inode alias list */ - unsigned long d_time; /* used by d_revalidate */ - const struct dentry_operations *d_op; - struct super_block *d_sb; /* The root of the dentry tree */ - void *d_fsdata; /* fs-specific data */ - - unsigned char d_iname[DNAME_INLINE_LEN_MIN]; /* small names */ }; /* @@ -133,96 +158,61 @@ enum dentry_d_lock_class struct dentry_operations { int (*d_revalidate)(struct dentry *, struct nameidata *); - int (*d_hash) (struct dentry *, struct qstr *); - int (*d_compare) (struct dentry *, struct qstr *, struct qstr *); - int (*d_delete)(struct dentry *); + int (*d_hash)(const struct dentry *, const struct inode *, + struct qstr *); + int (*d_compare)(const struct dentry *, const struct inode *, + const struct dentry *, const struct inode *, + unsigned int, const char *, const struct qstr *); + int (*d_delete)(const struct dentry *); void (*d_release)(struct dentry *); void (*d_iput)(struct dentry *, struct inode *); char *(*d_dname)(struct dentry *, char *, int); -}; - -/* the dentry parameter passed to d_hash and d_compare is the parent - * directory of the entries to be compared. It is used in case these - * functions need any directory specific information for determining - * equivalency classes. Using the dentry itself might not work, as it - * might be a negative dentry which has no information associated with - * it */ +} ____cacheline_aligned; /* -locking rules: - big lock dcache_lock d_lock may block -d_revalidate: no no no yes -d_hash no no no yes -d_compare: no yes yes no -d_delete: no yes no no -d_release: no no no yes -d_iput: no no no yes + * Locking rules for dentry_operations callbacks are to be found in + * Documentation/filesystems/Locking. Keep it updated! + * + * FUrther descriptions are found in Documentation/filesystems/vfs.txt. + * Keep it updated too! */ /* d_flags entries */ #define DCACHE_AUTOFS_PENDING 0x0001 /* autofs: "under construction" */ -#define DCACHE_NFSFS_RENAMED 0x0002 /* this dentry has been "silly - * renamed" and has to be - * deleted on the last dput() - */ -#define DCACHE_DISCONNECTED 0x0004 - /* This dentry is possibly not currently connected to the dcache tree, - * in which case its parent will either be itself, or will have this - * flag as well. nfsd will not use a dentry with this bit set, but will - * first endeavour to clear the bit either by discovering that it is - * connected, or by performing lookup operations. Any filesystem which - * supports nfsd_operations MUST have a lookup function which, if it finds - * a directory inode with a DCACHE_DISCONNECTED dentry, will d_move - * that dentry into place and return that dentry rather than the passed one, - * typically using d_splice_alias. - */ +#define DCACHE_NFSFS_RENAMED 0x0002 + /* this dentry has been "silly renamed" and has to be deleted on the last + * dput() */ + +#define DCACHE_DISCONNECTED 0x0004 + /* This dentry is possibly not currently connected to the dcache tree, in + * which case its parent will either be itself, or will have this flag as + * well. nfsd will not use a dentry with this bit set, but will first + * endeavour to clear the bit either by discovering that it is connected, + * or by performing lookup operations. Any filesystem which supports + * nfsd_operations MUST have a lookup function which, if it finds a + * directory inode with a DCACHE_DISCONNECTED dentry, will d_move that + * dentry into place and return that dentry rather than the passed one, + * typically using d_splice_alias. */ #define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */ #define DCACHE_UNHASHED 0x0010 - -#define DCACHE_INOTIFY_PARENT_WATCHED 0x0020 /* Parent inode is watched by inotify */ +#define DCACHE_INOTIFY_PARENT_WATCHED 0x0020 + /* Parent inode is watched by inotify */ #define DCACHE_COOKIE 0x0040 /* For use by dcookie subsystem */ - -#define DCACHE_FSNOTIFY_PARENT_WATCHED 0x0080 /* Parent inode is watched by some fsnotify listener */ +#define DCACHE_FSNOTIFY_PARENT_WATCHED 0x0080 + /* Parent inode is watched by some fsnotify listener */ #define DCACHE_CANT_MOUNT 0x0100 +#define DCACHE_GENOCIDE 0x0200 +#define DCACHE_MOUNTED 0x0400 /* is a mountpoint */ -extern spinlock_t dcache_lock; -extern seqlock_t rename_lock; - -/** - * d_drop - drop a dentry - * @dentry: dentry to drop - * - * d_drop() unhashes the entry from the parent dentry hashes, so that it won't - * be found through a VFS lookup any more. Note that this is different from - * deleting the dentry - d_delete will try to mark the dentry negative if - * possible, giving a successful _negative_ lookup, while d_drop will - * just make the cache lookup fail. - * - * d_drop() is used mainly for stuff that wants to invalidate a dentry for some - * reason (NFS timeouts or autofs deletes). - * - * __d_drop requires dentry->d_lock. - */ - -static inline void __d_drop(struct dentry *dentry) -{ - if (!(dentry->d_flags & DCACHE_UNHASHED)) { - dentry->d_flags |= DCACHE_UNHASHED; - hlist_del_rcu(&dentry->d_hash); - } -} +#define DCACHE_OP_HASH 0x1000 +#define DCACHE_OP_COMPARE 0x2000 +#define DCACHE_OP_REVALIDATE 0x4000 +#define DCACHE_OP_DELETE 0x8000 -static inline void d_drop(struct dentry *dentry) -{ - spin_lock(&dcache_lock); - spin_lock(&dentry->d_lock); - __d_drop(dentry); - spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); -} +extern seqlock_t rename_lock; static inline int dname_external(struct dentry *dentry) { @@ -235,10 +225,14 @@ static inline int dname_external(struct dentry *dentry) extern void d_instantiate(struct dentry *, struct inode *); extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *); extern struct dentry * d_materialise_unique(struct dentry *, struct inode *); +extern void __d_drop(struct dentry *dentry); +extern void d_drop(struct dentry *dentry); extern void d_delete(struct dentry *); +extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op); /* allocate/de-allocate */ extern struct dentry * d_alloc(struct dentry *, const struct qstr *); +extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *); extern struct dentry * d_splice_alias(struct inode *, struct dentry *); extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *); extern struct dentry * d_obtain_alias(struct inode *); @@ -296,14 +290,40 @@ static inline struct dentry *d_add_unique(struct dentry *entry, struct inode *in return res; } +extern void dentry_update_name_case(struct dentry *, struct qstr *); + /* used for rename() and baskets */ extern void d_move(struct dentry *, struct dentry *); extern struct dentry *d_ancestor(struct dentry *, struct dentry *); /* appendix may either be NULL or be used for transname suffixes */ -extern struct dentry * d_lookup(struct dentry *, struct qstr *); -extern struct dentry * __d_lookup(struct dentry *, struct qstr *); -extern struct dentry * d_hash_and_lookup(struct dentry *, struct qstr *); +extern struct dentry *d_lookup(struct dentry *, struct qstr *); +extern struct dentry *d_hash_and_lookup(struct dentry *, struct qstr *); +extern struct dentry *__d_lookup(struct dentry *, struct qstr *); +extern struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name, + unsigned *seq, struct inode **inode); + +/** + * __d_rcu_to_refcount - take a refcount on dentry if sequence check is ok + * @dentry: dentry to take a ref on + * @seq: seqcount to verify against + * @Returns: 0 on failure, else 1. + * + * __d_rcu_to_refcount operates on a dentry,seq pair that was returned + * by __d_lookup_rcu, to get a reference on an rcu-walk dentry. + */ +static inline int __d_rcu_to_refcount(struct dentry *dentry, unsigned seq) +{ + int ret = 0; + + assert_spin_locked(&dentry->d_lock); + if (!read_seqcount_retry(&dentry->d_seq, seq)) { + ret = 1; + dentry->d_count++; + } + + return ret; +} /* validate "insecure" dentry pointer */ extern int d_validate(struct dentry *, struct dentry *); @@ -316,34 +336,37 @@ extern char *dynamic_dname(struct dentry *, char *, int, const char *, ...); extern char *__d_path(const struct path *path, struct path *root, char *, int); extern char *d_path(const struct path *, char *, int); extern char *d_path_with_unreachable(const struct path *, char *, int); -extern char *__dentry_path(struct dentry *, char *, int); +extern char *dentry_path_raw(struct dentry *, char *, int); extern char *dentry_path(struct dentry *, char *, int); /* Allocation counts.. */ /** - * dget, dget_locked - get a reference to a dentry + * dget, dget_dlock - get a reference to a dentry * @dentry: dentry to get a reference to * * Given a dentry or %NULL pointer increment the reference count * if appropriate and return the dentry. A dentry will not be - * destroyed when it has references. dget() should never be - * called for dentries with zero reference counter. For these cases - * (preferably none, functions in dcache.c are sufficient for normal - * needs and they take necessary precautions) you should hold dcache_lock - * and call dget_locked() instead of dget(). + * destroyed when it has references. */ - +static inline struct dentry *dget_dlock(struct dentry *dentry) +{ + if (dentry) + dentry->d_count++; + return dentry; +} + static inline struct dentry *dget(struct dentry *dentry) { if (dentry) { - BUG_ON(!atomic_read(&dentry->d_count)); - atomic_inc(&dentry->d_count); + spin_lock(&dentry->d_lock); + dget_dlock(dentry); + spin_unlock(&dentry->d_lock); } return dentry; } -extern struct dentry * dget_locked(struct dentry *); +extern struct dentry *dget_parent(struct dentry *dentry); /** * d_unhashed - is dentry hashed @@ -374,21 +397,11 @@ static inline void dont_mount(struct dentry *dentry) spin_unlock(&dentry->d_lock); } -static inline struct dentry *dget_parent(struct dentry *dentry) -{ - struct dentry *ret; - - spin_lock(&dentry->d_lock); - ret = dget(dentry->d_parent); - spin_unlock(&dentry->d_lock); - return ret; -} - extern void dput(struct dentry *); static inline int d_mountpoint(struct dentry *dentry) { - return dentry->d_mounted; + return dentry->d_flags & DCACHE_MOUNTED; } extern struct vfsmount *lookup_mnt(struct path *); diff --git a/include/linux/fs.h b/include/linux/fs.h index 090f0eacde2..baf3e556ff0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -392,6 +392,7 @@ struct inodes_stat_t { #include <linux/capability.h> #include <linux/semaphore.h> #include <linux/fiemap.h> +#include <linux/rculist_bl.h> #include <asm/atomic.h> #include <asm/byteorder.h> @@ -733,16 +734,31 @@ struct posix_acl; #define ACL_NOT_CACHED ((void *)(-1)) struct inode { + /* RCU path lookup touches following: */ + umode_t i_mode; + uid_t i_uid; + gid_t i_gid; + const struct inode_operations *i_op; + struct super_block *i_sb; + + spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ + unsigned int i_flags; + struct mutex i_mutex; + + unsigned long i_state; + unsigned long dirtied_when; /* jiffies of first dirtying */ + struct hlist_node i_hash; struct list_head i_wb_list; /* backing dev IO list */ struct list_head i_lru; /* inode LRU list */ struct list_head i_sb_list; - struct list_head i_dentry; + union { + struct list_head i_dentry; + struct rcu_head i_rcu; + }; unsigned long i_ino; atomic_t i_count; unsigned int i_nlink; - uid_t i_uid; - gid_t i_gid; dev_t i_rdev; unsigned int i_blkbits; u64 i_version; @@ -755,13 +771,8 @@ struct inode { struct timespec i_ctime; blkcnt_t i_blocks; unsigned short i_bytes; - umode_t i_mode; - spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ - struct mutex i_mutex; struct rw_semaphore i_alloc_sem; - const struct inode_operations *i_op; const struct file_operations *i_fop; /* former ->i_op->default_file_ops */ - struct super_block *i_sb; struct file_lock *i_flock; struct address_space *i_mapping; struct address_space i_data; @@ -782,11 +793,6 @@ struct inode { struct hlist_head i_fsnotify_marks; #endif - unsigned long i_state; - unsigned long dirtied_when; /* jiffies of first dirtying */ - - unsigned int i_flags; - #ifdef CONFIG_IMA /* protected by i_lock */ unsigned int i_readcount; /* struct files open RO */ @@ -1372,13 +1378,13 @@ struct super_block { const struct xattr_handler **s_xattr; struct list_head s_inodes; /* all inodes */ - struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */ + struct hlist_bl_head s_anon; /* anonymous dentries for (nfs) exporting */ #ifdef CONFIG_SMP struct list_head __percpu *s_files; #else struct list_head s_files; #endif - /* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */ + /* s_dentry_lru, s_nr_dentry_unused protected by dcache.c lru locks */ struct list_head s_dentry_lru; /* unused dentry lru */ int s_nr_dentry_unused; /* # of dentry on lru */ @@ -1545,9 +1551,18 @@ struct file_operations { int (*setlease)(struct file *, long, struct file_lock **); }; +#define IPERM_FLAG_RCU 0x0001 + struct inode_operations { - int (*create) (struct inode *,struct dentry *,int, struct nameidata *); struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *); + void * (*follow_link) (struct dentry *, struct nameidata *); + int (*permission) (struct inode *, int, unsigned int); + int (*check_acl)(struct inode *, int, unsigned int); + + int (*readlink) (struct dentry *, char __user *,int); + void (*put_link) (struct dentry *, struct nameidata *, void *); + + int (*create) (struct inode *,struct dentry *,int, struct nameidata *); int (*link) (struct dentry *,struct inode *,struct dentry *); int (*unlink) (struct inode *,struct dentry *); int (*symlink) (struct inode *,struct dentry *,const char *); @@ -1556,12 +1571,7 @@ struct inode_operations { int (*mknod) (struct inode *,struct dentry *,int,dev_t); int (*rename) (struct inode *, struct dentry *, struct inode *, struct dentry *); - int (*readlink) (struct dentry *, char __user *,int); - void * (*follow_link) (struct dentry *, struct nameidata *); - void (*put_link) (struct dentry *, struct nameidata *, void *); void (*truncate) (struct inode *); - int (*permission) (struct inode *, int); - int (*check_acl)(struct inode *, int); int (*setattr) (struct dentry *, struct iattr *); int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); @@ -1573,7 +1583,7 @@ struct inode_operations { loff_t len); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); -}; +} ____cacheline_aligned; struct seq_file; @@ -2158,8 +2168,8 @@ extern sector_t bmap(struct inode *, sector_t); #endif extern int notify_change(struct dentry *, struct iattr *); extern int inode_permission(struct inode *, int); -extern int generic_permission(struct inode *, int, - int (*check_acl)(struct inode *, int)); +extern int generic_permission(struct inode *, int, unsigned int, + int (*check_acl)(struct inode *, int, unsigned int)); static inline bool execute_ok(struct inode *inode) { @@ -2230,6 +2240,7 @@ extern void iget_failed(struct inode *); extern void end_writeback(struct inode *); extern void __destroy_inode(struct inode *); extern struct inode *new_inode(struct super_block *); +extern void free_inode_nonrcu(struct inode *inode); extern int should_remove_suid(struct dentry *); extern int file_remove_suid(struct file *); @@ -2446,6 +2457,10 @@ static inline ino_t parent_ino(struct dentry *dentry) { ino_t res; + /* + * Don't strictly need d_lock here? If the parent ino could change + * then surely we'd have a deeper race in the caller? + */ spin_lock(&dentry->d_lock); res = dentry->d_parent->d_inode->i_ino; spin_unlock(&dentry->d_lock); diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h index a42b5bf02f8..003dc0fd734 100644 --- a/include/linux/fs_struct.h +++ b/include/linux/fs_struct.h @@ -2,10 +2,13 @@ #define _LINUX_FS_STRUCT_H #include <linux/path.h> +#include <linux/spinlock.h> +#include <linux/seqlock.h> struct fs_struct { int users; spinlock_t lock; + seqcount_t seq; int umask; int in_exec; struct path root, pwd; diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index b10bcdeaef7..2a53f10712b 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -17,7 +17,6 @@ /* * fsnotify_d_instantiate - instantiate a dentry for inode - * Called with dcache_lock held. */ static inline void fsnotify_d_instantiate(struct dentry *dentry, struct inode *inode) @@ -62,7 +61,6 @@ static inline int fsnotify_perm(struct file *file, int mask) /* * fsnotify_d_move - dentry has been moved - * Called with dcache_lock and dentry->d_lock held. */ static inline void fsnotify_d_move(struct dentry *dentry) { diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 7380763595d..69ad89b5048 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -329,9 +329,15 @@ static inline void __fsnotify_update_dcache_flags(struct dentry *dentry) { struct dentry *parent; - assert_spin_locked(&dcache_lock); assert_spin_locked(&dentry->d_lock); + /* + * Serialisation of setting PARENT_WATCHED on the dentries is provided + * by d_lock. If inotify_inode_watched changes after we have taken + * d_lock, the following __fsnotify_update_child_dentry_flags call will + * find our entry, so it will spin until we complete here, and update + * us with the new state. + */ parent = dentry->d_parent; if (parent->d_inode && fsnotify_inode_watches_children(parent->d_inode)) dentry->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED; @@ -341,15 +347,12 @@ static inline void __fsnotify_update_dcache_flags(struct dentry *dentry) /* * fsnotify_d_instantiate - instantiate a dentry for inode - * Called with dcache_lock held. */ static inline void __fsnotify_d_instantiate(struct dentry *dentry, struct inode *inode) { if (!inode) return; - assert_spin_locked(&dcache_lock); - spin_lock(&dentry->d_lock); __fsnotify_update_dcache_flags(dentry); spin_unlock(&dentry->d_lock); diff --git a/include/linux/generic_acl.h b/include/linux/generic_acl.h index 574bea4013b..0437e377b55 100644 --- a/include/linux/generic_acl.h +++ b/include/linux/generic_acl.h @@ -10,6 +10,6 @@ extern const struct xattr_handler generic_acl_default_handler; int generic_acl_init(struct inode *, struct inode *); int generic_acl_chmod(struct inode *); -int generic_check_acl(struct inode *inode, int mask); +int generic_check_acl(struct inode *inode, int mask, unsigned int flags); #endif /* LINUX_GENERIC_ACL_H */ diff --git a/include/linux/list_bl.h b/include/linux/list_bl.h new file mode 100644 index 00000000000..9ee97e7f2be --- /dev/null +++ b/include/linux/list_bl.h @@ -0,0 +1,144 @@ +#ifndef _LINUX_LIST_BL_H +#define _LINUX_LIST_BL_H + +#include <linux/list.h> + +/* + * Special version of lists, where head of the list has a lock in the lowest + * bit. This is useful for scalable hash tables without increasing memory + * footprint overhead. + * + * For modification operations, the 0 bit of hlist_bl_head->first + * pointer must be set. + * + * With some small modifications, this can easily be adapted to store several + * arbitrary bits (not just a single lock bit), if the need arises to store + * some fast and compact auxiliary data. + */ + +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) +#define LIST_BL_LOCKMASK 1UL +#else +#define LIST_BL_LOCKMASK 0UL +#endif + +#ifdef CONFIG_DEBUG_LIST +#define LIST_BL_BUG_ON(x) BUG_ON(x) +#else +#define LIST_BL_BUG_ON(x) +#endif + + +struct hlist_bl_head { + struct hlist_bl_node *first; +}; + +struct hlist_bl_node { + struct hlist_bl_node *next, **pprev; +}; +#define INIT_HLIST_BL_HEAD(ptr) \ + ((ptr)->first = NULL) + +static inline void INIT_HLIST_BL_NODE(struct hlist_bl_node *h) +{ + h->next = NULL; + h->pprev = NULL; +} + +#define hlist_bl_entry(ptr, type, member) container_of(ptr,type,member) + +static inline int hlist_bl_unhashed(const struct hlist_bl_node *h) +{ + return !h->pprev; +} + +static inline struct hlist_bl_node *hlist_bl_first(struct hlist_bl_head *h) +{ + return (struct hlist_bl_node *) + ((unsigned long)h->first & ~LIST_BL_LOCKMASK); +} + +static inline void hlist_bl_set_first(struct hlist_bl_head *h, + struct hlist_bl_node *n) +{ + LIST_BL_BUG_ON((unsigned long)n & LIST_BL_LOCKMASK); + LIST_BL_BUG_ON(!((unsigned long)h->first & LIST_BL_LOCKMASK)); + h->first = (struct hlist_bl_node *)((unsigned long)n | LIST_BL_LOCKMASK); +} + +static inline int hlist_bl_empty(const struct hlist_bl_head *h) +{ + return !((unsigned long)h->first & ~LIST_BL_LOCKMASK); +} + +static inline void hlist_bl_add_head(struct hlist_bl_node *n, + struct hlist_bl_head *h) +{ + struct hlist_bl_node *first = hlist_bl_first(h); + + n->next = first; + if (first) + first->pprev = &n->next; + n->pprev = &h->first; + hlist_bl_set_first(h, n); +} + +static inline void __hlist_bl_del(struct hlist_bl_node *n) +{ + struct hlist_bl_node *next = n->next; + struct hlist_bl_node **pprev = n->pprev; + + LIST_BL_BUG_ON((unsigned long)n & LIST_BL_LOCKMASK); + + /* pprev may be `first`, so be careful not to lose the lock bit */ + *pprev = (struct hlist_bl_node *) + ((unsigned long)next | + ((unsigned long)*pprev & LIST_BL_LOCKMASK)); + if (next) + next->pprev = pprev; +} + +static inline void hlist_bl_del(struct hlist_bl_node *n) +{ + __hlist_bl_del(n); + n->next = LIST_POISON1; + n->pprev = LIST_POISON2; +} + +static inline void hlist_bl_del_init(struct hlist_bl_node *n) +{ + if (!hlist_bl_unhashed(n)) { + __hlist_bl_del(n); + INIT_HLIST_BL_NODE(n); + } +} + +/** + * hlist_bl_for_each_entry - iterate over list of given type + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct hlist_node to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the hlist_node within the struct. + * + */ +#define hlist_bl_for_each_entry(tpos, pos, head, member) \ + for (pos = hlist_bl_first(head); \ + pos && \ + ({ tpos = hlist_bl_entry(pos, typeof(*tpos), member); 1;}); \ + pos = pos->next) + +/** + * hlist_bl_for_each_entry_safe - iterate over list of given type safe against removal of list entry + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct hlist_node to use as a loop cursor. + * @n: another &struct hlist_node to use as temporary storage + * @head: the head for your list. + * @member: the name of the hlist_node within the struct. + */ +#define hlist_bl_for_each_entry_safe(tpos, pos, n, head, member) \ + for (pos = hlist_bl_first(head); \ + pos && ({ n = pos->next; 1; }) && \ + ({ tpos = hlist_bl_entry(pos, typeof(*tpos), member); 1;}); \ + pos = n) + +#endif diff --git a/include/linux/mount.h b/include/linux/mount.h index 5e7a59408dd..1869ea24a73 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -13,6 +13,7 @@ #include <linux/list.h> #include <linux/nodemask.h> #include <linux/spinlock.h> +#include <linux/seqlock.h> #include <asm/atomic.h> struct super_block; @@ -46,12 +47,24 @@ struct mnt_namespace; #define MNT_INTERNAL 0x4000 +struct mnt_pcp { + int mnt_count; + int mnt_writers; +}; + struct vfsmount { struct list_head mnt_hash; struct vfsmount *mnt_parent; /* fs we are mounted on */ struct dentry *mnt_mountpoint; /* dentry of mountpoint */ struct dentry *mnt_root; /* root of the mounted tree */ struct super_block *mnt_sb; /* pointer to superblock */ +#ifdef CONFIG_SMP + struct mnt_pcp __percpu *mnt_pcp; + atomic_t mnt_longrefs; +#else + int mnt_count; + int mnt_writers; +#endif struct list_head mnt_mounts; /* list of children, anchored here */ struct list_head mnt_child; /* and going through their mnt_child */ int mnt_flags; @@ -70,57 +83,25 @@ struct vfsmount { struct mnt_namespace *mnt_ns; /* containing namespace */ int mnt_id; /* mount identifier */ int mnt_group_id; /* peer group identifier */ - /* - * We put mnt_count & mnt_expiry_mark at the end of struct vfsmount - * to let these frequently modified fields in a separate cache line - * (so that reads of mnt_flags wont ping-pong on SMP machines) - */ - atomic_t mnt_count; int mnt_expiry_mark; /* true if marked for expiry */ int mnt_pinned; int mnt_ghosts; -#ifdef CONFIG_SMP - int __percpu *mnt_writers; -#else - int mnt_writers; -#endif }; -static inline int *get_mnt_writers_ptr(struct vfsmount *mnt) -{ -#ifdef CONFIG_SMP - return mnt->mnt_writers; -#else - return &mnt->mnt_writers; -#endif -} - -static inline struct vfsmount *mntget(struct vfsmount *mnt) -{ - if (mnt) - atomic_i |