diff options
Diffstat (limited to 'fs/dcache.c')
-rw-r--r-- | fs/dcache.c | 1375 |
1 files changed, 981 insertions, 394 deletions
diff --git a/fs/dcache.c b/fs/dcache.c index 23702a9d4e6..5699d4c027c 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -33,20 +33,58 @@ #include <linux/bootmem.h> #include <linux/fs_struct.h> #include <linux/hardirq.h> +#include <linux/bit_spinlock.h> +#include <linux/rculist_bl.h> #include "internal.h" +/* + * Usage: + * dcache->d_inode->i_lock protects: + * - i_dentry, d_alias, d_inode of aliases + * dcache_hash_bucket lock protects: + * - the dcache hash table + * s_anon bl list spinlock protects: + * - the s_anon list (see __d_drop) + * dcache_lru_lock protects: + * - the dcache lru lists and counters + * d_lock protects: + * - d_flags + * - d_name + * - d_lru + * - d_count + * - d_unhashed() + * - d_parent and d_subdirs + * - childrens' d_child and d_parent + * - d_alias, d_inode + * + * Ordering: + * dentry->d_inode->i_lock + * dentry->d_lock + * dcache_lru_lock + * dcache_hash_bucket lock + * s_anon lock + * + * If there is an ancestor relationship: + * dentry->d_parent->...->d_parent->d_lock + * ... + * dentry->d_parent->d_lock + * dentry->d_lock + * + * If no ancestor relationship: + * if (dentry1 < dentry2) + * dentry1->d_lock + * dentry2->d_lock + */ int sysctl_vfs_cache_pressure __read_mostly = 100; EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); - __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock); +static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock); __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); -EXPORT_SYMBOL(dcache_lock); +EXPORT_SYMBOL(rename_lock); static struct kmem_cache *dentry_cache __read_mostly; -#define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname)) - /* * This is the single most critical data structure when it comes * to the dcache: the hashtable for lookups. Somebody should try @@ -60,22 +98,51 @@ static struct kmem_cache *dentry_cache __read_mostly; static unsigned int d_hash_mask __read_mostly; static unsigned int d_hash_shift __read_mostly; -static struct hlist_head *dentry_hashtable __read_mostly; + +struct dcache_hash_bucket { + struct hlist_bl_head head; +}; +static struct dcache_hash_bucket *dentry_hashtable __read_mostly; + +static inline struct dcache_hash_bucket *d_hash(struct dentry *parent, + unsigned long hash) +{ + hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES; + hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS); + return dentry_hashtable + (hash & D_HASHMASK); +} + +static inline void spin_lock_bucket(struct dcache_hash_bucket *b) +{ + bit_spin_lock(0, (unsigned long *)&b->head.first); +} + +static inline void spin_unlock_bucket(struct dcache_hash_bucket *b) +{ + __bit_spin_unlock(0, (unsigned long *)&b->head.first); +} /* Statistics gathering. */ struct dentry_stat_t dentry_stat = { .age_limit = 45, }; -static struct percpu_counter nr_dentry __cacheline_aligned_in_smp; -static struct percpu_counter nr_dentry_unused __cacheline_aligned_in_smp; +static DEFINE_PER_CPU(unsigned int, nr_dentry); #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) +static int get_nr_dentry(void) +{ + int i; + int sum = 0; + for_each_possible_cpu(i) + sum += per_cpu(nr_dentry, i); + return sum < 0 ? 0 : sum; +} + int proc_nr_dentry(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - dentry_stat.nr_dentry = percpu_counter_sum_positive(&nr_dentry); - dentry_stat.nr_unused = percpu_counter_sum_positive(&nr_dentry_unused); + dentry_stat.nr_dentry = get_nr_dentry(); return proc_dointvec(table, write, buffer, lenp, ppos); } #endif @@ -91,35 +158,50 @@ static void __d_free(struct rcu_head *head) } /* - * no dcache_lock, please. + * no locks, please. */ static void d_free(struct dentry *dentry) { - percpu_counter_dec(&nr_dentry); + BUG_ON(dentry->d_count); + this_cpu_dec(nr_dentry); if (dentry->d_op && dentry->d_op->d_release) dentry->d_op->d_release(dentry); /* if dentry was never inserted into hash, immediate free is OK */ - if (hlist_unhashed(&dentry->d_hash)) + if (hlist_bl_unhashed(&dentry->d_hash)) __d_free(&dentry->d_u.d_rcu); else call_rcu(&dentry->d_u.d_rcu, __d_free); } +/** + * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups + * After this call, in-progress rcu-walk path lookup will fail. This + * should be called after unhashing, and after changing d_inode (if + * the dentry has not already been unhashed). + */ +static inline void dentry_rcuwalk_barrier(struct dentry *dentry) +{ + assert_spin_locked(&dentry->d_lock); + /* Go through a barrier */ + write_seqcount_barrier(&dentry->d_seq); +} + /* * Release the dentry's inode, using the filesystem - * d_iput() operation if defined. + * d_iput() operation if defined. Dentry has no refcount + * and is unhashed. */ static void dentry_iput(struct dentry * dentry) __releases(dentry->d_lock) - __releases(dcache_lock) + __releases(dentry->d_inode->i_lock) { struct inode *inode = dentry->d_inode; if (inode) { dentry->d_inode = NULL; list_del_init(&dentry->d_alias); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); + spin_unlock(&inode->i_lock); if (!inode->i_nlink) fsnotify_inoderemove(inode); if (dentry->d_op && dentry->d_op->d_iput) @@ -128,40 +210,72 @@ static void dentry_iput(struct dentry * dentry) iput(inode); } else { spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); } } /* - * dentry_lru_(add|del|move_tail) must be called with dcache_lock held. + * Release the dentry's inode, using the filesystem + * d_iput() operation if defined. dentry remains in-use. + */ +static void dentry_unlink_inode(struct dentry * dentry) + __releases(dentry->d_lock) + __releases(dentry->d_inode->i_lock) +{ + struct inode *inode = dentry->d_inode; + dentry->d_inode = NULL; + list_del_init(&dentry->d_alias); + dentry_rcuwalk_barrier(dentry); + spin_unlock(&dentry->d_lock); + spin_unlock(&inode->i_lock); + if (!inode->i_nlink) + fsnotify_inoderemove(inode); + if (dentry->d_op && dentry->d_op->d_iput) + dentry->d_op->d_iput(dentry, inode); + else + iput(inode); +} + +/* + * dentry_lru_(add|del|move_tail) must be called with d_lock held. */ static void dentry_lru_add(struct dentry *dentry) { if (list_empty(&dentry->d_lru)) { + spin_lock(&dcache_lru_lock); list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); dentry->d_sb->s_nr_dentry_unused++; - percpu_counter_inc(&nr_dentry_unused); + dentry_stat.nr_unused++; + spin_unlock(&dcache_lru_lock); } } +static void __dentry_lru_del(struct dentry *dentry) +{ + list_del_init(&dentry->d_lru); + dentry->d_sb->s_nr_dentry_unused--; + dentry_stat.nr_unused--; +} + static void dentry_lru_del(struct dentry *dentry) { if (!list_empty(&dentry->d_lru)) { - list_del_init(&dentry->d_lru); - dentry->d_sb->s_nr_dentry_unused--; - percpu_counter_dec(&nr_dentry_unused); + spin_lock(&dcache_lru_lock); + __dentry_lru_del(dentry); + spin_unlock(&dcache_lru_lock); } } static void dentry_lru_move_tail(struct dentry *dentry) { + spin_lock(&dcache_lru_lock); if (list_empty(&dentry->d_lru)) { list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); dentry->d_sb->s_nr_dentry_unused++; - percpu_counter_inc(&nr_dentry_unused); + dentry_stat.nr_unused++; } else { list_move_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); } + spin_unlock(&dcache_lru_lock); } /** @@ -171,22 +285,115 @@ static void dentry_lru_move_tail(struct dentry *dentry) * The dentry must already be unhashed and removed from the LRU. * * If this is the root of the dentry tree, return NULL. + * + * dentry->d_lock and parent->d_lock must be held by caller, and are dropped by + * d_kill. */ -static struct dentry *d_kill(struct dentry *dentry) +static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) __releases(dentry->d_lock) - __releases(dcache_lock) + __releases(parent->d_lock) + __releases(dentry->d_inode->i_lock) { - struct dentry *parent; - + dentry->d_parent = NULL; list_del(&dentry->d_u.d_child); - /*drops the locks, at that point nobody can reach this dentry */ + if (parent) + spin_unlock(&parent->d_lock); dentry_iput(dentry); + /* + * dentry_iput drops the locks, at which point nobody (except + * transient RCU lookups) can reach this dentry. + */ + d_free(dentry); + return parent; +} + +/** + * d_drop - drop a dentry + * @dentry: dentry to drop + * + * d_drop() unhashes the entry from the parent dentry hashes, so that it won't + * be found through a VFS lookup any more. Note that this is different from + * deleting the dentry - d_delete will try to mark the dentry negative if + * possible, giving a successful _negative_ lookup, while d_drop will + * just make the cache lookup fail. + * + * d_drop() is used mainly for stuff that wants to invalidate a dentry for some + * reason (NFS timeouts or autofs deletes). + * + * __d_drop requires dentry->d_lock. + */ +void __d_drop(struct dentry *dentry) +{ + if (!(dentry->d_flags & DCACHE_UNHASHED)) { + if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) { + bit_spin_lock(0, + (unsigned long *)&dentry->d_sb->s_anon.first); + dentry->d_flags |= DCACHE_UNHASHED; + hlist_bl_del_init(&dentry->d_hash); + __bit_spin_unlock(0, + (unsigned long *)&dentry->d_sb->s_anon.first); + } else { + struct dcache_hash_bucket *b; + b = d_hash(dentry->d_parent, dentry->d_name.hash); + spin_lock_bucket(b); + /* + * We may not actually need to put DCACHE_UNHASHED + * manipulations under the hash lock, but follow + * the principle of least surprise. + */ + dentry->d_flags |= DCACHE_UNHASHED; + hlist_bl_del_rcu(&dentry->d_hash); + spin_unlock_bucket(b); + dentry_rcuwalk_barrier(dentry); + } + } +} +EXPORT_SYMBOL(__d_drop); + +void d_drop(struct dentry *dentry) +{ + spin_lock(&dentry->d_lock); + __d_drop(dentry); + spin_unlock(&dentry->d_lock); +} +EXPORT_SYMBOL(d_drop); + +/* + * Finish off a dentry we've decided to kill. + * dentry->d_lock must be held, returns with it unlocked. + * If ref is non-zero, then decrement the refcount too. + * Returns dentry requiring refcount drop, or NULL if we're done. + */ +static inline struct dentry *dentry_kill(struct dentry *dentry, int ref) + __releases(dentry->d_lock) +{ + struct inode *inode; + struct dentry *parent; + + inode = dentry->d_inode; + if (inode && !spin_trylock(&inode->i_lock)) { +relock: + spin_unlock(&dentry->d_lock); + cpu_relax(); + return dentry; /* try again with same dentry */ + } if (IS_ROOT(dentry)) parent = NULL; else parent = dentry->d_parent; - d_free(dentry); - return parent; + if (parent && !spin_trylock(&parent->d_lock)) { + if (inode) + spin_unlock(&inode->i_lock); + goto relock; + } + + if (ref) + dentry->d_count--; + /* if dentry was on the d_lru list delete it from there */ + dentry_lru_del(dentry); + /* if it was on the hash then remove it */ + __d_drop(dentry); + return d_kill(dentry, parent); } /* @@ -214,34 +421,26 @@ static struct dentry *d_kill(struct dentry *dentry) * call the dentry unlink method as well as removing it from the queues and * releasing its resources. If the parent dentries were scheduled for release * they too may now get deleted. - * - * no dcache lock, please. */ - void dput(struct dentry *dentry) { if (!dentry) return; repeat: - if (atomic_read(&dentry->d_count) == 1) + if (dentry->d_count == 1) might_sleep(); - if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock)) - return; - spin_lock(&dentry->d_lock); - if (atomic_read(&dentry->d_count)) { + BUG_ON(!dentry->d_count); + if (dentry->d_count > 1) { + dentry->d_count--; spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); return; } - /* - * AV: ->d_delete() is _NOT_ allowed to block now. - */ - if (dentry->d_op && dentry->d_op->d_delete) { + if (dentry->d_flags & DCACHE_OP_DELETE) { if (dentry->d_op->d_delete(dentry)) - goto unhash_it; + goto kill_it; } /* Unreachable? Get rid of it */ @@ -252,16 +451,12 @@ repeat: dentry->d_flags |= DCACHE_REFERENCED; dentry_lru_add(dentry); - spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); + dentry->d_count--; + spin_unlock(&dentry->d_lock); return; -unhash_it: - __d_drop(dentry); kill_it: - /* if dentry was on the d_lru list delete it from there */ - dentry_lru_del(dentry); - dentry = d_kill(dentry); + dentry = dentry_kill(dentry, 1); if (dentry) goto repeat; } @@ -284,9 +479,9 @@ int d_invalidate(struct dentry * dentry) /* * If it's already been dropped, return OK. */ - spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); if (d_unhashed(dentry)) { - spin_unlock(&dcache_lock); + spin_unlock(&dentry->d_lock); return 0; } /* @@ -294,9 +489,9 @@ int d_invalidate(struct dentry * dentry) * to get rid of unused child entries. */ if (!list_empty(&dentry->d_subdirs)) { - spin_unlock(&dcache_lock); + spin_unlock(&dentry->d_lock); shrink_dcache_parent(dentry); - spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); } /* @@ -309,35 +504,61 @@ int d_invalidate(struct dentry * dentry) * we might still populate it if it was a * working directory or similar). */ - spin_lock(&dentry->d_lock); - if (atomic_read(&dentry->d_count) > 1) { + if (dentry->d_count > 1) { if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) { spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); return -EBUSY; } } __d_drop(dentry); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); return 0; } EXPORT_SYMBOL(d_invalidate); -/* This should be called _only_ with dcache_lock held */ -static inline struct dentry * __dget_locked(struct dentry *dentry) +/* This must be called with d_lock held */ +static inline void __dget_dlock(struct dentry *dentry) { - atomic_inc(&dentry->d_count); - dentry_lru_del(dentry); - return dentry; + dentry->d_count++; } -struct dentry * dget_locked(struct dentry *dentry) +static inline void __dget(struct dentry *dentry) { - return __dget_locked(dentry); + spin_lock(&dentry->d_lock); + __dget_dlock(dentry); + spin_unlock(&dentry->d_lock); +} + +struct dentry *dget_parent(struct dentry *dentry) +{ + struct dentry *ret; + +repeat: + /* + * Don't need rcu_dereference because we re-check it was correct under + * the lock. + */ + rcu_read_lock(); + ret = dentry->d_parent; + if (!ret) { + rcu_read_unlock(); + goto out; + } + spin_lock(&ret->d_lock); + if (unlikely(ret != dentry->d_parent)) { + spin_unlock(&ret->d_lock); + rcu_read_unlock(); + goto repeat; + } + rcu_read_unlock(); + BUG_ON(!ret->d_count); + ret->d_count++; + spin_unlock(&ret->d_lock); +out: + return ret; } -EXPORT_SYMBOL(dget_locked); +EXPORT_SYMBOL(dget_parent); /** * d_find_alias - grab a hashed alias of inode @@ -355,42 +576,51 @@ EXPORT_SYMBOL(dget_locked); * any other hashed alias over that one unless @want_discon is set, * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias. */ - -static struct dentry * __d_find_alias(struct inode *inode, int want_discon) +static struct dentry *__d_find_alias(struct inode *inode, int want_discon) { - struct list_head *head, *next, *tmp; - struct dentry *alias, *discon_alias=NULL; + struct dentry *alias, *discon_alias; - head = &inode->i_dentry; - next = inode->i_dentry.next; - while (next != head) { - tmp = next; - next = tmp->next; - prefetch(next); - alias = list_entry(tmp, struct dentry, d_alias); +again: + discon_alias = NULL; + list_for_each_entry(alias, &inode->i_dentry, d_alias) { + spin_lock(&alias->d_lock); if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) { if (IS_ROOT(alias) && - (alias->d_flags & DCACHE_DISCONNECTED)) + (alias->d_flags & DCACHE_DISCONNECTED)) { discon_alias = alias; - else if (!want_discon) { - __dget_locked(alias); + } else if (!want_discon) { + __dget_dlock(alias); + spin_unlock(&alias->d_lock); + return alias; + } + } + spin_unlock(&alias->d_lock); + } + if (discon_alias) { + alias = discon_alias; + spin_lock(&alias->d_lock); + if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) { + if (IS_ROOT(alias) && + (alias->d_flags & DCACHE_DISCONNECTED)) { + __dget_dlock(alias); + spin_unlock(&alias->d_lock); return alias; } } + spin_unlock(&alias->d_lock); + goto again; } - if (discon_alias) - __dget_locked(discon_alias); - return discon_alias; + return NULL; } -struct dentry * d_find_alias(struct inode *inode) +struct dentry *d_find_alias(struct inode *inode) { struct dentry *de = NULL; if (!list_empty(&inode->i_dentry)) { - spin_lock(&dcache_lock); + spin_lock(&inode->i_lock); de = __d_find_alias(inode, 0); - spin_unlock(&dcache_lock); + spin_unlock(&inode->i_lock); } return de; } @@ -404,54 +634,61 @@ void d_prune_aliases(struct inode *inode) { struct dentry *dentry; restart: - spin_lock(&dcache_lock); + spin_lock(&inode->i_lock); list_for_each_entry(dentry, &inode->i_dentry, d_alias) { spin_lock(&dentry->d_lock); - if (!atomic_read(&dentry->d_count)) { - __dget_locked(dentry); + if (!dentry->d_count) { + __dget_dlock(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); + spin_unlock(&inode->i_lock); dput(dentry); goto restart; } spin_unlock(&dentry->d_lock); } - spin_unlock(&dcache_lock); + spin_unlock(&inode->i_lock); } EXPORT_SYMBOL(d_prune_aliases); /* - * Throw away a dentry - free the inode, dput the parent. This requires that - * the LRU list has already been removed. + * Try to throw away a dentry - free the inode, dput the parent. + * Requires dentry->d_lock is held, and dentry->d_count == 0. + * Releases dentry->d_lock. * - * Try to prune ancestors as well. This is necessary to prevent - * quadratic behavior of shrink_dcache_parent(), but is also expected - * to be beneficial in reducing dentry cache fragmentation. + * This may fail if locks cannot be acquired no problem, just try again. */ -static void prune_one_dentry(struct dentry * dentry) +static void try_prune_one_dentry(struct dentry *dentry) __releases(dentry->d_lock) - __releases(dcache_lock) - __acquires(dcache_lock) { - __d_drop(dentry); - dentry = d_kill(dentry); + struct dentry *parent; + parent = dentry_kill(dentry, 0); /* - * Prune ancestors. Locking is simpler than in dput(), - * because dcache_lock needs to be taken anyway. + * If dentry_kill returns NULL, we have nothing more to do. + * if it returns the same dentry, trylocks failed. In either + * case, just loop again. + * + * Otherwise, we need to prune ancestors too. This is necessary + * to prevent quadratic behavior of shrink_dcache_parent(), but + * is also expected to be beneficial in reducing dentry cache + * fragmentation. */ - spin_lock(&dcache_lock); + if (!parent) + return; + if (parent == dentry) + return; + + /* Prune ancestors. */ + dentry = parent; while (dentry) { - if (!atomic_dec_and_lock(&dentry->d_count, &dentry->d_lock)) + spin_lock(&dentry->d_lock); + if (dentry->d_count > 1) { + dentry->d_count--; + spin_unlock(&dentry->d_lock); return; - - if (dentry->d_op && dentry->d_op->d_delete) - dentry->d_op->d_delete(dentry); - dentry_lru_del(dentry); - __d_drop(dentry); - dentry = d_kill(dentry); - spin_lock(&dcache_lock); + } + dentry = dentry_kill(dentry, 1); } } @@ -459,24 +696,35 @@ static void shrink_dentry_list(struct list_head *list) { struct dentry *dentry; - while (!list_empty(list)) { - dentry = list_entry(list->prev, struct dentry, d_lru); - dentry_lru_del(dentry); + rcu_read_lock(); + for (;;) { + dentry = list_entry_rcu(list->prev, struct dentry, d_lru); + if (&dentry->d_lru == list) + break; /* empty */ + spin_lock(&dentry->d_lock); + if (dentry != list_entry(list->prev, struct dentry, d_lru)) { + spin_unlock(&dentry->d_lock); + continue; + } /* * We found an inuse dentry which was not removed from * the LRU because of laziness during lookup. Do not free * it - just keep it off the LRU list. */ - spin_lock(&dentry->d_lock); - if (atomic_read(&dentry->d_count)) { + if (dentry->d_count) { + dentry_lru_del(dentry); spin_unlock(&dentry->d_lock); continue; } - prune_one_dentry(dentry); - /* dentry->d_lock was dropped in prune_one_dentry() */ - cond_resched_lock(&dcache_lock); + + rcu_read_unlock(); + + try_prune_one_dentry(dentry); + + rcu_read_lock(); } + rcu_read_unlock(); } /** @@ -495,42 +743,44 @@ static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags) LIST_HEAD(tmp); int cnt = *count; - spin_lock(&dcache_lock); +relock: + spin_lock(&dcache_lru_lock); while (!list_empty(&sb->s_dentry_lru)) { dentry = list_entry(sb->s_dentry_lru.prev, struct dentry, d_lru); BUG_ON(dentry->d_sb != sb); + if (!spin_trylock(&dentry->d_lock)) { + spin_unlock(&dcache_lru_lock); + cpu_relax(); + goto relock; + } + /* * If we are honouring the DCACHE_REFERENCED flag and the * dentry has this flag set, don't free it. Clear the flag * and put it back on the LRU. */ - if (flags & DCACHE_REFERENCED) { - spin_lock(&dentry->d_lock); - if (dentry->d_flags & DCACHE_REFERENCED) { - dentry->d_flags &= ~DCACHE_REFERENCED; - list_move(&dentry->d_lru, &referenced); - spin_unlock(&dentry->d_lock); - cond_resched_lock(&dcache_lock); - continue; - } + if (flags & DCACHE_REFERENCED && + dentry->d_flags & DCACHE_REFERENCED) { + dentry->d_flags &= ~DCACHE_REFERENCED; + list_move(&dentry->d_lru, &referenced); spin_unlock(&dentry->d_lock); + } else { + list_move_tail(&dentry->d_lru, &tmp); + spin_unlock(&dentry->d_lock); + if (!--cnt) + break; } - - list_move_tail(&dentry->d_lru, &tmp); - if (!--cnt) - break; - cond_resched_lock(&dcache_lock); + cond_resched_lock(&dcache_lru_lock); } - - *count = cnt; - shrink_dentry_list(&tmp); - if (!list_empty(&referenced)) list_splice(&referenced, &sb->s_dentry_lru); - spin_unlock(&dcache_lock); + spin_unlock(&dcache_lru_lock); + shrink_dentry_list(&tmp); + + *count = cnt; } /** @@ -546,13 +796,12 @@ static void prune_dcache(int count) { struct super_block *sb, *p = NULL; int w_count; - int unused = percpu_counter_sum_positive(&nr_dentry_unused); + int unused = dentry_stat.nr_unused; int prune_ratio; int pruned; if (unused == 0 || count == 0) return; - spin_lock(&dcache_lock); if (count >= unused) prune_ratio = 1; else @@ -589,11 +838,9 @@ static void prune_dcache(int count) if (down_read_trylock(&sb->s_umount)) { if ((sb->s_root != NULL) && (!list_empty(&sb->s_dentry_lru))) { - spin_unlock(&dcache_lock); __shrink_dcache_sb(sb, &w_count, DCACHE_REFERENCED); pruned -= w_count; - spin_lock(&dcache_lock); } up_read(&sb->s_umount); } @@ -609,7 +856,6 @@ static void prune_dcache(int count) if (p) __put_super(p); spin_unlock(&sb_lock); - spin_unlock(&dcache_lock); } /** @@ -623,12 +869,14 @@ void shrink_dcache_sb(struct super_block *sb) { LIST_HEAD(tmp); - spin_lock(&dcache_lock); + spin_lock(&dcache_lru_lock); while (!list_empty(&sb->s_dentry_lru)) { list_splice_init(&sb->s_dentry_lru, &tmp); + spin_unlock(&dcache_lru_lock); shrink_dentry_list(&tmp); + spin_lock(&dcache_lru_lock); } - spin_unlock(&dcache_lock); + spin_unlock(&dcache_lru_lock); } EXPORT_SYMBOL(shrink_dcache_sb); @@ -645,10 +893,10 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) BUG_ON(!IS_ROOT(dentry)); /* detach this root from the system */ - spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); dentry_lru_del(dentry); __d_drop(dentry); - spin_unlock(&dcache_lock); + spin_unlock(&dentry->d_lock); for (;;) { /* descend to the first leaf in the current subtree */ @@ -657,14 +905,16 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) /* this is a branch with children - detach all of them * from the system in one go */ - spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); list_for_each_entry(loop, &dentry->d_subdirs, d_u.d_child) { + spin_lock_nested(&loop->d_lock, + DENTRY_D_LOCK_NESTED); dentry_lru_del(loop); __d_drop(loop); - cond_resched_lock(&dcache_lock); + spin_unlock(&loop->d_lock); } - spin_unlock(&dcache_lock); + spin_unlock(&dentry->d_lock); /* move to the first child */ dentry = list_entry(dentry->d_subdirs.next, @@ -676,7 +926,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) do { struct inode *inode; - if (atomic_read(&dentry->d_count) != 0) { + if (dentry->d_count != 0) { printk(KERN_ERR "BUG: Dentry %p{i=%lx,n=%s}" " still in use (%d)" @@ -685,20 +935,23 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) dentry->d_inode ? dentry->d_inode->i_ino : 0UL, dentry->d_name.name, - atomic_read(&dentry->d_count), + dentry->d_count, dentry->d_sb->s_type->name, dentry->d_sb->s_id); BUG(); } - if (IS_ROOT(dentry)) + if (IS_ROOT(dentry)) { parent = NULL; - else { + list_del(&dentry->d_u.d_child); + } else { parent = dentry->d_parent; - atomic_dec(&parent->d_count); + spin_lock(&parent->d_lock); + parent->d_count--; + list_del(&dentry->d_u.d_child); + spin_unlock(&parent->d_lock); } - list_del(&dentry->d_u.d_child); detached++; inode = dentry->d_inode; @@ -728,8 +981,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) /* * destroy the dentries attached to a superblock on unmounting - * - we don't need to use dentry->d_lock, and only need dcache_lock when - * removing the dentry from the system lists and hashes because: + * - we don't need to use dentry->d_lock because: * - the superblock is detached from all mountings and open files, so the * dentry trees will not be rearranged by the VFS * - s_umount is write-locked, so the memory pressure shrinker will ignore @@ -746,11 +998,13 @@ void shrink_dcache_for_umount(struct super_block *sb) dentry = sb->s_root; sb->s_root = NULL; - atomic_dec(&dentry->d_count); + spin_lock(&dentry->d_lock); + dentry->d_count--; + spin_unlock(&dentry->d_lock); shrink_dcache_for_umount_subtree(dentry); - while (!hlist_empty(&sb->s_anon)) { - dentry = hlist_entry(sb->s_anon.first, struct dentry, d_hash); + while (!hlist_bl_empty(&sb->s_anon)) { + dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash); shrink_dcache_for_umount_subtree(dentry); } } @@ -768,15 +1022,20 @@ void shrink_dcache_for_umount(struct super_block *sb) * Return true if the parent or its subdirectories contain * a mount point */ - int have_submounts(struct dentry *parent) { - struct dentry *this_parent = parent; + struct dentry *this_parent; struct list_head *next; + unsigned seq; + int locked = 0; + + seq = read_seqbegin(&rename_lock); +again: + this_parent = parent; - spin_lock(&dcache_lock); if (d_mountpoint(parent)) goto positive; + spin_lock(&this_parent->d_lock); repeat: next = this_parent->d_subdirs.next; resume: @@ -784,27 +1043,65 @@ resume: struct list_head *tmp = next; struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); next = tmp->next; + + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); /* Have we found a mount point ? */ - if (d_mountpoint(dentry)) + if (d_mountpoint(dentry)) { + spin_unlock(&dentry->d_lock); + spin_unlock(&this_parent->d_lock); goto positive; + } if (!list_empty(&dentry->d_subdirs)) { + spin_unlock(&this_parent->d_lock); + spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_); this_parent = dentry; + spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_); goto repeat; } + spin_unlock(&dentry->d_lock); } /* * All done at this level ... ascend and resume the search. */ if (this_parent != parent) { - next = this_parent->d_u.d_child.next; - this_parent = this_parent->d_parent; + struct dentry *tmp; + struct dentry *child; + + tmp = this_parent->d_parent; + rcu_read_lock(); + spin_unlock(&this_parent->d_lock); + child = this_parent; + this_parent = tmp; + spin_lock(&this_parent->d_lock); + /* might go back up the wrong parent if we have had a rename + * or deletion */ + if (this_parent != child->d_parent || + (!locked && read_seqretry(&rename_lock, seq))) { + spin_unlock(&this_parent->d_lock); + rcu_read_unlock(); + goto rename_retry; + } + rcu_read_unlock(); + next = child->d_u.d_child.next; goto resume; } - spin_unlock(&dcache_lock); + spin_unlock(&this_parent->d_lock); + if (!locked && read_seqretry(&rename_lock, seq)) + goto rename_retry; + if (locked) + write_sequnlock(&rename_lock); return 0; /* No mount points found in tree */ positive: - spin_unlock(&dcache_lock); + if (!locked && read_seqretry(&rename_lock, seq)) + goto rename_retry; + if (locked) + write_sequnlock(&rename_lock); return 1; + +rename_retry: + locked = 1; + write_seqlock(&rename_lock); + goto again; } EXPORT_SYMBOL(have_submounts); @@ -824,11 +1121,16 @@ EXPORT_SYMBOL(have_submounts); */ static int select_parent(struct dentry * parent) { - struct dentry *this_parent = parent; + struct dentry *this_parent; struct list_head *next; + unsigned seq; int found = 0; + int locked = 0; - spin_lock(&dcache_lock); + seq = read_seqbegin(&rename_lock); +again: + this_parent = parent; + spin_lock(&this_parent->d_lock); repeat: next = this_parent->d_subdirs.next; resume: @@ -837,11 +1139,13 @@ resume: struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); next = tmp->next; + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); + /* * move only zero ref count dentries to the end * of the unused list for prune_dcache */ - if (!atomic_read(&dentry->d_count)) { + if (!dentry->d_count) { dentry_lru_move_tail(dentry); found++; } else { @@ -853,28 +1157,63 @@ resume: * ensures forward progress). We'll be coming back to find * the rest. */ - if (found && need_resched()) + if (found && need_resched()) { + spin_unlock(&dentry->d_lock); goto out; + } /* * Descend a level if the d_subdirs list is non-empty. */ if (!list_empty(&dentry->d_subdirs)) { + spin_unlock(&this_parent->d_lock); + spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_); this_parent = dentry; + spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_); goto repeat; } + + spin_unlock(&dentry->d_lock); } /* * All done at this level ... ascend and resume the search. */ if (this_parent != parent) { - next = this_parent->d_u.d_child.next; - this_parent = this_parent->d_parent; + struct dentry *tmp; + struct dentry *child; + + tmp = this_parent->d_parent; + rcu_read_lock(); + spin_unlock(&this_parent->d_lock); + child = this_parent; + this_parent = tmp; + spin_lock(&this_parent->d_lock); + /* might go back up the wrong parent if we have had a rename + * or deletion */ + if (this_parent != child->d_parent || + (!locked && read_seqretry(&rename_lock, seq))) { + spin_unlock(&this_parent->d_lock); + rcu_read_unlock(); + goto rename_retry; + } + rcu_read_unlock(); + next = child->d_u.d_child.next; goto resume; } out: - spin_unlock(&dcache_lock); + spin_unlock(&this_parent->d_lock); + if (!locked && read_seqretry(&rename_lock, seq)) + goto rename_retry; + if (locked) + write_sequnlock(&rename_lock); return found; + +rename_retry: + if (found) + return found; + locked = 1; + write_seqlock(&rename_lock); + goto again; } /** @@ -908,16 +1247,13 @@ EXPORT_SYMBOL(shrink_dcache_parent); */ static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) { - int nr_unused; - if (nr) { if (!(gfp_mask & __GFP_FS)) return -1; prune_dcache(nr); } - nr_unused = percpu_counter_sum_positive(&nr_dentry_unused); - return (nr_unused / 100) * sysctl_vfs_cache_pressure; + return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; } |