diff options
Diffstat (limited to 'fs/dcache.c')
| -rw-r--r-- | fs/dcache.c | 3067 |
1 files changed, 2070 insertions, 997 deletions
diff --git a/fs/dcache.c b/fs/dcache.c index 953173a293a..06f65857a85 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -23,7 +23,7 @@ #include <linux/init.h> #include <linux/hash.h> #include <linux/cache.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/mount.h> #include <linux/file.h> #include <asm/uaccess.h> @@ -33,20 +33,61 @@ #include <linux/bootmem.h> #include <linux/fs_struct.h> #include <linux/hardirq.h> +#include <linux/bit_spinlock.h> +#include <linux/rculist_bl.h> +#include <linux/prefetch.h> +#include <linux/ratelimit.h> +#include <linux/list_lru.h> #include "internal.h" +#include "mount.h" +/* + * Usage: + * dcache->d_inode->i_lock protects: + * - i_dentry, d_alias, d_inode of aliases + * dcache_hash_bucket lock protects: + * - the dcache hash table + * s_anon bl list spinlock protects: + * - the s_anon list (see __d_drop) + * dentry->d_sb->s_dentry_lru_lock protects: + * - the dcache lru lists and counters + * d_lock protects: + * - d_flags + * - d_name + * - d_lru + * - d_count + * - d_unhashed() + * - d_parent and d_subdirs + * - childrens' d_child and d_parent + * - d_alias, d_inode + * + * Ordering: + * dentry->d_inode->i_lock + * dentry->d_lock + * dentry->d_sb->s_dentry_lru_lock + * dcache_hash_bucket lock + * s_anon lock + * + * If there is an ancestor relationship: + * dentry->d_parent->...->d_parent->d_lock + * ... + * dentry->d_parent->d_lock + * dentry->d_lock + * + * If no ancestor relationship: + * if (dentry1 < dentry2) + * dentry1->d_lock + * dentry2->d_lock + */ int sysctl_vfs_cache_pressure __read_mostly = 100; EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); - __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock); __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); -EXPORT_SYMBOL(dcache_lock); +EXPORT_SYMBOL(rename_lock); static struct kmem_cache *dentry_cache __read_mostly; -#define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname)) - /* * This is the single most critical data structure when it comes * to the dcache: the hashtable for lookups. Somebody should try @@ -55,61 +96,194 @@ static struct kmem_cache *dentry_cache __read_mostly; * This hash-function tries to avoid losing too many bits of hash * information, yet avoid using a prime hash-size or similar. */ -#define D_HASHBITS d_hash_shift -#define D_HASHMASK d_hash_mask static unsigned int d_hash_mask __read_mostly; static unsigned int d_hash_shift __read_mostly; -static struct hlist_head *dentry_hashtable __read_mostly; + +static struct hlist_bl_head *dentry_hashtable __read_mostly; + +static inline struct hlist_bl_head *d_hash(const struct dentry *parent, + unsigned int hash) +{ + hash += (unsigned long) parent / L1_CACHE_BYTES; + hash = hash + (hash >> d_hash_shift); + return dentry_hashtable + (hash & d_hash_mask); +} /* Statistics gathering. */ struct dentry_stat_t dentry_stat = { .age_limit = 45, }; -static void __d_free(struct dentry *dentry) +static DEFINE_PER_CPU(long, nr_dentry); +static DEFINE_PER_CPU(long, nr_dentry_unused); + +#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) + +/* + * Here we resort to our own counters instead of using generic per-cpu counters + * for consistency with what the vfs inode code does. We are expected to harvest + * better code and performance by having our own specialized counters. + * + * Please note that the loop is done over all possible CPUs, not over all online + * CPUs. The reason for this is that we don't want to play games with CPUs going + * on and off. If one of them goes off, we will just keep their counters. + * + * glommer: See cffbc8a for details, and if you ever intend to change this, + * please update all vfs counters to match. + */ +static long get_nr_dentry(void) +{ + int i; + long sum = 0; + for_each_possible_cpu(i) + sum += per_cpu(nr_dentry, i); + return sum < 0 ? 0 : sum; +} + +static long get_nr_dentry_unused(void) +{ + int i; + long sum = 0; + for_each_possible_cpu(i) + sum += per_cpu(nr_dentry_unused, i); + return sum < 0 ? 0 : sum; +} + +int proc_nr_dentry(struct ctl_table *table, int write, void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + dentry_stat.nr_dentry = get_nr_dentry(); + dentry_stat.nr_unused = get_nr_dentry_unused(); + return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); +} +#endif + +/* + * Compare 2 name strings, return 0 if they match, otherwise non-zero. + * The strings are both count bytes long, and count is non-zero. + */ +#ifdef CONFIG_DCACHE_WORD_ACCESS + +#include <asm/word-at-a-time.h> +/* + * NOTE! 'cs' and 'scount' come from a dentry, so it has a + * aligned allocation for this particular component. We don't + * strictly need the load_unaligned_zeropad() safety, but it + * doesn't hurt either. + * + * In contrast, 'ct' and 'tcount' can be from a pathname, and do + * need the careful unaligned handling. + */ +static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount) +{ + unsigned long a,b,mask; + + for (;;) { + a = *(unsigned long *)cs; + b = load_unaligned_zeropad(ct); + if (tcount < sizeof(unsigned long)) + break; + if (unlikely(a != b)) + return 1; + cs += sizeof(unsigned long); + ct += sizeof(unsigned long); + tcount -= sizeof(unsigned long); + if (!tcount) + return 0; + } + mask = bytemask_from_count(tcount); + return unlikely(!!((a ^ b) & mask)); +} + +#else + +static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount) +{ + do { + if (*cs != *ct) + return 1; + cs++; + ct++; + tcount--; + } while (tcount); + return 0; +} + +#endif + +static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *ct, unsigned tcount) { - WARN_ON(!list_empty(&dentry->d_alias)); + const unsigned char *cs; + /* + * Be careful about RCU walk racing with rename: + * use ACCESS_ONCE to fetch the name pointer. + * + * NOTE! Even if a rename will mean that the length + * was not loaded atomically, we don't care. The + * RCU walk will check the sequence count eventually, + * and catch it. And we won't overrun the buffer, + * because we're reading the name pointer atomically, + * and a dentry name is guaranteed to be properly + * terminated with a NUL byte. + * + * End result: even if 'len' is wrong, we'll exit + * early because the data cannot match (there can + * be no NUL in the ct/tcount data) + */ + cs = ACCESS_ONCE(dentry->d_name.name); + smp_read_barrier_depends(); + return dentry_string_cmp(cs, ct, tcount); +} + +static void __d_free(struct rcu_head *head) +{ + struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu); + + WARN_ON(!hlist_unhashed(&dentry->d_alias)); if (dname_external(dentry)) kfree(dentry->d_name.name); kmem_cache_free(dentry_cache, dentry); } -static void d_callback(struct rcu_head *head) +static void dentry_free(struct dentry *dentry) { - struct dentry * dentry = container_of(head, struct dentry, d_u.d_rcu); - __d_free(dentry); + /* if dentry was never visible to RCU, immediate free is OK */ + if (!(dentry->d_flags & DCACHE_RCUACCESS)) + __d_free(&dentry->d_u.d_rcu); + else + call_rcu(&dentry->d_u.d_rcu, __d_free); } -/* - * no dcache_lock, please. The caller must decrement dentry_stat.nr_dentry - * inside dcache_lock. +/** + * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups + * @dentry: the target dentry + * After this call, in-progress rcu-walk path lookup will fail. This + * should be called after unhashing, and after changing d_inode (if + * the dentry has not already been unhashed). */ -static void d_free(struct dentry *dentry) +static inline void dentry_rcuwalk_barrier(struct dentry *dentry) { - if (dentry->d_op && dentry->d_op->d_release) - dentry->d_op->d_release(dentry); - /* if dentry was never inserted into hash, immediate free is OK */ - if (hlist_unhashed(&dentry->d_hash)) - __d_free(dentry); - else - call_rcu(&dentry->d_u.d_rcu, d_callback); + assert_spin_locked(&dentry->d_lock); + /* Go through a barrier */ + write_seqcount_barrier(&dentry->d_seq); } /* * Release the dentry's inode, using the filesystem - * d_iput() operation if defined. + * d_iput() operation if defined. Dentry has no refcount + * and is unhashed. */ static void dentry_iput(struct dentry * dentry) __releases(dentry->d_lock) - __releases(dcache_lock) + __releases(dentry->d_inode->i_lock) { struct inode *inode = dentry->d_inode; if (inode) { dentry->d_inode = NULL; - list_del_init(&dentry->d_alias); + hlist_del_init(&dentry->d_alias); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); + spin_unlock(&inode->i_lock); if (!inode->i_nlink) fsnotify_inoderemove(inode); if (dentry->d_op && dentry->d_op->d_iput) @@ -118,68 +292,272 @@ static void dentry_iput(struct dentry * dentry) iput(inode); } else { spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); } } /* - * dentry_lru_(add|add_tail|del|del_init) must be called with dcache_lock held. + * Release the dentry's inode, using the filesystem + * d_iput() operation if defined. dentry remains in-use. */ -static void dentry_lru_add(struct dentry *dentry) +static void dentry_unlink_inode(struct dentry * dentry) + __releases(dentry->d_lock) + __releases(dentry->d_inode->i_lock) +{ + struct inode *inode = dentry->d_inode; + __d_clear_type(dentry); + dentry->d_inode = NULL; + hlist_del_init(&dentry->d_alias); + dentry_rcuwalk_barrier(dentry); + spin_unlock(&dentry->d_lock); + spin_unlock(&inode->i_lock); + if (!inode->i_nlink) + fsnotify_inoderemove(inode); + if (dentry->d_op && dentry->d_op->d_iput) + dentry->d_op->d_iput(dentry, inode); + else + iput(inode); +} + +/* + * The DCACHE_LRU_LIST bit is set whenever the 'd_lru' entry + * is in use - which includes both the "real" per-superblock + * LRU list _and_ the DCACHE_SHRINK_LIST use. + * + * The DCACHE_SHRINK_LIST bit is set whenever the dentry is + * on the shrink list (ie not on the superblock LRU list). + * + * The per-cpu "nr_dentry_unused" counters are updated with + * the DCACHE_LRU_LIST bit. + * + * These helper functions make sure we always follow the + * rules. d_lock must be held by the caller. + */ +#define D_FLAG_VERIFY(dentry,x) WARN_ON_ONCE(((dentry)->d_flags & (DCACHE_LRU_LIST | DCACHE_SHRINK_LIST)) != (x)) +static void d_lru_add(struct dentry *dentry) { - list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); - dentry->d_sb->s_nr_dentry_unused++; - dentry_stat.nr_unused++; + D_FLAG_VERIFY(dentry, 0); + dentry->d_flags |= DCACHE_LRU_LIST; + this_cpu_inc(nr_dentry_unused); + WARN_ON_ONCE(!list_lru_add(&dentry->d_sb->s_dentry_lru, &dentry->d_lru)); } -static void dentry_lru_add_tail(struct dentry *dentry) +static void d_lru_del(struct dentry *dentry) { - list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); - dentry->d_sb->s_nr_dentry_unused++; - dentry_stat.nr_unused++; + D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST); + dentry->d_flags &= ~DCACHE_LRU_LIST; + this_cpu_dec(nr_dentry_unused); + WARN_ON_ONCE(!list_lru_del(&dentry->d_sb->s_dentry_lru, &dentry->d_lru)); } -static void dentry_lru_del(struct dentry *dentry) +static void d_shrink_del(struct dentry *dentry) { - if (!list_empty(&dentry->d_lru)) { - list_del(&dentry->d_lru); - dentry->d_sb->s_nr_dentry_unused--; - dentry_stat.nr_unused--; - } + D_FLAG_VERIFY(dentry, DCACHE_SHRINK_LIST | DCACHE_LRU_LIST); + list_del_init(&dentry->d_lru); + dentry->d_flags &= ~(DCACHE_SHRINK_LIST | DCACHE_LRU_LIST); + this_cpu_dec(nr_dentry_unused); } -static void dentry_lru_del_init(struct dentry *dentry) +static void d_shrink_add(struct dentry *dentry, struct list_head *list) { - if (likely(!list_empty(&dentry->d_lru))) { - list_del_init(&dentry->d_lru); - dentry->d_sb->s_nr_dentry_unused--; - dentry_stat.nr_unused--; - } + D_FLAG_VERIFY(dentry, 0); + list_add(&dentry->d_lru, list); + dentry->d_flags |= DCACHE_SHRINK_LIST | DCACHE_LRU_LIST; + this_cpu_inc(nr_dentry_unused); +} + +/* + * These can only be called under the global LRU lock, ie during the + * callback for freeing the LRU list. "isolate" removes it from the + * LRU lists entirely, while shrink_move moves it to the indicated + * private list. + */ +static void d_lru_isolate(struct dentry *dentry) +{ + D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST); + dentry->d_flags &= ~DCACHE_LRU_LIST; + this_cpu_dec(nr_dentry_unused); + list_del_init(&dentry->d_lru); +} + +static void d_lru_shrink_move(struct dentry *dentry, struct list_head *list) +{ + D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST); + dentry->d_flags |= DCACHE_SHRINK_LIST; + list_move_tail(&dentry->d_lru, list); +} + +/* + * dentry_lru_(add|del)_list) must be called with d_lock held. + */ +static void dentry_lru_add(struct dentry *dentry) +{ + if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST))) + d_lru_add(dentry); } /** - * d_kill - kill dentry and return parent - * @dentry: dentry to kill + * d_drop - drop a dentry + * @dentry: dentry to drop * - * The dentry must already be unhashed and removed from the LRU. + * d_drop() unhashes the entry from the parent dentry hashes, so that it won't + * be found through a VFS lookup any more. Note that this is different from + * deleting the dentry - d_delete will try to mark the dentry negative if + * possible, giving a successful _negative_ lookup, while d_drop will + * just make the cache lookup fail. * - * If this is the root of the dentry tree, return NULL. + * d_drop() is used mainly for stuff that wants to invalidate a dentry for some + * reason (NFS timeouts or autofs deletes). + * + * __d_drop requires dentry->d_lock. */ -static struct dentry *d_kill(struct dentry *dentry) - __releases(dentry->d_lock) - __releases(dcache_lock) +void __d_drop(struct dentry *dentry) +{ + if (!d_unhashed(dentry)) { + struct hlist_bl_head *b; + /* + * Hashed dentries are normally on the dentry hashtable, + * with the exception of those newly allocated by + * d_obtain_alias, which are always IS_ROOT: + */ + if (unlikely(IS_ROOT(dentry))) + b = &dentry->d_sb->s_anon; + else + b = d_hash(dentry->d_parent, dentry->d_name.hash); + + hlist_bl_lock(b); + __hlist_bl_del(&dentry->d_hash); + dentry->d_hash.pprev = NULL; + hlist_bl_unlock(b); + dentry_rcuwalk_barrier(dentry); + } +} +EXPORT_SYMBOL(__d_drop); + +void d_drop(struct dentry *dentry) +{ + spin_lock(&dentry->d_lock); + __d_drop(dentry); + spin_unlock(&dentry->d_lock); +} +EXPORT_SYMBOL(d_drop); + +static void __dentry_kill(struct dentry *dentry) { - struct dentry *parent; + struct dentry *parent = NULL; + bool can_free = true; + if (!IS_ROOT(dentry)) + parent = dentry->d_parent; + /* + * The dentry is now unrecoverably dead to the world. + */ + lockref_mark_dead(&dentry->d_lockref); + + /* + * inform the fs via d_prune that this dentry is about to be + * unhashed and destroyed. + */ + if ((dentry->d_flags & DCACHE_OP_PRUNE) && !d_unhashed(dentry)) + dentry->d_op->d_prune(dentry); + + if (dentry->d_flags & DCACHE_LRU_LIST) { + if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) + d_lru_del(dentry); + } + /* if it was on the hash then remove it */ + __d_drop(dentry); list_del(&dentry->d_u.d_child); - dentry_stat.nr_dentry--; /* For d_free, below */ - /*drops the locks, at that point nobody can reach this dentry */ + /* + * Inform d_walk() that we are no longer attached to the + * dentry tree + */ + dentry->d_flags |= DCACHE_DENTRY_KILLED; + if (parent) + spin_unlock(&parent->d_lock); dentry_iput(dentry); + /* + * dentry_iput drops the locks, at which point nobody (except + * transient RCU lookups) can reach this dentry. + */ + BUG_ON((int)dentry->d_lockref.count > 0); + this_cpu_dec(nr_dentry); + if (dentry->d_op && dentry->d_op->d_release) + dentry->d_op->d_release(dentry); + + spin_lock(&dentry->d_lock); + if (dentry->d_flags & DCACHE_SHRINK_LIST) { + dentry->d_flags |= DCACHE_MAY_FREE; + can_free = false; + } + spin_unlock(&dentry->d_lock); + if (likely(can_free)) + dentry_free(dentry); +} + +/* + * Finish off a dentry we've decided to kill. + * dentry->d_lock must be held, returns with it unlocked. + * If ref is non-zero, then decrement the refcount too. + * Returns dentry requiring refcount drop, or NULL if we're done. + */ +static struct dentry *dentry_kill(struct dentry *dentry) + __releases(dentry->d_lock) +{ + struct inode *inode = dentry->d_inode; + struct dentry *parent = NULL; + + if (inode && unlikely(!spin_trylock(&inode->i_lock))) + goto failed; + + if (!IS_ROOT(dentry)) { + parent = dentry->d_parent; + if (unlikely(!spin_trylock(&parent->d_lock))) { + if (inode) + spin_unlock(&inode->i_lock); + goto failed; + } + } + + __dentry_kill(dentry); + return parent; + +failed: + spin_unlock(&dentry->d_lock); + cpu_relax(); + return dentry; /* try again with same dentry */ +} + +static inline struct dentry *lock_parent(struct dentry *dentry) +{ + struct dentry *parent = dentry->d_parent; if (IS_ROOT(dentry)) - parent = NULL; + return NULL; + if (unlikely((int)dentry->d_lockref.count < 0)) + return NULL; + if (likely(spin_trylock(&parent->d_lock))) + return parent; + rcu_read_lock(); + spin_unlock(&dentry->d_lock); +again: + parent = ACCESS_ONCE(dentry->d_parent); + spin_lock(&parent->d_lock); + /* + * We can't blindly lock dentry until we are sure + * that we won't violate the locking order. + * Any changes of dentry->d_parent must have + * been done with parent->d_lock held, so + * spin_lock() above is enough of a barrier + * for checking if it's still our child. + */ + if (unlikely(parent != dentry->d_parent)) { + spin_unlock(&parent->d_lock); + goto again; + } + rcu_read_unlock(); + if (parent != dentry) + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); else - parent = dentry->d_parent; - d_free(dentry); + parent = NULL; return parent; } @@ -208,55 +586,39 @@ static struct dentry *d_kill(struct dentry *dentry) * call the dentry unlink method as well as removing it from the queues and * releasing its resources. If the parent dentries were scheduled for release * they too may now get deleted. - * - * no dcache lock, please. */ - void dput(struct dentry *dentry) { - if (!dentry) + if (unlikely(!dentry)) return; repeat: - if (atomic_read(&dentry->d_count) == 1) - might_sleep(); - if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock)) + if (lockref_put_or_lock(&dentry->d_lockref)) return; - spin_lock(&dentry->d_lock); - if (atomic_read(&dentry->d_count)) { - spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); - return; - } + /* Unreachable? Get rid of it */ + if (unlikely(d_unhashed(dentry))) + goto kill_it; - /* - * AV: ->d_delete() is _NOT_ allowed to block now. - */ - if (dentry->d_op && dentry->d_op->d_delete) { + if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) { if (dentry->d_op->d_delete(dentry)) - goto unhash_it; + goto kill_it; } - /* Unreachable? Get rid of it */ - if (d_unhashed(dentry)) - goto kill_it; - if (list_empty(&dentry->d_lru)) { - dentry->d_flags |= DCACHE_REFERENCED; - dentry_lru_add(dentry); - } - spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); + + if (!(dentry->d_flags & DCACHE_REFERENCED)) + dentry->d_flags |= DCACHE_REFERENCED; + dentry_lru_add(dentry); + + dentry->d_lockref.count--; + spin_unlock(&dentry->d_lock); return; -unhash_it: - __d_drop(dentry); kill_it: - /* if dentry was on the d_lru list delete it from there */ - dentry_lru_del(dentry); - dentry = d_kill(dentry); + dentry = dentry_kill(dentry); if (dentry) goto repeat; } +EXPORT_SYMBOL(dput); /** * d_invalidate - invalidate a dentry @@ -275,9 +637,9 @@ int d_invalidate(struct dentry * dentry) /* * If it's already been dropped, return OK. */ - spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); if (d_unhashed(dentry)) { - spin_unlock(&dcache_lock); + spin_unlock(&dentry->d_lock); return 0; } /* @@ -285,9 +647,9 @@ int d_invalidate(struct dentry * dentry) * to get rid of unused child entries. */ if (!list_empty(&dentry->d_subdirs)) { - spin_unlock(&dcache_lock); + spin_unlock(&dentry->d_lock); shrink_dcache_parent(dentry); - spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); } /* @@ -299,35 +661,72 @@ int d_invalidate(struct dentry * dentry) * would make it unreachable from the root, * we might still populate it if it was a * working directory or similar). + * We also need to leave mountpoints alone, + * directory or not. */ - spin_lock(&dentry->d_lock); - if (atomic_read(&dentry->d_count) > 1) { - if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) { + if (dentry->d_lockref.count > 1 && dentry->d_inode) { + if (S_ISDIR(dentry->d_inode->i_mode) || d_mountpoint(dentry)) { spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); return -EBUSY; } } __d_drop(dentry); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); return 0; } +EXPORT_SYMBOL(d_invalidate); -/* This should be called _only_ with dcache_lock held */ +/* This must be called with d_lock held */ +static inline void __dget_dlock(struct dentry *dentry) +{ + dentry->d_lockref.count++; +} -static inline struct dentry * __dget_locked(struct dentry *dentry) +static inline void __dget(struct dentry *dentry) { - atomic_inc(&dentry->d_count); - dentry_lru_del_init(dentry); - return dentry; + lockref_get(&dentry->d_lockref); } -struct dentry * dget_locked(struct dentry *dentry) +struct dentry *dget_parent(struct dentry *dentry) { - return __dget_locked(dentry); + int gotref; + struct dentry *ret; + + /* + * Do optimistic parent lookup without any + * locking. + */ + rcu_read_lock(); + ret = ACCESS_ONCE(dentry->d_parent); + gotref = lockref_get_not_zero(&ret->d_lockref); + rcu_read_unlock(); + if (likely(gotref)) { + if (likely(ret == ACCESS_ONCE(dentry->d_parent))) + return ret; + dput(ret); + } + +repeat: + /* + * Don't need rcu_dereference because we re-check it was correct under + * the lock. + */ + rcu_read_lock(); + ret = dentry->d_parent; + spin_lock(&ret->d_lock); + if (unlikely(ret != dentry->d_parent)) { + spin_unlock(&ret->d_lock); + rcu_read_unlock(); + goto repeat; + } + rcu_read_unlock(); + BUG_ON(!ret->d_lockref.count); + ret->d_lockref.count++; + spin_unlock(&ret->d_lock); + return ret; } +EXPORT_SYMBOL(dget_parent); /** * d_find_alias - grab a hashed alias of inode @@ -345,45 +744,55 @@ struct dentry * dget_locked(struct dentry *dentry) * any other hashed alias over that one unless @want_discon is set, * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias. */ - -static struct dentry * __d_find_alias(struct inode *inode, int want_discon) +static struct dentry *__d_find_alias(struct inode *inode, int want_discon) { - struct list_head *head, *next, *tmp; - struct dentry *alias, *discon_alias=NULL; + struct dentry *alias, *discon_alias; - head = &inode->i_dentry; - next = inode->i_dentry.next; - while (next != head) { - tmp = next; - next = tmp->next; - prefetch(next); - alias = list_entry(tmp, struct dentry, d_alias); +again: + discon_alias = NULL; + hlist_for_each_entry(alias, &inode->i_dentry, d_alias) { + spin_lock(&alias->d_lock); if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) { if (IS_ROOT(alias) && - (alias->d_flags & DCACHE_DISCONNECTED)) + (alias->d_flags & DCACHE_DISCONNECTED)) { discon_alias = alias; - else if (!want_discon) { - __dget_locked(alias); + } else if (!want_discon) { + __dget_dlock(alias); + spin_unlock(&alias->d_lock); + return alias; + } + } + spin_unlock(&alias->d_lock); + } + if (discon_alias) { + alias = discon_alias; + spin_lock(&alias->d_lock); + if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) { + if (IS_ROOT(alias) && + (alias->d_flags & DCACHE_DISCONNECTED)) { + __dget_dlock(alias); + spin_unlock(&alias->d_lock); return alias; } } + spin_unlock(&alias->d_lock); + goto again; } - if (discon_alias) - __dget_locked(discon_alias); - return discon_alias; + return NULL; } -struct dentry * d_find_alias(struct inode *inode) +struct dentry *d_find_alias(struct inode *inode) { struct dentry *de = NULL; - if (!list_empty(&inode->i_dentry)) { - spin_lock(&dcache_lock); + if (!hlist_empty(&inode->i_dentry)) { + spin_lock(&inode->i_lock); de = __d_find_alias(inode, 0); - spin_unlock(&dcache_lock); + spin_unlock(&inode->i_lock); } return de; } +EXPORT_SYMBOL(d_find_alias); /* * Try to kill dentries associated with this inode. @@ -393,376 +802,287 @@ void d_prune_aliases(struct inode *inode) { struct dentry *dentry; restart: - spin_lock(&dcache_lock); - list_for_each_entry(dentry, &inode->i_dentry, d_alias) { + spin_lock(&inode->i_lock); + hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) { spin_lock(&dentry->d_lock); - if (!atomic_read(&dentry->d_count)) { - __dget_locked(dentry); + if (!dentry->d_lockref.count) { + /* + * inform the fs via d_prune that this dentry + * is about to be unhashed and destroyed. + */ + if ((dentry->d_flags & DCACHE_OP_PRUNE) && + !d_unhashed(dentry)) + dentry->d_op->d_prune(dentry); + + __dget_dlock(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); + spin_unlock(&inode->i_lock); dput(dentry); goto restart; } spin_unlock(&dentry->d_lock); } - spin_unlock(&dcache_lock); + spin_unlock(&inode->i_lock); } +EXPORT_SYMBOL(d_prune_aliases); -/* - * Throw away a dentry - free the inode, dput the parent. This requires that - * the LRU list has already been removed. - * - * Try to prune ancestors as well. This is necessary to prevent - * quadratic behavior of shrink_dcache_parent(), but is also expected - * to be beneficial in reducing dentry cache fragmentation. - */ -static void prune_one_dentry(struct dentry * dentry) - __releases(dentry->d_lock) - __releases(dcache_lock) - __acquires(dcache_lock) +static void shrink_dentry_list(struct list_head *list) { - __d_drop(dentry); - dentry = d_kill(dentry); + struct dentry *dentry, *parent; - /* - * Prune ancestors. Locking is simpler than in dput(), - * because dcache_lock needs to be taken anyway. - */ - spin_lock(&dcache_lock); - while (dentry) { - if (!atomic_dec_and_lock(&dentry->d_count, &dentry->d_lock)) - return; - - if (dentry->d_op && dentry->d_op->d_delete) - dentry->d_op->d_delete(dentry); - dentry_lru_del_init(dentry); - __d_drop(dentry); - dentry = d_kill(dentry); - spin_lock(&dcache_lock); - } -} + while (!list_empty(list)) { + struct inode *inode; + dentry = list_entry(list->prev, struct dentry, d_lru); + spin_lock(&dentry->d_lock); + parent = lock_parent(dentry); -/* - * Shrink the dentry LRU on a given superblock. - * @sb : superblock to shrink dentry LRU. - * @count: If count is NULL, we prune all dentries on superblock. - * @flags: If flags is non-zero, we need to do special processing based on - * which flags are set. This means we don't need to maintain multiple - * similar copies of this loop. - */ -static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags) -{ - LIST_HEAD(referenced); - LIST_HEAD(tmp); - struct dentry *dentry; - int cnt = 0; - - BUG_ON(!sb); - BUG_ON((flags & DCACHE_REFERENCED) && count == NULL); - spin_lock(&dcache_lock); - if (count != NULL) - /* called from prune_dcache() and shrink_dcache_parent() */ - cnt = *count; -restart: - if (count == NULL) - list_splice_init(&sb->s_dentry_lru, &tmp); - else { - while (!list_empty(&sb->s_dentry_lru)) { - dentry = list_entry(sb->s_dentry_lru.prev, - struct dentry, d_lru); - BUG_ON(dentry->d_sb != sb); + /* + * The dispose list is isolated and dentries are not accounted + * to the LRU here, so we can simply remove it from the list + * here regardless of whether it is referenced or not. + */ + d_shrink_del(dentry); - spin_lock(&dentry->d_lock); - /* - * If we are honouring the DCACHE_REFERENCED flag and - * the dentry has this flag set, don't free it. Clear - * the flag and put it back on the LRU. - */ - if ((flags & DCACHE_REFERENCED) - && (dentry->d_flags & DCACHE_REFERENCED)) { - dentry->d_flags &= ~DCACHE_REFERENCED; - list_move(&dentry->d_lru, &referenced); - spin_unlock(&dentry->d_lock); - } else { - list_move_tail(&dentry->d_lru, &tmp); - spin_unlock(&dentry->d_lock); - cnt--; - if (!cnt) - break; - } - cond_resched_lock(&dcache_lock); - } - } - while (!list_empty(&tmp)) { - dentry = list_entry(tmp.prev, struct dentry, d_lru); - dentry_lru_del_init(dentry); - spin_lock(&dentry->d_lock); /* * We found an inuse dentry which was not removed from - * the LRU because of laziness during lookup. Do not free - * it - just keep it off the LRU list. + * the LRU because of laziness during lookup. Do not free it. */ - if (atomic_read(&dentry->d_count)) { + if ((int)dentry->d_lockref.count > 0) { spin_unlock(&dentry->d_lock); + if (parent) + spin_unlock(&parent->d_lock); continue; } - prune_one_dentry(dentry); - /* dentry->d_lock was dropped in prune_one_dentry() */ - cond_resched_lock(&dcache_lock); - } - if (count == NULL && !list_empty(&sb->s_dentry_lru)) - goto restart; - if (count != NULL) - *count = cnt; - if (!list_empty(&referenced)) - list_splice(&referenced, &sb->s_dentry_lru); - spin_unlock(&dcache_lock); -} -/** - * prune_dcache - shrink the dcache - * @count: number of entries to try to free - * - * Shrink the dcache. This is done when we need more memory, or simply when we - * need to unmount something (at which point we need to unuse all dentries). - * - * This function may fail to free any resources if all the dentries are in use. - */ -static void prune_dcache(int count) -{ - struct super_block *sb; - int w_count; - int unused = dentry_stat.nr_unused; - int prune_ratio; - int pruned; - if (unused == 0 || count == 0) - return; - spin_lock(&dcache_lock); -restart: - if (count >= unused) - prune_ratio = 1; - else - prune_ratio = unused / count; - spin_lock(&sb_lock); - list_for_each_entry(sb, &super_blocks, s_list) { - if (sb->s_nr_dentry_unused == 0) + if (unlikely(dentry->d_flags & DCACHE_DENTRY_KILLED)) { + bool can_free = dentry->d_flags & DCACHE_MAY_FREE; + spin_unlock(&dentry->d_lock); + if (parent) + spin_unlock(&parent->d_lock); + if (can_free) + dentry_free(dentry); continue; - sb->s_count++; - /* Now, we reclaim unused dentrins with fairness. - * We reclaim them same percentage from each superblock. - * We calculate number of dentries to scan on this sb - * as follows, but the implementation is arranged to avoid - * overflows: - * number of dentries to scan on this sb = - * count * (number of dentries on this sb / - * number of dentries in the machine) - */ - spin_unlock(&sb_lock); - if (prune_ratio != 1) - w_count = (sb->s_nr_dentry_unused / prune_ratio) + 1; - else - w_count = sb->s_nr_dentry_unused; - pruned = w_count; - /* - * We need to be sure this filesystem isn't being unmounted, - * otherwise we could race with generic_shutdown_super(), and - * end up holding a reference to an inode while the filesystem - * is unmounted. So we try to get s_umount, and make sure - * s_root isn't NULL. - */ - if (down_read_trylock(&sb->s_umount)) { - if ((sb->s_root != NULL) && - (!list_empty(&sb->s_dentry_lru))) { - spin_unlock(&dcache_lock); - __shrink_dcache_sb(sb, &w_count, - DCACHE_REFERENCED); - pruned -= w_count; - spin_lock(&dcache_lock); - } - up_read(&sb->s_umount); } - spin_lock(&sb_lock); - count -= pruned; + + inode = dentry->d_inode; + if (inode && unlikely(!spin_trylock(&inode->i_lock))) { + d_shrink_add(dentry, list); + spin_unlock(&dentry->d_lock); + if (parent) + spin_unlock(&parent->d_lock); + continue; + } + + __dentry_kill(dentry); + /* - * restart only when sb is no longer on the list and - * we have more work to do. + * We need to prune ancestors too. This is necessary to prevent + * quadratic behavior of shrink_dcache_parent(), but is also + * expected to be beneficial in reducing dentry cache + * fragmentation. */ - if (__put_super_and_need_restart(sb) && count > 0) { - spin_unlock(&sb_lock); - goto restart; + dentry = parent; + while (dentry && !lockref_put_or_lock(&dentry->d_lockref)) { + parent = lock_parent(dentry); + if (dentry->d_lockref.count != 1) { + dentry->d_lockref.count--; + spin_unlock(&dentry->d_lock); + if (parent) + spin_unlock(&parent->d_lock); + break; + } + inode = dentry->d_inode; /* can't be NULL */ + if (unlikely(!spin_trylock(&inode->i_lock))) { + spin_unlock(&dentry->d_lock); + if (parent) + spin_unlock(&parent->d_lock); + cpu_relax(); + continue; + } + __dentry_kill(dentry); + dentry = parent; } } - spin_unlock(&sb_lock); - spin_unlock(&dcache_lock); } -/** - * shrink_dcache_sb - shrink dcache for a superblock - * @sb: superblock - * - * Shrink the dcache for the specified super block. This - * is used to free the dcache before unmounting a file - * system - */ -void shrink_dcache_sb(struct super_block * sb) +static enum lru_status +dentry_lru_isolate(struct list_head *item, spinlock_t *lru_lock, void *arg) { - __shrink_dcache_sb(sb, NULL, 0); -} + struct list_head *freeable = arg; + struct dentry *dentry = container_of(item, struct dentry, d_lru); -/* - * destroy a single subtree of dentries for unmount - * - see the comments on shrink_dcache_for_umount() for a description of the - * locking - */ -static void shrink_dcache_for_umount_subtree(struct dentry *dentry) -{ - struct dentry *parent; - unsigned detached = 0; - BUG_ON(!IS_ROOT(dentry)); + /* + * we are inverting the lru lock/dentry->d_lock here, + * so use a trylock. If we fail to get the lock, just skip + * it + */ + if (!spin_trylock(&dentry->d_lock)) + return LRU_SKIP; - /* detach this root from the system */ - spin_lock(&dcache_lock); - dentry_lru_del_init(dentry); - __d_drop(dentry); - spin_unlock(&dcache_lock); + /* + * Referenced dentries are still in use. If they have active + * counts, just remove them from the LRU. Otherwise give them + * another pass through the LRU. + */ + if (dentry->d_lockref.count) { + d_lru_isolate(dentry); + spin_unlock(&dentry->d_lock); + return LRU_REMOVED; + } - for (;;) { - /* descend to the first leaf in the current subtree */ - while (!list_empty(&dentry->d_subdirs)) { - struct dentry *loop; - - /* this is a branch with children - detach all of them - * from the system in one go */ - spin_lock(&dcache_lock); - list_for_each_entry(loop, &dentry->d_subdirs, - d_u.d_child) { - dentry_lru_del_init(loop); - __d_drop(loop); - cond_resched_lock(&dcache_lock); - } - spin_unlock(&dcache_lock); + if (dentry->d_flags & DCACHE_REFERENCED) { + dentry->d_flags &= ~DCACHE_REFERENCED; + spin_unlock(&dentry->d_lock); - /* move to the first child */ - dentry = list_entry(dentry->d_subdirs.next, - struct dentry, d_u.d_child); - } + /* + * The list move itself will be made by the common LRU code. At + * this point, we've dropped the dentry->d_lock but keep the + * lru lock. This is safe to do, since every list movement is + * protected by the lru lock even if both locks are held. + * + * This is guaranteed by the fact that all LRU management + * functions are intermediated by the LRU API calls like + * list_lru_add and list_lru_del. List movement in this file + * only ever occur through this functions or through callbacks + * like this one, that are called from the LRU API. + * + * The only exceptions to this are functions like + * shrink_dentry_list, and code that first checks for the + * DCACHE_SHRINK_LIST flag. Those are guaranteed to be + * operating only with stack provided lists after they are + * properly isolated from the main list. It is thus, always a + * local access. + */ + return LRU_ROTATE; + } - /* consume the dentries from this leaf up through its parents - * until we find one with children or run out altogether */ - do { - struct inode *inode; - - if (atomic_read(&dentry->d_count) != 0) { - printk(KERN_ERR - "BUG: Dentry %p{i=%lx,n=%s}" - " still in use (%d)" - " [unmount of %s %s]\n", - dentry, - dentry->d_inode ? - dentry->d_inode->i_ino : 0UL, - dentry->d_name.name, - atomic_read(&dentry->d_count), - dentry->d_sb->s_type->name, - dentry->d_sb->s_id); - BUG(); - } + d_lru_shrink_move(dentry, freeable); + spin_unlock(&dentry->d_lock); - if (IS_ROOT(dentry)) - parent = NULL; - else { - parent = dentry->d_parent; - atomic_dec(&parent->d_count); - } + return LRU_REMOVED; +} - list_del(&dentry->d_u.d_child); - detached++; - - inode = dentry->d_inode; - if (inode) { - dentry->d_inode = NULL; - list_del_init(&dentry->d_alias); - if (dentry->d_op && dentry->d_op->d_iput) - dentry->d_op->d_iput(dentry, inode); - else - iput(inode); - } +/** + * prune_dcache_sb - shrink the dcache + * @sb: superblock + * @nr_to_scan : number of entries to try to free + * @nid: which node to scan for freeable entities + * + * Attempt to shrink the superblock dcache LRU by @nr_to_scan entries. This is + * done when we need more memory an called from the superblock shrinker + * function. + * + * This function may fail to free any resources if all the dentries are in + * use. + */ +long prune_dcache_sb(struct super_block *sb, unsigned long nr_to_scan, + int nid) +{ + LIST_HEAD(dispose); + long freed; - d_free(dentry); + freed = list_lru_walk_node(&sb->s_dentry_lru, nid, dentry_lru_isolate, + &dispose, &nr_to_scan); + shrink_dentry_list(&dispose); + return freed; +} - /* finished when we fall off the top of the tree, - * otherwise we ascend to the parent and move to the - * next sibling if there is one */ - if (!parent) - goto out; +static enum lru_status dentry_lru_isolate_shrink(struct list_head *item, + spinlock_t *lru_lock, void *arg) +{ + struct list_head *freeable = arg; + struct dentry *dentry = container_of(item, struct dentry, d_lru); - dentry = parent; + /* + * we are inverting the lru lock/dentry->d_lock here, + * so use a trylock. If we fail to get the lock, just skip + * it + */ + if (!spin_trylock(&dentry->d_lock)) + return LRU_SKIP; - } while (list_empty(&dentry->d_subdirs)); + d_lru_shrink_move(dentry, freeable); + spin_unlock(&dentry->d_lock); - dentry = list_entry(dentry->d_subdirs.next, - struct dentry, d_u.d_child); - } -out: - /* several dentries were freed, need to correct nr_dentry */ - spin_lock(&dcache_lock); - dentry_stat.nr_dentry -= detached; - spin_unlock(&dcache_lock); + return LRU_REMOVED; } -/* - * destroy the dentries attached to a superblock on unmounting - * - we don't need to use dentry->d_lock, and only need dcache_lock when - * removing the dentry from the system lists and hashes because: - * - the superblock is detached from all mountings and open files, so the - * dentry trees will not be rearranged by the VFS - * - s_umount is write-locked, so the memory pressure shrinker will ignore - * any dentries belonging to this superblock that it comes across - * - the filesystem itself is no longer permitted to rearrange the dentries - * in this superblock + +/** + * shrink_dcache_sb - shrink dcache for a superblock + * @sb: superblock + * + * Shrink the dcache for the specified super block. This is used to free + * the dcache before unmounting a file system. */ -void shrink_dcache_for_umount(struct super_block *sb) +void shrink_dcache_sb(struct super_block *sb) { - struct dentry *dentry; + long freed; - if (down_read_trylock(&sb->s_umount)) - BUG(); + do { + LIST_HEAD(dispose); - dentry = sb->s_root; - sb->s_root = NULL; - atomic_dec(&dentry->d_count); - shrink_dcache_for_umount_subtree(dentry); + freed = list_lru_walk(&sb->s_dentry_lru, + dentry_lru_isolate_shrink, &dispose, UINT_MAX); - while (!hlist_empty(&sb->s_anon)) { - dentry = hlist_entry(sb->s_anon.first, struct dentry, d_hash); - shrink_dcache_for_umount_subtree(dentry); - } + this_cpu_sub(nr_dentry_unused, freed); + shrink_dentry_list(&dispose); + } while (freed > 0); } +EXPORT_SYMBOL(shrink_dcache_sb); -/* - * Search for at least 1 mount point in the dentry's subdirs. - * We descend to the next level whenever the d_subdirs - * list is non-empty and continue searching. +/** + * enum d_walk_ret - action to talke during tree walk + * @D_WALK_CONTINUE: contrinue walk + * @D_WALK_QUIT: quit walk + * @D_WALK_NORETRY: quit when retry is needed + * @D_WALK_SKIP: skip this dentry and its children */ - +enum d_walk_ret { + D_WALK_CONTINUE, + D_WALK_QUIT, + D_WALK_NORETRY, + D_WALK_SKIP, +}; + /** - * have_submounts - check for mounts over a dentry - * @parent: dentry to check. + * d_walk - walk the dentry tree + * @parent: start of walk + * @data: data passed to @enter() and @finish() + * @enter: callback when first entering the dentry + * @finish: callback when successfully finished the walk * - * Return true if the parent or its subdirectories contain - * a mount point + * The @enter() and @finish() callbacks are called with d_lock held. */ - -int have_submounts(struct dentry *parent) +static void d_walk(struct dentry *parent, void *data, + enum d_walk_ret (*enter)(void *, struct dentry *), + void (*finish)(void *)) { - struct dentry *this_parent = parent; + struct dentry *this_parent; struct list_head *next; - - spin_lock(&dcache_lock); - if (d_mountpoint(parent)) - goto positive; + unsigned seq = 0; + enum d_walk_ret ret; + bool retry = true; + +again: + read_seqbegin_or_lock(&rename_lock, &seq); + this_parent = parent; + spin_lock(&this_parent->d_lock); + + ret = enter(data, this_parent); + switch (ret) { + case D_WALK_CONTINUE: + break; + case D_WALK_QUIT: + case D_WALK_SKIP: + goto out_unlock; + case D_WALK_NORETRY: + retry = false; + break; + } repeat: next = this_parent->d_subdirs.next; resume: @@ -770,31 +1090,146 @@ resume: struct list_head *tmp = next; struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); next = tmp->next; - /* Have we found a mount point ? */ - if (d_mountpoint(dentry)) - goto positive; + + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); + + ret = enter(data, dentry); + switch (ret) { + case D_WALK_CONTINUE: + break; + case D_WALK_QUIT: + spin_unlock(&dentry->d_lock); + goto out_unlock; + case D_WALK_NORETRY: + retry = false; + break; + case D_WALK_SKIP: + spin_unlock(&dentry->d_lock); + continue; + } + if (!list_empty(&dentry->d_subdirs)) { + spin_unlock(&this_parent->d_lock); + spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_); this_parent = dentry; + spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_); goto repeat; } + spin_unlock(&dentry->d_lock); } /* * All done at this level ... ascend and resume the search. */ if (this_parent != parent) { - next = this_parent->d_u.d_child.next; - this_parent = this_parent->d_parent; + struct dentry *child = this_parent; + this_parent = child->d_parent; + + rcu_read_lock(); + spin_unlock(&child->d_lock); + spin_lock(&this_parent->d_lock); + + /* + * might go back up the wrong parent if we have had a rename + * or deletion + */ + if (this_parent != child->d_parent || + (child->d_flags & DCACHE_DENTRY_KILLED) || + need_seqretry(&rename_lock, seq)) { + spin_unlock(&this_parent->d_lock); + rcu_read_unlock(); + goto rename_retry; + } + rcu_read_unlock(); + next = child->d_u.d_child.next; goto resume; } - spin_unlock(&dcache_lock); - return 0; /* No mount points found in tree */ -positive: - spin_unlock(&dcache_lock); - return 1; + if (need_seqretry(&rename_lock, seq)) { + spin_unlock(&this_parent->d_lock); + goto rename_retry; + } + if (finish) + finish(data); + +out_unlock: + spin_unlock(&this_parent->d_lock); + done_seqretry(&rename_lock, seq); + return; + +rename_retry: + if (!retry) + return; + seq = 1; + goto again; } /* - * Search the dentry child list for the specified parent, + * Search for at least 1 mount point in the dentry's subdirs. + * We descend to the next level whenever the d_subdirs + * list is non-empty and continue searching. + */ + +static enum d_walk_ret check_mount(void *data, struct dentry *dentry) +{ + int *ret = data; + if (d_mountpoint(dentry)) { + *ret = 1; + return D_WALK_QUIT; + } + return D_WALK_CONTINUE; +} + +/** + * have_submounts - check for mounts over a dentry + * @parent: dentry to check. + * + * Return true if the parent or its subdirectories contain + * a mount point + */ +int have_submounts(struct dentry *parent) +{ + int ret = 0; + + d_walk(parent, &ret, check_mount, NULL); + + return ret; +} +EXPORT_SYMBOL(have_submounts); + +/* + * Called by mount code to set a mountpoint and check if the mountpoint is + * reachable (e.g. NFS can unhash a directory dentry and then the complete + * subtree can become unreachable). + * + * Only one of check_submounts_and_drop() and d_set_mounted() must succeed. For + * this reason take rename_lock and d_lock on dentry and ancestors. + */ +int d_set_mounted(struct dentry *dentry) +{ + struct dentry *p; + int ret = -ENOENT; + write_seqlock(&rename_lock); + for (p = dentry->d_parent; !IS_ROOT(p); p = p->d_parent) { + /* Need exclusion wrt. check_submounts_and_drop() */ + spin_lock(&p->d_lock); + if (unlikely(d_unhashed(p))) { + spin_unlock(&p->d_lock); + goto out; + } + spin_unlock(&p->d_lock); + } + spin_lock(&dentry->d_lock); + if (!d_unlinked(dentry)) { + dentry->d_flags |= DCACHE_MOUNTED; + ret = 0; + } + spin_unlock(&dentry->d_lock); +out: + write_sequnlock(&rename_lock); + return ret; +} + +/* + * Search the dentry child list of the specified parent, * and move any unused dentries to the end of the unused * list for prune_dcache(). We descend to the next level * whenever the d_subdirs list is non-empty and continue @@ -807,58 +1242,40 @@ positive: * drop the lock and return early due to latency * constraints. */ -static int select_parent(struct dentry * parent) -{ - struct dentry *this_parent = parent; - struct list_head *next; - int found = 0; - spin_lock(&dcache_lock); -repeat: - next = this_parent->d_subdirs.next; -resume: - while (next != &this_parent->d_subdirs) { - struct list_head *tmp = next; - struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); - next = tmp->next; +struct select_data { + struct dentry *start; + struct list_head dispose; + int found; +}; - dentry_lru_del_init(dentry); - /* - * move only zero ref count dentries to the end - * of the unused list for prune_dcache - */ - if (!atomic_read(&dentry->d_count)) { - dentry_lru_add_tail(dentry); - found++; - } +static enum d_walk_ret select_collect(void *_data, struct dentry *dentry) +{ + struct select_data *data = _data; + enum d_walk_ret ret = D_WALK_CONTINUE; - /* - * We can return to the caller if we have found some (this - * ensures forward progress). We'll be coming back to find - * the rest. - */ - if (found && need_resched()) - goto out; + if (data->start == dentry) + goto out; - /* - * Descend a level if the d_subdirs list is non-empty. - */ - if (!list_empty(&dentry->d_subdirs)) { - this_parent = dentry; - goto repeat; + if (dentry->d_flags & DCACHE_SHRINK_LIST) { + data->found++; + } else { + if (dentry->d_flags & DCACHE_LRU_LIST) + d_lru_del(dentry); + if (!dentry->d_lockref.count) { + d_shrink_add(dentry, &data->dispose); + data->found++; } } /* - * All done at this level ... ascend and resume the search. + * We can return to the caller if we have found some (this + * ensures forward progress). We'll be coming back to find + * the rest. */ - if (this_parent != parent) { - next = this_parent->d_u.d_child.next; - this_parent = this_parent->d_parent; - goto resume; - } + if (!list_empty(&data->dispose)) + ret = need_resched() ? D_WALK_QUIT : D_WALK_NORETRY; out: - spin_unlock(&dcache_lock); - return found; + return ret; } /** @@ -867,46 +1284,143 @@ out: * * Prune the dcache to remove unused children of the parent dentry. */ - -void shrink_dcache_parent(struct dentry * parent) +void shrink_dcache_parent(struct dentry *parent) { - struct super_block *sb = parent->d_sb; - int found; + for (;;) { + struct select_data data; - while ((found = select_parent(parent)) != 0) - __shrink_dcache_sb(sb, &found, 0); + INIT_LIST_HEAD(&data.dispose); + data.start = parent; + data.found = 0; + + d_walk(parent, &data, select_collect, NULL); + if (!data.found) + break; + + shrink_dentry_list(&data.dispose); + cond_resched(); + } +} +EXPORT_SYMBOL(shrink_dcache_parent); + +static enum d_walk_ret umount_check(void *_data, struct dentry *dentry) +{ + /* it has busy descendents; complain about those instead */ + if (!list_empty(&dentry->d_subdirs)) + return D_WALK_CONTINUE; + + /* root with refcount 1 is fine */ + if (dentry == _data && dentry->d_lockref.count == 1) + return D_WALK_CONTINUE; + + printk(KERN_ERR "BUG: Dentry %p{i=%lx,n=%pd} " + " still in use (%d) [unmount of %s %s]\n", + dentry, + dentry->d_inode ? + dentry->d_inode->i_ino : 0UL, + dentry, + dentry->d_lockref.count, + dentry->d_sb->s_type->name, + dentry->d_sb->s_id); + WARN_ON(1); + return D_WALK_CONTINUE; +} + +static void do_one_tree(struct dentry *dentry) +{ + shrink_dcache_parent(dentry); + d_walk(dentry, dentry, umount_check, NULL); + d_drop(dentry); + dput(dentry); } /* - * Scan `nr' dentries and return the number which remain. - * - * We need to avoid reentering the filesystem if the caller is performing a - * GFP_NOFS allocation attempt. One example deadlock is: + * destroy the dentries attached to a superblock on unmounting + */ +void shrink_dcache_for_umount(struct super_block *sb) +{ + struct dentry *dentry; + + WARN(down_read_trylock(&sb->s_umount), "s_umount should've been locked"); + + dentry = sb->s_root; + sb->s_root = NULL; + do_one_tree(dentry); + + while (!hlist_bl_empty(&sb->s_anon)) { + dentry = dget(hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash)); + do_one_tree(dentry); + } +} + +static enum d_walk_ret check_and_collect(void *_data, struct dentry *dentry) +{ + struct select_data *data = _data; + + if (d_mountpoint(dentry)) { + data->found = -EBUSY; + return D_WALK_QUIT; + } + + return select_collect(_data, dentry); +} + +static void check_and_drop(void *_data) +{ + struct select_data *data = _data; + + if (d_mountpoint(data->start)) + data->found = -EBUSY; + if (!data->found) + __d_drop(data->start); +} + +/** + * check_submounts_and_drop - prune dcache, check for submounts and drop * - * ext2_new_block->getblk->GFP->shrink_dcache_memory->prune_dcache-> - * prune_one_dentry->dput->dentry_iput->iput->inode->i_sb->s_op->put_inode-> - * ext2_discard_prealloc->ext2_free_blocks->lock_super->DEADLOCK. + * All done as a single atomic operation relative to has_unlinked_ancestor(). + * Returns 0 if successfully unhashed @parent. If there were submounts then + * return -EBUSY. * - * In this case we return -1 to tell the caller that we baled. + * @dentry: dentry to prune and drop */ -static int shrink_dcache_memory(int nr, gfp_t gfp_mask) +int check_submounts_and_drop(struct dentry *dentry) { - if (nr) { - if (!(gfp_mask & __GFP_FS)) - return -1; - prune_dcache(nr); + int ret = 0; + + /* Negative dentries can be dropped without further checks */ + if (!dentry->d_inode) { + d_drop(dentry); + goto out; } - return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; -} -static struct shrinker dcache_shrinker = { - .shrink = shrink_dcache_memory, - .seeks = DEFAULT_SEEKS, -}; + for (;;) { + struct select_data data; + + INIT_LIST_HEAD(&data.dispose); + data.start = dentry; + data.found = 0; + + d_walk(dentry, &data, check_and_collect, check_and_drop); + ret = data.found; + + if (!list_empty(&data.dispose)) + shrink_dentry_list(&data.dispose); + + if (ret <= 0) + break; + + cond_resched(); + } + +out: + return ret; +} +EXPORT_SYMBOL(check_submounts_and_drop); /** - * d_alloc - allocate a dcache entry - * @parent: parent of entry to allocate + * __d_alloc - allocate a dcache entry + * @sb: filesystem it will belong to * @name: qstr of the name * * Allocates a dentry. It returns %NULL if there is insufficient memory @@ -914,7 +1428,7 @@ static struct shrinker dcache_shrinker = { * copied and the copy passed in may be reused after this call. */ -struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) +struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) { struct dentry *dentry; char *dname; @@ -923,6 +1437,13 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) if (!dentry) return NULL; + /* + * We guarantee that the inline name is always NUL-terminated. + * This way the memcpy() done by the name switching in rename + * will still always have a NUL at the end, even if we might + * be overwriting an internal NUL character + */ + dentry->d_iname[DNAME_INLINE_LEN-1] = 0; if (name->len > DNAME_INLINE_LEN-1) { dname = kmalloc(name->len + 1, GFP_KERNEL); if (!dname) { @@ -932,42 +1453,79 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) } else { dname = dentry->d_iname; } - dentry->d_name.name = dname; dentry->d_name.len = name->len; dentry->d_name.hash = name->hash; memcpy(dname, name->name, name->len); dname[name->len] = 0; - atomic_set(&dentry->d_count, 1); - dentry->d_flags = DCACHE_UNHASHED; + /* Make sure we always see the terminating NUL character */ + smp_wmb(); + dentry->d_name.name = dname; + + dentry->d_lockref.count = 1; + dentry->d_flags = 0; spin_lock_init(&dentry->d_lock); + seqcount_init(&dentry->d_seq); dentry->d_inode = NULL; - dentry->d_parent = NULL; - dentry->d_sb = NULL; + dentry->d_parent = dentry; + dentry->d_sb = sb; dentry->d_op = NULL; dentry->d_fsdata = NULL; - dentry->d_mounted = 0; - INIT_HLIST_NODE(&dentry->d_hash); + INIT_HLIST_BL_NODE(&dentry->d_hash); INIT_LIST_HEAD(&dentry->d_lru); INIT_LIST_HEAD(&dentry->d_subdirs); - INIT_LIST_HEAD(&dentry->d_alias); + INIT_HLIST_NODE(&dentry->d_alias); + INIT_LIST_HEAD(&dentry->d_u.d_child); + d_set_d_op(dentry, dentry->d_sb->s_d_op); - if (parent) { - dentry->d_parent = dget(parent); - dentry->d_sb = parent->d_sb; - } else { - INIT_LIST_HEAD(&dentry->d_u.d_child); - } + this_cpu_inc(nr_dentry); - spin_lock(&dcache_lock); - if (parent) - list_add(&dentry->d_u.d_child, &parent->d_subdirs); - dentry_stat.nr_dentry++; - spin_unlock(&dcache_lock); + return dentry; +} + +/** + * d_alloc - allocate a dcache entry + * @parent: parent of entry to allocate + * @name: qstr of the name + * + * Allocates a dentry. It returns %NULL if there is insufficient memory + * available. On a success the dentry is returned. The name passed in is + * copied and the copy passed in may be reused after this call. + */ +struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) +{ + struct dentry *dentry = __d_alloc(parent->d_sb, name); + if (!dentry) + return NULL; + + spin_lock(&parent->d_lock); + /* + * don't need child lock because it is not subject + * to concurrency here + */ + __dget_dlock(parent); + dentry->d_parent = parent; + list_add(&dentry->d_u.d_child, &parent->d_subdirs); + spin_unlock(&parent->d_lock); return dentry; } +EXPORT_SYMBOL(d_alloc); + +/** + * d_alloc_pseudo - allocate a dentry (for lookup-less filesystems) + * @sb: the superblock + * @name: qstr of the name + * + * For a filesystem that just pins its dentries in memory and never + * performs lookups at all, return an unhashed IS_ROOT dentry. + */ +struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name) +{ + return __d_alloc(sb, name); +} +EXPORT_SYMBOL(d_alloc_pseudo); struct dentry *d_alloc_name(struct dentry *parent, const char *name) { @@ -980,12 +1538,71 @@ struct dentry *d_alloc_name(struct dentry *parent, const char *name) } EXPORT_SYMBOL(d_alloc_name); -/* the caller must hold dcache_lock */ +void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op) +{ + WARN_ON_ONCE(dentry->d_op); + WARN_ON_ONCE(dentry->d_flags & (DCACHE_OP_HASH | + DCACHE_OP_COMPARE | + DCACHE_OP_REVALIDATE | + DCACHE_OP_WEAK_REVALIDATE | + DCACHE_OP_DELETE )); + dentry->d_op = op; + if (!op) + return; + if (op->d_hash) + dentry->d_flags |= DCACHE_OP_HASH; + if (op->d_compare) + dentry->d_flags |= DCACHE_OP_COMPARE; + if (op->d_revalidate) + dentry->d_flags |= DCACHE_OP_REVALIDATE; + if (op->d_weak_revalidate) + dentry->d_flags |= DCACHE_OP_WEAK_REVALIDATE; + if (op->d_delete) + dentry->d_flags |= DCACHE_OP_DELETE; + if (op->d_prune) + dentry->d_flags |= DCACHE_OP_PRUNE; + +} +EXPORT_SYMBOL(d_set_d_op); + +static unsigned d_flags_for_inode(struct inode *inode) +{ + unsigned add_flags = DCACHE_FILE_TYPE; + + if (!inode) + return DCACHE_MISS_TYPE; + + if (S_ISDIR(inode->i_mode)) { + add_flags = DCACHE_DIRECTORY_TYPE; + if (unlikely(!(inode->i_opflags & IOP_LOOKUP))) { + if (unlikely(!inode->i_op->lookup)) + add_flags = DCACHE_AUTODIR_TYPE; + else + inode->i_opflags |= IOP_LOOKUP; + } + } else if (unlikely(!(inode->i_opflags & IOP_NOFOLLOW))) { + if (unlikely(inode->i_op->follow_link)) + add_flags = DCACHE_SYMLINK_TYPE; + else + inode->i_opflags |= IOP_NOFOLLOW; + } + + if (unlikely(IS_AUTOMOUNT(inode))) + add_flags |= DCACHE_NEED_AUTOMOUNT; + return add_flags; +} + static void __d_instantiate(struct dentry *dentry, struct inode *inode) { + unsigned add_flags = d_flags_for_inode(inode); + + spin_lock(&dentry->d_lock); + __d_set_type(dentry, add_flags); if (inode) - list_add(&dentry->d_alias, &inode->i_dentry); + hlist_add_head(&dentry->d_alias, &inode->i_dentry); dentry->d_inode = inode; + dentry_rcuwalk_barrier(dentry); + spin_unlock(&dentry->d_lock); fsnotify_d_instantiate(dentry, inode); } @@ -1006,12 +1623,15 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) void d_instantiate(struct dentry *entry, struct inode * inode) { - BUG_ON(!list_empty(&entry->d_alias)); - spin_lock(&dcache_lock); + BUG_ON(!hlist_unhashed(&entry->d_alias)); + if (inode) + spin_lock(&inode->i_lock); __d_instantiate(entry, inode); - spin_unlock(&dcache_lock); + if (inode) + spin_unlock(&inode->i_lock); security_d_instantiate(entry, inode); } +EXPORT_SYMBOL(d_instantiate); /** * d_instantiate_unique - instantiate a non-aliased dentry @@ -1042,18 +1662,21 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry, return NULL; } - list_for_each_entry(alias, &inode->i_dentry, d_alias) { - struct qstr *qstr = &alias->d_name; - - if (qstr->hash != hash) + hlist_for_each_entry(alias, &inode->i_dentry, d_alias) { + /* + * Don't need alias->d_lock here, because aliases with + * d_parent == entry->d_parent are not subject to name or + * parent changes, because the parent inode i_mutex is held. + */ + if (alias->d_name.hash != hash) continue; if (alias->d_parent != entry->d_parent) continue; - if (qstr->len != len) + if (alias->d_name.len != len) continue; - if (memcmp(qstr->name, name, len)) + if (dentry_cmp(alias, name, len)) continue; - dget_locked(alias); + __dget(alias); return alias; } @@ -1065,11 +1688,13 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode) { struct dentry *result; - BUG_ON(!list_empty(&entry->d_alias)); + BUG_ON(!hlist_unhashed(&entry->d_alias)); - spin_lock(&dcache_lock); + if (inode) + spin_lock(&inode->i_lock); result = __d_instantiate_unique(entry, inode); - spin_unlock(&dcache_lock); + if (inode) + spin_unlock(&inode->i_lock); if (!result) { security_d_instantiate(entry, inode); @@ -1084,38 +1709,77 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode) EXPORT_SYMBOL(d_instantiate_unique); /** - * d_alloc_root - allocate root dentry - * @root_inode: inode to allocate the root for + * d_instantiate_no_diralias - instantiate a non-aliased dentry + * @entry: dentry to complete + * @inode: inode to attach to this dentry * - * Allocate a root ("/") dentry for the inode given. The inode is - * instantiated and returned. %NULL is returned if there is insufficient - * memory or the inode passed is %NULL. + * Fill in inode information in the entry. If a directory alias is found, then + * return an error (and drop inode). Together with d_materialise_unique() this + * guarantees that a directory inode may never have more than one alias. */ - -struct dentry * d_alloc_root(struct inode * root_inode) +int d_instantiate_no_diralias(struct dentry *entry, struct inode *inode) +{ + BUG_ON(!hlist_unhashed(&entry->d_alias)); + + spin_lock(&inode->i_lock); + if (S_ISDIR(inode->i_mode) && !hlist_empty(&inode->i_dentry)) { + spin_unlock(&inode->i_lock); + iput(inode); + return -EBUSY; + } + __d_instantiate(entry, inode); + spin_unlock(&inode->i_lock); + security_d_instantiate(entry, inode); + + return 0; +} +EXPORT_SYMBOL(d_instantiate_no_diralias); + +struct dentry *d_make_root(struct inode *root_inode) { struct dentry *res = NULL; if (root_inode) { - static const struct qstr name = { .name = "/", .len = 1 }; + static const struct qstr name = QSTR_INIT("/", 1); - res = d_alloc(NULL, &name); - if (res) { - res->d_sb = root_inode->i_sb; - res->d_parent = res; + res = __d_alloc(root_inode->i_sb, &name); + if (res) d_instantiate(res, root_inode); - } + else + iput(root_inode); } return res; } +EXPORT_SYMBOL(d_make_root); -static inline struct hlist_head *d_hash(struct dentry *parent, - unsigned long hash) +static struct dentry * __d_find_any_alias(struct inode *inode) +{ + struct dentry *alias; + + if (hlist_empty(&inode->i_dentry)) + return NULL; + alias = hlist_entry(inode->i_dentry.first, struct dentry, d_alias); + __dget(alias); + return alias; +} + +/** + * d_find_any_alias - find any alias for a given inode + * @inode: inode to find an alias for + * + * If any aliases exist for the given inode, take and return a + * reference for one of them. If no aliases exist, return %NULL. + */ +struct dentry *d_find_any_alias(struct inode *inode) { - hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES; - hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS); - return dentry_hashtable + (hash & D_HASHMASK); + struct dentry *de; + + spin_lock(&inode->i_lock); + de = __d_find_any_alias(inode); + spin_unlock(&inode->i_lock); + return de; } +EXPORT_SYMBOL(d_find_any_alias); /** * d_obtain_alias - find or allocate a dentry for a given inode @@ -1137,48 +1801,53 @@ static inline struct hlist_head *d_hash(struct dentry *parent, */ struct dentry *d_obtain_alias(struct inode *inode) { - static const struct qstr anonstring = { .name = "" }; + static const struct qstr anonstring = QSTR_INIT("/", 1); struct dentry *tmp; struct dentry *res; + unsigned add_flags; if (!inode) return ERR_PTR(-ESTALE); if (IS_ERR(inode)) return ERR_CAST(inode); - res = d_find_alias(inode); + res = d_find_any_alias(inode); if (res) goto out_iput; - tmp = d_alloc(NULL, &anonstring); + tmp = __d_alloc(inode->i_sb, &anonstring); if (!tmp) { res = ERR_PTR(-ENOMEM); goto out_iput; } - tmp->d_parent = tmp; /* make sure dput doesn't croak */ - spin_lock(&dcache_lock); - res = __d_find_alias(inode, 0); + spin_lock(&inode->i_lock); + res = __d_find_any_alias(inode); if (res) { - spin_unlock(&dcache_lock); + spin_unlock(&inode->i_lock); dput(tmp); goto out_iput; } /* attach a disconnected dentry */ + add_flags = d_flags_for_inode(inode) | DCACHE_DISCONNECTED; + spin_lock(&tmp->d_lock); - tmp->d_sb = inode->i_sb; tmp->d_inode = inode; - tmp->d_flags |= DCACHE_DISCONNECTED; - tmp->d_flags &= ~DCACHE_UNHASHED; - list_add(&tmp->d_alias, &inode->i_dentry); - hlist_add_head(&tmp->d_hash, &inode->i_sb->s_anon); + tmp->d_flags |= add_flags; + hlist_add_head(&tmp->d_alias, &inode->i_dentry); + hlist_bl_lock(&tmp->d_sb->s_anon); + hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon); + hlist_bl_unlock(&tmp->d_sb->s_anon); spin_unlock(&tmp->d_lock); + spin_unlock(&inode->i_lock); + security_d_instantiate(tmp, inode); - spin_unlock(&dcache_lock); return tmp; out_iput: + if (res && !IS_ERR(res)) + security_d_instantiate(res, inode); iput(inode); return res; } @@ -1199,32 +1868,42 @@ EXPORT_SYMBOL(d_obtain_alias); * If a dentry was found and moved, then it is returned. Otherwise NULL * is returned. This matches the expected return value of ->lookup. * + * Cluster filesystems may call this function with a negative, hashed dentry. + * In that case, we know that the inode will be a regular file, and also this + * will only occur during atomic_open. So we need to check for the dentry + * being already hashed only in the final case. */ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) { struct dentry *new = NULL; + if (IS_ERR(inode)) + return ERR_CAST(inode); + if (inode && S_ISDIR(inode->i_mode)) { - spin_lock(&dcache_lock); + spin_lock(&inode->i_lock); new = __d_find_alias(inode, 1); if (new) { BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED)); - spin_unlock(&dcache_lock); + spin_unlock(&inode->i_lock); security_d_instantiate(new, inode); - d_rehash(dentry); d_move(new, dentry); iput(inode); } else { - /* already taking dcache_lock, so d_add() by hand */ + /* already taking inode->i_lock, so d_add() by hand */ __d_instantiate(dentry, inode); - spin_unlock(&dcache_lock); + spin_unlock(&inode->i_lock); security_d_instantiate(dentry, inode); d_rehash(dentry); } - } else - d_add(dentry, inode); + } else { + d_instantiate(dentry, inode); + if (d_unhashed(dentry)) + d_rehash(dentry); + } return new; } +EXPORT_SYMBOL(d_splice_alias); /** * d_add_ci - lookup or allocate new dentry with case-exact name @@ -1245,7 +1924,6 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode, struct qstr *name) { - int error; struct dentry *found; struct dentry *new; @@ -1254,10 +1932,12 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode, * if not go ahead and create it now. */ found = d_hash_and_lookup(dentry->d_parent, name); + if (unlikely(IS_ERR(found))) + goto err_out; if (!found) { new = d_alloc(dentry->d_parent, name); if (!new) { - error = -ENOMEM; + found = ERR_PTR(-ENOMEM); goto err_out; } @@ -1289,65 +1969,178 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode, * Negative dentry: instantiate it unless the inode is a directory and * already has a dentry. */ - spin_lock(&dcache_lock); - if (!S_ISDIR(inode->i_mode) || list_empty(&inode->i_dentry)) { - __d_instantiate(found, inode); - spin_unlock(&dcache_lock); - security_d_instantiate(found, inode); - return found; + new = d_splice_alias(inode, found); + if (new) { + dput(found); + found = new; } - - /* - * In case a directory already has a (disconnected) entry grab a - * reference to it, move it in place and use it. - */ - new = list_entry(inode->i_dentry.next, struct dentry, d_alias); - dget_locked(new); - spin_unlock(&dcache_lock); - security_d_instantiate(found, inode); - d_move(new, found); - iput(inode); - dput(found); - return new; + return found; err_out: iput(inode); - return ERR_PTR(error); + return found; +} +EXPORT_SYMBOL(d_add_ci); + +/* + * Do the slow-case of the dentry name compare. + * + * Unlike the dentry_cmp() function, we need to atomically + * load the name and length information, so that the + * filesystem can rely on them, and can use the 'name' and + * 'len' information without worrying about walking off the + * end of memory etc. + * + * Thus the read_seqcount_retry() and the "duplicate" info + * in arguments (the low-level filesystem should not look + * at the dentry inode or name contents directly, since + * rename can change them while we're in RCU mode). + */ +enum slow_d_compare { + D_COMP_OK, + D_COMP_NOMATCH, + D_COMP_SEQRETRY, +}; + +static noinline enum slow_d_compare slow_dentry_cmp( + const struct dentry *parent, + struct dentry *dentry, + unsigned int seq, + const struct qstr *name) +{ + int tlen = dentry->d_name.len; + const char *tname = dentry->d_name.name; + + if (read_seqcount_retry(&dentry->d_seq, seq)) { + cpu_relax(); + return D_COMP_SEQRETRY; + } + if (parent->d_op->d_compare(parent, dentry, tlen, tname, name)) + return D_COMP_NOMATCH; + return D_COMP_OK; } /** - * d_lookup - search for a dentry + * __d_lookup_rcu - search for a dentry (racy, store-free) * @parent: parent dentry * @name: qstr of name we wish to find + * @seqp: returns d_seq value at the point where the dentry was found + * Returns: dentry, or NULL * - * Searches the children of the parent dentry for the name in question. If - * the dentry is found its reference count is incremented and the dentry - * is returned. The caller must use dput to free the entry when it has - * finished using it. %NULL is returned on failure. + * __d_lookup_rcu is the dcache lookup function for rcu-walk name + * resolution (store-free path walking) design described in + * Documentation/filesystems/path-lookup.txt. * - * __d_lookup is dcache_lock free. The hash list is protected using RCU. - * Memory barriers are used while updating and doing lockless traversal. - * To avoid races with d_move while rename is happening, d_lock is used. + * This is not to be used outside core vfs. * - * Overflows in memcmp(), while d_move, are avoided by keeping the length - * and name pointer in one structure pointed by d_qstr. + * __d_lookup_rcu must only be used in rcu-walk mode, ie. with vfsmount lock + * held, and rcu_read_lock held. The returned dentry must not be stored into + * without taking d_lock and checking d_seq sequence count against @seq + * returned here. * - * rcu_read_lock() and rcu_read_unlock() are used to disable preemption while - * lookup is going on. + * A refcount may be taken on the found dentry with the d_rcu_to_refcount + * function. * - * The dentry unused LRU is not updated even if lookup finds the required dentry - * in there. It is updated in places such as prune_dcache, shrink_dcache_sb, - * select_parent and __dget_locked. This laziness saves lookup from dcache_lock - * acquisition. + * Alternatively, __d_lookup_rcu may be called again to look up the child of + * the returned dentry, so long as its parent's seqlock is checked after the + * child is looked up. Thus, an interlocking stepping of sequence lock checks + * is formed, giving integrity down the path walk. * - * d_lookup() is protected against the concurrent renames in some unrelated - * directory using the seqlockt_t rename_lock. + * NOTE! The caller *has* to check the resulting dentry against the sequence + * number we've returned before using any of the resulting dentry state! */ +struct dentry *__d_lookup_rcu(const struct dentry *parent, + const struct qstr *name, + unsigned *seqp) +{ + u64 hashlen = name->hash_len; + const unsigned char *str = name->name; + struct hlist_bl_head *b = d_hash(parent, hashlen_hash(hashlen)); + struct hlist_bl_node *node; + struct dentry *dentry; + + /* + * Note: There is significant duplication with __d_lookup_rcu which is + * required to prevent single threaded performance regressions + * especially on architectures where smp_rmb (in seqcounts) are costly. + * Keep the two functions in sync. + */ -struct dentry * d_lookup(struct dentry * parent, struct qstr * name) + /* + * The hash list is protected using RCU. + * + * Carefully use d_seq when comparing a candidate dentry, to avoid + * races with d_move(). + * + * It is possible that concurrent renames can mess up our list + * walk here and result in missing our dentry, resulting in the + * false-negative result. d_lookup() protects against concurrent + * renames using rename_lock seqlock. + * + * See Documentation/filesystems/path-lookup.txt for more details. + */ + hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) { + unsigned seq; + +seqretry: + /* + * The dentry sequence count protects us from concurrent + * renames, and thus protects parent and name fields. + * + * The caller must perform a seqcount check in order + * to do anything useful with the returned dentry. + * + * NOTE! We do a "raw" seqcount_begin here. That means that + * we don't wait for the sequence count to stabilize if it + * is in the middle of a sequence change. If we do the slow + * dentry compare, we will do seqretries until it is stable, + * and if we end up with a successful lookup, we actually + * want to exit RCU lookup anyway. + */ + seq = raw_seqcount_begin(&dentry->d_seq); + if (dentry->d_parent != parent) + continue; + if (d_unhashed(dentry)) + continue; + + if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) { + if (dentry->d_name.hash != hashlen_hash(hashlen)) + continue; + *seqp = seq; + switch (slow_dentry_cmp(parent, dentry, seq, name)) { + case D_COMP_OK: + return dentry; + case D_COMP_NOMATCH: + continue; + default: + goto seqretry; + } + } + + if (dentry->d_name.hash_len != hashlen) + continue; + *seqp = seq; + if (!dentry_cmp(dentry, str, hashlen_len(hashlen))) + return dentry; + } + return NULL; +} + +/** + * d_lookup - search for a dentry + * @parent: parent dentry + * @name: qstr of name we wish to find + * Returns: dentry, or NULL + * + * d_lookup searches the children of the parent dentry for the name in + * question. If the dentry is found its reference count is incremented and the + * dentry is returned. The caller must use dput to free the entry when it has + * finished using it. %NULL is returned if the dentry does not exist. + */ +struct dentry *d_lookup(const struct dentry *parent, const struct qstr *name) { - struct dentry * dentry = NULL; - unsigned long seq; + struct dentry *dentry; + unsigned seq; do { seq = read_seqbegin(&rename_lock); @@ -1357,38 +2150,63 @@ struct dentry * d_lookup(struct dentry * parent, struct qstr * name) } while (read_seqretry(&rename_lock, seq)); return dentry; } +EXPORT_SYMBOL(d_lookup); -struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) +/** + * __d_lookup - search for a dentry (racy) + * @parent: parent dentry + * @name: qstr of name we wish to find + * Returns: dentry, or NULL + * + * __d_lookup is like d_lookup, however it may (rarely) return a + * false-negative result due to unrelated rename activity. + * + * __d_lookup is slightly faster by avoiding rename_lock read seqlock, + * however it must be used carefully, eg. with a following d_lookup in + * the case of failure. + * + * __d_lookup callers must be commented. + */ +struct dentry *__d_lookup(const struct dentry *parent, const struct qstr *name) { unsigned int len = name->len; unsigned int hash = name->hash; const unsigned char *str = name->name; - struct hlist_head *head = d_hash(parent,hash); + struct hlist_bl_head *b = d_hash(parent, hash); + struct hlist_bl_node *node; struct dentry *found = NULL; - struct hlist_node *node; struct dentry *dentry; + /* + * Note: There is significant duplication with __d_lookup_rcu which is + * required to prevent single threaded performance regressions + * especially on architectures where smp_rmb (in seqcounts) are costly. + * Keep the two functions in sync. + */ + + /* + * The hash list is protected using RCU. + * + * Take d_lock when comparing a candidate dentry, to avoid races + * with d_move(). + * + * It is possible that concurrent renames can mess up our list + * walk here and result in missing our dentry, resulting in the + * false-negative result. d_lookup() protects against concurrent + * renames using rename_lock seqlock. + * + * See Documentation/filesystems/path-lookup.txt for more details. + */ rcu_read_lock(); - hlist_for_each_entry_rcu(dentry, node, head, d_hash) { - struct qstr *qstr; + hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) { if (dentry->d_name.hash != hash) continue; - if (dentry->d_parent != parent) - continue; spin_lock(&dentry->d_lock); - - /* - * Recheck the dentry after taking the lock - d_move may have - * changed things. Don't bother checking the hash because we're - * about to compare the whole name anyway. - */ if (dentry->d_parent != parent) goto next; - - /* non-existing due to RCU? */ if (d_unhashed(dentry)) goto next; @@ -1396,18 +2214,19 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) * It is safe to compare names since d_move() cannot * change the qstr (protected by d_lock). */ - qstr = &dentry->d_name; - if (parent->d_op && parent->d_op->d_compare) { - if (parent->d_op->d_compare(parent, qstr, name)) + if (parent->d_flags & DCACHE_OP_COMPARE) { + int tlen = dentry->d_name.len; + const char *tname = dentry->d_name.name; + if (parent->d_op->d_compare(parent, dentry, tlen, tname, name)) goto next; } else { - if (qstr->len != len) + if (dentry->d_name.len != len) goto next; - if (memcmp(qstr->name, str, len)) + if (dentry_cmp(dentry, str, len)) goto next; } - atomic_inc(&dentry->d_count); + dentry->d_lockref.count++; found = dentry; spin_unlock(&dentry->d_lock); break; @@ -1424,65 +2243,55 @@ next: * @dir: Directory to search in * @name: qstr of name we wish to find * - * On hash failure or on lookup failure NULL is returned. + * On lookup failure NULL is returned; on bad name - ERR_PTR(-error) */ struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name) { - struct dentry *dentry = NULL; - /* * Check for a fs-specific hash function. Note that we must * calculate the standard hash first, as the d_op->d_hash() * routine may choose to leave the hash value unchanged. */ name->hash = full_name_hash(name->name, name->len); - if (dir->d_op && dir->d_op->d_hash) { - if (dir->d_op->d_hash(dir, name) < 0) - goto out; + if (dir->d_flags & DCACHE_OP_HASH) { + int err = dir->d_op->d_hash(dir, name); + if (unlikely(err < 0)) + return ERR_PTR(err); } - dentry = d_lookup(dir, name); -out: - return dentry; + return d_lookup(dir, name); } +EXPORT_SYMBOL(d_hash_and_lookup); /** - * d_validate - verify dentry provided from insecure source + * d_validate - verify dentry provided from insecure source (deprecated) * @dentry: The dentry alleged to be valid child of @dparent * @dparent: The parent dentry (known to be valid) * * An insecure source has sent us a dentry, here we verify it and dget() it. * This is used by ncpfs in its readdir implementation. * Zero is returned in the dentry is invalid. + * + * This function is slow for big directories, and deprecated, do not use it. */ - int d_validate(struct dentry *dentry, struct dentry *dparent) { - struct hlist_head *base; - struct hlist_node *lhp; - - /* Check whether the ptr might be valid at all.. */ - if (!kmem_ptr_validate(dentry_cache, dentry)) - goto out; + struct dentry *child; - if (dentry->d_parent != dparent) - goto out; - - spin_lock(&dcache_lock); - base = d_hash(dparent, dentry->d_name.hash); - hlist_for_each(lhp,base) { - /* hlist_for_each_entry_rcu() not required for d_hash list - * as it is parsed under dcache_lock - */ - if (dentry == hlist_entry(lhp, struct dentry, d_hash)) { - __dget_locked(dentry); - spin_unlock(&dcache_lock); + spin_lock(&dparent->d_lock); + list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) { + if (dentry == child) { + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); + __dget_dlock(dentry); + spin_unlock(&dentry->d_lock); + spin_unlock(&dparent->d_lock); return 1; } } - spin_unlock(&dcache_lock); -out: + spin_unlock(&dparent->d_lock); + return 0; } +EXPORT_SYMBOL(d_validate); /* * When a file is deleted, we have two options: @@ -1507,15 +2316,23 @@ out: void d_delete(struct dentry * dentry) { + struct inode *inode; int isdir = 0; /* * Are we the only user? */ - spin_lock(&dcache_lock); +again: spin_lock(&dentry->d_lock); - isdir = S_ISDIR(dentry->d_inode->i_mode); - if (atomic_read(&dentry->d_count) == 1) { - dentry_iput(dentry); + inode = dentry->d_inode; + isdir = S_ISDIR(inode->i_mode); + if (dentry->d_lockref.count == 1) { + if (!spin_trylock(&inode->i_lock)) { + spin_unlock(&dentry->d_lock); + cpu_relax(); + goto again; + } + dentry->d_flags &= ~DCACHE_CANT_MOUNT; + dentry_unlink_inode(dentry); fsnotify_nameremove(dentry, isdir); return; } @@ -1524,16 +2341,18 @@ void d_delete(struct dentry * dentry) __d_drop(dentry); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); fsnotify_nameremove(dentry, isdir); } +EXPORT_SYMBOL(d_delete); -static void __d_rehash(struct dentry * entry, struct hlist_head *list) +static void __d_rehash(struct dentry * entry, struct hlist_bl_head *b) { - - entry->d_flags &= ~DCACHE_UNHASHED; - hlist_add_head_rcu(&entry->d_hash, list); + BUG_ON(!d_unhashed(entry)); + hlist_bl_lock(b); + entry->d_flags |= DCACHE_RCUACCESS; + hlist_bl_add_head_rcu(&entry->d_hash, b); + hlist_bl_unlock(b); } static void _d_rehash(struct dentry * entry) @@ -1550,24 +2369,39 @@ static void _d_rehash(struct dentry * entry) void d_rehash(struct dentry * entry) { - spin_lock(&dcache_lock); spin_lock(&entry->d_lock); _d_rehash(entry); spin_unlock(&entry->d_lock); - spin_unlock(&dcache_lock); } +EXPORT_SYMBOL(d_rehash); -/* - * When switching names, the actual string doesn't strictly have to - * be preserved in the target - because we're dropping the target - * anyway. As such, we can just do a simple memcpy() to copy over - * the new name before we switch. +/** + * dentry_update_name_case - update case insensitive dentry with a new name + * @dentry: dentry to be updated + * @name: new name * - * Note that we have to be a lot more careful about getting the hash - * switched - we have to switch the hash value properly even if it - * then no longer matches the actual (corrupted) string of the target. - * The hash value has to match the hash queue that the dentry is on.. + * Update a case insensitive dentry with new case of name. + * + * dentry must have been returned by d_lookup with name @name. Old and new + * name lengths must match (ie. no d_compare which allows mismatched name + * lengths). + * + * Parent inode i_mutex must be held over d_lookup and into this call (to + * keep renames and concurrent inserts, and readdir(2) away). */ +void dentry_update_name_case(struct dentry *dentry, struct qstr *name) +{ + BUG_ON(!mutex_is_locked(&dentry->d_parent->d_inode->i_mutex)); + BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */ + + spin_lock(&dentry->d_lock); + write_seqcount_begin(&dentry->d_seq); + memcpy((unsigned char *)dentry->d_name.name, name->name, name->len); + write_seqcount_end(&dentry->d_seq); + spin_unlock(&dentry->d_lock); +} +EXPORT_SYMBOL(dentry_update_name_case); + static void switch_names(struct dentry *dentry, struct dentry *target) { if (dname_external(target)) { @@ -1598,68 +2432,109 @@ static void switch_names(struct dentry *dentry, struct dentry *target) dentry->d_name.name = dentry->d_iname; } else { /* - * Both are internal. Just copy target to dentry + * Both are internal. */ - memcpy(dentry->d_iname, target->d_name.name, - target->d_name.len + 1); - dentry->d_name.len = target->d_name.len; - return; + unsigned int i; + BUILD_BUG_ON(!IS_ALIGNED(DNAME_INLINE_LEN, sizeof(long))); + for (i = 0; i < DNAME_INLINE_LEN / sizeof(long); i++) { + swap(((long *) &dentry->d_iname)[i], + ((long *) &target->d_iname)[i]); + } } } swap(dentry->d_name.len, target->d_name.len); } +static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target) +{ + /* + * XXXX: do we really need to take target->d_lock? + */ + if (IS_ROOT(dentry) || dentry->d_parent == target->d_parent) + spin_lock(&target->d_parent->d_lock); + else { + if (d_ancestor(dentry->d_parent, target->d_parent)) { + spin_lock(&dentry->d_parent->d_lock); + spin_lock_nested(&target->d_parent->d_lock, + DENTRY_D_LOCK_NESTED); + } else { + spin_lock(&target->d_parent->d_lock); + spin_lock_nested(&dentry->d_parent->d_lock, + DENTRY_D_LOCK_NESTED); + } + } + if (target < dentry) { + spin_lock_nested(&target->d_lock, 2); + spin_lock_nested(&dentry->d_lock, 3); + } else { + spin_lock_nested(&dentry->d_lock, 2); + spin_lock_nested(&target->d_lock, 3); + } +} + +static void dentry_unlock_parents_for_move(struct dentry *dentry, + struct dentry *target) +{ + if (target->d_parent != dentry->d_parent) + spin_unlock(&dentry->d_parent->d_lock); + if (target->d_parent != target) + spin_unlock(&target->d_parent->d_lock); +} + /* - * We cannibalize "target" when moving dentry on top of it, - * because it's going to be thrown away anyway. We could be more - * polite about it, though. + * When switching names, the actual string doesn't strictly have to + * be preserved in the target - because we're dropping the target + * anyway. As such, we can just do a simple memcpy() to copy over + * the new name before we switch. * - * This forceful removal will result in ugly /proc output if - * somebody holds a file open that got deleted due to a rename. - * We could be nicer about the deleted file, and let it show - * up under the name it had before it was deleted rather than - * under the original name of the file that was moved on top of it. + * Note that we have to be a lot more careful about getting the hash + * switched - we have to switch the hash value properly even if it + * then no longer matches the actual (corrupted) string of the target. + * The hash value has to match the hash queue that the dentry is on.. */ - /* - * d_move_locked - move a dentry + * __d_move - move a dentry * @dentry: entry to move * @target: new dentry + * @exchange: exchange the two dentries * * Update the dcache to reflect the move of a file name. Negative - * dcache entries should not be moved in this way. + * dcache entries should not be moved in this way. Caller must hold + * rename_lock, the i_mutex of the source and target directories, + * and the sb->s_vfs_rename_mutex if they differ. See lock_rename(). */ -static void d_move_locked(struct dentry * dentry, struct dentry * target) +static void __d_move(struct dentry *dentry, struct dentry *target, + bool exchange) { - struct hlist_head *list; - if (!dentry->d_inode) printk(KERN_WARNING "VFS: moving negative dcache entry\n"); - write_seqlock(&rename_lock); - /* - * XXXX: do we really need to take target->d_lock? - */ - if (target < dentry) { - spin_lock(&target->d_lock); - spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); - } else { - spin_lock(&dentry->d_lock); - spin_lock_nested(&target->d_lock, DENTRY_D_LOCK_NESTED); - } + BUG_ON(d_ancestor(dentry, target)); + BUG_ON(d_ancestor(target, dentry)); + + dentry_lock_for_move(dentry, target); - /* Move the dentry to the target hash queue, if on different bucket */ - if (d_unhashed(dentry)) - goto already_unhashed; + write_seqcount_begin(&dentry->d_seq); + write_seqcount_begin_nested(&target->d_seq, DENTRY_D_LOCK_NESTED); - hlist_del_rcu(&dentry->d_hash); + /* __d_drop does write_seqcount_barrier, but they're OK to nest. */ -already_unhashed: - list = d_hash(target->d_parent, target->d_name.hash); - __d_rehash(dentry, list); + /* + * Move the dentry to the target hash queue. Don't bother checking + * for the same hash queue because of how unlikely it is. + */ + __d_drop(dentry); + __d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash)); - /* Unhash the target: dput() will then get rid of it */ + /* + * Unhash the target (d_delete() is not usable here). If exchanging + * the two dentries, then rehash onto the other's hash queue. + */ __d_drop(target); + if (exchange) { + __d_rehash(target, + d_hash(dentry->d_parent, dentry->d_name.hash)); + } list_del(&dentry->d_u.d_child); list_del(&target->d_u.d_child); @@ -1681,26 +2556,52 @@ already_unhashed: } list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs); + + write_seqcount_end(&target->d_seq); + write_seqcount_end(&dentry->d_seq); + + dentry_unlock_parents_for_move(dentry, target); + if (exchange) + fsnotify_d_move(target); spin_unlock(&target->d_lock); fsnotify_d_move(dentry); spin_unlock(&dentry->d_lock); - write_sequnlock(&rename_lock); } -/** +/* * d_move - move a dentry * @dentry: entry to move * @target: new dentry * * Update the dcache to reflect the move of a file name. Negative - * dcache entries should not be moved in this way. + * dcache entries should not be moved in this way. See the locking + * requirements for __d_move. */ +void d_move(struct dentry *dentry, struct dentry *target) +{ + write_seqlock(&rename_lock); + __d_move(dentry, target, false); + write_sequnlock(&rename_lock); +} +EXPORT_SYMBOL(d_move); -void d_move(struct dentry * dentry, struct dentry * target) +/* + * d_exchange - exchange two dentries + * @dentry1: first dentry + * @dentry2: second dentry + */ +void d_exchange(struct dentry *dentry1, struct dentry *dentry2) { - spin_lock(&dcache_lock); - d_move_locked(dentry, target); - spin_unlock(&dcache_lock); + write_seqlock(&rename_lock); + + WARN_ON(!dentry1->d_inode); + WARN_ON(!dentry2->d_inode); + WARN_ON(IS_ROOT(dentry1)); + WARN_ON(IS_ROOT(dentry2)); + + __d_move(dentry1, dentry2, true); + + write_sequnlock(&rename_lock); } /** @@ -1726,28 +2627,22 @@ struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2) * This helper attempts to cope with remotely renamed directories * * It assumes that the caller is already holding - * dentry->d_parent->d_inode->i_mutex and the dcache_lock + * dentry->d_parent->d_inode->i_mutex, inode->i_lock and rename_lock * * Note: If ever the locking in lock_rename() changes, then please * remember to update this too... */ -static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias) - __releases(dcache_lock) +static struct dentry *__d_unalias(struct inode *inode, + struct dentry *dentry, struct dentry *alias) { struct mutex *m1 = NULL, *m2 = NULL; - struct dentry *ret; + struct dentry *ret = ERR_PTR(-EBUSY); /* If alias and dentry share a parent, then no extra locks required */ if (alias->d_parent == dentry->d_parent) goto out_unalias; - /* Check for loops */ - ret = ERR_PTR(-ELOOP); - if (d_ancestor(alias, dentry)) - goto out_err; - /* See lock_rename() */ - ret = ERR_PTR(-EBUSY); if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex)) goto out_err; m1 = &dentry->d_sb->s_vfs_rename_mutex; @@ -1755,10 +2650,12 @@ static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias) goto out_err; m2 = &alias->d_parent->d_inode->i_mutex; out_unalias: - d_move_locked(alias, dentry); - ret = alias; + if (likely(!d_mountpoint(alias))) { + __d_move(alias, dentry, false); + ret = alias; + } out_err: - spin_unlock(&dcache_lock); + spin_unlock(&inode->i_lock); if (m2) mutex_unlock(m2); if (m1) @@ -1769,32 +2666,34 @@ out_err: /* * Prepare an anonymous dentry for life in the superblock's dentry tree as a * named dentry in place of the dentry to be replaced. + * returns with anon->d_lock held! */ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) { - struct dentry *dparent, *aparent; + struct dentry *dparent; + + dentry_lock_for_move(anon, dentry); + + write_seqcount_begin(&dentry->d_seq); + write_seqcount_begin_nested(&anon->d_seq, DENTRY_D_LOCK_NESTED); + + dparent = dentry->d_parent; switch_names(dentry, anon); swap(dentry->d_name.hash, anon->d_name.hash); - dparent = dentry->d_parent; - aparent = anon->d_parent; + dentry->d_parent = dentry; + list_del_init(&dentry->d_u.d_child); + anon->d_parent = dparent; + list_move(&anon->d_u.d_child, &dparent->d_subdirs); - dentry->d_parent = (aparent == anon) ? dentry : aparent; - list_del(&dentry->d_u.d_child); - if (!IS_ROOT(dentry)) - list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs); - else - INIT_LIST_HEAD(&dentry->d_u.d_child); + write_seqcount_end(&dentry->d_seq); + write_seqcount_end(&anon->d_seq); - anon->d_parent = (dparent == dentry) ? anon : dparent; - list_del(&anon->d_u.d_child); - if (!IS_ROOT(anon)) - list_add(&anon->d_u.d_child, &anon->d_parent->d_subdirs); - else - INIT_LIST_HEAD(&anon->d_u.d_child); + dentry_unlock_parents_for_move(anon, dentry); + spin_unlock(&dentry->d_lock); - anon->d_flags &= ~DCACHE_DISCONNECTED; + /* anon->d_lock still locked, returns locked */ } /** @@ -1803,7 +2702,8 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) * @inode: inode to bind to the dentry, to which aliases may be attached * * Introduces an dentry into the tree, substituting an extant disconnected - * root directory alias in its place if there is one + * root directory alias in its place if there is one. Caller must hold the + * i_mutex of the parent directory. */ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) { @@ -1811,14 +2711,15 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) BUG_ON(!d_unhashed(dentry)); - spin_lock(&dcache_lock); - if (!inode) { actual = dentry; __d_instantiate(dentry, NULL); - goto found_lock; + d_rehash(actual); + goto out_nolock; } + spin_lock(&inode->i_lock); + if (S_ISDIR(inode->i_mode)) { struct dentry *alias; @@ -1826,18 +2727,35 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) alias = __d_find_alias(inode, 0); if (alias) { actual = alias; - /* Is this an anonymous mountpoint that we could splice - * into our tree? */ - if (IS_ROOT(alias)) { - spin_lock(&alias->d_lock); + write_seqlock(&rename_lock); + + if (d_ancestor(alias, dentry)) { + /* Check for loops */ + actual = ERR_PTR(-ELOOP); + spin_unlock(&inode->i_lock); + } else if (IS_ROOT(alias)) { + /* Is this an anonymous mountpoint that we + * could splice into our tree? */ __d_materialise_dentry(dentry, alias); + write_sequnlock(&rename_lock); __d_drop(alias); goto found; + } else { + /* Nope, but we must(!) avoid directory + * aliasing. This drops inode->i_lock */ + actual = __d_unalias(inode, dentry, alias); } - /* Nope, but we must(!) avoid directory aliasing */ - actual = __d_unalias(dentry, alias); - if (IS_ERR(actual)) + write_sequnlock(&rename_lock); + if (IS_ERR(actual)) { + if (PTR_ERR(actual) == -ELOOP) + pr_warn_ratelimited( + "VFS: Lookup of '%s' in %s %s" + " would have caused loop\n", + dentry->d_name.name, + inode->i_sb->s_type->name, + inode->i_sb->s_id); dput(alias); + } goto out_nolock; } } @@ -1846,15 +2764,14 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) actual = __d_instantiate_unique(dentry, inode); if (!actual) actual = dentry; - else if (unlikely(!d_unhashed(actual))) - goto shouldnt_be_hashed; + else + BUG_ON(!d_unhashed(actual)); -found_lock: spin_lock(&actual->d_lock); found: _d_rehash(actual); spin_unlock(&actual->d_lock); - spin_unlock(&dcache_lock); + spin_unlock(&inode->i_lock); out_nolock: if (actual == dentry) { security_d_instantiate(dentry, inode); @@ -1863,11 +2780,8 @@ out_nolock: iput(inode); return actual; - -shouldnt_be_hashed: - spin_unlock(&dcache_lock); - BUG(); } +EXPORT_SYMBOL_GPL(d_materialise_unique); static int prepend(char **buffer, int *buflen, const char *str, int namelen) { @@ -1879,87 +2793,225 @@ static int prepend(char **buffer, int *buflen, const char *str, int namelen) return 0; } +/** + * prepend_name - prepend a pathname in front of current buffer pointer + * @buffer: buffer pointer + * @buflen: allocated length of the buffer + * @name: name string and length qstr structure + * + * With RCU path tracing, it may race with d_move(). Use ACCESS_ONCE() to + * make sure that either the old or the new name pointer and length are + * fetched. However, there may be mismatch between length and pointer. + * The length cannot be trusted, we need to copy it byte-by-byte until + * the length is reached or a null byte is found. It also prepends "/" at + * the beginning of the name. The sequence number check at the caller will + * retry it again when a d_move() does happen. So any garbage in the buffer + * due to mismatched pointer and length will be discarded. + */ static int prepend_name(char **buffer, int *buflen, struct qstr *name) { - return prepend(buffer, buflen, name->name, name->len); + const char *dname = ACCESS_ONCE(name->name); + u32 dlen = ACCESS_ONCE(name->len); + char *p; + + *buflen -= dlen + 1; + if (*buflen < 0) + return -ENAMETOOLONG; + p = *buffer -= dlen + 1; + *p++ = '/'; + while (dlen--) { + char c = *dname++; + if (!c) + break; + *p++ = c; + } + return 0; +} + +/** + * prepend_path - Prepend path string to a buffer + * @path: the dentry/vfsmount to report + * @root: root vfsmnt/dentry + * @buffer: pointer to the end of the buffer + * @buflen: pointer to buffer length + * + * The function will first try to write out the pathname without taking any + * lock other than the RCU read lock to make sure that dentries won't go away. + * It only checks the sequence number of the global rename_lock as any change + * in the dentry's d_seq will be preceded by changes in the rename_lock + * sequence number. If the sequence number had been changed, it will restart + * the whole pathname back-tracing sequence again by taking the rename_lock. + * In this case, there is no need to take the RCU read lock as the recursive + * parent pointer references will keep the dentry chain alive as long as no + * rename operation is performed. + */ +static int prepend_path(const struct path *path, + const struct path *root, + char **buffer, int *buflen) +{ + struct dentry *dentry; + struct vfsmount *vfsmnt; + struct mount *mnt; + int error = 0; + unsigned seq, m_seq = 0; + char *bptr; + int blen; + + rcu_read_lock(); +restart_mnt: + read_seqbegin_or_lock(&mount_lock, &m_seq); + seq = 0; + rcu_read_lock(); +restart: + bptr = *buffer; + blen = *buflen; + error = 0; + dentry = path->dentry; + vfsmnt = path->mnt; + mnt = real_mount(vfsmnt); + read_seqbegin_or_lock(&rename_lock, &seq); + while (dentry != root->dentry || vfsmnt != root->mnt) { + struct dentry * parent; + + if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) { + struct mount *parent = ACCESS_ONCE(mnt->mnt_parent); + /* Global root? */ + if (mnt != parent) { + dentry = ACCESS_ONCE(mnt->mnt_mountpoint); + mnt = parent; + vfsmnt = &mnt->mnt; + continue; + } + /* + * Filesystems needing to implement special "root names" + * should do so with ->d_dname() + */ + if (IS_ROOT(dentry) && + (dentry->d_name.len != 1 || + dentry->d_name.name[0] != '/')) { + WARN(1, "Root dentry has weird name <%.*s>\n", + (int) dentry->d_name.len, + dentry->d_name.name); + } + if (!error) + error = is_mounted(vfsmnt) ? 1 : 2; + break; + } + parent = dentry->d_parent; + prefetch(parent); + error = prepend_name(&bptr, &blen, &dentry->d_name); + if (error) + break; + + dentry = parent; + } + if (!(seq & 1)) + rcu_read_unlock(); + if (need_seqretry(&rename_lock, seq)) { + seq = 1; + goto restart; + } + done_seqretry(&rename_lock, seq); + + if (!(m_seq & 1)) + rcu_read_unlock(); + if (need_seqretry(&mount_lock, m_seq)) { + m_seq = 1; + goto restart_mnt; + } + done_seqretry(&mount_lock, m_seq); + + if (error >= 0 && bptr == *buffer) { + if (--blen < 0) + error = -ENAMETOOLONG; + else + *--bptr = '/'; + } + *buffer = bptr; + *buflen = blen; + return error; } /** * __d_path - return the path of a dentry * @path: the dentry/vfsmount to report - * @root: root vfsmnt/dentry (may be modified by this function) - * @buffer: buffer to return value in + * @root: root vfsmnt/dentry + * @buf: buffer to return value in * @buflen: buffer length * - * Convert a dentry into an ASCII path name. If the entry has been deleted - * the string " (deleted)" is appended. Note that this is ambiguous. + * Convert a dentry into an ASCII path name. * * Returns a pointer into the buffer or an error code if the * path was too long. * - * "buflen" should be positive. Caller holds the dcache_lock. + * "buflen" should be positive. * - * If path is not reachable from the supplied root, then the value of - * root is changed (without modifying refcounts). + * If the path is not reachable from the supplied root, return %NULL. */ -char *__d_path(const struct path *path, struct path *root, - char *buffer, int buflen) +char *__d_path(const struct path *path, + const struct path *root, + char *buf, int buflen) { - struct dentry *dentry = path->dentry; - struct vfsmount *vfsmnt = path->mnt; - char *end = buffer + buflen; - char *retval; + char *res = buf + buflen; + int error; - spin_lock(&vfsmount_lock); - prepend(&end, &buflen, "\0", 1); - if (d_unlinked(dentry) && - (prepend(&end, &buflen, " (deleted)", 10) != 0)) - goto Elong; + prepend(&res, &buflen, "\0", 1); + error = prepend_path(path, root, &res, &buflen); - if (buflen < 1) - goto Elong; - /* Get '/' right */ - retval = end-1; - *retval = '/'; + if (error < 0) + return ERR_PTR(error); + if (error > 0) + return NULL; + return res; +} - for (;;) { - struct dentry * parent; +char *d_absolute_path(const struct path *path, + char *buf, int buflen) +{ + struct path root = {}; + char *res = buf + buflen; + int error; - if (dentry == root->dentry && vfsmnt == root->mnt) - break; - if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) { - /* Global root? */ - if (vfsmnt->mnt_parent == vfsmnt) { - goto global_root; - } - dentry = vfsmnt->mnt_mountpoint; - vfsmnt = vfsmnt->mnt_parent; - continue; - } - parent = dentry->d_parent; - prefetch(parent); - if ((prepend_name(&end, &buflen, &dentry->d_name) != 0) || - (prepend(&end, &buflen, "/", 1) != 0)) - goto Elong; - retval = end; - dentry = parent; + prepend(&res, &buflen, "\0", 1); + error = prepend_path(path, &root, &res, &buflen); + + if (error > 1) + error = -EINVAL; + if (error < 0) + return ERR_PTR(error); + return res; +} + +/* + * same as __d_path but appends "(deleted)" for unlinked files. + */ +static int path_with_deleted(const struct path *path, + const struct path *root, + char **buf, int *buflen) +{ + prepend(buf, buflen, "\0", 1); + if (d_unlinked(path->dentry)) { + int error = prepend(buf, buflen, " (deleted)", 10); + if (error) + return error; } -out: - spin_unlock(&vfsmount_lock); - return retval; + return prepend_path(path, root, buf, buflen); +} -global_root: - retval += 1; /* hit the slash */ - if (prepend_name(&retval, &buflen, &dentry->d_name) != 0) - goto Elong; - root->mnt = vfsmnt; - root->dentry = dentry; - goto out; +static int prepend_unreachable(char **buffer, int *buflen) +{ + return prepend(buffer, buflen, "(unreachable)", 13); +} -Elong: - retval = ERR_PTR(-ENAMETOOLONG); - goto out; +static void get_fs_root_rcu(struct fs_struct *fs, struct path *root) +{ + unsigned seq; + + do { + seq = read_seqcount_begin(&fs->seq); + *root = fs->root; + } while (read_seqcount_retry(&fs->seq, seq)); } /** @@ -1980,9 +3032,9 @@ Elong: */ char *d_path(const struct path *path, char *buf, int buflen) { - char *res; + char *res = buf + buflen; struct path root; - struct path tmp; + int error; /* * We have various synthetic filesystems that never get mounted. On @@ -1990,21 +3042,25 @@ char *d_path(const struct path *path, char *buf, int buflen) * thus don't need to be hashed. They also don't need a name until a * user wants to identify the object in /proc/pid/fd/. The little hack * below allows us to generate a name for these objects on demand: + * + * Some pseudo inodes are mountable. When they are mounted + * path->dentry == path->mnt->mnt_root. In that case don't call d_dname + * and instead have d_path return the mounted path. */ - if (path->dentry->d_op && path->dentry->d_op->d_dname) + if (path->dentry->d_op && path->dentry->d_op->d_dname && + (!IS_ROOT(path->dentry) || path->dentry != path->mnt->mnt_root)) return path->dentry->d_op->d_dname(path->dentry, buf, buflen); - read_lock(¤t->fs->lock); - root = current->fs->root; - path_get(&root); - read_unlock(¤t->fs->lock); - spin_lock(&dcache_lock); - tmp = root; - res = __d_path(path, &tmp, buf, buflen); - spin_unlock(&dcache_lock); - path_put(&root); + rcu_read_lock(); + get_fs_root_rcu(current->fs, &root); + error = path_with_deleted(path, &root, &res, &buflen); + rcu_read_unlock(); + + if (error < 0) + res = ERR_PTR(error); return res; } +EXPORT_SYMBOL(d_path); /* * Helper function for dentry_operations.d_dname() members @@ -2027,43 +3083,103 @@ char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen, return memcpy(buffer, temp, sz); } +char *simple_dname(struct dentry *dentry, char *buffer, int buflen) +{ + char *end = buffer + buflen; + /* these dentries are never renamed, so d_lock is not needed */ + if (prepend(&end, &buflen, " (deleted)", 11) || + prepend(&end, &buflen, dentry->d_name.name, dentry->d_name.len) || + prepend(&end, &buflen, "/", 1)) + end = ERR_PTR(-ENAMETOOLONG); + return end; +} +EXPORT_SYMBOL(simple_dname); + /* * Write full pathname from the root of the filesystem into the buffer. */ -char *dentry_path(struct dentry *dentry, char *buf, int buflen) +static char *__dentry_path(struct dentry *d, char *buf, int buflen) { - char *end = buf + buflen; - char *retval; + struct dentry *dentry; + char *end, *retval; + int len, seq = 0; + int error = 0; - spin_lock(&dcache_lock); - prepend(&end, &buflen, "\0", 1); - if (d_unlinked(dentry) && - (prepend(&end, &buflen, "//deleted", 9) != 0)) - goto Elong; - if (buflen < 1) + if (buflen < 2) goto Elong; + + rcu_read_lock(); +restart: + dentry = d; + end = buf + buflen; + len = buflen; + prepend(&end, &len, "\0", 1); /* Get '/' right */ retval = end-1; *retval = '/'; - + read_seqbegin_or_lock(&rename_lock, &seq); while (!IS_ROOT(dentry)) { struct dentry *parent = dentry->d_parent; prefetch(parent); - if ((prepend_name(&end, &buflen, &dentry->d_name) != 0) || - (prepend(&end, &buflen, "/", 1) != 0)) - goto Elong; + error = prepend_name(&end, &len, &dentry->d_name); + if (error) + break; retval = end; dentry = parent; } - spin_unlock(&dcache_lock); + if (!(seq & 1)) + rcu_read_unlock(); + if (need_seqretry(&rename_lock, seq)) { + seq = 1; + goto restart; + } + done_seqretry(&rename_lock, seq); + if (error) + goto Elong; return retval; Elong: - spin_unlock(&dcache_lock); return ERR_PTR(-ENAMETOOLONG); } +char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen) +{ + return __dentry_path(dentry, buf, buflen); +} +EXPORT_SYMBOL(dentry_path_raw); + +char *dentry_path(struct dentry *dentry, char *buf, int buflen) +{ + char *p = NULL; + char *retval; + + if (d_unlinked(dentry)) { + p = buf + buflen; + if (prepend(&p, &buflen, "//deleted", 10) != 0) + goto Elong; + buflen++; + } + retval = __dentry_path(dentry, buf, buflen); + if (!IS_ERR(retval) && p) + *p = '/'; /* restore '/' overriden with '\0' */ + return retval; +Elong: + return ERR_PTR(-ENAMETOOLONG); +} + +static void get_fs_root_and_pwd_rcu(struct fs_struct *fs, struct path *root, + struct path *pwd) +{ + unsigned seq; + + do { + seq = read_seqcount_begin(&fs->seq); + *root = fs->root; + *pwd = fs->pwd; + } while (read_seqcount_retry(&fs->seq, seq)); +} + /* * NOTE! The user-level library version returns a * character pointer. The kernel system call just @@ -2086,46 +3202,47 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) { int error; struct path pwd, root; - char *page = (char *) __get_free_page(GFP_USER); + char *page = __getname(); if (!page) return -ENOMEM; - read_lock(¤t->fs->lock); - pwd = current->fs->pwd; - path_get(&pwd); - root = current->fs->root; - path_get(&root); - read_unlock(¤t->fs->lock); + rcu_read_lock(); + get_fs_root_and_pwd_rcu(current->fs, &root, &pwd); error = -ENOENT; - spin_lock(&dcache_lock); if (!d_unlinked(pwd.dentry)) { unsigned long len; - struct path tmp = root; - char * cwd; + char *cwd = page + PATH_MAX; + int buflen = PATH_MAX; - cwd = __d_path(&pwd, &tmp, page, PAGE_SIZE); - spin_unlock(&dcache_lock); + prepend(&cwd, &buflen, "\0", 1); + error = prepend_path(&pwd, &root, &cwd, &buflen); + rcu_read_unlock(); - error = PTR_ERR(cwd); - if (IS_ERR(cwd)) + if (error < 0) goto out; + /* Unreachable from current root */ + if (error > 0) { + error = prepend_unreachable(&cwd, &buflen); + if (error) + goto out; + } + error = -ERANGE; - len = PAGE_SIZE + page - cwd; + len = PATH_MAX + page - cwd; if (len <= size) { error = len; if (copy_to_user(buf, cwd, len)) error = -EFAULT; } - } else - spin_unlock(&dcache_lock); + } else { + rcu_read_unlock(); + } out: - path_put(&pwd); - path_put(&root); - free_page((unsigned long) page); + __putname(page); return error; } @@ -2148,86 +3265,64 @@ out: int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry) { int result; - unsigned long seq; + unsigned seq; if (new_dentry == old_dentry) return 1; - /* - * Need rcu_readlock to protect against the d_parent trashing - * due to d_move - */ - rcu_read_lock(); do { /* for restarting inner loop in case of seq retry */ seq = read_seqbegin(&rename_lock); + /* + * Need rcu_readlock to protect against the d_parent trashing + * due to d_move + */ + rcu_read_lock(); if (d_ancestor(old_dentry, new_dentry)) result = 1; else result = 0; + rcu_read_unlock(); } while (read_seqretry(&rename_lock, seq)); - rcu_read_unlock(); return result; } -void d_genocide(struct dentry *root) +static enum d_walk_ret d_genocide_kill(void *data, struct dentry *dentry) { - struct dentry *this_parent = root; - struct list_head *next; + struct dentry *root = data; + if (dentry != root) { + if (d_unhashed(dentry) || !dentry->d_inode) + return D_WALK_SKIP; - spin_lock(&dcache_lock); -repeat: - next = this_parent->d_subdirs.next; -resume: - while (next != &this_parent->d_subdirs) { - struct list_head *tmp = next; - struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); - next = tmp->next; - if (d_unhashed(dentry)||!dentry->d_inode) - continue; - if (!list_empty(&dentry->d_subdirs)) { - this_parent = dentry; - goto repeat; + if (!(dentry->d_flags & DCACHE_GENOCIDE)) { + dentry->d_flags |= DCACHE_GENOCIDE; + dentry->d_lockref.count--; } - atomic_dec(&dentry->d_count); - } - if (this_parent != root) { - next = this_parent->d_u.d_child.next; - atomic_dec(&this_parent->d_count); - this_parent = this_parent->d_parent; - goto resume; } - spin_unlock(&dcache_lock); + return D_WALK_CONTINUE; } -/** - * find_inode_number - check for dentry with name - * @dir: directory to check - * @name: Name to find. - * - * Check whether a dentry already exists for the given name, - * and return the inode number if it has an inode. Otherwise - * 0 is returned. - * - * This routine is used to post-process directory listings for - * filesystems using synthetic inode numbers, and is necessary - * to keep getcwd() working. - */ - -ino_t find_inode_number(struct dentry *dir, struct qstr *name) +void d_genocide(struct dentry *parent) { - struct dentry * dentry; - ino_t ino = 0; + d_walk(parent, parent, d_genocide_kill, NULL); +} - dentry = d_hash_and_lookup(dir, name); - if (dentry) { - if (dentry->d_inode) - ino = dentry->d_inode->i_ino; - dput(dentry); - } - return ino; +void d_tmpfile(struct dentry *dentry, struct inode *inode) +{ + inode_dec_link_count(inode); + BUG_ON(dentry->d_name.name != dentry->d_iname || + !hlist_unhashed(&dentry->d_alias) || + !d_unlinked(dentry)); + spin_lock(&dentry->d_parent->d_lock); + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); + dentry->d_name.len = sprintf(dentry->d_iname, "#%llu", + (unsigned long long)inode->i_ino); + spin_unlock(&dentry->d_lock); + spin_unlock(&dentry->d_parent->d_lock); + d_instantiate(dentry, inode); } +EXPORT_SYMBOL(d_tmpfile); static __initdata unsigned long dhash_entries; static int __init set_dhash_entries(char *str) @@ -2241,7 +3336,7 @@ __setup("dhash_entries=", set_dhash_entries); static void __init dcache_init_early(void) { - int loop; + unsigned int loop; /* If hashes are distributed across NUMA nodes, defer * hash allocation until vmalloc space is available. @@ -2251,21 +3346,22 @@ static void __init dcache_init_early(void) dentry_hashtable = alloc_large_system_hash("Dentry cache", - sizeof(struct hlist_head), + sizeof(struct hlist_bl_head), dhash_entries, 13, HASH_EARLY, &d_hash_shift, &d_hash_mask, + 0, 0); - for (loop = 0; loop < (1 << d_hash_shift); loop++) - INIT_HLIST_HEAD(&dentry_hashtable[loop]); + for (loop = 0; loop < (1U << d_hash_shift); loop++) + INIT_HLIST_BL_HEAD(dentry_hashtable + loop); } static void __init dcache_init(void) { - int loop; + unsigned int loop; /* * A constructor could be added for stable state like the lists, @@ -2274,8 +3370,6 @@ static void __init dcache_init(void) */ dentry_cache = KMEM_CACHE(dentry, SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD); - - register_shrinker(&dcache_shrinker); /* Hash may have been set up in dcache_init_early */ if (!hashdist) @@ -2283,20 +3377,22 @@ static void __init dcache_init(void) dentry_hashtable = alloc_large_system_hash("Dentry cache", - sizeof(struct hlist_head), + sizeof(struct hlist_bl_head), dhash_entries, 13, 0, &d_hash_shift, &d_hash_mask, + 0, 0); - for (loop = 0; loop < (1 << d_hash_shift); loop++) - INIT_HLIST_HEAD(&dentry_hashtable[loop]); + for (loop = 0; loop < (1U << d_hash_shift); loop++) + INIT_HLIST_BL_HEAD(dentry_hashtable + loop); } /* SLAB cache for __getname() consumers */ struct kmem_cache *names_cachep __read_mostly; +EXPORT_SYMBOL(names_cachep); EXPORT_SYMBOL(d_genocide); @@ -2326,26 +3422,3 @@ void __init vfs_caches_init(unsigned long mempages) bdev_cache_init(); chrdev_init(); } - -EXPORT_SYMBOL(d_alloc); -EXPORT_SYMBOL(d_alloc_root); -EXPORT_SYMBOL(d_delete); -EXPORT_SYMBOL(d_find_alias); -EXPORT_SYMBOL(d_instantiate); -EXPORT_SYMBOL(d_invalidate); -EXPORT_SYMBOL(d_lookup); -EXPORT_SYMBOL(d_move); -EXPORT_SYMBOL_GPL(d_materialise_unique); -EXPORT_SYMBOL(d_path); -EXPORT_SYMBOL(d_prune_aliases); -EXPORT_SYMBOL(d_rehash); -EXPORT_SYMBOL(d_splice_alias); -EXPORT_SYMBOL(d_add_ci); -EXPORT_SYMBOL(d_validate); -EXPORT_SYMBOL(dget_locked); -EXPORT_SYMBOL(dput); -EXPORT_SYMBOL(find_inode_number); -EXPORT_SYMBOL(have_submounts); -EXPORT_SYMBOL(names_cachep); -EXPORT_SYMBOL(shrink_dcache_parent); -EXPORT_SYMBOL(shrink_dcache_sb); |
