diff options
Diffstat (limited to 'fs/dcache.c')
| -rw-r--r-- | fs/dcache.c | 846 |
1 files changed, 415 insertions, 431 deletions
diff --git a/fs/dcache.c b/fs/dcache.c index 41000305d71..06f65857a85 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -88,35 +88,6 @@ EXPORT_SYMBOL(rename_lock); static struct kmem_cache *dentry_cache __read_mostly; -/** - * read_seqbegin_or_lock - begin a sequence number check or locking block - * @lock: sequence lock - * @seq : sequence number to be checked - * - * First try it once optimistically without taking the lock. If that fails, - * take the lock. The sequence number is also used as a marker for deciding - * whether to be a reader (even) or writer (odd). - * N.B. seq must be initialized to an even number to begin with. - */ -static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq) -{ - if (!(*seq & 1)) /* Even */ - *seq = read_seqbegin(lock); - else /* Odd */ - read_seqlock_excl(lock); -} - -static inline int need_seqretry(seqlock_t *lock, int seq) -{ - return !(seq & 1) && read_seqretry(lock, seq); -} - -static inline void done_seqretry(seqlock_t *lock, int seq) -{ - if (seq & 1) - read_sequnlock_excl(lock); -} - /* * This is the single most critical data structure when it comes * to the dcache: the hashtable for lookups. Somebody should try @@ -125,8 +96,6 @@ static inline void done_seqretry(seqlock_t *lock, int seq) * This hash-function tries to avoid losing too many bits of hash * information, yet avoid using a prime hash-size or similar. */ -#define D_HASHBITS d_hash_shift -#define D_HASHMASK d_hash_mask static unsigned int d_hash_mask __read_mostly; static unsigned int d_hash_shift __read_mostly; @@ -137,8 +106,8 @@ static inline struct hlist_bl_head *d_hash(const struct dentry *parent, unsigned int hash) { hash += (unsigned long) parent / L1_CACHE_BYTES; - hash = hash + (hash >> D_HASHBITS); - return dentry_hashtable + (hash & D_HASHMASK); + hash = hash + (hash >> d_hash_shift); + return dentry_hashtable + (hash & d_hash_mask); } /* Statistics gathering. */ @@ -181,7 +150,7 @@ static long get_nr_dentry_unused(void) return sum < 0 ? 0 : sum; } -int proc_nr_dentry(ctl_table *table, int write, void __user *buffer, +int proc_nr_dentry(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { dentry_stat.nr_dentry = get_nr_dentry(); @@ -223,7 +192,7 @@ static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char if (!tcount) return 0; } - mask = ~(~0ul << tcount*8); + mask = bytemask_from_count(tcount); return unlikely(!!((a ^ b) & mask)); } @@ -277,16 +246,8 @@ static void __d_free(struct rcu_head *head) kmem_cache_free(dentry_cache, dentry); } -/* - * no locks, please. - */ -static void d_free(struct dentry *dentry) +static void dentry_free(struct dentry *dentry) { - BUG_ON((int)dentry->d_lockref.count > 0); - this_cpu_dec(nr_dentry); - if (dentry->d_op && dentry->d_op->d_release) - dentry->d_op->d_release(dentry); - /* if dentry was never visible to RCU, immediate free is OK */ if (!(dentry->d_flags & DCACHE_RCUACCESS)) __d_free(&dentry->d_u.d_rcu); @@ -343,6 +304,7 @@ static void dentry_unlink_inode(struct dentry * dentry) __releases(dentry->d_inode->i_lock) { struct inode *inode = dentry->d_inode; + __d_clear_type(dentry); dentry->d_inode = NULL; hlist_del_init(&dentry->d_alias); dentry_rcuwalk_barrier(dentry); @@ -433,77 +395,6 @@ static void dentry_lru_add(struct dentry *dentry) d_lru_add(dentry); } -/* - * Remove a dentry with references from the LRU. - * - * If we are on the shrink list, then we can get to try_prune_one_dentry() and - * lose our last reference through the parent walk. In this case, we need to - * remove ourselves from the shrink list, not the LRU. - */ -static void dentry_lru_del(struct dentry *dentry) -{ - if (dentry->d_flags & DCACHE_LRU_LIST) { - if (dentry->d_flags & DCACHE_SHRINK_LIST) - return d_shrink_del(dentry); - d_lru_del(dentry); - } -} - -/** - * d_kill - kill dentry and return parent - * @dentry: dentry to kill - * @parent: parent dentry - * - * The dentry must already be unhashed and removed from the LRU. - * - * If this is the root of the dentry tree, return NULL. - * - * dentry->d_lock and parent->d_lock must be held by caller, and are dropped by - * d_kill. - */ -static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) - __releases(dentry->d_lock) - __releases(parent->d_lock) - __releases(dentry->d_inode->i_lock) -{ - list_del(&dentry->d_u.d_child); - /* - * Inform try_to_ascend() that we are no longer attached to the - * dentry tree - */ - dentry->d_flags |= DCACHE_DENTRY_KILLED; - if (parent) - spin_unlock(&parent->d_lock); - dentry_iput(dentry); - /* - * dentry_iput drops the locks, at which point nobody (except - * transient RCU lookups) can reach this dentry. - */ - d_free(dentry); - return parent; -} - -/* - * Unhash a dentry without inserting an RCU walk barrier or checking that - * dentry->d_lock is locked. The caller must take care of that, if - * appropriate. - */ -static void __d_shrink(struct dentry *dentry) -{ - if (!d_unhashed(dentry)) { - struct hlist_bl_head *b; - if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) - b = &dentry->d_sb->s_anon; - else - b = d_hash(dentry->d_parent, dentry->d_name.hash); - - hlist_bl_lock(b); - __hlist_bl_del(&dentry->d_hash); - dentry->d_hash.pprev = NULL; - hlist_bl_unlock(b); - } -} - /** * d_drop - drop a dentry * @dentry: dentry to drop @@ -522,7 +413,21 @@ static void __d_shrink(struct dentry *dentry) void __d_drop(struct dentry *dentry) { if (!d_unhashed(dentry)) { - __d_shrink(dentry); + struct hlist_bl_head *b; + /* + * Hashed dentries are normally on the dentry hashtable, + * with the exception of those newly allocated by + * d_obtain_alias, which are always IS_ROOT: + */ + if (unlikely(IS_ROOT(dentry))) + b = &dentry->d_sb->s_anon; + else + b = d_hash(dentry->d_parent, dentry->d_name.hash); + + hlist_bl_lock(b); + __hlist_bl_del(&dentry->d_hash); + dentry->d_hash.pprev = NULL; + hlist_bl_unlock(b); dentry_rcuwalk_barrier(dentry); } } @@ -536,37 +441,12 @@ void d_drop(struct dentry *dentry) } EXPORT_SYMBOL(d_drop); -/* - * Finish off a dentry we've decided to kill. - * dentry->d_lock must be held, returns with it unlocked. - * If ref is non-zero, then decrement the refcount too. - * Returns dentry requiring refcount drop, or NULL if we're done. - */ -static inline struct dentry * -dentry_kill(struct dentry *dentry, int unlock_on_failure) - __releases(dentry->d_lock) +static void __dentry_kill(struct dentry *dentry) { - struct inode *inode; - struct dentry *parent; - - inode = dentry->d_inode; - if (inode && !spin_trylock(&inode->i_lock)) { -relock: - if (unlock_on_failure) { - spin_unlock(&dentry->d_lock); - cpu_relax(); - } - return dentry; /* try again with same dentry */ - } - if (IS_ROOT(dentry)) - parent = NULL; - else + struct dentry *parent = NULL; + bool can_free = true; + if (!IS_ROOT(dentry)) parent = dentry->d_parent; - if (parent && !spin_trylock(&parent->d_lock)) { - if (inode) - spin_unlock(&inode->i_lock); - goto relock; - } /* * The dentry is now unrecoverably dead to the world. @@ -580,10 +460,105 @@ relock: if ((dentry->d_flags & DCACHE_OP_PRUNE) && !d_unhashed(dentry)) dentry->d_op->d_prune(dentry); - dentry_lru_del(dentry); + if (dentry->d_flags & DCACHE_LRU_LIST) { + if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) + d_lru_del(dentry); + } /* if it was on the hash then remove it */ __d_drop(dentry); - return d_kill(dentry, parent); + list_del(&dentry->d_u.d_child); + /* + * Inform d_walk() that we are no longer attached to the + * dentry tree + */ + dentry->d_flags |= DCACHE_DENTRY_KILLED; + if (parent) + spin_unlock(&parent->d_lock); + dentry_iput(dentry); + /* + * dentry_iput drops the locks, at which point nobody (except + * transient RCU lookups) can reach this dentry. + */ + BUG_ON((int)dentry->d_lockref.count > 0); + this_cpu_dec(nr_dentry); + if (dentry->d_op && dentry->d_op->d_release) + dentry->d_op->d_release(dentry); + + spin_lock(&dentry->d_lock); + if (dentry->d_flags & DCACHE_SHRINK_LIST) { + dentry->d_flags |= DCACHE_MAY_FREE; + can_free = false; + } + spin_unlock(&dentry->d_lock); + if (likely(can_free)) + dentry_free(dentry); +} + +/* + * Finish off a dentry we've decided to kill. + * dentry->d_lock must be held, returns with it unlocked. + * If ref is non-zero, then decrement the refcount too. + * Returns dentry requiring refcount drop, or NULL if we're done. + */ +static struct dentry *dentry_kill(struct dentry *dentry) + __releases(dentry->d_lock) +{ + struct inode *inode = dentry->d_inode; + struct dentry *parent = NULL; + + if (inode && unlikely(!spin_trylock(&inode->i_lock))) + goto failed; + + if (!IS_ROOT(dentry)) { + parent = dentry->d_parent; + if (unlikely(!spin_trylock(&parent->d_lock))) { + if (inode) + spin_unlock(&inode->i_lock); + goto failed; + } + } + + __dentry_kill(dentry); + return parent; + +failed: + spin_unlock(&dentry->d_lock); + cpu_relax(); + return dentry; /* try again with same dentry */ +} + +static inline struct dentry *lock_parent(struct dentry *dentry) +{ + struct dentry *parent = dentry->d_parent; + if (IS_ROOT(dentry)) + return NULL; + if (unlikely((int)dentry->d_lockref.count < 0)) + return NULL; + if (likely(spin_trylock(&parent->d_lock))) + return parent; + rcu_read_lock(); + spin_unlock(&dentry->d_lock); +again: + parent = ACCESS_ONCE(dentry->d_parent); + spin_lock(&parent->d_lock); + /* + * We can't blindly lock dentry until we are sure + * that we won't violate the locking order. + * Any changes of dentry->d_parent must have + * been done with parent->d_lock held, so + * spin_lock() above is enough of a barrier + * for checking if it's still our child. + */ + if (unlikely(parent != dentry->d_parent)) { + spin_unlock(&parent->d_lock); + goto again; + } + rcu_read_unlock(); + if (parent != dentry) + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); + else + parent = NULL; + return parent; } /* @@ -630,7 +605,8 @@ repeat: goto kill_it; } - dentry->d_flags |= DCACHE_REFERENCED; + if (!(dentry->d_flags & DCACHE_REFERENCED)) + dentry->d_flags |= DCACHE_REFERENCED; dentry_lru_add(dentry); dentry->d_lockref.count--; @@ -638,7 +614,7 @@ repeat: return; kill_it: - dentry = dentry_kill(dentry, 1); + dentry = dentry_kill(dentry); if (dentry) goto repeat; } @@ -851,64 +827,15 @@ restart: } EXPORT_SYMBOL(d_prune_aliases); -/* - * Try to throw away a dentry - free the inode, dput the parent. - * Requires dentry->d_lock is held, and dentry->d_count == 0. - * Releases dentry->d_lock. - * - * This may fail if locks cannot be acquired no problem, just try again. - */ -static struct dentry * try_prune_one_dentry(struct dentry *dentry) - __releases(dentry->d_lock) -{ - struct dentry *parent; - - parent = dentry_kill(dentry, 0); - /* - * If dentry_kill returns NULL, we have nothing more to do. - * if it returns the same dentry, trylocks failed. In either - * case, just loop again. - * - * Otherwise, we need to prune ancestors too. This is necessary - * to prevent quadratic behavior of shrink_dcache_parent(), but - * is also expected to be beneficial in reducing dentry cache - * fragmentation. - */ - if (!parent) - return NULL; - if (parent == dentry) - return dentry; - - /* Prune ancestors. */ - dentry = parent; - while (dentry) { - if (lockref_put_or_lock(&dentry->d_lockref)) - return NULL; - dentry = dentry_kill(dentry, 1); - } - return NULL; -} - static void shrink_dentry_list(struct list_head *list) { - struct dentry *dentry; - - rcu_read_lock(); - for (;;) { - dentry = list_entry_rcu(list->prev, struct dentry, d_lru); - if (&dentry->d_lru == list) - break; /* empty */ + struct dentry *dentry, *parent; - /* - * Get the dentry lock, and re-verify that the dentry is - * this on the shrinking list. If it is, we know that - * DCACHE_SHRINK_LIST and DCACHE_LRU_LIST are set. - */ + while (!list_empty(list)) { + struct inode *inode; + dentry = list_entry(list->prev, struct dentry, d_lru); spin_lock(&dentry->d_lock); - if (dentry != list_entry(list->prev, struct dentry, d_lru)) { - spin_unlock(&dentry->d_lock); - continue; - } + parent = lock_parent(dentry); /* * The dispose list is isolated and dentries are not accounted @@ -921,30 +848,63 @@ static void shrink_dentry_list(struct list_head *list) * We found an inuse dentry which was not removed from * the LRU because of laziness during lookup. Do not free it. */ - if (dentry->d_lockref.count) { + if ((int)dentry->d_lockref.count > 0) { spin_unlock(&dentry->d_lock); + if (parent) + spin_unlock(&parent->d_lock); continue; } - rcu_read_unlock(); - /* - * If 'try_to_prune()' returns a dentry, it will - * be the same one we passed in, and d_lock will - * have been held the whole time, so it will not - * have been added to any other lists. We failed - * to get the inode lock. - * - * We just add it back to the shrink list. - */ - dentry = try_prune_one_dentry(dentry); - rcu_read_lock(); - if (dentry) { + if (unlikely(dentry->d_flags & DCACHE_DENTRY_KILLED)) { + bool can_free = dentry->d_flags & DCACHE_MAY_FREE; + spin_unlock(&dentry->d_lock); + if (parent) + spin_unlock(&parent->d_lock); + if (can_free) + dentry_free(dentry); + continue; + } + + inode = dentry->d_inode; + if (inode && unlikely(!spin_trylock(&inode->i_lock))) { d_shrink_add(dentry, list); spin_unlock(&dentry->d_lock); + if (parent) + spin_unlock(&parent->d_lock); + continue; + } + + __dentry_kill(dentry); + + /* + * We need to prune ancestors too. This is necessary to prevent + * quadratic behavior of shrink_dcache_parent(), but is also + * expected to be beneficial in reducing dentry cache + * fragmentation. + */ + dentry = parent; + while (dentry && !lockref_put_or_lock(&dentry->d_lockref)) { + parent = lock_parent(dentry); + if (dentry->d_lockref.count != 1) { + dentry->d_lockref.count--; + spin_unlock(&dentry->d_lock); + if (parent) + spin_unlock(&parent->d_lock); + break; + } + inode = dentry->d_inode; /* can't be NULL */ + if (unlikely(!spin_trylock(&inode->i_lock))) { + spin_unlock(&dentry->d_lock); + if (parent) + spin_unlock(&parent->d_lock); + cpu_relax(); + continue; + } + __dentry_kill(dentry); + dentry = parent; } } - rcu_read_unlock(); } static enum lru_status @@ -1074,144 +1034,6 @@ void shrink_dcache_sb(struct super_block *sb) } EXPORT_SYMBOL(shrink_dcache_sb); -/* - * destroy a single subtree of dentries for unmount - * - see the comments on shrink_dcache_for_umount() for a description of the - * locking - */ -static void shrink_dcache_for_umount_subtree(struct dentry *dentry) -{ - struct dentry *parent; - - BUG_ON(!IS_ROOT(dentry)); - - for (;;) { - /* descend to the first leaf in the current subtree */ - while (!list_empty(&dentry->d_subdirs)) - dentry = list_entry(dentry->d_subdirs.next, - struct dentry, d_u.d_child); - - /* consume the dentries from this leaf up through its parents - * until we find one with children or run out altogether */ - do { - struct inode *inode; - - /* - * inform the fs that this dentry is about to be - * unhashed and destroyed. - */ - if ((dentry->d_flags & DCACHE_OP_PRUNE) && - !d_unhashed(dentry)) - dentry->d_op->d_prune(dentry); - - dentry_lru_del(dentry); - __d_shrink(dentry); - - if (dentry->d_lockref.count != 0) { - printk(KERN_ERR - "BUG: Dentry %p{i=%lx,n=%s}" - " still in use (%d)" - " [unmount of %s %s]\n", - dentry, - dentry->d_inode ? - dentry->d_inode->i_ino : 0UL, - dentry->d_name.name, - dentry->d_lockref.count, - dentry->d_sb->s_type->name, - dentry->d_sb->s_id); - BUG(); - } - - if (IS_ROOT(dentry)) { - parent = NULL; - list_del(&dentry->d_u.d_child); - } else { - parent = dentry->d_parent; - parent->d_lockref.count--; - list_del(&dentry->d_u.d_child); - } - - inode = dentry->d_inode; - if (inode) { - dentry->d_inode = NULL; - hlist_del_init(&dentry->d_alias); - if (dentry->d_op && dentry->d_op->d_iput) - dentry->d_op->d_iput(dentry, inode); - else - iput(inode); - } - - d_free(dentry); - - /* finished when we fall off the top of the tree, - * otherwise we ascend to the parent and move to the - * next sibling if there is one */ - if (!parent) - return; - dentry = parent; - } while (list_empty(&dentry->d_subdirs)); - - dentry = list_entry(dentry->d_subdirs.next, - struct dentry, d_u.d_child); - } -} - -/* - * destroy the dentries attached to a superblock on unmounting - * - we don't need to use dentry->d_lock because: - * - the superblock is detached from all mountings and open files, so the - * dentry trees will not be rearranged by the VFS - * - s_umount is write-locked, so the memory pressure shrinker will ignore - * any dentries belonging to this superblock that it comes across - * - the filesystem itself is no longer permitted to rearrange the dentries - * in this superblock - */ -void shrink_dcache_for_umount(struct super_block *sb) -{ - struct dentry *dentry; - - if (down_read_trylock(&sb->s_umount)) - BUG(); - - dentry = sb->s_root; - sb->s_root = NULL; - dentry->d_lockref.count--; - shrink_dcache_for_umount_subtree(dentry); - - while (!hlist_bl_empty(&sb->s_anon)) { - dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash); - shrink_dcache_for_umount_subtree(dentry); - } -} - -/* - * This tries to ascend one level of parenthood, but - * we can race with renaming, so we need to re-check - * the parenthood after dropping the lock and check - * that the sequence number still matches. - */ -static struct dentry *try_to_ascend(struct dentry *old, unsigned seq) -{ - struct dentry *new = old->d_parent; - - rcu_read_lock(); - spin_unlock(&old->d_lock); - spin_lock(&new->d_lock); - - /* - * might go back up the wrong parent if we have had a rename - * or deletion - */ - if (new != old->d_parent || - (old->d_flags & DCACHE_DENTRY_KILLED) || - need_seqretry(&rename_lock, seq)) { - spin_unlock(&new->d_lock); - new = NULL; - } - rcu_read_unlock(); - return new; -} - /** * enum d_walk_ret - action to talke during tree walk * @D_WALK_CONTINUE: contrinue walk @@ -1300,9 +1122,24 @@ resume: */ if (this_parent != parent) { struct dentry *child = this_parent; - this_parent = try_to_ascend(this_parent, seq); - if (!this_parent) + this_parent = child->d_parent; + + rcu_read_lock(); + spin_unlock(&child->d_lock); + spin_lock(&this_parent->d_lock); + + /* + * might go back up the wrong parent if we have had a rename + * or deletion + */ + if (this_parent != child->d_parent || + (child->d_flags & DCACHE_DENTRY_KILLED) || + need_seqretry(&rename_lock, seq)) { + spin_unlock(&this_parent->d_lock); + rcu_read_unlock(); goto rename_retry; + } + rcu_read_unlock(); next = child->d_u.d_child.next; goto resume; } @@ -1331,14 +1168,6 @@ rename_retry: * list is non-empty and continue searching. */ -/** - * have_submounts - check for mounts over a dentry - * @parent: dentry to check. - * - * Return true if the parent or its subdirectories contain - * a mount point - */ - static enum d_walk_ret check_mount(void *data, struct dentry *dentry) { int *ret = data; @@ -1349,6 +1178,13 @@ static enum d_walk_ret check_mount(void *data, struct dentry *dentry) return D_WALK_CONTINUE; } +/** + * have_submounts - check for mounts over a dentry + * @parent: dentry to check. + * + * Return true if the parent or its subdirectories contain + * a mount point + */ int have_submounts(struct dentry *parent) { int ret = 0; @@ -1421,34 +1257,23 @@ static enum d_walk_ret select_collect(void *_data, struct dentry *dentry) if (data->start == dentry) goto out; - /* - * move only zero ref count dentries to the dispose list. - * - * Those which are presently on the shrink list, being processed - * by shrink_dentry_list(), shouldn't be moved. Otherwise the - * loop in shrink_dcache_parent() might not make any progress - * and loop forever. - */ - if (dentry->d_lockref.count) { - dentry_lru_del(dentry); - } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) { - /* - * We can't use d_lru_shrink_move() because we - * need to get the global LRU lock and do the - * LRU accounting. - */ - d_lru_del(dentry); - d_shrink_add(dentry, &data->dispose); + if (dentry->d_flags & DCACHE_SHRINK_LIST) { data->found++; - ret = D_WALK_NORETRY; + } else { + if (dentry->d_flags & DCACHE_LRU_LIST) + d_lru_del(dentry); + if (!dentry->d_lockref.count) { + d_shrink_add(dentry, &data->dispose); + data->found++; + } } /* * We can return to the caller if we have found some (this * ensures forward progress). We'll be coming back to find * the rest. */ - if (data->found && need_resched()) - ret = D_WALK_QUIT; + if (!list_empty(&data->dispose)) + ret = need_resched() ? D_WALK_QUIT : D_WALK_NORETRY; out: return ret; } @@ -1478,6 +1303,56 @@ void shrink_dcache_parent(struct dentry *parent) } EXPORT_SYMBOL(shrink_dcache_parent); +static enum d_walk_ret umount_check(void *_data, struct dentry *dentry) +{ + /* it has busy descendents; complain about those instead */ + if (!list_empty(&dentry->d_subdirs)) + return D_WALK_CONTINUE; + + /* root with refcount 1 is fine */ + if (dentry == _data && dentry->d_lockref.count == 1) + return D_WALK_CONTINUE; + + printk(KERN_ERR "BUG: Dentry %p{i=%lx,n=%pd} " + " still in use (%d) [unmount of %s %s]\n", + dentry, + dentry->d_inode ? + dentry->d_inode->i_ino : 0UL, + dentry, + dentry->d_lockref.count, + dentry->d_sb->s_type->name, + dentry->d_sb->s_id); + WARN_ON(1); + return D_WALK_CONTINUE; +} + +static void do_one_tree(struct dentry *dentry) +{ + shrink_dcache_parent(dentry); + d_walk(dentry, dentry, umount_check, NULL); + d_drop(dentry); + dput(dentry); +} + +/* + * destroy the dentries attached to a superblock on unmounting + */ +void shrink_dcache_for_umount(struct super_block *sb) +{ + struct dentry *dentry; + + WARN(down_read_trylock(&sb->s_umount), "s_umount should've been locked"); + + dentry = sb->s_root; + sb->s_root = NULL; + do_one_tree(dentry); + + while (!hlist_bl_empty(&sb->s_anon)) { + dentry = dget(hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash)); + do_one_tree(dentry); + } +} + static enum d_walk_ret check_and_collect(void *_data, struct dentry *dentry) { struct select_data *data = _data; @@ -1638,12 +1513,17 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) } EXPORT_SYMBOL(d_alloc); +/** + * d_alloc_pseudo - allocate a dentry (for lookup-less filesystems) + * @sb: the superblock + * @name: qstr of the name + * + * For a filesystem that just pins its dentries in memory and never + * performs lookups at all, return an unhashed IS_ROOT dentry. + */ struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name) { - struct dentry *dentry = __d_alloc(sb, name); - if (dentry) - dentry->d_flags |= DCACHE_DISCONNECTED; - return dentry; + return __d_alloc(sb, name); } EXPORT_SYMBOL(d_alloc_pseudo); @@ -1685,14 +1565,41 @@ void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op) } EXPORT_SYMBOL(d_set_d_op); +static unsigned d_flags_for_inode(struct inode *inode) +{ + unsigned add_flags = DCACHE_FILE_TYPE; + + if (!inode) + return DCACHE_MISS_TYPE; + + if (S_ISDIR(inode->i_mode)) { + add_flags = DCACHE_DIRECTORY_TYPE; + if (unlikely(!(inode->i_opflags & IOP_LOOKUP))) { + if (unlikely(!inode->i_op->lookup)) + add_flags = DCACHE_AUTODIR_TYPE; + else + inode->i_opflags |= IOP_LOOKUP; + } + } else if (unlikely(!(inode->i_opflags & IOP_NOFOLLOW))) { + if (unlikely(inode->i_op->follow_link)) + add_flags = DCACHE_SYMLINK_TYPE; + else + inode->i_opflags |= IOP_NOFOLLOW; + } + + if (unlikely(IS_AUTOMOUNT(inode))) + add_flags |= DCACHE_NEED_AUTOMOUNT; + return add_flags; +} + static void __d_instantiate(struct dentry *dentry, struct inode *inode) { + unsigned add_flags = d_flags_for_inode(inode); + spin_lock(&dentry->d_lock); - if (inode) { - if (unlikely(IS_AUTOMOUNT(inode))) - dentry->d_flags |= DCACHE_NEED_AUTOMOUNT; + __d_set_type(dentry, add_flags); + if (inode) hlist_add_head(&dentry->d_alias, &inode->i_dentry); - } dentry->d_inode = inode; dentry_rcuwalk_barrier(dentry); spin_unlock(&dentry->d_lock); @@ -1801,6 +1708,33 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode) EXPORT_SYMBOL(d_instantiate_unique); +/** + * d_instantiate_no_diralias - instantiate a non-aliased dentry + * @entry: dentry to complete + * @inode: inode to attach to this dentry + * + * Fill in inode information in the entry. If a directory alias is found, then + * return an error (and drop inode). Together with d_materialise_unique() this + * guarantees that a directory inode may never have more than one alias. + */ +int d_instantiate_no_diralias(struct dentry *entry, struct inode *inode) +{ + BUG_ON(!hlist_unhashed(&entry->d_alias)); + + spin_lock(&inode->i_lock); + if (S_ISDIR(inode->i_mode) && !hlist_empty(&inode->i_dentry)) { + spin_unlock(&inode->i_lock); + iput(inode); + return -EBUSY; + } + __d_instantiate(entry, inode); + spin_unlock(&inode->i_lock); + security_d_instantiate(entry, inode); + + return 0; +} +EXPORT_SYMBOL(d_instantiate_no_diralias); + struct dentry *d_make_root(struct inode *root_inode) { struct dentry *res = NULL; @@ -1870,6 +1804,7 @@ struct dentry *d_obtain_alias(struct inode *inode) static const struct qstr anonstring = QSTR_INIT("/", 1); struct dentry *tmp; struct dentry *res; + unsigned add_flags; if (!inode) return ERR_PTR(-ESTALE); @@ -1895,9 +1830,11 @@ struct dentry *d_obtain_alias(struct inode *inode) } /* attach a disconnected dentry */ + add_flags = d_flags_for_inode(inode) | DCACHE_DISCONNECTED; + spin_lock(&tmp->d_lock); tmp->d_inode = inode; - tmp->d_flags |= DCACHE_DISCONNECTED; + tmp->d_flags |= add_flags; hlist_add_head(&tmp->d_alias, &inode->i_dentry); hlist_bl_lock(&tmp->d_sb->s_anon); hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon); @@ -2495,12 +2432,14 @@ static void switch_names(struct dentry *dentry, struct dentry *target) dentry->d_name.name = dentry->d_iname; } else { /* - * Both are internal. Just copy target to dentry + * Both are internal. */ - memcpy(dentry->d_iname, target->d_name.name, - target->d_name.len + 1); - dentry->d_name.len = target->d_name.len; - return; + unsigned int i; + BUILD_BUG_ON(!IS_ALIGNED(DNAME_INLINE_LEN, sizeof(long))); + for (i = 0; i < DNAME_INLINE_LEN / sizeof(long); i++) { + swap(((long *) &dentry->d_iname)[i], + ((long *) &target->d_iname)[i]); + } } } swap(dentry->d_name.len, target->d_name.len); @@ -2557,13 +2496,15 @@ static void dentry_unlock_parents_for_move(struct dentry *dentry, * __d_move - move a dentry * @dentry: entry to move * @target: new dentry + * @exchange: exchange the two dentries * * Update the dcache to reflect the move of a file name. Negative * dcache entries should not be moved in this way. Caller must hold * rename_lock, the i_mutex of the source and target directories, * and the sb->s_vfs_rename_mutex if they differ. See lock_rename(). */ -static void __d_move(struct dentry * dentry, struct dentry * target) +static void __d_move(struct dentry *dentry, struct dentry *target, + bool exchange) { if (!dentry->d_inode) printk(KERN_WARNING "VFS: moving negative dcache entry\n"); @@ -2574,7 +2515,7 @@ static void __d_move(struct dentry * dentry, struct dentry * target) dentry_lock_for_move(dentry, target); write_seqcount_begin(&dentry->d_seq); - write_seqcount_begin(&target->d_seq); + write_seqcount_begin_nested(&target->d_seq, DENTRY_D_LOCK_NESTED); /* __d_drop does write_seqcount_barrier, but they're OK to nest. */ @@ -2585,8 +2526,15 @@ static void __d_move(struct dentry * dentry, struct dentry * target) __d_drop(dentry); __d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash)); - /* Unhash the target: dput() will then get rid of it */ + /* + * Unhash the target (d_delete() is not usable here). If exchanging + * the two dentries, then rehash onto the other's hash queue. + */ __d_drop(target); + if (exchange) { + __d_rehash(target, + d_hash(dentry->d_parent, dentry->d_name.hash)); + } list_del(&dentry->d_u.d_child); list_del(&target->d_u.d_child); @@ -2613,6 +2561,8 @@ static void __d_move(struct dentry * dentry, struct dentry * target) write_seqcount_end(&dentry->d_seq); dentry_unlock_parents_for_move(dentry, target); + if (exchange) + fsnotify_d_move(target); spin_unlock(&target->d_lock); fsnotify_d_move(dentry); spin_unlock(&dentry->d_lock); @@ -2630,11 +2580,30 @@ static void __d_move(struct dentry * dentry, struct dentry * target) void d_move(struct dentry *dentry, struct dentry *target) { write_seqlock(&rename_lock); - __d_move(dentry, target); + __d_move(dentry, target, false); write_sequnlock(&rename_lock); } EXPORT_SYMBOL(d_move); +/* + * d_exchange - exchange two dentries + * @dentry1: first dentry + * @dentry2: second dentry + */ +void d_exchange(struct dentry *dentry1, struct dentry *dentry2) +{ + write_seqlock(&rename_lock); + + WARN_ON(!dentry1->d_inode); + WARN_ON(!dentry2->d_inode); + WARN_ON(IS_ROOT(dentry1)); + WARN_ON(IS_ROOT(dentry2)); + + __d_move(dentry1, dentry2, true); + + write_sequnlock(&rename_lock); +} + /** * d_ancestor - search for an ancestor * @p1: ancestor dentry @@ -2682,7 +2651,7 @@ static struct dentry *__d_unalias(struct inode *inode, m2 = &alias->d_parent->d_inode->i_mutex; out_unalias: if (likely(!d_mountpoint(alias))) { - __d_move(alias, dentry); + __d_move(alias, dentry, false); ret = alias; } out_err: @@ -2706,7 +2675,7 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) dentry_lock_for_move(anon, dentry); write_seqcount_begin(&dentry->d_seq); - write_seqcount_begin(&anon->d_seq); + write_seqcount_begin_nested(&anon->d_seq, DENTRY_D_LOCK_NESTED); dparent = dentry->d_parent; @@ -2725,7 +2694,6 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) spin_unlock(&dentry->d_lock); /* anon->d_lock still locked, returns locked */ - anon->d_flags &= ~DCACHE_DISCONNECTED; } /** @@ -2846,9 +2814,9 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name) u32 dlen = ACCESS_ONCE(name->len); char *p; - if (*buflen < dlen + 1) - return -ENAMETOOLONG; *buflen -= dlen + 1; + if (*buflen < 0) + return -ENAMETOOLONG; p = *buffer -= dlen + 1; *p++ = '/'; while (dlen--) { @@ -2881,27 +2849,36 @@ static int prepend_path(const struct path *path, const struct path *root, char **buffer, int *buflen) { - struct dentry *dentry = path->dentry; - struct vfsmount *vfsmnt = path->mnt; - struct mount *mnt = real_mount(vfsmnt); + struct dentry *dentry; + struct vfsmount *vfsmnt; + struct mount *mnt; int error = 0; - unsigned seq = 0; + unsigned seq, m_seq = 0; char *bptr; int blen; rcu_read_lock(); +restart_mnt: + read_seqbegin_or_lock(&mount_lock, &m_seq); + seq = 0; + rcu_read_lock(); restart: bptr = *buffer; blen = *buflen; + error = 0; + dentry = path->dentry; + vfsmnt = path->mnt; + mnt = real_mount(vfsmnt); read_seqbegin_or_lock(&rename_lock, &seq); while (dentry != root->dentry || vfsmnt != root->mnt) { struct dentry * parent; if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) { + struct mount *parent = ACCESS_ONCE(mnt->mnt_parent); /* Global root? */ - if (mnt_has_parent(mnt)) { - dentry = mnt->mnt_mountpoint; - mnt = mnt->mnt_parent; + if (mnt != parent) { + dentry = ACCESS_ONCE(mnt->mnt_mountpoint); + mnt = parent; vfsmnt = &mnt->mnt; continue; } @@ -2936,6 +2913,14 @@ restart: } done_seqretry(&rename_lock, seq); + if (!(m_seq & 1)) + rcu_read_unlock(); + if (need_seqretry(&mount_lock, m_seq)) { + m_seq = 1; + goto restart_mnt; + } + done_seqretry(&mount_lock, m_seq); + if (error >= 0 && bptr == *buffer) { if (--blen < 0) error = -ENAMETOOLONG; @@ -2971,9 +2956,7 @@ char *__d_path(const struct path *path, int error; prepend(&res, &buflen, "\0", 1); - br_read_lock(&vfsmount_lock); error = prepend_path(path, root, &res, &buflen); - br_read_unlock(&vfsmount_lock); if (error < 0) return ERR_PTR(error); @@ -2990,9 +2973,7 @@ char *d_absolute_path(const struct path *path, int error; prepend(&res, &buflen, "\0", 1); - br_read_lock(&vfsmount_lock); error = prepend_path(path, &root, &res, &buflen); - br_read_unlock(&vfsmount_lock); if (error > 1) error = -EINVAL; @@ -3061,15 +3042,18 @@ char *d_path(const struct path *path, char *buf, int buflen) * thus don't need to be hashed. They also don't need a name until a * user wants to identify the object in /proc/pid/fd/. The little hack * below allows us to generate a name for these objects on demand: + * + * Some pseudo inodes are mountable. When they are mounted + * path->dentry == path->mnt->mnt_root. In that case don't call d_dname + * and instead have d_path return the mounted path. */ - if (path->dentry->d_op && path->dentry->d_op->d_dname) + if (path->dentry->d_op && path->dentry->d_op->d_dname && + (!IS_ROOT(path->dentry) || path->dentry != path->mnt->mnt_root)) return path->dentry->d_op->d_dname(path->dentry, buf, buflen); rcu_read_lock(); get_fs_root_rcu(current->fs, &root); - br_read_lock(&vfsmount_lock); error = path_with_deleted(path, &root, &res, &buflen); - br_read_unlock(&vfsmount_lock); rcu_read_unlock(); if (error < 0) @@ -3109,30 +3093,33 @@ char *simple_dname(struct dentry *dentry, char *buffer, int buflen) end = ERR_PTR(-ENAMETOOLONG); return end; } +EXPORT_SYMBOL(simple_dname); /* * Write full pathname from the root of the filesystem into the buffer. */ -static char *__dentry_path(struct dentry *dentry, char *buf, int buflen) +static char *__dentry_path(struct dentry *d, char *buf, int buflen) { + struct dentry *dentry; char *end, *retval; int len, seq = 0; int error = 0; + if (buflen < 2) + goto Elong; + rcu_read_lock(); restart: + dentry = d; end = buf + buflen; len = buflen; prepend(&end, &len, "\0", 1); - if (buflen < 1) - goto Elong; /* Get '/' right */ retval = end-1; *retval = '/'; read_seqbegin_or_lock(&rename_lock, &seq); while (!IS_ROOT(dentry)) { struct dentry *parent = dentry->d_parent; - int error; prefetch(parent); error = prepend_name(&end, &len, &dentry->d_name); @@ -3224,7 +3211,6 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) get_fs_root_and_pwd_rcu(current->fs, &root, &pwd); error = -ENOENT; - br_read_lock(&vfsmount_lock); if (!d_unlinked(pwd.dentry)) { unsigned long len; char *cwd = page + PATH_MAX; @@ -3232,7 +3218,6 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) prepend(&cwd, &buflen, "\0", 1); error = prepend_path(&pwd, &root, &cwd, &buflen); - br_read_unlock(&vfsmount_lock); rcu_read_unlock(); if (error < 0) @@ -3253,7 +3238,6 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) error = -EFAULT; } } else { - br_read_unlock(&vfsmount_lock); rcu_read_unlock(); } |
