diff options
Diffstat (limited to 'fs/super.c')
| -rw-r--r-- | fs/super.c | 332 |
1 files changed, 159 insertions, 173 deletions
diff --git a/fs/super.c b/fs/super.c index 7465d436420..d20d5b11ded 100644 --- a/fs/super.c +++ b/fs/super.c @@ -53,11 +53,15 @@ static char *sb_writers_name[SB_FREEZE_LEVELS] = { * shrinker path and that leads to deadlock on the shrinker_rwsem. Hence we * take a passive reference to the superblock to avoid this from occurring. */ -static int prune_super(struct shrinker *shrink, struct shrink_control *sc) +static unsigned long super_cache_scan(struct shrinker *shrink, + struct shrink_control *sc) { struct super_block *sb; - int fs_objects = 0; - int total_objects; + long fs_objects = 0; + long total_objects; + long freed = 0; + long dentries; + long inodes; sb = container_of(shrink, struct super_block, s_shrink); @@ -65,77 +69,88 @@ static int prune_super(struct shrinker *shrink, struct shrink_control *sc) * Deadlock avoidance. We may hold various FS locks, and we don't want * to recurse into the FS that called us in clear_inode() and friends.. */ - if (sc->nr_to_scan && !(sc->gfp_mask & __GFP_FS)) - return -1; + if (!(sc->gfp_mask & __GFP_FS)) + return SHRINK_STOP; if (!grab_super_passive(sb)) - return -1; + return SHRINK_STOP; - if (sb->s_op && sb->s_op->nr_cached_objects) - fs_objects = sb->s_op->nr_cached_objects(sb); - - total_objects = sb->s_nr_dentry_unused + - sb->s_nr_inodes_unused + fs_objects + 1; - - if (sc->nr_to_scan) { - int dentries; - int inodes; - - /* proportion the scan between the caches */ - dentries = (sc->nr_to_scan * sb->s_nr_dentry_unused) / - total_objects; - inodes = (sc->nr_to_scan * sb->s_nr_inodes_unused) / - total_objects; - if (fs_objects) - fs_objects = (sc->nr_to_scan * fs_objects) / - total_objects; - /* - * prune the dcache first as the icache is pinned by it, then - * prune the icache, followed by the filesystem specific caches - */ - prune_dcache_sb(sb, dentries); - prune_icache_sb(sb, inodes); + if (sb->s_op->nr_cached_objects) + fs_objects = sb->s_op->nr_cached_objects(sb, sc->nid); - if (fs_objects && sb->s_op->free_cached_objects) { - sb->s_op->free_cached_objects(sb, fs_objects); - fs_objects = sb->s_op->nr_cached_objects(sb); - } - total_objects = sb->s_nr_dentry_unused + - sb->s_nr_inodes_unused + fs_objects; + inodes = list_lru_count_node(&sb->s_inode_lru, sc->nid); + dentries = list_lru_count_node(&sb->s_dentry_lru, sc->nid); + total_objects = dentries + inodes + fs_objects + 1; + + /* proportion the scan between the caches */ + dentries = mult_frac(sc->nr_to_scan, dentries, total_objects); + inodes = mult_frac(sc->nr_to_scan, inodes, total_objects); + + /* + * prune the dcache first as the icache is pinned by it, then + * prune the icache, followed by the filesystem specific caches + */ + freed = prune_dcache_sb(sb, dentries, sc->nid); + freed += prune_icache_sb(sb, inodes, sc->nid); + + if (fs_objects) { + fs_objects = mult_frac(sc->nr_to_scan, fs_objects, + total_objects); + freed += sb->s_op->free_cached_objects(sb, fs_objects, + sc->nid); } - total_objects = (total_objects / 100) * sysctl_vfs_cache_pressure; drop_super(sb); - return total_objects; + return freed; } -static int init_sb_writers(struct super_block *s, struct file_system_type *type) +static unsigned long super_cache_count(struct shrinker *shrink, + struct shrink_control *sc) { - int err; - int i; + struct super_block *sb; + long total_objects = 0; - for (i = 0; i < SB_FREEZE_LEVELS; i++) { - err = percpu_counter_init(&s->s_writers.counter[i], 0); - if (err < 0) - goto err_out; - lockdep_init_map(&s->s_writers.lock_map[i], sb_writers_name[i], - &type->s_writers_key[i], 0); - } - init_waitqueue_head(&s->s_writers.wait); - init_waitqueue_head(&s->s_writers.wait_unfrozen); - return 0; -err_out: - while (--i >= 0) - percpu_counter_destroy(&s->s_writers.counter[i]); - return err; + sb = container_of(shrink, struct super_block, s_shrink); + + /* + * Don't call grab_super_passive as it is a potential + * scalability bottleneck. The counts could get updated + * between super_cache_count and super_cache_scan anyway. + * Call to super_cache_count with shrinker_rwsem held + * ensures the safety of call to list_lru_count_node() and + * s_op->nr_cached_objects(). + */ + if (sb->s_op && sb->s_op->nr_cached_objects) + total_objects = sb->s_op->nr_cached_objects(sb, + sc->nid); + + total_objects += list_lru_count_node(&sb->s_dentry_lru, + sc->nid); + total_objects += list_lru_count_node(&sb->s_inode_lru, + sc->nid); + + total_objects = vfs_pressure_ratio(total_objects); + return total_objects; } -static void destroy_sb_writers(struct super_block *s) +/** + * destroy_super - frees a superblock + * @s: superblock to free + * + * Frees a superblock. + */ +static void destroy_super(struct super_block *s) { int i; - + list_lru_destroy(&s->s_dentry_lru); + list_lru_destroy(&s->s_inode_lru); for (i = 0; i < SB_FREEZE_LEVELS; i++) percpu_counter_destroy(&s->s_writers.counter[i]); + security_sb_free(s); + WARN_ON(!list_empty(&s->s_mounts)); + kfree(s->s_subtype); + kfree(s->s_options); + kfree_rcu(s, rcu); } /** @@ -150,106 +165,75 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) { struct super_block *s = kzalloc(sizeof(struct super_block), GFP_USER); static const struct super_operations default_op; + int i; - if (s) { - if (security_sb_alloc(s)) { - /* - * We cannot call security_sb_free() without - * security_sb_alloc() succeeding. So bail out manually - */ - kfree(s); - s = NULL; - goto out; - } -#ifdef CONFIG_SMP - s->s_files = alloc_percpu(struct list_head); - if (!s->s_files) - goto err_out; - else { - int i; - - for_each_possible_cpu(i) - INIT_LIST_HEAD(per_cpu_ptr(s->s_files, i)); - } -#else - INIT_LIST_HEAD(&s->s_files); -#endif - if (init_sb_writers(s, type)) - goto err_out; - s->s_flags = flags; - s->s_bdi = &default_backing_dev_info; - INIT_HLIST_NODE(&s->s_instances); - INIT_HLIST_BL_HEAD(&s->s_anon); - INIT_LIST_HEAD(&s->s_inodes); - INIT_LIST_HEAD(&s->s_dentry_lru); - INIT_LIST_HEAD(&s->s_inode_lru); - spin_lock_init(&s->s_inode_lru_lock); - INIT_LIST_HEAD(&s->s_mounts); - init_rwsem(&s->s_umount); - lockdep_set_class(&s->s_umount, &type->s_umount_key); - /* - * sget() can have s_umount recursion. - * - * When it cannot find a suitable sb, it allocates a new - * one (this one), and tries again to find a suitable old - * one. - * - * In case that succeeds, it will acquire the s_umount - * lock of the old one. Since these are clearly distrinct - * locks, and this object isn't exposed yet, there's no - * risk of deadlocks. - * - * Annotate this by putting this lock in a different - * subclass. - */ - down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING); - s->s_count = 1; - atomic_set(&s->s_active, 1); - mutex_init(&s->s_vfs_rename_mutex); - lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key); - mutex_init(&s->s_dquot.dqio_mutex); - mutex_init(&s->s_dquot.dqonoff_mutex); - init_rwsem(&s->s_dquot.dqptr_sem); - s->s_maxbytes = MAX_NON_LFS; - s->s_op = &default_op; - s->s_time_gran = 1000000000; - s->cleancache_poolid = -1; - - s->s_shrink.seeks = DEFAULT_SEEKS; - s->s_shrink.shrink = prune_super; - s->s_shrink.batch = 1024; + if (!s) + return NULL; + + INIT_LIST_HEAD(&s->s_mounts); + + if (security_sb_alloc(s)) + goto fail; + + for (i = 0; i < SB_FREEZE_LEVELS; i++) { + if (percpu_counter_init(&s->s_writers.counter[i], 0) < 0) + goto fail; + lockdep_init_map(&s->s_writers.lock_map[i], sb_writers_name[i], + &type->s_writers_key[i], 0); } -out: + init_waitqueue_head(&s->s_writers.wait); + init_waitqueue_head(&s->s_writers.wait_unfrozen); + s->s_flags = flags; + s->s_bdi = &default_backing_dev_info; + INIT_HLIST_NODE(&s->s_instances); + INIT_HLIST_BL_HEAD(&s->s_anon); + INIT_LIST_HEAD(&s->s_inodes); + + if (list_lru_init(&s->s_dentry_lru)) + goto fail; + if (list_lru_init(&s->s_inode_lru)) + goto fail; + + init_rwsem(&s->s_umount); + lockdep_set_class(&s->s_umount, &type->s_umount_key); + /* + * sget() can have s_umount recursion. + * + * When it cannot find a suitable sb, it allocates a new + * one (this one), and tries again to find a suitable old + * one. + * + * In case that succeeds, it will acquire the s_umount + * lock of the old one. Since these are clearly distrinct + * locks, and this object isn't exposed yet, there's no + * risk of deadlocks. + * + * Annotate this by putting this lock in a different + * subclass. + */ + down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING); + s->s_count = 1; + atomic_set(&s->s_active, 1); + mutex_init(&s->s_vfs_rename_mutex); + lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key); + mutex_init(&s->s_dquot.dqio_mutex); + mutex_init(&s->s_dquot.dqonoff_mutex); + init_rwsem(&s->s_dquot.dqptr_sem); + s->s_maxbytes = MAX_NON_LFS; + s->s_op = &default_op; + s->s_time_gran = 1000000000; + s->cleancache_poolid = -1; + + s->s_shrink.seeks = DEFAULT_SEEKS; + s->s_shrink.scan_objects = super_cache_scan; + s->s_shrink.count_objects = super_cache_count; + s->s_shrink.batch = 1024; + s->s_shrink.flags = SHRINKER_NUMA_AWARE; return s; -err_out: - security_sb_free(s); -#ifdef CONFIG_SMP - if (s->s_files) - free_percpu(s->s_files); -#endif - destroy_sb_writers(s); - kfree(s); - s = NULL; - goto out; -} -/** - * destroy_super - frees a superblock - * @s: superblock to free - * - * Frees a superblock. - */ -static inline void destroy_super(struct super_block *s) -{ -#ifdef CONFIG_SMP - free_percpu(s->s_files); -#endif - destroy_sb_writers(s); - security_sb_free(s); - WARN_ON(!list_empty(&s->s_mounts)); - kfree(s->s_subtype); - kfree(s->s_options); - kfree(s); +fail: + destroy_super(s); + return NULL; } /* Superblock refcounting */ @@ -296,10 +280,9 @@ void deactivate_locked_super(struct super_block *s) struct file_system_type *fs = s->s_type; if (atomic_dec_and_test(&s->s_active)) { cleancache_invalidate_fs(s); + unregister_shrinker(&s->s_shrink); fs->kill_sb(s); - /* caches are now gone, we can safely kill the shrinker now */ - unregister_shrinker(&s->s_shrink); put_filesystem(fs); put_super(s); } else { @@ -336,19 +319,19 @@ EXPORT_SYMBOL(deactivate_super); * and want to turn it into a full-blown active reference. grab_super() * is called with sb_lock held and drops it. Returns 1 in case of * success, 0 if we had failed (superblock contents was already dead or - * dying when grab_super() had been called). + * dying when grab_super() had been called). Note that this is only + * called for superblocks not in rundown mode (== ones still on ->fs_supers + * of their type), so increment of ->s_count is OK here. */ static int grab_super(struct super_block *s) __releases(sb_lock) { - if (atomic_inc_not_zero(&s->s_active)) { - spin_unlock(&sb_lock); - return 1; - } - /* it's going away */ s->s_count++; spin_unlock(&sb_lock); - /* wait for it to die */ down_write(&s->s_umount); + if ((s->s_flags & MS_BORN) && atomic_inc_not_zero(&s->s_active)) { + put_super(s); + return 1; + } up_write(&s->s_umount); put_super(s); return 0; @@ -414,6 +397,11 @@ void generic_shutdown_super(struct super_block *sb) evict_inodes(sb); + if (sb->s_dio_done_wq) { + destroy_workqueue(sb->s_dio_done_wq); + sb->s_dio_done_wq = NULL; + } + if (sop->put_super) sop->put_super(sb); @@ -463,11 +451,6 @@ retry: destroy_super(s); s = NULL; } - down_write(&old->s_umount); - if (unlikely(!(old->s_flags & MS_BORN))) { - deactivate_locked_super(old); - goto retry; - } return old; } } @@ -660,10 +643,10 @@ restart: if (hlist_unhashed(&sb->s_instances)) continue; if (sb->s_bdev == bdev) { - if (grab_super(sb)) /* drops sb_lock */ - return sb; - else + if (!grab_super(sb)) goto restart; + up_write(&sb->s_umount); + return sb; } } spin_unlock(&sb_lock); @@ -722,7 +705,6 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) if (flags & MS_RDONLY) acct_auto_close(sb); shrink_dcache_sb(sb); - sync_filesystem(sb); remount_ro = (flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY); @@ -730,7 +712,8 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) make sure there are no rw files opened */ if (remount_ro) { if (force) { - mark_files_ro(sb); + sb->s_readonly_remount = 1; + smp_wmb(); } else { retval = sb_prepare_remount_readonly(sb); if (retval) @@ -819,7 +802,10 @@ void emergency_remount(void) static DEFINE_IDA(unnamed_dev_ida); static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */ -static int unnamed_dev_start = 0; /* don't bother trying below it */ +/* Many userspace utilities consider an FSID of 0 invalid. + * Always return at least 1 from get_anon_bdev. + */ +static int unnamed_dev_start = 1; int get_anon_bdev(dev_t *p) { |
