From e400c28524af2d344b1663b27bf28984fa959a0e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 10 Aug 2010 18:02:54 -0700 Subject: cgroups: save space for the terminator The original code didn't leave enough space for a NULL terminator. These strings are copied with strcpy() into fixed length buffers in cgroup_root_from_opts(). Signed-off-by: Dan Carpenter Acked-by: Serge E. Hallyn Reviewd-by: KAMEZAWA Hiroyuki Cc: Paul Menage Cc: Li Zefan Cc: Ben Blum Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cgroup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/cgroup.c') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index d83cab06da8..192f88c5b0f 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1102,7 +1102,7 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) if (opts->release_agent) return -EINVAL; opts->release_agent = - kstrndup(token + 14, PATH_MAX, GFP_KERNEL); + kstrndup(token + 14, PATH_MAX - 1, GFP_KERNEL); if (!opts->release_agent) return -ENOMEM; } else if (!strncmp(token, "name=", 5)) { @@ -1123,7 +1123,7 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) if (opts->name) return -EINVAL; opts->name = kstrndup(name, - MAX_CGROUP_ROOT_NAMELEN, + MAX_CGROUP_ROOT_NAMELEN - 1, GFP_KERNEL); if (!opts->name) return -ENOMEM; -- cgit v1.2.3-70-g09d2 From 2c392b8c3450ceb69ba1b93cb0cddb3998fb8cdc Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 24 Feb 2010 19:41:39 +0100 Subject: cgroups: __rcu annotations Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. McKenney Acked-by: Paul Menage Cc: Li Zefan Reviewed-by: Josh Triplett --- include/linux/cgroup.h | 4 ++-- include/linux/sched.h | 2 +- kernel/cgroup.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel/cgroup.c') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index ed3e92e41c6..3cb7d04308c 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -75,7 +75,7 @@ struct cgroup_subsys_state { unsigned long flags; /* ID for this css, if possible */ - struct css_id *id; + struct css_id __rcu *id; }; /* bits in struct cgroup_subsys_state flags field */ @@ -205,7 +205,7 @@ struct cgroup { struct list_head children; /* my children */ struct cgroup *parent; /* my parent */ - struct dentry *dentry; /* cgroup fs entry, RCU protected */ + struct dentry __rcu *dentry; /* cgroup fs entry, RCU protected */ /* Private pointers for each registered subsystem */ struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; diff --git a/include/linux/sched.h b/include/linux/sched.h index 1e2a6db2d7d..bbffd087476 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1418,7 +1418,7 @@ struct task_struct { #endif #ifdef CONFIG_CGROUPS /* Control Group info protected by css_set_lock */ - struct css_set *cgroups; + struct css_set __rcu *cgroups; /* cg_list protected by css_set_lock and tsk->alloc_lock */ struct list_head cg_list; #endif diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 192f88c5b0f..e5c5497a7dc 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -138,7 +138,7 @@ struct css_id { * is called after synchronize_rcu(). But for safe use, css_is_removed() * css_tryget() should be used for avoiding race. */ - struct cgroup_subsys_state *css; + struct cgroup_subsys_state __rcu *css; /* * ID of this css. */ -- cgit v1.2.3-70-g09d2 From 31583bb0cf6cc40f2a468a4d2f3b9cbefd24f891 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 9 Sep 2010 16:37:37 -0700 Subject: cgroups: fix API thinko Add cgroup_attach_task_all() The existing cgroup_attach_task_current_cg() API is called by a thread to attach another thread to all of its cgroups; this is unsuitable for cases where a privileged task wants to attach itself to the cgroups of a less privileged one, since the call must be made from the context of the target task. This patch adds a more generic cgroup_attach_task_all() API that allows both the source task and to-be-moved task to be specified. cgroup_attach_task_current_cg() becomes a specialization of the more generic new function. [menage@google.com: rewrote changelog] [akpm@linux-foundation.org: address reviewer comments] Signed-off-by: Michael S. Tsirkin Tested-by: Alex Williamson Acked-by: Paul Menage Cc: Li Zefan Cc: Ben Blum Cc: Sridhar Samudrala Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 12 +++++++++++- kernel/cgroup.c | 13 +++++++------ 2 files changed, 18 insertions(+), 7 deletions(-) (limited to 'kernel/cgroup.c') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index ed3e92e41c6..0c991023ee4 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -578,7 +578,12 @@ struct task_struct *cgroup_iter_next(struct cgroup *cgrp, void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it); int cgroup_scan_tasks(struct cgroup_scanner *scan); int cgroup_attach_task(struct cgroup *, struct task_struct *); -int cgroup_attach_task_current_cg(struct task_struct *); +int cgroup_attach_task_all(struct task_struct *from, struct task_struct *); + +static inline int cgroup_attach_task_current_cg(struct task_struct *tsk) +{ + return cgroup_attach_task_all(current, tsk); +} /* * CSS ID is ID for cgroup_subsys_state structs under subsys. This only works @@ -636,6 +641,11 @@ static inline int cgroupstats_build(struct cgroupstats *stats, } /* No cgroups - nothing to do */ +static inline int cgroup_attach_task_all(struct task_struct *from, + struct task_struct *t) +{ + return 0; +} static inline int cgroup_attach_task_current_cg(struct task_struct *t) { return 0; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 192f88c5b0f..c9483d8f614 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1791,19 +1791,20 @@ out: } /** - * cgroup_attach_task_current_cg - attach task 'tsk' to current task's cgroup + * cgroup_attach_task_all - attach task 'tsk' to all cgroups of task 'from' + * @from: attach to all cgroups of a given task * @tsk: the task to be attached */ -int cgroup_attach_task_current_cg(struct task_struct *tsk) +int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk) { struct cgroupfs_root *root; - struct cgroup *cur_cg; int retval = 0; cgroup_lock(); for_each_active_root(root) { - cur_cg = task_cgroup_from_root(current, root); - retval = cgroup_attach_task(cur_cg, tsk); + struct cgroup *from_cg = task_cgroup_from_root(from, root); + + retval = cgroup_attach_task(from_cg, tsk); if (retval) break; } @@ -1811,7 +1812,7 @@ int cgroup_attach_task_current_cg(struct task_struct *tsk) return retval; } -EXPORT_SYMBOL_GPL(cgroup_attach_task_current_cg); +EXPORT_SYMBOL_GPL(cgroup_attach_task_all); /* * Attach task with pid 'pid' to cgroup 'cgrp'. Call with cgroup_mutex -- cgit v1.2.3-70-g09d2 From db71922217a214e5c9268448e537b54fc1f301ea Mon Sep 17 00:00:00 2001 From: Jan Blunck Date: Sun, 15 Aug 2010 22:51:10 +0200 Subject: BKL: Explicitly add BKL around get_sb/fill_super This patch is a preparation necessary to remove the BKL from do_new_mount(). It explicitly adds calls to lock_kernel()/unlock_kernel() around get_sb/fill_super operations for filesystems that still uses the BKL. I've read through all the code formerly covered by the BKL inside do_kern_mount() and have satisfied myself that it doesn't need the BKL any more. do_kern_mount() is already called without the BKL when mounting the rootfs and in nfsctl. do_kern_mount() calls vfs_kern_mount(), which is called from various places without BKL: simple_pin_fs(), nfs_do_clone_mount() through nfs_follow_mountpoint(), afs_mntpt_do_automount() through afs_mntpt_follow_link(). Both later functions are actually the filesystems follow_link inode operation. vfs_kern_mount() is calling the specified get_sb function and lets the filesystem do its job by calling the given fill_super function. Therefore I think it is safe to push down the BKL from the VFS to the low-level filesystems get_sb/fill_super operation. [arnd: do not add the BKL to those file systems that already don't use it elsewhere] Signed-off-by: Jan Blunck Signed-off-by: Arnd Bergmann Cc: Matthew Wilcox Cc: Christoph Hellwig --- fs/adfs/super.c | 8 +++++++- fs/affs/super.c | 9 ++++++++- fs/afs/super.c | 5 +++++ fs/bfs/inode.c | 8 +++++++- fs/cifs/cifsfs.c | 12 ++++++++++-- fs/coda/inode.c | 8 +++++++- fs/ecryptfs/main.c | 4 ++++ fs/ext2/super.c | 10 ++++++++-- fs/ext3/super.c | 9 ++++++++- fs/ext4/super.c | 8 ++++++-- fs/fat/namei_msdos.c | 7 ++++++- fs/fat/namei_vfat.c | 7 ++++++- fs/freevxfs/vxfs_super.c | 7 ++++++- fs/hfs/super.c | 8 +++++++- fs/hpfs/super.c | 8 +++++++- fs/isofs/inode.c | 8 +++++++- fs/jffs2/super.c | 11 +++++++++-- fs/jfs/super.c | 12 ++++++++++-- fs/nilfs2/super.c | 8 +++++++- fs/ntfs/super.c | 5 +++++ fs/ocfs2/dlmfs/dlmfs.c | 9 ++++++++- fs/ocfs2/super.c | 5 +++++ fs/qnx4/inode.c | 8 +++++++- fs/smbfs/inode.c | 5 +++++ fs/squashfs/super.c | 6 ++++++ fs/udf/super.c | 8 +++++++- fs/ufs/super.c | 5 +++++ kernel/cgroup.c | 4 ++++ 28 files changed, 187 insertions(+), 25 deletions(-) (limited to 'kernel/cgroup.c') diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 4a3af7075c1..d9803f73236 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -352,11 +352,15 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent) struct adfs_sb_info *asb; struct inode *root; + lock_kernel(); + sb->s_flags |= MS_NODIRATIME; asb = kzalloc(sizeof(*asb), GFP_KERNEL); - if (!asb) + if (!asb) { + unlock_kernel(); return -ENOMEM; + } sb->s_fs_info = asb; /* set default options */ @@ -474,6 +478,7 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent) goto error; } else sb->s_root->d_op = &adfs_dentry_operations; + unlock_kernel(); return 0; error_free_bh: @@ -481,6 +486,7 @@ error_free_bh: error: sb->s_fs_info = NULL; kfree(asb); + unlock_kernel(); return -EINVAL; } diff --git a/fs/affs/super.c b/fs/affs/super.c index 33c4e7eef47..3a6d1dee4a5 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -291,6 +291,8 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent) u8 sig[4]; int ret = -EINVAL; + lock_kernel(); + save_mount_options(sb, data); pr_debug("AFFS: read_super(%s)\n",data ? (const char *)data : "no options"); @@ -300,8 +302,10 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent) sb->s_flags |= MS_NODIRATIME; sbi = kzalloc(sizeof(struct affs_sb_info), GFP_KERNEL); - if (!sbi) + if (!sbi) { + unlock_kernel(); return -ENOMEM; + } sb->s_fs_info = sbi; mutex_init(&sbi->s_bmlock); spin_lock_init(&sbi->symlink_lock); @@ -312,6 +316,7 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent) printk(KERN_ERR "AFFS: Error parsing options\n"); kfree(sbi->s_prefix); kfree(sbi); + unlock_kernel(); return -EINVAL; } /* N.B. after this point s_prefix must be released */ @@ -482,6 +487,7 @@ got_root: sb->s_root->d_op = &affs_dentry_operations; pr_debug("AFFS: s_flags=%lX\n",sb->s_flags); + unlock_kernel(); return 0; /* @@ -496,6 +502,7 @@ out_error_noinode: kfree(sbi->s_prefix); kfree(sbi); sb->s_fs_info = NULL; + unlock_kernel(); return ret; } diff --git a/fs/afs/super.c b/fs/afs/super.c index 77e1e5a6115..6c2fef44d38 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -302,12 +302,15 @@ static int afs_fill_super(struct super_block *sb, void *data) struct inode *inode = NULL; int ret; + lock_kernel(); + _enter(""); /* allocate a superblock info record */ as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL); if (!as) { _leave(" = -ENOMEM"); + unlock_kernel(); return -ENOMEM; } @@ -341,6 +344,7 @@ static int afs_fill_super(struct super_block *sb, void *data) sb->s_root = root; _leave(" = 0"); + unlock_kernel(); return 0; error_inode: @@ -354,6 +358,7 @@ error: sb->s_fs_info = NULL; _leave(" = %d", ret); + unlock_kernel(); return ret; } diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index c4daf0f5fc0..d2e09363dd9 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -322,9 +322,13 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) int ret = -EINVAL; unsigned long i_sblock, i_eblock, i_eoff, s_size; + lock_kernel(); + info = kzalloc(sizeof(*info), GFP_KERNEL); - if (!info) + if (!info) { + unlock_kernel(); return -ENOMEM; + } mutex_init(&info->bfs_lock); s->s_fs_info = info; @@ -439,6 +443,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) brelse(bh); brelse(sbh); dump_imap("read_super", s); + unlock_kernel(); return 0; out3: @@ -452,6 +457,7 @@ out: mutex_destroy(&info->bfs_lock); kfree(info); s->s_fs_info = NULL; + unlock_kernel(); return ret; } diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index b7431afdd76..070bf1aecd2 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -514,22 +514,30 @@ cifs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt) { int rc; - struct super_block *sb = sget(fs_type, NULL, set_anon_super, NULL); + struct super_block *sb; + + lock_kernel(); + + sb = sget(fs_type, NULL, set_anon_super, NULL); cFYI(1, "Devname: %s flags: %d ", dev_name, flags); - if (IS_ERR(sb)) + if (IS_ERR(sb)) { + unlock_kernel(); return PTR_ERR(sb); + } sb->s_flags = flags; rc = cifs_read_super(sb, data, dev_name, flags & MS_SILENT ? 1 : 0); if (rc) { deactivate_locked_super(sb); + unlock_kernel(); return rc; } sb->s_flags |= MS_ACTIVE; simple_set_mnt(mnt, sb); + unlock_kernel(); return 0; } diff --git a/fs/coda/inode.c b/fs/coda/inode.c index 6526e6f21ec..bfe8179b129 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -148,6 +148,8 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent) int error; int idx; + lock_kernel(); + idx = get_device_index((struct coda_mount_data *) data); /* Ignore errors in data, for backward compatibility */ @@ -159,11 +161,13 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent) vc = &coda_comms[idx]; if (!vc->vc_inuse) { printk("coda_read_super: No pseudo device\n"); + unlock_kernel(); return -EINVAL; } if ( vc->vc_sb ) { printk("coda_read_super: Device already mounted\n"); + unlock_kernel(); return -EBUSY; } @@ -202,7 +206,8 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent) sb->s_root = d_alloc_root(root); if (!sb->s_root) goto error; - return 0; + unlock_kernel(); + return 0; error: bdi_destroy(&vc->bdi); @@ -212,6 +217,7 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent) if (vc) vc->vc_sb = NULL; + unlock_kernel(); return -EINVAL; } diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index cbd4e18adb2..c4af92fa12c 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -36,6 +36,7 @@ #include #include #include +#include /* For lock_kernel() */ #include "ecryptfs_kernel.h" /** @@ -550,6 +551,7 @@ static int ecryptfs_get_sb(struct file_system_type *fs_type, int flags, const char *err = "Getting sb failed"; int rc; + lock_kernel(); sbi = kmem_cache_zalloc(ecryptfs_sb_info_cache, GFP_KERNEL); if (!sbi) { rc = -ENOMEM; @@ -608,6 +610,7 @@ static int ecryptfs_get_sb(struct file_system_type *fs_type, int flags, goto out; } simple_set_mnt(mnt, s); + unlock_kernel(); return 0; out: @@ -616,6 +619,7 @@ out: kmem_cache_free(ecryptfs_sb_info_cache, sbi); } printk(KERN_ERR "%s; rc = [%d]\n", err, rc); + unlock_kernel(); return rc; } diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 1ec602673ea..f98c390caf1 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -747,15 +747,18 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) __le32 features; int err; + lock_kernel(); + + err = -ENOMEM; sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); if (!sbi) - return -ENOMEM; + goto failed_unlock; sbi->s_blockgroup_lock = kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL); if (!sbi->s_blockgroup_lock) { kfree(sbi); - return -ENOMEM; + goto failed_unlock; } sb->s_fs_info = sbi; sbi->s_sb_block = sb_block; @@ -1083,6 +1086,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) if (ext2_setup_super (sb, es, sb->s_flags & MS_RDONLY)) sb->s_flags |= MS_RDONLY; ext2_write_super(sb); + unlock_kernel(); return 0; cantfind_ext2: @@ -1107,6 +1111,8 @@ failed_sbi: sb->s_fs_info = NULL; kfree(sbi->s_blockgroup_lock); kfree(sbi); +failed_unlock: + unlock_kernel(); return ret; } diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 5dbf4dba03c..41f9dcd73e9 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -1611,14 +1611,19 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) __le32 features; int err; + lock_kernel(); + sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); - if (!sbi) + if (!sbi) { + unlock_kernel(); return -ENOMEM; + } sbi->s_blockgroup_lock = kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL); if (!sbi->s_blockgroup_lock) { kfree(sbi); + unlock_kernel(); return -ENOMEM; } sb->s_fs_info = sbi; @@ -2026,6 +2031,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) "writeback"); lock_kernel(); + unlock_kernel(); return 0; cantfind_ext3: @@ -2056,6 +2062,7 @@ out_fail: kfree(sbi->s_blockgroup_lock); kfree(sbi); lock_kernel(); + unlock_kernel(); return ret; } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 26147746c27..0f0021f4990 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2568,6 +2568,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) int err; unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; + lock_kernel(); + sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); if (!sbi) goto out_free_orig; @@ -3166,7 +3168,6 @@ no_journal: if (es->s_error_count) mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */ - lock_kernel(); kfree(orig_data); return 0; @@ -3213,8 +3214,11 @@ out_fail: sb->s_fs_info = NULL; kfree(sbi->s_blockgroup_lock); kfree(sbi); - lock_kernel(); + kfree(orig_data); + return ret; + out_free_orig: + unlock_kernel(); kfree(orig_data); return ret; } diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index bbc94ae4fd7..e2b0b978340 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -9,6 +9,7 @@ #include #include #include +#include /* For lock_kernel() */ #include "fat.h" /* Characters that are undesirable in an MS-DOS file name */ @@ -662,12 +663,16 @@ static int msdos_fill_super(struct super_block *sb, void *data, int silent) { int res; + lock_kernel(); res = fat_fill_super(sb, data, silent, &msdos_dir_inode_operations, 0); - if (res) + if (res) { + unlock_kernel(); return res; + } sb->s_flags |= MS_NOATIME; sb->s_root->d_op = &msdos_dentry_operations; + unlock_kernel(); return 0; } diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 6fcc7e71fba..9006ad9c7b1 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -21,6 +21,7 @@ #include #include #include +#include /* For lock_kernel() */ #include "fat.h" /* @@ -1055,15 +1056,19 @@ static int vfat_fill_super(struct super_block *sb, void *data, int silent) { int res; + lock_kernel(); res = fat_fill_super(sb, data, silent, &vfat_dir_inode_operations, 1); - if (res) + if (res) { + unlock_kernel(); return res; + } if (MSDOS_SB(sb)->options.name_check != 's') sb->s_root->d_op = &vfat_ci_dentry_ops; else sb->s_root->d_op = &vfat_dentry_ops; + unlock_kernel(); return 0; } diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index dc0c041e85c..eb2b9e09c99 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c @@ -148,7 +148,7 @@ static int vxfs_remount(struct super_block *sb, int *flags, char *data) * The superblock on success, else %NULL. * * Locking: - * We are under the bkl and @sbp->s_lock. + * We are under @sbp->s_lock. */ static int vxfs_fill_super(struct super_block *sbp, void *dp, int silent) { @@ -159,11 +159,14 @@ static int vxfs_fill_super(struct super_block *sbp, void *dp, int silent) struct inode *root; int ret = -EINVAL; + lock_kernel(); + sbp->s_flags |= MS_RDONLY; infp = kzalloc(sizeof(*infp), GFP_KERNEL); if (!infp) { printk(KERN_WARNING "vxfs: unable to allocate incore superblock\n"); + unlock_kernel(); return -ENOMEM; } @@ -236,6 +239,7 @@ static int vxfs_fill_super(struct super_block *sbp, void *dp, int silent) goto out_free_ilist; } + unlock_kernel(); return 0; out_free_ilist: @@ -245,6 +249,7 @@ out_free_ilist: out: brelse(bp); kfree(infp); + unlock_kernel(); return ret; } diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 34235d4bf08..3069416fa8e 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -382,9 +382,13 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent) struct inode *root_inode; int res; + lock_kernel(); + sbi = kzalloc(sizeof(struct hfs_sb_info), GFP_KERNEL); - if (!sbi) + if (!sbi) { + unlock_kernel(); return -ENOMEM; + } sb->s_fs_info = sbi; INIT_HLIST_HEAD(&sbi->rsrc_inodes); @@ -435,6 +439,7 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_root->d_op = &hfs_dentry_operations; /* everything's okay */ + unlock_kernel(); return 0; bail_iput: @@ -443,6 +448,7 @@ bail_no_root: printk(KERN_ERR "hfs: get root inode failed.\n"); bail: hfs_mdb_put(sb); + unlock_kernel(); return res; } diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 2607010be2f..c969a1aa163 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -477,11 +477,15 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) int o; + lock_kernel(); + save_mount_options(s, options); sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); - if (!sbi) + if (!sbi) { + unlock_kernel(); return -ENOMEM; + } s->s_fs_info = sbi; sbi->sb_bmp_dir = NULL; @@ -666,6 +670,7 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) root->i_blocks = 5; hpfs_brelse4(&qbh); } + unlock_kernel(); return 0; bail4: brelse(bh2); @@ -677,6 +682,7 @@ bail0: kfree(sbi->sb_cp_table); s->s_fs_info = NULL; kfree(sbi); + unlock_kernel(); return -EINVAL; } diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 5a44811b502..05baf7721e8 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -571,11 +571,15 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent) int table, error = -EINVAL; unsigned int vol_desc_start; + lock_kernel(); + save_mount_options(s, data); sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); - if (!sbi) + if (!sbi) { + unlock_kernel(); return -ENOMEM; + } s->s_fs_info = sbi; if (!parse_options((char *)data, &opt)) @@ -900,6 +904,7 @@ root_found: kfree(opt.iocharset); + unlock_kernel(); return 0; /* @@ -939,6 +944,7 @@ out_freesbi: kfree(opt.iocharset); kfree(sbi); s->s_fs_info = NULL; + unlock_kernel(); return error; } diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 662bba09950..58dd9cf0620 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -146,14 +146,19 @@ static const struct super_operations jffs2_super_operations = static int jffs2_fill_super(struct super_block *sb, void *data, int silent) { struct jffs2_sb_info *c; + int ret; + + lock_kernel(); D1(printk(KERN_DEBUG "jffs2_get_sb_mtd():" " New superblock for device %d (\"%s\")\n", sb->s_mtd->index, sb->s_mtd->name)); c = kzalloc(sizeof(*c), GFP_KERNEL); - if (!c) + if (!c) { + unlock_kernel(); return -ENOMEM; + } c->mtd = sb->s_mtd; c->os_priv = sb; @@ -175,7 +180,9 @@ static int jffs2_fill_super(struct super_block *sb, void *data, int silent) #ifdef CONFIG_JFFS2_FS_POSIX_ACL sb->s_flags |= MS_POSIXACL; #endif - return jffs2_do_fill_super(sb, data, silent); + ret = jffs2_do_fill_super(sb, data, silent); + unlock_kernel(); + return ret; } static int jffs2_get_sb(struct file_system_type *fs_type, diff --git a/fs/jfs/super.c b/fs/jfs/super.c index ec8c3e4baca..eb31f677347 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -438,14 +438,20 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) s64 newLVSize = 0; int flag, ret = -EINVAL; + lock_kernel(); + jfs_info("In jfs_read_super: s_flags=0x%lx", sb->s_flags); - if (!new_valid_dev(sb->s_bdev->bd_dev)) + if (!new_valid_dev(sb->s_bdev->bd_dev)) { + unlock_kernel(); return -EOVERFLOW; + } sbi = kzalloc(sizeof (struct jfs_sb_info), GFP_KERNEL); - if (!sbi) + if (!sbi) { + unlock_kernel(); return -ENOMEM; + } sb->s_fs_info = sbi; sbi->sb = sb; sbi->uid = sbi->gid = sbi->umask = -1; @@ -542,6 +548,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_maxbytes = min(((u64) PAGE_CACHE_SIZE << 32) - 1, (u64)sb->s_maxbytes); #endif sb->s_time_gran = 1; + unlock_kernel(); return 0; out_no_root: @@ -564,6 +571,7 @@ out_unload: unload_nls(sbi->nls_tab); out_kfree: kfree(sbi); + unlock_kernel(); return ret; } diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 922263393c7..0d573c2a686 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -1113,9 +1113,12 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags, if (!(flags & MS_RDONLY)) mode |= FMODE_WRITE; + lock_kernel(); sd.bdev = open_bdev_exclusive(dev_name, mode, fs_type); - if (IS_ERR(sd.bdev)) + if (IS_ERR(sd.bdev)) { + unlock_kernel(); return PTR_ERR(sd.bdev); + } /* * To get mount instance using sget() vfs-routine, NILFS needs @@ -1198,6 +1201,7 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags, if (need_to_close) close_bdev_exclusive(sd.bdev, mode); simple_set_mnt(mnt, s); + unlock_kernel(); return 0; failed_unlock: @@ -1206,6 +1210,7 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags, failed: close_bdev_exclusive(sd.bdev, mode); + unlock_kernel(); return err; cancel_new: @@ -1218,6 +1223,7 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags, * We must finish all post-cleaning before this call; * put_nilfs() needs the block device. */ + unlock_kernel(); return err; } diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 512806171bf..1f31e77fc41 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -2732,6 +2732,8 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) struct inode *tmp_ino; int blocksize, result; + lock_kernel(); + /* * We do a pretty difficult piece of bootstrap by reading the * MFT (and other metadata) from disk into memory. We'll only @@ -2755,6 +2757,7 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) ntfs_error(sb, "Allocation of NTFS volume structure " "failed. Aborting mount..."); lockdep_on(); + unlock_kernel(); return -ENOMEM; } /* Initialize ntfs_volume structure. */ @@ -2942,6 +2945,7 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) sb->s_export_op = &ntfs_export_ops; lock_kernel(); lockdep_on(); + unlock_kernel(); return 0; } ntfs_error(sb, "Failed to allocate root directory."); @@ -3062,6 +3066,7 @@ err_out_now: kfree(vol); ntfs_debug("Failed, returning -EINVAL."); lockdep_on(); + unlock_kernel(); return -EINVAL; } diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index c2903b84bb7..667d7ceba8c 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c @@ -44,6 +44,7 @@ #include #include #include +#include #include @@ -588,21 +589,27 @@ static int dlmfs_fill_super(struct super_block * sb, struct inode * inode; struct dentry * root; + lock_kernel(); + sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_blocksize = PAGE_CACHE_SIZE; sb->s_blocksize_bits = PAGE_CACHE_SHIFT; sb->s_magic = DLMFS_MAGIC; sb->s_op = &dlmfs_ops; inode = dlmfs_get_root_inode(sb); - if (!inode) + if (!inode) { + unlock_kernel(); return -ENOMEM; + } root = d_alloc_root(inode); if (!root) { iput(inode); + unlock_kernel(); return -ENOMEM; } sb->s_root = root; + unlock_kernel(); return 0; } diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index fa1be1b304d..b7e4f2d19d4 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1002,6 +1002,8 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) char nodestr[8]; struct ocfs2_blockcheck_stats stats; + lock_kernel(); + mlog_entry("%p, %p, %i", sb, data, silent); if (!ocfs2_parse_options(sb, data, &parsed_options, 0)) { @@ -1179,6 +1181,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) atomic_set(&osb->vol_state, VOLUME_DISABLED); wake_up(&osb->osb_mount_event); mlog_exit(status); + unlock_kernel(); return status; } } @@ -1193,6 +1196,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) ocfs2_orphan_scan_start(osb); mlog_exit(status); + unlock_kernel(); return status; read_super_error: @@ -1208,6 +1212,7 @@ read_super_error: } mlog_exit(status); + unlock_kernel(); return status; } diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 16829722be9..86a7be1399a 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -234,9 +234,13 @@ static int qnx4_fill_super(struct super_block *s, void *data, int silent) struct qnx4_sb_info *qs; int ret = -EINVAL; + lock_kernel(); + qs = kzalloc(sizeof(struct qnx4_sb_info), GFP_KERNEL); - if (!qs) + if (!qs) { + unlock_kernel(); return -ENOMEM; + } s->s_fs_info = qs; sb_set_blocksize(s, QNX4_BLOCK_SIZE); @@ -284,6 +288,7 @@ static int qnx4_fill_super(struct super_block *s, void *data, int silent) brelse(bh); + unlock_kernel(); return 0; outi: @@ -293,6 +298,7 @@ static int qnx4_fill_super(struct super_block *s, void *data, int silent) outnobh: kfree(qs); s->s_fs_info = NULL; + unlock_kernel(); return ret; } diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c index 450c9194198..8fc5e50e142 100644 --- a/fs/smbfs/inode.c +++ b/fs/smbfs/inode.c @@ -501,6 +501,8 @@ static int smb_fill_super(struct super_block *sb, void *raw_data, int silent) void *mem; static int warn_count; + lock_kernel(); + if (warn_count < 5) { warn_count++; printk(KERN_EMERG "smbfs is deprecated and will be removed" @@ -621,6 +623,7 @@ static int smb_fill_super(struct super_block *sb, void *raw_data, int silent) smb_new_dentry(sb->s_root); + unlock_kernel(); return 0; out_no_root: @@ -643,9 +646,11 @@ out_wrong_data: out_no_data: printk(KERN_ERR "smb_fill_super: missing data argument\n"); out_fail: + unlock_kernel(); return -EINVAL; out_no_server: printk(KERN_ERR "smb_fill_super: cannot allocate struct smb_sb_info\n"); + unlock_kernel(); return -ENOMEM; } diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 88b4f860665..ab1a401a0e1 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -87,11 +87,14 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) u64 lookup_table_start, xattr_id_table_start; int err; + lock_kernel(); + TRACE("Entered squashfs_fill_superblock\n"); sb->s_fs_info = kzalloc(sizeof(*msblk), GFP_KERNEL); if (sb->s_fs_info == NULL) { ERROR("Failed to allocate squashfs_sb_info\n"); + unlock_kernel(); return -ENOMEM; } msblk = sb->s_fs_info; @@ -301,6 +304,7 @@ allocate_root: TRACE("Leaving squashfs_fill_super\n"); kfree(sblk); + unlock_kernel(); return 0; failed_mount: @@ -315,11 +319,13 @@ failed_mount: kfree(sb->s_fs_info); sb->s_fs_info = NULL; kfree(sblk); + unlock_kernel(); return err; failure: kfree(sb->s_fs_info); sb->s_fs_info = NULL; + unlock_kernel(); return -ENOMEM; } diff --git a/fs/udf/super.c b/fs/udf/super.c index 65412d84a45..76f3d6d97b4 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -1880,6 +1880,8 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) struct kernel_lb_addr rootdir, fileset; struct udf_sb_info *sbi; + lock_kernel(); + uopt.flags = (1 << UDF_FLAG_USE_AD_IN_ICB) | (1 << UDF_FLAG_STRICT); uopt.uid = -1; uopt.gid = -1; @@ -1888,8 +1890,10 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) uopt.dmode = UDF_INVALID_MODE; sbi = kzalloc(sizeof(struct udf_sb_info), GFP_KERNEL); - if (!sbi) + if (!sbi) { + unlock_kernel(); return -ENOMEM; + } sb->s_fs_info = sbi; @@ -2035,6 +2039,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) goto error_out; } sb->s_maxbytes = MAX_LFS_FILESIZE; + unlock_kernel(); return 0; error_out: @@ -2055,6 +2060,7 @@ error_out: kfree(sbi); sb->s_fs_info = NULL; + unlock_kernel(); return -EINVAL; } diff --git a/fs/ufs/super.c b/fs/ufs/super.c index d510c1b9181..6b9be90dae7 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -696,6 +696,8 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) unsigned maxsymlen; int ret = -EINVAL; + lock_kernel(); + uspi = NULL; ubh = NULL; flags = 0; @@ -1163,6 +1165,7 @@ magic_found: goto failed; UFSD("EXIT\n"); + unlock_kernel(); return 0; dalloc_failed: @@ -1174,10 +1177,12 @@ failed: kfree(sbi); sb->s_fs_info = NULL; UFSD("EXIT (FAILED)\n"); + unlock_kernel(); return ret; failed_nomem: UFSD("EXIT (NOMEM)\n"); + unlock_kernel(); return -ENOMEM; } diff --git a/kernel/cgroup.c b/kernel/cgroup.c index c9483d8f614..a7ba3bccadc 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1430,6 +1430,8 @@ static int cgroup_get_sb(struct file_system_type *fs_type, struct super_block *sb; struct cgroupfs_root *new_root; + lock_kernel(); + /* First find the desired set of subsystems */ mutex_lock(&cgroup_mutex); ret = parse_cgroupfs_options(data, &opts); @@ -1559,6 +1561,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type, simple_set_mnt(mnt, sb); kfree(opts.release_agent); kfree(opts.name); + unlock_kernel(); return 0; drop_new_super: @@ -1568,6 +1571,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type, out_err: kfree(opts.release_agent); kfree(opts.name); + unlock_kernel(); return ret; } -- cgit v1.2.3-70-g09d2 From 38d018dba3f725b969f196550d92a6ec1c092428 Mon Sep 17 00:00:00 2001 From: Jan Blunck Date: Wed, 24 Feb 2010 13:25:34 +0100 Subject: BKL: Remove BKL from cgroup The BKL is only used in remount_fs and get_sb that are both protected by the superblocks s_umount rw_semaphore. Therefore it is safe to remove the BKL entirely. Signed-off-by: Jan Blunck Signed-off-by: Arnd Bergmann --- kernel/cgroup.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'kernel/cgroup.c') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index a7ba3bccadc..304d2775994 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -52,7 +52,6 @@ #include #include #include -#include #include #include #include /* TODO: replace with more sophisticated array */ @@ -1222,7 +1221,6 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) struct cgroup *cgrp = &root->top_cgroup; struct cgroup_sb_opts opts; - lock_kernel(); mutex_lock(&cgrp->dentry->d_inode->i_mutex); mutex_lock(&cgroup_mutex); @@ -1255,7 +1253,6 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) kfree(opts.name); mutex_unlock(&cgroup_mutex); mutex_unlock(&cgrp->dentry->d_inode->i_mutex); - unlock_kernel(); return ret; } @@ -1430,8 +1427,6 @@ static int cgroup_get_sb(struct file_system_type *fs_type, struct super_block *sb; struct cgroupfs_root *new_root; - lock_kernel(); - /* First find the desired set of subsystems */ mutex_lock(&cgroup_mutex); ret = parse_cgroupfs_options(data, &opts); @@ -1561,7 +1556,6 @@ static int cgroup_get_sb(struct file_system_type *fs_type, simple_set_mnt(mnt, sb); kfree(opts.release_agent); kfree(opts.name); - unlock_kernel(); return 0; drop_new_super: @@ -1571,8 +1565,6 @@ static int cgroup_get_sb(struct file_system_type *fs_type, out_err: kfree(opts.release_agent); kfree(opts.name); - unlock_kernel(); - return ret; } -- cgit v1.2.3-70-g09d2 From 85fe4025c616a7c0ed07bc2fc8c5371b07f3888c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 23 Oct 2010 11:19:54 -0400 Subject: fs: do not assign default i_ino in new_inode Instead of always assigning an increasing inode number in new_inode move the call to assign it into those callers that actually need it. For now callers that need it is estimated conservatively, that is the call is added to all filesystems that do not assign an i_ino by themselves. For a few more filesystems we can avoid assigning any inode number given that they aren't user visible, and for others it could be done lazily when an inode number is actually needed, but that's left for later patches. Signed-off-by: Christoph Hellwig Signed-off-by: Dave Chinner Signed-off-by: Al Viro --- drivers/infiniband/hw/ipath/ipath_fs.c | 1 + drivers/infiniband/hw/qib/qib_fs.c | 1 + drivers/misc/ibmasm/ibmasmfs.c | 1 + drivers/oprofile/oprofilefs.c | 1 + drivers/usb/core/inode.c | 1 + drivers/usb/gadget/f_fs.c | 1 + drivers/usb/gadget/inode.c | 1 + fs/anon_inodes.c | 1 + fs/autofs4/inode.c | 1 + fs/binfmt_misc.c | 1 + fs/configfs/inode.c | 1 + fs/debugfs/inode.c | 1 + fs/ext4/mballoc.c | 1 + fs/freevxfs/vxfs_inode.c | 1 + fs/fuse/control.c | 1 + fs/hugetlbfs/inode.c | 1 + fs/inode.c | 4 ++-- fs/ocfs2/dlmfs/dlmfs.c | 2 ++ fs/pipe.c | 2 ++ fs/proc/base.c | 2 ++ fs/proc/proc_sysctl.c | 2 ++ fs/ramfs/inode.c | 1 + fs/xfs/linux-2.6/xfs_buf.c | 1 + include/linux/fs.h | 1 + ipc/mqueue.c | 1 + kernel/cgroup.c | 1 + mm/shmem.c | 1 + net/socket.c | 1 + net/sunrpc/rpc_pipe.c | 1 + security/inode.c | 1 + security/selinux/selinuxfs.c | 1 + 31 files changed, 36 insertions(+), 2 deletions(-) (limited to 'kernel/cgroup.c') diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index d13e72685dc..12d5bf76302 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -57,6 +57,7 @@ static int ipathfs_mknod(struct inode *dir, struct dentry *dentry, goto bail; } + inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; inode->i_private = data; diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index a0e6613e8be..7e433d75c77 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -58,6 +58,7 @@ static int qibfs_mknod(struct inode *dir, struct dentry *dentry, goto bail; } + inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_uid = 0; inode->i_gid = 0; diff --git a/drivers/misc/ibmasm/ibmasmfs.c b/drivers/misc/ibmasm/ibmasmfs.c index af2497ae5fe..0a53500636c 100644 --- a/drivers/misc/ibmasm/ibmasmfs.c +++ b/drivers/misc/ibmasm/ibmasmfs.c @@ -146,6 +146,7 @@ static struct inode *ibmasmfs_make_inode(struct super_block *sb, int mode) struct inode *ret = new_inode(sb); if (ret) { + ret->i_ino = get_next_ino(); ret->i_mode = mode; ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME; } diff --git a/drivers/oprofile/oprofilefs.c b/drivers/oprofile/oprofilefs.c index 95f711b251a..449de59bf35 100644 --- a/drivers/oprofile/oprofilefs.c +++ b/drivers/oprofile/oprofilefs.c @@ -28,6 +28,7 @@ static struct inode *oprofilefs_get_inode(struct super_block *sb, int mode) struct inode *inode = new_inode(sb); if (inode) { + inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; } diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c index 095fa536669..e2f63c0ea09 100644 --- a/drivers/usb/core/inode.c +++ b/drivers/usb/core/inode.c @@ -276,6 +276,7 @@ static struct inode *usbfs_get_inode (struct super_block *sb, int mode, dev_t de struct inode *inode = new_inode(sb); if (inode) { + inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); diff --git a/drivers/usb/gadget/f_fs.c b/drivers/usb/gadget/f_fs.c index e4f59505520..e093fd8d04d 100644 --- a/drivers/usb/gadget/f_fs.c +++ b/drivers/usb/gadget/f_fs.c @@ -980,6 +980,7 @@ ffs_sb_make_inode(struct super_block *sb, void *data, if (likely(inode)) { struct timespec current_time = CURRENT_TIME; + inode->i_ino = usbfs_get_inode(); inode->i_mode = perms->mode; inode->i_uid = perms->uid; inode->i_gid = perms->gid; diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c index d1d72d946b0..ba145e7fbe0 100644 --- a/drivers/usb/gadget/inode.c +++ b/drivers/usb/gadget/inode.c @@ -1991,6 +1991,7 @@ gadgetfs_make_inode (struct super_block *sb, struct inode *inode = new_inode (sb); if (inode) { + inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_uid = default_uid; inode->i_gid = default_gid; diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 9c8e87b0361..5365527ca43 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -193,6 +193,7 @@ static struct inode *anon_inode_mkinode(void) if (!inode) return ERR_PTR(-ENOMEM); + inode->i_ino = get_next_ino(); inode->i_fop = &anon_inode_fops; inode->i_mapping->a_ops = &anon_aops; diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 821b2b955da..ac87e49fa70 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -398,6 +398,7 @@ struct inode *autofs4_get_inode(struct super_block *sb, inode->i_gid = sb->s_root->d_inode->i_gid; } inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + inode->i_ino = get_next_ino(); if (S_ISDIR(inf->mode)) { inode->i_nlink = 2; diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 139fc8083f5..29990f0eee0 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -495,6 +495,7 @@ static struct inode *bm_get_inode(struct super_block *sb, int mode) struct inode * inode = new_inode(sb); if (inode) { + inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_atime = inode->i_mtime = inode->i_ctime = current_fs_time(inode->i_sb); diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index cf78d44a8d6..253476d78ed 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c @@ -135,6 +135,7 @@ struct inode * configfs_new_inode(mode_t mode, struct configfs_dirent * sd) { struct inode * inode = new_inode(configfs_sb); if (inode) { + inode->i_ino = get_next_ino(); inode->i_mapping->a_ops = &configfs_aops; inode->i_mapping->backing_dev_info = &configfs_backing_dev_info; inode->i_op = &configfs_inode_operations; diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 30a87b3dbca..a4ed8380e98 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -40,6 +40,7 @@ static struct inode *debugfs_get_inode(struct super_block *sb, int mode, dev_t d struct inode *inode = new_inode(sb); if (inode) { + inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; switch (mode & S_IFMT) { diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 19aa0d44d82..42f77b1dc72 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2373,6 +2373,7 @@ static int ext4_mb_init_backend(struct super_block *sb) printk(KERN_ERR "EXT4-fs: can't get new inode\n"); goto err_freesgi; } + sbi->s_buddy_cache->i_ino = get_next_ino(); EXT4_I(sbi->s_buddy_cache)->i_disksize = 0; for (i = 0; i < ngroups; i++) { desc = ext4_get_group_desc(sb, i, NULL); diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c index 79d1b4ea13e..8c04eac5079 100644 --- a/fs/freevxfs/vxfs_inode.c +++ b/fs/freevxfs/vxfs_inode.c @@ -260,6 +260,7 @@ vxfs_get_fake_inode(struct super_block *sbp, struct vxfs_inode_info *vip) struct inode *ip = NULL; if ((ip = new_inode(sbp))) { + ip->i_ino = get_next_ino(); vxfs_iinit(ip, vip); ip->i_mapping->a_ops = &vxfs_aops; } diff --git a/fs/fuse/control.c b/fs/fuse/control.c index 7367e177186..4eba07661e5 100644 --- a/fs/fuse/control.c +++ b/fs/fuse/control.c @@ -222,6 +222,7 @@ static struct dentry *fuse_ctl_add_dentry(struct dentry *parent, if (!inode) return NULL; + inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_uid = fc->user_id; inode->i_gid = fc->group_id; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 113eba3d3c3..8d0607b3726 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -455,6 +455,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid, inode = new_inode(sb); if (inode) { struct hugetlbfs_inode_info *info; + inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_uid = uid; inode->i_gid = gid; diff --git a/fs/inode.c b/fs/inode.c index 46a3e120b19..2cd2e48f7a2 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -735,7 +735,7 @@ repeat: #define LAST_INO_BATCH 1024 static DEFINE_PER_CPU(unsigned int, last_ino); -static unsigned int get_next_ino(void) +unsigned int get_next_ino(void) { unsigned int *p = &get_cpu_var(last_ino); unsigned int res = *p; @@ -753,6 +753,7 @@ static unsigned int get_next_ino(void) put_cpu_var(last_ino); return res; } +EXPORT_SYMBOL(get_next_ino); /** * new_inode - obtain an inode @@ -776,7 +777,6 @@ struct inode *new_inode(struct super_block *sb) if (inode) { spin_lock(&inode_lock); __inode_sb_list_add(inode); - inode->i_ino = get_next_ino(); inode->i_state = 0; spin_unlock(&inode_lock); } diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index a7ebd9d42dc..75e115f1bd7 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c @@ -400,6 +400,7 @@ static struct inode *dlmfs_get_root_inode(struct super_block *sb) if (inode) { ip = DLMFS_I(inode); + inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); @@ -425,6 +426,7 @@ static struct inode *dlmfs_get_inode(struct inode *parent, if (!inode) return NULL; + inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); diff --git a/fs/pipe.c b/fs/pipe.c index 37eb1ebeaa9..d2d7566ce68 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -954,6 +954,8 @@ static struct inode * get_pipe_inode(void) if (!inode) goto fail_inode; + inode->i_ino = get_next_ino(); + pipe = alloc_pipe_info(inode); if (!pipe) goto fail_iput; diff --git a/fs/proc/base.c b/fs/proc/base.c index fb2a5abd4e4..9883f1e1833 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1603,6 +1603,7 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st /* Common stuff */ ei = PROC_I(inode); + inode->i_ino = get_next_ino(); inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; inode->i_op = &proc_def_inode_operations; @@ -2549,6 +2550,7 @@ static struct dentry *proc_base_instantiate(struct inode *dir, /* Initialize the inode */ ei = PROC_I(inode); + inode->i_ino = get_next_ino(); inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; /* diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 2fc52552271..b652cb00906 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -23,6 +23,8 @@ static struct inode *proc_sys_make_inode(struct super_block *sb, if (!inode) goto out; + inode->i_ino = get_next_ino(); + sysctl_head_get(head); ei = PROC_I(inode); ei->sysctl = head; diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index a5ebae70dc6..67fadb1ad2c 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c @@ -58,6 +58,7 @@ struct inode *ramfs_get_inode(struct super_block *sb, struct inode * inode = new_inode(sb); if (inode) { + inode->i_ino = get_next_ino(); inode_init_owner(inode, dir, mode); inode->i_mapping->a_ops = &ramfs_aops; inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info; diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index ba5312802aa..63fd2c07cb5 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -1580,6 +1580,7 @@ xfs_mapping_buftarg( XFS_BUFTARG_NAME(btp)); return ENOMEM; } + inode->i_ino = get_next_ino(); inode->i_mode = S_IFBLK; inode->i_bdev = bdev; inode->i_rdev = bdev->bd_dev; diff --git a/include/linux/fs.h b/include/linux/fs.h index bd6ae6c71fc..4a573cf13f5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2191,6 +2191,7 @@ extern struct inode * iget_locked(struct super_block *, unsigned long); extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *); extern int insert_inode_locked(struct inode *); extern void unlock_new_inode(struct inode *); +extern unsigned int get_next_ino(void); extern void __iget(struct inode * inode); extern void iget_failed(struct inode *); diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 80b35ffca25..3a61ffefe88 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -116,6 +116,7 @@ static struct inode *mqueue_get_inode(struct super_block *sb, inode = new_inode(sb); if (inode) { + inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 7b69b8d0313..9270d532ec3 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -777,6 +777,7 @@ static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb) struct inode *inode = new_inode(sb); if (inode) { + inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); diff --git a/mm/shmem.c b/mm/shmem.c index d4e2852526e..f6d350e8adc 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1586,6 +1586,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode inode = new_inode(sb); if (inode) { + inode->i_ino = get_next_ino(); inode_init_owner(inode, dir, mode); inode->i_blocks = 0; inode->i_mapping->backing_dev_info = &shmem_backing_dev_info; diff --git a/net/socket.c b/net/socket.c index d223725f99e..5cac1c70775 100644 --- a/net/socket.c +++ b/net/socket.c @@ -480,6 +480,7 @@ static struct socket *sock_alloc(void) sock = SOCKET_I(inode); kmemcheck_annotate_bitfield(sock, type); + inode->i_ino = get_next_ino(); inode->i_mode = S_IFSOCK | S_IRWXUGO; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 52f25243214..7df92d237cb 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -445,6 +445,7 @@ rpc_get_inode(struct super_block *sb, umode_t mode) struct inode *inode = new_inode(sb); if (!inode) return NULL; + inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; switch(mode & S_IFMT) { diff --git a/security/inode.c b/security/inode.c index 88839866cbc..cb8f47c66a5 100644 --- a/security/inode.c +++ b/security/inode.c @@ -61,6 +61,7 @@ static struct inode *get_inode(struct super_block *sb, int mode, dev_t dev) struct inode *inode = new_inode(sb); if (inode) { + inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; switch (mode & S_IFMT) { diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 87e0556bae7..55a755c1a1b 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -978,6 +978,7 @@ static struct inode *sel_make_inode(struct super_block *sb, int mode) struct inode *ret = new_inode(sb); if (ret) { + ret->i_ino = get_next_ino(); ret->i_mode = mode; ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME; } -- cgit v1.2.3-70-g09d2 From 97978e6d1f2da0073416870410459694fbdbfd9b Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Wed, 27 Oct 2010 15:33:35 -0700 Subject: cgroup: add clone_children control file The ns_cgroup is a control group interacting with the namespaces. When a new namespace is created, a corresponding cgroup is automatically created too. The cgroup name is the pid of the process who did 'unshare' or the child of 'clone'. This cgroup is tied with the namespace because it prevents a process to escape the control group and use the post_clone callback, so the child cgroup inherits the values of the parent cgroup. Unfortunately, the more we use this cgroup and the more we are facing problems with it: (1) when a process unshares, the cgroup name may conflict with a previous cgroup with the same pid, so unshare or clone return -EEXIST (2) the cgroup creation is out of control because there may have an application creating several namespaces where the system will automatically create several cgroups in his back and let them on the cgroupfs (eg. a vrf based on the network namespace). (3) the mix of (1) and (2) force an administrator to regularly check and clean these cgroups. This patchset removes the ns_cgroup by adding a new flag to the cgroup and the cgroupfs mount option. It enables the copy of the parent cgroup when a child cgroup is created. We can then safely remove the ns_cgroup as this flag brings a compatibility. We have now to manually create and add the task to a cgroup, which is consistent with the cgroup framework. This patch: Sent as an answer to a previous thread around the ns_cgroup. https://lists.linux-foundation.org/pipermail/containers/2009-June/018627.html It adds a control file 'clone_children' for a cgroup. This control file is a boolean specifying if the child cgroup should be a clone of the parent cgroup or not. The default value is 'false'. This flag makes the child cgroup to call the post_clone callback of all the subsystem, if it is available. At present, the cpuset is the only one which had implemented the post_clone callback. The option can be set at mount time by specifying the 'clone_children' mount option. Signed-off-by: Daniel Lezcano Signed-off-by: Serge E. Hallyn Cc: Eric W. Biederman Acked-by: Paul Menage Reviewed-by: Li Zefan Cc: Jamal Hadi Salim Cc: Matt Helsley Acked-by: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/cgroups/cgroups.txt | 14 ++++++++++++-- include/linux/cgroup.h | 4 ++++ kernel/cgroup.c | 39 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 55 insertions(+), 2 deletions(-) (limited to 'kernel/cgroup.c') diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt index b34823ff164..190018b0c64 100644 --- a/Documentation/cgroups/cgroups.txt +++ b/Documentation/cgroups/cgroups.txt @@ -18,7 +18,8 @@ CONTENTS: 1.2 Why are cgroups needed ? 1.3 How are cgroups implemented ? 1.4 What does notify_on_release do ? - 1.5 How do I use cgroups ? + 1.5 What does clone_children do ? + 1.6 How do I use cgroups ? 2. Usage Examples and Syntax 2.1 Basic Usage 2.2 Attaching processes @@ -293,7 +294,16 @@ notify_on_release in the root cgroup at system boot is disabled value of their parents notify_on_release setting. The default value of a cgroup hierarchy's release_agent path is empty. -1.5 How do I use cgroups ? +1.5 What does clone_children do ? +--------------------------------- + +If the clone_children flag is enabled (1) in a cgroup, then all +cgroups created beneath will call the post_clone callbacks for each +subsystem of the newly created cgroup. Usually when this callback is +implemented for a subsystem, it copies the values of the parent +subsystem, this is the case for the cpuset. + +1.6 How do I use cgroups ? -------------------------- To start a new job that is to be contained within a cgroup, using diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 709dfb901d1..ed4ba111bc8 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -154,6 +154,10 @@ enum { * A thread in rmdir() is wating for this cgroup. */ CGRP_WAIT_ON_RMDIR, + /* + * Clone cgroup values when creating a new child cgroup + */ + CGRP_CLONE_CHILDREN, }; /* which pidlist file are we talking about? */ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 9270d532ec3..4b218a46ddd 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -243,6 +243,11 @@ static int notify_on_release(const struct cgroup *cgrp) return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); } +static int clone_children(const struct cgroup *cgrp) +{ + return test_bit(CGRP_CLONE_CHILDREN, &cgrp->flags); +} + /* * for_each_subsys() allows you to iterate on each subsystem attached to * an active hierarchy @@ -1040,6 +1045,8 @@ static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs) seq_puts(seq, ",noprefix"); if (strlen(root->release_agent_path)) seq_printf(seq, ",release_agent=%s", root->release_agent_path); + if (clone_children(&root->top_cgroup)) + seq_puts(seq, ",clone_children"); if (strlen(root->name)) seq_printf(seq, ",name=%s", root->name); mutex_unlock(&cgroup_mutex); @@ -1050,6 +1057,7 @@ struct cgroup_sb_opts { unsigned long subsys_bits; unsigned long flags; char *release_agent; + bool clone_children; char *name; /* User explicitly requested empty subsystem */ bool none; @@ -1097,6 +1105,8 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) opts->none = true; } else if (!strcmp(token, "noprefix")) { set_bit(ROOT_NOPREFIX, &opts->flags); + } else if (!strcmp(token, "clone_children")) { + opts->clone_children = true; } else if (!strncmp(token, "release_agent=", 14)) { /* Specifying two release agents is forbidden */ if (opts->release_agent) @@ -1355,6 +1365,8 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts) strcpy(root->release_agent_path, opts->release_agent); if (opts->name) strcpy(root->name, opts->name); + if (opts->clone_children) + set_bit(CGRP_CLONE_CHILDREN, &root->top_cgroup.flags); return root; } @@ -3173,6 +3185,23 @@ fail: return ret; } +static u64 cgroup_clone_children_read(struct cgroup *cgrp, + struct cftype *cft) +{ + return clone_children(cgrp); +} + +static int cgroup_clone_children_write(struct cgroup *cgrp, + struct cftype *cft, + u64 val) +{ + if (val) + set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags); + else + clear_bit(CGRP_CLONE_CHILDREN, &cgrp->flags); + return 0; +} + /* * for the common functions, 'private' gives the type of file */ @@ -3203,6 +3232,11 @@ static struct cftype files[] = { .write_string = cgroup_write_event_control, .mode = S_IWUGO, }, + { + .name = "cgroup.clone_children", + .read_u64 = cgroup_clone_children_read, + .write_u64 = cgroup_clone_children_write, + }, }; static struct cftype cft_release_agent = { @@ -3332,6 +3366,9 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, if (notify_on_release(parent)) set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); + if (clone_children(parent)) + set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags); + for_each_subsys(root, ss) { struct cgroup_subsys_state *css = ss->create(ss, cgrp); @@ -3346,6 +3383,8 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, goto err_destroy; } /* At error, ->destroy() callback has to free assigned ID. */ + if (clone_children(parent) && ss->post_clone) + ss->post_clone(ss, cgrp); } cgroup_lock_hierarchy(root); -- cgit v1.2.3-70-g09d2 From 32a8cf235e2f192eb002755076994525cdbaa35a Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Wed, 27 Oct 2010 15:33:37 -0700 Subject: cgroup: make the mount options parsing more accurate Current behavior: ================= (1) When we mount a cgroup, we can specify the 'all' option which means to enable all the cgroup subsystems. This is the default option when no option is specified. (2) If we want to mount a cgroup with a subset of the supported cgroup subsystems, we have to specify a subsystems name list for the mount option. (3) If we specify another option like 'noprefix' or 'release_agent', the actual code wants the 'all' or a subsystem name option specified also. Not critical but a bit not friendly as we should assume (1) in this case. (4) Logically, the 'all' option is mutually exclusive with a subsystem name, but this is not detected. In other words: succeed : mount -t cgroup -o all,freezer cgroup /cgroup => is it 'all' or 'freezer' ? fails : mount -t cgroup -o noprefix cgroup /cgroup => succeed if we do '-o noprefix,all' The following patches consolidate a bit the mount options check. New behavior: ============= (1) untouched (2) untouched (3) the 'all' option will be by default when specifying other than a subsystem name option (4) raises an error In other words: fails : mount -t cgroup -o all,freezer cgroup /cgroup succeed : mount -t cgroup -o noprefix cgroup /cgroup For the sake of lisibility, the if ... then ... else ... if ... indentation when parsing the options has been changed to: if ... then ... continue fi Signed-off-by: Daniel Lezcano Signed-off-by: Serge E. Hallyn Reviewed-by: Li Zefan Reviewed-by: Paul Menage Cc: Eric W. Biederman Cc: Jamal Hadi Salim Cc: Matt Helsley Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cgroup.c | 90 ++++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 60 insertions(+), 30 deletions(-) (limited to 'kernel/cgroup.c') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 4b218a46ddd..3e6517e51fd 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1074,7 +1074,8 @@ struct cgroup_sb_opts { */ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) { - char *token, *o = data ?: "all"; + char *token, *o = data; + bool all_ss = false, one_ss = false; unsigned long mask = (unsigned long)-1; int i; bool module_pin_failed = false; @@ -1090,24 +1091,27 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) while ((token = strsep(&o, ",")) != NULL) { if (!*token) return -EINVAL; - if (!strcmp(token, "all")) { - /* Add all non-disabled subsystems */ - opts->subsys_bits = 0; - for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { - struct cgroup_subsys *ss = subsys[i]; - if (ss == NULL) - continue; - if (!ss->disabled) - opts->subsys_bits |= 1ul << i; - } - } else if (!strcmp(token, "none")) { + if (!strcmp(token, "none")) { /* Explicitly have no subsystems */ opts->none = true; - } else if (!strcmp(token, "noprefix")) { + continue; + } + if (!strcmp(token, "all")) { + /* Mutually exclusive option 'all' + subsystem name */ + if (one_ss) + return -EINVAL; + all_ss = true; + continue; + } + if (!strcmp(token, "noprefix")) { set_bit(ROOT_NOPREFIX, &opts->flags); - } else if (!strcmp(token, "clone_children")) { + continue; + } + if (!strcmp(token, "clone_children")) { opts->clone_children = true; - } else if (!strncmp(token, "release_agent=", 14)) { + continue; + } + if (!strncmp(token, "release_agent=", 14)) { /* Specifying two release agents is forbidden */ if (opts->release_agent) return -EINVAL; @@ -1115,7 +1119,9 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) kstrndup(token + 14, PATH_MAX - 1, GFP_KERNEL); if (!opts->release_agent) return -ENOMEM; - } else if (!strncmp(token, "name=", 5)) { + continue; + } + if (!strncmp(token, "name=", 5)) { const char *name = token + 5; /* Can't specify an empty name */ if (!strlen(name)) @@ -1137,20 +1143,44 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) GFP_KERNEL); if (!opts->name) return -ENOMEM; - } else { - struct cgroup_subsys *ss; - for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { - ss = subsys[i]; - if (ss == NULL) - continue; - if (!strcmp(token, ss->name)) { - if (!ss->disabled) - set_bit(i, &opts->subsys_bits); - break; - } - } - if (i == CGROUP_SUBSYS_COUNT) - return -ENOENT; + + continue; + } + + for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { + struct cgroup_subsys *ss = subsys[i]; + if (ss == NULL) + continue; + if (strcmp(token, ss->name)) + continue; + if (ss->disabled) + continue; + + /* Mutually exclusive option 'all' + subsystem name */ + if (all_ss) + return -EINVAL; + set_bit(i, &opts->subsys_bits); + one_ss = true; + + break; + } + if (i == CGROUP_SUBSYS_COUNT) + return -ENOENT; + } + + /* + * If the 'all' option was specified select all the subsystems, + * otherwise 'all, 'none' and a subsystem name options were not + * specified, let's default to 'all' + */ + if (all_ss || (!all_ss && !one_ss && !opts->none)) { + for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { + struct cgroup_subsys *ss = subsys[i]; + if (ss == NULL) + continue; + if (ss->disabled) + continue; + set_bit(i, &opts->subsys_bits); } } -- cgit v1.2.3-70-g09d2 From f4a2589feaef0a9b737a3e582b37ee96695bb25f Mon Sep 17 00:00:00 2001 From: Evgeny Kuznetsov Date: Wed, 27 Oct 2010 15:33:37 -0700 Subject: cgroups: add check for strcpy destination string overflow Function "strcpy" is used without check for maximum allowed source string length and could cause destination string overflow. Check for string length is added before using "strcpy". Function now is return error if source string length is more than a maximum. akpm: presently considered NotABug, but add the check for general future-safeness and robustness. Signed-off-by: Evgeny Kuznetsov Acked-by: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cgroup.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/cgroup.c') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 3e6517e51fd..5cf366965d0 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1922,6 +1922,8 @@ static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft, const char *buffer) { BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX); + if (strlen(buffer) >= PATH_MAX) + return -EINVAL; if (!cgroup_lock_live_group(cgrp)) return -ENODEV; strcpy(cgrp->root->release_agent_path, buffer); -- cgit v1.2.3-70-g09d2 From f7e835710ab5f6e43933c983f38f2d2e262b718c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 26 Jul 2010 13:23:11 +0400 Subject: convert cgroup and cpuset Signed-off-by: Al Viro --- kernel/cgroup.c | 11 +++++------ kernel/cpuset.c | 13 ++++++------- 2 files changed, 11 insertions(+), 13 deletions(-) (limited to 'kernel/cgroup.c') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 5cf366965d0..66a416b42c1 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1460,9 +1460,9 @@ static int cgroup_get_rootdir(struct super_block *sb) return 0; } -static int cgroup_get_sb(struct file_system_type *fs_type, +static struct dentry *cgroup_mount(struct file_system_type *fs_type, int flags, const char *unused_dev_name, - void *data, struct vfsmount *mnt) + void *data) { struct cgroup_sb_opts opts; struct cgroupfs_root *root; @@ -1596,10 +1596,9 @@ static int cgroup_get_sb(struct file_system_type *fs_type, drop_parsed_module_refcounts(opts.subsys_bits); } - simple_set_mnt(mnt, sb); kfree(opts.release_agent); kfree(opts.name); - return 0; + return dget(sb->s_root); drop_new_super: deactivate_locked_super(sb); @@ -1608,7 +1607,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type, out_err: kfree(opts.release_agent); kfree(opts.name); - return ret; + return ERR_PTR(ret); } static void cgroup_kill_sb(struct super_block *sb) { @@ -1658,7 +1657,7 @@ static void cgroup_kill_sb(struct super_block *sb) { static struct file_system_type cgroup_fs_type = { .name = "cgroup", - .get_sb = cgroup_get_sb, + .mount = cgroup_mount, .kill_sb = cgroup_kill_sb, }; diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 51b143e2a07..4349935c2ad 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -231,18 +231,17 @@ static DEFINE_SPINLOCK(cpuset_buffer_lock); * users. If someone tries to mount the "cpuset" filesystem, we * silently switch it to mount "cgroup" instead */ -static int cpuset_get_sb(struct file_system_type *fs_type, - int flags, const char *unused_dev_name, - void *data, struct vfsmount *mnt) +static struct dentry *cpuset_mount(struct file_system_type *fs_type, + int flags, const char *unused_dev_name, void *data) { struct file_system_type *cgroup_fs = get_fs_type("cgroup"); - int ret = -ENODEV; + struct dentry *ret = ERR_PTR(-ENODEV); if (cgroup_fs) { char mountopts[] = "cpuset,noprefix," "release_agent=/sbin/cpuset_release_agent"; - ret = cgroup_fs->get_sb(cgroup_fs, flags, - unused_dev_name, mountopts, mnt); + ret = cgroup_fs->mount(cgroup_fs, flags, + unused_dev_name, mountopts); put_filesystem(cgroup_fs); } return ret; @@ -250,7 +249,7 @@ static int cpuset_get_sb(struct file_system_type *fs_type, static struct file_system_type cpuset_fs_type = { .name = "cpuset", - .get_sb = cpuset_get_sb, + .mount = cpuset_mount, }; /* -- cgit v1.2.3-70-g09d2