diff options
Diffstat (limited to 'kernel/cgroup.c')
-rw-r--r-- | kernel/cgroup.c | 202 |
1 files changed, 146 insertions, 56 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index c9483d8f614..b24d7027b83 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -52,7 +52,6 @@ #include <linux/cgroupstats.h> #include <linux/hash.h> #include <linux/namei.h> -#include <linux/smp_lock.h> #include <linux/pid_namespace.h> #include <linux/idr.h> #include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */ @@ -138,7 +137,7 @@ struct css_id { * is called after synchronize_rcu(). But for safe use, css_is_removed() * css_tryget() should be used for avoiding race. */ - struct cgroup_subsys_state *css; + struct cgroup_subsys_state __rcu *css; /* * ID of this css. */ @@ -244,6 +243,11 @@ static int notify_on_release(const struct cgroup *cgrp) return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); } +static int clone_children(const struct cgroup *cgrp) +{ + return test_bit(CGRP_CLONE_CHILDREN, &cgrp->flags); +} + /* * for_each_subsys() allows you to iterate on each subsystem attached to * an active hierarchy @@ -760,6 +764,7 @@ EXPORT_SYMBOL_GPL(cgroup_unlock); */ static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode); +static struct dentry *cgroup_lookup(struct inode *, struct dentry *, struct nameidata *); static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry); static int cgroup_populate_dir(struct cgroup *cgrp); static const struct inode_operations cgroup_dir_inode_operations; @@ -778,6 +783,7 @@ static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb) struct inode *inode = new_inode(sb); if (inode) { + inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); @@ -855,6 +861,11 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode) iput(inode); } +static int cgroup_delete(const struct dentry *d) +{ + return 1; +} + static void remove_dir(struct dentry *d) { struct dentry *parent = dget(d->d_parent); @@ -869,25 +880,29 @@ static void cgroup_clear_directory(struct dentry *dentry) struct list_head *node; BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex)); - spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); node = dentry->d_subdirs.next; while (node != &dentry->d_subdirs) { struct dentry *d = list_entry(node, struct dentry, d_u.d_child); + + spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED); list_del_init(node); if (d->d_inode) { /* This should never be called on a cgroup * directory with child cgroups */ BUG_ON(d->d_inode->i_mode & S_IFDIR); - d = dget_locked(d); - spin_unlock(&dcache_lock); + dget_dlock(d); + spin_unlock(&d->d_lock); + spin_unlock(&dentry->d_lock); d_delete(d); simple_unlink(dentry->d_inode, d); dput(d); - spin_lock(&dcache_lock); - } + spin_lock(&dentry->d_lock); + } else + spin_unlock(&d->d_lock); node = dentry->d_subdirs.next; } - spin_unlock(&dcache_lock); + spin_unlock(&dentry->d_lock); } /* @@ -895,11 +910,16 @@ static void cgroup_clear_directory(struct dentry *dentry) */ static void cgroup_d_remove_dir(struct dentry *dentry) { + struct dentry *parent; + cgroup_clear_directory(dentry); - spin_lock(&dcache_lock); + parent = dentry->d_parent; + spin_lock(&parent->d_lock); + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); list_del_init(&dentry->d_u.d_child); - spin_unlock(&dcache_lock); + spin_unlock(&dentry->d_lock); + spin_unlock(&parent->d_lock); remove_dir(dentry); } @@ -1040,6 +1060,8 @@ static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs) seq_puts(seq, ",noprefix"); if (strlen(root->release_agent_path)) seq_printf(seq, ",release_agent=%s", root->release_agent_path); + if (clone_children(&root->top_cgroup)) + seq_puts(seq, ",clone_children"); if (strlen(root->name)) seq_printf(seq, ",name=%s", root->name); mutex_unlock(&cgroup_mutex); @@ -1050,6 +1072,7 @@ struct cgroup_sb_opts { unsigned long subsys_bits; unsigned long flags; char *release_agent; + bool clone_children; char *name; /* User explicitly requested empty subsystem */ bool none; @@ -1066,7 +1089,8 @@ struct cgroup_sb_opts { */ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) { - char *token, *o = data ?: "all"; + char *token, *o = data; + bool all_ss = false, one_ss = false; unsigned long mask = (unsigned long)-1; int i; bool module_pin_failed = false; @@ -1082,22 +1106,27 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) while ((token = strsep(&o, ",")) != NULL) { if (!*token) return -EINVAL; - if (!strcmp(token, "all")) { - /* Add all non-disabled subsystems */ - opts->subsys_bits = 0; - for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { - struct cgroup_subsys *ss = subsys[i]; - if (ss == NULL) - continue; - if (!ss->disabled) - opts->subsys_bits |= 1ul << i; - } - } else if (!strcmp(token, "none")) { + if (!strcmp(token, "none")) { /* Explicitly have no subsystems */ opts->none = true; - } else if (!strcmp(token, "noprefix")) { + continue; + } + if (!strcmp(token, "all")) { + /* Mutually exclusive option 'all' + subsystem name */ + if (one_ss) + return -EINVAL; + all_ss = true; + continue; + } + if (!strcmp(token, "noprefix")) { set_bit(ROOT_NOPREFIX, &opts->flags); - } else if (!strncmp(token, "release_agent=", 14)) { + continue; + } + if (!strcmp(token, "clone_children")) { + opts->clone_children = true; + continue; + } + if (!strncmp(token, "release_agent=", 14)) { /* Specifying two release agents is forbidden */ if (opts->release_agent) return -EINVAL; @@ -1105,7 +1134,9 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) kstrndup(token + 14, PATH_MAX - 1, GFP_KERNEL); if (!opts->release_agent) return -ENOMEM; - } else if (!strncmp(token, "name=", 5)) { + continue; + } + if (!strncmp(token, "name=", 5)) { const char *name = token + 5; /* Can't specify an empty name */ if (!strlen(name)) @@ -1127,20 +1158,44 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) GFP_KERNEL); if (!opts->name) return -ENOMEM; - } else { - struct cgroup_subsys *ss; - for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { - ss = subsys[i]; - if (ss == NULL) - continue; - if (!strcmp(token, ss->name)) { - if (!ss->disabled) - set_bit(i, &opts->subsys_bits); - break; - } - } - if (i == CGROUP_SUBSYS_COUNT) - return -ENOENT; + + continue; + } + + for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { + struct cgroup_subsys *ss = subsys[i]; + if (ss == NULL) + continue; + if (strcmp(token, ss->name)) + continue; + if (ss->disabled) + continue; + + /* Mutually exclusive option 'all' + subsystem name */ + if (all_ss) + return -EINVAL; + set_bit(i, &opts->subsys_bits); + one_ss = true; + + break; + } + if (i == CGROUP_SUBSYS_COUNT) + return -ENOENT; + } + + /* + * If the 'all' option was specified select all the subsystems, + * otherwise 'all, 'none' and a subsystem name options were not + * specified, let's default to 'all' + */ + if (all_ss || (!all_ss && !one_ss && !opts->none)) { + for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { + struct cgroup_subsys *ss = subsys[i]; + if (ss == NULL) + continue; + if (ss->disabled) + continue; + set_bit(i, &opts->subsys_bits); } } @@ -1222,7 +1277,6 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) struct cgroup *cgrp = &root->top_cgroup; struct cgroup_sb_opts opts; - lock_kernel(); mutex_lock(&cgrp->dentry->d_inode->i_mutex); mutex_lock(&cgroup_mutex); @@ -1255,7 +1309,6 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) kfree(opts.name); mutex_unlock(&cgroup_mutex); mutex_unlock(&cgrp->dentry->d_inode->i_mutex); - unlock_kernel(); return ret; } @@ -1357,6 +1410,8 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts) strcpy(root->release_agent_path, opts->release_agent); if (opts->name) strcpy(root->name, opts->name); + if (opts->clone_children) + set_bit(CGRP_CLONE_CHILDREN, &root->top_cgroup.flags); return root; } @@ -1400,6 +1455,11 @@ static int cgroup_set_super(struct super_block *sb, void *data) static int cgroup_get_rootdir(struct super_block *sb) { + static const struct dentry_operations cgroup_dops = { + .d_iput = cgroup_diput, + .d_delete = cgroup_delete, + }; + struct inode *inode = cgroup_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR, sb); struct dentry *dentry; @@ -1417,12 +1477,14 @@ static int cgroup_get_rootdir(struct super_block *sb) return -ENOMEM; } sb->s_root = dentry; + /* for everything else we want ->d_op set */ + sb->s_d_op = &cgroup_dops; return 0; } -static int cgroup_get_sb(struct file_system_type *fs_type, +static struct dentry *cgroup_mount(struct file_system_type *fs_type, int flags, const char *unused_dev_name, - void *data, struct vfsmount *mnt) + void *data) { struct cgroup_sb_opts opts; struct cgroupfs_root *root; @@ -1556,10 +1618,9 @@ static int cgroup_get_sb(struct file_system_type *fs_type, drop_parsed_module_refcounts(opts.subsys_bits); } - simple_set_mnt(mnt, sb); kfree(opts.release_agent); kfree(opts.name); - return 0; + return dget(sb->s_root); drop_new_super: deactivate_locked_super(sb); @@ -1568,8 +1629,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type, out_err: kfree(opts.release_agent); kfree(opts.name); - - return ret; + return ERR_PTR(ret); } static void cgroup_kill_sb(struct super_block *sb) { @@ -1619,7 +1679,7 @@ static void cgroup_kill_sb(struct super_block *sb) { static struct file_system_type cgroup_fs_type = { .name = "cgroup", - .get_sb = cgroup_get_sb, + .mount = cgroup_mount, .kill_sb = cgroup_kill_sb, }; @@ -1883,6 +1943,8 @@ static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft, const char *buffer) { BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX); + if (strlen(buffer) >= PATH_MAX) + return -EINVAL; if (!cgroup_lock_live_group(cgrp)) return -ENODEV; strcpy(cgrp->root->release_agent_path, buffer); @@ -2140,12 +2202,20 @@ static const struct file_operations cgroup_file_operations = { }; static const struct inode_operations cgroup_dir_inode_operations = { - .lookup = simple_lookup, + .lookup = cgroup_lookup, .mkdir = cgroup_mkdir, .rmdir = cgroup_rmdir, .rename = cgroup_rename, }; +static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +{ + if (dentry->d_name.len > NAME_MAX) + return ERR_PTR(-ENAMETOOLONG); + d_add(dentry, NULL); + return NULL; +} + /* * Check if a file is a control file */ @@ -2159,10 +2229,6 @@ static inline struct cftype *__file_cft(struct file *file) static int cgroup_create_file(struct dentry *dentry, mode_t mode, struct super_block *sb) { - static const struct dentry_operations cgroup_dops = { - .d_iput = cgroup_diput, - }; - struct inode *inode; if (!dentry) @@ -2188,7 +2254,6 @@ static int cgroup_create_file(struct dentry *dentry, mode_t mode, inode->i_size = 0; inode->i_fop = &cgroup_file_operations; } - dentry->d_op = &cgroup_dops; d_instantiate(dentry, inode); dget(dentry); /* Extra count - pin the dentry in core */ return 0; @@ -3176,6 +3241,23 @@ fail: return ret; } +static u64 cgroup_clone_children_read(struct cgroup *cgrp, + struct cftype *cft) +{ + return clone_children(cgrp); +} + +static int cgroup_clone_children_write(struct cgroup *cgrp, + struct cftype *cft, + u64 val) +{ + if (val) + set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags); + else + clear_bit(CGRP_CLONE_CHILDREN, &cgrp->flags); + return 0; +} + /* * for the common functions, 'private' gives the type of file */ @@ -3206,6 +3288,11 @@ static struct cftype files[] = { .write_string = cgroup_write_event_control, .mode = S_IWUGO, }, + { + .name = "cgroup.clone_children", + .read_u64 = cgroup_clone_children_read, + .write_u64 = cgroup_clone_children_write, + }, }; static struct cftype cft_release_agent = { @@ -3335,6 +3422,9 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, if (notify_on_release(parent)) set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); + if (clone_children(parent)) + set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags); + for_each_subsys(root, ss) { struct cgroup_subsys_state *css = ss->create(ss, cgrp); @@ -3349,6 +3439,8 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, goto err_destroy; } /* At error, ->destroy() callback has to free assigned ID. */ + if (clone_children(parent) && ss->post_clone) + ss->post_clone(ss, cgrp); } cgroup_lock_hierarchy(root); @@ -3571,9 +3663,7 @@ again: list_del(&cgrp->sibling); cgroup_unlock_hierarchy(cgrp->root); - spin_lock(&cgrp->dentry->d_lock); d = dget(cgrp->dentry); - spin_unlock(&d->d_lock); cgroup_d_remove_dir(d); dput(d); |