aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--block/blk-cgroup.c49
-rw-r--r--block/blk-cgroup.h38
-rw-r--r--block/blk-throttle.c43
-rw-r--r--block/cfq-iosched.c90
-rw-r--r--fs/bio.c2
-rw-r--r--include/linux/cgroup.h303
-rw-r--r--include/linux/memcontrol.h2
-rw-r--r--include/linux/vmpressure.h6
-rw-r--r--include/net/cls_cgroup.h4
-rw-r--r--include/net/netprio_cgroup.h8
-rw-r--r--kernel/cgroup.c1643
-rw-r--r--kernel/cgroup_freezer.c155
-rw-r--r--kernel/cpuset.c317
-rw-r--r--kernel/events/core.c27
-rw-r--r--kernel/sched/core.c113
-rw-r--r--kernel/sched/cpuacct.c51
-rw-r--r--kernel/sched/sched.h6
-rw-r--r--mm/hugetlb_cgroup.c69
-rw-r--r--mm/memcontrol.c223
-rw-r--r--mm/vmpressure.c25
-rw-r--r--net/core/netprio_cgroup.c72
-rw-r--r--net/ipv4/tcp_memcontrol.c12
-rw-r--r--net/sched/cls_cgroup.c39
-rw-r--r--security/device_cgroup.c65
24 files changed, 1751 insertions, 1611 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 290792a13e3..e90c7c164c8 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -437,10 +437,10 @@ struct request_list *__blk_queue_next_rl(struct request_list *rl,
return &blkg->rl;
}
-static int blkcg_reset_stats(struct cgroup *cgroup, struct cftype *cftype,
- u64 val)
+static int blkcg_reset_stats(struct cgroup_subsys_state *css,
+ struct cftype *cftype, u64 val)
{
- struct blkcg *blkcg = cgroup_to_blkcg(cgroup);
+ struct blkcg *blkcg = css_to_blkcg(css);
struct blkcg_gq *blkg;
int i;
@@ -614,15 +614,13 @@ u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off)
{
struct blkcg_policy *pol = blkcg_policy[pd->plid];
struct blkcg_gq *pos_blkg;
- struct cgroup *pos_cgrp;
- u64 sum;
+ struct cgroup_subsys_state *pos_css;
+ u64 sum = 0;
lockdep_assert_held(pd->blkg->q->queue_lock);
- sum = blkg_stat_read((void *)pd + off);
-
rcu_read_lock();
- blkg_for_each_descendant_pre(pos_blkg, pos_cgrp, pd_to_blkg(pd)) {
+ blkg_for_each_descendant_pre(pos_blkg, pos_css, pd_to_blkg(pd)) {
struct blkg_policy_data *pos_pd = blkg_to_pd(pos_blkg, pol);
struct blkg_stat *stat = (void *)pos_pd + off;
@@ -649,16 +647,14 @@ struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd,
{
struct blkcg_policy *pol = blkcg_policy[pd->plid];
struct blkcg_gq *pos_blkg;
- struct cgroup *pos_cgrp;
- struct blkg_rwstat sum;
+ struct cgroup_subsys_state *pos_css;
+ struct blkg_rwstat sum = { };
int i;
lockdep_assert_held(pd->blkg->q->queue_lock);
- sum = blkg_rwstat_read((void *)pd + off);
-
rcu_read_lock();
- blkg_for_each_descendant_pre(pos_blkg, pos_cgrp, pd_to_blkg(pd)) {
+ blkg_for_each_descendant_pre(pos_blkg, pos_css, pd_to_blkg(pd)) {
struct blkg_policy_data *pos_pd = blkg_to_pd(pos_blkg, pol);
struct blkg_rwstat *rwstat = (void *)pos_pd + off;
struct blkg_rwstat tmp;
@@ -765,18 +761,18 @@ struct cftype blkcg_files[] = {
/**
* blkcg_css_offline - cgroup css_offline callback
- * @cgroup: cgroup of interest
+ * @css: css of interest
*
- * This function is called when @cgroup is about to go away and responsible
- * for shooting down all blkgs associated with @cgroup. blkgs should be
+ * This function is called when @css is about to go away and responsible
+ * for shooting down all blkgs associated with @css. blkgs should be
* removed while holding both q and blkcg locks. As blkcg lock is nested
* inside q lock, this function performs reverse double lock dancing.
*
* This is the blkcg counterpart of ioc_release_fn().
*/
-static void blkcg_css_offline(struct cgroup *cgroup)
+static void blkcg_css_offline(struct cgroup_subsys_state *css)
{
- struct blkcg *blkcg = cgroup_to_blkcg(cgroup);
+ struct blkcg *blkcg = css_to_blkcg(css);
spin_lock_irq(&blkcg->lock);
@@ -798,21 +794,21 @@ static void blkcg_css_offline(struct cgroup *cgroup)
spin_unlock_irq(&blkcg->lock);
}
-static void blkcg_css_free(struct cgroup *cgroup)
+static void blkcg_css_free(struct cgroup_subsys_state *css)
{
- struct blkcg *blkcg = cgroup_to_blkcg(cgroup);
+ struct blkcg *blkcg = css_to_blkcg(css);
if (blkcg != &blkcg_root)
kfree(blkcg);
}
-static struct cgroup_subsys_state *blkcg_css_alloc(struct cgroup *cgroup)
+static struct cgroup_subsys_state *
+blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
{
static atomic64_t id_seq = ATOMIC64_INIT(0);
struct blkcg *blkcg;
- struct cgroup *parent = cgroup->parent;
- if (!parent) {
+ if (!parent_css) {
blkcg = &blkcg_root;
goto done;
}
@@ -883,14 +879,15 @@ void blkcg_exit_queue(struct request_queue *q)
* of the main cic data structures. For now we allow a task to change
* its cgroup only if it's the only owner of its ioc.
*/
-static int blkcg_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
+static int blkcg_can_attach(struct cgroup_subsys_state *css,
+ struct cgroup_taskset *tset)
{
struct task_struct *task;
struct io_context *ioc;
int ret = 0;
/* task_lock() is needed to avoid races with exit_io_context() */
- cgroup_taskset_for_each(task, cgrp, tset) {
+ cgroup_taskset_for_each(task, css, tset) {
task_lock(task);
ioc = task->io_context;
if (ioc && atomic_read(&ioc->nr_tasks) > 1)
@@ -1127,7 +1124,7 @@ void blkcg_policy_unregister(struct blkcg_policy *pol)
/* kill the intf files first */
if (pol->cftypes)
- cgroup_rm_cftypes(&blkio_subsys, pol->cftypes);
+ cgroup_rm_cftypes(pol->cftypes);
/* unregister and update blkgs */
blkcg_policy[pol->plid] = NULL;
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 8056c03a338..ae6969a7ffd 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -179,22 +179,20 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
void blkg_conf_finish(struct blkg_conf_ctx *ctx);
-static inline struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup)
+static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
{
- return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
- struct blkcg, css);
+ return css ? container_of(css, struct blkcg, css) : NULL;
}
static inline struct blkcg *task_blkcg(struct task_struct *tsk)
{
- return container_of(task_subsys_state(tsk, blkio_subsys_id),
- struct blkcg, css);
+ return css_to_blkcg(task_css(tsk, blkio_subsys_id));
}
static inline struct blkcg *bio_blkcg(struct bio *bio)
{
if (bio && bio->bi_css)
- return container_of(bio->bi_css, struct blkcg, css);
+ return css_to_blkcg(bio->bi_css);
return task_blkcg(current);
}
@@ -206,9 +204,7 @@ static inline struct blkcg *bio_blkcg(struct bio *bio)
*/
static inline struct blkcg *blkcg_parent(struct blkcg *blkcg)
{
- struct cgroup *pcg = blkcg->css.cgroup->parent;
-
- return pcg ? cgroup_to_blkcg(pcg) : NULL;
+ return css_to_blkcg(css_parent(&blkcg->css));
}
/**
@@ -288,32 +284,33 @@ struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, struct request_queue *q,
/**
* blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants
* @d_blkg: loop cursor pointing to the current descendant
- * @pos_cgrp: used for iteration
+ * @pos_css: used for iteration
* @p_blkg: target blkg to walk descendants of
*
* Walk @c_blkg through the descendants of @p_blkg. Must be used with RCU
* read locked. If called under either blkcg or queue lock, the iteration
* is guaranteed to include all and only online blkgs. The caller may
- * update @pos_cgrp by calling cgroup_rightmost_descendant() to skip
- * subtree.
+ * update @pos_css by calling css_rightmost_descendant() to skip subtree.
+ * @p_blkg is included in the iteration and the first node to be visited.
*/
-#define blkg_for_each_descendant_pre(d_blkg, pos_cgrp, p_blkg) \
- cgroup_for_each_descendant_pre((pos_cgrp), (p_blkg)->blkcg->css.cgroup) \
- if (((d_blkg) = __blkg_lookup(cgroup_to_blkcg(pos_cgrp), \
+#define blkg_for_each_descendant_pre(d_blkg, pos_css, p_blkg) \
+ css_for_each_descendant_pre((pos_css), &(p_blkg)->blkcg->css) \
+ if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css), \
(p_blkg)->q, false)))
/**
* blkg_for_each_descendant_post - post-order walk of a blkg's descendants
* @d_blkg: loop cursor pointing to the current descendant
- * @pos_cgrp: used for iteration
+ * @pos_css: used for iteration
* @p_blkg: target blkg to walk descendants of
*
* Similar to blkg_for_each_descendant_pre() but performs post-order
- * traversal instead. Synchronization rules are the same.
+ * traversal instead. Synchronization rules are the same. @p_blkg is
+ * included in the iteration and the last node to be visited.
*/
-#define blkg_for_each_descendant_post(d_blkg, pos_cgrp, p_blkg) \
- cgroup_for_each_descendant_post((pos_cgrp), (p_blkg)->blkcg->css.cgroup) \
- if (((d_blkg) = __blkg_lookup(cgroup_to_blkcg(pos_cgrp), \
+#define blkg_for_each_descendant_post(d_blkg, pos_css, p_blkg) \
+ css_for_each_descendant_post((pos_css), &(p_blkg)->blkcg->css) \
+ if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css), \
(p_blkg)->q, false)))
/**
@@ -576,7 +573,6 @@ static inline int blkcg_activate_policy(struct request_queue *q,
static inline void blkcg_deactivate_policy(struct request_queue *q,
const struct blkcg_policy *pol) { }
-static inline struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup) { return NULL; }
static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; }
static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 08a32dfd384..8331aba9426 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -1293,10 +1293,10 @@ static u64 tg_prfill_cpu_rwstat(struct seq_file *sf,
return __blkg_prfill_rwstat(sf, pd, &rwstat);
}
-static int tg_print_cpu_rwstat(struct cgroup *cgrp, struct cftype *cft,
- struct seq_file *sf)
+static int tg_print_cpu_rwstat(struct cgroup_subsys_state *css,
+ struct cftype *cft, struct seq_file *sf)
{
- struct blkcg *blkcg = cgroup_to_blkcg(cgrp);
+ struct blkcg *blkcg = css_to_blkcg(css);
blkcg_print_blkgs(sf, blkcg, tg_prfill_cpu_rwstat, &blkcg_policy_throtl,
cft->private, true);
@@ -1325,31 +1325,31 @@ static u64 tg_prfill_conf_uint(struct seq_file *sf, struct blkg_policy_data *pd,
return __blkg_prfill_u64(sf, pd, v);
}
-static int tg_print_conf_u64(struct cgroup *cgrp, struct cftype *cft,
- struct seq_file *sf)
+static int tg_print_conf_u64(struct cgroup_subsys_state *css,
+ struct cftype *cft, struct seq_file *sf)
{
- blkcg_print_blkgs(sf, cgroup_to_blkcg(cgrp), tg_prfill_conf_u64,
+ blkcg_print_blkgs(sf, css_to_blkcg(css), tg_prfill_conf_u64,
&blkcg_policy_throtl, cft->private, false);
return 0;
}
-static int tg_print_conf_uint(struct cgroup *cgrp, struct cftype *cft,
- struct seq_file *sf)
+static int tg_print_conf_uint(struct cgroup_subsys_state *css,
+ struct cftype *cft, struct seq_file *sf)
{
- blkcg_print_blkgs(sf, cgroup_to_blkcg(cgrp), tg_prfill_conf_uint,
+ blkcg_print_blkgs(sf, css_to_blkcg(css), tg_prfill_conf_uint,
&blkcg_policy_throtl, cft->private, false);
return 0;
}
-static int tg_set_conf(struct cgroup *cgrp, struct cftype *cft, const char *buf,
- bool is_u64)
+static int tg_set_conf(struct cgroup_subsys_state *css, struct cftype *cft,
+ const char *buf, bool is_u64)
{
- struct blkcg *blkcg = cgroup_to_blkcg(cgrp);
+ struct blkcg *blkcg = css_to_blkcg(css);
struct blkg_conf_ctx ctx;
struct throtl_grp *tg;
struct throtl_service_queue *sq;
struct blkcg_gq *blkg;
- struct cgroup *pos_cgrp;
+ struct cgroup_subsys_state *pos_css;
int ret;
ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, buf, &ctx);
@@ -1379,8 +1379,7 @@ static int tg_set_conf(struct cgroup *cgrp, struct cftype *cft, const char *buf,
* restrictions in the whole hierarchy and allows them to bypass
* blk-throttle.
*/
- tg_update_has_rules(tg);
- blkg_for_each_descendant_pre(blkg, pos_cgrp, ctx.blkg)
+ blkg_for_each_descendant_pre(blkg, pos_css, ctx.blkg)
tg_update_has_rules(blkg_to_tg(blkg));
/*
@@ -1403,16 +1402,16 @@ static int tg_set_conf(struct cgroup *cgrp, struct cftype *cft, const char *buf,
return 0;
}
-static int tg_set_conf_u64(struct cgroup *cgrp, struct cftype *cft,
+static int tg_set_conf_u64(struct cgroup_subsys_state *css, struct cftype *cft,
const char *buf)
{
- return tg_set_conf(cgrp, cft, buf, true);
+ return tg_set_conf(css, cft, buf, true);
}
-static int tg_set_conf_uint(struct cgroup *cgrp, struct cftype *cft,
+static int tg_set_conf_uint(struct cgroup_subsys_state *css, struct cftype *cft,
const char *buf)
{
- return tg_set_conf(cgrp, cft, buf, false);
+ return tg_set_conf(css, cft, buf, false);
}
static struct cftype throtl_files[] = {
@@ -1623,7 +1622,7 @@ void blk_throtl_drain(struct request_queue *q)
{
struct throtl_data *td = q->td;
struct blkcg_gq *blkg;
- struct cgroup *pos_cgrp;
+ struct cgroup_subsys_state *pos_css;
struct bio *bio;
int rw;
@@ -1636,11 +1635,9 @@ void blk_throtl_drain(struct request_queue *q)
* better to walk service_queue tree directly but blkg walk is
* easier.
*/
- blkg_for_each_descendant_post(blkg, pos_cgrp, td->queue->root_blkg)
+ blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg)
tg_drain_bios(&blkg_to_tg(blkg)->service_queue);
- tg_drain_bios(&td_root_tg(td)->service_queue);
-
/* finally, transfer bios from top-level tg's into the td */
tg_drain_bios(&td->service_queue);
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index d5bbdcfd0da..dabb9d02cf9 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1607,12 +1607,11 @@ static u64 cfqg_prfill_weight_device(struct seq_file *sf,
return __blkg_prfill_u64(sf, pd, cfqg->dev_weight);
}
-static int cfqg_print_weight_device(struct cgroup *cgrp, struct cftype *cft,
- struct seq_file *sf)
+static int cfqg_print_weight_device(struct cgroup_subsys_state *css,
+ struct cftype *cft, struct seq_file *sf)
{
- blkcg_print_blkgs(sf, cgroup_to_blkcg(cgrp),
- cfqg_prfill_weight_device, &blkcg_policy_cfq, 0,
- false);
+ blkcg_print_blkgs(sf, css_to_blkcg(css), cfqg_prfill_weight_device,
+ &blkcg_policy_cfq, 0, false);
return 0;
}
@@ -1626,35 +1625,34 @@ static u64 cfqg_prfill_leaf_weight_device(struct seq_file *sf,
return __blkg_prfill_u64(sf, pd, cfqg->dev_leaf_weight);
}
-static int cfqg_print_leaf_weight_device(struct cgroup *cgrp,
+static int cfqg_print_leaf_weight_device(struct cgroup_subsys_state *css,
struct cftype *cft,
struct seq_file *sf)
{
- blkcg_print_blkgs(sf, cgroup_to_blkcg(cgrp),
- cfqg_prfill_leaf_weight_device, &blkcg_policy_cfq, 0,
- false);
+ blkcg_print_blkgs(sf, css_to_blkcg(css), cfqg_prfill_leaf_weight_device,
+ &blkcg_policy_cfq, 0, false);
return 0;
}
-static int cfq_print_weight(struct cgroup *cgrp, struct cftype *cft,
+static int cfq_print_weight(struct cgroup_subsys_state *css, struct cftype *cft,
struct seq_file *sf)
{
- seq_printf(sf, "%u\n", cgroup_to_blkcg(cgrp)->cfq_weight);
+ seq_printf(sf, "%u\n", css_to_blkcg(css)->cfq_weight);
return 0;
}
-static int cfq_print_leaf_weight(struct cgroup *cgrp, struct cftype *cft,
- struct seq_file *sf)
+static int cfq_print_leaf_weight(struct cgroup_subsys_state *css,
+ struct cftype *cft, struct seq_file *sf)
{
- seq_printf(sf, "%u\n",
- cgroup_to_blkcg(cgrp)->cfq_leaf_weight);
+ seq_printf(sf, "%u\n", css_to_blkcg(css)->cfq_leaf_weight);
return 0;
}
-static int __cfqg_set_weight_device(struct cgroup *cgrp, struct cftype *cft,
- const char *buf, bool is_leaf_weight)
+static int __cfqg_set_weight_device(struct cgroup_subsys_state *css,
+ struct cftype *cft, const char *buf,
+ bool is_leaf_weight)
{
- struct blkcg *blkcg = cgroup_to_blkcg(cgrp);
+ struct blkcg *blkcg = css_to_blkcg(css);
struct blkg_conf_ctx ctx;
struct cfq_group *cfqg;
int ret;
@@ -1680,22 +1678,22 @@ static int __cfqg_set_weight_device(struct cgroup *cgrp, struct cftype *cft,
return ret;
}
-static int cfqg_set_weight_device(struct cgroup *cgrp, struct cftype *cft,
- const char *buf)
+static int cfqg_set_weight_device(struct cgroup_subsys_state *css,
+ struct cftype *cft, const char *buf)
{
- return __cfqg_set_weight_device(cgrp, cft, buf, false);
+ return __cfqg_set_weight_device(css, cft, buf, false);
}
-static int cfqg_set_leaf_weight_device(struct cgroup *cgrp, struct cftype *cft,
- const char *buf)
+static int cfqg_set_leaf_weight_device(struct cgroup_subsys_state *css,
+ struct cftype *cft, const char *buf)
{
- return __cfqg_set_weight_device(cgrp, cft, buf, true);
+ return __cfqg_set_weight_device(css, cft, buf, true);
}
-static int __cfq_set_weight(struct cgroup *cgrp, struct cftype *cft, u64 val,
- bool is_leaf_weight)
+static int __cfq_set_weight(struct cgroup_subsys_state *css, struct cftype *cft,
+ u64 val, bool is_leaf_weight)
{
- struct blkcg *blkcg = cgroup_to_blkcg(cgrp);
+ struct blkcg *blkcg = css_to_blkcg(css);
struct blkcg_gq *blkg;
if (val < CFQ_WEIGHT_MIN || val > CFQ_WEIGHT_MAX)
@@ -1727,30 +1725,32 @@ static int __cfq_set_weight(struct cgroup *cgrp, struct cftype *cft, u64 val,
return 0;
}
-static int cfq_set_weight(struct cgroup *cgrp, struct cftype *cft, u64 val)
+static int cfq_set_weight(struct cgroup_subsys_state *css, struct cftype *cft,
+ u64 val)
{
- return __cfq_set_weight(cgrp, cft, val, false);
+ return __cfq_set_weight(css, cft, val, false);
}
-static int cfq_set_leaf_weight(struct cgroup *cgrp, struct cftype *cft, u64 val)
+static int cfq_set_leaf_weight(struct cgroup_subsys_state *css,
+ struct cftype *cft, u64 val)
{
- return __cfq_set_weight(cgrp, cft, val, true);
+ return __cfq_set_weight(css, cft, val, true);
}
-static int cfqg_print_stat(struct cgroup *cgrp, struct cftype *cft,
+static int cfqg_print_stat(struct cgroup_subsys_state *css, struct cftype *cft,
struct seq_file *sf)
{
- struct blkcg *blkcg = cgroup_to_blkcg(cgrp);
+ struct blkcg *blkcg = css_to_blkcg(css);
blkcg_print_blkgs(sf, blkcg, blkg_prfill_stat, &blkcg_policy_cfq,
cft->private, false);
return 0;
}
-static int cfqg_print_rwstat(struct cgroup *cgrp, struct cftype *cft,
- struct seq_file *sf)
+static int cfqg_print_rwstat(struct cgroup_subsys_state *css,
+ struct cftype *cft, struct seq_file *sf)
{
- struct blkcg *blkcg = cgroup_to_blkcg(cgrp);
+ struct blkcg *blkcg = css_to_blkcg(css);
blkcg_print_blkgs(sf, blkcg, blkg_prfill_rwstat, &blkcg_policy_cfq,
cft->private, true);
@@ -1773,20 +1773,20 @@ static u64 cfqg_prfill_rwstat_recursive(struct seq_file *sf,
return __blkg_prfill_rwstat(sf, pd, &sum);
}
-static int cfqg_print_stat_recursive(struct cgroup *cgrp, struct cftype *cft,
- struct seq_file *sf)
+static int cfqg_print_stat_recursive(struct cgroup_subsys_state *css,
+ struct cftype *cft, struct seq_file *sf)
{
- struct blkcg *blkcg = cgroup_to_blkcg(cgrp);
+ struct blkcg *blkcg = css_to_blkcg(css);
blkcg_print_blkgs(sf, blkcg, cfqg_prfill_stat_recursive,
&blkcg_policy_cfq, cft->private, false);
return 0;
}
-static int cfqg_print_rwstat_recursive(struct cgroup *cgrp, struct cftype *cft,
- struct seq_file *sf)
+static int cfqg_print_rwstat_recursive(struct cgroup_subsys_state *css,
+ struct cftype *cft, struct seq_file *sf)
{
- struct blkcg *blkcg = cgroup_to_blkcg(cgrp);
+ struct blkcg *blkcg = css_to_blkcg(css);
blkcg_print_blkgs(sf, blkcg, cfqg_prfill_rwstat_recursive,
&blkcg_policy_cfq, cft->private, true);
@@ -1810,10 +1810,10 @@ static u64 cfqg_prfill_avg_queue_size(struct seq_file *sf,
}
/* print avg_queue_size */
-static int cfqg_print_avg_queue_size(struct cgroup *cgrp, struct cftype *cft,
- struct seq_file *sf)
+static int cfqg_print_avg_queue_size(struct cgroup_subsys_state *css,
+ struct cftype *cft, struct seq_file *sf)
{
- struct blkcg *blkcg = cgroup_to_blkcg(cgrp);
+ struct blkcg *blkcg = css_to_blkcg(css);
blkcg_print_blkgs(sf, blkcg, cfqg_prfill_avg_queue_size,
&blkcg_policy_cfq, 0, false);
diff --git a/fs/bio.c b/fs/bio.c
index c5eae725149..b3b20ed9510 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -1956,7 +1956,7 @@ int bio_associate_current(struct bio *bio)
/* associate blkcg if exists */
rcu_read_lock();
- css = task_subsys_state(current, blkio_subsys_id);
+ css = task_css(current, blkio_subsys_id);
if (css && css_tryget(css))
bio->bi_css = css;
rcu_read_unlock();
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index e9ac882868c..3561d305b1e 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -66,22 +66,25 @@ enum cgroup_subsys_id {
/* Per-subsystem/per-cgroup state maintained by the system. */
struct cgroup_subsys_state {
- /*
- * The cgroup that this subsystem is attached to. Useful
- * for subsystems that want to know about the cgroup
- * hierarchy structure
- */
+ /* the cgroup that this css is attached to */
struct cgroup *cgroup;
+ /* the cgroup subsystem that this css is attached to */
+ struct cgroup_subsys *ss;
+
/* reference count - access via css_[try]get() and css_put() */
struct percpu_ref refcnt;
+ /* the parent css */
+ struct cgroup_subsys_state *parent;
+
unsigned long flags;
/* ID for this css, if possible */
struct css_id __rcu *id;
- /* Used to put @cgroup->dentry on the last css_put() */
- struct work_struct dput_work;
+ /* percpu_ref killing and RCU release */
+ struct rcu_head rcu_head;
+ struct work_struct destroy_work;
};
/* bits in struct cgroup_subsys_state flags field */
@@ -161,7 +164,16 @@ struct cgroup_name {
struct cgroup {
unsigned long flags; /* "unsigned long" so bitops work */
- int id; /* ida allocated in-hierarchy ID */
+ /*
+ * idr allocated in-hierarchy ID.
+ *
+ * The ID of the root cgroup is always 0, and a new cgroup
+ * will be assigned with a smallest available ID.
+ */
+ int id;
+
+ /* the number of attached css's */
+ int nr_css;
/*
* We link our 'sibling' struct into our parent's 'children'.
@@ -196,7 +208,7 @@ struct cgroup {
struct cgroup_name __rcu *name;
/* Private pointers for each registered subsystem */
- struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
+ struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT];
struct cgroupfs_root *root;
@@ -220,10 +232,12 @@ struct cgroup {
struct list_head pidlists;
struct mutex pidlist_mutex;
+ /* dummy css with NULL ->ss, points back to this cgroup */
+ struct cgroup_subsys_state dummy_css;
+
/* For css percpu_ref killing and RCU-protected deletion */
struct rcu_head rcu_head;
struct work_struct destroy_work;
- atomic_t css_kill_cnt;
/* List of events which userspace want to receive */
struct list_head event_list;
@@ -322,7 +336,7 @@ struct cgroupfs_root {
unsigned long flags;
/* IDs for cgroups in this hierarchy */
- struct ida cgroup_ida;
+ struct idr cgroup_idr;
/* The path to use for release notifications. */
char release_agent_path[PATH_MAX];
@@ -394,9 +408,10 @@ struct cgroup_map_cb {
/* cftype->flags */
enum {
- CFTYPE_ONLY_ON_ROOT = (1 << 0), /* only create on root cg */
- CFTYPE_NOT_ON_ROOT = (1 << 1), /* don't create on root cg */
+ CFTYPE_ONLY_ON_ROOT = (1 << 0), /* only create on root cgrp */
+ CFTYPE_NOT_ON_ROOT = (1 << 1), /* don't create on root cgrp */
CFTYPE_INSANE = (1 << 2), /* don't create if sane_behavior */
+ CFTYPE_NO_PREFIX = (1 << 3), /* (DON'T USE FOR NEW FILES) no subsys prefix */
};
#define MAX_CFTYPE_NAME 64
@@ -424,35 +439,41 @@ struct cftype {
/* CFTYPE_* flags */
unsigned int flags;
+ /*
+ * The subsys this file belongs to. Initialized automatically
+ * during registration. NULL for cgroup core files.
+ */
+ struct cgroup_subsys *ss;
+
int (*open)(struct inode *inode, struct file *file);
- ssize_t (*read)(struct cgroup *cgrp, struct cftype *cft,
+ ssize_t (*read)(struct cgroup_subsys_state *css, struct cftype *cft,
struct file *file,
char __user *buf, size_t nbytes, loff_t *ppos);
/*
* read_u64() is a shortcut for the common case of returning a
* single integer. Use it in place of read()
*/
- u64 (*read_u64)(struct cgroup *cgrp, struct cftype *cft);
+ u64 (*read_u64)(struct cgroup_subsys_state *css, struct cftype *cft);
/*
* read_s64() is a signed version of read_u64()
*/
- s64 (*read_s64)(struct cgroup *cgrp, struct cftype *cft);
+ s64 (*read_s64)(struct cgroup_subsys_state *css, struct cftype *cft);
/*
* read_map() is used for defining a map of key/value
* pairs. It should call cb->fill(cb, key, value) for each
* entry. The key/value pairs (and their ordering) should not
* change between reboots.
*/
- int (*read_map)(struct cgroup *cgrp, struct cftype *cft,
+ int (*read_map)(struct cgroup_subsys_state *css, struct cftype *cft,
struct cgroup_map_cb *cb);
/*
* read_seq_string() is used for outputting a simple sequence
* using seqfile.
*/
- int (*read_seq_string)(struct cgroup *cgrp, struct cftype *cft,
- struct seq_file *m);
+ int (*read_seq_string)(struct cgroup_subsys_state *css,
+ struct cftype *cft, struct seq_file *m);
- ssize_t (*write)(struct cgroup *cgrp, struct cftype *cft,
+ ssize_t (*write)(struct cgroup_subsys_state *css, struct cftype *cft,
struct file *file,
const char __user *buf, size_t nbytes, loff_t *ppos);
@@ -461,18 +482,20 @@ struct cftype {
* a single integer (as parsed by simple_strtoull) from
* userspace. Use in place of write(); return 0 or error.
*/
- int (*write_u64)(struct cgroup *cgrp, struct cftype *cft, u64 val);
+ int (*write_u64)(struct cgroup_subsys_state *css, struct cftype *cft,
+ u64 val);
/*
* write_s64() is a signed version of write_u64()
*/
- int (*write_s64)(struct cgroup *cgrp, struct cftype *cft, s64 val);
+ int (*write_s64)(struct cgroup_subsys_state *css, struct cftype *cft,
+ s64 val);
/*
* write_string() is passed a nul-terminated kernelspace
* buffer of maximum length determined by max_write_len.
* Returns 0 or -ve error code.
*/
- int (*write_string)(struct cgroup *cgrp, struct cftype *cft,
+ int (*write_string)(struct cgroup_subsys_state *css, struct cftype *cft,
const char *buffer);
/*
* trigger() callback can be used to get some kick from the
@@ -480,7 +503,7 @@ struct cftype {
* at all. The private field can be used to determine the
* kick type for multiplexing.
*/
- int (*trigger)(struct cgroup *cgrp, unsigned int event);
+ int (*trigger)(struct cgroup_subsys_state *css, unsigned int event);
int (*release)(struct inode *inode, struct file *file);
@@ -490,16 +513,18 @@ struct cftype {
* you want to provide this functionality. Use eventfd_signal()
* on eventfd to send notification to userspace.
*/
- int (*register_event)(struct cgroup *cgrp, struct cftype *cft,
- struct eventfd_ctx *eventfd, const char *args);
+ int (*register_event)(struct cgroup_subsys_state *css,
+ struct cftype *cft, struct eventfd_ctx *eventfd,
+ const char *args);
/*
* unregister_event() callback will be called when userspace
* closes the eventfd or on cgroup removing.
* This callback must be implemented, if you want provide
* notification functionality.
*/
- void (*unregister_event)(struct cgroup *cgrp, struct cftype *cft,
- struct eventfd_ctx *eventfd);
+ void (*unregister_event)(struct cgroup_subsys_state *css,
+ struct cftype *cft,
+ struct eventfd_ctx *eventfd);
};
/*
@@ -512,15 +537,6 @@ struct cftype_set {
struct cftype *cfts;
};
-struct cgroup_scanner {
- struct cgroup *cg;
- int (*test_task)(struct task_struct *p, struct cgroup_scanner *scan);
- void (*process_task)(struct task_struct *p,
- struct cgroup_scanner *scan);
- struct ptr_heap *heap;
- void *data;
-};
-
/*
* See the comment above CGRP_ROOT_SANE_BEHAVIOR for details. This
* function can be called as long as @cgrp is accessible.
@@ -537,7 +553,7 @@ static inline const char *cgroup_name(const struct cgroup *cgrp)
}
int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
-int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
+int cgroup_rm_cftypes(struct cftype *cfts);
bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor);
@@ -553,20 +569,22 @@ int cgroup_task_count(const struct cgroup *cgrp);
struct cgroup_taskset;
struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset);
struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset);
-struct cgroup *cgroup_taskset_cur_cgroup(struct cgroup_taskset *tset);
+struct cgroup_subsys_state *cgroup_taskset_cur_css(struct cgroup_taskset *tset,
+ int subsys_id);
int cgroup_taskset_size(struct cgroup_taskset *tset);
/**
* cgroup_taskset_for_each - iterate cgroup_taskset
* @task: the loop cursor
- * @skip_cgrp: skip if task's cgroup matches this, %NULL to iterate through all
+ * @skip_css: skip if task's css matches this, %NULL to iterate through all
* @tset: taskset to iterate
*/
-#define cgroup_taskset_for_each(task, skip_cgrp, tset) \
+#define cgroup_taskset_for_each(task, skip_css, tset) \
for ((task) = cgroup_taskset_first((tset)); (task); \
(task) = cgroup_taskset_next((tset))) \
- if (!(skip_cgrp) || \
- cgroup_taskset_cur_cgroup((tset)) != (skip_cgrp))
+ if (!(skip_css) || \
+ cgroup_taskset_cur_css((tset), \
+ (skip_css)->ss->subsys_id) != (skip_css))
/*
* Control Group subsystem type.
@@ -574,18 +592,22 @@ int cgroup_taskset_size(struct cgroup_taskset *tset);
*/
struct cgroup_subsys {
- struct cgroup_subsys_state *(*css_alloc)(struct cgroup *cgrp);
- int (*css_online)(struct cgroup *cgrp);
- void (*css_offline)(struct cgroup *cgrp);
- void (*css_free)(struct cgroup *cgrp);
-
- int (*can_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset);
- void (*cancel_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset);
- void (*attach)(struct cgroup *cgrp, struct cgroup_taskset *tset);
+ struct cgroup_subsys_state *(*css_alloc)(struct cgroup_subsys_state *parent_css);
+ int (*css_online)(struct cgroup_subsys_state *css);
+ void (*css_offline)(struct cgroup_subsys_state *css);
+ void (*css_free)(struct cgroup_subsys_state *css);
+
+ int (*can_attach)(struct cgroup_subsys_state *css,
+ struct cgroup_taskset *tset);
+ void (*cancel_attach)(struct cgroup_subsys_state *css,
+ struct cgroup_taskset *tset);
+ void (*attach)(struct cgroup_subsys_state *css,
+ struct cgroup_taskset *tset);
void (*fork)(struct task_struct *task);
- void (*exit)(struct cgroup *cgrp, struct cgroup *old_cgrp,
+ void (*exit)(struct cgroup_subsys_state *css,
+ struct cgroup_subsys_state *old_css,
struct task_struct *task);
- void (*bind)(struct cgroup *root);
+ void (*bind)(struct cgroup_subsys_state *root_css);
int subsys_id;
int disabled;
@@ -641,10 +663,17 @@ struct cgroup_subsys {
#undef IS_SUBSYS_ENABLED
#undef SUBSYS
-static inline struct cgroup_subsys_state *cgroup_subsys_state(
- struct cgroup *cgrp, int subsys_id)
+/**
+ * css_parent - find the parent css
+ * @css: the target cgroup_subsys_state
+ *
+ * Return the parent css of @css. This function is guaranteed to return
+ * non-NULL parent as long as @css isn't the root.
+ */
+static inline
+struct cgroup_subsys_state *css_parent(struct cgroup_subsys_state *css)
{
- return cgrp->subsys[subsys_id];
+ return css->parent;
}
/**
@@ -672,7 +701,7 @@ extern struct mutex cgroup_mutex;
#endif
/**
- * task_subsys_state_check - obtain css for (task, subsys) w/ extra access conds
+ * task_css_check - obtain css for (task, subsys) w/ extra access conds
* @task: the target task
* @subsys_id: the target subsystem ID
* @__c: extra condition expression to be passed to rcu_dereference_check()
@@ -680,7 +709,7 @@ extern struct mutex cgroup_mutex;
* Return the cgroup_subsys_state for the (@task, @subsys_id) pair. The
* synchronization rules are the same as task_css_set_check().
*/
-#define task_subsys_state_check(task, subsys_id, __c) \
+#define task_css_check(task, subsys_id, __c) \
task_css_set_check((task), (__c))->subsys[(subsys_id)]
/**
@@ -695,87 +724,92 @@ static inline struct css_set *task_css_set(struct task_struct *task)
}
/**
- * task_subsys_state - obtain css for (task, subsys)
+ * task_css - obtain css for (task, subsys)
* @task: the target task
* @subsys_id: the target subsystem ID
*
- * See task_subsys_state_check().
+ * See task_css_check().
*/
-static inline struct cgroup_subsys_state *
-task_subsys_state(struct task_struct *task, int subsys_id)
+static inline struct cgroup_subsys_state *task_css(struct task_struct *task,
+ int subsys_id)
{
- return task_subsys_state_check(task, subsys_id, false);
+ return task_css_check(task, subsys_id, false);
}
-static inline struct cgroup* task_cgroup(struct task_struct *task,
- int subsys_id)
+static inline struct cgroup *task_cgroup(struct task_struct *task,
+ int subsys_id)
{
- return task_subsys_state(task, subsys_id)->cgroup;
+ return task_css(task, subsys_id)->cgroup;
}
-struct cgroup *cgroup_next_sibling(struct cgroup *pos);
+struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos,
+ struct cgroup_subsys_state *parent);
+
+struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss);
/**
- * cgroup_for_each_child - iterate through children of a cgroup
- * @pos: the cgroup * to use as the loop cursor
- * @cgrp: cgroup whose children to walk
+ * css_for_each_child - iterate through children of a css
+ * @pos: the css * to use as the loop cursor
+ * @parent: css whose children to walk
*
- * Walk @cgrp's children. Must be called under rcu_read_lock(). A child
- * cgroup which hasn't finished ->css_online() or already has finished
+ * Walk @parent's children. Must be called under rcu_read_lock(). A child
+ * css which hasn't finished ->css_online() or already has finished
* ->css_offline() may show up during traversal and it's each subsystem's
* responsibility to verify that each @pos is alive.
*
* If a subsystem synchronizes against the parent in its ->css_online() and
- * before starting iterating, a cgroup which finished ->css_online() is
+ * before starting iterating, a css which finished ->css_online() is
* guaranteed to be visible in the future iterations.
*
* It is allowed to temporarily drop RCU read lock during iteration. The
* caller is responsible for ensuring that @pos remains accessible until
* the start of the next iteration by, for example, bumping the css refcnt.
*/
-#define cgroup_for_each_child(pos, cgrp) \
- for ((pos) = list_first_or_null_rcu(&(cgrp)->children, \
- struct cgroup, sibling); \
- (pos); (pos) = cgroup_next_sibling((pos)))
+#define css_for_each_child(pos, parent) \
+ for ((pos) = css_next_child(NULL, (parent)); (pos); \
+ (pos) = css_next_child((pos), (parent)))
+
+struct cgroup_subsys_state *
+css_next_descendant_pre(struct cgroup_subsys_state *pos,
+ struct cgroup_subsys_state *css);
-struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos,
- struct cgroup *cgroup);
-struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos);
+struct cgroup_subsys_state *
+css_rightmost_descendant(struct cgroup_subsys_state *pos);
/**
- * cgroup_for_each_descendant_pre - pre-order walk of a cgroup's descendants
- * @pos: the cgroup * to use as the loop cursor
- * @cgroup: cgroup whose descendants to walk
+ * css_for_each_descendant_pre - pre-order walk of a css's descendants
+ * @pos: the css * to use as the loop cursor
+ * @root: css whose descendants to walk
*
- * Walk @cgroup's descendants. Must be called under rcu_read_lock(). A
- * descendant cgroup which hasn't finished ->css_online() or already has
+ * Walk @root's descendants. @root is included in the iteration and the
+ * first node to be visited. Must be called under rcu_read_lock(). A
+ * descendant css which hasn't finished ->css_online() or already has
* finished ->css_offline() may show up during traversal and it's each
* subsystem's responsibility to verify that each @pos is alive.
*
* If a subsystem synchronizes against the parent in its ->css_online() and
* before starting iterating, and synchronizes against @pos on each
- * iteration, any descendant cgroup which finished ->css_online() is
+ * iteration, any descendant css which finished ->css_online() is
* guaranteed to be visible in the future iterations.
*
* In other words, the following guarantees that a descendant can't escape
* state updates of its ancestors.
*
- * my_online(@cgrp)
+ * my_online(@css)
* {
- * Lock @cgrp->parent and @cgrp;
- * Inherit state from @cgrp->parent;
+ * Lock @css's parent and @css;
+ * Inherit state from the parent;
* Unlock both.
* }
*
- * my_update_state(@cgrp)
+ * my_update_state(@css)
* {
- * Lock @cgrp;
- * Update @cgrp's state;
- * Unlock @cgrp;
- *
- * cgroup_for_each_descendant_pre(@pos, @cgrp) {
+ * css_for_each_descendant_pre(@pos, @css) {
* Lock @pos;
- * Verify @pos is alive and inherit state from @pos->parent;
+ * if (@pos == @css)
+ * Update @css's state;
+ * else
+ * Verify @pos is alive and inherit state from its parent;
* Unlock @pos;
* }
* }
@@ -786,8 +820,7 @@ struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos);
* visible by walking order and, as long as inheriting operations to the
* same @pos are atomic to each other, multiple updates racing each other
* still result in the correct state. It's guaranateed that at least one
- * inheritance happens for any cgroup after the latest update to its
- * parent.
+ * inheritance happens for any css after the latest update to its parent.
*
* If checking parent's state requires locking the parent, each inheriting
* iteration should lock and unlock both @pos->parent and @pos.
@@ -800,52 +833,45 @@ struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos);
* caller is responsible for ensuring that @pos remains accessible until
* the start of the next iteration by, for example, bumping the css refcnt.
*/
-#define cgroup_for_each_descendant_pre(pos, cgroup) \
- for (pos = cgroup_next_descendant_pre(NULL, (cgroup)); (pos); \
- pos = cgroup_next_descendant_pre((pos), (cgroup)))
+#define css_for_each_descendant_pre(pos, css) \
+ for ((pos) = css_next_descendant_pre(NULL, (css)); (pos); \
+ (pos) = css_next_descendant_pre((pos), (css)))
-struct cgroup *cgroup_next_descendant_post(struct cgroup *pos,
- struct cgroup *cgroup);
+struct cgroup_subsys_state *
+css_next_descendant_post(struct cgroup_subsys_state *pos,
+ struct cgroup_subsys_state *css);
/**
- * cgroup_for_each_descendant_post - post-order walk of a cgroup's descendants
- * @pos: the cgroup * to use as the loop cursor
- * @cgroup: cgroup whose descendants to walk
+ * css_for_each_descendant_post - post-order walk of a css's descendants
+ * @pos: the css * to use as the loop cursor
+ * @css: css whose descendants to walk
*
- * Similar to cgroup_for_each_descendant_pre() but performs post-order
- * traversal instead. Note that the walk visibility guarantee described in
- * pre-order walk doesn't apply the same to post-order walks.
+ * Similar to css_for_each_descendant_pre() but performs post-order
+ * traversal instead. @root is included in the iteration and the last
+ * node to be visited. Note that the walk visibility guarantee described
+ * in pre-order walk doesn't apply the same to post-order walks.
*/
-#define cgroup_for_each_descendant_post(pos, cgroup) \
- for (pos = cgroup_next_descendant_post(NULL, (cgroup)); (pos); \
- pos = cgroup_next_descendant_post((pos), (cgroup)))
-
-/* A cgroup_iter should be treated as an opaque object */
-struct cgroup_iter {
- struct list_head *cset_link;
- struct list_head *task;
+#define css_for_each_descendant_post(pos, css) \
+ for ((pos) = css_next_descendant_post(NULL, (css)); (pos); \
+ (pos) = css_next_descendant_post((pos), (css)))
+
+/* A css_task_iter should be treated as an opaque object */
+struct css_task_iter {
+ struct cgroup_subsys_state *origin_css;
+ struct list_head *cset_link;
+ struct list_head *task;
};
-/*
- * To iterate across the tasks in a cgroup:
- *
- * 1) call cgroup_iter_start to initialize an iterator
- *
- * 2) call cgroup_iter_next() to retrieve member tasks until it
- * returns NULL or until you want to end the iteration
- *
- * 3) call cgroup_iter_end() to destroy the iterator.
- *
- * Or, call cgroup_scan_tasks() to iterate through every task in a
- * cgroup - cgroup_scan_tasks() holds the css_set_lock when calling
- * the test_task() callback, but not while calling the process_task()
- * callback.
- */
-void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it);
-struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
- struct cgroup_iter *it);
-void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it);
-int cgroup_scan_tasks(struct cgroup_scanner *scan);
+void css_task_iter_start(struct cgroup_subsys_state *css,
+ struct css_task_iter *it);
+struct task_struct *css_task_iter_next(struct css_task_iter *it);
+void css_task_iter_end(struct css_task_iter *it);
+
+int css_scan_tasks(struct cgroup_subsys_state *css,
+ bool (*test)(struct task_struct *, void *),
+ void (*process)(struct task_struct *, void *),
+ void *data, struct ptr_heap *heap);
+
int cgroup_attach_task_all(struct task_struct *from, struct task_struct *);
int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from);
@@ -878,7 +904,8 @@ bool css_is_ancestor(struct cgroup_subsys_state *cg,
/* Get id and depth of css */
unsigned short css_id(struct cgroup_subsys_state *css);
-struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id);
+struct cgroup_subsys_state *css_from_dir(struct dentry *dentry,
+ struct cgroup_subsys *ss);
#else /* !CONFIG_CGROUPS */
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 7b4d9d79570..6c416092e32 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -85,7 +85,7 @@ extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
extern struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm);
extern struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg);
-extern struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont);
+extern struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css);
static inline
bool mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *memcg)
diff --git a/include/linux/vmpressure.h b/include/linux/vmpressure.h
index 7dc17e2456d..3f3788d4936 100644
--- a/include/linux/vmpressure.h
+++ b/include/linux/vmpressure.h
@@ -34,10 +34,12 @@ extern void vmpressure_cleanup(struct vmpressure *vmpr);
extern struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg);
extern struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr);
extern struct vmpressure *css_to_vmpressure(struct cgroup_subsys_state *css);
-extern int vmpressure_register_event(struct cgroup *cg, struct cftype *cft,
+extern int vmpressure_register_event(struct cgroup_subsys_state *css,
+ struct cftype *cft,
struct eventfd_ctx *eventfd,
const char *args);
-extern void vmpressure_unregister_event(struct cgroup *cg, struct cftype *cft,
+extern void vmpressure_unregister_event(struct cgroup_subsys_state *css,
+ struct cftype *cft,
struct eventfd_ctx *eventfd);
#else
static inline void vmpressure(gfp_t gfp, struct mem_cgroup *memcg,
diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h
index 0fee0617fb7..52adaa75dac 100644
--- a/include/net/cls_cgroup.h
+++ b/include/net/cls_cgroup.h
@@ -35,7 +35,7 @@ static inline u32 task_cls_classid(struct task_struct *p)
return 0;
rcu_read_lock();
- classid = container_of(task_subsys_state(p, net_cls_subsys_id),
+ classid = container_of(task_css(p, net_cls_subsys_id),
struct cgroup_cls_state, css)->classid;
rcu_read_unlock();
@@ -51,7 +51,7 @@ static inline u32 task_cls_classid(struct task_struct *p)
return 0;
rcu_read_lock();
- css = task_subsys_state(p, net_cls_subsys_id);
+ css = task_css(p, net_cls_subsys_id);
if (css)
classid = container_of(css,
struct cgroup_cls_state, css)->classid;
diff --git a/include/net/netprio_cgroup.h b/include/net/netprio_cgroup.h
index 50ab8c26ab5..a24f8bb3ca4 100644
--- a/include/net/netprio_cgroup.h
+++ b/include/net/netprio_cgroup.h
@@ -25,10 +25,6 @@ struct netprio_map {
u32 priomap[];
};
-struct cgroup_netprio_state {
- struct cgroup_subsys_state css;
-};
-
extern void sock_update_netprioidx(struct sock *sk);
#if IS_BUILTIN(CONFIG_NETPRIO_CGROUP)
@@ -39,7 +35,7 @@ static inline u32 task_netprioidx(struct task_struct *p)
u32 idx;
rcu_read_lock();
- css = task_subsys_state(p, net_prio_subsys_id);
+ css = task_css(p, net_prio_subsys_id);
idx = css->cgroup->id;
rcu_read_unlock();
return idx;
@@ -53,7 +49,7 @@ static inline u32 task_netprioidx(struct task_struct *p)
u32 idx = 0;
rcu_read_lock();
- css = task_subsys_state(p, net_prio_subsys_id);
+ css = task_css(p, net_prio_subsys_id);
if (css)
idx = css->cgroup->id;
rcu_read_unlock();
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index e91963302c0..e0aeb32415f 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -81,7 +81,7 @@
*/
#ifdef CONFIG_PROVE_RCU
DEFINE_MUTEX(cgroup_mutex);
-EXPORT_SYMBOL_GPL(cgroup_mutex); /* only for task_subsys_state_check() */
+EXPORT_SYMBOL_GPL(cgroup_mutex); /* only for lockdep */
#else
static DEFINE_MUTEX(cgroup_mutex);
#endif
@@ -117,6 +117,7 @@ struct cfent {
struct list_head node;
struct dentry *dentry;
struct cftype *type;
+ struct cgroup_subsys_state *css;
/* file xattrs */
struct simple_xattrs xattrs;
@@ -159,9 +160,9 @@ struct css_id {
*/
struct cgroup_event {
/*
- * Cgroup which the event belongs to.
+ * css which the event belongs to.
*/
- struct cgroup *cgrp;
+ struct cgroup_subsys_state *css;
/*
* Control file which the event associated.
*/
@@ -215,10 +216,33 @@ static u64 cgroup_serial_nr_next = 1;
*/
static int need_forkexit_callback __read_mostly;
-static void cgroup_offline_fn(struct work_struct *work);
+static struct cftype cgroup_base_files[];
+
+static void cgroup_destroy_css_killed(struct cgroup *cgrp);
static int cgroup_destroy_locked(struct cgroup *cgrp);
-static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys,
- struct cftype cfts[], bool is_add);
+static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[],
+ bool is_add);
+
+/**
+ * cgroup_css - obtain a cgroup's css for the specified subsystem
+ * @cgrp: the cgroup of interest
+ * @ss: the subsystem of interest (%NULL returns the dummy_css)
+ *
+ * Return @cgrp's css (cgroup_subsys_state) associated with @ss. This
+ * function must be called either under cgroup_mutex or rcu_read_lock() and
+ * the caller is responsible for pinning the returned css if it wants to
+ * keep accessing it outside the said locks. This function may return
+ * %NULL if @cgrp doesn't have @subsys_id enabled.
+ */
+static struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp,
+ struct cgroup_subsys *ss)
+{
+ if (ss)
+ return rcu_dereference_check(cgrp->subsys[ss->subsys_id],
+ lockdep_is_held(&cgroup_mutex));
+ else
+ return &cgrp->dummy_css;
+}
/* convenient tests for these bits */
static inline bool cgroup_is_dead(const struct cgroup *cgrp)
@@ -365,9 +389,11 @@ static struct cgrp_cset_link init_cgrp_cset_link;
static int cgroup_init_idr(struct cgroup_subsys *ss,
struct cgroup_subsys_state *css);
-/* css_set_lock protects the list of css_set objects, and the
- * chain of tasks off each css_set. Nests outside task->alloc_lock
- * due to cgroup_iter_start() */
+/*
+ * css_set_lock protects the list of css_set objects, and the chain of
+ * tasks off each css_set. Nests outside task->alloc_lock due to
+ * css_task_iter_start().
+ */
static DEFINE_RWLOCK(css_set_lock);
static int css_set_count;
@@ -392,10 +418,12 @@ static unsigned long css_set_hash(struct cgroup_subsys_state *css[])
return key;
}
-/* We don't maintain the lists running through each css_set to its
- * task until after the first call to cgroup_iter_start(). This
- * reduces the fork()/exit() overhead for people who have cgroups
- * compiled into their kernel but not actually in use */
+/*
+ * We don't maintain the lists running through each css_set to its task
+ * until after the first call to css_task_iter_start(). This reduces the
+ * fork()/exit() overhead for people who have cgroups compiled into their
+ * kernel but not actually in use.
+ */
static int use_task_css_set_links __read_mostly;
static void __put_css_set(struct css_set *cset, int taskexit)
@@ -464,7 +492,7 @@ static inline void put_css_set_taskexit(struct css_set *cset)
* @new_cgrp: cgroup that's being entered by the task
* @template: desired set of css pointers in css_set (pre-calculated)
*
- * Returns true if "cg" matches "old_cg" except for the hierarchy
+ * Returns true if "cset" matches "old_cset" except for the hierarchy
* which "new_cgrp" belongs to, for which it should match "new_cgrp".
*/
static bool compare_css_sets(struct css_set *cset,
@@ -555,7 +583,7 @@ static struct css_set *find_existing_css_set(struct css_set *old_cset,
/* Subsystem is in this hierarchy. So we want
* the subsystem state from the new
* cgroup */
- template[i] = cgrp->subsys[i];
+ template[i] = cgroup_css(cgrp, ss);
} else {
/* Subsystem is not in this hierarchy, so we
* don't want to change the subsystem state */
@@ -803,8 +831,7 @@ static struct cgroup *task_cgroup_from_root(struct task_struct *task,
static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry);
-static int cgroup_populate_dir(struct cgroup *cgrp, bool base_files,
- unsigned long subsys_mask);
+static int cgroup_populate_dir(struct cgroup *cgrp, unsigned long subsys_mask);
static const struct inode_operations cgroup_dir_inode_operations;
static const struct file_operations proc_cgroupstats_operations;
@@ -813,8 +840,7 @@ static struct backing_dev_info cgroup_backing_dev_info = {
.capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
};
-static int alloc_css_id(struct cgroup_subsys *ss,
- struct cgroup *parent, struct cgroup *child);
+static int alloc_css_id(struct cgroup_subsys_state *child_css);
static struct inode *cgroup_new_inode(umode_t mode, struct super_block *sb)
{
@@ -845,15 +871,8 @@ static struct cgroup_name *cgroup_alloc_name(struct dentry *dentry)
static void cgroup_free_fn(struct work_struct *work)
{
struct cgroup *cgrp = container_of(work, struct cgroup, destroy_work);
- struct cgroup_subsys *ss;
mutex_lock(&cgroup_mutex);
- /*
- * Release the subsystem state objects.
- */
- for_each_root_subsys(cgrp->root, ss)
- ss->css_free(cgrp);
-
cgrp->root->number_of_cgroups--;
mutex_unlock(&cgroup_mutex);
@@ -864,8 +883,6 @@ static void cgroup_free_fn(struct work_struct *work)
*/
dput(cgrp->parent->dentry);
- ida_simple_remove(&cgrp->root->cgroup_ida, cgrp->id);
-
/*
* Drop the active superblock reference that we took when we
* created the cgroup. This will free cgrp->root, if we are
@@ -956,27 +973,22 @@ static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
}
/**
- * cgroup_clear_directory - selective removal of base and subsystem files
- * @dir: directory containing the files
- * @base_files: true if the base files should be removed
+ * cgroup_clear_dir - remove subsys files in a cgroup directory
+ * @cgrp: target cgroup
* @subsys_mask: mask of the subsystem ids whose files should be removed
*/
-static void cgroup_clear_directory(struct dentry *dir, bool base_files,
- unsigned long subsys_mask)
+static void cgroup_clear_dir(struct cgroup *cgrp, unsigned long subsys_mask)
{
- struct cgroup *cgrp = __d_cgrp(dir);
struct cgroup_subsys *ss;
+ int i;
- for_each_root_subsys(cgrp->root, ss) {
+ for_each_subsys(ss, i) {
struct cftype_set *set;
- if (!test_bit(ss->subsys_id, &subsys_mask))
+
+ if (!test_bit(i, &subsys_mask))
continue;
list_for_each_entry(set, &ss->cftsets, node)
- cgroup_addrm_files(cgrp, NULL, set->cfts, false);
- }
- if (base_files) {
- while (!list_empty(&cgrp->files))
- cgroup_rm_file(cgrp, NULL);
+ cgroup_addrm_files(cgrp, set->cfts, false);
}
}
@@ -986,9 +998,6 @@ static void cgroup_clear_directory(struct dentry *dir, bool base_files,
static void cgroup_d_remove_dir(struct dentry *dentry)
{
struct dentry *parent;
- struct cgroupfs_root *root = dentry->d_sb->s_fs_info;
-
- cgroup_clear_directory(dentry, true, root->subsys_mask);
parent = dentry->d_parent;
spin_lock(&parent->d_lock);
@@ -1009,79 +1018,84 @@ static int rebind_subsystems(struct cgroupfs_root *root,
{
struct cgroup *cgrp = &root->top_cgroup;
struct cgroup_subsys *ss;
- int i;
+ unsigned long pinned = 0;
+ int i, ret;
BUG_ON(!mutex_is_locked(&cgroup_mutex));
BUG_ON(!mutex_is_locked(&cgroup_root_mutex));
/* Check that any added subsystems are currently free */
for_each_subsys(ss, i) {
- unsigned long bit = 1UL << i;
-
- if (!(bit & added_mask))
+ if (!(added_mask & (1 << i)))
continue;
+ /* is the subsystem mounted elsewhere? */
if (ss->root != &cgroup_dummy_root) {
- /* Subsystem isn't free */
- return -EBUSY;
+ ret = -EBUSY;
+ goto out_put;
+ }
+
+ /* pin the module */
+ if (!try_module_get(ss->module)) {
+ ret = -ENOENT;
+ goto out_put;
}
+ pinned |= 1 << i;
}
- /* Currently we don't handle adding/removing subsystems when
- * any child cgroups exist. This is theoretically supportable
- * but involves complex error handling, so it's being left until
- * later */
- if (root->number_of_cgroups > 1)
- return -EBUSY;
+ /* subsys could be missing if unloaded between parsing and here */
+ if (added_mask != pinned) {
+ ret = -ENOENT;
+ goto out_put;
+ }
+
+ ret = cgroup_populate_dir(cgrp, added_mask);
+ if (ret)
+ goto out_put;
+
+ /*
+ * Nothing can fail from this point on. Remove files for the
+ * removed subsystems and rebind each subsystem.
+ */
+ cgroup_clear_dir(cgrp, removed_mask);
- /* Process each subsystem */
for_each_subsys(ss, i) {
unsigned long bit = 1UL << i;
if (bit & added_mask) {
/* We're binding this subsystem to this hierarchy */
- BUG_ON(cgrp->subsys[i]);
- BUG_ON(!cgroup_dummy_top->subsys[i]);
- BUG_ON(cgroup_dummy_top->subsys[i]->cgroup != cgroup_dummy_top);
+ BUG_ON(cgroup_css(cgrp, ss));
+ BUG_ON(!cgroup_css(cgroup_dummy_top, ss));
+ BUG_ON(cgroup_css(cgroup_dummy_top, ss)->cgroup != cgroup_dummy_top);
+
+ rcu_assign_pointer(cgrp->subsys[i],
+ cgroup_css(cgroup_dummy_top, ss));
+ cgroup_css(cgrp, ss)->cgroup = cgrp;
- cgrp->subsys[i] = cgroup_dummy_top->subsys[i];
- cgrp->subsys[i]->cgroup = cgrp;
list_move(&ss->sibling, &root->subsys_list);
ss->root = root;
if (ss->bind)
- ss->bind(cgrp);
+ ss->bind(cgroup_css(cgrp, ss));
/* refcount was already taken, and we're keeping it */
root->subsys_mask |= bit;
} else if (bit & removed_mask) {
/* We're removing this subsystem */
- BUG_ON(cgrp->subsys[i] != cgroup_dummy_top->subsys[i]);
- BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
+ BUG_ON(cgroup_css(cgrp, ss) != cgroup_css(cgroup_dummy_top, ss));
+ BUG_ON(cgroup_css(cgrp, ss)->cgroup != cgrp);
if (ss->bind)
- ss->bind(cgroup_dummy_top);
- cgroup_dummy_top->subsys[i]->cgroup = cgroup_dummy_top;
- cgrp->subsys[i] = NULL;
+ ss->bind(cgroup_css(cgroup_dummy_top, ss));
+
+ cgroup_css(cgroup_dummy_top, ss)->cgroup = cgroup_dummy_top;
+ RCU_INIT_POINTER(cgrp->subsys[i], NULL);
+
cgroup_subsys[i]->root = &cgroup_dummy_root;
list_move(&ss->sibling, &cgroup_dummy_root.subsys_list);
/* subsystem is now free - drop reference on module */
module_put(ss->module);
root->subsys_mask &= ~bit;
- } else if (bit & root->subsys_mask) {
- /* Subsystem state should already exist */
- BUG_ON(!cgrp->subsys[i]);
- /*
- * a refcount was taken, but we already had one, so
- * drop the extra reference.
- */
- module_put(ss->module);
-#ifdef CONFIG_MODULE_UNLOAD
- BUG_ON(ss->module && !module_refcount(ss->module));
-#endif
- } else {
- /* Subsystem state shouldn't exist */
- BUG_ON(cgrp->subsys[i]);
}
}
@@ -1092,6 +1106,12 @@ static int rebind_subsystems(struct cgroupfs_root *root,
root->flags |= CGRP_ROOT_SUBSYS_BOUND;
return 0;
+
+out_put:
+ for_each_subsys(ss, i)
+ if (pinned & (1 << i))
+ module_put(ss->module);
+ return ret;
}
static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry)
@@ -1142,7 +1162,6 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
char *token, *o = data;
bool all_ss = false, one_ss = false;
unsigned long mask = (unsigned long)-1;
- bool module_pin_failed = false;
struct cgroup_subsys *ss;
int i;
@@ -1285,52 +1304,9 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
if (!opts->subsys_mask && !opts->name)
return -EINVAL;
- /*
- * Grab references on all the modules we'll need, so the subsystems
- * don't dance around before rebind_subsystems attaches them. This may
- * take duplicate reference counts on a subsystem that's already used,
- * but rebind_subsystems handles this case.
- */
- for_each_subsys(ss, i) {
- if (!(opts->subsys_mask & (1UL << i)))
- continue;
- if (!try_module_get(cgroup_subsys[i]->module)) {
- module_pin_failed = true;
- break;
- }
- }
- if (module_pin_failed) {
- /*
- * oops, one of the modules was going away. this means that we
- * raced with a module_delete call, and to the user this is
- * essentially a "subsystem doesn't exist" case.
- */
- for (i--; i >= 0; i--) {
- /* drop refcounts only on the ones we took */
- unsigned long bit = 1UL << i;
-
- if (!(bit & opts->subsys_mask))
- continue;
- module_put(cgroup_subsys[i]->module);
- }
- return -ENOENT;
- }
-
return 0;
}
-static void drop_parsed_module_refcounts(unsigned long subsys_mask)
-{
- struct cgroup_subsys *ss;
- int i;
-
- mutex_lock(&cgroup_mutex);
- for_each_subsys(ss, i)
- if (subsys_mask & (1UL << i))
- module_put(cgroup_subsys[i]->module);
- mutex_unlock(&cgroup_mutex);
-}
-
static int cgroup_remount(struct super_block *sb, int *flags, char *data)
{
int ret = 0;
@@ -1370,22 +1346,15 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
goto out_unlock;
}
- /*
- * Clear out the files of subsystems that should be removed, do
- * this before rebind_subsystems, since rebind_subsystems may
- * change this hierarchy's subsys_list.
- */
- cgroup_clear_directory(cgrp->dentry, false, removed_mask);
-
- ret = rebind_subsystems(root, added_mask, removed_mask);
- if (ret) {
- /* rebind_subsystems failed, re-populate the removed files */
- cgroup_populate_dir(cgrp, false, removed_mask);
+ /* remounting is not allowed for populated hierarchies */
+ if (root->number_of_cgroups > 1) {
+ ret = -EBUSY;
goto out_unlock;
}
- /* re-populate subsystem files */
- cgroup_populate_dir(cgrp, false, added_mask);
+ ret = rebind_subsystems(root, added_mask, removed_mask);
+ if (ret)
+ goto out_unlock;
if (opts.release_agent)
strcpy(root->release_agent_path, opts.release_agent);
@@ -1395,8 +1364,6 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
mutex_unlock(&cgroup_root_mutex);
mutex_unlock(&cgroup_mutex);
mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
- if (ret)
- drop_parsed_module_refcounts(opts.subsys_mask);
return ret;
}
@@ -1416,6 +1383,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
INIT_LIST_HEAD(&cgrp->release_list);
INIT_LIST_HEAD(&cgrp->pidlists);
mutex_init(&cgrp->pidlist_mutex);
+ cgrp->dummy_css.cgroup = cgrp;
INIT_LIST_HEAD(&cgrp->event_list);
spin_lock_init(&cgrp->event_list_lock);
simple_xattrs_init(&cgrp->xattrs);
@@ -1431,6 +1399,7 @@ static void init_cgroup_root(struct cgroupfs_root *root)
cgrp->root = root;
RCU_INIT_POINTER(cgrp->name, &root_cgroup_name);
init_cgroup_housekeeping(cgrp);
+ idr_init(&root->cgroup_idr);
}
static int cgroup_init_root_id(struct cgroupfs_root *root, int start, int end)
@@ -1503,7 +1472,6 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
*/
root->subsys_mask = opts->subsys_mask;
root->flags = opts->flags;
- ida_init(&root->cgroup_ida);
if (opts->release_agent)
strcpy(root->release_agent_path, opts->release_agent);
if (opts->name)
@@ -1519,7 +1487,7 @@ static void cgroup_free_root(struct cgroupfs_root *root)
/* hierarhcy ID shoulid already have been released */
WARN_ON_ONCE(root->hierarchy_id);
- ida_destroy(&root->cgroup_ida);
+ idr_destroy(&root->cgroup_idr);
kfree(root);
}
}
@@ -1584,7 +1552,9 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
int ret = 0;
struct super_block *sb;
struct cgroupfs_root *new_root;
+ struct list_head tmp_links;
struct inode *inode;
+ const struct cred *cred;
/* First find the desired set of subsystems */
mutex_lock(&cgroup_mutex);
@@ -1600,7 +1570,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
new_root = cgroup_root_from_opts(&opts);
if (IS_ERR(new_root)) {
ret = PTR_ERR(new_root);
- goto drop_modules;
+ goto out_err;
}
opts.new_root = new_root;
@@ -1609,17 +1579,15 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
if (IS_ERR(sb)) {
ret = PTR_ERR(sb);
cgroup_free_root(opts.new_root);
- goto drop_modules;
+ goto out_err;
}
root = sb->s_fs_info;
BUG_ON(!root);
if (root == opts.new_root) {
/* We used the new root structure, so this is a new hierarchy */
- struct list_head tmp_links;
struct cgroup *root_cgrp = &root->top_cgroup;
struct cgroupfs_root *existing_root;
- const struct cred *cred;
int i;
struct css_set *cset;
@@ -1634,6 +1602,11 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
mutex_lock(&cgroup_mutex);
mutex_lock(&cgroup_root_mutex);
+ root_cgrp->id = idr_alloc(&root->cgroup_idr, root_cgrp,
+ 0, 1, GFP_KERNEL);
+ if (root_cgrp->id < 0)
+ goto unlock_drop;
+
/* Check for name clashes with existing mounts */
ret = -EBUSY;
if (strlen(root->name))
@@ -1657,26 +1630,37 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
if (ret)
goto unlock_drop;
+ sb->s_root->d_fsdata = root_cgrp;
+ root_cgrp->dentry = sb->s_root;
+
+ /*
+ * We're inside get_sb() and will call lookup_one_len() to
+ * create the root files, which doesn't work if SELinux is
+ * in use. The following cred dancing somehow works around
+ * it. See 2ce9738ba ("cgroupfs: use init_cred when
+ * populating new cgroupfs mount") for more details.
+ */
+ cred = override_creds(&init_cred);
+
+ ret = cgroup_addrm_files(root_cgrp, cgroup_base_files, true);
+ if (ret)
+ goto rm_base_files;
+
ret = rebind_subsystems(root, root->subsys_mask, 0);
- if (ret == -EBUSY) {
- free_cgrp_cset_links(&tmp_links);
- goto unlock_drop;
- }
+ if (ret)
+ goto rm_base_files;
+
+ revert_creds(cred);
+
/*
* There must be no failure case after here, since rebinding
* takes care of subsystems' refcounts, which are explicitly
* dropped in the failure exit path.
*/
- /* EBUSY should be the only error here */
- BUG_ON(ret);
-
list_add(&root->root_list, &cgroup_roots);
cgroup_root_count++;
- sb->s_root->d_fsdata = root_cgrp;
- root->top_cgroup.dentry = sb->s_root;
-
/* Link the top cgroup in this hierarchy into all
* the css_set objects */
write_lock(&css_set_lock);
@@ -1689,9 +1673,6 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
BUG_ON(!list_empty(&root_cgrp->children));
BUG_ON(root->number_of_cgroups != 1);
- cred = override_creds(&init_cred);
- cgroup_populate_dir(root_cgrp, true, root->subsys_mask);
- revert_creds(cred);
mutex_unlock(&cgroup_root_mutex);
mutex_unlock(&cgroup_mutex);
mutex_unlock(&inode->i_mutex);
@@ -1711,15 +1692,16 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
pr_warning("cgroup: new mount options do not match the existing superblock, will be ignored\n");
}
}
-
- /* no subsys rebinding, so refcounts don't change */
- drop_parsed_module_refcounts(opts.subsys_mask);
}
kfree(opts.release_agent);
kfree(opts.name);
return dget(sb->s_root);
+ rm_base_files:
+ free_cgrp_cset_links(&tmp_links);
+ cgroup_addrm_files(&root->top_cgroup, cgroup_base_files, false);
+ revert_creds(cred);
unlock_drop:
cgroup_exit_root_id(root);
mutex_unlock(&cgroup_root_mutex);
@@ -1727,8 +1709,6 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
mutex_unlock(&inode->i_mutex);
drop_new_super:
deactivate_locked_super(sb);
- drop_modules:
- drop_parsed_module_refcounts(opts.subsys_mask);
out_err:
kfree(opts.release_agent);
kfree(opts.name);
@@ -1746,6 +1726,7 @@ static void cgroup_kill_sb(struct super_block *sb) {
BUG_ON(root->number_of_cgroups != 1);
BUG_ON(!list_empty(&cgrp->children));
+ mutex_lock(&cgrp->dentry->d_inode->i_mutex);
mutex_lock(&cgroup_mutex);
mutex_lock(&cgroup_root_mutex);
@@ -1778,6 +1759,7 @@ static void cgroup_kill_sb(struct super_block *sb) {
mutex_unlock(&cgroup_root_mutex);
mutex_unlock(&cgroup_mutex);
+ mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
simple_xattrs_free(&cgrp->xattrs);
@@ -1889,7 +1871,7 @@ EXPORT_SYMBOL_GPL(task_cgroup_path);
struct task_and_cgroup {
struct task_struct *task;
struct cgroup *cgrp;
- struct css_set *cg;
+ struct css_set *cset;
};
struct cgroup_taskset {
@@ -1939,18 +1921,20 @@ struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset)
EXPORT_SYMBOL_GPL(cgroup_taskset_next);
/**
- * cgroup_taskset_cur_cgroup - return the matching cgroup for the current task
+ * cgroup_taskset_cur_css - return the matching css for the current task
* @tset: taskset of interest
+ * @subsys_id: the ID of the target subsystem
*
- * Return the cgroup for the current (last returned) task of @tset. This
- * function must be preceded by either cgroup_taskset_first() or
- * cgroup_taskset_next().
+ * Return the css for the current (last returned) task of @tset for
+ * subsystem specified by @subsys_id. This function must be preceded by
+ * either cgroup_taskset_first() or cgroup_taskset_next().
*/
-struct cgroup *cgroup_taskset_cur_cgroup(struct cgroup_taskset *tset)
+struct cgroup_subsys_state *cgroup_taskset_cur_css(struct cgroup_taskset *tset,
+ int subsys_id)
{
- return tset->cur_cgrp;
+ return cgroup_css(tset->cur_cgrp, cgroup_subsys[subsys_id]);
}
-EXPORT_SYMBOL_GPL(cgroup_taskset_cur_cgroup);
+EXPORT_SYMBOL_GPL(cgroup_taskset_cur_css);
/**
* cgroup_taskset_size - return the number of tasks in taskset
@@ -2089,8 +2073,10 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk,
* step 1: check that we can legitimately attach to the cgroup.
*/
for_each_root_subsys(root, ss) {
+ struct cgroup_subsys_state *css = cgroup_css(cgrp, ss);
+
if (ss->can_attach) {
- retval = ss->can_attach(cgrp, &tset);
+ retval = ss->can_attach(css, &tset);
if (retval) {
failed_ss = ss;
goto out_cancel_attach;
@@ -2107,8 +2093,8 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk,
tc = flex_array_get(group, i);
old_cset = task_css_set(tc->task);
- tc->cg = find_css_set(old_cset, cgrp);
- if (!tc->cg) {
+ tc->cset = find_css_set(old_cset, cgrp);
+ if (!tc->cset) {
retval = -ENOMEM;
goto out_put_css_set_refs;
}
@@ -2121,7 +2107,7 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk,
*/
for (i = 0; i < group_size; i++) {
tc = flex_array_get(group, i);
- cgroup_task_migrate(tc->cgrp, tc->task, tc->cg);
+ cgroup_task_migrate(tc->cgrp, tc->task, tc->cset);
}
/* nothing is sensitive to fork() after this point. */
@@ -2129,8 +2115,10 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk,
* step 4: do subsystem attach callbacks.
*/
for_each_root_subsys(root, ss) {
+ struct cgroup_subsys_state *css = cgroup_css(cgrp, ss);
+
if (ss->attach)
- ss->attach(cgrp, &tset);
+ ss->attach(css, &tset);
}
/*
@@ -2141,18 +2129,20 @@ out_put_css_set_refs:
if (retval) {
for (i = 0; i < group_size; i++) {
tc = flex_array_get(group, i);
- if (!tc->cg)
+ if (!tc->cset)
break;
- put_css_set(tc->cg);
+ put_css_set(tc->cset);
}
}
out_cancel_attach:
if (retval) {
for_each_root_subsys(root, ss) {
+ struct cgroup_subsys_state *css = cgroup_css(cgrp, ss);
+
if (ss == failed_ss)
break;
if (ss->cancel_attach)
- ss->cancel_attach(cgrp, &tset);
+ ss->cancel_attach(css, &tset);
}
}
out_free_group_list:
@@ -2253,9 +2243,9 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
mutex_lock(&cgroup_mutex);
for_each_active_root(root) {
- struct cgroup *from_cg = task_cgroup_from_root(from, root);
+ struct cgroup *from_cgrp = task_cgroup_from_root(from, root);
- retval = cgroup_attach_task(from_cg, tsk, false);
+ retval = cgroup_attach_task(from_cgrp, tsk, false);
if (retval)
break;
}
@@ -2265,34 +2255,38 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
}
EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
-static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid)
+static int cgroup_tasks_write(struct cgroup_subsys_state *css,
+ struct cftype *cft, u64 pid)
{
- return attach_task_by_pid(cgrp, pid, false);
+ return attach_task_by_pid(css->cgroup, pid, false);
}
-static int cgroup_procs_write(struct cgroup *cgrp, struct cftype *cft, u64 tgid)
+static int cgroup_procs_write(struct cgroup_subsys_state *css,
+ struct cftype *cft, u64 tgid)
{
- return attach_task_by_pid(cgrp, tgid, true);
+ return attach_task_by_pid(css->cgroup, tgid, true);
}
-static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft,
- const char *buffer)
+static int cgroup_release_agent_write(struct cgroup_subsys_state *css,
+ struct cftype *cft, const char *buffer)
{
- BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
+ BUILD_BUG_ON(sizeof(css->cgroup->root->release_agent_path) < PATH_MAX);
if (strlen(buffer) >= PATH_MAX)
return -EINVAL;
- if (!cgroup_lock_live_group(cgrp))
+ if (!cgroup_lock_live_group(css->cgroup))
return -ENODEV;
mutex_lock(&cgroup_root_mutex);
- strcpy(cgrp->root->release_agent_path, buffer);
+ strcpy(css->cgroup->root->release_agent_path, buffer);
mutex_unlock(&cgroup_root_mutex);
mutex_unlock(&cgroup_mutex);
return 0;
}
-static int cgroup_release_agent_show(struct cgroup *cgrp, struct cftype *cft,
- struct seq_file *seq)
+static int cgroup_release_agent_show(struct cgroup_subsys_state *css,
+ struct cftype *cft, struct seq_file *seq)
{
+ struct cgroup *cgrp = css->cgroup;
+
if (!cgroup_lock_live_group(cgrp))
return -ENODEV;
seq_puts(seq, cgrp->root->release_agent_path);
@@ -2301,20 +2295,20 @@ static int cgroup_release_agent_show(struct cgroup *cgrp, struct cftype *cft,
return 0;
}
-static int cgroup_sane_behavior_show(struct cgroup *cgrp, struct cftype *cft,
- struct seq_file *seq)
+static int cgroup_sane_behavior_show(struct cgroup_subsys_state *css,
+ struct cftype *cft, struct seq_file *seq)
{
- seq_printf(seq, "%d\n", cgroup_sane_behavior(cgrp));
+ seq_printf(seq, "%d\n", cgroup_sane_behavior(css->cgroup));
return 0;
}
/* A buffer size big enough for numbers or short strings */
#define CGROUP_LOCAL_BUFFER_SIZE 64
-static ssize_t cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft,
- struct file *file,
- const char __user *userbuf,
- size_t nbytes, loff_t *unused_ppos)
+static ssize_t cgroup_write_X64(struct cgroup_subsys_state *css,
+ struct cftype *cft, struct file *file,
+ const char __user *userbuf, size_t nbytes,
+ loff_t *unused_ppos)
{
char buffer[CGROUP_LOCAL_BUFFER_SIZE];
int retval = 0;
@@ -2332,22 +2326,22 @@ static ssize_t cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft,
u64 val = simple_strtoull(strstrip(buffer), &end, 0);
if (*end)
return -EINVAL;
- retval = cft->write_u64(cgrp, cft, val);
+ retval = cft->write_u64(css, cft, val);
} else {
s64 val = simple_strtoll(strstrip(buffer), &end, 0);
if (*end)
return -EINVAL;
- retval = cft->write_s64(cgrp, cft, val);
+ retval = cft->write_s64(css, cft, val);
}
if (!retval)
retval = nbytes;
return retval;
}
-static ssize_t cgroup_write_string(struct cgroup *cgrp, struct cftype *cft,
- struct file *file,
- const char __user *userbuf,
- size_t nbytes, loff_t *unused_ppos)
+static ssize_t cgroup_write_string(struct cgroup_subsys_state *css,
+ struct cftype *cft, struct file *file,
+ const char __user *userbuf, size_t nbytes,
+ loff_t *unused_ppos)
{
char local_buffer[CGROUP_LOCAL_BUFFER_SIZE];
int retval = 0;
@@ -2370,7 +2364,7 @@ static ssize_t cgroup_write_string(struct cgroup *cgrp, struct cftype *cft,
}
buffer[nbytes] = 0; /* nul-terminate */
- retval = cft->write_string(cgrp, cft, strstrip(buffer));
+ retval = cft->write_string(css, cft, strstrip(buffer));
if (!retval)
retval = nbytes;
out:
@@ -2380,65 +2374,60 @@ out:
}
static ssize_t cgroup_file_write(struct file *file, const char __user *buf,
- size_t nbytes, loff_t *ppos)
+ size_t nbytes, loff_t *ppos)
{
+ struct cfent *cfe = __d_cfe(file->f_dentry);
struct cftype *cft = __d_cft(file->f_dentry);
- struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
+ struct cgroup_subsys_state *css = cfe->css;
- if (cgroup_is_dead(cgrp))
- return -ENODEV;
if (cft->write)
- return cft->write(cgrp, cft, file, buf, nbytes, ppos);
+ return cft->write(css, cft, file, buf, nbytes, ppos);
if (cft->write_u64 || cft->write_s64)
- return cgroup_write_X64(cgrp, cft, file, buf, nbytes, ppos);
+ return cgroup_write_X64(css, cft, file, buf, nbytes, ppos);
if (cft->write_string)
- return cgroup_write_string(cgrp, cft, file, buf, nbytes, ppos);
+ return cgroup_write_string(css, cft, file, buf, nbytes, ppos);
if (cft->trigger) {
- int ret = cft->trigger(cgrp, (unsigned int)cft->private);
+ int ret = cft->trigger(css, (unsigned int)cft->private);
return ret ? ret : nbytes;
}
return -EINVAL;
}
-static ssize_t cgroup_read_u64(struct cgroup *cgrp, struct cftype *cft,
- struct file *file,
- char __user *buf, size_t nbytes,
- loff_t *ppos)
+static ssize_t cgroup_read_u64(struct cgroup_subsys_state *css,
+ struct cftype *cft, struct file *file,
+ char __user *buf, size_t nbytes, loff_t *ppos)
{
char tmp[CGROUP_LOCAL_BUFFER_SIZE];
- u64 val = cft->read_u64(cgrp, cft);
+ u64 val = cft->read_u64(css, cft);
int len = sprintf(tmp, "%llu\n", (unsigned long long) val);
return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
}
-static ssize_t cgroup_read_s64(struct cgroup *cgrp, struct cftype *cft,
- struct file *file,
- char __user *buf, size_t nbytes,
- loff_t *ppos)
+static ssize_t cgroup_read_s64(struct cgroup_subsys_state *css,
+ struct cftype *cft, struct file *file,
+ char __user *buf, size_t nbytes, loff_t *ppos)
{
char tmp[CGROUP_LOCAL_BUFFER_SIZE];
- s64 val = cft->read_s64(cgrp, cft);
+ s64 val = cft->read_s64(css, cft);
int len = sprintf(tmp, "%lld\n", (long long) val);
return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
}
static ssize_t cgroup_file_read(struct file *file, char __user *buf,
- size_t nbytes, loff_t *ppos)
+ size_t nbytes, loff_t *ppos)
{
+ struct cfent *cfe = __d_cfe(file->f_dentry);
struct cftype *cft = __d_cft(file->f_dentry);
- struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
-
- if (cgroup_is_dead(cgrp))
- return -ENODEV;
+ struct cgroup_subsys_state *css = cfe->css;
if (cft->read)
- return cft->read(cgrp, cft, file, buf, nbytes, ppos);
+ return cft->read(css, cft, file, buf, nbytes, ppos);
if (cft->read_u64)
- return cgroup_read_u64(cgrp, cft, file, buf, nbytes, ppos);
+ return cgroup_read_u64(css, cft, file, buf, nbytes, ppos);
if (cft->read_s64)
- return cgroup_read_s64(cgrp, cft, file, buf, nbytes, ppos);
+ return cgroup_read_s64(css, cft, file, buf, nbytes, ppos);
return -EINVAL;
}
@@ -2447,11 +2436,6 @@ static ssize_t cgroup_file_read(struct file *file, char __user *buf,
* supports string->u64 maps, but can be extended in future.
*/
-struct cgroup_seqfile_state {
- struct cftype *cft;
- struct cgroup *cgroup;
-};
-
static int cgroup_map_add(struct cgroup_map_cb *cb, const char *key, u64 value)
{
struct seq_file *sf = cb->state;
@@ -2460,69 +2444,86 @@ static int cgroup_map_add(struct cgroup_map_cb *cb, const char *key, u64 value)
static int cgroup_seqfile_show(struct seq_file *m, void *arg)
{
- struct cgroup_seqfile_state *state = m->private;
- struct cftype *cft = state->cft;
+ struct cfent *cfe = m->private;
+ struct cftype *cft = cfe->type;
+ struct cgroup_subsys_state *css = cfe->css;
+
if (cft->read_map) {
struct cgroup_map_cb cb = {
.fill = cgroup_map_add,
.state = m,
};
- return cft->read_map(state->cgroup, cft, &cb);
+ return cft->read_map(css, cft, &cb);
}
- return cft->read_seq_string(state->cgroup, cft, m);
-}
-
-static int cgroup_seqfile_release(struct inode *inode, struct file *file)
-{
- struct seq_file *seq = file->private_data;
- kfree(seq->private);
- return single_release(inode, file);
+ return cft->read_seq_string(css, cft, m);
}
static const struct file_operations cgroup_seqfile_operations = {
.read = seq_read,
.write = cgroup_file_write,
.llseek = seq_lseek,
- .release = cgroup_seqfile_release,
+ .release = single_release,
};
static int cgroup_file_open(struct inode *inode, struct file *file)
{
+ struct cfent *cfe = __d_cfe(file->f_dentry);
+ struct cftype *cft = __d_cft(file->f_dentry);
+ struct cgroup *cgrp = __d_cgrp(cfe->dentry->d_parent);
+ struct cgroup_subsys_state *css;
int err;
- struct cftype *cft;
err = generic_file_open(inode, file);
if (err)
return err;
- cft = __d_cft(file->f_dentry);
- if (cft->read_map || cft->read_seq_string) {
- struct cgroup_seqfile_state *state;
+ /*
+ * If the file belongs to a subsystem, pin the css. Will be
+ * unpinned either on open failure or release. This ensures that
+ * @css stays alive for all file operations.
+ */
+ rcu_read_lock();
+ css = cgroup_css(cgrp, cft->ss);
+ if (cft->ss && !css_tryget(css))
+ css = NULL;
+ rcu_read_unlock();
- state = kzalloc(sizeof(*state), GFP_USER);
- if (!state)
- return -ENOMEM;
+ if (!css)
+ return -ENODEV;
+
+ /*
+ * @cfe->css is used by read/write/close to determine the
+ * associated css. @file->private_data would be a better place but
+ * that's already used by seqfile. Multiple accessors may use it
+ * simultaneously which is okay as the association never changes.
+ */
+ WARN_ON_ONCE(cfe->css && cfe->css != css);
+ cfe->css = css;
- state->cft = cft;
- state->cgroup = __d_cgrp(file->f_dentry->d_parent);
+ if (cft->read_map || cft->read_seq_string) {
file->f_op = &cgroup_seqfile_operations;
- err = single_open(file, cgroup_seqfile_show, state);
- if (err < 0)
- kfree(state);
- } else if (cft->open)
+ err = single_open(file, cgroup_seqfile_show, cfe);
+ } else if (cft->open) {
err = cft->open(inode, file);
- else
- err = 0;
+ }
+ if (css->ss && err)
+ css_put(css);
return err;
}
static int cgroup_file_release(struct inode *inode, struct file *file)
{
+ struct cfent *cfe = __d_cfe(file->f_dentry);
struct cftype *cft = __d_cft(file->f_dentry);
+ struct cgroup_subsys_state *css = cfe->css;
+ int ret = 0;
+
if (cft->release)
- return cft->release(inode, file);
- return 0;
+ ret = cft->release(inode, file);
+ if (css->ss)
+ css_put(css);
+ return ret;
}
/*
@@ -2736,8 +2737,7 @@ static umode_t cgroup_file_mode(const struct cftype *cft)
return mode;
}
-static int cgroup_add_file(struct cgroup *cgrp, struct cgroup_subsys *subsys,
- struct cftype *cft)
+static int cgroup_add_file(struct cgroup *cgrp, struct cftype *cft)
{
struct dentry *dir = cgrp->dentry;
struct cgroup *parent = __d_cgrp(dir);
@@ -2747,8 +2747,9 @@ static int cgroup_add_file(struct cgroup *cgrp, struct cgroup_subsys *subsys,
umode_t mode;
char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 };
- if (subsys && !(cgrp->root->flags & CGRP_ROOT_NOPREFIX)) {
- strcpy(name, subsys->name);
+ if (cft->ss && !(cft->flags & CFTYPE_NO_PREFIX) &&
+ !(cgrp->root->flags & CGRP_ROOT_NOPREFIX)) {
+ strcpy(name, cft->ss->name);
strcat(name, ".");
}
strcat(name, cft->name);
@@ -2782,11 +2783,25 @@ out:
return error;
}
-static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys,
- struct cftype cfts[], bool is_add)
+/**
+ * cgroup_addrm_files - add or remove files to a cgroup directory
+ * @cgrp: the target cgroup
+ * @cfts: array of cftypes to be added
+ * @is_add: whether to add or remove
+ *
+ * Depending on @is_add, add or remove files defined by @cfts on @cgrp.
+ * For removals, this function never fails. If addition fails, this
+ * function doesn't remove files already added. The caller is responsible
+ * for cleaning up.
+ */
+static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[],
+ bool is_add)
{
struct cftype *cft;
- int err, ret = 0;
+ int ret;
+
+ lockdep_assert_held(&cgrp->dentry->d_inode->i_mutex);
+ lockdep_assert_held(&cgroup_mutex);
for (cft = cfts; cft->name[0] != '\0'; cft++) {
/* does cft->flags tell us to skip this file on @cgrp? */
@@ -2798,16 +2813,17 @@ static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys,
continue;
if (is_add) {
- err = cgroup_add_file(cgrp, subsys, cft);
- if (err)
+ ret = cgroup_add_file(cgrp, cft);
+ if (ret) {
pr_warn("cgroup_addrm_files: failed to add %s, err=%d\n",
- cft->name, err);
- ret = err;
+ cft->name, ret);
+ return ret;
+ }
} else {
cgroup_rm_file(cgrp, cft);
}
}
- return ret;
+ return 0;
}
static void cgroup_cfts_prepare(void)
@@ -2816,28 +2832,30 @@ static void cgroup_cfts_prepare(void)
/*
* Thanks to the entanglement with vfs inode locking, we can't walk
* the existing cgroups under cgroup_mutex and create files.
- * Instead, we use cgroup_for_each_descendant_pre() and drop RCU
- * read lock before calling cgroup_addrm_files().
+ * Instead, we use css_for_each_descendant_pre() and drop RCU read
+ * lock before calling cgroup_addrm_files().
*/
mutex_lock(&cgroup_mutex);
}
-static void cgroup_cfts_commit(struct cgroup_subsys *ss,
- struct cftype *cfts, bool is_add)
+static int cgroup_cfts_commit(struct cftype *cfts, bool is_add)
__releases(&cgroup_mutex)
{
LIST_HEAD(pending);
- struct cgroup *cgrp, *root = &ss->root->top_cgroup;
+ struct cgroup_subsys *ss = cfts[0].ss;
+ struct cgroup *root = &ss->root->top_cgroup;
struct super_block *sb = ss->root->sb;
struct dentry *prev = NULL;
struct inode *inode;
+ struct cgroup_subsys_state *css;
u64 update_before;
+ int ret = 0;
/* %NULL @cfts indicates abort and don't bother if @ss isn't attached */
if (!cfts || ss->root == &cgroup_dummy_root ||
!atomic_inc_not_zero(&sb->s_active)) {
mutex_unlock(&cgroup_mutex);
- return;
+ return 0;
}
/*
@@ -2849,17 +2867,11 @@ static void cgroup_cfts_commit(struct cgroup_subsys *ss,
mutex_unlock(&cgroup_mutex);
- /* @root always needs to be updated */
- inode = root->dentry->d_inode;
- mutex_lock(&inode->i_mutex);
- mutex_lock(&cgroup_mutex);
- cgroup_addrm_files(root, ss, cfts, is_add);
- mutex_unlock(&cgroup_mutex);
- mutex_unlock(&inode->i_mutex);
-
/* add/rm files for all cgroups created before */
rcu_read_lock();
- cgroup_for_each_descendant_pre(cgrp, root) {
+ css_for_each_descendant_pre(css, cgroup_css(root, ss)) {
+ struct cgroup *cgrp = css->cgroup;
+
if (cgroup_is_dead(cgrp))
continue;
@@ -2873,15 +2885,18 @@ static void cgroup_cfts_commit(struct cgroup_subsys *ss,
mutex_lock(&inode->i_mutex);
mutex_lock(&cgroup_mutex);
if (cgrp->serial_nr < update_before && !cgroup_is_dead(cgrp))
- cgroup_addrm_files(cgrp, ss, cfts, is_add);
+ ret = cgroup_addrm_files(cgrp, cfts, is_add);
mutex_unlock(&cgroup_mutex);
mutex_unlock(&inode->i_mutex);
rcu_read_lock();
+ if (ret)
+ break;
}
rcu_read_unlock();
dput(prev);
deactivate_super(sb);
+ return ret;
}
/**
@@ -2901,49 +2916,56 @@ static void cgroup_cfts_commit(struct cgroup_subsys *ss,
int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
{
struct cftype_set *set;
+ struct cftype *cft;
+ int ret;
set = kzalloc(sizeof(*set), GFP_KERNEL);
if (!set)
return -ENOMEM;
+ for (cft = cfts; cft->name[0] != '\0'; cft++)
+ cft->ss = ss;
+
cgroup_cfts_prepare();
set->cfts = cfts;
list_add_tail(&set->node, &ss->cftsets);
- cgroup_cfts_commit(ss, cfts, true);
-
- return 0;
+ ret = cgroup_cfts_commit(cfts, true);
+ if (ret)
+ cgroup_rm_cftypes(cfts);
+ return ret;
}
EXPORT_SYMBOL_GPL(cgroup_add_cftypes);
/**
* cgroup_rm_cftypes - remove an array of cftypes from a subsystem
- * @ss: target cgroup subsystem
* @cfts: zero-length name terminated array of cftypes
*
- * Unregister @cfts from @ss. Files described by @cfts are removed from
- * all existing cgroups to which @ss is attached and all future cgroups
- * won't have them either. This function can be called anytime whether @ss
- * is attached or not.
+ * Unregister @cfts. Files described by @cfts are removed from all
+ * existing cgroups and all future cgroups won't have them either. This
+ * function can be called anytime whether @cfts' subsys is attached or not.
*
* Returns 0 on successful unregistration, -ENOENT if @cfts is not
- * registered with @ss.
+ * registered.
*/
-int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
+int cgroup_rm_cftypes(struct cftype *cfts)
{
struct cftype_set *set;
+ if (!cfts || !cfts[0].ss)
+ return -ENOENT;
+
cgroup_cfts_prepare();
- list_for_each_entry(set, &ss->cftsets, node) {
+ list_for_each_entry(set, &cfts[0].ss->cftsets, node) {
if (set->cfts == cfts) {
list_del(&set->node);
kfree(set);
- cgroup_cfts_commit(ss, cfts, false);
+ cgroup_cfts_commit(cfts, false);
return 0;
}
}
- cgroup_cfts_commit(ss, NULL, false);
+ cgroup_cfts_commit(NULL, false);
return -ENOENT;
}
@@ -2966,34 +2988,10 @@ int cgroup_task_count(const struct cgroup *cgrp)
}
/*
- * Advance a list_head iterator. The iterator should be positioned at
- * the start of a css_set
- */
-static void cgroup_advance_iter(struct cgroup *cgrp, struct cgroup_iter *it)
-{
- struct list_head *l = it->cset_link;
- struct cgrp_cset_link *link;
- struct css_set *cset;
-
- /* Advance to the next non-empty css_set */
- do {
- l = l->next;
- if (l == &cgrp->cset_links) {
- it->cset_link = NULL;
- return;
- }
- link = list_entry(l, struct cgrp_cset_link, cset_link);
- cset = link->cset;
- } while (list_empty(&cset->tasks));
- it->cset_link = l;
- it->task = cset->tasks.next;
-}
-
-/*
- * To reduce the fork() overhead for systems that are not actually
- * using their cgroups capability, we don't maintain the lists running
- * through each css_set to its tasks until we see the list actually
- * used - in other words after the first call to cgroup_iter_start().
+ * To reduce the fork() overhead for systems that are not actually using
+ * their cgroups capability, we don't maintain the lists running through
+ * each css_set to its tasks until we see the list actually used - in other
+ * words after the first call to css_task_iter_start().
*/
static void cgroup_enable_task_cg_lists(void)
{
@@ -3024,16 +3022,21 @@ static void cgroup_enable_task_cg_lists(void)
}
/**
- * cgroup_next_sibling - find the next sibling of a given cgroup
- * @pos: the current cgroup
+ * css_next_child - find the next child of a given css
+ * @pos_css: the current position (%NULL to initiate traversal)
+ * @parent_css: css whose children to walk
*
- * This function returns the next sibling of @pos and should be called
- * under RCU read lock. The only requirement is that @pos is accessible.
- * The next sibling is guaranteed to be returned regardless of @pos's
- * state.
+ * This function returns the next child of @parent_css and should be called
+ * under RCU read lock. The only requirement is that @parent_css and
+ * @pos_css are accessible. The next sibling is guaranteed to be returned
+ * regardless of their states.
*/
-struct cgroup *cgroup_next_sibling(struct cgroup *pos)
+struct cgroup_subsys_state *
+css_next_child(struct cgroup_subsys_state *pos_css,
+ struct cgroup_subsys_state *parent_css)
{
+ struct cgroup *pos = pos_css ? pos_css->cgroup : NULL;
+ struct cgroup *cgrp = parent_css->cgroup;
struct cgroup *next;
WARN_ON_ONCE(!rcu_read_lock_held());
@@ -3048,78 +3051,81 @@ struct cgroup *cgroup_next_sibling(struct cgroup *pos)
* safe to dereference from this RCU critical section. If
* ->sibling.next is inaccessible, cgroup_is_dead() is guaranteed
* to be visible as %true here.
+ *
+ * If @pos is dead, its next pointer can't be dereferenced;
+ * however, as each cgroup is given a monotonically increasing
+ * unique serial number and always appended to the sibling list,
+ * the next one can be found by walking the parent's children until
+ * we see a cgroup with higher serial number than @pos's. While
+ * this path can be slower, it's taken only when either the current
+ * cgroup is removed or iteration and removal race.
*/
- if (likely(!cgroup_is_dead(pos))) {
+ if (!pos) {
+ next = list_entry_rcu(cgrp->children.next, struct cgroup, sibling);
+ } else if (likely(!cgroup_is_dead(pos))) {
next = list_entry_rcu(pos->sibling.next, struct cgroup, sibling);
- if (&next->sibling != &pos->parent->children)
- return next;
- return NULL;
+ } else {
+ list_for_each_entry_rcu(next, &cgrp->children, sibling)
+ if (next->serial_nr > pos->serial_nr)
+ break;
}
- /*
- * Can't dereference the next pointer. Each cgroup is given a
- * monotonically increasing unique serial number and always
- * appended to the sibling list, so the next one can be found by
- * walking the parent's children until we see a cgroup with higher
- * serial number than @pos's.
- *
- * While this path can be slow, it's taken only when either the
- * current cgroup is removed or iteration and removal race.
- */
- list_for_each_entry_rcu(next, &pos->parent->children, sibling)
- if (next->serial_nr > pos->serial_nr)
- return next;
- return NULL;
+ if (&next->sibling == &cgrp->children)
+ return NULL;
+
+ return cgroup_css(next, parent_css->ss);
}
-EXPORT_SYMBOL_GPL(cgroup_next_sibling);
+EXPORT_SYMBOL_GPL(css_next_child);
/**
- * cgroup_next_descendant_pre - find the next descendant for pre-order walk
+ * css_next_descendant_pre - find the next descendant for pre-order walk
* @pos: the current position (%NULL to initiate traversal)
- * @cgroup: cgroup whose descendants to walk
+ * @root: css whose descendants to walk
*
- * To be used by cgroup_for_each_descendant_pre(). Find the next
- * descendant to visit for pre-order traversal of @cgroup's descendants.
+ * To be used by css_for_each_descendant_pre(). Find the next descendant
+ * to visit for pre-order traversal of @root's descendants. @root is
+ * included in the iteration and the first node to be visited.
*
* While this function requires RCU read locking, it doesn't require the
* whole traversal to be contained in a single RCU critical section. This
* function will return the correct next descendant as long as both @pos
- * and @cgroup are accessible and @pos is a descendant of @cgroup.
+ * and @root are accessible and @pos is a descendant of @root.
*/
-struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos,
- struct cgroup *cgroup)
+struct cgroup_subsys_state *
+css_next_descendant_pre(struct cgroup_subsys_state *pos,
+ struct cgroup_subsys_state *root)
{
- struct cgroup *next;
+ struct cgroup_subsys_state *next;
WARN_ON_ONCE(!rcu_read_lock_held());
- /* if first iteration, pretend we just visited @cgroup */
+ /* if first iteration, visit @root */
if (!pos)
- pos = cgroup;
+ return root;
/* visit the first child if exists */
- next = list_first_or_null_rcu(&pos->children, struct cgroup, sibling);
+ next = css_next_child(NULL, pos);
if (next)
return next;
/* no child, visit my or the closest ancestor's next sibling */
- while (pos != cgroup) {
- next = cgroup_next_sibling(pos);
+ while (pos != root) {
+ next = css_next_child(pos, css_parent(pos));
if (next)
return next;
- pos = pos->parent;
+ pos = css_parent(pos);
}
return NULL;
}
-EXPORT_SYMBOL_GPL(cgroup_next_descendant_pre);
+EXPORT_SYMBOL_GPL(css_next_descendant_pre);
/**
- * cgroup_rightmost_descendant - return the rightmost descendant of a cgroup
- * @pos: cgroup of interest
+ * css_rightmost_descendant - return the rightmost descendant of a css
+ * @pos: css of interest
*
- * Return the rightmost descendant of @pos. If there's no descendant,
- * @pos is returned. This can be used during pre-order traversal to skip
+ * Return the rightmost descendant of @pos. If there's no descendant, @pos
+ * is returned. This can be used during pre-order traversal to skip
* subtree of @pos.
*
* While this function requires RCU read locking, it doesn't require the
@@ -3127,9 +3133,10 @@ EXPORT_SYMBOL_GPL(cgroup_next_descendant_pre);
* function will return the correct rightmost descendant as long as @pos is
* accessible.
*/
-struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos)
+struct cgroup_subsys_state *
+css_rightmost_descendant(struct cgroup_subsys_state *pos)
{
- struct cgroup *last, *tmp;
+ struct cgroup_subsys_state *last, *tmp;
WARN_ON_ONCE(!rcu_read_lock_held());
@@ -3137,82 +3144,138 @@ struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos)
last = pos;
/* ->prev isn't RCU safe, walk ->next till the end */
pos = NULL;
- list_for_each_entry_rcu(tmp, &last->children, sibling)
+ css_for_each_child(tmp, last)
pos = tmp;
} while (pos);
return last;
}
-EXPORT_SYMBOL_GPL(cgroup_rightmost_descendant);
+EXPORT_SYMBOL_GPL(css_rightmost_descendant);
-static struct cgroup *cgroup_leftmost_descendant(struct cgroup *pos)
+static struct cgroup_subsys_state *
+css_leftmost_descendant(struct cgroup_subsys_state *pos)
{
- struct cgroup *last;
+ struct cgroup_subsys_state *last;
do {
last = pos;
- pos = list_first_or_null_rcu(&pos->children, struct cgroup,
- sibling);
+ pos = css_next_child(NULL, pos);
} while (pos);
return last;
}
/**
- * cgroup_next_descendant_post - find the next descendant for post-order walk
+ * css_next_descendant_post - find the next descendant for post-order walk
* @pos: the current position (%NULL to initiate traversal)
- * @cgroup: cgroup whose descendants to walk
+ * @root: css whose descendants to walk
*
- * To be used by cgroup_for_each_descendant_post(). Find the next
- * descendant to visit for post-order traversal of @cgroup's descendants.
+ * To be used by css_for_each_descendant_post(). Find the next descendant
+ * to visit for post-order traversal of @root's descendants. @root is
+ * included in the iteration and the last node to be visited.
*
* While this function requires RCU read locking, it doesn't require the
* whole traversal to be contained in a single RCU critical section. This
* function will return the correct next descendant as long as both @pos
* and @cgroup are accessible and @pos is a descendant of @cgroup.
*/
-struct cgroup *cgroup_next_descendant_post(struct cgroup *pos,
- struct cgroup *cgroup)
+struct cgroup_subsys_state *
+css_next_descendant_post(struct cgroup_subsys_state *pos,
+ struct cgroup_subsys_state *root)
{
- struct cgroup *next;
+ struct cgroup_subsys_state *next;
WARN_ON_ONCE(!rcu_read_lock_held());
/* if first iteration, visit the leftmost descendant */
if (!pos) {
- next = cgroup_leftmost_descendant(cgroup);
- return next != cgroup ? next : NULL;
+ next = css_leftmost_descendant(root);
+ return next != root ? next : NULL;
}
+ /* if we visited @root, we're done */
+ if (pos == root)
+ return NULL;
+
/* if there's an unvisited sibling, visit its leftmost descendant */
- next = cgroup_next_sibling(pos);
+ next = css_next_child(pos, css_parent(pos));
if (next)
- return cgroup_leftmost_descendant(next);
+ return css_leftmost_descendant(next);
/* no sibling left, visit parent */
- next = pos->parent;
- return next != cgroup ? next : NULL;
+ return css_parent(pos);
}
-EXPORT_SYMBOL_GPL(cgroup_next_descendant_post);
+EXPORT_SYMBOL_GPL(css_next_descendant_post);
-void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it)
+/**
+ * css_advance_task_iter - advance a task itererator to the next css_set
+ * @it: the iterator to advance
+ *
+ * Advance @it to the next css_set to walk.
+ */
+static void css_advance_task_iter(struct css_task_iter *it)
+{
+ struct list_head *l = it->cset_link;
+ struct cgrp_cset_link *link;
+ struct css_set *cset;
+
+ /* Advance to the next non-empty css_set */
+ do {
+ l = l->next;
+ if (l == &it->origin_css->cgroup->cset_links) {
+ it->cset_link = NULL;
+ return;
+ }
+ link = list_entry(l, struct cgrp_cset_link, cset_link);
+ cset = link->cset;
+ } while (list_empty(&cset->tasks));
+ it->cset_link = l;
+ it->task = cset->tasks.next;
+}
+
+/**
+ * css_task_iter_start - initiate task iteration
+ * @css: the css to walk tasks of
+ * @it: the task iterator to use
+ *
+ * Initiate iteration through the tasks of @css. The caller can call
+ * css_task_iter_next() to walk through the tasks until the function
+ * returns NULL. On completion of iteration, css_task_iter_end() must be
+ * called.
+ *
+ * Note that this function acquires a lock which is released when the
+ * iteration finishes. The caller can't sleep while iteration is in
+ * progress.
+ */
+void css_task_iter_start(struct cgroup_subsys_state *css,
+ struct css_task_iter *it)
__acquires(css_set_lock)
{
/*
- * The first time anyone tries to iterate across a cgroup,
- * we need to enable the list linking each css_set to its
- * tasks, and fix up all existing tasks.
+ * The first time anyone tries to iterate across a css, we need to
+ * enable the list linking each css_set to its tasks, and fix up
+ * all existing tasks.
*/
if (!use_task_css_set_links)
cgroup_enable_task_cg_lists();
read_lock(&css_set_lock);
- it->cset_link = &cgrp->cset_links;
- cgroup_advance_iter(cgrp, it);
+
+ it->origin_css = css;
+ it->cset_link = &css->cgroup->cset_links;
+
+ css_advance_task_iter(it);
}
-struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
- struct cgroup_iter *it)
+/**
+ * css_task_iter_next - return the next task for the iterator
+ * @it: the task iterator being iterated
+ *
+ * The "next" function for task iteration. @it should have been
+ * initialized via css_task_iter_start(). Returns NULL when the iteration
+ * reaches the end.
+ */
+struct task_struct *css_task_iter_next(struct css_task_iter *it)
{
struct task_struct *res;
struct list_head *l = it->task;
@@ -3226,16 +3289,24 @@ struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
l = l->next;
link = list_entry(it->cset_link, struct cgrp_cset_link, cset_link);
if (l == &link->cset->tasks) {
- /* We reached the end of this task list - move on to
- * the next cg_cgroup_link */
- cgroup_advance_iter(cgrp, it);
+ /*
+ * We reached the end of this task list - move on to the
+ * next cgrp_cset_link.
+ */
+ css_advance_task_iter(it);
} else {
it->task = l;
}
return res;
}
-void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it)
+/**
+ * css_task_iter_end - finish task iteration
+ * @it: the task iterator to finish
+ *
+ * Finish task iteration started by css_task_iter_start().
+ */
+void css_task_iter_end(struct css_task_iter *it)
__releases(css_set_lock)
{
read_unlock(&css_set_lock);
@@ -3276,46 +3347,49 @@ static inline int started_after(void *p1, void *p2)
}
/**
- * cgroup_scan_tasks - iterate though all the tasks in a cgroup
- * @scan: struct cgroup_scanner containing arguments for the scan
+ * css_scan_tasks - iterate though all the tasks in a css
+ * @css: the css to iterate tasks of
+ * @test: optional test callback
+ * @process: process callback
+ * @data: data passed to @test and @process
+ * @heap: optional pre-allocated heap used for task iteration
+ *
+ * Iterate through all the tasks in @css, calling @test for each, and if it
+ * returns %true, call @process for it also.
+ *
+ * @test may be NULL, meaning always true (select all tasks), which
+ * effectively duplicates css_task_iter_{start,next,end}() but does not
+ * lock css_set_lock for the call to @process.
+ *
+ * It is guaranteed that @process will act on every task that is a member
+ * of @css for the duration of this call. This function may or may not
+ * call @process for tasks that exit or move to a different css during the
+ * call, or are forked or move into the css during the call.
*
- * Arguments include pointers to callback functions test_task() and
- * process_task().
- * Iterate through all the tasks in a cgroup, calling test_task() for each,
- * and if it returns true, call process_task() for it also.
- * The test_task pointer may be NULL, meaning always true (select all tasks).
- * Effectively duplicates cgroup_iter_{start,next,end}()
- * but does not lock css_set_lock for the call to process_task().
- * The struct cgroup_scanner may be embedded in any structure of the caller's
- * creation.
- * It is guaranteed that process_task() will act on every task that
- * is a member of the cgroup for the duration of this call. This
- * function may or may not call process_task() for tasks that exit
- * or move to a different cgroup during the call, or are forked or
- * move into the cgroup during the call.
+ * Note that @test may be called with locks held, and may in some
+ * situations be called multiple times for the same task, so it should be
+ * cheap.
*
- * Note that test_task() may be called with locks held, and may in some
- * situations be called multiple times for the same task, so it should
- * be cheap.
- * If the heap pointer in the struct cgroup_scanner is non-NULL, a heap has been
- * pre-allocated and will be used for heap operations (and its "gt" member will
- * be overwritten), else a temporary heap will be used (allocation of which
- * may cause this function to fail).
+ * If @heap is non-NULL, a heap has been pre-allocated and will be used for
+ * heap operations (and its "gt" member will be overwritten), else a
+ * temporary heap will be used (allocation of which may cause this function
+ * to fail).
*/
-int cgroup_scan_tasks(struct cgroup_scanner *scan)
+int css_scan_tasks(struct cgroup_subsys_state *css,
+ bool (*test)(struct task_struct *, void *),
+ void (*process)(struct task_struct *, void *),
+ void *data, struct ptr_heap *heap)
{
int retval, i;
- struct cgroup_iter it;
+ struct css_task_iter it;
struct task_struct *p, *dropped;
/* Never dereference latest_task, since it's not refcounted */
struct task_struct *latest_task = NULL;
struct ptr_heap tmp_heap;
- struct ptr_heap *heap;
struct timespec latest_time = { 0, 0 };
- if (scan->heap) {
+ if (heap) {
/* The caller supplied our heap and pre-allocated its memory */
- heap = scan->heap;
heap->gt = &started_after;
} else {
/* We need to allocate our own heap memory */
@@ -3328,25 +3402,24 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan)
again:
/*
- * Scan tasks in the cgroup, using the scanner's "test_task" callback
- * to determine which are of interest, and using the scanner's
- * "process_task" callback to process any of them that need an update.
- * Since we don't want to hold any locks during the task updates,
- * gather tasks to be processed in a heap structure.
- * The heap is sorted by descending task start time.
- * If the statically-sized heap fills up, we overflow tasks that
- * started later, and in future iterations only consider tasks that
- * started after the latest task in the previous pass. This
+ * Scan tasks in the css, using the @test callback to determine
+ * which are of interest, and invoking @process callback on the
+ * ones which need an update. Since we don't want to hold any
+ * locks during the task updates, gather tasks to be processed in a
+ * heap structure. The heap is sorted by descending task start
+ * time. If the statically-sized heap fills up, we overflow tasks
+ * that started later, and in future iterations only consider tasks
+ * that started after the latest task in the previous pass. This
* guarantees forward progress and that we don't miss any tasks.
*/
heap->size = 0;
- cgroup_iter_start(scan->cg, &it);
- while ((p = cgroup_iter_next(scan->cg, &it))) {
+ css_task_iter_start(css, &it);
+ while ((p = css_task_iter_next(&it))) {
/*
* Only affect tasks that qualify per the caller's callback,
* if he provided one
*/
- if (scan->test_task && !scan->test_task(p, scan))
+ if (test && !test(p, data))
continue;
/*
* Only process tasks that started after the last task
@@ -3374,7 +3447,7 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan)
* the heap and wasn't inserted
*/
}
- cgroup_iter_end(scan->cg, &it);
+ css_task_iter_end(&it);
if (heap->size) {
for (i = 0; i < heap->size; i++) {
@@ -3384,7 +3457,7 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan)
latest_task = q;
}
/* Process the task per the caller's callback */
- scan->process_task(q, scan);
+ process(q, data);
put_task_struct(q);
}
/*
@@ -3401,10 +3474,9 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan)
return 0;
}
-static void cgroup_transfer_one_task(struct task_struct *task,
- struct cgroup_scanner *scan)
+static void cgroup_transfer_one_task(struct task_struct *task, void *data)
{
- struct cgroup *new_cgroup = scan->data;
+ struct cgroup *new_cgroup = data;
mutex_lock(&cgroup_mutex);
cgroup_attach_task(new_cgroup, task, false);
@@ -3418,15 +3490,8 @@ static void cgroup_transfer_one_task(struct task_struct *task,
*/
int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
{
- struct cgroup_scanner scan;
-
- scan.cg = from;
- scan.test_task = NULL; /* select all tasks in cgroup */
- scan.process_task = cgroup_transfer_one_task;
- scan.heap = NULL;
- scan.data = to;
-
- return cgroup_scan_tasks(&scan);
+ return css_scan_tasks(&from->dummy_css, NULL, cgroup_transfer_one_task,
+ to, NULL);
}
/*
@@ -3468,7 +3533,7 @@ struct cgroup_pidlist {
/* pointer to the cgroup we belong to, for list removal purposes */
struct cgroup *owner;
/* protects the other fields */
- struct rw_semaphore mutex;
+ struct rw_semaphore rwsem;
};
/*
@@ -3541,7 +3606,7 @@ static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
struct pid_namespace *ns = task_active_pid_ns(current);
/*
- * We can't drop the pidlist_mutex before taking the l->mutex in case
+ * We can't drop the pidlist_mutex before taking the l->rwsem in case
* the last ref-holder is trying to remove l from the list at the same
* time. Holding the pidlist_mutex precludes somebody taking whichever
* list we find out from under us - compare release_pid_array().
@@ -3550,7 +3615,7 @@ static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
list_for_each_entry(l, &cgrp->pidlists, links) {
if (l->key.type == type && l->key.ns == ns) {
/* make sure l doesn't vanish out from under us */
- down_write(&l->mutex);
+ down_write(&l->rwsem);
mutex_unlock(&cgrp->pidlist_mutex);
return l;
}
@@ -3561,8 +3626,8 @@ static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
mutex_unlock(&cgrp->pidlist_mutex);
return l;
}
- init_rwsem(&l->mutex);
- down_write(&l->mutex);
+ init_rwsem(&l->rwsem);
+ down_write(&l->rwsem);
l->key.type = type;
l->key.ns = get_pid_ns(ns);
l->owner = cgrp;
@@ -3580,7 +3645,7 @@ static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type,
pid_t *array;
int length;
int pid, n = 0; /* used for populating the array */
- struct cgroup_iter it;
+ struct css_task_iter it;
struct task_struct *tsk;
struct cgroup_pidlist *l;
@@ -3595,8 +3660,8 @@ static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type,
if (!array)
return -ENOMEM;
/* now, populate the array */
- cgroup_iter_start(cgrp, &it);
- while ((tsk = cgroup_iter_next(cgrp, &it))) {
+ css_task_iter_start(&cgrp->dummy_css, &it);
+ while ((tsk = css_task_iter_next(&it))) {
if (unlikely(n == length))
break;
/* get tgid or pid for procs or tasks file respectively */
@@ -3607,7 +3672,7 @@ static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type,
if (pid > 0) /* make sure to only use valid results */
array[n++] = pid;
}
- cgroup_iter_end(cgrp, &it);
+ css_task_iter_end(&it);
length = n;
/* now sort & (if procs) strip out duplicates */
sort(array, length, sizeof(pid_t), cmppid, NULL);
@@ -3623,7 +3688,7 @@ static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type,
l->list = array;
l->length = length;
l->use_count++;
- up_write(&l->mutex);
+ up_write(&l->rwsem);
*lp = l;
return 0;
}
@@ -3641,7 +3706,7 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
{
int ret = -EINVAL;
struct cgroup *cgrp;
- struct cgroup_iter it;
+ struct css_task_iter it;
struct task_struct *tsk;
/*
@@ -3655,8 +3720,8 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
ret = 0;
cgrp = dentry->d_fsdata;
- cgroup_iter_start(cgrp, &it);
- while ((tsk = cgroup_iter_next(cgrp, &it))) {
+ css_task_iter_start(&cgrp->dummy_css, &it);
+ while ((tsk = css_task_iter_next(&it))) {
switch (tsk->state) {
case TASK_RUNNING:
stats->nr_running++;
@@ -3676,7 +3741,7 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
break;
}
}
- cgroup_iter_end(cgrp, &it);
+ css_task_iter_end(&it);
err:
return ret;
@@ -3701,7 +3766,7 @@ static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos)
int index = 0, pid = *pos;
int *iter;
- down_read(&l->mutex);
+ down_read(&l->rwsem);
if (pid) {
int end = l->length;
@@ -3728,7 +3793,7 @@ static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos)
static void cgroup_pidlist_stop(struct seq_file *s, void *v)
{
struct cgroup_pidlist *l = s->private;
- up_read(&l->mutex);
+ up_read(&l->rwsem);
}
static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
@@ -3774,7 +3839,7 @@ static void cgroup_release_pid_array(struct cgroup_pidlist *l)
* pidlist_mutex, we have to take pidlist_mutex first.
*/
mutex_lock(&l->owner->pidlist_mutex);
- down_write(&l->mutex);
+ down_write(&l->rwsem);
BUG_ON(!l->use_count);
if (!--l->use_count) {
/* we're the last user if refcount is 0; remove and free */
@@ -3782,12 +3847,12 @@ static void cgroup_release_pid_array(struct cgroup_pidlist *l)
mutex_unlock(&l->owner->pidlist_mutex);
pidlist_free(l->list);
put_pid_ns(l->key.ns);
- up_write(&l->mutex);
+ up_write(&l->rwsem);
kfree(l);
return;
}
mutex_unlock(&l->owner->pidlist_mutex);
- up_write(&l->mutex);
+ up_write(&l->rwsem);
}
static int cgroup_pidlist_release(struct inode *inode, struct file *file)
@@ -3851,21 +3916,20 @@ static int cgroup_procs_open(struct inode *unused, struct file *file)
return cgroup_pidlist_open(file, CGROUP_FILE_PROCS);
}
-static u64 cgroup_read_notify_on_release(struct cgroup *cgrp,
- struct cftype *cft)
+static u64 cgroup_read_notify_on_release(struct cgroup_subsys_state *css,
+ struct cftype *cft)
{
- return notify_on_release(cgrp);
+ return notify_on_release(css->cgroup);
}
-static int cgroup_write_notify_on_release(struct cgroup *cgrp,
- struct cftype *cft,
- u64 val)
+static int cgroup_write_notify_on_release(struct cgroup_subsys_state *css,
+ struct cftype *cft, u64 val)
{
- clear_bit(CGRP_RELEASABLE, &cgrp->flags);
+ clear_bit(CGRP_RELEASABLE, &css->cgroup->flags);
if (val)
- set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
+ set_bit(CGRP_NOTIFY_ON_RELEASE, &css->cgroup->flags);
else
- clear_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
+ clear_bit(CGRP_NOTIFY_ON_RELEASE, &css->cgroup->flags);
return 0;
}
@@ -3895,18 +3959,18 @@ static void cgroup_event_remove(struct work_struct *work)
{
struct cgroup_event *event = container_of(work, struct cgroup_event,
remove);
- struct cgroup *cgrp = event->cgrp;
+ struct cgroup_subsys_state *css = event->css;
remove_wait_queue(event->wqh, &event->wait);
- event->cft->unregister_event(cgrp, event->cft, event->eventfd);
+ event->cft->unregister_event(css, event->cft, event->eventfd);
/* Notify userspace the event is going away. */
eventfd_signal(event->eventfd, 1);
eventfd_ctx_put(event->eventfd);
kfree(event);
- cgroup_dput(cgrp);
+ css_put(css);
}
/*
@@ -3919,7 +3983,7 @@ static int cgroup_event_wake(wait_queue_t *wait, unsigned mode,
{
struct cgroup_event *event = container_of(wait,
struct cgroup_event, wait);
- struct cgroup *cgrp = event->cgrp;
+ struct cgroup *cgrp = event->css->cgroup;
unsigned long flags = (unsigned long)key;
if (flags & POLLHUP) {
@@ -3963,14 +4027,15 @@ static void cgroup_event_ptable_queue_proc(struct file *file,
* Input must be in format '<event_fd> <control_fd> <args>'.
* Interpretation of args is defined by control file implementation.
*/
-static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft,
- const char *buffer)
+static int cgroup_write_event_control(struct cgroup_subsys_state *dummy_css,
+ struct cftype *cft, const char *buffer)
{
- struct cgroup_event *event = NULL;
- struct cgroup *cgrp_cfile;
+ struct cgroup *cgrp = dummy_css->cgroup;
+ struct cgroup_event *event;
+ struct cgroup_subsys_state *cfile_css;
unsigned int efd, cfd;
- struct file *efile = NULL;
- struct file *cfile = NULL;
+ struct file *efile;
+ struct file *cfile;
char *endp;
int ret;
@@ -3987,7 +4052,7 @@ static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft,
event = kzalloc(sizeof(*event), GFP_KERNEL);
if (!event)
return -ENOMEM;
- event->cgrp = cgrp;
+
INIT_LIST_HEAD(&event->list);
init_poll_funcptr(&event->pt, cgroup_event_ptable_queue_proc);
init_waitqueue_func_entry(&event->wait, cgroup_event_wake);
@@ -3996,62 +4061,68 @@ static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft,
efile = eventfd_fget(efd);
if (IS_ERR(efile)) {
ret = PTR_ERR(efile);
- goto fail;
+ goto out_kfree;
}
event->eventfd = eventfd_ctx_fileget(efile);
if (IS_ERR(event->eventfd)) {
ret = PTR_ERR(event->eventfd);
- goto fail;
+ goto out_put_efile;
}
cfile = fget(cfd);
if (!cfile) {
ret = -EBADF;
- goto fail;
+ goto out_put_eventfd;
}
/* the process need read permission on control file */
/* AV: shouldn't we check that it's been opened for read instead? */
ret = inode_permission(file_inode(cfile), MAY_READ);
if (ret < 0)
- goto fail;
+ goto out_put_cfile;
event->cft = __file_cft(cfile);
if (IS_ERR(event->cft)) {
ret = PTR_ERR(event->cft);
- goto fail;
+ goto out_put_cfile;
+ }
+
+ if (!event->cft->ss) {
+ ret = -EBADF;
+ goto out_put_cfile;
}
/*
- * The file to be monitored must be in the same cgroup as
- * cgroup.event_control is.
+ * Determine the css of @cfile, verify it belongs to the same
+ * cgroup as cgroup.event_control, and associate @event with it.
+ * Remaining events are automatically removed on cgroup destruction
+ * but the removal is asynchronous, so take an extra ref.
*/
- cgrp_cfile = __d_cgrp(cfile->f_dentry->d_parent);
- if (cgrp_cfile != cgrp) {
- ret = -EINVAL;
- goto fail;
- }
+ rcu_read_lock();
+
+ ret = -EINVAL;
+ event->css = cgroup_css(cgrp, event->cft->ss);
+ cfile_css = css_from_dir(cfile->f_dentry->d_parent, event->cft->ss);
+ if (event->css && event->css == cfile_css && css_tryget(event->css))
+ ret = 0;
+
+ rcu_read_unlock();
+ if (ret)
+ goto out_put_cfile;
if (!event->cft->register_event || !event->cft->unregister_event) {
ret = -EINVAL;
- goto fail;
+ goto out_put_css;
}
- ret = event->cft->register_event(cgrp, event->cft,
+ ret = event->cft->register_event(event->css, event->cft,
event->eventfd, buffer);
if (ret)
- goto fail;
+ goto out_put_css;
efile->f_op->poll(efile, &event->pt);
- /*
- * Events should be removed after rmdir of cgroup directory, but before
- * destroying subsystem state objects. Let's take reference to cgroup
- * directory dentry to do that.
- */
- dget(cgrp->dentry);
-
spin_lock(&cgrp->event_list_lock);
list_add(&event->list, &cgrp->event_list);
spin_unlock(&cgrp->event_list_lock);
@@ -4061,35 +4132,33 @@ static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft,
return 0;
-fail:
- if (cfile)
- fput(cfile);
-
- if (event && event->eventfd && !IS_ERR(event->eventfd))
- eventfd_ctx_put(event->eventfd);
-
- if (!IS_ERR_OR_NULL(efile))
- fput(efile);
-
+out_put_css:
+ css_put(event->css);
+out_put_cfile:
+ fput(cfile);
+out_put_eventfd:
+ eventfd_ctx_put(event->eventfd);
+out_put_efile:
+ fput(efile);
+out_kfree:
kfree(event);
return ret;
}
-static u64 cgroup_clone_children_read(struct cgroup *cgrp,
- struct cftype *cft)
+static u64 cgroup_clone_children_read(struct cgroup_subsys_state *css,
+ struct cftype *cft)
{
- return test_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
+ return test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags);
}
-static int cgroup_clone_children_write(struct cgroup *cgrp,
- struct cftype *cft,
- u64 val)
+static int cgroup_clone_children_write(struct cgroup_subsys_state *css,
+ struct cftype *cft, u64 val)
{
if (val)
- set_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
+ set_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags);
else
- clear_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
+ clear_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags);
return 0;
}
@@ -4148,36 +4217,34 @@ static struct cftype cgroup_base_files[] = {
};
/**
- * cgroup_populate_dir - selectively creation of files in a directory
+ * cgroup_populate_dir - create subsys files in a cgroup directory
* @cgrp: target cgroup
- * @base_files: true if the base files should be added
* @subsys_mask: mask of the subsystem ids whose files should be added
+ *
+ * On failure, no file is added.
*/
-static int cgroup_populate_dir(struct cgroup *cgrp, bool base_files,
- unsigned long subsys_mask)
+static int cgroup_populate_dir(struct cgroup *cgrp, unsigned long subsys_mask)
{
- int err;
struct cgroup_subsys *ss;
-
- if (base_files) {
- err = cgroup_addrm_files(cgrp, NULL, cgroup_base_files, true);
- if (err < 0)
- return err;
- }
+ int i, ret = 0;
/* process cftsets of each subsystem */
- for_each_root_subsys(cgrp->root, ss) {
+ for_each_subsys(ss, i) {
struct cftype_set *set;
- if (!test_bit(ss->subsys_id, &subsys_mask))
+
+ if (!test_bit(i, &subsys_mask))
continue;
- list_for_each_entry(set, &ss->cftsets, node)
- cgroup_addrm_files(cgrp, ss, set->cfts, true);
+ list_for_each_entry(set, &ss->cftsets, node) {
+ ret = cgroup_addrm_files(cgrp, set->cfts, true);
+ if (ret < 0)
+ goto err;
+ }
}
/* This cgroup is ready now */
for_each_root_subsys(cgrp->root, ss) {
- struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
+ struct cgroup_subsys_state *css = cgroup_css(cgrp, ss);
struct css_id *id = rcu_dereference_protected(css->id, true);
/*
@@ -4190,14 +4257,57 @@ static int cgroup_populate_dir(struct cgroup *cgrp, bool base_files,
}
return 0;
+err:
+ cgroup_clear_dir(cgrp, subsys_mask);
+ return ret;
}
-static void css_dput_fn(struct work_struct *work)
+/*
+ * css destruction is four-stage process.
+ *
+ * 1. Destruction starts. Killing of the percpu_ref is initiated.
+ * Implemented in kill_css().
+ *
+ * 2. When the percpu_ref is confirmed to be visible as killed on all CPUs
+ * and thus css_tryget() is guaranteed to fail, the css can be offlined
+ * by invoking offline_css(). After offlining, the base ref is put.
+ * Implemented in css_killed_work_fn().
+ *
+ * 3. When the percpu_ref reaches zero, the only possible remaining
+ * accessors are inside RCU read sections. css_release() schedules the
+ * RCU callback.
+ *
+ * 4. After the grace period, the css can be freed. Implemented in
+ * css_free_work_fn().
+ *
+ * It is actually hairier because both step 2 and 4 require process context
+ * and thus involve punting to css->destroy_work adding two additional
+ * steps to the already complex sequence.
+ */
+static void css_free_work_fn(struct work_struct *work)
{
struct cgroup_subsys_state *css =
- container_of(work, struct cgroup_subsys_state, dput_work);
+ container_of(work, struct cgroup_subsys_state, destroy_work);
+ struct cgroup *cgrp = css->cgroup;
- cgroup_dput(css->cgroup);
+ if (css->parent)
+ css_put(css->parent);
+
+ css->ss->css_free(css);
+ cgroup_dput(cgrp);
+}
+
+static void css_free_rcu_fn(struct rcu_head *rcu_head)
+{
+ struct cgroup_subsys_state *css =
+ container_of(rcu_head, struct cgroup_subsys_state, rcu_head);
+
+ /*
+ * css holds an extra ref to @cgrp->dentry which is put on the last
+ * css_put(). dput() requires process context which we don't have.
+ */
+ INIT_WORK(&css->destroy_work, css_free_work_fn);
+ schedule_work(&css->destroy_work);
}
static void css_release(struct percpu_ref *ref)
@@ -4205,49 +4315,47 @@ static void css_release(struct percpu_ref *ref)
struct cgroup_subsys_state *css =
container_of(ref, struct cgroup_subsys_state, refcnt);
- schedule_work(&css->dput_work);
+ call_rcu(&css->rcu_head, css_free_rcu_fn);
}
-static void init_cgroup_css(struct cgroup_subsys_state *css,
- struct cgroup_subsys *ss,
- struct cgroup *cgrp)
+static void init_css(struct cgroup_subsys_state *css, struct cgroup_subsys *ss,
+ struct cgroup *cgrp)
{
css->cgroup = cgrp;
+ css->ss = ss;
css->flags = 0;
css->id = NULL;
- if (cgrp == cgroup_dummy_top)
+
+ if (cgrp->parent)
+ css->parent = cgroup_css(cgrp->parent, ss);
+ else
css->flags |= CSS_ROOT;
- BUG_ON(cgrp->subsys[ss->subsys_id]);
- cgrp->subsys[ss->subsys_id] = css;
- /*
- * css holds an extra ref to @cgrp->dentry which is put on the last
- * css_put(). dput() requires process context, which css_put() may
- * be called without. @css->dput_work will be used to invoke
- * dput() asynchronously from css_put().
- */
- INIT_WORK(&css->dput_work, css_dput_fn);
+ BUG_ON(cgroup_css(cgrp, ss));
}
-/* invoke ->post_create() on a new CSS and mark it online if successful */
-static int online_css(struct cgroup_subsys *ss, struct cgroup *cgrp)
+/* invoke ->css_online() on a new CSS and mark it online if successful */
+static int online_css(struct cgroup_subsys_state *css)
{
+ struct cgroup_subsys *ss = css->ss;
int ret = 0;
lockdep_assert_held(&cgroup_mutex);
if (ss->css_online)
- ret = ss->css_online(cgrp);
- if (!ret)
- cgrp->subsys[ss->subsys_id]->flags |= CSS_ONLINE;
+ ret = ss->css_online(css);
+ if (!ret) {
+ css->flags |= CSS_ONLINE;
+ css->cgroup->nr_css++;
+ rcu_assign_pointer(css->cgroup->subsys[ss->subsys_id], css);
+ }
return ret;
}
-/* if the CSS is online, invoke ->pre_destory() on it and mark it offline */
-static void offline_css(struct cgroup_subsys *ss, struct cgroup *cgrp)
- __releases(&cgroup_mutex) __acquires(&cgroup_mutex)
+/* if the CSS is online, invoke ->css_offline() on it and mark it offline */
+static void offline_css(struct cgroup_subsys_state *css)
{
- struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
+ struct cgroup_subsys *ss = css->ss;
lockdep_assert_held(&cgroup_mutex);
@@ -4255,9 +4363,11 @@ static void offline_css(struct cgroup_subsys *ss, struct cgroup *cgrp)
return;
if (ss->css_offline)
- ss->css_offline(cgrp);
+ ss->css_offline(css);
- cgrp->subsys[ss->subsys_id]->flags &= ~CSS_ONLINE;
+ css->flags &= ~CSS_ONLINE;
+ css->cgroup->nr_css--;
+ RCU_INIT_POINTER(css->cgroup->subsys[ss->subsys_id], css);
}
/*
@@ -4271,6 +4381,7 @@ static void offline_css(struct cgroup_subsys *ss, struct cgroup *cgrp)
static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
umode_t mode)
{
+ struct cgroup_subsys_state *css_ar[CGROUP_SUBSYS_COUNT] = { };
struct cgroup *cgrp;
struct cgroup_name *name;
struct cgroupfs_root *root = parent->root;
@@ -4288,7 +4399,11 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
goto err_free_cgrp;
rcu_assign_pointer(cgrp->name, name);
- cgrp->id = ida_simple_get(&root->cgroup_ida, 1, 0, GFP_KERNEL);
+ /*
+ * Temporarily set the pointer to NULL, so idr_find() won't return
+ * a half-baked cgroup.
+ */
+ cgrp->id = idr_alloc(&root->cgroup_idr, NULL, 1, 0, GFP_KERNEL);
if (cgrp->id < 0)
goto err_free_name;
@@ -4317,6 +4432,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
cgrp->dentry = dentry;
cgrp->parent = parent;
+ cgrp->dummy_css.parent = &parent->dummy_css;
cgrp->root = parent->root;
if (notify_on_release(parent))
@@ -4328,22 +4444,21 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
for_each_root_subsys(root, ss) {
struct cgroup_subsys_state *css;
- css = ss->css_alloc(cgrp);
+ css = ss->css_alloc(cgroup_css(parent, ss));
if (IS_ERR(css)) {
err = PTR_ERR(css);
goto err_free_all;
}
+ css_ar[ss->subsys_id] = css;
err = percpu_ref_init(&css->refcnt, css_release);
- if (err) {
- ss->css_free(cgrp);
+ if (err)
goto err_free_all;
- }
- init_cgroup_css(css, ss, cgrp);
+ init_css(css, ss, cgrp);
if (ss->use_id) {
- err = alloc_css_id(ss, parent, cgrp);
+ err = alloc_css_id(css);
if (err)
goto err_free_all;
}
@@ -4365,16 +4480,22 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
list_add_tail_rcu(&cgrp->sibling, &cgrp->parent->children);
root->number_of_cgroups++;
- /* each css holds a ref to the cgroup's dentry */
- for_each_root_subsys(root, ss)
+ /* each css holds a ref to the cgroup's dentry and the parent css */
+ for_each_root_subsys(root, ss) {
+ struct cgroup_subsys_state *css = css_ar[ss->subsys_id];
+
dget(dentry);
+ css_get(css->parent);
+ }
/* hold a ref to the parent's dentry */
dget(parent->dentry);
/* creation succeeded, notify subsystems */
for_each_root_subsys(root, ss) {
- err = online_css(ss, cgrp);
+ struct cgroup_subsys_state *css = css_ar[ss->subsys_id];
+
+ err = online_css(css);
if (err)
goto err_destroy;
@@ -4388,7 +4509,13 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
}
}
- err = cgroup_populate_dir(cgrp, true, root->subsys_mask);
+ idr_replace(&root->cgroup_idr, cgrp, cgrp->id);
+
+ err = cgroup_addrm_files(cgrp, cgroup_base_files, true);
+ if (err)
+ goto err_destroy;
+
+ err = cgroup_populate_dir(cgrp, root->subsys_mask);
if (err)
goto err_destroy;
@@ -4399,18 +4526,18 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
err_free_all:
for_each_root_subsys(root, ss) {
- struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
+ struct cgroup_subsys_state *css = css_ar[ss->subsys_id];
if (css) {
percpu_ref_cancel_init(&css->refcnt);
- ss->css_free(cgrp);
+ ss->css_free(css);
}
}
mutex_unlock(&cgroup_mutex);
/* Release the reference count that we took on the superblock */
deactivate_super(sb);
err_free_id:
- ida_simple_remove(&root->cgroup_ida, cgrp->id);
+ idr_remove(&root->cgroup_idr, cgrp->id);
err_free_name:
kfree(rcu_dereference_raw(cgrp->name));
err_free_cgrp:
@@ -4432,22 +4559,84 @@ static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
return cgroup_create(c_parent, dentry, mode | S_IFDIR);
}
-static void cgroup_css_killed(struct cgroup *cgrp)
+/*
+ * This is called when the refcnt of a css is confirmed to be killed.
+ * css_tryget() is now guaranteed to fail.
+ */
+static void css_killed_work_fn(struct work_struct *work)
{
- if (!atomic_dec_and_test(&cgrp->css_kill_cnt))
- return;
+ struct cgroup_subsys_state *css =
+ container_of(work, struct cgroup_subsys_state, destroy_work);
+ struct cgroup *cgrp = css->cgroup;
- /* percpu ref's of all css's are killed, kick off the next step */
- INIT_WORK(&cgrp->destroy_work, cgroup_offline_fn);
- schedule_work(&cgrp->destroy_work);
+ mutex_lock(&cgroup_mutex);
+
+ /*
+ * css_tryget() is guaranteed to fail now. Tell subsystems to
+ * initate destruction.
+ */
+ offline_css(css);
+
+ /*
+ * If @cgrp is marked dead, it's waiting for refs of all css's to
+ * be disabled before proceeding to the second phase of cgroup
+ * destruction. If we are the last one, kick it off.
+ */
+ if (!cgrp->nr_css && cgroup_is_dead(cgrp))
+ cgroup_destroy_css_killed(cgrp);
+
+ mutex_unlock(&cgroup_mutex);
+
+ /*
+ * Put the css refs from kill_css(). Each css holds an extra
+ * reference to the cgroup's dentry and cgroup removal proceeds
+ * regardless of css refs. On the last put of each css, whenever
+ * that may be, the extra dentry ref is put so that dentry
+ * destruction happens only after all css's are released.
+ */
+ css_put(css);
}
-static void css_ref_killed_fn(struct percpu_ref *ref)
+/* css kill confirmation processing requires process context, bounce */
+static void css_killed_ref_fn(struct percpu_ref *ref)
{
struct cgroup_subsys_state *css =
container_of(ref, struct cgroup_subsys_state, refcnt);
- cgroup_css_killed(css->cgroup);
+ INIT_WORK(&css->destroy_work, css_killed_work_fn);
+ schedule_work(&css->destroy_work);
+}
+
+/**
+ * kill_css - destroy a css
+ * @css: css to destroy
+ *
+ * This function initiates destruction of @css by removing cgroup interface
+ * files and putting its base reference. ->css_offline() will be invoked
+ * asynchronously once css_tryget() is guaranteed to fail and when the
+ * reference count reaches zero, @css will be released.
+ */
+static void kill_css(struct cgroup_subsys_state *css)
+{
+ cgroup_clear_dir(css->cgroup, 1 << css->ss->subsys_id);
+
+ /*
+ * Killing would put the base ref, but we need to keep it alive
+ * until after ->css_offline().
+ */
+ css_get(css);
+
+ /*
+ * cgroup core guarantees that, by the time ->css_offline() is
+ * invoked, no new css reference will be given out via
+ * css_tryget(). We can't simply call percpu_ref_kill() and
+ * proceed to offlining css's because percpu_ref_kill() doesn't
+ * guarantee that the ref is seen as killed on all CPUs on return.
+ *
+ * Use percpu_ref_kill_and_confirm() to get notifications as each
+ * css is confirmed to be seen as killed on all CPUs.
+ */
+ percpu_ref_kill_and_confirm(&css->refcnt, css_killed_ref_fn);
}
/**
@@ -4513,41 +4702,19 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
return -EBUSY;
/*
- * Block new css_tryget() by killing css refcnts. cgroup core
- * guarantees that, by the time ->css_offline() is invoked, no new
- * css reference will be given out via css_tryget(). We can't
- * simply call percpu_ref_kill() and proceed to offlining css's
- * because percpu_ref_kill() doesn't guarantee that the ref is seen
- * as killed on all CPUs on return.
- *
- * Use percpu_ref_kill_and_confirm() to get notifications as each
- * css is confirmed to be seen as killed on all CPUs. The
- * notification callback keeps track of the number of css's to be
- * killed and schedules cgroup_offline_fn() to perform the rest of
- * destruction once the percpu refs of all css's are confirmed to
- * be killed.
+ * Initiate massacre of all css's. cgroup_destroy_css_killed()
+ * will be invoked to perform the rest of destruction once the
+ * percpu refs of all css's are confirmed to be killed.
*/
- atomic_set(&cgrp->css_kill_cnt, 1);
- for_each_root_subsys(cgrp->root, ss) {
- struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
-
- /*
- * Killing would put the base ref, but we need to keep it
- * alive until after ->css_offline.
- */
- percpu_ref_get(&css->refcnt);
-
- atomic_inc(&cgrp->css_kill_cnt);
- percpu_ref_kill_and_confirm(&css->refcnt, css_ref_killed_fn);
- }
- cgroup_css_killed(cgrp);
+ for_each_root_subsys(cgrp->root, ss)
+ kill_css(cgroup_css(cgrp, ss));
/*
* Mark @cgrp dead. This prevents further task migration and child
* creation by disabling cgroup_lock_live_group(). Note that
- * CGRP_DEAD assertion is depended upon by cgroup_next_sibling() to
+ * CGRP_DEAD assertion is depended upon by css_next_child() to
* resume iteration after dropping RCU read lock. See
- * cgroup_next_sibling() for details.
+ * css_next_child() for details.
*/
set_bit(CGRP_DEAD, &cgrp->flags);
@@ -4558,9 +4725,20 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
raw_spin_unlock(&release_list_lock);
/*
- * Remove @cgrp directory. The removal puts the base ref but we
- * aren't quite done with @cgrp yet, so hold onto it.
+ * If @cgrp has css's attached, the second stage of cgroup
+ * destruction is kicked off from css_killed_work_fn() after the
+ * refs of all attached css's are killed. If @cgrp doesn't have
+ * any css, we kick it off here.
+ */
+ if (!cgrp->nr_css)
+ cgroup_destroy_css_killed(cgrp);
+
+ /*
+ * Clear the base files and remove @cgrp directory. The removal
+ * puts the base ref but we aren't quite done with @cgrp yet, so
+ * hold onto it.
*/
+ cgroup_addrm_files(cgrp, cgroup_base_files, false);
dget(d);
cgroup_d_remove_dir(d);
@@ -4580,50 +4758,36 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
};
/**
- * cgroup_offline_fn - the second step of cgroup destruction
+ * cgroup_destroy_css_killed - the second step of cgroup destruction
* @work: cgroup->destroy_free_work
*
* This function is invoked from a work item for a cgroup which is being
- * destroyed after the percpu refcnts of all css's are guaranteed to be
- * seen as killed on all CPUs, and performs the rest of destruction. This
- * is the second step of destruction described in the comment above
- * cgroup_destroy_locked().
+ * destroyed after all css's are offlined and performs the rest of
+ * destruction. This is the second step of destruction described in the
+ * comment above cgroup_destroy_locked().
*/
-static void cgroup_offline_fn(struct work_struct *work)
+static void cgroup_destroy_css_killed(struct cgroup *cgrp)
{
- struct cgroup *cgrp = container_of(work, struct cgroup, destroy_work);
struct cgroup *parent = cgrp->parent;
struct dentry *d = cgrp->dentry;
- struct cgroup_subsys *ss;
- mutex_lock(&cgroup_mutex);
+ lockdep_assert_held(&cgroup_mutex);
- /*
- * css_tryget() is guaranteed to fail now. Tell subsystems to
- * initate destruction.
- */
- for_each_root_subsys(cgrp->root, ss)
- offline_css(ss, cgrp);
+ /* delete this cgroup from parent->children */
+ list_del_rcu(&cgrp->sibling);
/*
- * Put the css refs from cgroup_destroy_locked(). Each css holds
- * an extra reference to the cgroup's dentry and cgroup removal
- * proceeds regardless of css refs. On the last put of each css,
- * whenever that may be, the extra dentry ref is put so that dentry
- * destruction happens only after all css's are released.
+ * We should remove the cgroup object from idr before its grace
+ * period starts, so we won't be looking up a cgroup while the
+ * cgroup is being freed.
*/
- for_each_root_subsys(cgrp->root, ss)
- css_put(cgrp->subsys[ss->subsys_id]);
-
- /* delete this cgroup from parent->children */
- list_del_rcu(&cgrp->sibling);
+ idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
+ cgrp->id = -1;
dput(d);
set_bit(CGRP_RELEASABLE, &parent->flags);
check_for_release(parent);
-
- mutex_unlock(&cgroup_mutex);
}
static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
@@ -4646,6 +4810,11 @@ static void __init_or_module cgroup_init_cftsets(struct cgroup_subsys *ss)
* deregistration.
*/
if (ss->base_cftypes) {
+ struct cftype *cft;
+
+ for (cft = ss->base_cftypes; cft->name[0] != '\0'; cft++)
+ cft->ss = ss;
+
ss->base_cftset.cfts = ss->base_cftypes;
list_add_tail(&ss->base_cftset.node, &ss->cftsets);
}
@@ -4665,10 +4834,10 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
/* Create the top cgroup state for this subsystem */
list_add(&ss->sibling, &cgroup_dummy_root.subsys_list);
ss->root = &cgroup_dummy_root;
- css = ss->css_alloc(cgroup_dummy_top);
+ css = ss->css_alloc(cgroup_css(cgroup_dummy_top, ss));
/* We don't handle early failures gracefully */
BUG_ON(IS_ERR(css));
- init_cgroup_css(css, ss, cgroup_dummy_top);
+ init_css(css, ss, cgroup_dummy_top);
/* Update the init_css_set to contain a subsys
* pointer to this state - since the subsystem is
@@ -4683,7 +4852,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
* need to invoke fork callbacks here. */
BUG_ON(!list_empty(&init_task.tasks));
- BUG_ON(online_css(ss, cgroup_dummy_top));
+ BUG_ON(online_css(css));
mutex_unlock(&cgroup_mutex);
@@ -4744,7 +4913,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
* struct, so this can happen first (i.e. before the dummy root
* attachment).
*/
- css = ss->css_alloc(cgroup_dummy_top);
+ css = ss->css_alloc(cgroup_css(cgroup_dummy_top, ss));
if (IS_ERR(css)) {
/* failure case - need to deassign the cgroup_subsys[] slot. */
cgroup_subsys[ss->subsys_id] = NULL;
@@ -4756,8 +4925,8 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
ss->root = &cgroup_dummy_root;
/* our new subsystem will be attached to the dummy hierarchy. */
- init_cgroup_css(css, ss, cgroup_dummy_top);
- /* init_idr must be after init_cgroup_css because it sets css->id. */
+ init_css(css, ss, cgroup_dummy_top);
+ /* init_idr must be after init_css() because it sets css->id. */
if (ss->use_id) {
ret = cgroup_init_idr(ss, css);
if (ret)
@@ -4787,7 +4956,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
}
write_unlock(&css_set_lock);
- ret = online_css(ss, cgroup_dummy_top);
+ ret = online_css(css);
if (ret)
goto err_unload;
@@ -4819,14 +4988,14 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss)
/*
* we shouldn't be called if the subsystem is in use, and the use of
- * try_module_get in parse_cgroupfs_options should ensure that it
+ * try_module_get() in rebind_subsystems() should ensure that it
* doesn't start being used while we're killing it off.
*/
BUG_ON(ss->root != &cgroup_dummy_root);
mutex_lock(&cgroup_mutex);
- offline_css(ss, cgroup_dummy_top);
+ offline_css(cgroup_css(cgroup_dummy_top, ss));
if (ss->use_id)
idr_destroy(&ss->idr);
@@ -4860,8 +5029,8 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss)
* the cgrp->subsys pointer to find their state. note that this
* also takes care of freeing the css_id.
*/
- ss->css_free(cgroup_dummy_top);
- cgroup_dummy_top->subsys[ss->subsys_id] = NULL;
+ ss->css_free(cgroup_css(cgroup_dummy_top, ss));
+ RCU_INIT_POINTER(cgroup_dummy_top->subsys[ss->subsys_id], NULL);
mutex_unlock(&cgroup_mutex);
}
@@ -4943,6 +5112,10 @@ int __init cgroup_init(void)
BUG_ON(cgroup_init_root_id(&cgroup_dummy_root, 0, 1));
+ err = idr_alloc(&cgroup_dummy_root.cgroup_idr, cgroup_dummy_top,
+ 0, 1, GFP_KERNEL);
+ BUG_ON(err < 0);
+
mutex_unlock(&cgroup_root_mutex);
mutex_unlock(&cgroup_mutex);
@@ -5099,7 +5272,7 @@ void cgroup_fork(struct task_struct *child)
* Adds the task to the list running through its css_set if necessary and
* call the subsystem fork() callbacks. Has to be after the task is
* visible on the task list in case we race with the first call to
- * cgroup_iter_start() - to guarantee that the new task ends up on its
+ * cgroup_task_iter_start() - to guarantee that the new task ends up on its
* list.
*/
void cgroup_post_fork(struct task_struct *child)
@@ -5212,10 +5385,10 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
*/
for_each_builtin_subsys(ss, i) {
if (ss->exit) {
- struct cgroup *old_cgrp = cset->subsys[i]->cgroup;
- struct cgroup *cgrp = task_cgroup(tsk, i);
+ struct cgroup_subsys_state *old_css = cset->subsys[i];
+ struct cgroup_subsys_state *css = task_css(tsk, i);
- ss->exit(cgrp, old_cgrp, tsk);
+ ss->exit(css, old_css, tsk);
}
}
}
@@ -5474,20 +5647,16 @@ static int __init_or_module cgroup_init_idr(struct cgroup_subsys *ss,
return 0;
}
-static int alloc_css_id(struct cgroup_subsys *ss, struct cgroup *parent,
- struct cgroup *child)
+static int alloc_css_id(struct cgroup_subsys_state *child_css)
{
- int subsys_id, i, depth = 0;
- struct cgroup_subsys_state *parent_css, *child_css;
+ struct cgroup_subsys_state *parent_css = css_parent(child_css);
struct css_id *child_id, *parent_id;
+ int i, depth;
- subsys_id = ss->subsys_id;
- parent_css = parent->subsys[subsys_id];
- child_css = child->subsys[subsys_id];
parent_id = rcu_dereference_protected(parent_css->id, true);
depth = parent_id->depth + 1;
- child_id = get_new_cssid(ss, depth);
+ child_id = get_new_cssid(child_css->ss, depth);
if (IS_ERR(child_id))
return PTR_ERR(child_id);
@@ -5525,31 +5694,56 @@ struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id)
}
EXPORT_SYMBOL_GPL(css_lookup);
-/*
- * get corresponding css from file open on cgroupfs directory
+/**
+ * css_from_dir - get corresponding css from the dentry of a cgroup dir
+ * @dentry: directory dentry of interest
+ * @ss: subsystem of interest
+ *
+ * Must be called under RCU read lock. The caller is responsible for
+ * pinning the returned css if it needs to be accessed outside the RCU
+ * critical section.
*/
-struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id)
+struct cgroup_subsys_state *css_from_dir(struct dentry *dentry,
+ struct cgroup_subsys *ss)
{
struct cgroup *cgrp;
- struct inode *inode;
- struct cgroup_subsys_state *css;
- inode = file_inode(f);
- /* check in cgroup filesystem dir */
- if (inode->i_op != &cgroup_dir_inode_operations)
+ WARN_ON_ONCE(!rcu_read_lock_held());
+
+ /* is @dentry a cgroup dir? */
+ if (!dentry->d_inode ||
+ dentry->d_inode->i_op != &cgroup_dir_inode_operations)
return ERR_PTR(-EBADF);
- if (id < 0 || id >= CGROUP_SUBSYS_COUNT)
- return ERR_PTR(-EINVAL);
+ cgrp = __d_cgrp(dentry);
+ return cgroup_css(cgrp, ss) ?: ERR_PTR(-ENOENT);
+}
- /* get cgroup */
- cgrp = __d_cgrp(f->f_dentry);
- css = cgrp->subsys[id];
- return css ? css : ERR_PTR(-ENOENT);
+/**
+ * css_from_id - lookup css by id
+ * @id: the cgroup id
+ * @ss: cgroup subsys to be looked into
+ *
+ * Returns the css if there's valid one with @id, otherwise returns NULL.
+ * Should be called under rcu_read_lock().
+ */
+struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss)
+{
+ struct cgroup *cgrp;
+
+ rcu_lockdep_assert(rcu_read_lock_held() ||
+ lockdep_is_held(&cgroup_mutex),
+ "css_from_id() needs proper protection");
+
+ cgrp = idr_find(&ss->root->cgroup_idr, id);
+ if (cgrp)
+ return cgroup_css(cgrp, ss);
+ return NULL;
}
#ifdef CONFIG_CGROUP_DEBUG
-static struct cgroup_subsys_state *debug_css_alloc(struct cgroup *cgrp)
+static struct cgroup_subsys_state *
+debug_css_alloc(struct cgroup_subsys_state *parent_css)
{
struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);
@@ -5559,22 +5753,24 @@ static struct cgroup_subsys_state *debug_css_alloc(struct cgroup *cgrp)
return css;
}
-static void debug_css_free(struct cgroup *cgrp)
+static void debug_css_free(struct cgroup_subsys_state *css)
{
- kfree(cgrp->subsys[debug_subsys_id]);
+ kfree(css);
}
-static u64 debug_taskcount_read(struct cgroup *cgrp, struct cftype *cft)
+static u64 debug_taskcount_read(struct cgroup_subsys_state *css,
+ struct cftype *cft)
{
- return cgroup_task_count(cgrp);
+ return cgroup_task_count(css->cgroup);
}
-static u64 current_css_set_read(struct cgroup *cgrp, struct cftype *cft)
+static u64 current_css_set_read(struct cgroup_subsys_state *css,
+ struct cftype *cft)
{
return (u64)(unsigned long)current->cgroups;
}
-static u64 current_css_set_refcount_read(struct cgroup *cgrp,
+static u64 current_css_set_refcount_read(struct cgroup_subsys_state *css,
struct cftype *cft)
{
u64 count;
@@ -5585,7 +5781,7 @@ static u64 current_css_set_refcount_read(struct cgroup *cgrp,
return count;
}
-static int current_css_set_cg_links_read(struct cgroup *cgrp,
+static int current_css_set_cg_links_read(struct cgroup_subsys_state *css,
struct cftype *cft,
struct seq_file *seq)
{
@@ -5612,14 +5808,13 @@ static int current_css_set_cg_links_read(struct cgroup *cgrp,
}
#define MAX_TASKS_SHOWN_PER_CSS 25
-static int cgroup_css_links_read(struct cgroup *cgrp,
- struct cftype *cft,
- struct seq_file *seq)
+static int cgroup_css_links_read(struct cgroup_subsys_state *css,
+ struct cftype *cft, struct seq_file *seq)
{
struct cgrp_cset_link *link;
read_lock(&css_set_lock);
- list_for_each_entry(link, &cgrp->cset_links, cset_link) {
+ list_for_each_entry(link, &css->cgroup->cset_links, cset_link) {
struct css_set *cset = link->cset;
struct task_struct *task;
int count = 0;
@@ -5638,9 +5833,9 @@ static int cgroup_css_links_read(struct cgroup *cgrp,
return 0;
}
-static u64 releasable_read(struct cgroup *cgrp, struct cftype *cft)
+static u64 releasable_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
- return test_bit(CGRP_RELEASABLE, &cgrp->flags);
+ return test_bit(CGRP_RELEASABLE, &css->cgroup->flags);
}
static struct cftype debug_files[] = {
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index 75dda1ea502..f0ff64d0eba 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -45,25 +45,19 @@ struct freezer {
spinlock_t lock;
};
-static inline struct freezer *cgroup_freezer(struct cgroup *cgroup)
+static inline struct freezer *css_freezer(struct cgroup_subsys_state *css)
{
- return container_of(cgroup_subsys_state(cgroup, freezer_subsys_id),
- struct freezer, css);
+ return css ? container_of(css, struct freezer, css) : NULL;
}
static inline struct freezer *task_freezer(struct task_struct *task)
{
- return container_of(task_subsys_state(task, freezer_subsys_id),
- struct freezer, css);
+ return css_freezer(task_css(task, freezer_subsys_id));
}
static struct freezer *parent_freezer(struct freezer *freezer)
{
- struct cgroup *pcg = freezer->css.cgroup->parent;
-
- if (pcg)
- return cgroup_freezer(pcg);
- return NULL;
+ return css_freezer(css_parent(&freezer->css));
}
bool cgroup_freezing(struct task_struct *task)
@@ -92,7 +86,8 @@ static const char *freezer_state_strs(unsigned int state)
struct cgroup_subsys freezer_subsys;
-static struct cgroup_subsys_state *freezer_css_alloc(struct cgroup *cgroup)
+static struct cgroup_subsys_state *
+freezer_css_alloc(struct cgroup_subsys_state *parent_css)
{
struct freezer *freezer;
@@ -105,22 +100,22 @@ static struct cgroup_subsys_state *freezer_css_alloc(struct cgroup *cgroup)
}
/**
- * freezer_css_online - commit creation of a freezer cgroup
- * @cgroup: cgroup being created
+ * freezer_css_online - commit creation of a freezer css
+ * @css: css being created
*
- * We're committing to creation of @cgroup. Mark it online and inherit
+ * We're committing to creation of @css. Mark it online and inherit
* parent's freezing state while holding both parent's and our
* freezer->lock.
*/
-static int freezer_css_online(struct cgroup *cgroup)
+static int freezer_css_online(struct cgroup_subsys_state *css)
{
- struct freezer *freezer = cgroup_freezer(cgroup);
+ struct freezer *freezer = css_freezer(css);
struct freezer *parent = parent_freezer(freezer);
/*
* The following double locking and freezing state inheritance
* guarantee that @cgroup can never escape ancestors' freezing
- * states. See cgroup_for_each_descendant_pre() for details.
+ * states. See css_for_each_descendant_pre() for details.
*/
if (parent)
spin_lock_irq(&parent->lock);
@@ -141,15 +136,15 @@ static int freezer_css_online(struct cgroup *cgroup)
}
/**
- * freezer_css_offline - initiate destruction of @cgroup
- * @cgroup: cgroup being destroyed
+ * freezer_css_offline - initiate destruction of a freezer css
+ * @css: css being destroyed
*
- * @cgroup is going away. Mark it dead and decrement system_freezing_count
- * if it was holding one.
+ * @css is going away. Mark it dead and decrement system_freezing_count if
+ * it was holding one.
*/
-static void freezer_css_offline(struct cgroup *cgroup)
+static void freezer_css_offline(struct cgroup_subsys_state *css)
{
- struct freezer *freezer = cgroup_freezer(cgroup);
+ struct freezer *freezer = css_freezer(css);
spin_lock_irq(&freezer->lock);
@@ -161,9 +156,9 @@ static void freezer_css_offline(struct cgroup *cgroup)
spin_unlock_irq(&freezer->lock);
}
-static void freezer_css_free(struct cgroup *cgroup)
+static void freezer_css_free(struct cgroup_subsys_state *css)
{
- kfree(cgroup_freezer(cgroup));
+ kfree(css_freezer(css));
}
/*
@@ -175,25 +170,26 @@ static void freezer_css_free(struct cgroup *cgroup)
* @freezer->lock. freezer_attach() makes the new tasks conform to the
* current state and all following state changes can see the new tasks.
*/
-static void freezer_attach(struct cgroup *new_cgrp, struct cgroup_taskset *tset)
+static void freezer_attach(struct cgroup_subsys_state *new_css,
+ struct cgroup_taskset *tset)
{
- struct freezer *freezer = cgroup_freezer(new_cgrp);
+ struct freezer *freezer = css_freezer(new_css);
struct task_struct *task;
bool clear_frozen = false;
spin_lock_irq(&freezer->lock);
/*
- * Make the new tasks conform to the current state of @new_cgrp.
+ * Make the new tasks conform to the current state of @new_css.
* For simplicity, when migrating any task to a FROZEN cgroup, we
* revert it to FREEZING and let update_if_frozen() determine the
* correct state later.
*
- * Tasks in @tset are on @new_cgrp but may not conform to its
+ * Tasks in @tset are on @new_css but may not conform to its
* current state before executing the following - !frozen tasks may
* be visible in a FROZEN cgroup and frozen tasks in a THAWED one.
*/
- cgroup_taskset_for_each(task, new_cgrp, tset) {
+ cgroup_taskset_for_each(task, new_css, tset) {
if (!(freezer->state & CGROUP_FREEZING)) {
__thaw_task(task);
} else {
@@ -231,7 +227,7 @@ static void freezer_fork(struct task_struct *task)
* The root cgroup is non-freezable, so we can skip the
* following check.
*/
- if (!freezer->css.cgroup->parent)
+ if (!parent_freezer(freezer))
goto out;
spin_lock_irq(&freezer->lock);
@@ -244,7 +240,7 @@ out:
/**
* update_if_frozen - update whether a cgroup finished freezing
- * @cgroup: cgroup of interest
+ * @css: css of interest
*
* Once FREEZING is initiated, transition to FROZEN is lazily updated by
* calling this function. If the current state is FREEZING but not FROZEN,
@@ -255,14 +251,14 @@ out:
* update_if_frozen() on all descendants prior to invoking this function.
*
* Task states and freezer state might disagree while tasks are being
- * migrated into or out of @cgroup, so we can't verify task states against
+ * migrated into or out of @css, so we can't verify task states against
* @freezer state here. See freezer_attach() for details.
*/
-static void update_if_frozen(struct cgroup *cgroup)
+static void update_if_frozen(struct cgroup_subsys_state *css)
{
- struct freezer *freezer = cgroup_freezer(cgroup);
- struct cgroup *pos;
- struct cgroup_iter it;
+ struct freezer *freezer = css_freezer(css);
+ struct cgroup_subsys_state *pos;
+ struct css_task_iter it;
struct task_struct *task;
WARN_ON_ONCE(!rcu_read_lock_held());
@@ -274,8 +270,8 @@ static void update_if_frozen(struct cgroup *cgroup)
goto out_unlock;
/* are all (live) children frozen? */
- cgroup_for_each_child(pos, cgroup) {
- struct freezer *child = cgroup_freezer(pos);
+ css_for_each_child(pos, css) {
+ struct freezer *child = css_freezer(pos);
if ((child->state & CGROUP_FREEZER_ONLINE) &&
!(child->state & CGROUP_FROZEN))
@@ -283,9 +279,9 @@ static void update_if_frozen(struct cgroup *cgroup)
}
/* are all tasks frozen? */
- cgroup_iter_start(cgroup, &it);
+ css_task_iter_start(css, &it);
- while ((task = cgroup_iter_next(cgroup, &it))) {
+ while ((task = css_task_iter_next(&it))) {
if (freezing(task)) {
/*
* freezer_should_skip() indicates that the task
@@ -300,52 +296,49 @@ static void update_if_frozen(struct cgroup *cgroup)
freezer->state |= CGROUP_FROZEN;
out_iter_end:
- cgroup_iter_end(cgroup, &it);
+ css_task_iter_end(&it);
out_unlock:
spin_unlock_irq(&freezer->lock);
}
-static int freezer_read(struct cgroup *cgroup, struct cftype *cft,
+static int freezer_read(struct cgroup_subsys_state *css, struct cftype *cft,
struct seq_file *m)
{
- struct cgroup *pos;
+ struct cgroup_subsys_state *pos;
rcu_read_lock();
/* update states bottom-up */
- cgroup_for_each_descendant_post(pos, cgroup)
+ css_for_each_descendant_post(pos, css)
update_if_frozen(pos);
- update_if_frozen(cgroup);
rcu_read_unlock();
- seq_puts(m, freezer_state_strs(cgroup_freezer(cgroup)->state));
+ seq_puts(m, freezer_state_strs(css_freezer(css)->state));
seq_putc(m, '\n');
return 0;
}
static void freeze_cgroup(struct freezer *freezer)
{
- struct cgroup *cgroup = freezer->css.cgroup;
- struct cgroup_iter it;
+ struct css_task_iter it;
struct task_struct *task;
- cgroup_iter_start(cgroup, &it);
- while ((task = cgroup_iter_next(cgroup, &it)))
+ css_task_iter_start(&freezer->css, &it);
+ while ((task = css_task_iter_next(&it)))
freeze_task(task);
- cgroup_iter_end(cgroup, &it);
+ css_task_iter_end(&it);
}
static void unfreeze_cgroup(struct freezer *freezer)
{
- struct cgroup *cgroup = freezer->css.cgroup;
- struct cgroup_iter it;
+ struct css_task_iter it;
struct task_struct *task;
- cgroup_iter_start(cgroup, &it);
- while ((task = cgroup_iter_next(cgroup, &it)))
+ css_task_iter_start(&freezer->css, &it);
+ while ((task = css_task_iter_next(&it)))
__thaw_task(task);
- cgroup_iter_end(cgroup, &it);
+ css_task_iter_end(&it);
}
/**
@@ -395,12 +388,7 @@ static void freezer_apply_state(struct freezer *freezer, bool freeze,
*/
static void freezer_change_state(struct freezer *freezer, bool freeze)
{
- struct cgroup *pos;
-
- /* update @freezer */
- spin_lock_irq(&freezer->lock);
- freezer_apply_state(freezer, freeze, CGROUP_FREEZING_SELF);
- spin_unlock_irq(&freezer->lock);
+ struct cgroup_subsys_state *pos;
/*
* Update all its descendants in pre-order traversal. Each
@@ -408,24 +396,33 @@ static void freezer_change_state(struct freezer *freezer, bool freeze)
* CGROUP_FREEZING_PARENT.
*/
rcu_read_lock();
- cgroup_for_each_descendant_pre(pos, freezer->css.cgroup) {
- struct freezer *pos_f = cgroup_freezer(pos);
+ css_for_each_descendant_pre(pos, &freezer->css) {
+ struct freezer *pos_f = css_freezer(pos);
struct freezer *parent = parent_freezer(pos_f);
- /*
- * Our update to @parent->state is already visible which is
- * all we need. No need to lock @parent. For more info on
- * synchronization, see freezer_post_create().
- */
spin_lock_irq(&pos_f->lock);
- freezer_apply_state(pos_f, parent->state & CGROUP_FREEZING,
- CGROUP_FREEZING_PARENT);
+
+ if (pos_f == freezer) {
+ freezer_apply_state(pos_f, freeze,
+ CGROUP_FREEZING_SELF);
+ } else {
+ /*
+ * Our update to @parent->state is already visible
+ * which is all we need. No need to lock @parent.
+ * For more info on synchronization, see
+ * freezer_post_create().
+ */
+ freezer_apply_state(pos_f,
+ parent->state & CGROUP_FREEZING,
+ CGROUP_FREEZING_PARENT);
+ }
+
spin_unlock_irq(&pos_f->lock);
}
rcu_read_unlock();
}
-static int freezer_write(struct cgroup *cgroup, struct cftype *cft,
+static int freezer_write(struct cgroup_subsys_state *css, struct cftype *cft,
const char *buffer)
{
bool freeze;
@@ -437,20 +434,22 @@ static int freezer_write(struct cgroup *cgroup, struct cftype *cft,
else
return -EINVAL;
- freezer_change_state(cgroup_freezer(cgroup), freeze);
+ freezer_change_state(css_freezer(css), freeze);
return 0;
}
-static u64 freezer_self_freezing_read(struct cgroup *cgroup, struct cftype *cft)
+static u64 freezer_self_freezing_read(struct cgroup_subsys_state *css,
+ struct cftype *cft)
{
- struct freezer *freezer = cgroup_freezer(cgroup);
+ struct freezer *freezer = css_freezer(css);
return (bool)(freezer->state & CGROUP_FREEZING_SELF);
}
-static u64 freezer_parent_freezing_read(struct cgroup *cgroup, struct cftype *cft)
+static u64 freezer_parent_freezing_read(struct cgroup_subsys_state *css,
+ struct cftype *cft)
{
- struct freezer *freezer = cgroup_freezer(cgroup);
+ struct freezer *freezer = css_freezer(css);
return (bool)(freezer->state & CGROUP_FREEZING_PARENT);
}
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index ea1966db34f..6bf981e13c4 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -68,10 +68,6 @@
*/
int number_of_cpusets __read_mostly;
-/* Forward declare cgroup structures */
-struct cgroup_subsys cpuset_subsys;
-struct cpuset;
-
/* See "Frequency meter" comments, below. */
struct fmeter {
@@ -115,27 +111,20 @@ struct cpuset {
int relax_domain_level;
};
-/* Retrieve the cpuset for a cgroup */
-static inline struct cpuset *cgroup_cs(struct cgroup *cgrp)
+static inline struct cpuset *css_cs(struct cgroup_subsys_state *css)
{
- return container_of(cgroup_subsys_state(cgrp, cpuset_subsys_id),
- struct cpuset, css);
+ return css ? container_of(css, struct cpuset, css) : NULL;
}
/* Retrieve the cpuset for a task */
static inline struct cpuset *task_cs(struct task_struct *task)
{
- return container_of(task_subsys_state(task, cpuset_subsys_id),
- struct cpuset, css);
+ return css_cs(task_css(task, cpuset_subsys_id));
}
-static inline struct cpuset *parent_cs(const struct cpuset *cs)
+static inline struct cpuset *parent_cs(struct cpuset *cs)
{
- struct cgroup *pcgrp = cs->css.cgroup->parent;
-
- if (pcgrp)
- return cgroup_cs(pcgrp);
- return NULL;
+ return css_cs(css_parent(&cs->css));
}
#ifdef CONFIG_NUMA
@@ -212,29 +201,30 @@ static struct cpuset top_cpuset = {
/**
* cpuset_for_each_child - traverse online children of a cpuset
* @child_cs: loop cursor pointing to the current child
- * @pos_cgrp: used for iteration
+ * @pos_css: used for iteration
* @parent_cs: target cpuset to walk children of
*
* Walk @child_cs through the online children of @parent_cs. Must be used
* with RCU read locked.
*/
-#define cpuset_for_each_child(child_cs, pos_cgrp, parent_cs) \
- cgroup_for_each_child((pos_cgrp), (parent_cs)->css.cgroup) \
- if (is_cpuset_online(((child_cs) = cgroup_cs((pos_cgrp)))))
+#define cpuset_for_each_child(child_cs, pos_css, parent_cs) \
+ css_for_each_child((pos_css), &(parent_cs)->css) \
+ if (is_cpuset_online(((child_cs) = css_cs((pos_css)))))
/**
* cpuset_for_each_descendant_pre - pre-order walk of a cpuset's descendants
* @des_cs: loop cursor pointing to the current descendant
- * @pos_cgrp: used for iteration
+ * @pos_css: used for iteration
* @root_cs: target cpuset to walk ancestor of
*
* Walk @des_cs through the online descendants of @root_cs. Must be used
- * with RCU read locked. The caller may modify @pos_cgrp by calling
- * cgroup_rightmost_descendant() to skip subtree.
+ * with RCU read locked. The caller may modify @pos_css by calling
+ * css_rightmost_descendant() to skip subtree. @root_cs is included in the
+ * iteration and the first node to be visited.
*/
-#define cpuset_for_each_descendant_pre(des_cs, pos_cgrp, root_cs) \
- cgroup_for_each_descendant_pre((pos_cgrp), (root_cs)->css.cgroup) \
- if (is_cpuset_online(((des_cs) = cgroup_cs((pos_cgrp)))))
+#define cpuset_for_each_descendant_pre(des_cs, pos_css, root_cs) \
+ css_for_each_descendant_pre((pos_css), &(root_cs)->css) \
+ if (is_cpuset_online(((des_cs) = css_cs((pos_css)))))
/*
* There are two global mutexes guarding cpuset structures - cpuset_mutex
@@ -320,8 +310,7 @@ static struct file_system_type cpuset_fs_type = {
*
* Call with callback_mutex held.
*/
-static void guarantee_online_cpus(const struct cpuset *cs,
- struct cpumask *pmask)
+static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask)
{
while (!cpumask_intersects(cs->cpus_allowed, cpu_online_mask))
cs = parent_cs(cs);
@@ -339,7 +328,7 @@ static void guarantee_online_cpus(const struct cpuset *cs,
*
* Call with callback_mutex held.
*/
-static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask)
+static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask)
{
while (!nodes_intersects(cs->mems_allowed, node_states[N_MEMORY]))
cs = parent_cs(cs);
@@ -384,7 +373,7 @@ static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q)
* alloc_trial_cpuset - allocate a trial cpuset
* @cs: the cpuset that the trial cpuset duplicates
*/
-static struct cpuset *alloc_trial_cpuset(const struct cpuset *cs)
+static struct cpuset *alloc_trial_cpuset(struct cpuset *cs)
{
struct cpuset *trial;
@@ -431,9 +420,9 @@ static void free_trial_cpuset(struct cpuset *trial)
* Return 0 if valid, -errno if not.
*/
-static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
+static int validate_change(struct cpuset *cur, struct cpuset *trial)
{
- struct cgroup *cgrp;
+ struct cgroup_subsys_state *css;
struct cpuset *c, *par;
int ret;
@@ -441,7 +430,7 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
/* Each of our child cpusets must be a subset of us */
ret = -EBUSY;
- cpuset_for_each_child(c, cgrp, cur)
+ cpuset_for_each_child(c, css, cur)
if (!is_cpuset_subset(c, trial))
goto out;
@@ -462,7 +451,7 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
* overlap
*/
ret = -EINVAL;
- cpuset_for_each_child(c, cgrp, par) {
+ cpuset_for_each_child(c, css, par) {
if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) &&
c != cur &&
cpumask_intersects(trial->cpus_allowed, c->cpus_allowed))
@@ -515,13 +504,16 @@ static void update_domain_attr_tree(struct sched_domain_attr *dattr,
struct cpuset *root_cs)
{
struct cpuset *cp;
- struct cgroup *pos_cgrp;
+ struct cgroup_subsys_state *pos_css;
rcu_read_lock();
- cpuset_for_each_descendant_pre(cp, pos_cgrp, root_cs) {
+ cpuset_for_each_descendant_pre(cp, pos_css, root_cs) {
+ if (cp == root_cs)
+ continue;
+
/* skip the whole subtree if @cp doesn't have any CPU */
if (cpumask_empty(cp->cpus_allowed)) {
- pos_cgrp = cgroup_rightmost_descendant(pos_cgrp);
+ pos_css = css_rightmost_descendant(pos_css);
continue;
}
@@ -596,7 +588,7 @@ static int generate_sched_domains(cpumask_var_t **domains,
struct sched_domain_attr *dattr; /* attributes for custom domains */
int ndoms = 0; /* number of sched domains in result */
int nslot; /* next empty doms[] struct cpumask slot */
- struct cgroup *pos_cgrp;
+ struct cgroup_subsys_state *pos_css;
doms = NULL;
dattr = NULL;
@@ -625,7 +617,9 @@ static int generate_sched_domains(cpumask_var_t **domains,
csn = 0;
rcu_read_lock();
- cpuset_for_each_descendant_pre(cp, pos_cgrp, &top_cpuset) {
+ cpuset_for_each_descendant_pre(cp, pos_css, &top_cpuset) {
+ if (cp == &top_cpuset)
+ continue;
/*
* Continue traversing beyond @cp iff @cp has some CPUs and
* isn't load balancing. The former is obvious. The
@@ -642,7 +636,7 @@ static int generate_sched_domains(cpumask_var_t **domains,
csa[csn++] = cp;
/* skip @cp's subtree */
- pos_cgrp = cgroup_rightmost_descendant(pos_cgrp);
+ pos_css = css_rightmost_descendant(pos_css);
}
rcu_read_unlock();
@@ -837,52 +831,45 @@ static struct cpuset *effective_nodemask_cpuset(struct cpuset *cs)
/**
* cpuset_change_cpumask - make a task's cpus_allowed the same as its cpuset's
* @tsk: task to test
- * @scan: struct cgroup_scanner containing the cgroup of the task
+ * @data: cpuset to @tsk belongs to
*
- * Called by cgroup_scan_tasks() for each task in a cgroup whose
- * cpus_allowed mask needs to be changed.
+ * Called by css_scan_tasks() for each task in a cgroup whose cpus_allowed
+ * mask needs to be changed.
*
* We don't need to re-check for the cgroup/cpuset membership, since we're
* holding cpuset_mutex at this point.
*/
-static void cpuset_change_cpumask(struct task_struct *tsk,
- struct cgroup_scanner *scan)
+static void cpuset_change_cpumask(struct task_struct *tsk, void *data)
{
- struct cpuset *cpus_cs;
+ struct cpuset *cs = data;
+ struct cpuset *cpus_cs = effective_cpumask_cpuset(cs);
- cpus_cs = effective_cpumask_cpuset(cgroup_cs(scan->cg));
set_cpus_allowed_ptr(tsk, cpus_cs->cpus_allowed);
}
/**
* update_tasks_cpumask - Update the cpumasks of tasks in the cpuset.
* @cs: the cpuset in which each task's cpus_allowed mask needs to be changed
- * @heap: if NULL, defer allocating heap memory to cgroup_scan_tasks()
+ * @heap: if NULL, defer allocating heap memory to css_scan_tasks()
*
* Called with cpuset_mutex held
*
- * The cgroup_scan_tasks() function will scan all the tasks in a cgroup,
+ * The css_scan_tasks() function will scan all the tasks in a cgroup,
* calling callback functions for each.
*
- * No return value. It's guaranteed that cgroup_scan_tasks() always returns 0
+ * No return value. It's guaranteed that css_scan_tasks() always returns 0
* if @heap != NULL.
*/
static void update_tasks_cpumask(struct cpuset *cs, struct ptr_heap *heap)
{
- struct cgroup_scanner scan;
-
- scan.cg = cs->css.cgroup;
- scan.test_task = NULL;
- scan.process_task = cpuset_change_cpumask;
- scan.heap = heap;
- cgroup_scan_tasks(&scan);
+ css_scan_tasks(&cs->css, NULL, cpuset_change_cpumask, cs, heap);
}
/*
* update_tasks_cpumask_hier - Update the cpumasks of tasks in the hierarchy.
* @root_cs: the root cpuset of the hierarchy
* @update_root: update root cpuset or not?
- * @heap: the heap used by cgroup_scan_tasks()
+ * @heap: the heap used by css_scan_tasks()
*
* This will update cpumasks of tasks in @root_cs and all other empty cpusets
* which take on cpumask of @root_cs.
@@ -893,17 +880,19 @@ static void update_tasks_cpumask_hier(struct cpuset *root_cs,
bool update_root, struct ptr_heap *heap)
{
struct cpuset *cp;
- struct cgroup *pos_cgrp;
-
- if (update_root)
- update_tasks_cpumask(root_cs, heap);
+ struct cgroup_subsys_state *pos_css;
rcu_read_lock();
- cpuset_for_each_descendant_pre(cp, pos_cgrp, root_cs) {
- /* skip the whole subtree if @cp have some CPU */
- if (!cpumask_empty(cp->cpus_allowed)) {
- pos_cgrp = cgroup_rightmost_descendant(pos_cgrp);
- continue;
+ cpuset_for_each_descendant_pre(cp, pos_css, root_cs) {
+ if (cp == root_cs) {
+ if (!update_root)
+ continue;
+ } else {
+ /* skip the whole subtree if @cp have some CPU */
+ if (!cpumask_empty(cp->cpus_allowed)) {
+ pos_css = css_rightmost_descendant(pos_css);
+ continue;
+ }
}
if (!css_tryget(&cp->css))
continue;
@@ -1059,20 +1048,24 @@ static void cpuset_change_task_nodemask(struct task_struct *tsk,
task_unlock(tsk);
}
+struct cpuset_change_nodemask_arg {
+ struct cpuset *cs;
+ nodemask_t *newmems;
+};
+
/*
* Update task's mems_allowed and rebind its mempolicy and vmas' mempolicy
* of it to cpuset's new mems_allowed, and migrate pages to new nodes if
* memory_migrate flag is set. Called with cpuset_mutex held.
*/
-static void cpuset_change_nodemask(struct task_struct *p,
- struct cgroup_scanner *scan)
+static void cpuset_change_nodemask(struct task_struct *p, void *data)
{
- struct cpuset *cs = cgroup_cs(scan->cg);
+ struct cpuset_change_nodemask_arg *arg = data;
+ struct cpuset *cs = arg->cs;
struct mm_struct *mm;
int migrate;
- nodemask_t *newmems = scan->data;
- cpuset_change_task_nodemask(p, newmems);
+ cpuset_change_task_nodemask(p, arg->newmems);
mm = get_task_mm(p);
if (!mm)
@@ -1082,7 +1075,7 @@ static void cpuset_change_nodemask(struct task_struct *p,
mpol_rebind_mm(mm, &cs->mems_allowed);
if (migrate)
- cpuset_migrate_mm(mm, &cs->old_mems_allowed, newmems);
+ cpuset_migrate_mm(mm, &cs->old_mems_allowed, arg->newmems);
mmput(mm);
}
@@ -1091,28 +1084,22 @@ static void *cpuset_being_rebound;
/**
* update_tasks_nodemask - Update the nodemasks of tasks in the cpuset.
* @cs: the cpuset in which each task's mems_allowed mask needs to be changed
- * @heap: if NULL, defer allocating heap memory to cgroup_scan_tasks()
+ * @heap: if NULL, defer allocating heap memory to css_scan_tasks()
*
- * Called with cpuset_mutex held
- * No return value. It's guaranteed that cgroup_scan_tasks() always returns 0
- * if @heap != NULL.
+ * Called with cpuset_mutex held. No return value. It's guaranteed that
+ * css_scan_tasks() always returns 0 if @heap != NULL.
*/
static void update_tasks_nodemask(struct cpuset *cs, struct ptr_heap *heap)
{
static nodemask_t newmems; /* protected by cpuset_mutex */
- struct cgroup_scanner scan;
struct cpuset *mems_cs = effective_nodemask_cpuset(cs);
+ struct cpuset_change_nodemask_arg arg = { .cs = cs,
+ .newmems = &newmems };
cpuset_being_rebound = cs; /* causes mpol_dup() rebind */
guarantee_online_mems(mems_cs, &newmems);
- scan.cg = cs->css.cgroup;
- scan.test_task = NULL;
- scan.process_task = cpuset_change_nodemask;
- scan.heap = heap;
- scan.data = &newmems;
-
/*
* The mpol_rebind_mm() call takes mmap_sem, which we couldn't
* take while holding tasklist_lock. Forks can happen - the
@@ -1123,7 +1110,7 @@ static void update_tasks_nodemask(struct cpuset *cs, struct ptr_heap *heap)
* It's ok if we rebind the same mm twice; mpol_rebind_mm()
* is idempotent. Also migrate pages in each mm to new nodes.
*/
- cgroup_scan_tasks(&scan);
+ css_scan_tasks(&cs->css, NULL, cpuset_change_nodemask, &arg, heap);
/*
* All the tasks' nodemasks have been updated, update
@@ -1139,7 +1126,7 @@ static void update_tasks_nodemask(struct cpuset *cs, struct ptr_heap *heap)
* update_tasks_nodemask_hier - Update the nodemasks of tasks in the hierarchy.
* @cs: the root cpuset of the hierarchy
* @update_root: update the root cpuset or not?
- * @heap: the heap used by cgroup_scan_tasks()
+ * @heap: the heap used by css_scan_tasks()
*
* This will update nodemasks of tasks in @root_cs and all other empty cpusets
* which take on nodemask of @root_cs.
@@ -1150,17 +1137,19 @@ static void update_tasks_nodemask_hier(struct cpuset *root_cs,
bool update_root, struct ptr_heap *heap)
{
struct cpuset *cp;
- struct cgroup *pos_cgrp;
-
- if (update_root)
- update_tasks_nodemask(root_cs, heap);
+ struct cgroup_subsys_state *pos_css;
rcu_read_lock();
- cpuset_for_each_descendant_pre(cp, pos_cgrp, root_cs) {
- /* skip the whole subtree if @cp have some CPU */
- if (!nodes_empty(cp->mems_allowed)) {
- pos_cgrp = cgroup_rightmost_descendant(pos_cgrp);
- continue;
+ cpuset_for_each_descendant_pre(cp, pos_css, root_cs) {
+ if (cp == root_cs) {
+ if (!update_root)
+ continue;
+ } else {
+ /* skip the whole subtree if @cp have some CPU */
+ if (!nodes_empty(cp->mems_allowed)) {
+ pos_css = css_rightmost_descendant(pos_css);
+ continue;
+ }
}
if (!css_tryget(&cp->css))
continue;
@@ -1267,44 +1256,39 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val)
return 0;
}
-/*
+/**
* cpuset_change_flag - make a task's spread flags the same as its cpuset's
* @tsk: task to be updated
- * @scan: struct cgroup_scanner containing the cgroup of the task
+ * @data: cpuset to @tsk belongs to
*
- * Called by cgroup_scan_tasks() for each task in a cgroup.
+ * Called by css_scan_tasks() for each task in a cgroup.
*
* We don't need to re-check for the cgroup/cpuset membership, since we're
* holding cpuset_mutex at this point.
*/
-static void cpuset_change_flag(struct task_struct *tsk,
- struct cgroup_scanner *scan)
+static void cpuset_change_flag(struct task_struct *tsk, void *data)
{
- cpuset_update_task_spread_flag(cgroup_cs(scan->cg), tsk);
+ struct cpuset *cs = data;
+
+ cpuset_update_task_spread_flag(cs, tsk);
}
-/*
+/**
* update_tasks_flags - update the spread flags of tasks in the cpuset.
* @cs: the cpuset in which each task's spread flags needs to be changed
- * @heap: if NULL, defer allocating heap memory to cgroup_scan_tasks()
+ * @heap: if NULL, defer allocating heap memory to css_scan_tasks()
*
* Called with cpuset_mutex held
*
- * The cgroup_scan_tasks() function will scan all the tasks in a cgroup,
+ * The css_scan_tasks() function will scan all the tasks in a cgroup,
* calling callback functions for each.
*
- * No return value. It's guaranteed that cgroup_scan_tasks() always returns 0
+ * No return value. It's guaranteed that css_scan_tasks() always returns 0
* if @heap != NULL.
*/
static void update_tasks_flags(struct cpuset *cs, struct ptr_heap *heap)
{
- struct cgroup_scanner scan;
-
- scan.cg = cs->css.cgroup;
- scan.test_task = NULL;
- scan.process_task = cpuset_change_flag;
- scan.heap = heap;
- cgroup_scan_tasks(&scan);
+ css_scan_tasks(&cs->css, NULL, cpuset_change_flag, cs, heap);
}
/*
@@ -1462,9 +1446,10 @@ static int fmeter_getrate(struct fmeter *fmp)
}
/* Called by cgroups to determine if a cpuset is usable; cpuset_mutex held */
-static int cpuset_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
+static int cpuset_can_attach(struct cgroup_subsys_state *css,
+ struct cgroup_taskset *tset)
{
- struct cpuset *cs = cgroup_cs(cgrp);
+ struct cpuset *cs = css_cs(css);
struct task_struct *task;
int ret;
@@ -1475,11 +1460,11 @@ static int cpuset_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
* flag is set.
*/
ret = -ENOSPC;
- if (!cgroup_sane_behavior(cgrp) &&
+ if (!cgroup_sane_behavior(css->cgroup) &&
(cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)))
goto out_unlock;
- cgroup_taskset_for_each(task, cgrp, tset) {
+ cgroup_taskset_for_each(task, css, tset) {
/*
* Kthreads which disallow setaffinity shouldn't be moved
* to a new cpuset; we don't want to change their cpu
@@ -1508,11 +1493,11 @@ out_unlock:
return ret;
}
-static void cpuset_cancel_attach(struct cgroup *cgrp,
+static void cpuset_cancel_attach(struct cgroup_subsys_state *css,
struct cgroup_taskset *tset)
{
mutex_lock(&cpuset_mutex);
- cgroup_cs(cgrp)->attach_in_progress--;
+ css_cs(css)->attach_in_progress--;
mutex_unlock(&cpuset_mutex);
}
@@ -1523,16 +1508,18 @@ static void cpuset_cancel_attach(struct cgroup *cgrp,
*/
static cpumask_var_t cpus_attach;
-static void cpuset_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
+static void cpuset_attach(struct cgroup_subsys_state *css,
+ struct cgroup_taskset *tset)
{
/* static buf protected by cpuset_mutex */
static nodemask_t cpuset_attach_nodemask_to;
struct mm_struct *mm;
struct task_struct *task;
struct task_struct *leader = cgroup_taskset_first(tset);
- struct cgroup *oldcgrp = cgroup_taskset_cur_cgroup(tset);
- struct cpuset *cs = cgroup_cs(cgrp);
- struct cpuset *oldcs = cgroup_cs(oldcgrp);
+ struct cgroup_subsys_state *oldcss = cgroup_taskset_cur_css(tset,
+ cpuset_subsys_id);
+ struct cpuset *cs = css_cs(css);
+ struct cpuset *oldcs = css_cs(oldcss);
struct cpuset *cpus_cs = effective_cpumask_cpuset(cs);
struct cpuset *mems_cs = effective_nodemask_cpuset(cs);
@@ -1546,7 +1533,7 @@ static void cpuset_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
guarantee_online_mems(mems_cs, &cpuset_attach_nodemask_to);
- cgroup_taskset_for_each(task, cgrp, tset) {
+ cgroup_taskset_for_each(task, css, tset) {
/*
* can_attach beforehand should guarantee that this doesn't
* fail. TODO: have a better way to handle failure here
@@ -1608,9 +1595,10 @@ typedef enum {
FILE_SPREAD_SLAB,
} cpuset_filetype_t;
-static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val)
+static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft,
+ u64 val)
{
- struct cpuset *cs = cgroup_cs(cgrp);
+ struct cpuset *cs = css_cs(css);
cpuset_filetype_t type = cft->private;
int retval = 0;
@@ -1657,9 +1645,10 @@ out_unlock:
return retval;
}
-static int cpuset_write_s64(struct cgroup *cgrp, struct cftype *cft, s64 val)
+static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft,
+ s64 val)
{
- struct cpuset *cs = cgroup_cs(cgrp);
+ struct cpuset *cs = css_cs(css);
cpuset_filetype_t type = cft->private;
int retval = -ENODEV;
@@ -1683,10 +1672,10 @@ out_unlock:
/*
* Common handling for a write to a "cpus" or "mems" file.
*/
-static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft,
- const char *buf)
+static int cpuset_write_resmask(struct cgroup_subsys_state *css,
+ struct cftype *cft, const char *buf)
{
- struct cpuset *cs = cgroup_cs(cgrp);
+ struct cpuset *cs = css_cs(css);
struct cpuset *trialcs;
int retval = -ENODEV;
@@ -1765,13 +1754,12 @@ static size_t cpuset_sprintf_memlist(char *page, struct cpuset *cs)
return count;
}
-static ssize_t cpuset_common_file_read(struct cgroup *cgrp,
- struct cftype *cft,
- struct file *file,
- char __user *buf,
- size_t nbytes, loff_t *ppos)
+static ssize_t cpuset_common_file_read(struct cgroup_subsys_state *css,
+ struct cftype *cft, struct file *file,
+ char __user *buf, size_t nbytes,
+ loff_t *ppos)
{
- struct cpuset *cs = cgroup_cs(cgrp);
+ struct cpuset *cs = css_cs(css);
cpuset_filetype_t type = cft->private;
char *page;
ssize_t retval = 0;
@@ -1801,9 +1789,9 @@ out:
return retval;
}
-static u64 cpuset_read_u64(struct cgroup *cgrp, struct cftype *cft)
+static u64 cpuset_read_u64(struct cgroup_subsys_state *css, struct cftype *cft)
{
- struct cpuset *cs = cgroup_cs(cgrp);
+ struct cpuset *cs = css_cs(css);
cpuset_filetype_t type = cft->private;
switch (type) {
case FILE_CPU_EXCLUSIVE:
@@ -1832,9 +1820,9 @@ static u64 cpuset_read_u64(struct cgroup *cgrp, struct cftype *cft)
return 0;
}
-static s64 cpuset_read_s64(struct cgroup *cgrp, struct cftype *cft)
+static s64 cpuset_read_s64(struct cgroup_subsys_state *css, struct cftype *cft)
{
- struct cpuset *cs = cgroup_cs(cgrp);
+ struct cpuset *cs = css_cs(css);
cpuset_filetype_t type = cft->private;
switch (type) {
case FILE_SCHED_RELAX_DOMAIN_LEVEL:
@@ -1949,11 +1937,12 @@ static struct cftype files[] = {
* cgrp: control group that the new cpuset will be part of
*/
-static struct cgroup_subsys_state *cpuset_css_alloc(struct cgroup *cgrp)
+static struct cgroup_subsys_state *
+cpuset_css_alloc(struct cgroup_subsys_state *parent_css)
{
struct cpuset *cs;
- if (!cgrp->parent)
+ if (!parent_css)
return &top_cpuset.css;
cs = kzalloc(sizeof(*cs), GFP_KERNEL);
@@ -1973,12 +1962,12 @@ static struct cgroup_subsys_state *cpuset_css_alloc(struct cgroup *cgrp)
return &cs->css;
}
-static int cpuset_css_online(struct cgroup *cgrp)
+static int cpuset_css_online(struct cgroup_subsys_state *css)
{
- struct cpuset *cs = cgroup_cs(cgrp);
+ struct cpuset *cs = css_cs(css);
struct cpuset *parent = parent_cs(cs);
struct cpuset *tmp_cs;
- struct cgroup *pos_cg;
+ struct cgroup_subsys_state *pos_css;
if (!parent)
return 0;
@@ -1993,7 +1982,7 @@ static int cpuset_css_online(struct cgroup *cgrp)
number_of_cpusets++;
- if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags))
+ if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags))
goto out_unlock;
/*
@@ -2010,7 +1999,7 @@ static int cpuset_css_online(struct cgroup *cgrp)
* (and likewise for mems) to the new cgroup.
*/
rcu_read_lock();
- cpuset_for_each_child(tmp_cs, pos_cg, parent) {
+ cpuset_for_each_child(tmp_cs, pos_css, parent) {
if (is_mem_exclusive(tmp_cs) || is_cpu_exclusive(tmp_cs)) {
rcu_read_unlock();
goto out_unlock;
@@ -2027,9 +2016,15 @@ out_unlock:
return 0;
}
-static void cpuset_css_offline(struct cgroup *cgrp)
+/*
+ * If the cpuset being removed has its flag 'sched_load_balance'
+ * enabled, then simulate turning sched_load_balance off, which
+ * will call rebuild_sched_domains_locked().
+ */
+
+static void cpuset_css_offline(struct cgroup_subsys_state *css)
{
- struct cpuset *cs = cgroup_cs(cgrp);
+ struct cpuset *cs = css_cs(css);
mutex_lock(&cpuset_mutex);
@@ -2042,15 +2037,9 @@ static void cpuset_css_offline(struct cgroup *cgrp)
mutex_unlock(&cpuset_mutex);
}
-/*
- * If the cpuset being removed has its flag 'sched_load_balance'
- * enabled, then simulate turning sched_load_balance off, which
- * will call rebuild_sched_domains_locked().
- */
-
-static void cpuset_css_free(struct cgroup *cgrp)
+static void cpuset_css_free(struct cgroup_subsys_state *css)
{
- struct cpuset *cs = cgroup_cs(cgrp);
+ struct cpuset *cs = css_cs(css);
free_cpumask_var(cs->cpus_allowed);
kfree(cs);
@@ -2257,11 +2246,11 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
/* if cpus or mems changed, we need to propagate to descendants */
if (cpus_updated || mems_updated) {
struct cpuset *cs;
- struct cgroup *pos_cgrp;
+ struct cgroup_subsys_state *pos_css;
rcu_read_lock();
- cpuset_for_each_descendant_pre(cs, pos_cgrp, &top_cpuset) {
- if (!css_tryget(&cs->css))
+ cpuset_for_each_descendant_pre(cs, pos_css, &top_cpuset) {
+ if (cs == &top_cpuset || !css_tryget(&cs->css))
continue;
rcu_read_unlock();
@@ -2350,7 +2339,7 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
void cpuset_cpus_allowed_fallback(struct task_struct *tsk)
{
- const struct cpuset *cpus_cs;
+ struct cpuset *cpus_cs;
rcu_read_lock();
cpus_cs = effective_cpumask_cpuset(task_cs(tsk));
@@ -2423,7 +2412,7 @@ int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask)
* callback_mutex. If no ancestor is mem_exclusive or mem_hardwall
* (an unusual configuration), then returns the root cpuset.
*/
-static const struct cpuset *nearest_hardwall_ancestor(const struct cpuset *cs)
+static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
{
while (!(is_mem_exclusive(cs) || is_mem_hardwall(cs)) && parent_cs(cs))
cs = parent_cs(cs);
@@ -2493,7 +2482,7 @@ static const struct cpuset *nearest_hardwall_ancestor(const struct cpuset *cs)
*/
int __cpuset_node_allowed_softwall(int node, gfp_t gfp_mask)
{
- const struct cpuset *cs; /* current cpuset ancestors */
+ struct cpuset *cs; /* current cpuset ancestors */
int allowed; /* is allocation in zone z allowed? */
if (in_interrupt() || (gfp_mask & __GFP_THISNODE))
@@ -2731,7 +2720,7 @@ int proc_cpuset_show(struct seq_file *m, void *unused_v)
goto out_free;
rcu_read_lock();
- css = task_subsys_state(tsk, cpuset_subsys_id);
+ css = task_css(tsk, cpuset_subsys_id);
retval = cgroup_path(css->cgroup, buf, PAGE_SIZE);
rcu_read_unlock();
if (retval < 0)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index f86599e8c12..9300f522607 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -340,8 +340,8 @@ struct perf_cgroup {
static inline struct perf_cgroup *
perf_cgroup_from_task(struct task_struct *task)
{
- return container_of(task_subsys_state(task, perf_subsys_id),
- struct perf_cgroup, css);
+ return container_of(task_css(task, perf_subsys_id),
+ struct perf_cgroup, css);
}
static inline bool
@@ -591,7 +591,9 @@ static inline int perf_cgroup_connect(int fd, struct perf_event *event,
if (!f.file)
return -EBADF;
- css = cgroup_css_from_dir(f.file, perf_subsys_id);
+ rcu_read_lock();
+
+ css = css_from_dir(f.file->f_dentry, &perf_subsys);
if (IS_ERR(css)) {
ret = PTR_ERR(css);
goto out;
@@ -617,6 +619,7 @@ static inline int perf_cgroup_connect(int fd, struct perf_event *event,
ret = -EINVAL;
}
out:
+ rcu_read_unlock();
fdput(f);
return ret;
}
@@ -7798,7 +7801,8 @@ unlock:
device_initcall(perf_event_sysfs_init);
#ifdef CONFIG_CGROUP_PERF
-static struct cgroup_subsys_state *perf_cgroup_css_alloc(struct cgroup *cont)
+static struct cgroup_subsys_state *
+perf_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
{
struct perf_cgroup *jc;
@@ -7815,11 +7819,10 @@ static struct cgroup_subsys_state *perf_cgroup_css_alloc(struct cgroup *cont)
return &jc->css;
}
-static void perf_cgroup_css_free(struct cgroup *cont)
+static void perf_cgroup_css_free(struct cgroup_subsys_state *css)
{
- struct perf_cgroup *jc;
- jc = container_of(cgroup_subsys_state(cont, perf_subsys_id),
- struct perf_cgroup, css);
+ struct perf_cgroup *jc = container_of(css, struct perf_cgroup, css);
+
free_percpu(jc->info);
kfree(jc);
}
@@ -7831,15 +7834,17 @@ static int __perf_cgroup_move(void *info)
return 0;
}
-static void perf_cgroup_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
+static void perf_cgroup_attach(struct cgroup_subsys_state *css,
+ struct cgroup_taskset *tset)
{
struct task_struct *task;
- cgroup_taskset_for_each(task, cgrp, tset)
+ cgroup_taskset_for_each(task, css, tset)
task_function_call(task, __perf_cgroup_move, task);
}
-static void perf_cgroup_exit(struct cgroup *cgrp, struct cgroup *old_cgrp,
+static void perf_cgroup_exit(struct cgroup_subsys_state *css,
+ struct cgroup_subsys_state *old_css,
struct task_struct *task)
{
/*
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 05c39f03031..e53bda3ff2f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6815,7 +6815,7 @@ void sched_move_task(struct task_struct *tsk)
if (unlikely(running))
tsk->sched_class->put_prev_task(rq, tsk);
- tg = container_of(task_subsys_state_check(tsk, cpu_cgroup_subsys_id,
+ tg = container_of(task_css_check(tsk, cpu_cgroup_subsys_id,
lockdep_is_held(&tsk->sighand->siglock)),
struct task_group, css);
tg = autogroup_task_group(tsk, tg);
@@ -7137,23 +7137,22 @@ int sched_rt_handler(struct ctl_table *table, int write,
#ifdef CONFIG_CGROUP_SCHED
-/* return corresponding task_group object of a cgroup */
-static inline struct task_group *cgroup_tg(struct cgroup *cgrp)
+static inline struct task_group *css_tg(struct cgroup_subsys_state *css)
{
- return container_of(cgroup_subsys_state(cgrp, cpu_cgroup_subsys_id),
- struct task_group, css);
+ return css ? container_of(css, struct task_group, css) : NULL;
}
-static struct cgroup_subsys_state *cpu_cgroup_css_alloc(struct cgroup *cgrp)
+static struct cgroup_subsys_state *
+cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
{
- struct task_group *tg, *parent;
+ struct task_group *parent = css_tg(parent_css);
+ struct task_group *tg;
- if (!cgrp->parent) {
+ if (!parent) {
/* This is early initialization for the top cgroup */
return &root_task_group.css;
}
- parent = cgroup_tg(cgrp->parent);
tg = sched_create_group(parent);
if (IS_ERR(tg))
return ERR_PTR(-ENOMEM);
@@ -7161,41 +7160,38 @@ static struct cgroup_subsys_state *cpu_cgroup_css_alloc(struct cgroup *cgrp)
return &tg->css;
}
-static int cpu_cgroup_css_online(struct cgroup *cgrp)
+static int cpu_cgroup_css_online(struct cgroup_subsys_state *css)
{
- struct task_group *tg = cgroup_tg(cgrp);
- struct task_group *parent;
-
- if (!cgrp->parent)
- return 0;
+ struct task_group *tg = css_tg(css);
+ struct task_group *parent = css_tg(css_parent(css));
- parent = cgroup_tg(cgrp->parent);
- sched_online_group(tg, parent);
+ if (parent)
+ sched_online_group(tg, parent);
return 0;
}
-static void cpu_cgroup_css_free(struct cgroup *cgrp)
+static void cpu_cgroup_css_free(struct cgroup_subsys_state *css)
{
- struct task_group *tg = cgroup_tg(cgrp);
+ struct task_group *tg = css_tg(css);
sched_destroy_group(tg);
}
-static void cpu_cgroup_css_offline(struct cgroup *cgrp)
+static void cpu_cgroup_css_offline(struct cgroup_subsys_state *css)
{
- struct task_group *tg = cgroup_tg(cgrp);
+ struct task_group *tg = css_tg(css);
sched_offline_group(tg);
}
-static int cpu_cgroup_can_attach(struct cgroup *cgrp,
+static int cpu_cgroup_can_attach(struct cgroup_subsys_state *css,
struct cgroup_taskset *tset)
{
struct task_struct *task;
- cgroup_taskset_for_each(task, cgrp, tset) {
+ cgroup_taskset_for_each(task, css, tset) {
#ifdef CONFIG_RT_GROUP_SCHED
- if (!sched_rt_can_attach(cgroup_tg(cgrp), task))
+ if (!sched_rt_can_attach(css_tg(css), task))
return -EINVAL;
#else
/* We don't support RT-tasks being in separate groups */
@@ -7206,18 +7202,18 @@ static int cpu_cgroup_can_attach(struct cgroup *cgrp,
return 0;
}
-static void cpu_cgroup_attach(struct cgroup *cgrp,
+static void cpu_cgroup_attach(struct cgroup_subsys_state *css,
struct cgroup_taskset *tset)
{
struct task_struct *task;
- cgroup_taskset_for_each(task, cgrp, tset)
+ cgroup_taskset_for_each(task, css, tset)
sched_move_task(task);
}
-static void
-cpu_cgroup_exit(struct cgroup *cgrp, struct cgroup *old_cgrp,
- struct task_struct *task)
+static void cpu_cgroup_exit(struct cgroup_subsys_state *css,
+ struct cgroup_subsys_state *old_css,
+ struct task_struct *task)
{
/*
* cgroup_exit() is called in the copy_process() failure path.
@@ -7231,15 +7227,16 @@ cpu_cgroup_exit(struct cgroup *cgrp, struct cgroup *old_cgrp,
}
#ifdef CONFIG_FAIR_GROUP_SCHED
-static int cpu_shares_write_u64(struct cgroup *cgrp, struct cftype *cftype,
- u64 shareval)
+static int cpu_shares_write_u64(struct cgroup_subsys_state *css,
+ struct cftype *cftype, u64 shareval)
{
- return sched_group_set_shares(cgroup_tg(cgrp), scale_load(shareval));
+ return sched_group_set_shares(css_tg(css), scale_load(shareval));
}
-static u64 cpu_shares_read_u64(struct cgroup *cgrp, struct cftype *cft)
+static u64 cpu_shares_read_u64(struct cgroup_subsys_state *css,
+ struct cftype *cft)
{
- struct task_group *tg = cgroup_tg(cgrp);
+ struct task_group *tg = css_tg(css);
return (u64) scale_load_down(tg->shares);
}
@@ -7361,26 +7358,28 @@ long tg_get_cfs_period(struct task_group *tg)
return cfs_period_us;
}
-static s64 cpu_cfs_quota_read_s64(struct cgroup *cgrp, struct cftype *cft)
+static s64 cpu_cfs_quota_read_s64(struct cgroup_subsys_state *css,
+ struct cftype *cft)
{
- return tg_get_cfs_quota(cgroup_tg(cgrp));
+ return tg_get_cfs_quota(css_tg(css));
}
-static int cpu_cfs_quota_write_s64(struct cgroup *cgrp, struct cftype *cftype,
- s64 cfs_quota_us)
+static int cpu_cfs_quota_write_s64(struct cgroup_subsys_state *css,
+ struct cftype *cftype, s64 cfs_quota_us)
{
- return tg_set_cfs_quota(cgroup_tg(cgrp), cfs_quota_us);
+ return tg_set_cfs_quota(css_tg(css), cfs_quota_us);
}
-static u64 cpu_cfs_period_read_u64(struct cgroup *cgrp, struct cftype *cft)
+static u64 cpu_cfs_period_read_u64(struct cgroup_subsys_state *css,
+ struct cftype *cft)
{
- return tg_get_cfs_period(cgroup_tg(cgrp));
+ return tg_get_cfs_period(css_tg(css));
}
-static int cpu_cfs_period_write_u64(struct cgroup *cgrp, struct cftype *cftype,
- u64 cfs_period_us)
+static int cpu_cfs_period_write_u64(struct cgroup_subsys_state *css,
+ struct cftype *cftype, u64 cfs_period_us)
{
- return tg_set_cfs_period(cgroup_tg(cgrp), cfs_period_us);
+ return tg_set_cfs_period(css_tg(css), cfs_period_us);
}
struct cfs_schedulable_data {
@@ -7461,10 +7460,10 @@ static int __cfs_schedulable(struct task_group *tg, u64 period, u64 quota)
return ret;
}
-static int cpu_stats_show(struct cgroup *cgrp, struct cftype *cft,
+static int cpu_stats_show(struct cgroup_subsys_state *css, struct cftype *cft,
struct cgroup_map_cb *cb)
{
- struct task_group *tg = cgroup_tg(cgrp);
+ struct task_group *tg = css_tg(css);
struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
cb->fill(cb, "nr_periods", cfs_b->nr_periods);
@@ -7477,26 +7476,28 @@ static int cpu_stats_show(struct cgroup *cgrp, struct cftype *cft,
#endif /* CONFIG_FAIR_GROUP_SCHED */
#ifdef CONFIG_RT_GROUP_SCHED
-static int cpu_rt_runtime_write(struct cgroup *cgrp, struct cftype *cft,
- s64 val)
+static int cpu_rt_runtime_write(struct cgroup_subsys_state *css,
+ struct cftype *cft, s64 val)
{
- return sched_group_set_rt_runtime(cgroup_tg(cgrp), val);
+ return sched_group_set_rt_runtime(css_tg(css), val);
}
-static s64 cpu_rt_runtime_read(struct cgroup *cgrp, struct cftype *cft)
+static s64 cpu_rt_runtime_read(struct cgroup_subsys_state *css,
+ struct cftype *cft)
{
- return sched_group_rt_runtime(cgroup_tg(cgrp));
+ return sched_group_rt_runtime(css_tg(css));
}
-static int cpu_rt_period_write_uint(struct cgroup *cgrp, struct cftype *cftype,
- u64 rt_period_us)
+static int cpu_rt_period_write_uint(struct cgroup_subsys_state *css,
+ struct cftype *cftype, u64 rt_period_us)
{
- return sched_group_set_rt_period(cgroup_tg(cgrp), rt_period_us);
+ return sched_group_set_rt_period(css_tg(css), rt_period_us);
}
-static u64 cpu_rt_period_read_uint(struct cgroup *cgrp, struct cftype *cft)
+static u64 cpu_rt_period_read_uint(struct cgroup_subsys_state *css,
+ struct cftype *cft)
{
- return sched_group_rt_period(cgroup_tg(cgrp));
+ return sched_group_rt_period(css_tg(css));
}
#endif /* CONFIG_RT_GROUP_SCHED */
diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
index dbb7e2cd95e..f64722ff029 100644
--- a/kernel/sched/cpuacct.c
+++ b/kernel/sched/cpuacct.c
@@ -33,30 +33,20 @@ struct cpuacct {
struct kernel_cpustat __percpu *cpustat;
};
-/* return cpu accounting group corresponding to this container */
-static inline struct cpuacct *cgroup_ca(struct cgroup *cgrp)
+static inline struct cpuacct *css_ca(struct cgroup_subsys_state *css)
{
- return container_of(cgroup_subsys_state(cgrp, cpuacct_subsys_id),
- struct cpuacct, css);
+ return css ? container_of(css, struct cpuacct, css) : NULL;
}
/* return cpu accounting group to which this task belongs */
static inline struct cpuacct *task_ca(struct task_struct *tsk)
{
- return container_of(task_subsys_state(tsk, cpuacct_subsys_id),
- struct cpuacct, css);
-}
-
-static inline struct cpuacct *__parent_ca(struct cpuacct *ca)
-{
- return cgroup_ca(ca->css.cgroup->parent);
+ return css_ca(task_css(tsk, cpuacct_subsys_id));
}
static inline struct cpuacct *parent_ca(struct cpuacct *ca)
{
- if (!ca->css.cgroup->parent)
- return NULL;
- return cgroup_ca(ca->css.cgroup->parent);
+ return css_ca(css_parent(&ca->css));
}
static DEFINE_PER_CPU(u64, root_cpuacct_cpuusage);
@@ -66,11 +56,12 @@ static struct cpuacct root_cpuacct = {
};
/* create a new cpu accounting group */
-static struct cgroup_subsys_state *cpuacct_css_alloc(struct cgroup *cgrp)
+static struct cgroup_subsys_state *
+cpuacct_css_alloc(struct cgroup_subsys_state *parent_css)
{
struct cpuacct *ca;
- if (!cgrp->parent)
+ if (!parent_css)
return &root_cpuacct.css;
ca = kzalloc(sizeof(*ca), GFP_KERNEL);
@@ -96,9 +87,9 @@ out:
}
/* destroy an existing cpu accounting group */
-static void cpuacct_css_free(struct cgroup *cgrp)
+static void cpuacct_css_free(struct cgroup_subsys_state *css)
{
- struct cpuacct *ca = cgroup_ca(cgrp);
+ struct cpuacct *ca = css_ca(css);
free_percpu(ca->cpustat);
free_percpu(ca->cpuusage);
@@ -141,9 +132,9 @@ static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
}
/* return total cpu usage (in nanoseconds) of a group */
-static u64 cpuusage_read(struct cgroup *cgrp, struct cftype *cft)
+static u64 cpuusage_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
- struct cpuacct *ca = cgroup_ca(cgrp);
+ struct cpuacct *ca = css_ca(css);
u64 totalcpuusage = 0;
int i;
@@ -153,10 +144,10 @@ static u64 cpuusage_read(struct cgroup *cgrp, struct cftype *cft)
return totalcpuusage;
}
-static int cpuusage_write(struct cgroup *cgrp, struct cftype *cftype,
- u64 reset)
+static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft,
+ u64 reset)
{
- struct cpuacct *ca = cgroup_ca(cgrp);
+ struct cpuacct *ca = css_ca(css);
int err = 0;
int i;
@@ -172,10 +163,10 @@ out:
return err;
}
-static int cpuacct_percpu_seq_read(struct cgroup *cgroup, struct cftype *cft,
- struct seq_file *m)
+static int cpuacct_percpu_seq_read(struct cgroup_subsys_state *css,
+ struct cftype *cft, struct seq_file *m)
{
- struct cpuacct *ca = cgroup_ca(cgroup);
+ struct cpuacct *ca = css_ca(css);
u64 percpu;
int i;
@@ -192,10 +183,10 @@ static const char * const cpuacct_stat_desc[] = {
[CPUACCT_STAT_SYSTEM] = "system",
};
-static int cpuacct_stats_show(struct cgroup *cgrp, struct cftype *cft,
- struct cgroup_map_cb *cb)
+static int cpuacct_stats_show(struct cgroup_subsys_state *css,
+ struct cftype *cft, struct cgroup_map_cb *cb)
{
- struct cpuacct *ca = cgroup_ca(cgrp);
+ struct cpuacct *ca = css_ca(css);
int cpu;
s64 val = 0;
@@ -281,7 +272,7 @@ void cpuacct_account_field(struct task_struct *p, int index, u64 val)
while (ca != &root_cpuacct) {
kcpustat = this_cpu_ptr(ca->cpustat);
kcpustat->cpustat[index] += val;
- ca = __parent_ca(ca);
+ ca = parent_ca(ca);
}
rcu_read_unlock();
}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index ef0a7b2439d..471a56db05e 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -665,9 +665,9 @@ extern int group_balance_cpu(struct sched_group *sg);
/*
* Return the group to which this tasks belongs.
*
- * We cannot use task_subsys_state() and friends because the cgroup
- * subsystem changes that value before the cgroup_subsys::attach() method
- * is called, therefore we cannot pin it and might observe the wrong value.
+ * We cannot use task_css() and friends because the cgroup subsystem
+ * changes that value before the cgroup_subsys::attach() method is called,
+ * therefore we cannot pin it and might observe the wrong value.
*
* The same is true for autogroup's p->signal->autogroup->tg, the autogroup
* core changes this before calling sched_move_task().
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index 9cea7de22ff..bda8e44f6fd 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -36,21 +36,13 @@ static struct hugetlb_cgroup *root_h_cgroup __read_mostly;
static inline
struct hugetlb_cgroup *hugetlb_cgroup_from_css(struct cgroup_subsys_state *s)
{
- return container_of(s, struct hugetlb_cgroup, css);
-}
-
-static inline
-struct hugetlb_cgroup *hugetlb_cgroup_from_cgroup(struct cgroup *cgroup)
-{
- return hugetlb_cgroup_from_css(cgroup_subsys_state(cgroup,
- hugetlb_subsys_id));
+ return s ? container_of(s, struct hugetlb_cgroup, css) : NULL;
}
static inline
struct hugetlb_cgroup *hugetlb_cgroup_from_task(struct task_struct *task)
{
- return hugetlb_cgroup_from_css(task_subsys_state(task,
- hugetlb_subsys_id));
+ return hugetlb_cgroup_from_css(task_css(task, hugetlb_subsys_id));
}
static inline bool hugetlb_cgroup_is_root(struct hugetlb_cgroup *h_cg)
@@ -58,17 +50,15 @@ static inline bool hugetlb_cgroup_is_root(struct hugetlb_cgroup *h_cg)
return (h_cg == root_h_cgroup);
}
-static inline struct hugetlb_cgroup *parent_hugetlb_cgroup(struct cgroup *cg)
+static inline struct hugetlb_cgroup *
+parent_hugetlb_cgroup(struct hugetlb_cgroup *h_cg)
{
- if (!cg->parent)
- return NULL;
- return hugetlb_cgroup_from_cgroup(cg->parent);
+ return hugetlb_cgroup_from_css(css_parent(&h_cg->css));
}
-static inline bool hugetlb_cgroup_have_usage(struct cgroup *cg)
+static inline bool hugetlb_cgroup_have_usage(struct hugetlb_cgroup *h_cg)
{
int idx;
- struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_cgroup(cg);
for (idx = 0; idx < hugetlb_max_hstate; idx++) {
if ((res_counter_read_u64(&h_cg->hugepage[idx], RES_USAGE)) > 0)
@@ -77,19 +67,18 @@ static inline bool hugetlb_cgroup_have_usage(struct cgroup *cg)
return false;
}
-static struct cgroup_subsys_state *hugetlb_cgroup_css_alloc(struct cgroup *cgroup)
+static struct cgroup_subsys_state *
+hugetlb_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
{
+ struct hugetlb_cgroup *parent_h_cgroup = hugetlb_cgroup_from_css(parent_css);
+ struct hugetlb_cgroup *h_cgroup;
int idx;
- struct cgroup *parent_cgroup;
- struct hugetlb_cgroup *h_cgroup, *parent_h_cgroup;
h_cgroup = kzalloc(sizeof(*h_cgroup), GFP_KERNEL);
if (!h_cgroup)
return ERR_PTR(-ENOMEM);
- parent_cgroup = cgroup->parent;
- if (parent_cgroup) {
- parent_h_cgroup = hugetlb_cgroup_from_cgroup(parent_cgroup);
+ if (parent_h_cgroup) {
for (idx = 0; idx < HUGE_MAX_HSTATE; idx++)
res_counter_init(&h_cgroup->hugepage[idx],
&parent_h_cgroup->hugepage[idx]);
@@ -101,11 +90,11 @@ static struct cgroup_subsys_state *hugetlb_cgroup_css_alloc(struct cgroup *cgrou
return &h_cgroup->css;
}
-static void hugetlb_cgroup_css_free(struct cgroup *cgroup)
+static void hugetlb_cgroup_css_free(struct cgroup_subsys_state *css)
{
struct hugetlb_cgroup *h_cgroup;
- h_cgroup = hugetlb_cgroup_from_cgroup(cgroup);
+ h_cgroup = hugetlb_cgroup_from_css(css);
kfree(h_cgroup);
}
@@ -117,15 +106,14 @@ static void hugetlb_cgroup_css_free(struct cgroup *cgroup)
* page reference and test for page active here. This function
* cannot fail.
*/
-static void hugetlb_cgroup_move_parent(int idx, struct cgroup *cgroup,
+static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg,
struct page *page)
{
int csize;
struct res_counter *counter;
struct res_counter *fail_res;
struct hugetlb_cgroup *page_hcg;
- struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_cgroup(cgroup);
- struct hugetlb_cgroup *parent = parent_hugetlb_cgroup(cgroup);
+ struct hugetlb_cgroup *parent = parent_hugetlb_cgroup(h_cg);
page_hcg = hugetlb_cgroup_from_page(page);
/*
@@ -155,8 +143,9 @@ out:
* Force the hugetlb cgroup to empty the hugetlb resources by moving them to
* the parent cgroup.
*/
-static void hugetlb_cgroup_css_offline(struct cgroup *cgroup)
+static void hugetlb_cgroup_css_offline(struct cgroup_subsys_state *css)
{
+ struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);
struct hstate *h;
struct page *page;
int idx = 0;
@@ -165,13 +154,13 @@ static void hugetlb_cgroup_css_offline(struct cgroup *cgroup)
for_each_hstate(h) {
spin_lock(&hugetlb_lock);
list_for_each_entry(page, &h->hugepage_activelist, lru)
- hugetlb_cgroup_move_parent(idx, cgroup, page);
+ hugetlb_cgroup_move_parent(idx, h_cg, page);
spin_unlock(&hugetlb_lock);
idx++;
}
cond_resched();
- } while (hugetlb_cgroup_have_usage(cgroup));
+ } while (hugetlb_cgroup_have_usage(h_cg));
}
int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
@@ -253,14 +242,15 @@ void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
return;
}
-static ssize_t hugetlb_cgroup_read(struct cgroup *cgroup, struct cftype *cft,
- struct file *file, char __user *buf,
- size_t nbytes, loff_t *ppos)
+static ssize_t hugetlb_cgroup_read(struct cgroup_subsys_state *css,
+ struct cftype *cft, struct file *file,
+ char __user *buf, size_t nbytes,
+ loff_t *ppos)
{
u64 val;
char str[64];
int idx, name, len;
- struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_cgroup(cgroup);
+ struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);
idx = MEMFILE_IDX(cft->private);
name = MEMFILE_ATTR(cft->private);
@@ -270,12 +260,12 @@ static ssize_t hugetlb_cgroup_read(struct cgroup *cgroup, struct cftype *cft,
return simple_read_from_buffer(buf, nbytes, ppos, str, len);
}
-static int hugetlb_cgroup_write(struct cgroup *cgroup, struct cftype *cft,
- const char *buffer)
+static int hugetlb_cgroup_write(struct cgroup_subsys_state *css,
+ struct cftype *cft, const char *buffer)
{
int idx, name, ret;
unsigned long long val;
- struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_cgroup(cgroup);
+ struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);
idx = MEMFILE_IDX(cft->private);
name = MEMFILE_ATTR(cft->private);
@@ -300,10 +290,11 @@ static int hugetlb_cgroup_write(struct cgroup *cgroup, struct cftype *cft,
return ret;
}
-static int hugetlb_cgroup_reset(struct cgroup *cgroup, unsigned int event)
+static int hugetlb_cgroup_reset(struct cgroup_subsys_state *css,
+ unsigned int event)
{
int idx, name, ret = 0;
- struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_cgroup(cgroup);
+ struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);
idx = MEMFILE_IDX(event);
name = MEMFILE_ATTR(event);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 0878ff7c26a..3b83957b643 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -483,10 +483,9 @@ enum res_type {
*/
static DEFINE_MUTEX(memcg_create_mutex);
-static inline
struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *s)
{
- return container_of(s, struct mem_cgroup, css);
+ return s ? container_of(s, struct mem_cgroup, css) : NULL;
}
/* Some nice accessors for the vmpressure. */
@@ -1035,12 +1034,6 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
preempt_enable();
}
-struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont)
-{
- return mem_cgroup_from_css(
- cgroup_subsys_state(cont, mem_cgroup_subsys_id));
-}
-
struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
{
/*
@@ -1051,7 +1044,7 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
if (unlikely(!p))
return NULL;
- return mem_cgroup_from_css(task_subsys_state(p, mem_cgroup_subsys_id));
+ return mem_cgroup_from_css(task_css(p, mem_cgroup_subsys_id));
}
struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
@@ -1084,20 +1077,11 @@ struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
static struct mem_cgroup *__mem_cgroup_iter_next(struct mem_cgroup *root,
struct mem_cgroup *last_visited)
{
- struct cgroup *prev_cgroup, *next_cgroup;
+ struct cgroup_subsys_state *prev_css, *next_css;
- /*
- * Root is not visited by cgroup iterators so it needs an
- * explicit visit.
- */
- if (!last_visited)
- return root;
-
- prev_cgroup = (last_visited == root) ? NULL
- : last_visited->css.cgroup;
+ prev_css = last_visited ? &last_visited->css : NULL;
skip_node:
- next_cgroup = cgroup_next_descendant_pre(
- prev_cgroup, root->css.cgroup);
+ next_css = css_next_descendant_pre(prev_css, &root->css);
/*
* Even if we found a group we have to make sure it is
@@ -1106,13 +1090,13 @@ skip_node:
* last_visited css is safe to use because it is
* protected by css_get and the tree walk is rcu safe.
*/
- if (next_cgroup) {
- struct mem_cgroup *mem = mem_cgroup_from_cont(
- next_cgroup);
+ if (next_css) {
+ struct mem_cgroup *mem = mem_cgroup_from_css(next_css);
+
if (css_tryget(&mem->css))
return mem;
else {
- prev_cgroup = next_cgroup;
+ prev_css = next_css;
goto skip_node;
}
}
@@ -1525,10 +1509,8 @@ static unsigned long mem_cgroup_margin(struct mem_cgroup *memcg)
int mem_cgroup_swappiness(struct mem_cgroup *memcg)
{
- struct cgroup *cgrp = memcg->css.cgroup;
-
/* root ? */
- if (cgrp->parent == NULL)
+ if (!css_parent(&memcg->css))
return vm_swappiness;
return memcg->swappiness;
@@ -1805,12 +1787,11 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, order, NULL);
totalpages = mem_cgroup_get_limit(memcg) >> PAGE_SHIFT ? : 1;
for_each_mem_cgroup_tree(iter, memcg) {
- struct cgroup *cgroup = iter->css.cgroup;
- struct cgroup_iter it;
+ struct css_task_iter it;
struct task_struct *task;
- cgroup_iter_start(cgroup, &it);
- while ((task = cgroup_iter_next(cgroup, &it))) {
+ css_task_iter_start(&iter->css, &it);
+ while ((task = css_task_iter_next(&it))) {
switch (oom_scan_process_thread(task, totalpages, NULL,
false)) {
case OOM_SCAN_SELECT:
@@ -1823,7 +1804,7 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
case OOM_SCAN_CONTINUE:
continue;
case OOM_SCAN_ABORT:
- cgroup_iter_end(cgroup, &it);
+ css_task_iter_end(&it);
mem_cgroup_iter_break(memcg, iter);
if (chosen)
put_task_struct(chosen);
@@ -1840,7 +1821,7 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
get_task_struct(chosen);
}
}
- cgroup_iter_end(cgroup, &it);
+ css_task_iter_end(&it);
}
if (!chosen)
@@ -2954,10 +2935,10 @@ static struct kmem_cache *memcg_params_to_cache(struct memcg_cache_params *p)
}
#ifdef CONFIG_SLABINFO
-static int mem_cgroup_slabinfo_read(struct cgroup *cont, struct cftype *cft,
- struct seq_file *m)
+static int mem_cgroup_slabinfo_read(struct cgroup_subsys_state *css,
+ struct cftype *cft, struct seq_file *m)
{
- struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+ struct mem_cgroup *memcg = mem_cgroup_from_css(css);
struct memcg_cache_params *params;
if (!memcg_can_account_kmem(memcg))
@@ -4943,10 +4924,10 @@ static void mem_cgroup_reparent_charges(struct mem_cgroup *memcg)
*/
static inline bool __memcg_has_children(struct mem_cgroup *memcg)
{
- struct cgroup *pos;
+ struct cgroup_subsys_state *pos;
/* bounce at first found */
- cgroup_for_each_child(pos, memcg->css.cgroup)
+ css_for_each_child(pos, &memcg->css)
return true;
return false;
}
@@ -5002,9 +4983,10 @@ static int mem_cgroup_force_empty(struct mem_cgroup *memcg)
return 0;
}
-static int mem_cgroup_force_empty_write(struct cgroup *cont, unsigned int event)
+static int mem_cgroup_force_empty_write(struct cgroup_subsys_state *css,
+ unsigned int event)
{
- struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+ struct mem_cgroup *memcg = mem_cgroup_from_css(css);
int ret;
if (mem_cgroup_is_root(memcg))
@@ -5017,21 +4999,18 @@ static int mem_cgroup_force_empty_write(struct cgroup *cont, unsigned int event)
}
-static u64 mem_cgroup_hierarchy_read(struct cgroup *cont, struct cftype *cft)
+static u64 mem_cgroup_hierarchy_read(struct cgroup_subsys_state *css,
+ struct cftype *cft)
{
- return mem_cgroup_from_cont(cont)->use_hierarchy;
+ return mem_cgroup_from_css(css)->use_hierarchy;
}
-static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft,
- u64 val)
+static int mem_cgroup_hierarchy_write(struct cgroup_subsys_state *css,
+ struct cftype *cft, u64 val)
{
int retval = 0;
- struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
- struct cgroup *parent = cont->parent;
- struct mem_cgroup *parent_memcg = NULL;
-
- if (parent)
- parent_memcg = mem_cgroup_from_cont(parent);
+ struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+ struct mem_cgroup *parent_memcg = mem_cgroup_from_css(css_parent(&memcg->css));
mutex_lock(&memcg_create_mutex);
@@ -5101,11 +5080,11 @@ static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
return val << PAGE_SHIFT;
}
-static ssize_t mem_cgroup_read(struct cgroup *cont, struct cftype *cft,
- struct file *file, char __user *buf,
- size_t nbytes, loff_t *ppos)
+static ssize_t mem_cgroup_read(struct cgroup_subsys_state *css,
+ struct cftype *cft, struct file *file,
+ char __user *buf, size_t nbytes, loff_t *ppos)
{
- struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+ struct mem_cgroup *memcg = mem_cgroup_from_css(css);
char str[64];
u64 val;
int name, len;
@@ -5138,11 +5117,11 @@ static ssize_t mem_cgroup_read(struct cgroup *cont, struct cftype *cft,
return simple_read_from_buffer(buf, nbytes, ppos, str, len);
}
-static int memcg_update_kmem_limit(struct cgroup *cont, u64 val)
+static int memcg_update_kmem_limit(struct cgroup_subsys_state *css, u64 val)
{
int ret = -EINVAL;
#ifdef CONFIG_MEMCG_KMEM
- struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+ struct mem_cgroup *memcg = mem_cgroup_from_css(css);
/*
* For simplicity, we won't allow this to be disabled. It also can't
* be changed if the cgroup has children already, or if tasks had
@@ -5158,7 +5137,7 @@ static int memcg_update_kmem_limit(struct cgroup *cont, u64 val)
mutex_lock(&memcg_create_mutex);
mutex_lock(&set_limit_mutex);
if (!memcg->kmem_account_flags && val != RESOURCE_MAX) {
- if (cgroup_task_count(cont) || memcg_has_children(memcg)) {
+ if (cgroup_task_count(css->cgroup) || memcg_has_children(memcg)) {
ret = -EBUSY;
goto out;
}
@@ -5228,10 +5207,10 @@ out:
* The user of this function is...
* RES_LIMIT.
*/
-static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
+static int mem_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft,
const char *buffer)
{
- struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+ struct mem_cgroup *memcg = mem_cgroup_from_css(css);
enum res_type type;
int name;
unsigned long long val;
@@ -5255,7 +5234,7 @@ static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
else if (type == _MEMSWAP)
ret = mem_cgroup_resize_memsw_limit(memcg, val);
else if (type == _KMEM)
- ret = memcg_update_kmem_limit(cont, val);
+ ret = memcg_update_kmem_limit(css, val);
else
return -EINVAL;
break;
@@ -5283,18 +5262,15 @@ static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
static void memcg_get_hierarchical_limit(struct mem_cgroup *memcg,
unsigned long long *mem_limit, unsigned long long *memsw_limit)
{
- struct cgroup *cgroup;
unsigned long long min_limit, min_memsw_limit, tmp;
min_limit = res_counter_read_u64(&memcg->res, RES_LIMIT);
min_memsw_limit = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
- cgroup = memcg->css.cgroup;
if (!memcg->use_hierarchy)
goto out;
- while (cgroup->parent) {
- cgroup = cgroup->parent;
- memcg = mem_cgroup_from_cont(cgroup);
+ while (css_parent(&memcg->css)) {
+ memcg = mem_cgroup_from_css(css_parent(&memcg->css));
if (!memcg->use_hierarchy)
break;
tmp = res_counter_read_u64(&memcg->res, RES_LIMIT);
@@ -5307,9 +5283,9 @@ out:
*memsw_limit = min_memsw_limit;
}
-static int mem_cgroup_reset(struct cgroup *cont, unsigned int event)
+static int mem_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event)
{
- struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+ struct mem_cgroup *memcg = mem_cgroup_from_css(css);
int name;
enum res_type type;
@@ -5342,17 +5318,17 @@ static int mem_cgroup_reset(struct cgroup *cont, unsigned int event)
return 0;
}
-static u64 mem_cgroup_move_charge_read(struct cgroup *cgrp,
+static u64 mem_cgroup_move_charge_read(struct cgroup_subsys_state *css,
struct cftype *cft)
{
- return mem_cgroup_from_cont(cgrp)->move_charge_at_immigrate;
+ return mem_cgroup_from_css(css)->move_charge_at_immigrate;
}
#ifdef CONFIG_MMU
-static int mem_cgroup_move_charge_write(struct cgroup *cgrp,
+static int mem_cgroup_move_charge_write(struct cgroup_subsys_state *css,
struct cftype *cft, u64 val)
{
- struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
+ struct mem_cgroup *memcg = mem_cgroup_from_css(css);
if (val >= (1 << NR_MOVE_TYPE))
return -EINVAL;
@@ -5367,7 +5343,7 @@ static int mem_cgroup_move_charge_write(struct cgroup *cgrp,
return 0;
}
#else
-static int mem_cgroup_move_charge_write(struct cgroup *cgrp,
+static int mem_cgroup_move_charge_write(struct cgroup_subsys_state *css,
struct cftype *cft, u64 val)
{
return -ENOSYS;
@@ -5375,13 +5351,13 @@ static int mem_cgroup_move_charge_write(struct cgroup *cgrp,
#endif
#ifdef CONFIG_NUMA
-static int memcg_numa_stat_show(struct cgroup *cont, struct cftype *cft,
- struct seq_file *m)
+static int memcg_numa_stat_show(struct cgroup_subsys_state *css,
+ struct cftype *cft, struct seq_file *m)
{
int nid;
unsigned long total_nr, file_nr, anon_nr, unevictable_nr;
unsigned long node_nr;
- struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+ struct mem_cgroup *memcg = mem_cgroup_from_css(css);
total_nr = mem_cgroup_nr_lru_pages(memcg, LRU_ALL);
seq_printf(m, "total=%lu", total_nr);
@@ -5426,10 +5402,10 @@ static inline void mem_cgroup_lru_names_not_uptodate(void)
BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_lru_names) != NR_LRU_LISTS);
}
-static int memcg_stat_show(struct cgroup *cont, struct cftype *cft,
+static int memcg_stat_show(struct cgroup_subsys_state *css, struct cftype *cft,
struct seq_file *m)
{
- struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+ struct mem_cgroup *memcg = mem_cgroup_from_css(css);
struct mem_cgroup *mi;
unsigned int i;
@@ -5513,27 +5489,23 @@ static int memcg_stat_show(struct cgroup *cont, struct cftype *cft,
return 0;
}
-static u64 mem_cgroup_swappiness_read(struct cgroup *cgrp, struct cftype *cft)
+static u64 mem_cgroup_swappiness_read(struct cgroup_subsys_state *css,
+ struct cftype *cft)
{
- struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
+ struct mem_cgroup *memcg = mem_cgroup_from_css(css);
return mem_cgroup_swappiness(memcg);
}
-static int mem_cgroup_swappiness_write(struct cgroup *cgrp, struct cftype *cft,
- u64 val)
+static int mem_cgroup_swappiness_write(struct cgroup_subsys_state *css,
+ struct cftype *cft, u64 val)
{
- struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
- struct mem_cgroup *parent;
-
- if (val > 100)
- return -EINVAL;
+ struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+ struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(&memcg->css));
- if (cgrp->parent == NULL)
+ if (val > 100 || !parent)
return -EINVAL;
- parent = mem_cgroup_from_cont(cgrp->parent);
-
mutex_lock(&memcg_create_mutex);
/* If under hierarchy, only empty-root can set this value */
@@ -5636,10 +5608,10 @@ static void mem_cgroup_oom_notify(struct mem_cgroup *memcg)
mem_cgroup_oom_notify_cb(iter);
}
-static int mem_cgroup_usage_register_event(struct cgroup *cgrp,
+static int mem_cgroup_usage_register_event(struct cgroup_subsys_state *css,
struct cftype *cft, struct eventfd_ctx *eventfd, const char *args)
{
- struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
+ struct mem_cgroup *memcg = mem_cgroup_from_css(css);
struct mem_cgroup_thresholds *thresholds;
struct mem_cgroup_threshold_ary *new;
enum res_type type = MEMFILE_TYPE(cft->private);
@@ -5719,10 +5691,10 @@ unlock:
return ret;
}
-static void mem_cgroup_usage_unregister_event(struct cgroup *cgrp,
+static void mem_cgroup_usage_unregister_event(struct cgroup_subsys_state *css,
struct cftype *cft, struct eventfd_ctx *eventfd)
{
- struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
+ struct mem_cgroup *memcg = mem_cgroup_from_css(css);
struct mem_cgroup_thresholds *thresholds;
struct mem_cgroup_threshold_ary *new;
enum res_type type = MEMFILE_TYPE(cft->private);
@@ -5798,10 +5770,10 @@ unlock:
mutex_unlock(&memcg->thresholds_lock);
}
-static int mem_cgroup_oom_register_event(struct cgroup *cgrp,
+static int mem_cgroup_oom_register_event(struct cgroup_subsys_state *css,
struct cftype *cft, struct eventfd_ctx *eventfd, const char *args)
{
- struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
+ struct mem_cgroup *memcg = mem_cgroup_from_css(css);
struct mem_cgroup_eventfd_list *event;
enum res_type type = MEMFILE_TYPE(cft->private);
@@ -5823,10 +5795,10 @@ static int mem_cgroup_oom_register_event(struct cgroup *cgrp,
return 0;
}
-static void mem_cgroup_oom_unregister_event(struct cgroup *cgrp,
+static void mem_cgroup_oom_unregister_event(struct cgroup_subsys_state *css,
struct cftype *cft, struct eventfd_ctx *eventfd)
{
- struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
+ struct mem_cgroup *memcg = mem_cgroup_from_css(css);
struct mem_cgroup_eventfd_list *ev, *tmp;
enum res_type type = MEMFILE_TYPE(cft->private);
@@ -5844,10 +5816,10 @@ static void mem_cgroup_oom_unregister_event(struct cgroup *cgrp,
spin_unlock(&memcg_oom_lock);
}
-static int mem_cgroup_oom_control_read(struct cgroup *cgrp,
+static int mem_cgroup_oom_control_read(struct cgroup_subsys_state *css,
struct cftype *cft, struct cgroup_map_cb *cb)
{
- struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
+ struct mem_cgroup *memcg = mem_cgroup_from_css(css);
cb->fill(cb, "oom_kill_disable", memcg->oom_kill_disable);
@@ -5858,18 +5830,16 @@ static int mem_cgroup_oom_control_read(struct cgroup *cgrp,
return 0;
}
-static int mem_cgroup_oom_control_write(struct cgroup *cgrp,
+static int mem_cgroup_oom_control_write(struct cgroup_subsys_state *css,
struct cftype *cft, u64 val)
{
- struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
- struct mem_cgroup *parent;
+ struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+ struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(&memcg->css));
/* cannot set to root cgroup and only 0 and 1 are allowed */
- if (!cgrp->parent || !((val == 0) || (val == 1)))
+ if (!parent || !((val == 0) || (val == 1)))
return -EINVAL;
- parent = mem_cgroup_from_cont(cgrp->parent);
-
mutex_lock(&memcg_create_mutex);
/* oom-kill-disable is a flag for subhierarchy. */
if ((parent->use_hierarchy) || memcg_has_children(memcg)) {
@@ -6228,7 +6198,7 @@ static void __init mem_cgroup_soft_limit_tree_init(void)
}
static struct cgroup_subsys_state * __ref
-mem_cgroup_css_alloc(struct cgroup *cont)
+mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
{
struct mem_cgroup *memcg;
long error = -ENOMEM;
@@ -6243,7 +6213,7 @@ mem_cgroup_css_alloc(struct cgroup *cont)
goto free_out;
/* root ? */
- if (cont->parent == NULL) {
+ if (parent_css == NULL) {
root_mem_cgroup = memcg;
res_counter_init(&memcg->res, NULL);
res_counter_init(&memcg->memsw, NULL);
@@ -6265,17 +6235,16 @@ free_out:
}
static int
-mem_cgroup_css_online(struct cgroup *cont)
+mem_cgroup_css_online(struct cgroup_subsys_state *css)
{
- struct mem_cgroup *memcg, *parent;
+ struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+ struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(css));
int error = 0;
- if (!cont->parent)
+ if (!parent)
return 0;
mutex_lock(&memcg_create_mutex);
- memcg = mem_cgroup_from_cont(cont);
- parent = mem_cgroup_from_cont(cont->parent);
memcg->use_hierarchy = parent->use_hierarchy;
memcg->oom_kill_disable = parent->oom_kill_disable;
@@ -6326,9 +6295,9 @@ static void mem_cgroup_invalidate_reclaim_iterators(struct mem_cgroup *memcg)
mem_cgroup_iter_invalidate(root_mem_cgroup);
}
-static void mem_cgroup_css_offline(struct cgroup *cont)
+static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
{
- struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+ struct mem_cgroup *memcg = mem_cgroup_from_css(css);
kmem_cgroup_css_offline(memcg);
@@ -6338,9 +6307,9 @@ static void mem_cgroup_css_offline(struct cgroup *cont)
vmpressure_cleanup(&memcg->vmpressure);
}
-static void mem_cgroup_css_free(struct cgroup *cont)
+static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
{
- struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+ struct mem_cgroup *memcg = mem_cgroup_from_css(css);
memcg_destroy_kmem(memcg);
__mem_cgroup_free(memcg);
@@ -6710,12 +6679,12 @@ static void mem_cgroup_clear_mc(void)
mem_cgroup_end_move(from);
}
-static int mem_cgroup_can_attach(struct cgroup *cgroup,
+static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
struct cgroup_taskset *tset)
{
struct task_struct *p = cgroup_taskset_first(tset);
int ret = 0;
- struct mem_cgroup *memcg = mem_cgroup_from_cont(cgroup);
+ struct mem_cgroup *memcg = mem_cgroup_from_css(css);
unsigned long move_charge_at_immigrate;
/*
@@ -6757,7 +6726,7 @@ static int mem_cgroup_can_attach(struct cgroup *cgroup,
return ret;
}
-static void mem_cgroup_cancel_attach(struct cgroup *cgroup,
+static void mem_cgroup_cancel_attach(struct cgroup_subsys_state *css,
struct cgroup_taskset *tset)
{
mem_cgroup_clear_mc();
@@ -6905,7 +6874,7 @@ retry:
up_read(&mm->mmap_sem);
}
-static void mem_cgroup_move_task(struct cgroup *cont,
+static void mem_cgroup_move_task(struct cgroup_subsys_state *css,
struct cgroup_taskset *tset)
{
struct task_struct *p = cgroup_taskset_first(tset);
@@ -6920,16 +6889,16 @@ static void mem_cgroup_move_task(struct cgroup *cont,
mem_cgroup_clear_mc();
}
#else /* !CONFIG_MMU */
-static int mem_cgroup_can_attach(struct cgroup *cgroup,
+static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
struct cgroup_taskset *tset)
{
return 0;
}
-static void mem_cgroup_cancel_attach(struct cgroup *cgroup,
+static void mem_cgroup_cancel_attach(struct cgroup_subsys_state *css,
struct cgroup_taskset *tset)
{
}
-static void mem_cgroup_move_task(struct cgroup *cont,
+static void mem_cgroup_move_task(struct cgroup_subsys_state *css,
struct cgroup_taskset *tset)
{
}
@@ -6939,15 +6908,15 @@ static void mem_cgroup_move_task(struct cgroup *cont,
* Cgroup retains root cgroups across [un]mount cycles making it necessary
* to verify sane_behavior flag on each mount attempt.
*/
-static void mem_cgroup_bind(struct cgroup *root)
+static void mem_cgroup_bind(struct cgroup_subsys_state *root_css)
{
/*
* use_hierarchy is forced with sane_behavior. cgroup core
* guarantees that @root doesn't have any children, so turning it
* on for the root memcg is enough.
*/
- if (cgroup_sane_behavior(root))
- mem_cgroup_from_cont(root)->use_hierarchy = true;
+ if (cgroup_sane_behavior(root_css->cgroup))
+ mem_cgroup_from_css(root_css)->use_hierarchy = true;
}
struct cgroup_subsys mem_cgroup_subsys = {
diff --git a/mm/vmpressure.c b/mm/vmpressure.c
index 0c1e37d829f..e0f62837c3f 100644
--- a/mm/vmpressure.c
+++ b/mm/vmpressure.c
@@ -74,15 +74,10 @@ static struct vmpressure *work_to_vmpressure(struct work_struct *work)
return container_of(work, struct vmpressure, work);
}
-static struct vmpressure *cg_to_vmpressure(struct cgroup *cg)
-{
- return css_to_vmpressure(cgroup_subsys_state(cg, mem_cgroup_subsys_id));
-}
-
static struct vmpressure *vmpressure_parent(struct vmpressure *vmpr)
{
- struct cgroup *cg = vmpressure_to_css(vmpr)->cgroup;
- struct mem_cgroup *memcg = mem_cgroup_from_cont(cg);
+ struct cgroup_subsys_state *css = vmpressure_to_css(vmpr);
+ struct mem_cgroup *memcg = mem_cgroup_from_css(css);
memcg = parent_mem_cgroup(memcg);
if (!memcg)
@@ -283,7 +278,7 @@ void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio)
/**
* vmpressure_register_event() - Bind vmpressure notifications to an eventfd
- * @cg: cgroup that is interested in vmpressure notifications
+ * @css: css that is interested in vmpressure notifications
* @cft: cgroup control files handle
* @eventfd: eventfd context to link notifications with
* @args: event arguments (used to set up a pressure level threshold)
@@ -298,10 +293,11 @@ void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio)
* cftype).register_event, and then cgroup core will handle everything by
* itself.
*/
-int vmpressure_register_event(struct cgroup *cg, struct cftype *cft,
- struct eventfd_ctx *eventfd, const char *args)
+int vmpressure_register_event(struct cgroup_subsys_state *css,
+ struct cftype *cft, struct eventfd_ctx *eventfd,
+ const char *args)
{
- struct vmpressure *vmpr = cg_to_vmpressure(cg);
+ struct vmpressure *vmpr = css_to_vmpressure(css);
struct vmpressure_event *ev;
int level;
@@ -329,7 +325,7 @@ int vmpressure_register_event(struct cgroup *cg, struct cftype *cft,
/**
* vmpressure_unregister_event() - Unbind eventfd from vmpressure
- * @cg: cgroup handle
+ * @css: css handle
* @cft: cgroup control files handle
* @eventfd: eventfd context that was used to link vmpressure with the @cg
*
@@ -341,10 +337,11 @@ int vmpressure_register_event(struct cgroup *cg, struct cftype *cft,
* cftype).unregister_event, and then cgroup core will handle everything
* by itself.
*/
-void vmpressure_unregister_event(struct cgroup *cg, struct cftype *cft,
+void vmpressure_unregister_event(struct cgroup_subsys_state *css,
+ struct cftype *cft,
struct eventfd_ctx *eventfd)
{
- struct vmpressure *vmpr = cg_to_vmpressure(cg);
+ struct vmpressure *vmpr = css_to_vmpressure(css);
struct vmpressure_event *ev;
mutex_lock(&vmpr->events_lock);
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index e533259dce3..d9cd627e6a1 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -29,12 +29,6 @@
#define PRIOMAP_MIN_SZ 128
-static inline struct cgroup_netprio_state *cgrp_netprio_state(struct cgroup *cgrp)
-{
- return container_of(cgroup_subsys_state(cgrp, net_prio_subsys_id),
- struct cgroup_netprio_state, css);
-}
-
/*
* Extend @dev->priomap so that it's large enough to accomodate
* @target_idx. @dev->priomap.priomap_len > @target_idx after successful
@@ -87,67 +81,70 @@ static int extend_netdev_table(struct net_device *dev, u32 target_idx)
/**
* netprio_prio - return the effective netprio of a cgroup-net_device pair
- * @cgrp: cgroup part of the target pair
+ * @css: css part of the target pair
* @dev: net_device part of the target pair
*
* Should be called under RCU read or rtnl lock.
*/
-static u32 netprio_prio(struct cgroup *cgrp, struct net_device *dev)
+static u32 netprio_prio(struct cgroup_subsys_state *css, struct net_device *dev)
{
struct netprio_map *map = rcu_dereference_rtnl(dev->priomap);
+ int id = css->cgroup->id;
- if (map && cgrp->id < map->priomap_len)
- return map->priomap[cgrp->id];
+ if (map && id < map->priomap_len)
+ return map->priomap[id];
return 0;
}
/**
* netprio_set_prio - set netprio on a cgroup-net_device pair
- * @cgrp: cgroup part of the target pair
+ * @css: css part of the target pair
* @dev: net_device part of the target pair
* @prio: prio to set
*
- * Set netprio to @prio on @cgrp-@dev pair. Should be called under rtnl
+ * Set netprio to @prio on @css-@dev pair. Should be called under rtnl
* lock and may fail under memory pressure for non-zero @prio.
*/
-static int netprio_set_prio(struct cgroup *cgrp, struct net_device *dev,
- u32 prio)
+static int netprio_set_prio(struct cgroup_subsys_state *css,
+ struct net_device *dev, u32 prio)
{
struct netprio_map *map;
+ int id = css->cgroup->id;
int ret;
/* avoid extending priomap for zero writes */
map = rtnl_dereference(dev->priomap);
- if (!prio && (!map || map->priomap_len <= cgrp->id))
+ if (!prio && (!map || map->priomap_len <= id))
return 0;
- ret = extend_netdev_table(dev, cgrp->id);
+ ret = extend_netdev_table(dev, id);
if (ret)
return ret;
map = rtnl_dereference(dev->priomap);
- map->priomap[cgrp->id] = prio;
+ map->priomap[id] = prio;
return 0;
}
-static struct cgroup_subsys_state *cgrp_css_alloc(struct cgroup *cgrp)
+static struct cgroup_subsys_state *
+cgrp_css_alloc(struct cgroup_subsys_state *parent_css)
{
- struct cgroup_netprio_state *cs;
+ struct cgroup_subsys_state *css;
- cs = kzalloc(sizeof(*cs), GFP_KERNEL);
- if (!cs)
+ css = kzalloc(sizeof(*css), GFP_KERNEL);
+ if (!css)
return ERR_PTR(-ENOMEM);
- return &cs->css;
+ return css;
}
-static int cgrp_css_online(struct cgroup *cgrp)
+static int cgrp_css_online(struct cgroup_subsys_state *css)
{
- struct cgroup *parent = cgrp->parent;
+ struct cgroup_subsys_state *parent_css = css_parent(css);
struct net_device *dev;
int ret = 0;
- if (!parent)
+ if (!parent_css)
return 0;
rtnl_lock();
@@ -156,9 +153,9 @@ static int cgrp_css_online(struct cgroup *cgrp)
* onlining, there is no need to clear them on offline.
*/
for_each_netdev(&init_net, dev) {
- u32 prio = netprio_prio(parent, dev);
+ u32 prio = netprio_prio(parent_css, dev);
- ret = netprio_set_prio(cgrp, dev, prio);
+ ret = netprio_set_prio(css, dev, prio);
if (ret)
break;
}
@@ -166,29 +163,29 @@ static int cgrp_css_online(struct cgroup *cgrp)
return ret;
}
-static void cgrp_css_free(struct cgroup *cgrp)
+static void cgrp_css_free(struct cgroup_subsys_state *css)
{
- kfree(cgrp_netprio_state(cgrp));
+ kfree(css);
}
-static u64 read_prioidx(struct cgroup *cgrp, struct cftype *cft)
+static u64 read_prioidx(struct cgroup_subsys_state *css, struct cftype *cft)
{
- return cgrp->id;
+ return css->cgroup->id;
}
-static int read_priomap(struct cgroup *cont, struct cftype *cft,
+static int read_priomap(struct cgroup_subsys_state *css, struct cftype *cft,
struct cgroup_map_cb *cb)
{
struct net_device *dev;
rcu_read_lock();
for_each_netdev_rcu(&init_net, dev)
- cb->fill(cb, dev->name, netprio_prio(cont, dev));
+ cb->fill(cb, dev->name, netprio_prio(css, dev));
rcu_read_unlock();
return 0;
}
-static int write_priomap(struct cgroup *cgrp, struct cftype *cft,
+static int write_priomap(struct cgroup_subsys_state *css, struct cftype *cft,
const char *buffer)
{
char devname[IFNAMSIZ + 1];
@@ -205,7 +202,7 @@ static int write_priomap(struct cgroup *cgrp, struct cftype *cft,
rtnl_lock();
- ret = netprio_set_prio(cgrp, dev, prio);
+ ret = netprio_set_prio(css, dev, prio);
rtnl_unlock();
dev_put(dev);
@@ -221,12 +218,13 @@ static int update_netprio(const void *v, struct file *file, unsigned n)
return 0;
}
-static void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
+static void net_prio_attach(struct cgroup_subsys_state *css,
+ struct cgroup_taskset *tset)
{
struct task_struct *p;
void *v;
- cgroup_taskset_for_each(p, cgrp, tset) {
+ cgroup_taskset_for_each(p, css, tset) {
task_lock(p);
v = (void *)(unsigned long)task_netprioidx(p);
iterate_fd(p->files, 0, update_netprio, v);
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c
index da14436c173..8a57d79b0b1 100644
--- a/net/ipv4/tcp_memcontrol.c
+++ b/net/ipv4/tcp_memcontrol.c
@@ -132,10 +132,10 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val)
return 0;
}
-static int tcp_cgroup_write(struct cgroup *cont, struct cftype *cft,
+static int tcp_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft,
const char *buffer)
{
- struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+ struct mem_cgroup *memcg = mem_cgroup_from_css(css);
unsigned long long val;
int ret = 0;
@@ -180,9 +180,9 @@ static u64 tcp_read_usage(struct mem_cgroup *memcg)
return res_counter_read_u64(&tcp->tcp_memory_allocated, RES_USAGE);
}
-static u64 tcp_cgroup_read(struct cgroup *cont, struct cftype *cft)
+static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
- struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+ struct mem_cgroup *memcg = mem_cgroup_from_css(css);
u64 val;
switch (cft->private) {
@@ -202,13 +202,13 @@ static u64 tcp_cgroup_read(struct cgroup *cont, struct cftype *cft)
return val;
}
-static int tcp_cgroup_reset(struct cgroup *cont, unsigned int event)
+static int tcp_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event)
{
struct mem_cgroup *memcg;
struct tcp_memcontrol *tcp;
struct cg_proto *cg_proto;
- memcg = mem_cgroup_from_cont(cont);
+ memcg = mem_cgroup_from_css(css);
cg_proto = tcp_prot.proto_cgroup(memcg);
if (!cg_proto)
return 0;
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 3a294eb98d6..867b4a3e398 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -23,19 +23,18 @@
#include <net/sock.h>
#include <net/cls_cgroup.h>
-static inline struct cgroup_cls_state *cgrp_cls_state(struct cgroup *cgrp)
+static inline struct cgroup_cls_state *css_cls_state(struct cgroup_subsys_state *css)
{
- return container_of(cgroup_subsys_state(cgrp, net_cls_subsys_id),
- struct cgroup_cls_state, css);
+ return css ? container_of(css, struct cgroup_cls_state, css) : NULL;
}
static inline struct cgroup_cls_state *task_cls_state(struct task_struct *p)
{
- return container_of(task_subsys_state(p, net_cls_subsys_id),
- struct cgroup_cls_state, css);
+ return css_cls_state(task_css(p, net_cls_subsys_id));
}
-static struct cgroup_subsys_state *cgrp_css_alloc(struct cgroup *cgrp)
+static struct cgroup_subsys_state *
+cgrp_css_alloc(struct cgroup_subsys_state *parent_css)
{
struct cgroup_cls_state *cs;
@@ -45,17 +44,19 @@ static struct cgroup_subsys_state *cgrp_css_alloc(struct cgroup *cgrp)
return &cs->css;
}
-static int cgrp_css_online(struct cgroup *cgrp)
+static int cgrp_css_online(struct cgroup_subsys_state *css)
{
- if (cgrp->parent)
- cgrp_cls_state(cgrp)->classid =
- cgrp_cls_state(cgrp->parent)->classid;
+ struct cgroup_cls_state *cs = css_cls_state(css);
+ struct cgroup_cls_state *parent = css_cls_state(css_parent(css));
+
+ if (parent)
+ cs->classid = parent->classid;
return 0;
}
-static void cgrp_css_free(struct cgroup *cgrp)
+static void cgrp_css_free(struct cgroup_subsys_state *css)
{
- kfree(cgrp_cls_state(cgrp));
+ kfree(css_cls_state(css));
}
static int update_classid(const void *v, struct file *file, unsigned n)
@@ -67,12 +68,13 @@ static int update_classid(const void *v, struct file *file, unsigned n)
return 0;
}
-static void cgrp_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
+static void cgrp_attach(struct cgroup_subsys_state *css,
+ struct cgroup_taskset *tset)
{
struct task_struct *p;
void *v;
- cgroup_taskset_for_each(p, cgrp, tset) {
+ cgroup_taskset_for_each(p, css, tset) {
task_lock(p);
v = (void *)(unsigned long)task_cls_classid(p);
iterate_fd(p->files, 0, update_classid, v);
@@ -80,14 +82,15 @@ static void cgrp_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
}
}
-static u64 read_classid(struct cgroup *cgrp, struct cftype *cft)
+static u64 read_classid(struct cgroup_subsys_state *css, struct cftype *cft)
{
- return cgrp_cls_state(cgrp)->classid;
+ return css_cls_state(css)->classid;
}
-static int write_classid(struct cgroup *cgrp, struct cftype *cft, u64 value)
+static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft,
+ u64 value)
{
- cgrp_cls_state(cgrp)->classid = (u32) value;
+ css_cls_state(css)->classid = (u32) value;
return 0;
}
diff --git a/security/device_cgroup.c b/security/device_cgroup.c
index e8aad69f0d6..c123628d3f8 100644
--- a/security/device_cgroup.c
+++ b/security/device_cgroup.c
@@ -53,22 +53,17 @@ struct dev_cgroup {
static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state *s)
{
- return container_of(s, struct dev_cgroup, css);
-}
-
-static inline struct dev_cgroup *cgroup_to_devcgroup(struct cgroup *cgroup)
-{
- return css_to_devcgroup(cgroup_subsys_state(cgroup, devices_subsys_id));
+ return s ? container_of(s, struct dev_cgroup, css) : NULL;
}
static inline struct dev_cgroup *task_devcgroup(struct task_struct *task)
{
- return css_to_devcgroup(task_subsys_state(task, devices_subsys_id));
+ return css_to_devcgroup(task_css(task, devices_subsys_id));
}
struct cgroup_subsys devices_subsys;
-static int devcgroup_can_attach(struct cgroup *new_cgrp,
+static int devcgroup_can_attach(struct cgroup_subsys_state *new_css,
struct cgroup_taskset *set)
{
struct task_struct *task = cgroup_taskset_first(set);
@@ -193,18 +188,16 @@ static inline bool is_devcg_online(const struct dev_cgroup *devcg)
/**
* devcgroup_online - initializes devcgroup's behavior and exceptions based on
* parent's
- * @cgroup: cgroup getting online
+ * @css: css getting online
* returns 0 in case of success, error code otherwise
*/
-static int devcgroup_online(struct cgroup *cgroup)
+static int devcgroup_online(struct cgroup_subsys_state *css)
{
- struct dev_cgroup *dev_cgroup, *parent_dev_cgroup = NULL;
+ struct dev_cgroup *dev_cgroup = css_to_devcgroup(css);
+ struct dev_cgroup *parent_dev_cgroup = css_to_devcgroup(css_parent(css));
int ret = 0;
mutex_lock(&devcgroup_mutex);
- dev_cgroup = cgroup_to_devcgroup(cgroup);
- if (cgroup->parent)
- parent_dev_cgroup = cgroup_to_devcgroup(cgroup->parent);
if (parent_dev_cgroup == NULL)
dev_cgroup->behavior = DEVCG_DEFAULT_ALLOW;
@@ -219,9 +212,9 @@ static int devcgroup_online(struct cgroup *cgroup)
return ret;
}
-static void devcgroup_offline(struct cgroup *cgroup)
+static void devcgroup_offline(struct cgroup_subsys_state *css)
{
- struct dev_cgroup *dev_cgroup = cgroup_to_devcgroup(cgroup);
+ struct dev_cgroup *dev_cgroup = css_to_devcgroup(css);
mutex_lock(&devcgroup_mutex);
dev_cgroup->behavior = DEVCG_DEFAULT_NONE;
@@ -231,7 +224,8 @@ static void devcgroup_offline(struct cgroup *cgroup)
/*
* called from kernel/cgroup.c with cgroup_lock() held.
*/
-static struct cgroup_subsys_state *devcgroup_css_alloc(struct cgroup *cgroup)
+static struct cgroup_subsys_state *
+devcgroup_css_alloc(struct cgroup_subsys_state *parent_css)
{
struct dev_cgroup *dev_cgroup;
@@ -244,11 +238,10 @@ static struct cgroup_subsys_state *devcgroup_css_alloc(struct cgroup *cgroup)
return &dev_cgroup->css;
}
-static void devcgroup_css_free(struct cgroup *cgroup)
+static void devcgroup_css_free(struct cgroup_subsys_state *css)
{
- struct dev_cgroup *dev_cgroup;
+ struct dev_cgroup *dev_cgroup = css_to_devcgroup(css);
- dev_cgroup = cgroup_to_devcgroup(cgroup);
__dev_exception_clean(dev_cgroup);
kfree(dev_cgroup);
}
@@ -291,10 +284,10 @@ static void set_majmin(char *str, unsigned m)
sprintf(str, "%u", m);
}
-static int devcgroup_seq_read(struct cgroup *cgroup, struct cftype *cft,
- struct seq_file *m)
+static int devcgroup_seq_read(struct cgroup_subsys_state *css,
+ struct cftype *cft, struct seq_file *m)
{
- struct dev_cgroup *devcgroup = cgroup_to_devcgroup(cgroup);
+ struct dev_cgroup *devcgroup = css_to_devcgroup(css);
struct dev_exception_item *ex;
char maj[MAJMINLEN], min[MAJMINLEN], acc[ACCLEN];
@@ -394,12 +387,10 @@ static bool may_access(struct dev_cgroup *dev_cgroup,
static int parent_has_perm(struct dev_cgroup *childcg,
struct dev_exception_item *ex)
{
- struct cgroup *pcg = childcg->css.cgroup->parent;
- struct dev_cgroup *parent;
+ struct dev_cgroup *parent = css_to_devcgroup(css_parent(&childcg->css));
- if (!pcg)
+ if (!parent)
return 1;
- parent = cgroup_to_devcgroup(pcg);
return may_access(parent, ex, childcg->behavior);
}
@@ -451,13 +442,13 @@ static void revalidate_active_exceptions(struct dev_cgroup *devcg)
static int propagate_exception(struct dev_cgroup *devcg_root,
struct dev_exception_item *ex)
{
- struct cgroup *root = devcg_root->css.cgroup, *pos;
+ struct cgroup_subsys_state *pos;
int rc = 0;
rcu_read_lock();
- cgroup_for_each_descendant_pre(pos, root) {
- struct dev_cgroup *devcg = cgroup_to_devcgroup(pos);
+ css_for_each_descendant_pre(pos, &devcg_root->css) {
+ struct dev_cgroup *devcg = css_to_devcgroup(pos);
/*
* Because devcgroup_mutex is held, no devcg will become
@@ -465,7 +456,7 @@ static int propagate_exception(struct dev_cgroup *devcg_root,
* methods), and online ones are safe to access outside RCU
* read lock without bumping refcnt.
*/
- if (!is_devcg_online(devcg))
+ if (pos == &devcg_root->css || !is_devcg_online(devcg))
continue;
rcu_read_unlock();
@@ -524,15 +515,11 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup,
char temp[12]; /* 11 + 1 characters needed for a u32 */
int count, rc = 0;
struct dev_exception_item ex;
- struct cgroup *p = devcgroup->css.cgroup;
- struct dev_cgroup *parent = NULL;
+ struct dev_cgroup *parent = css_to_devcgroup(css_parent(&devcgroup->css));
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- if (p->parent)
- parent = cgroup_to_devcgroup(p->parent);
-
memset(&ex, 0, sizeof(ex));
b = buffer;
@@ -677,13 +664,13 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup,
return rc;
}
-static int devcgroup_access_write(struct cgroup *cgrp, struct cftype *cft,
- const char *buffer)
+static int devcgroup_access_write(struct cgroup_subsys_state *css,
+ struct cftype *cft, const char *buffer)
{
int retval;
mutex_lock(&devcgroup_mutex);
- retval = devcgroup_update_access(cgroup_to_devcgroup(cgrp),
+ retval = devcgroup_update_access(css_to_devcgroup(css),
cft->private, buffer);
mutex_unlock(&devcgroup_mutex);
return retval;