diff options
Diffstat (limited to 'mm/memcontrol.c')
| -rw-r--r-- | mm/memcontrol.c | 315 |
1 files changed, 155 insertions, 160 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index a500cb0594c..1f14a430c65 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -526,18 +526,14 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg) static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg) { - /* - * The ID of the root cgroup is 0, but memcg treat 0 as an - * invalid ID, so we return (cgroup_id + 1). - */ - return memcg->css.cgroup->id + 1; + return memcg->css.id; } static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id) { struct cgroup_subsys_state *css; - css = css_from_id(id - 1, &memory_cgrp_subsys); + css = css_from_id(id, &memory_cgrp_subsys); return mem_cgroup_from_css(css); } @@ -570,7 +566,8 @@ void sock_update_memcg(struct sock *sk) memcg = mem_cgroup_from_task(current); cg_proto = sk->sk_prot->proto_cgroup(memcg); if (!mem_cgroup_is_root(memcg) && - memcg_proto_active(cg_proto) && css_tryget(&memcg->css)) { + memcg_proto_active(cg_proto) && + css_tryget_online(&memcg->css)) { sk->sk_cgrp = cg_proto; } rcu_read_unlock(); @@ -676,9 +673,11 @@ static void disarm_static_keys(struct mem_cgroup *memcg) static void drain_all_stock_async(struct mem_cgroup *memcg); static struct mem_cgroup_per_zone * -mem_cgroup_zoneinfo(struct mem_cgroup *memcg, int nid, int zid) +mem_cgroup_zone_zoneinfo(struct mem_cgroup *memcg, struct zone *zone) { - VM_BUG_ON((unsigned)nid >= nr_node_ids); + int nid = zone_to_nid(zone); + int zid = zone_idx(zone); + return &memcg->nodeinfo[nid]->zoneinfo[zid]; } @@ -688,12 +687,12 @@ struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg) } static struct mem_cgroup_per_zone * -page_cgroup_zoneinfo(struct mem_cgroup *memcg, struct page *page) +mem_cgroup_page_zoneinfo(struct mem_cgroup *memcg, struct page *page) { int nid = page_to_nid(page); int zid = page_zonenum(page); - return mem_cgroup_zoneinfo(memcg, nid, zid); + return &memcg->nodeinfo[nid]->zoneinfo[zid]; } static struct mem_cgroup_tree_per_zone * @@ -711,11 +710,9 @@ soft_limit_tree_from_page(struct page *page) return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid]; } -static void -__mem_cgroup_insert_exceeded(struct mem_cgroup *memcg, - struct mem_cgroup_per_zone *mz, - struct mem_cgroup_tree_per_zone *mctz, - unsigned long long new_usage_in_excess) +static void __mem_cgroup_insert_exceeded(struct mem_cgroup_per_zone *mz, + struct mem_cgroup_tree_per_zone *mctz, + unsigned long long new_usage_in_excess) { struct rb_node **p = &mctz->rb_root.rb_node; struct rb_node *parent = NULL; @@ -745,10 +742,8 @@ __mem_cgroup_insert_exceeded(struct mem_cgroup *memcg, mz->on_tree = true; } -static void -__mem_cgroup_remove_exceeded(struct mem_cgroup *memcg, - struct mem_cgroup_per_zone *mz, - struct mem_cgroup_tree_per_zone *mctz) +static void __mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz, + struct mem_cgroup_tree_per_zone *mctz) { if (!mz->on_tree) return; @@ -756,13 +751,11 @@ __mem_cgroup_remove_exceeded(struct mem_cgroup *memcg, mz->on_tree = false; } -static void -mem_cgroup_remove_exceeded(struct mem_cgroup *memcg, - struct mem_cgroup_per_zone *mz, - struct mem_cgroup_tree_per_zone *mctz) +static void mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz, + struct mem_cgroup_tree_per_zone *mctz) { spin_lock(&mctz->lock); - __mem_cgroup_remove_exceeded(memcg, mz, mctz); + __mem_cgroup_remove_exceeded(mz, mctz); spin_unlock(&mctz->lock); } @@ -772,16 +765,14 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page) unsigned long long excess; struct mem_cgroup_per_zone *mz; struct mem_cgroup_tree_per_zone *mctz; - int nid = page_to_nid(page); - int zid = page_zonenum(page); - mctz = soft_limit_tree_from_page(page); + mctz = soft_limit_tree_from_page(page); /* * Necessary to update all ancestors when hierarchy is used. * because their event counter is not touched. */ for (; memcg; memcg = parent_mem_cgroup(memcg)) { - mz = mem_cgroup_zoneinfo(memcg, nid, zid); + mz = mem_cgroup_page_zoneinfo(memcg, page); excess = res_counter_soft_limit_excess(&memcg->res); /* * We have to update the tree if mz is on RB-tree or @@ -791,12 +782,12 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page) spin_lock(&mctz->lock); /* if on-tree, remove it */ if (mz->on_tree) - __mem_cgroup_remove_exceeded(memcg, mz, mctz); + __mem_cgroup_remove_exceeded(mz, mctz); /* * Insert again. mz->usage_in_excess will be updated. * If excess is 0, no tree ops. */ - __mem_cgroup_insert_exceeded(memcg, mz, mctz, excess); + __mem_cgroup_insert_exceeded(mz, mctz, excess); spin_unlock(&mctz->lock); } } @@ -804,15 +795,15 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page) static void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg) { - int node, zone; - struct mem_cgroup_per_zone *mz; struct mem_cgroup_tree_per_zone *mctz; + struct mem_cgroup_per_zone *mz; + int nid, zid; - for_each_node(node) { - for (zone = 0; zone < MAX_NR_ZONES; zone++) { - mz = mem_cgroup_zoneinfo(memcg, node, zone); - mctz = soft_limit_tree_node_zone(node, zone); - mem_cgroup_remove_exceeded(memcg, mz, mctz); + for_each_node(nid) { + for (zid = 0; zid < MAX_NR_ZONES; zid++) { + mz = &memcg->nodeinfo[nid]->zoneinfo[zid]; + mctz = soft_limit_tree_node_zone(nid, zid); + mem_cgroup_remove_exceeded(mz, mctz); } } } @@ -835,9 +826,9 @@ retry: * we will to add it back at the end of reclaim to its correct * position in the tree. */ - __mem_cgroup_remove_exceeded(mz->memcg, mz, mctz); + __mem_cgroup_remove_exceeded(mz, mctz); if (!res_counter_soft_limit_excess(&mz->memcg->res) || - !css_tryget(&mz->memcg->css)) + !css_tryget_online(&mz->memcg->css)) goto retry; done: return mz; @@ -946,8 +937,7 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg, __this_cpu_add(memcg->stat->nr_page_events, nr_pages); } -unsigned long -mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru) +unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru) { struct mem_cgroup_per_zone *mz; @@ -955,46 +945,38 @@ mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru) return mz->lru_size[lru]; } -static unsigned long -mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg, int nid, int zid, - unsigned int lru_mask) -{ - struct mem_cgroup_per_zone *mz; - enum lru_list lru; - unsigned long ret = 0; - - mz = mem_cgroup_zoneinfo(memcg, nid, zid); - - for_each_lru(lru) { - if (BIT(lru) & lru_mask) - ret += mz->lru_size[lru]; - } - return ret; -} - -static unsigned long -mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg, - int nid, unsigned int lru_mask) +static unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg, + int nid, + unsigned int lru_mask) { - u64 total = 0; + unsigned long nr = 0; int zid; - for (zid = 0; zid < MAX_NR_ZONES; zid++) - total += mem_cgroup_zone_nr_lru_pages(memcg, - nid, zid, lru_mask); + VM_BUG_ON((unsigned)nid >= nr_node_ids); - return total; + for (zid = 0; zid < MAX_NR_ZONES; zid++) { + struct mem_cgroup_per_zone *mz; + enum lru_list lru; + + for_each_lru(lru) { + if (!(BIT(lru) & lru_mask)) + continue; + mz = &memcg->nodeinfo[nid]->zoneinfo[zid]; + nr += mz->lru_size[lru]; + } + } + return nr; } static unsigned long mem_cgroup_nr_lru_pages(struct mem_cgroup *memcg, unsigned int lru_mask) { + unsigned long nr = 0; int nid; - u64 total = 0; for_each_node_state(nid, N_MEMORY) - total += mem_cgroup_node_nr_lru_pages(memcg, nid, lru_mask); - return total; + nr += mem_cgroup_node_nr_lru_pages(memcg, nid, lru_mask); + return nr; } static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg, @@ -1088,7 +1070,7 @@ static struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm) if (unlikely(!memcg)) memcg = root_mem_cgroup; } - } while (!css_tryget(&memcg->css)); + } while (!css_tryget_online(&memcg->css)); rcu_read_unlock(); return memcg; } @@ -1125,7 +1107,8 @@ skip_node: */ if (next_css) { if ((next_css == &root->css) || - ((next_css->flags & CSS_ONLINE) && css_tryget(next_css))) + ((next_css->flags & CSS_ONLINE) && + css_tryget_online(next_css))) return mem_cgroup_from_css(next_css); prev_css = next_css; @@ -1171,7 +1154,7 @@ mem_cgroup_iter_load(struct mem_cgroup_reclaim_iter *iter, * would be returned all the time. */ if (position && position != root && - !css_tryget(&position->css)) + !css_tryget_online(&position->css)) position = NULL; } return position; @@ -1242,11 +1225,9 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root, int uninitialized_var(seq); if (reclaim) { - int nid = zone_to_nid(reclaim->zone); - int zid = zone_idx(reclaim->zone); struct mem_cgroup_per_zone *mz; - mz = mem_cgroup_zoneinfo(root, nid, zid); + mz = mem_cgroup_zone_zoneinfo(root, reclaim->zone); iter = &mz->reclaim_iter[reclaim->priority]; if (prev && reclaim->generation != iter->generation) { iter->last_visited = NULL; @@ -1353,7 +1334,7 @@ struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone, goto out; } - mz = mem_cgroup_zoneinfo(memcg, zone_to_nid(zone), zone_idx(zone)); + mz = mem_cgroup_zone_zoneinfo(memcg, zone); lruvec = &mz->lruvec; out: /* @@ -1412,7 +1393,7 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct zone *zone) if (!PageLRU(page) && !PageCgroupUsed(pc) && memcg != root_mem_cgroup) pc->mem_cgroup = memcg = root_mem_cgroup; - mz = page_cgroup_zoneinfo(memcg, page); + mz = mem_cgroup_page_zoneinfo(memcg, page); lruvec = &mz->lruvec; out: /* @@ -1550,7 +1531,7 @@ static unsigned long mem_cgroup_margin(struct mem_cgroup *memcg) int mem_cgroup_swappiness(struct mem_cgroup *memcg) { /* root ? */ - if (!css_parent(&memcg->css)) + if (mem_cgroup_disabled() || !memcg->css.parent) return vm_swappiness; return memcg->swappiness; @@ -2786,9 +2767,9 @@ static void __mem_cgroup_cancel_local_charge(struct mem_cgroup *memcg, /* * A helper function to get mem_cgroup from ID. must be called under - * rcu_read_lock(). The caller is responsible for calling css_tryget if - * the mem_cgroup is used for charging. (dropping refcnt from swap can be - * called against removed memcg.) + * rcu_read_lock(). The caller is responsible for calling + * css_tryget_online() if the mem_cgroup is used for charging. (dropping + * refcnt from swap can be called against removed memcg.) */ static struct mem_cgroup *mem_cgroup_lookup(unsigned short id) { @@ -2811,14 +2792,14 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page) lock_page_cgroup(pc); if (PageCgroupUsed(pc)) { memcg = pc->mem_cgroup; - if (memcg && !css_tryget(&memcg->css)) + if (memcg && !css_tryget_online(&memcg->css)) memcg = NULL; } else if (PageSwapCache(page)) { ent.val = page_private(page); id = lookup_swap_cgroup_id(ent); rcu_read_lock(); memcg = mem_cgroup_lookup(id); - if (memcg && !css_tryget(&memcg->css)) + if (memcg && !css_tryget_online(&memcg->css)) memcg = NULL; rcu_read_unlock(); } @@ -3382,7 +3363,7 @@ struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep, } /* The corresponding put will be done in the workqueue. */ - if (!css_tryget(&memcg->css)) + if (!css_tryget_online(&memcg->css)) goto out; rcu_read_unlock(); @@ -4142,8 +4123,8 @@ void mem_cgroup_uncharge_swap(swp_entry_t ent) memcg = mem_cgroup_lookup(id); if (memcg) { /* - * We uncharge this because swap is freed. - * This memcg can be obsolete one. We avoid calling css_tryget + * We uncharge this because swap is freed. This memcg can + * be obsolete one. We avoid calling css_tryget_online(). */ if (!mem_cgroup_is_root(memcg)) res_counter_uncharge(&memcg->memsw, PAGE_SIZE); @@ -4597,7 +4578,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, break; } while (1); } - __mem_cgroup_remove_exceeded(mz->memcg, mz, mctz); + __mem_cgroup_remove_exceeded(mz, mctz); excess = res_counter_soft_limit_excess(&mz->memcg->res); /* * One school of thought says that we should not add @@ -4608,7 +4589,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, * term TODO. */ /* If excess == 0, no tree ops */ - __mem_cgroup_insert_exceeded(mz->memcg, mz, mctz, excess); + __mem_cgroup_insert_exceeded(mz, mctz, excess); spin_unlock(&mctz->lock); css_put(&mz->memcg->css); loop++; @@ -4728,18 +4709,28 @@ static void mem_cgroup_reparent_charges(struct mem_cgroup *memcg) } while (usage > 0); } +/* + * Test whether @memcg has children, dead or alive. Note that this + * function doesn't care whether @memcg has use_hierarchy enabled and + * returns %true if there are child csses according to the cgroup + * hierarchy. Testing use_hierarchy is the caller's responsiblity. + */ static inline bool memcg_has_children(struct mem_cgroup *memcg) { - lockdep_assert_held(&memcg_create_mutex); + bool ret; + /* - * The lock does not prevent addition or deletion to the list - * of children, but it prevents a new child from being - * initialized based on this parent in css_online(), so it's - * enough to decide whether hierarchically inherited - * attributes can still be changed or not. + * The lock does not prevent addition or deletion of children, but + * it prevents a new child from being initialized based on this + * parent in css_online(), so it's enough to decide whether + * hierarchically inherited attributes can still be changed or not. */ - return memcg->use_hierarchy && - !list_empty(&memcg->css.cgroup->children); + lockdep_assert_held(&memcg_create_mutex); + + rcu_read_lock(); + ret = css_next_child(NULL, &memcg->css); + rcu_read_unlock(); + return ret; } /* @@ -4751,11 +4742,6 @@ static inline bool memcg_has_children(struct mem_cgroup *memcg) static int mem_cgroup_force_empty(struct mem_cgroup *memcg) { int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; - struct cgroup *cgrp = memcg->css.cgroup; - - /* returns EBUSY if there is a task or if we come here twice. */ - if (cgroup_has_tasks(cgrp) || !list_empty(&cgrp->children)) - return -EBUSY; /* we call try-to-free pages for make this cgroup empty */ lru_add_drain_all(); @@ -4775,20 +4761,19 @@ static int mem_cgroup_force_empty(struct mem_cgroup *memcg) } } - lru_add_drain(); - mem_cgroup_reparent_charges(memcg); return 0; } -static int mem_cgroup_force_empty_write(struct cgroup_subsys_state *css, - unsigned int event) +static ssize_t mem_cgroup_force_empty_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, + loff_t off) { - struct mem_cgroup *memcg = mem_cgroup_from_css(css); + struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); if (mem_cgroup_is_root(memcg)) return -EINVAL; - return mem_cgroup_force_empty(memcg); + return mem_cgroup_force_empty(memcg) ?: nbytes; } static u64 mem_cgroup_hierarchy_read(struct cgroup_subsys_state *css, @@ -4802,7 +4787,7 @@ static int mem_cgroup_hierarchy_write(struct cgroup_subsys_state *css, { int retval = 0; struct mem_cgroup *memcg = mem_cgroup_from_css(css); - struct mem_cgroup *parent_memcg = mem_cgroup_from_css(css_parent(&memcg->css)); + struct mem_cgroup *parent_memcg = mem_cgroup_from_css(memcg->css.parent); mutex_lock(&memcg_create_mutex); @@ -4819,7 +4804,7 @@ static int mem_cgroup_hierarchy_write(struct cgroup_subsys_state *css, */ if ((!parent_memcg || !parent_memcg->use_hierarchy) && (val == 1 || val == 0)) { - if (list_empty(&memcg->css.cgroup->children)) + if (!memcg_has_children(memcg)) memcg->use_hierarchy = val; else retval = -EBUSY; @@ -4936,7 +4921,8 @@ static int __memcg_activate_kmem(struct mem_cgroup *memcg, * of course permitted. */ mutex_lock(&memcg_create_mutex); - if (cgroup_has_tasks(memcg->css.cgroup) || memcg_has_children(memcg)) + if (cgroup_has_tasks(memcg->css.cgroup) || + (memcg->use_hierarchy && memcg_has_children(memcg))) err = -EBUSY; mutex_unlock(&memcg_create_mutex); if (err) @@ -5038,17 +5024,18 @@ static int memcg_update_kmem_limit(struct mem_cgroup *memcg, * The user of this function is... * RES_LIMIT. */ -static int mem_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft, - char *buffer) +static ssize_t mem_cgroup_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) { - struct mem_cgroup *memcg = mem_cgroup_from_css(css); + struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); enum res_type type; int name; unsigned long long val; int ret; - type = MEMFILE_TYPE(cft->private); - name = MEMFILE_ATTR(cft->private); + buf = strstrip(buf); + type = MEMFILE_TYPE(of_cft(of)->private); + name = MEMFILE_ATTR(of_cft(of)->private); switch (name) { case RES_LIMIT: @@ -5057,7 +5044,7 @@ static int mem_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft, break; } /* This function does all necessary parse...reuse it */ - ret = res_counter_memparse_write_strategy(buffer, &val); + ret = res_counter_memparse_write_strategy(buf, &val); if (ret) break; if (type == _MEM) @@ -5070,7 +5057,7 @@ static int mem_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft, return -EINVAL; break; case RES_SOFT_LIMIT: - ret = res_counter_memparse_write_strategy(buffer, &val); + ret = res_counter_memparse_write_strategy(buf, &val); if (ret) break; /* @@ -5087,7 +5074,7 @@ static int mem_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft, ret = -EINVAL; /* should be BUG() ? */ break; } - return ret; + return ret ?: nbytes; } static void memcg_get_hierarchical_limit(struct mem_cgroup *memcg, @@ -5100,8 +5087,8 @@ static void memcg_get_hierarchical_limit(struct mem_cgroup *memcg, if (!memcg->use_hierarchy) goto out; - while (css_parent(&memcg->css)) { - memcg = mem_cgroup_from_css(css_parent(&memcg->css)); + while (memcg->css.parent) { + memcg = mem_cgroup_from_css(memcg->css.parent); if (!memcg->use_hierarchy) break; tmp = res_counter_read_u64(&memcg->res, RES_LIMIT); @@ -5114,14 +5101,15 @@ out: *memsw_limit = min_memsw_limit; } -static int mem_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event) +static ssize_t mem_cgroup_reset(struct kernfs_open_file *of, char *buf, + size_t nbytes, loff_t off) { - struct mem_cgroup *memcg = mem_cgroup_from_css(css); + struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); int name; enum res_type type; - type = MEMFILE_TYPE(event); - name = MEMFILE_ATTR(event); + type = MEMFILE_TYPE(of_cft(of)->private); + name = MEMFILE_ATTR(of_cft(of)->private); switch (name) { case RES_MAX_USAGE: @@ -5146,7 +5134,7 @@ static int mem_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event) break; } - return 0; + return nbytes; } static u64 mem_cgroup_move_charge_read(struct cgroup_subsys_state *css, @@ -5305,7 +5293,7 @@ static int memcg_stat_show(struct seq_file *m, void *v) for_each_online_node(nid) for (zid = 0; zid < MAX_NR_ZONES; zid++) { - mz = mem_cgroup_zoneinfo(memcg, nid, zid); + mz = &memcg->nodeinfo[nid]->zoneinfo[zid]; rstat = &mz->lruvec.reclaim_stat; recent_rotated[0] += rstat->recent_rotated[0]; @@ -5339,7 +5327,7 @@ static int mem_cgroup_swappiness_write(struct cgroup_subsys_state *css, if (val > 100) return -EINVAL; - if (css_parent(css)) + if (css->parent) memcg->swappiness = val; else vm_swappiness = val; @@ -5427,8 +5415,12 @@ static int mem_cgroup_oom_notify_cb(struct mem_cgroup *memcg) { struct mem_cgroup_eventfd_list *ev; + spin_lock(&memcg_oom_lock); + list_for_each_entry(ev, &memcg->oom_notify, list) eventfd_signal(ev->eventfd, 1); + + spin_unlock(&memcg_oom_lock); return 0; } @@ -5676,7 +5668,7 @@ static int mem_cgroup_oom_control_write(struct cgroup_subsys_state *css, struct mem_cgroup *memcg = mem_cgroup_from_css(css); /* cannot set to root cgroup and only 0 and 1 are allowed */ - if (!css_parent(css) || !((val == 0) || (val == 1))) + if (!css->parent || !((val == 0) || (val == 1))) return -EINVAL; memcg->oom_kill_disable = val; @@ -5722,10 +5714,10 @@ static void kmem_cgroup_css_offline(struct mem_cgroup *memcg) * which is then paired with css_put during uncharge resp. here. * * Although this might sound strange as this path is called from - * css_offline() when the referencemight have dropped down to 0 - * and shouldn't be incremented anymore (css_tryget would fail) - * we do not have other options because of the kmem allocations - * lifetime. + * css_offline() when the referencemight have dropped down to 0 and + * shouldn't be incremented anymore (css_tryget_online() would + * fail) we do not have other options because of the kmem + * allocations lifetime. */ css_get(&memcg->css); @@ -5844,9 +5836,10 @@ static void memcg_event_ptable_queue_proc(struct file *file, * Input must be in format '<event_fd> <control_fd> <args>'. * Interpretation of args is defined by control file implementation. */ -static int memcg_write_event_control(struct cgroup_subsys_state *css, - struct cftype *cft, char *buffer) +static ssize_t memcg_write_event_control(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) { + struct cgroup_subsys_state *css = of_css(of); struct mem_cgroup *memcg = mem_cgroup_from_css(css); struct mem_cgroup_event *event; struct cgroup_subsys_state *cfile_css; @@ -5857,15 +5850,17 @@ static int memcg_write_event_control(struct cgroup_subsys_state *css, char *endp; int ret; - efd = simple_strtoul(buffer, &endp, 10); + buf = strstrip(buf); + + efd = simple_strtoul(buf, &endp, 10); if (*endp != ' ') return -EINVAL; - buffer = endp + 1; + buf = endp + 1; - cfd = simple_strtoul(buffer, &endp, 10); + cfd = simple_strtoul(buf, &endp, 10); if ((*endp != ' ') && (*endp != '\0')) return -EINVAL; - buffer = endp + 1; + buf = endp + 1; event = kzalloc(sizeof(*event), GFP_KERNEL); if (!event) @@ -5933,8 +5928,8 @@ static int memcg_write_event_control(struct cgroup_subsys_state *css, * automatically removed on cgroup destruction but the removal is * asynchronous, so take an extra ref on @css. */ - cfile_css = css_tryget_from_dir(cfile.file->f_dentry->d_parent, - &memory_cgrp_subsys); + cfile_css = css_tryget_online_from_dir(cfile.file->f_dentry->d_parent, + &memory_cgrp_subsys); ret = -EINVAL; if (IS_ERR(cfile_css)) goto out_put_cfile; @@ -5943,7 +5938,7 @@ static int memcg_write_event_control(struct cgroup_subsys_state *css, goto out_put_cfile; } - ret = event->register_event(memcg, event->eventfd, buffer); + ret = event->register_event(memcg, event->eventfd, buf); if (ret) goto out_put_css; @@ -5956,7 +5951,7 @@ static int memcg_write_event_control(struct cgroup_subsys_state *css, fdput(cfile); fdput(efile); - return 0; + return nbytes; out_put_css: css_put(css); @@ -5981,25 +5976,25 @@ static struct cftype mem_cgroup_files[] = { { .name = "max_usage_in_bytes", .private = MEMFILE_PRIVATE(_MEM, RES_MAX_USAGE), - .trigger = mem_cgroup_reset, + .write = mem_cgroup_reset, .read_u64 = mem_cgroup_read_u64, }, { .name = "limit_in_bytes", .private = MEMFILE_PRIVATE(_MEM, RES_LIMIT), - .write_string = mem_cgroup_write, + .write = mem_cgroup_write, .read_u64 = mem_cgroup_read_u64, }, { .name = "soft_limit_in_bytes", .private = MEMFILE_PRIVATE(_MEM, RES_SOFT_LIMIT), - .write_string = mem_cgroup_write, + .write = mem_cgroup_write, .read_u64 = mem_cgroup_read_u64, }, { .name = "failcnt", .private = MEMFILE_PRIVATE(_MEM, RES_FAILCNT), - .trigger = mem_cgroup_reset, + .write = mem_cgroup_reset, .read_u64 = mem_cgroup_read_u64, }, { @@ -6008,7 +6003,7 @@ static struct cftype mem_cgroup_files[] = { }, { .name = "force_empty", - .trigger = mem_cgroup_force_empty_write, + .write = mem_cgroup_force_empty_write, }, { .name = "use_hierarchy", @@ -6018,7 +6013,7 @@ static struct cftype mem_cgroup_files[] = { }, { .name = "cgroup.event_control", /* XXX: for compat */ - .write_string = memcg_write_event_control, + .write = memcg_write_event_control, .flags = CFTYPE_NO_PREFIX, .mode = S_IWUGO, }, @@ -6051,7 +6046,7 @@ static struct cftype mem_cgroup_files[] = { { .name = "kmem.limit_in_bytes", .private = MEMFILE_PRIVATE(_KMEM, RES_LIMIT), - .write_string = mem_cgroup_write, + .write = mem_cgroup_write, .read_u64 = mem_cgroup_read_u64, }, { @@ -6062,13 +6057,13 @@ static struct cftype mem_cgroup_files[] = { { .name = "kmem.failcnt", .private = MEMFILE_PRIVATE(_KMEM, RES_FAILCNT), - .trigger = mem_cgroup_reset, + .write = mem_cgroup_reset, .read_u64 = mem_cgroup_read_u64, }, { .name = "kmem.max_usage_in_bytes", .private = MEMFILE_PRIVATE(_KMEM, RES_MAX_USAGE), - .trigger = mem_cgroup_reset, + .write = mem_cgroup_reset, .read_u64 = mem_cgroup_read_u64, }, #ifdef CONFIG_SLABINFO @@ -6091,19 +6086,19 @@ static struct cftype memsw_cgroup_files[] = { { .name = "memsw.max_usage_in_bytes", .private = MEMFILE_PRIVATE(_MEMSWAP, RES_MAX_USAGE), - .trigger = mem_cgroup_reset, + .write = mem_cgroup_reset, .read_u64 = mem_cgroup_read_u64, }, { .name = "memsw.limit_in_bytes", .private = MEMFILE_PRIVATE(_MEMSWAP, RES_LIMIT), - .write_string = mem_cgroup_write, + .write = mem_cgroup_write, .read_u64 = mem_cgroup_read_u64, }, { .name = "memsw.failcnt", .private = MEMFILE_PRIVATE(_MEMSWAP, RES_FAILCNT), - .trigger = mem_cgroup_reset, + .write = mem_cgroup_reset, .read_u64 = mem_cgroup_read_u64, }, { }, /* terminate */ @@ -6281,9 +6276,9 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css) { struct mem_cgroup *memcg = mem_cgroup_from_css(css); - struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(css)); + struct mem_cgroup *parent = mem_cgroup_from_css(css->parent); - if (css->cgroup->id > MEM_CGROUP_ID_MAX) + if (css->id > MEM_CGROUP_ID_MAX) return -ENOSPC; if (!parent) @@ -6378,7 +6373,7 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css) /* * XXX: css_offline() would be where we should reparent all * memory to prepare the cgroup for destruction. However, - * memcg does not do css_tryget() and res_counter charging + * memcg does not do css_tryget_online() and res_counter charging * under the same RCU lock region, which means that charging * could race with offlining. Offlining only happens to * cgroups with no tasks in them but charges can show up @@ -6392,9 +6387,9 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css) * lookup_swap_cgroup_id() * rcu_read_lock() * mem_cgroup_lookup() - * css_tryget() + * css_tryget_online() * rcu_read_unlock() - * disable css_tryget() + * disable css_tryget_online() * call_rcu() * offline_css() * reparent_charges() |
