diff options
Diffstat (limited to 'net/ipv4/tcp_memcontrol.c')
| -rw-r--r-- | net/ipv4/tcp_memcontrol.c | 228 |
1 files changed, 228 insertions, 0 deletions
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c new file mode 100644 index 00000000000..f7a2ec3ac58 --- /dev/null +++ b/net/ipv4/tcp_memcontrol.c @@ -0,0 +1,228 @@ +#include <net/tcp.h> +#include <net/tcp_memcontrol.h> +#include <net/sock.h> +#include <net/ip.h> +#include <linux/nsproxy.h> +#include <linux/memcontrol.h> +#include <linux/module.h> + +int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss) +{ + /* + * The root cgroup does not use res_counters, but rather, + * rely on the data already collected by the network + * subsystem + */ + struct res_counter *res_parent = NULL; + struct cg_proto *cg_proto, *parent_cg; + struct mem_cgroup *parent = parent_mem_cgroup(memcg); + + cg_proto = tcp_prot.proto_cgroup(memcg); + if (!cg_proto) + return 0; + + cg_proto->sysctl_mem[0] = sysctl_tcp_mem[0]; + cg_proto->sysctl_mem[1] = sysctl_tcp_mem[1]; + cg_proto->sysctl_mem[2] = sysctl_tcp_mem[2]; + cg_proto->memory_pressure = 0; + cg_proto->memcg = memcg; + + parent_cg = tcp_prot.proto_cgroup(parent); + if (parent_cg) + res_parent = &parent_cg->memory_allocated; + + res_counter_init(&cg_proto->memory_allocated, res_parent); + percpu_counter_init(&cg_proto->sockets_allocated, 0); + + return 0; +} +EXPORT_SYMBOL(tcp_init_cgroup); + +void tcp_destroy_cgroup(struct mem_cgroup *memcg) +{ + struct cg_proto *cg_proto; + + cg_proto = tcp_prot.proto_cgroup(memcg); + if (!cg_proto) + return; + + percpu_counter_destroy(&cg_proto->sockets_allocated); +} +EXPORT_SYMBOL(tcp_destroy_cgroup); + +static int tcp_update_limit(struct mem_cgroup *memcg, u64 val) +{ + struct cg_proto *cg_proto; + int i; + int ret; + + cg_proto = tcp_prot.proto_cgroup(memcg); + if (!cg_proto) + return -EINVAL; + + if (val > RES_COUNTER_MAX) + val = RES_COUNTER_MAX; + + ret = res_counter_set_limit(&cg_proto->memory_allocated, val); + if (ret) + return ret; + + for (i = 0; i < 3; i++) + cg_proto->sysctl_mem[i] = min_t(long, val >> PAGE_SHIFT, + sysctl_tcp_mem[i]); + + if (val == RES_COUNTER_MAX) + clear_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags); + else if (val != RES_COUNTER_MAX) { + /* + * The active bit needs to be written after the static_key + * update. This is what guarantees that the socket activation + * function is the last one to run. See sock_update_memcg() for + * details, and note that we don't mark any socket as belonging + * to this memcg until that flag is up. + * + * We need to do this, because static_keys will span multiple + * sites, but we can't control their order. If we mark a socket + * as accounted, but the accounting functions are not patched in + * yet, we'll lose accounting. + * + * We never race with the readers in sock_update_memcg(), + * because when this value change, the code to process it is not + * patched in yet. + * + * The activated bit is used to guarantee that no two writers + * will do the update in the same memcg. Without that, we can't + * properly shutdown the static key. + */ + if (!test_and_set_bit(MEMCG_SOCK_ACTIVATED, &cg_proto->flags)) + static_key_slow_inc(&memcg_socket_limit_enabled); + set_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags); + } + + return 0; +} + +static ssize_t tcp_cgroup_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); + unsigned long long val; + int ret = 0; + + buf = strstrip(buf); + + switch (of_cft(of)->private) { + case RES_LIMIT: + /* see memcontrol.c */ + ret = res_counter_memparse_write_strategy(buf, &val); + if (ret) + break; + ret = tcp_update_limit(memcg, val); + break; + default: + ret = -EINVAL; + break; + } + return ret ?: nbytes; +} + +static u64 tcp_read_stat(struct mem_cgroup *memcg, int type, u64 default_val) +{ + struct cg_proto *cg_proto; + + cg_proto = tcp_prot.proto_cgroup(memcg); + if (!cg_proto) + return default_val; + + return res_counter_read_u64(&cg_proto->memory_allocated, type); +} + +static u64 tcp_read_usage(struct mem_cgroup *memcg) +{ + struct cg_proto *cg_proto; + + cg_proto = tcp_prot.proto_cgroup(memcg); + if (!cg_proto) + return atomic_long_read(&tcp_memory_allocated) << PAGE_SHIFT; + + return res_counter_read_u64(&cg_proto->memory_allocated, RES_USAGE); +} + +static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(css); + u64 val; + + switch (cft->private) { + case RES_LIMIT: + val = tcp_read_stat(memcg, RES_LIMIT, RES_COUNTER_MAX); + break; + case RES_USAGE: + val = tcp_read_usage(memcg); + break; + case RES_FAILCNT: + case RES_MAX_USAGE: + val = tcp_read_stat(memcg, cft->private, 0); + break; + default: + BUG(); + } + return val; +} + +static ssize_t tcp_cgroup_reset(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) +{ + struct mem_cgroup *memcg; + struct cg_proto *cg_proto; + + memcg = mem_cgroup_from_css(of_css(of)); + cg_proto = tcp_prot.proto_cgroup(memcg); + if (!cg_proto) + return nbytes; + + switch (of_cft(of)->private) { + case RES_MAX_USAGE: + res_counter_reset_max(&cg_proto->memory_allocated); + break; + case RES_FAILCNT: + res_counter_reset_failcnt(&cg_proto->memory_allocated); + break; + } + + return nbytes; +} + +static struct cftype tcp_files[] = { + { + .name = "kmem.tcp.limit_in_bytes", + .write = tcp_cgroup_write, + .read_u64 = tcp_cgroup_read, + .private = RES_LIMIT, + }, + { + .name = "kmem.tcp.usage_in_bytes", + .read_u64 = tcp_cgroup_read, + .private = RES_USAGE, + }, + { + .name = "kmem.tcp.failcnt", + .private = RES_FAILCNT, + .write = tcp_cgroup_reset, + .read_u64 = tcp_cgroup_read, + }, + { + .name = "kmem.tcp.max_usage_in_bytes", + .private = RES_MAX_USAGE, + .write = tcp_cgroup_reset, + .read_u64 = tcp_cgroup_read, + }, + { } /* terminate */ +}; + +static int __init tcp_memcontrol_init(void) +{ + WARN_ON(cgroup_add_cftypes(&memory_cgrp_subsys, tcp_files)); + return 0; +} +__initcall(tcp_memcontrol_init); |
