aboutsummaryrefslogtreecommitdiff
path: root/net/core/netprio_cgroup.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/core/netprio_cgroup.c')
-rw-r--r--net/core/netprio_cgroup.c381
1 files changed, 165 insertions, 216 deletions
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 4dacc44637e..2f385b9bccc 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -9,6 +9,8 @@
* Authors: Neil Horman <nhorman@tuxdriver.com>
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
@@ -23,230 +25,210 @@
#include <net/sock.h>
#include <net/netprio_cgroup.h>
-static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss,
- struct cgroup *cgrp);
-static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp);
-static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp);
-
-struct cgroup_subsys net_prio_subsys = {
- .name = "net_prio",
- .create = cgrp_create,
- .destroy = cgrp_destroy,
- .populate = cgrp_populate,
-#ifdef CONFIG_NETPRIO_CGROUP
- .subsys_id = net_prio_subsys_id,
-#endif
- .module = THIS_MODULE
-};
-
-#define PRIOIDX_SZ 128
+#include <linux/fdtable.h>
-static unsigned long prioidx_map[PRIOIDX_SZ];
-static DEFINE_SPINLOCK(prioidx_map_lock);
-static atomic_t max_prioidx = ATOMIC_INIT(0);
+#define PRIOMAP_MIN_SZ 128
-static inline struct cgroup_netprio_state *cgrp_netprio_state(struct cgroup *cgrp)
-{
- return container_of(cgroup_subsys_state(cgrp, net_prio_subsys_id),
- struct cgroup_netprio_state, css);
-}
-
-static int get_prioidx(u32 *prio)
+/*
+ * Extend @dev->priomap so that it's large enough to accommodate
+ * @target_idx. @dev->priomap.priomap_len > @target_idx after successful
+ * return. Must be called under rtnl lock.
+ */
+static int extend_netdev_table(struct net_device *dev, u32 target_idx)
{
- unsigned long flags;
- u32 prioidx;
-
- spin_lock_irqsave(&prioidx_map_lock, flags);
- prioidx = find_first_zero_bit(prioidx_map, sizeof(unsigned long) * PRIOIDX_SZ);
- if (prioidx == sizeof(unsigned long) * PRIOIDX_SZ) {
- spin_unlock_irqrestore(&prioidx_map_lock, flags);
- return -ENOSPC;
- }
- set_bit(prioidx, prioidx_map);
- spin_unlock_irqrestore(&prioidx_map_lock, flags);
- atomic_set(&max_prioidx, prioidx);
- *prio = prioidx;
- return 0;
-}
+ struct netprio_map *old, *new;
+ size_t new_sz, new_len;
-static void put_prioidx(u32 idx)
-{
- unsigned long flags;
+ /* is the existing priomap large enough? */
+ old = rtnl_dereference(dev->priomap);
+ if (old && old->priomap_len > target_idx)
+ return 0;
- spin_lock_irqsave(&prioidx_map_lock, flags);
- clear_bit(idx, prioidx_map);
- spin_unlock_irqrestore(&prioidx_map_lock, flags);
-}
+ /*
+ * Determine the new size. Let's keep it power-of-two. We start
+ * from PRIOMAP_MIN_SZ and double it until it's large enough to
+ * accommodate @target_idx.
+ */
+ new_sz = PRIOMAP_MIN_SZ;
+ while (true) {
+ new_len = (new_sz - offsetof(struct netprio_map, priomap)) /
+ sizeof(new->priomap[0]);
+ if (new_len > target_idx)
+ break;
+ new_sz *= 2;
+ /* overflowed? */
+ if (WARN_ON(new_sz < PRIOMAP_MIN_SZ))
+ return -ENOSPC;
+ }
-static void extend_netdev_table(struct net_device *dev, u32 new_len)
-{
- size_t new_size = sizeof(struct netprio_map) +
- ((sizeof(u32) * new_len));
- struct netprio_map *new_priomap = kzalloc(new_size, GFP_KERNEL);
- struct netprio_map *old_priomap;
- int i;
+ /* allocate & copy */
+ new = kzalloc(new_sz, GFP_KERNEL);
+ if (!new)
+ return -ENOMEM;
- old_priomap = rtnl_dereference(dev->priomap);
+ if (old)
+ memcpy(new->priomap, old->priomap,
+ old->priomap_len * sizeof(old->priomap[0]));
- if (!new_priomap) {
- printk(KERN_WARNING "Unable to alloc new priomap!\n");
- return;
- }
+ new->priomap_len = new_len;
- for (i = 0;
- old_priomap && (i < old_priomap->priomap_len);
- i++)
- new_priomap->priomap[i] = old_priomap->priomap[i];
+ /* install the new priomap */
+ rcu_assign_pointer(dev->priomap, new);
+ if (old)
+ kfree_rcu(old, rcu);
+ return 0;
+}
- new_priomap->priomap_len = new_len;
+/**
+ * netprio_prio - return the effective netprio of a cgroup-net_device pair
+ * @css: css part of the target pair
+ * @dev: net_device part of the target pair
+ *
+ * Should be called under RCU read or rtnl lock.
+ */
+static u32 netprio_prio(struct cgroup_subsys_state *css, struct net_device *dev)
+{
+ struct netprio_map *map = rcu_dereference_rtnl(dev->priomap);
+ int id = css->cgroup->id;
- rcu_assign_pointer(dev->priomap, new_priomap);
- if (old_priomap)
- kfree_rcu(old_priomap, rcu);
+ if (map && id < map->priomap_len)
+ return map->priomap[id];
+ return 0;
}
-static void update_netdev_tables(void)
+/**
+ * netprio_set_prio - set netprio on a cgroup-net_device pair
+ * @css: css part of the target pair
+ * @dev: net_device part of the target pair
+ * @prio: prio to set
+ *
+ * Set netprio to @prio on @css-@dev pair. Should be called under rtnl
+ * lock and may fail under memory pressure for non-zero @prio.
+ */
+static int netprio_set_prio(struct cgroup_subsys_state *css,
+ struct net_device *dev, u32 prio)
{
- struct net_device *dev;
- u32 max_len = atomic_read(&max_prioidx) + 1;
struct netprio_map *map;
+ int id = css->cgroup->id;
+ int ret;
- rtnl_lock();
- for_each_netdev(&init_net, dev) {
- map = rtnl_dereference(dev->priomap);
- if ((!map) ||
- (map->priomap_len < max_len))
- extend_netdev_table(dev, max_len);
- }
- rtnl_unlock();
+ /* avoid extending priomap for zero writes */
+ map = rtnl_dereference(dev->priomap);
+ if (!prio && (!map || map->priomap_len <= id))
+ return 0;
+
+ ret = extend_netdev_table(dev, id);
+ if (ret)
+ return ret;
+
+ map = rtnl_dereference(dev->priomap);
+ map->priomap[id] = prio;
+ return 0;
}
-static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss,
- struct cgroup *cgrp)
+static struct cgroup_subsys_state *
+cgrp_css_alloc(struct cgroup_subsys_state *parent_css)
{
- struct cgroup_netprio_state *cs;
- int ret;
+ struct cgroup_subsys_state *css;
- cs = kzalloc(sizeof(*cs), GFP_KERNEL);
- if (!cs)
+ css = kzalloc(sizeof(*css), GFP_KERNEL);
+ if (!css)
return ERR_PTR(-ENOMEM);
- if (cgrp->parent && cgrp_netprio_state(cgrp->parent)->prioidx) {
- kfree(cs);
- return ERR_PTR(-EINVAL);
- }
-
- ret = get_prioidx(&cs->prioidx);
- if (ret != 0) {
- printk(KERN_WARNING "No space in priority index array\n");
- kfree(cs);
- return ERR_PTR(ret);
- }
-
- return &cs->css;
+ return css;
}
-static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
+static int cgrp_css_online(struct cgroup_subsys_state *css)
{
- struct cgroup_netprio_state *cs;
+ struct cgroup_subsys_state *parent_css = css->parent;
struct net_device *dev;
- struct netprio_map *map;
+ int ret = 0;
+
+ if (!parent_css)
+ return 0;
- cs = cgrp_netprio_state(cgrp);
rtnl_lock();
+ /*
+ * Inherit prios from the parent. As all prios are set during
+ * onlining, there is no need to clear them on offline.
+ */
for_each_netdev(&init_net, dev) {
- map = rtnl_dereference(dev->priomap);
- if (map)
- map->priomap[cs->prioidx] = 0;
+ u32 prio = netprio_prio(parent_css, dev);
+
+ ret = netprio_set_prio(css, dev, prio);
+ if (ret)
+ break;
}
rtnl_unlock();
- put_prioidx(cs->prioidx);
- kfree(cs);
+ return ret;
}
-static u64 read_prioidx(struct cgroup *cgrp, struct cftype *cft)
+static void cgrp_css_free(struct cgroup_subsys_state *css)
{
- return (u64)cgrp_netprio_state(cgrp)->prioidx;
+ kfree(css);
}
-static int read_priomap(struct cgroup *cont, struct cftype *cft,
- struct cgroup_map_cb *cb)
+static u64 read_prioidx(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+ return css->cgroup->id;
+}
+
+static int read_priomap(struct seq_file *sf, void *v)
{
struct net_device *dev;
- u32 prioidx = cgrp_netprio_state(cont)->prioidx;
- u32 priority;
- struct netprio_map *map;
rcu_read_lock();
- for_each_netdev_rcu(&init_net, dev) {
- map = rcu_dereference(dev->priomap);
- priority = map ? map->priomap[prioidx] : 0;
- cb->fill(cb, dev->name, priority);
- }
+ for_each_netdev_rcu(&init_net, dev)
+ seq_printf(sf, "%s %u\n", dev->name,
+ netprio_prio(seq_css(sf), dev));
rcu_read_unlock();
return 0;
}
-static int write_priomap(struct cgroup *cgrp, struct cftype *cft,
- const char *buffer)
+static ssize_t write_priomap(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off)
{
- char *devname = kstrdup(buffer, GFP_KERNEL);
- int ret = -EINVAL;
- u32 prioidx = cgrp_netprio_state(cgrp)->prioidx;
- unsigned long priority;
- char *priostr;
+ char devname[IFNAMSIZ + 1];
struct net_device *dev;
- struct netprio_map *map;
-
- if (!devname)
- return -ENOMEM;
-
- /*
- * Minimally sized valid priomap string
- */
- if (strlen(devname) < 3)
- goto out_free_devname;
+ u32 prio;
+ int ret;
- priostr = strstr(devname, " ");
- if (!priostr)
- goto out_free_devname;
+ if (sscanf(buf, "%"__stringify(IFNAMSIZ)"s %u", devname, &prio) != 2)
+ return -EINVAL;
- /*
- *Separate the devname from the associated priority
- *and advance the priostr poitner to the priority value
- */
- *priostr = '\0';
- priostr++;
+ dev = dev_get_by_name(&init_net, devname);
+ if (!dev)
+ return -ENODEV;
- /*
- * If the priostr points to NULL, we're at the end of the passed
- * in string, and its not a valid write
- */
- if (*priostr == '\0')
- goto out_free_devname;
+ rtnl_lock();
- ret = kstrtoul(priostr, 10, &priority);
- if (ret < 0)
- goto out_free_devname;
+ ret = netprio_set_prio(of_css(of), dev, prio);
- ret = -ENODEV;
+ rtnl_unlock();
+ dev_put(dev);
+ return ret ?: nbytes;
+}
- dev = dev_get_by_name(&init_net, devname);
- if (!dev)
- goto out_free_devname;
+static int update_netprio(const void *v, struct file *file, unsigned n)
+{
+ int err;
+ struct socket *sock = sock_from_file(file, &err);
+ if (sock)
+ sock->sk->sk_cgrp_prioidx = (u32)(unsigned long)v;
+ return 0;
+}
- update_netdev_tables();
- ret = 0;
- rcu_read_lock();
- map = rcu_dereference(dev->priomap);
- if (map)
- map->priomap[prioidx] = priority;
- rcu_read_unlock();
- dev_put(dev);
+static void net_prio_attach(struct cgroup_subsys_state *css,
+ struct cgroup_taskset *tset)
+{
+ struct task_struct *p;
+ void *v = (void *)(unsigned long)css->cgroup->id;
-out_free_devname:
- kfree(devname);
- return ret;
+ cgroup_taskset_for_each(p, tset) {
+ task_lock(p);
+ iterate_fd(p->files, 0, update_netprio, v);
+ task_unlock(p);
+ }
}
static struct cftype ss_files[] = {
@@ -256,20 +238,24 @@ static struct cftype ss_files[] = {
},
{
.name = "ifpriomap",
- .read_map = read_priomap,
- .write_string = write_priomap,
+ .seq_show = read_priomap,
+ .write = write_priomap,
},
+ { } /* terminate */
};
-static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
-{
- return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files));
-}
+struct cgroup_subsys net_prio_cgrp_subsys = {
+ .css_alloc = cgrp_css_alloc,
+ .css_online = cgrp_css_online,
+ .css_free = cgrp_css_free,
+ .attach = net_prio_attach,
+ .base_cftypes = ss_files,
+};
static int netprio_device_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
- struct net_device *dev = ptr;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct netprio_map *old;
/*
@@ -294,46 +280,9 @@ static struct notifier_block netprio_device_notifier = {
static int __init init_cgroup_netprio(void)
{
- int ret;
-
- ret = cgroup_load_subsys(&net_prio_subsys);
- if (ret)
- goto out;
-#ifndef CONFIG_NETPRIO_CGROUP
- smp_wmb();
- net_prio_subsys_id = net_prio_subsys.subsys_id;
-#endif
-
register_netdevice_notifier(&netprio_device_notifier);
-
-out:
- return ret;
-}
-
-static void __exit exit_cgroup_netprio(void)
-{
- struct netprio_map *old;
- struct net_device *dev;
-
- unregister_netdevice_notifier(&netprio_device_notifier);
-
- cgroup_unload_subsys(&net_prio_subsys);
-
-#ifndef CONFIG_NETPRIO_CGROUP
- net_prio_subsys_id = -1;
- synchronize_rcu();
-#endif
-
- rtnl_lock();
- for_each_netdev(&init_net, dev) {
- old = rtnl_dereference(dev->priomap);
- RCU_INIT_POINTER(dev->priomap, NULL);
- if (old)
- kfree_rcu(old, rcu);
- }
- rtnl_unlock();
+ return 0;
}
-module_init(init_cgroup_netprio);
-module_exit(exit_cgroup_netprio);
+subsys_initcall(init_cgroup_netprio);
MODULE_LICENSE("GPL v2");