diff options
Diffstat (limited to 'net/core/net_namespace.c')
| -rw-r--r-- | net/core/net_namespace.c | 512 |
1 files changed, 419 insertions, 93 deletions
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 7b660834a4c..85b62691f4f 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -1,3 +1,5 @@ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/workqueue.h> #include <linux/rtnetlink.h> #include <linux/cache.h> @@ -5,7 +7,16 @@ #include <linux/list.h> #include <linux/delay.h> #include <linux/sched.h> +#include <linux/idr.h> +#include <linux/rculist.h> +#include <linux/nsproxy.h> +#include <linux/fs.h> +#include <linux/proc_ns.h> +#include <linux/file.h> +#include <linux/export.h> +#include <linux/user_namespace.h> #include <net/net_namespace.h> +#include <net/netns/generic.h> /* * Our network namespace constructor/destructor lists @@ -13,32 +24,149 @@ static LIST_HEAD(pernet_list); static struct list_head *first_device = &pernet_list; -static DEFINE_MUTEX(net_mutex); +DEFINE_MUTEX(net_mutex); LIST_HEAD(net_namespace_list); +EXPORT_SYMBOL_GPL(net_namespace_list); -struct net init_net; +struct net init_net = { + .dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head), +}; EXPORT_SYMBOL(init_net); +#define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ + +static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS; + +static struct net_generic *net_alloc_generic(void) +{ + struct net_generic *ng; + size_t generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]); + + ng = kzalloc(generic_size, GFP_KERNEL); + if (ng) + ng->len = max_gen_ptrs; + + return ng; +} + +static int net_assign_generic(struct net *net, int id, void *data) +{ + struct net_generic *ng, *old_ng; + + BUG_ON(!mutex_is_locked(&net_mutex)); + BUG_ON(id == 0); + + old_ng = rcu_dereference_protected(net->gen, + lockdep_is_held(&net_mutex)); + ng = old_ng; + if (old_ng->len >= id) + goto assign; + + ng = net_alloc_generic(); + if (ng == NULL) + return -ENOMEM; + + /* + * Some synchronisation notes: + * + * The net_generic explores the net->gen array inside rcu + * read section. Besides once set the net->gen->ptr[x] + * pointer never changes (see rules in netns/generic.h). + * + * That said, we simply duplicate this array and schedule + * the old copy for kfree after a grace period. + */ + + memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*)); + + rcu_assign_pointer(net->gen, ng); + kfree_rcu(old_ng, rcu); +assign: + ng->ptr[id - 1] = data; + return 0; +} + +static int ops_init(const struct pernet_operations *ops, struct net *net) +{ + int err = -ENOMEM; + void *data = NULL; + + if (ops->id && ops->size) { + data = kzalloc(ops->size, GFP_KERNEL); + if (!data) + goto out; + + err = net_assign_generic(net, *ops->id, data); + if (err) + goto cleanup; + } + err = 0; + if (ops->init) + err = ops->init(net); + if (!err) + return 0; + +cleanup: + kfree(data); + +out: + return err; +} + +static void ops_free(const struct pernet_operations *ops, struct net *net) +{ + if (ops->id && ops->size) { + int id = *ops->id; + kfree(net_generic(net, id)); + } +} + +static void ops_exit_list(const struct pernet_operations *ops, + struct list_head *net_exit_list) +{ + struct net *net; + if (ops->exit) { + list_for_each_entry(net, net_exit_list, exit_list) + ops->exit(net); + } + if (ops->exit_batch) + ops->exit_batch(net_exit_list); +} + +static void ops_free_list(const struct pernet_operations *ops, + struct list_head *net_exit_list) +{ + struct net *net; + if (ops->size && ops->id) { + list_for_each_entry(net, net_exit_list, exit_list) + ops_free(ops, net); + } +} + /* * setup_net runs the initializers for the network namespace object. */ -static __net_init int setup_net(struct net *net) +static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) { /* Must be called with net_mutex held */ - struct pernet_operations *ops; - int error; + const struct pernet_operations *ops, *saved_ops; + int error = 0; + LIST_HEAD(net_exit_list); atomic_set(&net->count, 1); + atomic_set(&net->passive, 1); + net->dev_base_seq = 1; + net->user_ns = user_ns; + +#ifdef NETNS_REFCNT_DEBUG atomic_set(&net->use_count, 0); +#endif - error = 0; list_for_each_entry(ops, &pernet_list, list) { - if (ops->init) { - error = ops->init(net); - if (error < 0) - goto out_undo; - } + error = ops_init(ops, net); + if (error < 0) + goto out_undo; } out: return error; @@ -47,93 +175,136 @@ out_undo: /* Walk through the list backwards calling the exit functions * for the pernet modules whose init functions did not fail. */ - list_for_each_entry_continue_reverse(ops, &pernet_list, list) { - if (ops->exit) - ops->exit(net); - } + list_add(&net->exit_list, &net_exit_list); + saved_ops = ops; + list_for_each_entry_continue_reverse(ops, &pernet_list, list) + ops_exit_list(ops, &net_exit_list); + + ops = saved_ops; + list_for_each_entry_continue_reverse(ops, &pernet_list, list) + ops_free_list(ops, &net_exit_list); rcu_barrier(); goto out; } + #ifdef CONFIG_NET_NS static struct kmem_cache *net_cachep; static struct workqueue_struct *netns_wq; static struct net *net_alloc(void) { - return kmem_cache_zalloc(net_cachep, GFP_KERNEL); + struct net *net = NULL; + struct net_generic *ng; + + ng = net_alloc_generic(); + if (!ng) + goto out; + + net = kmem_cache_zalloc(net_cachep, GFP_KERNEL); + if (!net) + goto out_free; + + rcu_assign_pointer(net->gen, ng); +out: + return net; + +out_free: + kfree(ng); + goto out; } static void net_free(struct net *net) { - if (!net) - return; - +#ifdef NETNS_REFCNT_DEBUG if (unlikely(atomic_read(&net->use_count) != 0)) { - printk(KERN_EMERG "network namespace not free! Usage: %d\n", - atomic_read(&net->use_count)); + pr_emerg("network namespace not free! Usage: %d\n", + atomic_read(&net->use_count)); return; } - +#endif + kfree(net->gen); kmem_cache_free(net_cachep, net); } -struct net *copy_net_ns(unsigned long flags, struct net *old_net) +void net_drop_ns(void *p) { - struct net *new_net = NULL; - int err; + struct net *ns = p; + if (ns && atomic_dec_and_test(&ns->passive)) + net_free(ns); +} - get_net(old_net); +struct net *copy_net_ns(unsigned long flags, + struct user_namespace *user_ns, struct net *old_net) +{ + struct net *net; + int rv; if (!(flags & CLONE_NEWNET)) - return old_net; - - err = -ENOMEM; - new_net = net_alloc(); - if (!new_net) - goto out; - - mutex_lock(&net_mutex); - err = setup_net(new_net); - if (err) - goto out_unlock; + return get_net(old_net); - rtnl_lock(); - list_add_tail(&new_net->list, &net_namespace_list); - rtnl_unlock(); + net = net_alloc(); + if (!net) + return ERR_PTR(-ENOMEM); + get_user_ns(user_ns); -out_unlock: + mutex_lock(&net_mutex); + rv = setup_net(net, user_ns); + if (rv == 0) { + rtnl_lock(); + list_add_tail_rcu(&net->list, &net_namespace_list); + rtnl_unlock(); + } mutex_unlock(&net_mutex); -out: - put_net(old_net); - if (err) { - net_free(new_net); - new_net = ERR_PTR(err); + if (rv < 0) { + put_user_ns(user_ns); + net_drop_ns(net); + return ERR_PTR(rv); } - return new_net; + return net; } +static DEFINE_SPINLOCK(cleanup_list_lock); +static LIST_HEAD(cleanup_list); /* Must hold cleanup_list_lock to touch */ + static void cleanup_net(struct work_struct *work) { - struct pernet_operations *ops; - struct net *net; + const struct pernet_operations *ops; + struct net *net, *tmp; + struct list_head net_kill_list; + LIST_HEAD(net_exit_list); - net = container_of(work, struct net, work); + /* Atomically snapshot the list of namespaces to cleanup */ + spin_lock_irq(&cleanup_list_lock); + list_replace_init(&cleanup_list, &net_kill_list); + spin_unlock_irq(&cleanup_list_lock); mutex_lock(&net_mutex); /* Don't let anyone else find us. */ rtnl_lock(); - list_del(&net->list); + list_for_each_entry(net, &net_kill_list, cleanup_list) { + list_del_rcu(&net->list); + list_add_tail(&net->exit_list, &net_exit_list); + } rtnl_unlock(); + /* + * Another CPU might be rcu-iterating the list, wait for it. + * This needs to be before calling the exit() notifiers, so + * the rcu_barrier() below isn't sufficient alone. + */ + synchronize_rcu(); + /* Run all of the network namespace exit methods */ - list_for_each_entry_reverse(ops, &pernet_list, list) { - if (ops->exit) - ops->exit(net); - } + list_for_each_entry_reverse(ops, &pernet_list, list) + ops_exit_list(ops, &net_exit_list); + + /* Free the net generic variables */ + list_for_each_entry_reverse(ops, &pernet_list, list) + ops_free_list(ops, &net_exit_list); mutex_unlock(&net_mutex); @@ -143,31 +314,93 @@ static void cleanup_net(struct work_struct *work) rcu_barrier(); /* Finally it is safe to free my network namespace structure */ - net_free(net); + list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { + list_del_init(&net->exit_list); + put_user_ns(net->user_ns); + net_drop_ns(net); + } } +static DECLARE_WORK(net_cleanup_work, cleanup_net); void __put_net(struct net *net) { /* Cleanup the network namespace in process context */ - INIT_WORK(&net->work, cleanup_net); - queue_work(netns_wq, &net->work); + unsigned long flags; + + spin_lock_irqsave(&cleanup_list_lock, flags); + list_add(&net->cleanup_list, &cleanup_list); + spin_unlock_irqrestore(&cleanup_list_lock, flags); + + queue_work(netns_wq, &net_cleanup_work); } EXPORT_SYMBOL_GPL(__put_net); +struct net *get_net_ns_by_fd(int fd) +{ + struct proc_ns *ei; + struct file *file; + struct net *net; + + file = proc_ns_fget(fd); + if (IS_ERR(file)) + return ERR_CAST(file); + + ei = get_proc_ns(file_inode(file)); + if (ei->ns_ops == &netns_operations) + net = get_net(ei->ns); + else + net = ERR_PTR(-EINVAL); + + fput(file); + return net; +} + #else -struct net *copy_net_ns(unsigned long flags, struct net *old_net) +struct net *get_net_ns_by_fd(int fd) { - if (flags & CLONE_NEWNET) - return ERR_PTR(-EINVAL); - return old_net; + return ERR_PTR(-EINVAL); } #endif +struct net *get_net_ns_by_pid(pid_t pid) +{ + struct task_struct *tsk; + struct net *net; + + /* Lookup the network namespace */ + net = ERR_PTR(-ESRCH); + rcu_read_lock(); + tsk = find_task_by_vpid(pid); + if (tsk) { + struct nsproxy *nsproxy; + nsproxy = task_nsproxy(tsk); + if (nsproxy) + net = get_net(nsproxy->net_ns); + } + rcu_read_unlock(); + return net; +} +EXPORT_SYMBOL_GPL(get_net_ns_by_pid); + +static __net_init int net_ns_net_init(struct net *net) +{ + return proc_alloc_inum(&net->proc_inum); +} + +static __net_exit void net_ns_net_exit(struct net *net) +{ + proc_free_inum(net->proc_inum); +} + +static struct pernet_operations __net_initdata net_ns_ops = { + .init = net_ns_net_init, + .exit = net_ns_net_exit, +}; + static int __init net_ns_init(void) { - int err; + struct net_generic *ng; - printk(KERN_INFO "net_namespace: %zd bytes\n", sizeof(struct net)); #ifdef CONFIG_NET_NS net_cachep = kmem_cache_create("net_namespace", sizeof(struct net), SMP_CACHE_BYTES, @@ -179,16 +412,23 @@ static int __init net_ns_init(void) panic("Could not create netns workq"); #endif + ng = net_alloc_generic(); + if (!ng) + panic("Could not allocate generic netns"); + + rcu_assign_pointer(init_net.gen, ng); + mutex_lock(&net_mutex); - err = setup_net(&init_net); + if (setup_net(&init_net, &init_user_ns)) + panic("Could not setup the initial network namespace"); rtnl_lock(); - list_add_tail(&init_net.list, &net_namespace_list); + list_add_tail_rcu(&init_net.list, &net_namespace_list); rtnl_unlock(); mutex_unlock(&net_mutex); - if (err) - panic("Could not setup the initial network namespace"); + + register_pernet_subsys(&net_ns_ops); return 0; } @@ -196,18 +436,20 @@ static int __init net_ns_init(void) pure_initcall(net_ns_init); #ifdef CONFIG_NET_NS -static int register_pernet_operations(struct list_head *list, - struct pernet_operations *ops) +static int __register_pernet_operations(struct list_head *list, + struct pernet_operations *ops) { - struct net *net, *undo_net; + struct net *net; int error; + LIST_HEAD(net_exit_list); list_add_tail(&ops->list, list); - if (ops->init) { + if (ops->init || (ops->id && ops->size)) { for_each_net(net) { - error = ops->init(net); + error = ops_init(ops, net); if (error) goto out_undo; + list_add_tail(&net->exit_list, &net_exit_list); } } return 0; @@ -215,43 +457,78 @@ static int register_pernet_operations(struct list_head *list, out_undo: /* If I have an error cleanup all namespaces I initialized */ list_del(&ops->list); - if (ops->exit) { - for_each_net(undo_net) { - if (undo_net == net) - goto undone; - ops->exit(undo_net); - } - } -undone: + ops_exit_list(ops, &net_exit_list); + ops_free_list(ops, &net_exit_list); return error; } -static void unregister_pernet_operations(struct pernet_operations *ops) +static void __unregister_pernet_operations(struct pernet_operations *ops) { struct net *net; + LIST_HEAD(net_exit_list); list_del(&ops->list); - if (ops->exit) - for_each_net(net) - ops->exit(net); + for_each_net(net) + list_add_tail(&net->exit_list, &net_exit_list); + ops_exit_list(ops, &net_exit_list); + ops_free_list(ops, &net_exit_list); } #else +static int __register_pernet_operations(struct list_head *list, + struct pernet_operations *ops) +{ + return ops_init(ops, &init_net); +} + +static void __unregister_pernet_operations(struct pernet_operations *ops) +{ + LIST_HEAD(net_exit_list); + list_add(&init_net.exit_list, &net_exit_list); + ops_exit_list(ops, &net_exit_list); + ops_free_list(ops, &net_exit_list); +} + +#endif /* CONFIG_NET_NS */ + +static DEFINE_IDA(net_generic_ids); + static int register_pernet_operations(struct list_head *list, struct pernet_operations *ops) { - if (ops->init == NULL) - return 0; - return ops->init(&init_net); + int error; + + if (ops->id) { +again: + error = ida_get_new_above(&net_generic_ids, 1, ops->id); + if (error < 0) { + if (error == -EAGAIN) { + ida_pre_get(&net_generic_ids, GFP_KERNEL); + goto again; + } + return error; + } + max_gen_ptrs = max_t(unsigned int, max_gen_ptrs, *ops->id); + } + error = __register_pernet_operations(list, ops); + if (error) { + rcu_barrier(); + if (ops->id) + ida_remove(&net_generic_ids, *ops->id); + } + + return error; } static void unregister_pernet_operations(struct pernet_operations *ops) { - if (ops->exit) - ops->exit(&init_net); + + __unregister_pernet_operations(ops); + rcu_barrier(); + if (ops->id) + ida_remove(&net_generic_ids, *ops->id); } -#endif /** * register_pernet_subsys - register a network namespace subsystem @@ -291,10 +568,10 @@ EXPORT_SYMBOL_GPL(register_pernet_subsys); * addition run the exit method for all existing network * namespaces. */ -void unregister_pernet_subsys(struct pernet_operations *module) +void unregister_pernet_subsys(struct pernet_operations *ops) { mutex_lock(&net_mutex); - unregister_pernet_operations(module); + unregister_pernet_operations(ops); mutex_unlock(&net_mutex); } EXPORT_SYMBOL_GPL(unregister_pernet_subsys); @@ -348,3 +625,52 @@ void unregister_pernet_device(struct pernet_operations *ops) mutex_unlock(&net_mutex); } EXPORT_SYMBOL_GPL(unregister_pernet_device); + +#ifdef CONFIG_NET_NS +static void *netns_get(struct task_struct *task) +{ + struct net *net = NULL; + struct nsproxy *nsproxy; + + rcu_read_lock(); + nsproxy = task_nsproxy(task); + if (nsproxy) + net = get_net(nsproxy->net_ns); + rcu_read_unlock(); + + return net; +} + +static void netns_put(void *ns) +{ + put_net(ns); +} + +static int netns_install(struct nsproxy *nsproxy, void *ns) +{ + struct net *net = ns; + + if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) || + !ns_capable(current_user_ns(), CAP_SYS_ADMIN)) + return -EPERM; + + put_net(nsproxy->net_ns); + nsproxy->net_ns = get_net(net); + return 0; +} + +static unsigned int netns_inum(void *ns) +{ + struct net *net = ns; + return net->proc_inum; +} + +const struct proc_ns_operations netns_operations = { + .name = "net", + .type = CLONE_NEWNET, + .get = netns_get, + .put = netns_put, + .install = netns_install, + .inum = netns_inum, +}; +#endif |
