diff options
Diffstat (limited to 'net/core/net_namespace.c')
| -rw-r--r-- | net/core/net_namespace.c | 575 |
1 files changed, 467 insertions, 108 deletions
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 6f71db8c442..85b62691f4f 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -1,3 +1,5 @@ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/workqueue.h> #include <linux/rtnetlink.h> #include <linux/cache.h> @@ -5,7 +7,16 @@ #include <linux/list.h> #include <linux/delay.h> #include <linux/sched.h> +#include <linux/idr.h> +#include <linux/rculist.h> +#include <linux/nsproxy.h> +#include <linux/fs.h> +#include <linux/proc_ns.h> +#include <linux/file.h> +#include <linux/export.h> +#include <linux/user_namespace.h> #include <net/net_namespace.h> +#include <net/netns/generic.h> /* * Our network namespace constructor/destructor lists @@ -13,93 +24,149 @@ static LIST_HEAD(pernet_list); static struct list_head *first_device = &pernet_list; -static DEFINE_MUTEX(net_mutex); +DEFINE_MUTEX(net_mutex); LIST_HEAD(net_namespace_list); +EXPORT_SYMBOL_GPL(net_namespace_list); -static struct kmem_cache *net_cachep; +struct net init_net = { + .dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head), +}; +EXPORT_SYMBOL(init_net); -struct net init_net; -EXPORT_SYMBOL_GPL(init_net); +#define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ -static struct net *net_alloc(void) -{ - return kmem_cache_zalloc(net_cachep, GFP_KERNEL); -} +static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS; -static void net_free(struct net *net) +static struct net_generic *net_alloc_generic(void) { - if (!net) - return; + struct net_generic *ng; + size_t generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]); - if (unlikely(atomic_read(&net->use_count) != 0)) { - printk(KERN_EMERG "network namespace not free! Usage: %d\n", - atomic_read(&net->use_count)); - return; - } + ng = kzalloc(generic_size, GFP_KERNEL); + if (ng) + ng->len = max_gen_ptrs; - kmem_cache_free(net_cachep, net); + return ng; } -static void cleanup_net(struct work_struct *work) +static int net_assign_generic(struct net *net, int id, void *data) { - struct pernet_operations *ops; - struct net *net; + struct net_generic *ng, *old_ng; + + BUG_ON(!mutex_is_locked(&net_mutex)); + BUG_ON(id == 0); + + old_ng = rcu_dereference_protected(net->gen, + lockdep_is_held(&net_mutex)); + ng = old_ng; + if (old_ng->len >= id) + goto assign; + + ng = net_alloc_generic(); + if (ng == NULL) + return -ENOMEM; + + /* + * Some synchronisation notes: + * + * The net_generic explores the net->gen array inside rcu + * read section. Besides once set the net->gen->ptr[x] + * pointer never changes (see rules in netns/generic.h). + * + * That said, we simply duplicate this array and schedule + * the old copy for kfree after a grace period. + */ - net = container_of(work, struct net, work); + memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*)); - mutex_lock(&net_mutex); + rcu_assign_pointer(net->gen, ng); + kfree_rcu(old_ng, rcu); +assign: + ng->ptr[id - 1] = data; + return 0; +} - /* Don't let anyone else find us. */ - rtnl_lock(); - list_del(&net->list); - rtnl_unlock(); +static int ops_init(const struct pernet_operations *ops, struct net *net) +{ + int err = -ENOMEM; + void *data = NULL; - /* Run all of the network namespace exit methods */ - list_for_each_entry_reverse(ops, &pernet_list, list) { - if (ops->exit) - ops->exit(net); + if (ops->id && ops->size) { + data = kzalloc(ops->size, GFP_KERNEL); + if (!data) + goto out; + + err = net_assign_generic(net, *ops->id, data); + if (err) + goto cleanup; } + err = 0; + if (ops->init) + err = ops->init(net); + if (!err) + return 0; - mutex_unlock(&net_mutex); +cleanup: + kfree(data); - /* Ensure there are no outstanding rcu callbacks using this - * network namespace. - */ - rcu_barrier(); +out: + return err; +} - /* Finally it is safe to free my network namespace structure */ - net_free(net); +static void ops_free(const struct pernet_operations *ops, struct net *net) +{ + if (ops->id && ops->size) { + int id = *ops->id; + kfree(net_generic(net, id)); + } } +static void ops_exit_list(const struct pernet_operations *ops, + struct list_head *net_exit_list) +{ + struct net *net; + if (ops->exit) { + list_for_each_entry(net, net_exit_list, exit_list) + ops->exit(net); + } + if (ops->exit_batch) + ops->exit_batch(net_exit_list); +} -void __put_net(struct net *net) +static void ops_free_list(const struct pernet_operations *ops, + struct list_head *net_exit_list) { - /* Cleanup the network namespace in process context */ - INIT_WORK(&net->work, cleanup_net); - schedule_work(&net->work); + struct net *net; + if (ops->size && ops->id) { + list_for_each_entry(net, net_exit_list, exit_list) + ops_free(ops, net); + } } -EXPORT_SYMBOL_GPL(__put_net); /* * setup_net runs the initializers for the network namespace object. */ -static int setup_net(struct net *net) +static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) { /* Must be called with net_mutex held */ - struct pernet_operations *ops; - int error; + const struct pernet_operations *ops, *saved_ops; + int error = 0; + LIST_HEAD(net_exit_list); atomic_set(&net->count, 1); + atomic_set(&net->passive, 1); + net->dev_base_seq = 1; + net->user_ns = user_ns; + +#ifdef NETNS_REFCNT_DEBUG atomic_set(&net->use_count, 0); +#endif - error = 0; list_for_each_entry(ops, &pernet_list, list) { - if (ops->init) { - error = ops->init(net); - if (error < 0) - goto out_undo; - } + error = ops_init(ops, net); + if (error < 0) + goto out_undo; } out: return error; @@ -108,116 +175,359 @@ out_undo: /* Walk through the list backwards calling the exit functions * for the pernet modules whose init functions did not fail. */ - list_for_each_entry_continue_reverse(ops, &pernet_list, list) { - if (ops->exit) - ops->exit(net); - } + list_add(&net->exit_list, &net_exit_list); + saved_ops = ops; + list_for_each_entry_continue_reverse(ops, &pernet_list, list) + ops_exit_list(ops, &net_exit_list); + + ops = saved_ops; + list_for_each_entry_continue_reverse(ops, &pernet_list, list) + ops_free_list(ops, &net_exit_list); + + rcu_barrier(); goto out; } -struct net *copy_net_ns(unsigned long flags, struct net *old_net) + +#ifdef CONFIG_NET_NS +static struct kmem_cache *net_cachep; +static struct workqueue_struct *netns_wq; + +static struct net *net_alloc(void) { - struct net *new_net = NULL; - int err; + struct net *net = NULL; + struct net_generic *ng; - get_net(old_net); + ng = net_alloc_generic(); + if (!ng) + goto out; - if (!(flags & CLONE_NEWNET)) - return old_net; + net = kmem_cache_zalloc(net_cachep, GFP_KERNEL); + if (!net) + goto out_free; -#ifndef CONFIG_NET_NS - return ERR_PTR(-EINVAL); + rcu_assign_pointer(net->gen, ng); +out: + return net; + +out_free: + kfree(ng); + goto out; +} + +static void net_free(struct net *net) +{ +#ifdef NETNS_REFCNT_DEBUG + if (unlikely(atomic_read(&net->use_count) != 0)) { + pr_emerg("network namespace not free! Usage: %d\n", + atomic_read(&net->use_count)); + return; + } #endif + kfree(net->gen); + kmem_cache_free(net_cachep, net); +} - err = -ENOMEM; - new_net = net_alloc(); - if (!new_net) - goto out; +void net_drop_ns(void *p) +{ + struct net *ns = p; + if (ns && atomic_dec_and_test(&ns->passive)) + net_free(ns); +} + +struct net *copy_net_ns(unsigned long flags, + struct user_namespace *user_ns, struct net *old_net) +{ + struct net *net; + int rv; + + if (!(flags & CLONE_NEWNET)) + return get_net(old_net); + + net = net_alloc(); + if (!net) + return ERR_PTR(-ENOMEM); + + get_user_ns(user_ns); + + mutex_lock(&net_mutex); + rv = setup_net(net, user_ns); + if (rv == 0) { + rtnl_lock(); + list_add_tail_rcu(&net->list, &net_namespace_list); + rtnl_unlock(); + } + mutex_unlock(&net_mutex); + if (rv < 0) { + put_user_ns(user_ns); + net_drop_ns(net); + return ERR_PTR(rv); + } + return net; +} + +static DEFINE_SPINLOCK(cleanup_list_lock); +static LIST_HEAD(cleanup_list); /* Must hold cleanup_list_lock to touch */ + +static void cleanup_net(struct work_struct *work) +{ + const struct pernet_operations *ops; + struct net *net, *tmp; + struct list_head net_kill_list; + LIST_HEAD(net_exit_list); + + /* Atomically snapshot the list of namespaces to cleanup */ + spin_lock_irq(&cleanup_list_lock); + list_replace_init(&cleanup_list, &net_kill_list); + spin_unlock_irq(&cleanup_list_lock); mutex_lock(&net_mutex); - err = setup_net(new_net); - if (err) - goto out_unlock; + /* Don't let anyone else find us. */ rtnl_lock(); - list_add_tail(&new_net->list, &net_namespace_list); + list_for_each_entry(net, &net_kill_list, cleanup_list) { + list_del_rcu(&net->list); + list_add_tail(&net->exit_list, &net_exit_list); + } rtnl_unlock(); + /* + * Another CPU might be rcu-iterating the list, wait for it. + * This needs to be before calling the exit() notifiers, so + * the rcu_barrier() below isn't sufficient alone. + */ + synchronize_rcu(); + + /* Run all of the network namespace exit methods */ + list_for_each_entry_reverse(ops, &pernet_list, list) + ops_exit_list(ops, &net_exit_list); + + /* Free the net generic variables */ + list_for_each_entry_reverse(ops, &pernet_list, list) + ops_free_list(ops, &net_exit_list); -out_unlock: mutex_unlock(&net_mutex); -out: - put_net(old_net); - if (err) { - net_free(new_net); - new_net = ERR_PTR(err); + + /* Ensure there are no outstanding rcu callbacks using this + * network namespace. + */ + rcu_barrier(); + + /* Finally it is safe to free my network namespace structure */ + list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { + list_del_init(&net->exit_list); + put_user_ns(net->user_ns); + net_drop_ns(net); + } +} +static DECLARE_WORK(net_cleanup_work, cleanup_net); + +void __put_net(struct net *net) +{ + /* Cleanup the network namespace in process context */ + unsigned long flags; + + spin_lock_irqsave(&cleanup_list_lock, flags); + list_add(&net->cleanup_list, &cleanup_list); + spin_unlock_irqrestore(&cleanup_list_lock, flags); + + queue_work(netns_wq, &net_cleanup_work); +} +EXPORT_SYMBOL_GPL(__put_net); + +struct net *get_net_ns_by_fd(int fd) +{ + struct proc_ns *ei; + struct file *file; + struct net *net; + + file = proc_ns_fget(fd); + if (IS_ERR(file)) + return ERR_CAST(file); + + ei = get_proc_ns(file_inode(file)); + if (ei->ns_ops == &netns_operations) + net = get_net(ei->ns); + else + net = ERR_PTR(-EINVAL); + + fput(file); + return net; +} + +#else +struct net *get_net_ns_by_fd(int fd) +{ + return ERR_PTR(-EINVAL); +} +#endif + +struct net *get_net_ns_by_pid(pid_t pid) +{ + struct task_struct *tsk; + struct net *net; + + /* Lookup the network namespace */ + net = ERR_PTR(-ESRCH); + rcu_read_lock(); + tsk = find_task_by_vpid(pid); + if (tsk) { + struct nsproxy *nsproxy; + nsproxy = task_nsproxy(tsk); + if (nsproxy) + net = get_net(nsproxy->net_ns); } - return new_net; + rcu_read_unlock(); + return net; } +EXPORT_SYMBOL_GPL(get_net_ns_by_pid); + +static __net_init int net_ns_net_init(struct net *net) +{ + return proc_alloc_inum(&net->proc_inum); +} + +static __net_exit void net_ns_net_exit(struct net *net) +{ + proc_free_inum(net->proc_inum); +} + +static struct pernet_operations __net_initdata net_ns_ops = { + .init = net_ns_net_init, + .exit = net_ns_net_exit, +}; static int __init net_ns_init(void) { - int err; + struct net_generic *ng; - printk(KERN_INFO "net_namespace: %zd bytes\n", sizeof(struct net)); +#ifdef CONFIG_NET_NS net_cachep = kmem_cache_create("net_namespace", sizeof(struct net), SMP_CACHE_BYTES, SLAB_PANIC, NULL); + + /* Create workqueue for cleanup */ + netns_wq = create_singlethread_workqueue("netns"); + if (!netns_wq) + panic("Could not create netns workq"); +#endif + + ng = net_alloc_generic(); + if (!ng) + panic("Could not allocate generic netns"); + + rcu_assign_pointer(init_net.gen, ng); + mutex_lock(&net_mutex); - err = setup_net(&init_net); + if (setup_net(&init_net, &init_user_ns)) + panic("Could not setup the initial network namespace"); rtnl_lock(); - list_add_tail(&init_net.list, &net_namespace_list); + list_add_tail_rcu(&init_net.list, &net_namespace_list); rtnl_unlock(); mutex_unlock(&net_mutex); - if (err) - panic("Could not setup the initial network namespace"); + + register_pernet_subsys(&net_ns_ops); return 0; } pure_initcall(net_ns_init); -static int register_pernet_operations(struct list_head *list, - struct pernet_operations *ops) +#ifdef CONFIG_NET_NS +static int __register_pernet_operations(struct list_head *list, + struct pernet_operations *ops) { - struct net *net, *undo_net; + struct net *net; int error; + LIST_HEAD(net_exit_list); - error = 0; list_add_tail(&ops->list, list); - for_each_net(net) { - if (ops->init) { - error = ops->init(net); + if (ops->init || (ops->id && ops->size)) { + for_each_net(net) { + error = ops_init(ops, net); if (error) goto out_undo; + list_add_tail(&net->exit_list, &net_exit_list); } } -out: - return error; + return 0; out_undo: /* If I have an error cleanup all namespaces I initialized */ list_del(&ops->list); - for_each_net(undo_net) { - if (undo_net == net) - goto undone; - if (ops->exit) - ops->exit(undo_net); - } -undone: - goto out; + ops_exit_list(ops, &net_exit_list); + ops_free_list(ops, &net_exit_list); + return error; } -static void unregister_pernet_operations(struct pernet_operations *ops) +static void __unregister_pernet_operations(struct pernet_operations *ops) { struct net *net; + LIST_HEAD(net_exit_list); list_del(&ops->list); for_each_net(net) - if (ops->exit) - ops->exit(net); + list_add_tail(&net->exit_list, &net_exit_list); + ops_exit_list(ops, &net_exit_list); + ops_free_list(ops, &net_exit_list); +} + +#else + +static int __register_pernet_operations(struct list_head *list, + struct pernet_operations *ops) +{ + return ops_init(ops, &init_net); +} + +static void __unregister_pernet_operations(struct pernet_operations *ops) +{ + LIST_HEAD(net_exit_list); + list_add(&init_net.exit_list, &net_exit_list); + ops_exit_list(ops, &net_exit_list); + ops_free_list(ops, &net_exit_list); +} + +#endif /* CONFIG_NET_NS */ + +static DEFINE_IDA(net_generic_ids); + +static int register_pernet_operations(struct list_head *list, + struct pernet_operations *ops) +{ + int error; + + if (ops->id) { +again: + error = ida_get_new_above(&net_generic_ids, 1, ops->id); + if (error < 0) { + if (error == -EAGAIN) { + ida_pre_get(&net_generic_ids, GFP_KERNEL); + goto again; + } + return error; + } + max_gen_ptrs = max_t(unsigned int, max_gen_ptrs, *ops->id); + } + error = __register_pernet_operations(list, ops); + if (error) { + rcu_barrier(); + if (ops->id) + ida_remove(&net_generic_ids, *ops->id); + } + + return error; +} + +static void unregister_pernet_operations(struct pernet_operations *ops) +{ + + __unregister_pernet_operations(ops); + rcu_barrier(); + if (ops->id) + ida_remove(&net_generic_ids, *ops->id); } /** @@ -254,14 +564,14 @@ EXPORT_SYMBOL_GPL(register_pernet_subsys); * @ops: pernet operations structure to manipulate * * Remove the pernet operations structure from the list to be - * used when network namespaces are created or destoryed. In + * used when network namespaces are created or destroyed. In * addition run the exit method for all existing network * namespaces. */ -void unregister_pernet_subsys(struct pernet_operations *module) +void unregister_pernet_subsys(struct pernet_operations *ops) { mutex_lock(&net_mutex); - unregister_pernet_operations(module); + unregister_pernet_operations(ops); mutex_unlock(&net_mutex); } EXPORT_SYMBOL_GPL(unregister_pernet_subsys); @@ -302,7 +612,7 @@ EXPORT_SYMBOL_GPL(register_pernet_device); * @ops: pernet operations structure to manipulate * * Remove the pernet operations structure from the list to be - * used when network namespaces are created or destoryed. In + * used when network namespaces are created or destroyed. In * addition run the exit method for all existing network * namespaces. */ @@ -315,3 +625,52 @@ void unregister_pernet_device(struct pernet_operations *ops) mutex_unlock(&net_mutex); } EXPORT_SYMBOL_GPL(unregister_pernet_device); + +#ifdef CONFIG_NET_NS +static void *netns_get(struct task_struct *task) +{ + struct net *net = NULL; + struct nsproxy *nsproxy; + + rcu_read_lock(); + nsproxy = task_nsproxy(task); + if (nsproxy) + net = get_net(nsproxy->net_ns); + rcu_read_unlock(); + + return net; +} + +static void netns_put(void *ns) +{ + put_net(ns); +} + +static int netns_install(struct nsproxy *nsproxy, void *ns) +{ + struct net *net = ns; + + if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) || + !ns_capable(current_user_ns(), CAP_SYS_ADMIN)) + return -EPERM; + + put_net(nsproxy->net_ns); + nsproxy->net_ns = get_net(net); + return 0; +} + +static unsigned int netns_inum(void *ns) +{ + struct net *net = ns; + return net->proc_inum; +} + +const struct proc_ns_operations netns_operations = { + .name = "net", + .type = CLONE_NEWNET, + .get = netns_get, + .put = netns_put, + .install = netns_install, + .inum = netns_inum, +}; +#endif |
