diff options
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/dev.c | 44 | ||||
-rw-r--r-- | net/core/dev_mcast.c | 37 | ||||
-rw-r--r-- | net/core/dst.c | 15 | ||||
-rw-r--r-- | net/core/ethtool.c | 64 | ||||
-rw-r--r-- | net/core/fib_rules.c | 13 | ||||
-rw-r--r-- | net/core/filter.c | 57 | ||||
-rw-r--r-- | net/core/neighbour.c | 171 | ||||
-rw-r--r-- | net/core/net_namespace.c | 104 | ||||
-rw-r--r-- | net/core/netpoll.c | 6 | ||||
-rw-r--r-- | net/core/pktgen.c | 2 | ||||
-rw-r--r-- | net/core/rtnetlink.c | 45 | ||||
-rw-r--r-- | net/core/skbuff.c | 100 | ||||
-rw-r--r-- | net/core/sock.c | 153 | ||||
-rw-r--r-- | net/core/sysctl_net_core.c | 12 |
14 files changed, 598 insertions, 225 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index 460e7f99ce3..e1df1ab3e04 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -216,7 +216,7 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex) /* Device list insertion */ static int list_netdevice(struct net_device *dev) { - struct net *net = dev->nd_net; + struct net *net = dev_net(dev); ASSERT_RTNL(); @@ -852,8 +852,8 @@ int dev_alloc_name(struct net_device *dev, const char *name) struct net *net; int ret; - BUG_ON(!dev->nd_net); - net = dev->nd_net; + BUG_ON(!dev_net(dev)); + net = dev_net(dev); ret = __dev_alloc_name(net, name, buf); if (ret >= 0) strlcpy(dev->name, buf, IFNAMSIZ); @@ -877,9 +877,9 @@ int dev_change_name(struct net_device *dev, char *newname) struct net *net; ASSERT_RTNL(); - BUG_ON(!dev->nd_net); + BUG_ON(!dev_net(dev)); - net = dev->nd_net; + net = dev_net(dev); if (dev->flags & IFF_UP) return -EBUSY; @@ -2615,7 +2615,7 @@ static int ptype_seq_show(struct seq_file *seq, void *v) if (v == SEQ_START_TOKEN) seq_puts(seq, "Type Device Function\n"); - else { + else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) { if (pt->type == htons(ETH_P_ALL)) seq_puts(seq, "ALL "); else @@ -2639,7 +2639,8 @@ static const struct seq_operations ptype_seq_ops = { static int ptype_seq_open(struct inode *inode, struct file *file) { - return seq_open(file, &ptype_seq_ops); + return seq_open_net(inode, file, &ptype_seq_ops, + sizeof(struct seq_net_private)); } static const struct file_operations ptype_seq_fops = { @@ -2647,7 +2648,7 @@ static const struct file_operations ptype_seq_fops = { .open = ptype_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = seq_release_net, }; @@ -3688,8 +3689,8 @@ int register_netdevice(struct net_device *dev) /* When net_device's are persistent, this will be fatal. */ BUG_ON(dev->reg_state != NETREG_UNINITIALIZED); - BUG_ON(!dev->nd_net); - net = dev->nd_net; + BUG_ON(!dev_net(dev)); + net = dev_net(dev); spin_lock_init(&dev->queue_lock); spin_lock_init(&dev->_xmit_lock); @@ -3995,11 +3996,15 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, BUG_ON(strlen(name) >= sizeof(dev->name)); - /* ensure 32-byte alignment of both the device and private area */ - alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST + - (sizeof(struct net_device_subqueue) * (queue_count - 1))) & - ~NETDEV_ALIGN_CONST; - alloc_size += sizeof_priv + NETDEV_ALIGN_CONST; + alloc_size = sizeof(struct net_device) + + sizeof(struct net_device_subqueue) * (queue_count - 1); + if (sizeof_priv) { + /* ensure 32-byte alignment of private area */ + alloc_size = (alloc_size + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST; + alloc_size += sizeof_priv; + } + /* ensure 32-byte alignment of whole construct */ + alloc_size += NETDEV_ALIGN_CONST; p = kzalloc(alloc_size, GFP_KERNEL); if (!p) { @@ -4010,7 +4015,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, dev = (struct net_device *) (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST); dev->padded = (char *)dev - (char *)p; - dev->nd_net = &init_net; + dev_net_set(dev, &init_net); if (sizeof_priv) { dev->priv = ((char *)dev + @@ -4021,6 +4026,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, } dev->egress_subqueue_count = queue_count; + dev->gso_max_size = GSO_MAX_SIZE; dev->get_stats = internal_stats; netpoll_netdev_init(dev); @@ -4040,6 +4046,8 @@ EXPORT_SYMBOL(alloc_netdev_mq); */ void free_netdev(struct net_device *dev) { + release_net(dev_net(dev)); + /* Compatibility with error handling in drivers */ if (dev->reg_state == NETREG_UNINITIALIZED) { kfree((char *)dev - dev->padded); @@ -4134,7 +4142,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char /* Get out if there is nothing todo */ err = 0; - if (dev->nd_net == net) + if (net_eq(dev_net(dev), net)) goto out; /* Pick the destination device name, and ensure @@ -4185,7 +4193,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char dev_addr_discard(dev); /* Actually switch the network namespace */ - dev->nd_net = net; + dev_net_set(dev, net); /* Assign the new device name */ if (destname != dev->name) diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c index cec582563e0..f8a3455f449 100644 --- a/net/core/dev_mcast.c +++ b/net/core/dev_mcast.c @@ -156,39 +156,14 @@ void dev_mc_unsync(struct net_device *to, struct net_device *from) EXPORT_SYMBOL(dev_mc_unsync); #ifdef CONFIG_PROC_FS -static void *dev_mc_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(dev_base_lock) -{ - struct net *net = seq_file_net(seq); - struct net_device *dev; - loff_t off = 0; - - read_lock(&dev_base_lock); - for_each_netdev(net, dev) { - if (off++ == *pos) - return dev; - } - return NULL; -} - -static void *dev_mc_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - ++*pos; - return next_net_device((struct net_device *)v); -} - -static void dev_mc_seq_stop(struct seq_file *seq, void *v) - __releases(dev_base_lock) -{ - read_unlock(&dev_base_lock); -} - - static int dev_mc_seq_show(struct seq_file *seq, void *v) { struct dev_addr_list *m; struct net_device *dev = v; + if (v == SEQ_START_TOKEN) + return 0; + netif_tx_lock_bh(dev); for (m = dev->mc_list; m; m = m->next) { int i; @@ -206,9 +181,9 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v) } static const struct seq_operations dev_mc_seq_ops = { - .start = dev_mc_seq_start, - .next = dev_mc_seq_next, - .stop = dev_mc_seq_stop, + .start = dev_seq_start, + .next = dev_seq_next, + .stop = dev_seq_stop, .show = dev_mc_seq_show, }; diff --git a/net/core/dst.c b/net/core/dst.c index 7deef483c79..fe03266130b 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -259,6 +259,16 @@ again: return NULL; } +void dst_release(struct dst_entry *dst) +{ + if (dst) { + WARN_ON(atomic_read(&dst->__refcnt) < 1); + smp_mb__before_atomic_dec(); + atomic_dec(&dst->__refcnt); + } +} +EXPORT_SYMBOL(dst_release); + /* Dirty hack. We did it in 2.2 (in __dst_free), * we have _very_ good reasons not to repeat * this mistake in 2.3, but we have no choice @@ -279,7 +289,7 @@ static inline void dst_ifdown(struct dst_entry *dst, struct net_device *dev, if (!unregister) { dst->input = dst->output = dst_discard; } else { - dst->dev = dst->dev->nd_net->loopback_dev; + dst->dev = dev_net(dst->dev)->loopback_dev; dev_hold(dst->dev); dev_put(dev); if (dst->neighbour && dst->neighbour->dev == dev) { @@ -295,9 +305,6 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event, void struct net_device *dev = ptr; struct dst_entry *dst, *last = NULL; - if (dev->nd_net != &init_net) - return NOTIFY_DONE; - switch (event) { case NETDEV_UNREGISTER: case NETDEV_DOWN: diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 1163eb2256d..a29b43d0b45 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -284,8 +284,10 @@ static int ethtool_get_eeprom(struct net_device *dev, void __user *useraddr) { struct ethtool_eeprom eeprom; const struct ethtool_ops *ops = dev->ethtool_ops; + void __user *userbuf = useraddr + sizeof(eeprom); + u32 bytes_remaining; u8 *data; - int ret; + int ret = 0; if (!ops->get_eeprom || !ops->get_eeprom_len) return -EOPNOTSUPP; @@ -301,26 +303,26 @@ static int ethtool_get_eeprom(struct net_device *dev, void __user *useraddr) if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev)) return -EINVAL; - data = kmalloc(eeprom.len, GFP_USER); + data = kmalloc(PAGE_SIZE, GFP_USER); if (!data) return -ENOMEM; - ret = -EFAULT; - if (copy_from_user(data, useraddr + sizeof(eeprom), eeprom.len)) - goto out; - - ret = ops->get_eeprom(dev, &eeprom, data); - if (ret) - goto out; + bytes_remaining = eeprom.len; + while (bytes_remaining > 0) { + eeprom.len = min(bytes_remaining, (u32)PAGE_SIZE); - ret = -EFAULT; - if (copy_to_user(useraddr, &eeprom, sizeof(eeprom))) - goto out; - if (copy_to_user(useraddr + sizeof(eeprom), data, eeprom.len)) - goto out; - ret = 0; + ret = ops->get_eeprom(dev, &eeprom, data); + if (ret) + break; + if (copy_to_user(userbuf, data, eeprom.len)) { + ret = -EFAULT; + break; + } + userbuf += eeprom.len; + eeprom.offset += eeprom.len; + bytes_remaining -= eeprom.len; + } - out: kfree(data); return ret; } @@ -329,8 +331,10 @@ static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr) { struct ethtool_eeprom eeprom; const struct ethtool_ops *ops = dev->ethtool_ops; + void __user *userbuf = useraddr + sizeof(eeprom); + u32 bytes_remaining; u8 *data; - int ret; + int ret = 0; if (!ops->set_eeprom || !ops->get_eeprom_len) return -EOPNOTSUPP; @@ -346,22 +350,26 @@ static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr) if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev)) return -EINVAL; - data = kmalloc(eeprom.len, GFP_USER); + data = kmalloc(PAGE_SIZE, GFP_USER); if (!data) return -ENOMEM; - ret = -EFAULT; - if (copy_from_user(data, useraddr + sizeof(eeprom), eeprom.len)) - goto out; - - ret = ops->set_eeprom(dev, &eeprom, data); - if (ret) - goto out; + bytes_remaining = eeprom.len; + while (bytes_remaining > 0) { + eeprom.len = min(bytes_remaining, (u32)PAGE_SIZE); - if (copy_to_user(useraddr + sizeof(eeprom), data, eeprom.len)) - ret = -EFAULT; + if (copy_from_user(data, userbuf, eeprom.len)) { + ret = -EFAULT; + break; + } + ret = ops->set_eeprom(dev, &eeprom, data); + if (ret) + break; + userbuf += eeprom.len; + eeprom.offset += eeprom.len; + bytes_remaining -= eeprom.len; + } - out: kfree(data); return ret; } diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 42ccaf5b850..e3e9ab0f74e 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -29,7 +29,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops, r->pref = pref; r->table = table; r->flags = flags; - r->fr_net = ops->fro_net; + r->fr_net = hold_net(ops->fro_net); /* The lock is not required here, the list in unreacheable * at the moment this function is called */ @@ -214,7 +214,7 @@ errout: static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); struct fib_rule_hdr *frh = nlmsg_data(nlh); struct fib_rules_ops *ops = NULL; struct fib_rule *rule, *r, *last = NULL; @@ -243,7 +243,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) err = -ENOMEM; goto errout; } - rule->fr_net = net; + rule->fr_net = hold_net(net); if (tb[FRA_PRIORITY]) rule->pref = nla_get_u32(tb[FRA_PRIORITY]); @@ -344,6 +344,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) return 0; errout_free: + release_net(rule->fr_net); kfree(rule); errout: rules_ops_put(ops); @@ -352,7 +353,7 @@ errout: static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); struct fib_rule_hdr *frh = nlmsg_data(nlh); struct fib_rules_ops *ops = NULL; struct fib_rule *rule, *tmp; @@ -534,7 +535,7 @@ skip: static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); struct fib_rules_ops *ops; int idx = 0, family; @@ -618,7 +619,7 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = ptr; - struct net *net = dev->nd_net; + struct net *net = dev_net(dev); struct fib_rules_ops *ops; ASSERT_RTNL(); diff --git a/net/core/filter.c b/net/core/filter.c index e0a06942c02..f5f3cf60306 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -27,6 +27,7 @@ #include <linux/if_packet.h> #include <net/ip.h> #include <net/protocol.h> +#include <net/netlink.h> #include <linux/skbuff.h> #include <net/sock.h> #include <linux/errno.h> @@ -64,6 +65,41 @@ static inline void *load_pointer(struct sk_buff *skb, int k, } /** + * sk_filter - run a packet through a socket filter + * @sk: sock associated with &sk_buff + * @skb: buffer to filter + * @needlock: set to 1 if the sock is not locked by caller. + * + * Run the filter code and then cut skb->data to correct size returned by + * sk_run_filter. If pkt_len is 0 we toss packet. If skb->len is smaller + * than pkt_len we keep whole skb->data. This is the socket level + * wrapper to sk_run_filter. It returns 0 if the packet should + * be accepted or -EPERM if the packet should be tossed. + * + */ +int sk_filter(struct sock *sk, struct sk_buff *skb) +{ + int err; + struct sk_filter *filter; + + err = security_sock_rcv_skb(sk, skb); + if (err) + return err; + + rcu_read_lock_bh(); + filter = rcu_dereference(sk->sk_filter); + if (filter) { + unsigned int pkt_len = sk_run_filter(skb, filter->insns, + filter->len); + err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM; + } + rcu_read_unlock_bh(); + + return err; +} +EXPORT_SYMBOL(sk_filter); + +/** * sk_run_filter - run a filter on a socket * @skb: buffer to run the filter on * @filter: filter to apply @@ -268,6 +304,22 @@ load_b: case SKF_AD_IFINDEX: A = skb->dev->ifindex; continue; + case SKF_AD_NLATTR: { + struct nlattr *nla; + + if (skb_is_nonlinear(skb)) + return 0; + if (A > skb->len - sizeof(struct nlattr)) + return 0; + + nla = nla_find((struct nlattr *)&skb->data[A], + skb->len - A, X); + if (nla) + A = (void *)nla - (void *)skb->data; + else + A = 0; + continue; + } default: return 0; } @@ -275,6 +327,7 @@ load_b: return 0; } +EXPORT_SYMBOL(sk_run_filter); /** * sk_chk_filter - verify socket filter code @@ -385,6 +438,7 @@ int sk_chk_filter(struct sock_filter *filter, int flen) return (BPF_CLASS(filter[flen - 1].code) == BPF_RET) ? 0 : -EINVAL; } +EXPORT_SYMBOL(sk_chk_filter); /** * sk_filter_rcu_release: Release a socket filter by rcu_head @@ -467,6 +521,3 @@ int sk_detach_filter(struct sock *sk) rcu_read_unlock_bh(); return ret; } - -EXPORT_SYMBOL(sk_chk_filter); -EXPORT_SYMBOL(sk_run_filter); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 19b8e003f15..75075c303c4 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -123,6 +123,7 @@ unsigned long neigh_rand_reach_time(unsigned long base) { return (base ? (net_random() % base) + (base >> 1) : 0); } +EXPORT_SYMBOL(neigh_rand_reach_time); static int neigh_forced_gc(struct neigh_table *tbl) @@ -241,6 +242,7 @@ void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev) neigh_flush_dev(tbl, dev); write_unlock_bh(&tbl->lock); } +EXPORT_SYMBOL(neigh_changeaddr); int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev) { @@ -253,6 +255,7 @@ int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev) pneigh_queue_purge(&tbl->proxy_queue); return 0; } +EXPORT_SYMBOL(neigh_ifdown); static struct neighbour *neigh_alloc(struct neigh_table *tbl) { @@ -374,6 +377,7 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, read_unlock_bh(&tbl->lock); return n; } +EXPORT_SYMBOL(neigh_lookup); struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net, const void *pkey) @@ -388,7 +392,7 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net, hash_val = tbl->hash(pkey, NULL); for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) { if (!memcmp(n->primary_key, pkey, key_len) && - (net == n->dev->nd_net)) { + net_eq(dev_net(n->dev), net)) { neigh_hold(n); NEIGH_CACHE_STAT_INC(tbl, hits); break; @@ -397,6 +401,7 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net, read_unlock_bh(&tbl->lock); return n; } +EXPORT_SYMBOL(neigh_lookup_nodev); struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey, struct net_device *dev) @@ -465,28 +470,44 @@ out_neigh_release: neigh_release(n); goto out; } +EXPORT_SYMBOL(neigh_create); -struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl, - struct net *net, const void *pkey, struct net_device *dev) +static u32 pneigh_hash(const void *pkey, int key_len) { - struct pneigh_entry *n; - int key_len = tbl->key_len; u32 hash_val = *(u32 *)(pkey + key_len - 4); - hash_val ^= (hash_val >> 16); hash_val ^= hash_val >> 8; hash_val ^= hash_val >> 4; hash_val &= PNEIGH_HASHMASK; + return hash_val; +} - for (n = tbl->phash_buckets[hash_val]; n; n = n->next) { +static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n, + struct net *net, + const void *pkey, + int key_len, + struct net_device *dev) +{ + while (n) { if (!memcmp(n->key, pkey, key_len) && - (n->net == net) && + net_eq(pneigh_net(n), net) && (n->dev == dev || !n->dev)) - break; + return n; + n = n->next; } + return NULL; +} - return n; +struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl, + struct net *net, const void *pkey, struct net_device *dev) +{ + int key_len = tbl->key_len; + u32 hash_val = pneigh_hash(pkey, key_len); + + return __pneigh_lookup_1(tbl->phash_buckets[hash_val], + net, pkey, key_len, dev); } +EXPORT_SYMBOL_GPL(__pneigh_lookup); struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, struct net *net, const void *pkey, @@ -494,26 +515,14 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, { struct pneigh_entry *n; int key_len = tbl->key_len; - u32 hash_val = *(u32 *)(pkey + key_len - 4); - - hash_val ^= (hash_val >> 16); - hash_val ^= hash_val >> 8; - hash_val ^= hash_val >> 4; - hash_val &= PNEIGH_HASHMASK; + u32 hash_val = pneigh_hash(pkey, key_len); read_lock_bh(&tbl->lock); - - for (n = tbl->phash_buckets[hash_val]; n; n = n->next) { - if (!memcmp(n->key, pkey, key_len) && - (n->net == net) && - (n->dev == dev || !n->dev)) { - read_unlock_bh(&tbl->lock); - goto out; - } - } + n = __pneigh_lookup_1(tbl->phash_buckets[hash_val], + net, pkey, key_len, dev); read_unlock_bh(&tbl->lock); - n = NULL; - if (!creat) + + if (n || !creat) goto out; ASSERT_RTNL(); @@ -522,7 +531,9 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, if (!n) goto out; +#ifdef CONFIG_NET_NS n->net = hold_net(net); +#endif memcpy(n->key, pkey, key_len); n->dev = dev; if (dev) @@ -544,6 +555,7 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, out: return n; } +EXPORT_SYMBOL(pneigh_lookup); int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey, @@ -551,25 +563,20 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey, { struct pneigh_entry *n, **np; int key_len = tbl->key_len; - u32 hash_val = *(u32 *)(pkey + key_len - 4); - - hash_val ^= (hash_val >> 16); - hash_val ^= hash_val >> 8; - hash_val ^= hash_val >> 4; - hash_val &= PNEIGH_HASHMASK; + u32 hash_val = pneigh_hash(pkey, key_len); write_lock_bh(&tbl->lock); for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL; np = &n->next) { if (!memcmp(n->key, pkey, key_len) && n->dev == dev && - (n->net == net)) { + net_eq(pneigh_net(n), net)) { *np = n->next; write_unlock_bh(&tbl->lock); if (tbl->pdestructor) tbl->pdestructor(n); if (n->dev) dev_put(n->dev); - release_net(n->net); + release_net(pneigh_net(n)); kfree(n); return 0; } @@ -592,7 +599,7 @@ static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev) tbl->pdestructor(n); if (n->dev) dev_put(n->dev); - release_net(n->net); + release_net(pneigh_net(n)); kfree(n); continue; } @@ -651,6 +658,7 @@ void neigh_destroy(struct neighbour *neigh) atomic_dec(&neigh->tbl->entries); kmem_cache_free(neigh->tbl->kmem_cachep, neigh); } +EXPORT_SYMBOL(neigh_destroy); /* Neighbour state is suspicious; disable fast path. @@ -931,6 +939,7 @@ out_unlock_bh: write_unlock_bh(&neigh->lock); return rc; } +EXPORT_SYMBOL(__neigh_event_send); static void neigh_update_hhs(struct neighbour *neigh) { @@ -1103,6 +1112,7 @@ out: return err; } +EXPORT_SYMBOL(neigh_update); struct neighbour *neigh_event_ns(struct neigh_table *tbl, u8 *lladdr, void *saddr, @@ -1115,6 +1125,7 @@ struct neighbour *neigh_event_ns(struct neigh_table *tbl, NEIGH_UPDATE_F_OVERRIDE); return neigh; } +EXPORT_SYMBOL(neigh_event_ns); static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst, __be16 protocol) @@ -1169,6 +1180,7 @@ int neigh_compat_output(struct sk_buff *skb) return dev_queue_xmit(skb); } +EXPORT_SYMBOL(neigh_compat_output); /* Slow and careful. */ @@ -1214,6 +1226,7 @@ out_kfree_skb: kfree_skb(skb); goto out; } +EXPORT_SYMBOL(neigh_resolve_output); /* As fast as possible without hh cache */ @@ -1238,6 +1251,7 @@ int neigh_connected_output(struct sk_buff *skb) } return err; } +EXPORT_SYMBOL(neigh_connected_output); static void neigh_proxy_process(unsigned long arg) { @@ -1299,6 +1313,7 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, mod_timer(&tbl->proxy_timer, sched_next); spin_unlock(&tbl->proxy_queue.lock); } +EXPORT_SYMBOL(pneigh_enqueue); static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl, struct net *net, int ifindex) @@ -1306,9 +1321,7 @@ static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl, struct neigh_parms *p; for (p = &tbl->parms; p; p = p->next) { - if (p->net != net) - continue; - if ((p->dev && p->dev->ifindex == ifindex) || + if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) || (!p->dev && !ifindex)) return p; } @@ -1322,7 +1335,7 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev, struct neigh_parms *p, *ref; struct net *net; - net = dev->nd_net; + net = dev_net(dev); ref = lookup_neigh_params(tbl, net, 0); if (!ref) return NULL; @@ -1342,7 +1355,9 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev, dev_hold(dev); p->dev = dev; +#ifdef CONFIG_NET_NS p->net = hold_net(net); +#endif p->sysctl_table = NULL; write_lock_bh(&tbl->lock); p->next = tbl->parms.next; @@ -1351,6 +1366,7 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev, } return p; } +EXPORT_SYMBOL(neigh_parms_alloc); static void neigh_rcu_free_parms(struct rcu_head *head) { @@ -1381,10 +1397,11 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms) write_unlock_bh(&tbl->lock); NEIGH_PRINTK1("neigh_parms_release: not found\n"); } +EXPORT_SYMBOL(neigh_parms_release); static void neigh_parms_destroy(struct neigh_parms *parms) { - release_net(parms->net); + release_net(neigh_parms_net(parms)); kfree(parms); } @@ -1395,7 +1412,9 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl) unsigned long now = jiffies; unsigned long phsize; +#ifdef CONFIG_NET_NS tbl->parms.net = &init_net; +#endif atomic_set(&tbl->parms.refcnt, 1); INIT_RCU_HEAD(&tbl->parms.rcu_head); tbl->parms.reachable_time = @@ -1441,6 +1460,7 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl) tbl->last_flush = now; tbl->last_rand = now + tbl->parms.reachable_time * 20; } +EXPORT_SYMBOL(neigh_table_init_no_netlink); void neigh_table_init(struct neigh_table *tbl) { @@ -1462,6 +1482,7 @@ void neigh_table_init(struct neigh_table *tbl) dump_stack(); } } +EXPORT_SYMBOL(neigh_table_init); int neigh_table_clear(struct neigh_table *tbl) { @@ -1499,10 +1520,11 @@ int neigh_table_clear(struct neigh_table *tbl) return 0; } +EXPORT_SYMBOL(neigh_table_clear); static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); struct ndmsg *ndm; struct nlattr *dst_attr; struct neigh_table *tbl; @@ -1568,7 +1590,7 @@ out: static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); struct ndmsg *ndm; struct nlattr *tb[NDA_MAX+1]; struct neigh_table *tbl; @@ -1836,7 +1858,7 @@ static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = { static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); struct neigh_table *tbl; struct ndtmsg *ndtmsg; struct nlattr *tb[NDTA_MAX+1]; @@ -1961,7 +1983,7 @@ errout: static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); int family, tidx, nidx = 0; int tbl_skip = cb->args[0]; int neigh_skip = cb->args[1]; @@ -1982,7 +2004,7 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) break; for (nidx = 0, p = tbl->parms.next; p; p = p->next) { - if (net != p->net) + if (!net_eq(neigh_parms_net(p), net)) continue; if (nidx++ < neigh_skip) @@ -2061,7 +2083,7 @@ static void neigh_update_notify(struct neighbour *neigh) static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, struct netlink_callback *cb) { - struct net * net = skb->sk->sk_net; + struct net * net = sock_net(skb->sk); struct neighbour *n; int rc, h, s_h = cb->args[1]; int idx, s_idx = idx = cb->args[2]; @@ -2074,7 +2096,7 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, s_idx = 0; for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next) { int lidx; - if (n->dev->nd_net != net) + if (dev_net(n->dev) != net) continue; lidx = idx++; if (lidx < s_idx) @@ -2169,7 +2191,7 @@ EXPORT_SYMBOL(__neigh_for_each_release); static struct neighbour *neigh_get_first(struct seq_file *seq) { struct neigh_seq_state *state = seq->private; - struct net *net = state->p.net; + struct net *net = seq_file_net(seq); struct neigh_table *tbl = state->tbl; struct neighbour *n = NULL; int bucket = state->bucket; @@ -2179,7 +2201,7 @@ static struct neighbour *neigh_get_first(struct seq_file *seq) n = tbl->hash_buckets[bucket]; while (n) { - if (n->dev->nd_net != net) + if (!net_eq(dev_net(n->dev), net)) goto next; if (state->neigh_sub_iter) { loff_t fakep = 0; @@ -2210,7 +2232,7 @@ static struct neighbour *neigh_get_next(struct seq_file *seq, loff_t *pos) { struct neigh_seq_state *state = seq->private; - struct net *net = state->p.net; + struct net *net = seq_file_net(seq); struct neigh_table *tbl = state->tbl; if (state->neigh_sub_iter) { @@ -2222,7 +2244,7 @@ static struct neighbour *neigh_get_next(struct seq_file *seq, while (1) { while (n) { - if (n->dev->nd_net != net) + if (!net_eq(dev_net(n->dev), net)) goto next; if (state->neigh_sub_iter) { void *v = state->neigh_sub_iter(state, n, pos); @@ -2270,7 +2292,7 @@ static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos) static struct pneigh_entry *pneigh_get_first(struct seq_file *seq) { struct neigh_seq_state *state = seq->private; - struct net * net = state->p.net; + struct net *net = seq_file_net(seq); struct neigh_table *tbl = state->tbl; struct pneigh_entry *pn = NULL; int bucket = state->bucket; @@ -2278,7 +2300,7 @@ static struct pneigh_entry *pneigh_get_first(struct seq_file *seq) state->flags |= NEIGH_SEQ_IS_PNEIGH; for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) { pn = tbl->phash_buckets[bucket]; - while (pn && (pn->net != net)) + while (pn && !net_eq(pneigh_net(pn), net)) pn = pn->next; if (pn) break; @@ -2293,7 +2315,7 @@ static struct pneigh_entry *pneigh_get_next(struct seq_file *seq, loff_t *pos) { struct neigh_seq_state *state = seq->private; - struct net * net = state->p.net; + struct net *net = seq_file_net(seq); struct neigh_table *tbl = state->tbl; pn = pn->next; @@ -2301,7 +2323,7 @@ static struct pneigh_entry *pneigh_get_next(struct seq_file *seq, if (++state->bucket > PNEIGH_HASHMASK) break; pn = tbl->phash_buckets[state->bucket]; - while (pn && (pn->net != net)) + while (pn && !net_eq(pneigh_net(pn), net)) pn = pn->next; if (pn) break; @@ -2506,7 +2528,7 @@ static inline size_t neigh_nlmsg_size(void) static void __neigh_notify(struct neighbour *n, int type, int flags) { - struct net *net = n->dev->nd_net; + struct net *net = dev_net(n->dev); struct sk_buff *skb; int err = -ENOBUFS; @@ -2532,6 +2554,7 @@ void neigh_app_ns(struct neighbour *n) { __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST); } +EXPORT_SYMBOL(neigh_app_ns); #endif /* CONFIG_ARPD */ #ifdef CONFIG_SYSCTL @@ -2763,7 +2786,8 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, neigh_path[NEIGH_CTL_PATH_PROTO].procname = p_name; neigh_path[NEIGH_CTL_PATH_PROTO].ctl_name = p_id; - t->sysctl_header = register_sysctl_paths(neigh_path, t->neigh_vars); + t->sysctl_header = + register_net_sysctl_table(neigh_parms_net(p), neigh_path, t->neigh_vars); if (!t->sysctl_header) goto free_procname; @@ -2777,6 +2801,7 @@ free: err: return -ENOBUFS; } +EXPORT_SYMBOL(neigh_sysctl_register); void neigh_sysctl_unregister(struct neigh_parms *p) { @@ -2788,6 +2813,7 @@ void neigh_sysctl_unregister(struct neigh_parms *p) kfree(t); } } +EXPORT_SYMBOL(neigh_sysctl_unregister); #endif /* CONFIG_SYSCTL */ @@ -2805,32 +2831,3 @@ static int __init neigh_init(void) subsys_initcall(neigh_init); -EXPORT_SYMBOL(__neigh_event_send); -EXPORT_SYMBOL(neigh_changeaddr); -EXPORT_SYMBOL(neigh_compat_output); -EXPORT_SYMBOL(neigh_connected_output); -EXPORT_SYMBOL(neigh_create); -EXPORT_SYMBOL(neigh_destroy); -EXPORT_SYMBOL(neigh_event_ns); -EXPORT_SYMBOL(neigh_ifdown); -EXPORT_SYMBOL(neigh_lookup); -EXPORT_SYMBOL(neigh_lookup_nodev); -EXPORT_SYMBOL(neigh_parms_alloc); -EXPORT_SYMBOL(neigh_parms_release); -EXPORT_SYMBOL(neigh_rand_reach_time); -EXPORT_SYMBOL(neigh_resolve_output); -EXPORT_SYMBOL(neigh_table_clear); -EXPORT_SYMBOL(neigh_table_init); -EXPORT_SYMBOL(neigh_table_init_no_netlink); -EXPORT_SYMBOL(neigh_update); -EXPORT_SYMBOL(pneigh_enqueue); -EXPORT_SYMBOL(pneigh_lookup); -EXPORT_SYMBOL_GPL(__pneigh_lookup); - -#ifdef CONFIG_ARPD -EXPORT_SYMBOL(neigh_app_ns); -#endif -#ifdef CONFIG_SYSCTL -EXPORT_SYMBOL(neigh_sysctl_register); -EXPORT_SYMBOL(neigh_sysctl_unregister); -#endif diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 7b660834a4c..72b4c184dd8 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -5,7 +5,9 @@ #include <linux/list.h> #include <linux/delay.h> #include <linux/sched.h> +#include <linux/idr.h> #include <net/net_namespace.h> +#include <net/netns/generic.h> /* * Our network namespace constructor/destructor lists @@ -20,6 +22,8 @@ LIST_HEAD(net_namespace_list); struct net init_net; EXPORT_SYMBOL(init_net); +#define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ + /* * setup_net runs the initializers for the network namespace object. */ @@ -28,9 +32,22 @@ static __net_init int setup_net(struct net *net) /* Must be called with net_mutex held */ struct pernet_operations *ops; int error; + struct net_generic *ng; atomic_set(&net->count, 1); +#ifdef NETNS_REFCNT_DEBUG atomic_set(&net->use_count, 0); +#endif + + error = -ENOMEM; + ng = kzalloc(sizeof(struct net_generic) + + INITIAL_NET_GEN_PTRS * sizeof(void *), GFP_KERNEL); + if (ng == NULL) + goto out; + + ng->len = INITIAL_NET_GEN_PTRS; + INIT_RCU_HEAD(&ng->rcu); + rcu_assign_pointer(net->gen, ng); error = 0; list_for_each_entry(ops, &pernet_list, list) { @@ -53,6 +70,7 @@ out_undo: } rcu_barrier(); + kfree(ng); goto out; } @@ -70,11 +88,13 @@ static void net_free(struct net *net) if (!net) return; +#ifdef NETNS_REFCNT_DEBUG if (unlikely(atomic_read(&net->use_count) != 0)) { printk(KERN_EMERG "network namespace not free! Usage: %d\n", atomic_read(&net->use_count)); return; } +#endif kmem_cache_free(net_cachep, net); } @@ -253,6 +273,8 @@ static void unregister_pernet_operations(struct pernet_operations *ops) } #endif +static DEFINE_IDA(net_generic_ids); + /** * register_pernet_subsys - register a network namespace subsystem * @ops: pernet operations structure for the subsystem @@ -330,6 +352,30 @@ int register_pernet_device(struct pernet_operations *ops) } EXPORT_SYMBOL_GPL(register_pernet_device); +int register_pernet_gen_device(int *id, struct pernet_operations *ops) +{ + int error; + mutex_lock(&net_mutex); +again: + error = ida_get_new_above(&net_generic_ids, 1, id); + if (error) { + if (error == -EAGAIN) { + ida_pre_get(&net_generic_ids, GFP_KERNEL); + goto again; + } + goto out; + } + error = register_pernet_operations(&pernet_list, ops); + if (error) + ida_remove(&net_generic_ids, *id); + else if (first_device == &pernet_list) + first_device = &ops->list; +out: + mutex_unlock(&net_mutex); + return error; +} +EXPORT_SYMBOL_GPL(register_pernet_gen_device); + /** * unregister_pernet_device - unregister a network namespace netdevice * @ops: pernet operations structure to manipulate @@ -348,3 +394,61 @@ void unregister_pernet_device(struct pernet_operations *ops) mutex_unlock(&net_mutex); } EXPORT_SYMBOL_GPL(unregister_pernet_device); + +void unregister_pernet_gen_device(int id, struct pernet_operations *ops) +{ + mutex_lock(&net_mutex); + if (&ops->list == first_device) + first_device = first_device->next; + unregister_pernet_operations(ops); + ida_remove(&net_generic_ids, id); + mutex_unlock(&net_mutex); +} +EXPORT_SYMBOL_GPL(unregister_pernet_gen_device); + +static void net_generic_release(struct rcu_head *rcu) +{ + struct net_generic *ng; + + ng = container_of(rcu, struct net_generic, rcu); + kfree(ng); +} + +int net_assign_generic(struct net *net, int id, void *data) +{ + struct net_generic *ng, *old_ng; + + BUG_ON(!mutex_is_locked(&net_mutex)); + BUG_ON(id == 0); + + ng = old_ng = net->gen; + if (old_ng->len >= id) + goto assign; + + ng = kzalloc(sizeof(struct net_generic) + + id * sizeof(void *), GFP_KERNEL); + if (ng == NULL) + return -ENOMEM; + + /* + * Some synchronisation notes: + * + * The net_generic explores the net->gen array inside rcu + * read section. Besides once set the net->gen->ptr[x] + * pointer never changes (see rules in netns/generic.h). + * + * That said, we simply duplicate this array and schedule + * the old copy for kfree after a grace period. + */ + + ng->len = id; + INIT_RCU_HEAD(&ng->rcu); + memcpy(&ng->ptr, &old_ng->ptr, old_ng->len); + + rcu_assign_pointer(net->gen, ng); + call_rcu(&old_ng->rcu, net_generic_release); +assign: + ng->ptr[id - 1] = data; + return 0; +} +EXPORT_SYMBOL_GPL(net_assign_generic); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index c635de52526..b04d643fc3c 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -390,9 +390,7 @@ static void arp_reply(struct sk_buff *skb) if (skb->dev->flags & IFF_NOARP) return; - if (!pskb_may_pull(skb, (sizeof(struct arphdr) + - (2 * skb->dev->addr_len) + - (2 * sizeof(u32))))) + if (!pskb_may_pull(skb, arp_hdr_len(skb->dev))) return; skb_reset_network_header(skb); @@ -420,7 +418,7 @@ static void arp_reply(struct sk_buff *skb) ipv4_is_loopback(tip) || ipv4_is_multicast(tip)) return; - size = sizeof(struct arphdr) + 2 * (skb->dev->addr_len + 4); + size = arp_hdr_len(skb->dev); send_skb = find_skb(np, size + LL_RESERVED_SPACE(np->dev), LL_RESERVED_SPACE(np->dev)); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 20e63b302ba..a803b442234 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -1874,7 +1874,7 @@ static int pktgen_device_event(struct notifier_block *unused, { struct net_device *dev = ptr; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) return NOTIFY_DONE; /* It is OK that we do not hold the group lock right now, diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 2bd9c5f7627..bc39e417694 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -269,6 +269,26 @@ int rtnl_link_register(struct rtnl_link_ops *ops) EXPORT_SYMBOL_GPL(rtnl_link_register); +static void __rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops) +{ + struct net_device *dev; +restart: + for_each_netdev(net, dev) { + if (dev->rtnl_link_ops == ops) { + ops->dellink(dev); + goto restart; + } + } +} + +void rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops) +{ + rtnl_lock(); + __rtnl_kill_links(net, ops); + rtnl_unlock(); +} +EXPORT_SYMBOL_GPL(rtnl_kill_links); + /** * __rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink. * @ops: struct rtnl_link_ops * to unregister @@ -277,17 +297,10 @@ EXPORT_SYMBOL_GPL(rtnl_link_register); */ void __rtnl_link_unregister(struct rtnl_link_ops *ops) { - struct net_device *dev, *n; struct net *net; for_each_net(net) { -restart: - for_each_netdev_safe(net, dev, n) { - if (dev->rtnl_link_ops == ops) { - ops->dellink(dev); - goto restart; - } - } + __rtnl_kill_links(net, ops); } list_del(&ops->list); } @@ -662,7 +675,7 @@ nla_put_failure: static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); int idx; int s_idx = cb->args[0]; struct net_device *dev; @@ -879,7 +892,7 @@ errout: static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); struct ifinfomsg *ifm; struct net_device *dev; int err; @@ -921,7 +934,7 @@ errout: static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); const struct rtnl_link_ops *ops; struct net_device *dev; struct ifinfomsg *ifm; @@ -972,7 +985,7 @@ struct net_device *rtnl_create_link(struct net *net, char *ifname, goto err_free; } - dev->nd_net = net; + dev_net_set(dev, net); dev->rtnl_link_ops = ops; if (tb[IFLA_MTU]) @@ -1000,7 +1013,7 @@ err: static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); const struct rtnl_link_ops *ops; struct net_device *dev; struct ifinfomsg *ifm; @@ -1132,7 +1145,7 @@ replay: static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); struct ifinfomsg *ifm; struct nlattr *tb[IFLA_MAX+1]; struct net_device *dev = NULL; @@ -1198,7 +1211,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) { - struct net *net = dev->nd_net; + struct net *net = dev_net(dev); struct sk_buff *skb; int err = -ENOBUFS; @@ -1227,7 +1240,7 @@ static int rtattr_max; static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); rtnl_doit_func doit; int sz_idx, kind; int min_len; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 0d0fd28a904..4fe605fa6f8 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -263,6 +263,28 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, return skb; } +/** + * dev_alloc_skb - allocate an skbuff for receiving + * @length: length to allocate + * + * Allocate a new &sk_buff and assign it a usage count of one. The + * buffer has unspecified headroom built in. Users should allocate + * the headroom they think they need without accounting for the + * built in space. The built in space is used for optimisations. + * + * %NULL is returned if there is no free memory. Although this function + * allocates memory it can be called from an interrupt. + */ +struct sk_buff *dev_alloc_skb(unsigned int length) +{ + /* + * There is more code here than it seems: + * __dev_alloc_skb is an inline + */ + return __dev_alloc_skb(length, GFP_ATOMIC); +} +EXPORT_SYMBOL(dev_alloc_skb); + static void skb_drop_list(struct sk_buff **listp) { struct sk_buff *list = *listp; @@ -857,6 +879,78 @@ free_skb: return err; } +/** + * skb_put - add data to a buffer + * @skb: buffer to use + * @len: amount of data to add + * + * This function extends the used data area of the buffer. If this would + * exceed the total buffer size the kernel will panic. A pointer to the + * first byte of the extra data is returned. + */ +unsigned char *skb_put(struct sk_buff *skb, unsigned int len) +{ + unsigned char *tmp = skb_tail_pointer(skb); + SKB_LINEAR_ASSERT(skb); + skb->tail += len; + skb->len += len; + if (unlikely(skb->tail > skb->end)) + skb_over_panic(skb, len, __builtin_return_address(0)); + return tmp; +} +EXPORT_SYMBOL(skb_put); + +/** + * skb_push - add data to the start of a buffer + * @skb: buffer to use + * @len: amount of data to add + * + * This function extends the used data area of the buffer at the buffer + * start. If this would exceed the total buffer headroom the kernel will + * panic. A pointer to the first byte of the extra data is returned. + */ +unsigned char *skb_push(struct sk_buff *skb, unsigned int len) +{ + skb->data -= len; + skb->len += len; + if (unlikely(skb->data<skb->head)) + skb_under_panic(skb, len, __builtin_return_address(0)); + return skb->data; +} +EXPORT_SYMBOL(skb_push); + +/** + * skb_pull - remove data from the start of a buffer + * @skb: buffer to use + * @len: amount of data to remove + * + * This function removes data from the start of a buffer, returning + * the memory to the headroom. A pointer to the next data in the buffer + * is returned. Once the data has been pulled future pushes will overwrite + * the old data. + */ +unsigned char *skb_pull(struct sk_buff *skb, unsigned int len) +{ + return unlikely(len > skb->len) ? NULL : __skb_pull(skb, len); +} +EXPORT_SYMBOL(skb_pull); + +/** + * skb_trim - remove end from a buffer + * @skb: buffer to alter + * @len: new length + * + * Cut the length of a buffer down by removing data from the tail. If + * the buffer is already under the length specified it is not modified. + * The skb must be linear. + */ +void skb_trim(struct sk_buff *skb, unsigned int len) +{ + if (skb->len > len) + __skb_trim(skb, len); +} +EXPORT_SYMBOL(skb_trim); + /* Trims skb to length len. It can change skb pointers. */ @@ -1766,7 +1860,7 @@ void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head unsigned long flags; spin_lock_irqsave(&list->lock, flags); - __skb_append(old, newsk, list); + __skb_queue_after(list, old, newsk); spin_unlock_irqrestore(&list->lock, flags); } @@ -2131,8 +2225,8 @@ EXPORT_SYMBOL_GPL(skb_pull_rcsum); * @features: features for the output path (see dev->features) * * This function performs segmentation on the given skb. It returns - * the segment at the given position. It returns NULL if there are - * no more segments to generate, or when an error is encountered. + * a pointer to the first in a list of new skbs for the segments. + * In case of error it returns ERR_PTR(err). */ struct sk_buff *skb_segment(struct sk_buff *skb, int features) { diff --git a/net/core/sock.c b/net/core/sock.c index 2654c147c00..54c836a2216 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -372,7 +372,7 @@ static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen) { int ret = -ENOPROTOOPT; #ifdef CONFIG_NETDEVICES - struct net *net = sk->sk_net; + struct net *net = sock_net(sk); char devname[IFNAMSIZ]; int index; @@ -958,7 +958,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, */ sk->sk_prot = sk->sk_prot_creator = prot; sock_lock_init(sk); - sk->sk_net = get_net(net); + sock_net_set(sk, get_net(net)); } return sk; @@ -981,12 +981,32 @@ void sk_free(struct sock *sk) if (atomic_read(&sk->sk_omem_alloc)) printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n", - __FUNCTION__, atomic_read(&sk->sk_omem_alloc)); + __func__, atomic_read(&sk->sk_omem_alloc)); - put_net(sk->sk_net); + put_net(sock_net(sk)); sk_prot_free(sk->sk_prot_creator, sk); } +/* + * Last sock_put should drop referrence to sk->sk_net. It has already + * been dropped in sk_change_net. Taking referrence to stopping namespace + * is not an option. + * Take referrence to a socket to remove it from hash _alive_ and after that + * destroy it in the context of init_net. + */ +void sk_release_kernel(struct sock *sk) +{ + if (sk == NULL || sk->sk_socket == NULL) + return; + + sock_hold(sk); + sock_release(sk->sk_socket); + release_net(sock_net(sk)); + sock_net_set(sk, get_net(&init_net)); + sock_put(sk); +} +EXPORT_SYMBOL(sk_release_kernel); + struct sock *sk_clone(const struct sock *sk, const gfp_t priority) { struct sock *newsk; @@ -998,7 +1018,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) sock_copy(newsk, sk); /* SANITY */ - get_net(newsk->sk_net); + get_net(sock_net(newsk)); sk_node_init(&newsk->sk_node); sock_lock_init(newsk); bh_lock_sock(newsk); @@ -1076,10 +1096,12 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst) if (sk->sk_route_caps & NETIF_F_GSO) sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE; if (sk_can_gso(sk)) { - if (dst->header_len) + if (dst->header_len) { sk->sk_route_caps &= ~NETIF_F_GSO_MASK; - else + } else { sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM; + sk->sk_gso_max_size = dst->dev->gso_max_size; + } } } EXPORT_SYMBOL_GPL(sk_setup_caps); @@ -1725,7 +1747,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT; - sk->sk_stamp = ktime_set(-1L, -1L); + sk->sk_stamp = ktime_set(-1L, 0); atomic_set(&sk->sk_refcnt, 1); atomic_set(&sk->sk_drops, 0); @@ -1919,16 +1941,113 @@ EXPORT_SYMBOL(sk_common_release); static DEFINE_RWLOCK(proto_list_lock); static LIST_HEAD(proto_list); +#ifdef CONFIG_PROC_FS +#define PROTO_INUSE_NR 64 /* should be enough for the first time */ +struct prot_inuse { + int val[PROTO_INUSE_NR]; +}; + +static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR); + +#ifdef CONFIG_NET_NS +void sock_prot_inuse_add(struct net *net, struct proto *prot, int val) +{ + int cpu = smp_processor_id(); + per_cpu_ptr(net->core.inuse, cpu)->val[prot->inuse_idx] += val; +} +EXPORT_SYMBOL_GPL(sock_prot_inuse_add); + +int sock_prot_inuse_get(struct net *net, struct proto *prot) +{ + int cpu, idx = prot->inuse_idx; + int res = 0; + + for_each_possible_cpu(cpu) + res += per_cpu_ptr(net->core.inuse, cpu)->val[idx]; + + return res >= 0 ? res : 0; +} +EXPORT_SYMBOL_GPL(sock_prot_inuse_get); + +static int sock_inuse_init_net(struct net *net) +{ + net->core.inuse = alloc_percpu(struct prot_inuse); + return net->core.inuse ? 0 : -ENOMEM; +} + +static void sock_inuse_exit_net(struct net *net) +{ + free_percpu(net->core.inuse); +} + +static struct pernet_operations net_inuse_ops = { + .init = sock_inuse_init_net, + .exit = sock_inuse_exit_net, +}; + +static __init int net_inuse_init(void) +{ + if (register_pernet_subsys(&net_inuse_ops)) + panic("Cannot initialize net inuse counters"); + + return 0; +} + +core_initcall(net_inuse_init); +#else +static DEFINE_PER_CPU(struct prot_inuse, prot_inuse); + +void sock_prot_inuse_add(struct net *net, struct proto *prot, int val) +{ + __get_cpu_var(prot_inuse).val[prot->inuse_idx] += val; +} +EXPORT_SYMBOL_GPL(sock_prot_inuse_add); + +int sock_prot_inuse_get(struct net *net, struct proto *prot) +{ + int cpu, idx = prot->inuse_idx; + int res = 0; + + for_each_possible_cpu(cpu) + res += per_cpu(prot_inuse, cpu).val[idx]; + + return res >= 0 ? res : 0; +} +EXPORT_SYMBOL_GPL(sock_prot_inuse_get); +#endif + +static void assign_proto_idx(struct proto *prot) +{ + prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR); + + if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) { + printk(KERN_ERR "PROTO_INUSE_NR exhausted\n"); + return; + } + + set_bit(prot->inuse_idx, proto_inuse_idx); +} + +static void release_proto_idx(struct proto *prot) +{ + if (prot->inuse_idx != PROTO_INUSE_NR - 1) + clear_bit(prot->inuse_idx, proto_inuse_idx); +} +#else +static inline void assign_proto_idx(struct proto *prot) +{ +} + +static inline void release_proto_idx(struct proto *prot) +{ +} +#endif + int proto_register(struct proto *prot, int alloc_slab) { char *request_sock_slab_name = NULL; char *timewait_sock_slab_name; - if (sock_prot_inuse_init(prot) != 0) { - printk(KERN_CRIT "%s: Can't alloc inuse counters!\n", prot->name); - goto out; - } - if (alloc_slab) { prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0, SLAB_HWCACHE_ALIGN, NULL); @@ -1936,7 +2055,7 @@ int proto_register(struct proto *prot, int alloc_slab) if (prot->slab == NULL) { printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n", prot->name); - goto out_free_inuse; + goto out; } if (prot->rsk_prot != NULL) { @@ -1979,6 +2098,7 @@ int proto_register(struct proto *prot, int alloc_slab) write_lock(&proto_list_lock); list_add(&prot->node, &proto_list); + assign_proto_idx(prot); write_unlock(&proto_list_lock); return 0; @@ -1994,8 +2114,6 @@ out_free_request_sock_slab_name: out_free_sock_slab: kmem_cache_destroy(prot->slab); prot->slab = NULL; -out_free_inuse: - sock_prot_inuse_free(prot); out: return -ENOBUFS; } @@ -2005,11 +2123,10 @@ EXPORT_SYMBOL(proto_register); void proto_unregister(struct proto *prot) { write_lock(&proto_list_lock); + release_proto_idx(prot); list_del(&prot->node); write_unlock(&proto_list_lock); - sock_prot_inuse_free(prot); - if (prot->slab != NULL) { kmem_cache_destroy(prot->slab); prot->slab = NULL; diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 130338f83ae..5fc80105724 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -127,7 +127,7 @@ static struct ctl_table net_core_table[] = { { .ctl_name = NET_CORE_SOMAXCONN, .procname = "somaxconn", - .data = &init_net.sysctl_somaxconn, + .data = &init_net.core.sysctl_somaxconn, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec @@ -161,7 +161,7 @@ static __net_init int sysctl_core_net_init(struct net *net) { struct ctl_table *tbl, *tmp; - net->sysctl_somaxconn = SOMAXCONN; + net->core.sysctl_somaxconn = SOMAXCONN; tbl = net_core_table; if (net != &init_net) { @@ -178,9 +178,9 @@ static __net_init int sysctl_core_net_init(struct net *net) } } - net->sysctl_core_hdr = register_net_sysctl_table(net, + net->core.sysctl_hdr = register_net_sysctl_table(net, net_core_path, tbl); - if (net->sysctl_core_hdr == NULL) + if (net->core.sysctl_hdr == NULL) goto err_reg; return 0; @@ -196,8 +196,8 @@ static __net_exit void sysctl_core_net_exit(struct net *net) { struct ctl_table *tbl; - tbl = net->sysctl_core_hdr->ctl_table_arg; - unregister_net_sysctl_table(net->sysctl_core_hdr); + tbl = net->core.sysctl_hdr->ctl_table_arg; + unregister_net_sysctl_table(net->core.sysctl_hdr); BUG_ON(tbl == net_core_table); kfree(tbl); } |