author	Linus Torvalds <torvalds@linux-foundation.org>	2011-07-22 14:43:13 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2011-07-22 14:43:13 -0700
commit	951cc93a7493a81a47e20231441bc6cf17c98a37 (patch)
tree	f53934f0f225e0215a85c8c59af4c6513e89e3f1 /net/core
parent	a7e1aabb28e8154ce987b622fd78d80a1ca39361 (diff)
parent	415b3334a21aa67806c52d1acf4e72e14f7f402f (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1287 commits)
icmp: Fix regression in nexthop resolution during replies.
net: Fix ppc64 BPF JIT dependencies.
acenic: include NET_SKB_PAD headroom to incoming skbs
ixgbe: convert to ndo_fix_features
ixgbe: only enable WoL for magic packet by default
ixgbe: remove ifdef check for non-existent define
ixgbe: Pass staterr instead of re-reading status and error bits from descriptor
ixgbe: Move interrupt related values out of ring and into q_vector
ixgbe: add structure for containing RX/TX rings to q_vector
ixgbe: inline the ixgbe_maybe_stop_tx function
ixgbe: Update ATR to use recorded TX queues instead of CPU for routing
igb: Fix for DH89xxCC near end loopback test
e1000: always call e1000_check_for_link() on e1000_ce4100 MACs.
netxen: add fw version compatibility check
be2net: request native mode each time the card is reset
ipv4: Constrain UFO fragment sizes to multiples of 8 bytes
virtio_net: Fix panic in virtnet_remove
ipv6: make fragment identifications less predictable
ipv6: unshare inetpeers
can: make function can_get_bittiming static
...
Diffstat (limited to 'net/core')
-rw-r--r--	net/core/dev.c	26
-rw-r--r--	net/core/dst.c	17
-rw-r--r--	net/core/ethtool.c	313
-rw-r--r--	net/core/fib_rules.c	6
-rw-r--r--	net/core/neighbour.c	191
-rw-r--r--	net/core/net-sysfs.c	2
-rw-r--r--	net/core/net-traces.c	2
-rw-r--r--	net/core/net_namespace.c	1
-rw-r--r--	net/core/netpoll.c	13
-rw-r--r--	net/core/rtnetlink.c	64
-rw-r--r--	net/core/skbuff.c	84
-rw-r--r--	net/core/sock.c	11
-rw-r--r--	net/core/timestamping.c	2
13 files changed, 241 insertions, 491 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 9c58c1ec41a..9444c5cb413 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -199,6 +199,11 @@ static struct list_head ptype_all __read_mostly;	/* Taps */
 DEFINE_RWLOCK(dev_base_lock);
 EXPORT_SYMBOL(dev_base_lock);
 
+static inline void dev_base_seq_inc(struct net *net)
+{
+	while (++net->dev_base_seq == 0);
+}
+
 static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
 {
 	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
@@ -237,6 +242,9 @@ static int list_netdevice(struct net_device *dev)
 	hlist_add_head_rcu(&dev->index_hlist,
 			   dev_index_hash(net, dev->ifindex));
 	write_unlock_bh(&dev_base_lock);
+
+	dev_base_seq_inc(net);
+
 	return 0;
 }
 
@@ -253,6 +261,8 @@ static void unlist_netdevice(struct net_device *dev)
 	hlist_del_rcu(&dev->name_hlist);
 	hlist_del_rcu(&dev->index_hlist);
 	write_unlock_bh(&dev_base_lock);
+
+	dev_base_seq_inc(dev_net(dev));
 }
 
 /*
@@ -2532,7 +2542,7 @@ __u32 __skb_get_rxhash(struct sk_buff *skb)
 			goto done;
 
 		ip = (const struct iphdr *) (skb->data + nhoff);
-		if (ip->frag_off & htons(IP_MF | IP_OFFSET))
+		if (ip_is_fragment(ip))
 			ip_proto = 0;
 		else
 			ip_proto = ip->protocol;
@@ -5199,7 +5209,7 @@ static void rollback_registered(struct net_device *dev)
 	list_del(&single);
 }
 
-u32 netdev_fix_features(struct net_device *dev, u32 features)
+static u32 netdev_fix_features(struct net_device *dev, u32 features)
 {
 	/* Fix illegal checksum combinations */
 	if ((features & NETIF_F_HW_CSUM) &&
@@ -5258,7 +5268,6 @@ u32 netdev_fix_features(struct net_device *dev, u32 features)
 
 	return features;
 }
-EXPORT_SYMBOL(netdev_fix_features);
 
 int __netdev_update_features(struct net_device *dev)
 {
@@ -5478,11 +5487,9 @@ int register_netdevice(struct net_device *dev)
 		dev->features |= NETIF_F_NOCACHE_COPY;
 	}
 
-	/* Enable GRO and NETIF_F_HIGHDMA for vlans by default,
-	 * vlan_dev_init() will do the dev->features check, so these features
-	 * are enabled only if supported by underlying device.
+	/* Make NETIF_F_HIGHDMA inheritable to VLAN devices.
 	 */
-	dev->vlan_features |= (NETIF_F_GRO | NETIF_F_HIGHDMA);
+	dev->vlan_features |= NETIF_F_HIGHDMA;
 
 	ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
 	ret = notifier_to_errno(ret);
@@ -5867,8 +5874,6 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 
 	dev->gso_max_size = GSO_MAX_SIZE;
 
-	INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list);
-	dev->ethtool_ntuple_list.count = 0;
 	INIT_LIST_HEAD(&dev->napi_list);
 	INIT_LIST_HEAD(&dev->unreg_list);
 	INIT_LIST_HEAD(&dev->link_watch_list);
@@ -5932,9 +5937,6 @@ void free_netdev(struct net_device *dev)
 	/* Flush device addresses */
 	dev_addr_flush(dev);
 
-	/* Clear ethtool n-tuple list */
-	ethtool_ntuple_flush(dev);
-
 	list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
 		netif_napi_del(p);
 
diff --git a/net/core/dst.c b/net/core/dst.c
index 6135f367169..14b33baf073 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -171,8 +171,7 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
 	dst_init_metrics(dst, dst_default_metrics, true);
 	dst->expires = 0UL;
 	dst->path = dst;
-	dst->neighbour = NULL;
-	dst->hh = NULL;
+	dst->_neighbour = NULL;
 #ifdef CONFIG_XFRM
 	dst->xfrm = NULL;
 #endif
@@ -226,21 +225,15 @@ struct dst_entry *dst_destroy(struct dst_entry * dst)
 {
 	struct dst_entry *child;
 	struct neighbour *neigh;
-	struct hh_cache *hh;
 
 	smp_rmb();
 
again:
-	neigh = dst->neighbour;
-	hh = dst->hh;
+	neigh = dst->_neighbour;
 	child = dst->child;
 
-	dst->hh = NULL;
-	if (hh)
-		hh_cache_put(hh);
-
 	if (neigh) {
-		dst->neighbour = NULL;
+		dst->_neighbour = NULL;
 		neigh_release(neigh);
 	}
@@ -370,8 +363,8 @@ static void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 	dst->dev = dev_net(dst->dev)->loopback_dev;
 	dev_hold(dst->dev);
 	dev_put(dev);
-	if (dst->neighbour && dst->neighbour->dev == dev) {
-		dst->neighbour->dev = dst->dev;
+	if (dst->_neighbour && dst->_neighbour->dev == dev) {
+		dst->_neighbour->dev = dst->dev;
 		dev_hold(dst->dev);
 		dev_put(dev);
 	}
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index fd14116ad7f..6cdba5fc2be 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -169,18 +169,6 @@ int ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported)
 }
 EXPORT_SYMBOL(ethtool_op_set_flags);
 
-void ethtool_ntuple_flush(struct net_device *dev)
-{
-	struct ethtool_rx_ntuple_flow_spec_container *fsc, *f;
-
-	list_for_each_entry_safe(fsc, f, &dev->ethtool_ntuple_list.list, list) {
-		list_del(&fsc->list);
-		kfree(fsc);
-	}
-	dev->ethtool_ntuple_list.count = 0;
-}
-EXPORT_SYMBOL(ethtool_ntuple_flush);
-
 /* Handlers for each ethtool command */
 
 #define ETHTOOL_DEV_FEATURE_WORDS	1
@@ -865,34 +853,6 @@ out:
 	return ret;
 }
 
-static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list,
-				   struct ethtool_rx_ntuple_flow_spec *spec,
-				   struct ethtool_rx_ntuple_flow_spec_container *fsc)
-{
-
-	/* don't add filters forever */
-	if (list->count >= ETHTOOL_MAX_NTUPLE_LIST_ENTRY) {
-		/* free the container */
-		kfree(fsc);
-		return;
-	}
-
-	/* Copy the whole filter over */
-	fsc->fs.flow_type = spec->flow_type;
-	memcpy(&fsc->fs.h_u, &spec->h_u, sizeof(spec->h_u));
-	memcpy(&fsc->fs.m_u, &spec->m_u, sizeof(spec->m_u));
-
-	fsc->fs.vlan_tag = spec->vlan_tag;
-	fsc->fs.vlan_tag_mask = spec->vlan_tag_mask;
-	fsc->fs.data = spec->data;
-	fsc->fs.data_mask = spec->data_mask;
-	fsc->fs.action = spec->action;
-
-	/* add to the list */
-	list_add_tail_rcu(&fsc->list, &list->list);
-	list->count++;
-}
-
 /*
  * ethtool does not (or did not) set masks for flow parameters that are
  * not specified, so if both value and mask are 0 then this must be
@@ -930,8 +890,6 @@ static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev,
 {
 	struct ethtool_rx_ntuple cmd;
 	const struct ethtool_ops *ops = dev->ethtool_ops;
-	struct ethtool_rx_ntuple_flow_spec_container *fsc = NULL;
-	int ret;
 
 	if (!ops->set_rx_ntuple)
 		return -EOPNOTSUPP;
@@ -944,269 +902,7 @@ static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev,
 
 	rx_ntuple_fix_masks(&cmd.fs);
 
-	/*
-	 * Cache filter in dev struct for GET operation only if
-	 * the underlying driver doesn't have its own GET operation, and
-	 * only if the filter was added successfully. First make sure we
-	 * can allocate the filter, then continue if successful.
-	 */
-	if (!ops->get_rx_ntuple) {
-		fsc = kmalloc(sizeof(*fsc), GFP_ATOMIC);
-		if (!fsc)
-			return -ENOMEM;
-	}
-
-	ret = ops->set_rx_ntuple(dev, &cmd);
-	if (ret) {
-		kfree(fsc);
-		return ret;
-	}
-
-	if (!ops->get_rx_ntuple)
-		__rx_ntuple_filter_add(&dev->ethtool_ntuple_list, &cmd.fs, fsc);
-
-	return ret;
-}
-
-static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr)
-{
-	struct ethtool_gstrings gstrings;
-	const struct ethtool_ops *ops = dev->ethtool_ops;
-	struct ethtool_rx_ntuple_flow_spec_container *fsc;
-	u8 *data;
-	char *p;
-	int ret, i, num_strings = 0;
-
-	if (!ops->get_sset_count)
-		return -EOPNOTSUPP;
-
-	if (copy_from_user(&gstrings, useraddr, sizeof(gstrings)))
-		return -EFAULT;
-
-	ret = ops->get_sset_count(dev, gstrings.string_set);
-	if (ret < 0)
-		return ret;
-
-	gstrings.len = ret;
-
-	data = kzalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER);
-	if (!data)
-		return -ENOMEM;
-
-	if (ops->get_rx_ntuple) {
-		/* driver-specific filter grab */
-		ret = ops->get_rx_ntuple(dev, gstrings.string_set, data);
-		goto copy;
-	}
-
-	/* default ethtool filter grab */
-	i = 0;
-	p = (char *)data;
-	list_for_each_entry(fsc, &dev->ethtool_ntuple_list.list, list) {
-		sprintf(p, "Filter %d:\n", i);
-		p += ETH_GSTRING_LEN;
-		num_strings++;
-
-		switch (fsc->fs.flow_type) {
-		case TCP_V4_FLOW:
-			sprintf(p, "\tFlow Type: TCP\n");
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			break;
-		case UDP_V4_FLOW:
-			sprintf(p, "\tFlow Type: UDP\n");
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			break;
-		case SCTP_V4_FLOW:
-			sprintf(p, "\tFlow Type: SCTP\n");
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			break;
-		case AH_ESP_V4_FLOW:
-			sprintf(p, "\tFlow Type: AH ESP\n");
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			break;
-		case ESP_V4_FLOW:
-			sprintf(p, "\tFlow Type: ESP\n");
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			break;
-		case IP_USER_FLOW:
-			sprintf(p, "\tFlow Type: Raw IP\n");
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			break;
-		case IPV4_FLOW:
-			sprintf(p, "\tFlow Type: IPv4\n");
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			break;
-		default:
-			sprintf(p, "\tFlow Type: Unknown\n");
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			goto unknown_filter;
-		}
-
-		/* now the rest of the filters */
-		switch (fsc->fs.flow_type) {
-		case TCP_V4_FLOW:
-		case UDP_V4_FLOW:
-		case SCTP_V4_FLOW:
-			sprintf(p, "\tSrc IP addr: 0x%x\n",
-				fsc->fs.h_u.tcp_ip4_spec.ip4src);
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			sprintf(p, "\tSrc IP mask: 0x%x\n",
-				fsc->fs.m_u.tcp_ip4_spec.ip4src);
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			sprintf(p, "\tDest IP addr: 0x%x\n",
-				fsc->fs.h_u.tcp_ip4_spec.ip4dst);
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			sprintf(p, "\tDest IP mask: 0x%x\n",
-				fsc->fs.m_u.tcp_ip4_spec.ip4dst);
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			sprintf(p, "\tSrc Port: %d, mask: 0x%x\n",
-				fsc->fs.h_u.tcp_ip4_spec.psrc,
-				fsc->fs.m_u.tcp_ip4_spec.psrc);
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			sprintf(p, "\tDest Port: %d, mask: 0x%x\n",
-				fsc->fs.h_u.tcp_ip4_spec.pdst,
-				fsc->fs.m_u.tcp_ip4_spec.pdst);
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			sprintf(p, "\tTOS: %d, mask: 0x%x\n",
-				fsc->fs.h_u.tcp_ip4_spec.tos,
-				fsc->fs.m_u.tcp_ip4_spec.tos);
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			break;
-		case AH_ESP_V4_FLOW:
-		case ESP_V4_FLOW:
-			sprintf(p, "\tSrc IP addr: 0x%x\n",
-				fsc->fs.h_u.ah_ip4_spec.ip4src);
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			sprintf(p, "\tSrc IP mask: 0x%x\n",
-				fsc->fs.m_u.ah_ip4_spec.ip4src);
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			sprintf(p, "\tDest IP addr: 0x%x\n",
-				fsc->fs.h_u.ah_ip4_spec.ip4dst);
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			sprintf(p, "\tDest IP mask: 0x%x\n",
-				fsc->fs.m_u.ah_ip4_spec.ip4dst);
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			sprintf(p, "\tSPI: %d, mask: 0x%x\n",
-				fsc->fs.h_u.ah_ip4_spec.spi,
-				fsc->fs.m_u.ah_ip4_spec.spi);
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			sprintf(p, "\tTOS: %d, mask: 0x%x\n",
-				fsc->fs.h_u.ah_ip4_spec.tos,
-				fsc->fs.m_u.ah_ip4_spec.tos);
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			break;
-		case IP_USER_FLOW:
-			sprintf(p, "\tSrc IP addr: 0x%x\n",
-				fsc->fs.h_u.usr_ip4_spec.ip4src);
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			sprintf(p, "\tSrc IP mask: 0x%x\n",
-				fsc->fs.m_u.usr_ip4_spec.ip4src);
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			sprintf(p, "\tDest IP addr: 0x%x\n",
-				fsc->fs.h_u.usr_ip4_spec.ip4dst);
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			sprintf(p, "\tDest IP mask: 0x%x\n",
-				fsc->fs.m_u.usr_ip4_spec.ip4dst);
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			break;
-		case IPV4_FLOW:
-			sprintf(p, "\tSrc IP addr: 0x%x\n",
-				fsc->fs.h_u.usr_ip4_spec.ip4src);
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			sprintf(p, "\tSrc IP mask: 0x%x\n",
-				fsc->fs.m_u.usr_ip4_spec.ip4src);
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			sprintf(p, "\tDest IP addr: 0x%x\n",
-				fsc->fs.h_u.usr_ip4_spec.ip4dst);
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			sprintf(p, "\tDest IP mask: 0x%x\n",
-				fsc->fs.m_u.usr_ip4_spec.ip4dst);
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			sprintf(p, "\tL4 bytes: 0x%x, mask: 0x%x\n",
-				fsc->fs.h_u.usr_ip4_spec.l4_4_bytes,
-				fsc->fs.m_u.usr_ip4_spec.l4_4_bytes);
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			sprintf(p, "\tTOS: %d, mask: 0x%x\n",
-				fsc->fs.h_u.usr_ip4_spec.tos,
-				fsc->fs.m_u.usr_ip4_spec.tos);
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			sprintf(p, "\tIP Version: %d, mask: 0x%x\n",
-				fsc->fs.h_u.usr_ip4_spec.ip_ver,
-				fsc->fs.m_u.usr_ip4_spec.ip_ver);
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			sprintf(p, "\tProtocol: %d, mask: 0x%x\n",
-				fsc->fs.h_u.usr_ip4_spec.proto,
-				fsc->fs.m_u.usr_ip4_spec.proto);
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			break;
-		}
-		sprintf(p, "\tVLAN: %d, mask: 0x%x\n",
-			fsc->fs.vlan_tag, fsc->fs.vlan_tag_mask);
-		p += ETH_GSTRING_LEN;
-		num_strings++;
-		sprintf(p, "\tUser-defined: 0x%Lx\n", fsc->fs.data);
-		p += ETH_GSTRING_LEN;
-		num_strings++;
-		sprintf(p, "\tUser-defined mask: 0x%Lx\n", fsc->fs.data_mask);
-		p += ETH_GSTRING_LEN;
-		num_strings++;
-		if (fsc->fs.action == ETHTOOL_RXNTUPLE_ACTION_DROP)
-			sprintf(p, "\tAction: Drop\n");
-		else
-			sprintf(p, "\tAction: Direct to queue %d\n",
-				fsc->fs.action);
-		p += ETH_GSTRING_LEN;
-		num_strings++;
-unknown_filter:
-		i++;
-	}
-copy:
-	/* indicate to userspace how many strings we actually have */
-	gstrings.len = num_strings;
-	ret = -EFAULT;
-	if (copy_to_user(useraddr, &gstrings, sizeof(gstrings)))
-		goto out;
-	useraddr += sizeof(gstrings);
-	if (copy_to_user(useraddr, data, gstrings.len * ETH_GSTRING_LEN))
-		goto out;
-	ret = 0;
-
-out:
-	kfree(data);
-	return ret;
+	return ops->set_rx_ntuple(dev, &cmd);
 }
 
 static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
@@ -1227,7 +923,7 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
 	regs.len = reglen;
 
 	regbuf = vzalloc(reglen);
-	if (!regbuf)
+	if (reglen && !regbuf)
 		return -ENOMEM;
 
 	ops->get_regs(dev, &regs, regbuf);
@@ -1236,7 +932,7 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
 	if (copy_to_user(useraddr, &regs, sizeof(regs)))
 		goto out;
 	useraddr += offsetof(struct ethtool_regs, data);
-	if (copy_to_user(useraddr, regbuf, regs.len))
+	if (regbuf && copy_to_user(useraddr, regbuf, regs.len))
 		goto out;
 	ret = 0;
@@ -2101,9 +1797,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_SRXNTUPLE:
 		rc = ethtool_set_rx_ntuple(dev, useraddr);
 		break;
-	case ETHTOOL_GRXNTUPLE:
-		rc = ethtool_get_rx_ntuple(dev, useraddr);
-		break;
 	case ETHTOOL_GSSET_INFO:
 		rc = ethtool_get_sset_info(dev, useraddr);
 		break;
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 008dc70b064..e7ab0c0285b 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -740,9 +740,9 @@ static struct pernet_operations fib_rules_net_ops = {
 static int __init fib_rules_init(void)
 {
 	int err;
-	rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL);
-	rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL);
-	rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule);
+	rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL, NULL);
+	rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL, NULL);
+	rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule, NULL);
 
 	err = register_pernet_subsys(&fib_rules_net_ops);
 	if (err < 0)
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 799f06e03a2..8fab9b0bb20 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -98,7 +98,7 @@ static const struct file_operations neigh_stat_seq_fops;
 
 static DEFINE_RWLOCK(neigh_tbl_lock);
 
-static int neigh_blackhole(struct sk_buff *skb)
+static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
 {
 	kfree_skb(skb);
 	return -ENETDOWN;
@@ -137,7 +137,7 @@ static int neigh_forced_gc(struct neigh_table *tbl)
 	write_lock_bh(&tbl->lock);
 	nht = rcu_dereference_protected(tbl->nht,
 					lockdep_is_held(&tbl->lock));
-	for (i = 0; i <= nht->hash_mask; i++) {
+	for (i = 0; i < (1 << nht->hash_shift); i++) {
 		struct neighbour *n;
 		struct neighbour __rcu **np;
@@ -210,7 +210,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
 	nht = rcu_dereference_protected(tbl->nht,
 					lockdep_is_held(&tbl->lock));
-	for (i = 0; i <= nht->hash_mask; i++) {
+	for (i = 0; i < (1 << nht->hash_shift); i++) {
 		struct neighbour *n;
 		struct neighbour __rcu **np = &nht->hash_buckets[i];
@@ -297,6 +297,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl)
 	n->updated	  = n->used = now;
 	n->nud_state	  = NUD_NONE;
 	n->output	  = neigh_blackhole;
+	seqlock_init(&n->hh.hh_lock);
 	n->parms	  = neigh_parms_clone(&tbl->parms);
 	setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);
@@ -312,9 +313,9 @@ out_entries:
 	goto out;
 }
 
-static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries)
+static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
 {
-	size_t size = entries * sizeof(struct neighbour *);
+	size_t size = (1 << shift) * sizeof(struct neighbour *);
 	struct neigh_hash_table *ret;
 	struct neighbour __rcu **buckets;
@@ -332,8 +333,9 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries)
 		return NULL;
 	}
 	ret->hash_buckets = buckets;
-	ret->hash_mask = entries - 1;
+	ret->hash_shift = shift;
 	get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd));
+	ret->hash_rnd |= 1;
 	return ret;
 }
@@ -342,7 +344,7 @@ static void neigh_hash_free_rcu(struct rcu_head *head)
 	struct neigh_hash_table *nht = container_of(head,
 						    struct neigh_hash_table,
 						    rcu);
-	size_t size = (nht->hash_mask + 1) * sizeof(struct neighbour *);
+	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
 	struct neighbour __rcu **buckets = nht->hash_buckets;
 
 	if (size <= PAGE_SIZE)
@@ -353,21 +355,20 @@ static void neigh_hash_free_rcu(struct rcu_head *head)
 }
 
 static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
-						unsigned long new_entries)
+						unsigned long new_shift)
 {
 	unsigned int i, hash;
 	struct neigh_hash_table *new_nht, *old_nht;
 
 	NEIGH_CACHE_STAT_INC(tbl, hash_grows);
 
-	BUG_ON(!is_power_of_2(new_entries));
 	old_nht = rcu_dereference_protected(tbl->nht,
 					    lockdep_is_held(&tbl->lock));
-	new_nht = neigh_hash_alloc(new_entries);
+	new_nht = neigh_hash_alloc(new_shift);
 	if (!new_nht)
 		return old_nht;
 
-	for (i = 0; i <= old_nht->hash_mask; i++) {
+	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
 		struct neighbour *n, *next;
 
 		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
@@ -377,7 +378,7 @@ static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
 			hash = tbl->hash(n->primary_key, n->dev,
 					 new_nht->hash_rnd);
 
-			hash &= new_nht->hash_mask;
+			hash >>= (32 - new_nht->hash_shift);
 			next = rcu_dereference_protected(n->next,
 						lockdep_is_held(&tbl->lock));
@@ -406,7 +407,7 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
 
 	rcu_read_lock_bh();
 	nht = rcu_dereference_bh(tbl->nht);
-	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) & nht->hash_mask;
+	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
 
 	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
 	     n != NULL;
@@ -436,7 +437,7 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
 
 	rcu_read_lock_bh();
 	nht = rcu_dereference_bh(tbl->nht);
-	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) & nht->hash_mask;
+	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);
 
 	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
 	     n != NULL;
@@ -492,10 +493,10 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
 	nht = rcu_dereference_protected(tbl->nht,
 					lockdep_is_held(&tbl->lock));
 
-	if (atomic_read(&tbl->entries) > (nht->hash_mask + 1))
-		nht = neigh_hash_grow(tbl, (nht->hash_mask + 1) << 1);
+	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
+		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
 
-	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) & nht->hash_mask;
+	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
 
 	if (n->parms->dead) {
 		rc = ERR_PTR(-EINVAL);
@@ -688,8 +689,6 @@ static void neigh_destroy_rcu(struct rcu_head *head)
  */
 void neigh_destroy(struct neighbour *neigh)
 {
-	struct hh_cache *hh;
-
 	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
 
 	if (!neigh->dead) {
@@ -702,16 +701,6 @@ void neigh_destroy(struct neighbour *neigh)
 	if (neigh_del_timer(neigh))
 		printk(KERN_WARNING "Impossible event.\n");
 
-	while ((hh = neigh->hh) != NULL) {
-		neigh->hh = hh->hh_next;
-		hh->hh_next = NULL;
-
-		write_seqlock_bh(&hh->hh_lock);
-		hh->hh_output = neigh_blackhole;
-		write_sequnlock_bh(&hh->hh_lock);
-		hh_cache_put(hh);
-	}
-
 	skb_queue_purge(&neigh->arp_queue);
 
 	dev_put(neigh->dev);
@@ -731,14 +720,9 @@ EXPORT_SYMBOL(neigh_destroy);
  */
 static void neigh_suspect(struct neighbour *neigh)
 {
-	struct hh_cache *hh;
-
 	NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
 
 	neigh->output = neigh->ops->output;
-
-	for (hh = neigh->hh; hh; hh = hh->hh_next)
-		hh->hh_output = neigh->ops->output;
 }
 
 /* Neighbour state is OK;
@@ -748,14 +732,9 @@ static void neigh_suspect(struct neighbour *neigh)
  */
 static void neigh_connect(struct neighbour *neigh)
 {
-	struct hh_cache *hh;
-
 	NEIGH_PRINTK2("neigh %p is connected.\n", neigh);
 
 	neigh->output = neigh->ops->connected_output;
-
-	for (hh = neigh->hh; hh; hh = hh->hh_next)
-		hh->hh_output = neigh->ops->hh_output;
 }
 
 static void neigh_periodic_work(struct work_struct *work)
@@ -784,7 +763,7 @@ static void neigh_periodic_work(struct work_struct *work)
 			neigh_rand_reach_time(p->base_reachable_time);
 	}
 
-	for (i = 0 ; i <= nht->hash_mask; i++) {
+	for (i = 0 ; i < (1 << nht->hash_shift); i++) {
 		np = &nht->hash_buckets[i];
 
 		while ((n = rcu_dereference_protected(*np,
@@ -1015,7 +994,7 @@ out_unlock_bh:
 }
 EXPORT_SYMBOL(__neigh_event_send);
 
-static void neigh_update_hhs(const struct neighbour *neigh)
+static void neigh_update_hhs(struct neighbour *neigh)
 {
 	struct hh_cache *hh;
 	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
@@ -1025,7 +1004,8 @@ static void neigh_update_hhs(const struct neighbour *neigh)
 		update = neigh->dev->header_ops->cache_update;
 
 	if (update) {
-		for (hh = neigh->hh; hh; hh = hh->hh_next) {
+		hh = &neigh->hh;
+		if (hh->hh_len) {
 			write_seqlock_bh(&hh->hh_lock);
 			update(hh, neigh->dev, neigh->ha);
 			write_sequnlock_bh(&hh->hh_lock);
@@ -1173,12 +1153,13 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
 
 		while (neigh->nud_state & NUD_VALID &&
 		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
-			struct neighbour *n1 = neigh;
+			struct dst_entry *dst = skb_dst(skb);
+			struct neighbour *n2, *n1 = neigh;
 			write_unlock_bh(&neigh->lock);
 			/* On shaper/eql skb->dst->neighbour != neigh :( */
-			if (skb_dst(skb) && skb_dst(skb)->neighbour)
-				n1 = skb_dst(skb)->neighbour;
-			n1->output(skb);
+			if (dst && (n2 = dst_get_neighbour(dst)) != NULL)
+				n1 = n2;
+			n1->output(n1, skb);
 			write_lock_bh(&neigh->lock);
 		}
 		skb_queue_purge(&neigh->arp_queue);
@@ -1211,67 +1192,21 @@ struct neighbour *neigh_event_ns(struct neigh_table *tbl,
 }
 EXPORT_SYMBOL(neigh_event_ns);
 
-static inline bool neigh_hh_lookup(struct neighbour *n, struct dst_entry *dst,
-				   __be16 protocol)
-{
-	struct hh_cache *hh;
-
-	smp_rmb(); /* paired with smp_wmb() in neigh_hh_init() */
-	for (hh = n->hh; hh; hh = hh->hh_next) {
-		if (hh->hh_type == protocol) {
-			atomic_inc(&hh->hh_refcnt);
-			if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL))
-				hh_cache_put(hh);
-			return true;
-		}
-	}
-	return false;
-}
-
 /* called with read_lock_bh(&n->lock); */
-static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
-			  __be16 protocol)
+static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
 {
-	struct hh_cache *hh;
 	struct net_device *dev = dst->dev;
-
-	if (likely(neigh_hh_lookup(n, dst, protocol)))
-		return;
-
-	/* slow path */
-	hh = kzalloc(sizeof(*hh), GFP_ATOMIC);
-	if (!hh)
-		return;
-
-	seqlock_init(&hh->hh_lock);
-	hh->hh_type = protocol;
-	atomic_set(&hh->hh_refcnt, 2);
-
-	if (dev->header_ops->cache(n, hh)) {
-		kfree(hh);
-		return;
-	}
+	__be16 prot = dst->ops->protocol;
+	struct hh_cache	*hh = &n->hh;
 
 	write_lock_bh(&n->lock);
 
-	/* must check if another thread already did the insert */
-	if (neigh_hh_lookup(n, dst, protocol)) {
-		kfree(hh);
-		goto end;
-	}
-
-	if (n->nud_state & NUD_CONNECTED)
-		hh->hh_output = n->ops->hh_output;
-	else
-		hh->hh_output = n->ops->output;
-
-	hh->hh_next = n->hh;
-	smp_wmb(); /* paired with smp_rmb() in neigh_hh_lookup() */
-	n->hh	    = hh;
+	/* Only one thread can come in here and initialize the
+	 * hh_cache entry.
+	 */
+	if (!hh->hh_len)
+		dev->header_ops->cache(n, hh, prot);
 
-	if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL))
-		hh_cache_put(hh);
-end:
 	write_unlock_bh(&n->lock);
 }
@@ -1280,7 +1215,7 @@ end:
  * but resolution is not made yet.
  */
 
-int neigh_compat_output(struct sk_buff *skb)
+int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
 {
 	struct net_device *dev = skb->dev;
@@ -1297,13 +1232,12 @@ EXPORT_SYMBOL(neigh_compat_output);
 
 /* Slow and careful. */
 
-int neigh_resolve_output(struct sk_buff *skb)
+int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb_dst(skb);
-	struct neighbour *neigh;
 	int rc = 0;
 
-	if (!dst || !(neigh = dst->neighbour))
+	if (!dst)
 		goto discard;
 
 	__skb_pull(skb, skb_network_offset(skb));
@@ -1313,10 +1247,8 @@ int neigh_resolve_output(struct sk_buff *skb)
 		struct net_device *dev = neigh->dev;
 		unsigned int seq;
 
-		if (dev->header_ops->cache &&
-		    !dst->hh &&
-		    !(dst->flags & DST_NOCACHE))
-			neigh_hh_init(neigh, dst, dst->ops->protocol);
+		if (dev->header_ops->cache && !neigh->hh.hh_len)
+			neigh_hh_init(neigh, dst);
 
 		do {
 			seq = read_seqbegin(&neigh->ha_lock);
@@ -1325,7 +1257,7 @@ int neigh_resolve_output(struct sk_buff *skb)
 		} while (read_seqretry(&neigh->ha_lock, seq));
 
 		if (err >= 0)
-			rc = neigh->ops->queue_xmit(skb);
+			rc = dev_queue_xmit(skb);
 		else
 			goto out_kfree_skb;
 	}
@@ -1333,7 +1265,7 @@ out:
 	return rc;
 discard:
 	NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
-		      dst, dst ? dst->neighbour : NULL);
+		      dst, neigh);
out_kfree_skb:
 	rc = -EINVAL;
 	kfree_skb(skb);
@@ -1343,13 +1275,11 @@ EXPORT_SYMBOL(neigh_resolve_output);
 
 /* As fast as possible without hh cache */
 
-int neigh_connected_output(struct sk_buff *skb)
+int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
 {
-	int err;
-	struct dst_entry *dst = skb_dst(skb);
-	struct neighbour *neigh = dst->neighbour;
 	struct net_device *dev = neigh->dev;
 	unsigned int seq;
+	int err;
 
 	__skb_pull(skb, skb_network_offset(skb));
@@ -1360,7 +1290,7 @@ int neigh_connected_output(struct sk_buff *skb)
 	} while (read_seqretry(&neigh->ha_lock, seq));
 
 	if (err >= 0)
-		err = neigh->ops->queue_xmit(skb);
+		err = dev_queue_xmit(skb);
 	else {
 		err = -EINVAL;
 		kfree_skb(skb);
@@ -1369,6 +1299,12 @@ int neigh_connected_output(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(neigh_connected_output);
 
+int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
+{
+	return dev_queue_xmit(skb);
+}
+EXPORT_SYMBOL(neigh_direct_output);
+
 static void neigh_proxy_process(unsigned long arg)
 {
 	struct neigh_table *tbl = (struct neigh_table *)arg;
@@ -1540,7 +1476,7 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl)
 		panic("cannot create neighbour proc dir entry");
 #endif
 
-	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(8));
+	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));
 
 	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
 	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
@@ -1857,7 +1793,7 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
 		rcu_read_lock_bh();
 		nht = rcu_dereference_bh(tbl->nht);
 		ndc.ndtc_hash_rnd = nht->hash_rnd;
-		ndc.ndtc_hash_mask = nht->hash_mask;
+		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
 		rcu_read_unlock_bh();
 
 		NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc);
@@ -2200,7 +2136,7 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
 	rcu_read_lock_bh();
 	nht = rcu_dereference_bh(tbl->nht);
 
-	for (h = 0; h <= nht->hash_mask; h++) {
+	for (h = 0; h < (1 << nht->hash_shift); h++) {
 		if (h < s_h)
 			continue;
 		if (h > s_h)
@@ -2264,7 +2200,7 @@ void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void
 	nht = rcu_dereference_bh(tbl->nht);
 	read_lock(&tbl->lock); /* avoid resizes */
-	for (chain = 0; chain <= nht->hash_mask; chain++) {
+	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
 		struct neighbour *n;
 
 		for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
@@ -2286,7 +2222,7 @@ void __neigh_for_each_release(struct neigh_table *tbl,
 	nht = rcu_dereference_protected(tbl->nht,
 					lockdep_is_held(&tbl->lock));
-	for (chain = 0; chain <= nht->hash_mask; chain++) {
+	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
 		struct neighbour *n;
 		struct neighbour __rcu **np;
@@ -2323,7 +2259,7 @@ static struct neighbour *neigh_get_first(struct seq_file *seq)
 	int bucket = state->bucket;
 
 	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
-	for (bucket = 0; bucket <= nht->hash_mask; bucket++) {
+	for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
 		n = rcu_dereference_bh(nht->hash_buckets[bucket]);
 
 		while (n) {
@@ -2390,7 +2326,7 @@ next:
 		if (n)
 			break;
 
-		if (++state->bucket > nht->hash_mask)
+		if (++state->bucket >= (1 << nht->hash_shift))
 			break;
 
 		n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
@@ -2909,12 +2845,13 @@ EXPORT_SYMBOL(neigh_sysctl_unregister);
 
 static int __init neigh_init(void)
 {
-	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL);
-	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL);
-	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info);
+	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
+	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
+	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);
 
-	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info);
-	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL);
+	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
+		      NULL);
+	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);
 
 	return 0;
 }
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 33d2a1fba13..1683e5db2f2 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -100,7 +100,6 @@ NETDEVICE_SHOW(addr_assign_type, fmt_dec);
 NETDEVICE_SHOW(addr_len, fmt_dec);
 NETDEVICE_SHOW(iflink, fmt_dec);
 NETDEVICE_SHOW(ifindex, fmt_dec);
-NETDEVICE_SHOW(features, fmt_hex);
 NETDEVICE_SHOW(type, fmt_dec);
 NETDEVICE_SHOW(link_mode, fmt_dec);
@@ -312,7 +311,6 @@ static struct device_attribute net_class_attributes[] = {
 	__ATTR(ifalias, S_IRUGO | S_IWUSR, show_ifalias, store_ifalias),
 	__ATTR(iflink, S_IRUGO, show_iflink, NULL),
 	__ATTR(ifindex, S_IRUGO, show_ifindex, NULL),
-	__ATTR(features, S_IRUGO, show_features, NULL),
 	__ATTR(type, S_IRUGO, show_type, NULL),
 	__ATTR(link_mode, S_IRUGO, show_link_mode, NULL),
 	__ATTR(address, S_IRUGO, show_address, NULL),
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index 7f1bb2aba03..52380b1d552 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -28,6 +28,8 @@
 #include <trace/events/skb.h>
 #include <trace/events/net.h>
 #include <trace/events/napi.h>
+#include <trace/events/sock.h>
+#include <trace/events/udp.h>
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index ea489db1bc2..5bbdbf0d366 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -129,6 +129,7 @@ static __net_init int setup_net(struct net *net)
 	atomic_set(&net->count, 1);
 	atomic_set(&net->passive, 1);
+	net->dev_base_seq = 1;
 
 #ifdef NETNS_REFCNT_DEBUG
 	atomic_set(&net->use_count, 0);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 18d9cbda3a3..adf84dd8c7b 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -177,7 +177,7 @@ static void service_arp_queue(struct netpoll_info *npi)
 	}
 }
 
-void netpoll_poll_dev(struct net_device *dev)
+static void netpoll_poll_dev(struct net_device *dev)
 {
 	const struct net_device_ops *ops;
@@ -208,13 +208,6 @@ void netpoll_poll_dev(struct net_device *dev)
 
 	zap_completion_queue();
 }
-EXPORT_SYMBOL(netpoll_poll_dev);
-
-void netpoll_poll(struct netpoll *np)
-{
-	netpoll_poll_dev(np->dev);
-}
-EXPORT_SYMBOL(netpoll_poll);
 
 static void refill_skbs(void)
 {
@@ -275,7 +268,7 @@ repeat:
 
 	if (!skb) {
 		if (++count < 10) {
-			netpoll_poll(np);
+			netpoll_poll_dev(np->dev);
 			goto repeat;
 		}
 		return NULL;
@@ -336,7 +329,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
 		}
 
 		/* tickle device maybe there is some cleanup */
-		netpoll_poll(np);
+		netpoll_poll_dev(np->dev);
 
 		udelay(USEC_PER_POLL);
 	}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index abd936d8a71..99d9e953fe3 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -56,9 +56,11 @@ struct rtnl_link {
 	rtnl_doit_func		doit;
 	rtnl_dumpit_func	dumpit;
+	rtnl_calcit_func 	calcit;
 };
 
 static DEFINE_MUTEX(rtnl_mutex);
+static u16 min_ifinfo_dump_size;
 
 void rtnl_lock(void)
 {
@@ -144,12 +146,28 @@ static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex)
 	return tab ? tab[msgindex].dumpit : NULL;
 }
 
+static rtnl_calcit_func rtnl_get_calcit(int protocol, int msgindex)
+{
+	struct rtnl_link *tab;
+
+	if (protocol <= RTNL_FAMILY_MAX)
+		tab = rtnl_msg_handlers[protocol];
+	else
+		tab = NULL;
+
+	if (tab == NULL || tab[msgindex].calcit == NULL)
+		tab = rtnl_msg_handlers[PF_UNSPEC];
+
+	return tab ? tab[msgindex].calcit : NULL;
+}
+
 /**
  * __rtnl_register - Register a rtnetlink message type
  * @protocol: Protocol family or PF_UNSPEC
  * @msgtype: rtnetlink message type
  * @doit: Function pointer called for each request message
  * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message
+ * @calcit: Function pointer to calc size of dump message
  *
  * Registers the specified function pointers (at least one of them has
  * to be non-NULL) to be called whenever a request message for the
@@ -162,7 +180,8 @@ static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex)
 * Returns 0 on success or a negative error code.
 */
 int __rtnl_register(int protocol, int msgtype,
-		    rtnl_doit_func doit, rtnl_dumpit_func dumpit)
+		    rtnl_doit_func doit, rtnl_dumpit_func dumpit,
+		    rtnl_calcit_func calcit)
 {
 	struct rtnl_link *tab;
 	int msgindex;
@@ -185,6 +204,9 @@ int __rtnl_register(int protocol, int msgtype,
 	if (dumpit)
 		tab[msgindex].dumpit = dumpit;
 
+	if (calcit)
+		tab[msgindex].calcit = calcit;
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(__rtnl_register);
@@ -199,9 +221,10 @@ EXPORT_SYMBOL_GPL(__rtnl_register);
 * of memory implies no sense in continuing.
 */
 void rtnl_register(int protocol, int msgtype,
-		   rtnl_doit_func doit, rtnl_dumpit_func dumpit)
+		   rtnl_doit_func doit, rtnl_dumpit_func dumpit,
+		   rtnl_calcit_func calcit)
 {
-	if (__rtnl_register(protocol, msgtype, doit, dumpit) < 0)
+	if (__rtnl_register(protocol, msgtype, doit, dumpit, calcit) < 0)
 		panic("Unable to register rtnetlink message handler, "
 		      "protocol = %d, message type = %d\n",
 		      protocol, msgtype);
@@ -1009,6 +1032,8 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 	s_idx = cb->args[1];
 
 	rcu_read_lock();
+	cb->seq = net->dev_base_seq;
+
 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
 		idx = 0;
 		head = &net->dev_index_head[h];
@@ -1020,6 +1045,8 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 					     cb->nlh->nlmsg_seq, 0,
 					     NLM_F_MULTI) <= 0)
 				goto out;
+
+			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
 			idx++;
 		}
@@ -1818,6 +1845,11 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 	return err;
 }
 
+static u16 rtnl_calcit(struct sk_buff *skb)
+{
+	return min_ifinfo_dump_size;
+}
+
 static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	int idx;
@@ -1847,11 +1879,14 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
 	struct net *net = dev_net(dev);
 	struct sk_buff *skb;
 	int err = -ENOBUFS;
+	size_t if_info_size;
 
-	skb = nlmsg_new(if_nlmsg_size(dev), GFP_KERNEL);
+	skb = nlmsg_new((if_info_size = if_nlmsg_size(dev)), GFP_KERNEL);
 	if (skb == NULL)
 		goto errout;
 
+	min_ifinfo_dump_size = max_t(u16, if_info_size, min_ifinfo_dump_size);
+
 	err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0);
 	if (err < 0) {
 		/* -EMSGSIZE implies BUG in if_nlmsg_size() */
@@ -1902,14 +1937,20 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
 		struct sock *rtnl;
 		rtnl_dumpit_func dumpit;
+		rtnl_calcit_func calcit;
+		u16 min_dump_alloc = 0;
 
 		dumpit = rtnl_get_dumpit(family, type);
 		if (dumpit == NULL)
 			return -EOPNOTSUPP;
+		calcit = rtnl_get_calcit(family, type);
+		if (calcit)
+			min_dump_alloc = calcit(skb);
 
 		__rtnl_unlock();
 		rtnl = net->rtnl;
-		err = netlink_dump_start(rtnl, skb, nlh, dumpit, NULL);
+		err = netlink_dump_start(rtnl, skb, nlh, dumpit,
+					 NULL, min_dump_alloc);
 		rtnl_lock();
 		return err;
 	}
@@ -2019,12 +2060,13 @@ void __init rtnetlink_init(void)
 	netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV);
 	register_netdevice_notifier(&rtnetlink_dev_notifier);
 
-	rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink, rtnl_dump_ifinfo);
-	rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL);
-	rtnl_register(PF_UNSPEC, RTM_NEWLINK, rtnl_newlink, NULL);
-	rtnl_register(PF_UNSPEC, RTM_DELLINK, rtnl_dellink, NULL);
+	rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink,
+		      rtnl_dump_ifinfo, rtnl_calcit);
+	rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL, NULL);
+	rtnl_register(PF_UNSPEC, RTM_NEWLINK, rtnl_newlink, NULL, NULL);
+	rtnl_register(PF_UNSPEC, RTM_DELLINK, rtnl_dellink, NULL, NULL);
 
-	rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all);
-	rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all);
+	rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all, NULL);
+	rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all, NULL);
 }
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 46cbd28f40f..2beda824636 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -329,6 +329,18 @@ static void skb_release_data(struct sk_buff *skb)
 				put_page(skb_shinfo(skb)->frags[i].page);
 		}
 
+		/*
+		 * If skb buf is from userspace, we need to notify the caller
+		 * the lower device DMA has done;
+		 */
+		if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
+			struct ubuf_info *uarg;
+
+			uarg = skb_shinfo(skb)->destructor_arg;
+			if (uarg->callback)
+				uarg->callback(uarg);
+		}
+
 		if (skb_has_frag_list(skb))
 			skb_drop_fraglist(skb);
@@ -481,6 +493,9 @@ bool skb_recycle_check(struct sk_buff *skb, int skb_size)
 	if (irqs_disabled())
 		return false;
 
+	if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY)
+		return false;
+
 	if (skb_is_nonlinear(skb) || skb->fclone != SKB_FCLONE_UNAVAILABLE)
 		return false;
@@ -596,6 +611,51 @@ struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src)
 }
 EXPORT_SYMBOL_GPL(skb_morph);
 
+/* skb frags copy userspace buffers to kernel */
+static int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
+{
+	int i;
+	int num_frags = skb_shinfo(skb)->nr_frags;
+	struct page *page, *head = NULL;
+	struct ubuf_info *uarg = skb_shinfo(skb)->destructor_arg;
+
+	for (i = 0; i < num_frags; i++) {
+		u8 *vaddr;
+		skb_frag_t *f = &skb_shinfo(skb)->frags[i];
+
+		page = alloc_page(GFP_ATOMIC);
+		if (!page) {
+			while (head) {
+				struct page *next = (struct page *)head->private;
+				put_page(head);
+				head = next;
+			}
+			return -ENOMEM;
+		}
+		vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
+		memcpy(page_address(page),
+		       vaddr + f->page_offset, f->size);
+		kunmap_skb_frag(vaddr);
+		page->private = (unsigned long)head;
+		head = page;
+	}
+
+	/* skb frags release userspace buffers */
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+		put_page(skb_shinfo(skb)->frags[i].page);
+
+	uarg->callback(uarg);
+
+	/* skb frags point to kernel buffers */
+	for (i = skb_shinfo(skb)->nr_frags; i > 0; i--) {
+		skb_shinfo(skb)->frags[i - 1].page_offset = 0;
+		skb_shinfo(skb)->frags[i - 1].page = head;
+		head = (struct page *)head->private;
+	}
+	return 0;
+}
+
+
 /**
  *	skb_clone	-	duplicate an sk_buff
  *	@skb: buffer to clone
@@ -614,6 +674,12 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
 {
 	struct sk_buff *n;
 
+	if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
+		if (skb_copy_ubufs(skb, gfp_mask))
+			return NULL;
+		skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
+	}
+
 	n = skb + 1;
 	if (skb->fclone == SKB_FCLONE_ORIG &&
 	    n->fclone == SKB_FCLONE_UNAVAILABLE) {
@@ -731,6 +797,14 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
 	if (skb_shinfo(skb)->nr_frags) {
 		int i;
 
+		if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
+			if (skb_copy_ubufs(skb, gfp_mask)) {
+				kfree_skb(n);
+				n = NULL;
+				goto out;
+			}
+			skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
+		}
 		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 			skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
 			get_page(skb_shinfo(n)->frags[i].page);
@@ -788,7 +862,6 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 		fastpath = true;
 	else {
 		int delta = skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1;
-
 		fastpath = atomic_read(&skb_shinfo(skb)->dataref) == delta;
 	}
@@ -819,6 +892,12 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 	if (fastpath) {
 		kfree(skb->head);
 	} else {
+		/* copy this zero copy skb frags */
+		if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
+			if (skb_copy_ubufs(skb, gfp_mask))
+				goto nofrags;
+			skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
+		}
 		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
 			get_page(skb_shinfo(skb)->frags[i].page);
@@ -853,6 +932,8 @@ adjust_others:
 	atomic_set(&skb_shinfo(skb)->dataref, 1);
 	return 0;
 
+nofrags:
+	kfree(data);
 nodata:
 	return -ENOMEM;
 }
@@ -1354,6 +1435,7 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
 		}
 		start = end;
 	}
+
 	if (!len)
 		return 0;
diff --git a/net/core/sock.c b/net/core/sock.c
index 6e819780c23..bc745d00ea4 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -128,6 +128,8 @@
 
 #include <linux/filter.h>
 
+#include <trace/events/sock.h>
+
 #ifdef CONFIG_INET
 #include <net/tcp.h>
 #endif
@@ -158,7 +160,7 @@ static const char *const af_family_key_strings[AF_MAX+1] = {
   "sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV"        ,
   "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN"     , "sk_lock-AF_PHONET"   ,
   "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG"      ,
-  "sk_lock-AF_MAX"
+  "sk_lock-AF_NFC"   , "sk_lock-AF_MAX"
 };
 static const char *const af_family_slock_key_strings[AF_MAX+1] = {
   "slock-AF_UNSPEC", "slock-AF_UNIX"     , "slock-AF_INET"     ,
@@ -174,7 +176,7 @@ static const char *const af_family_slock_key_strings[AF_MAX+1] = {
   "slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_IUCV"     ,
   "slock-AF_RXRPC" , "slock-AF_ISDN"     , "slock-AF_PHONET"   ,
   "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG"      ,
-  "slock-AF_MAX"
+  "slock-AF_NFC"   , "slock-AF_MAX"
 };
 static const char *const af_family_clock_key_strings[AF_MAX+1] = {
   "clock-AF_UNSPEC", "clock-AF_UNIX"  , "clock-AF_INET" ,
@@ -190,7 +192,7 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = {
   "clock-AF_TIPC"  , "clock-AF_BLUETOOTH", "clock-AF_IUCV"     ,
   "clock-AF_RXRPC" , "clock-AF_ISDN"     , "clock-AF_PHONET"   ,
   "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG"      ,
-  "clock-AF_MAX"
+  "clock-AF_NFC"   , "clock-AF_MAX"
 };
 
 /*
@@ -292,6 +294,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
 	    (unsigned)sk->sk_rcvbuf) {
 		atomic_inc(&sk->sk_drops);
+		trace_sock_rcvqueue_full(sk, skb);
 		return -ENOMEM;
 	}
@@ -1736,6 +1739,8 @@ suppress_allocation:
 			return 1;
 	}
 
+	trace_sock_exceed_buf_limit(sk, prot, allocated);
+
 	/* Alas. Undo changes. */
 	sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
 	atomic_long_sub(amt, prot->memory_allocated);
diff --git a/net/core/timestamping.c b/net/core/timestamping.c
index 7e7ca375d43..98a52640e7c 100644
--- a/net/core/timestamping.c
+++ b/net/core/timestamping.c
@@ -68,6 +68,7 @@ void skb_clone_tx_timestamp(struct sk_buff *skb)
 		break;
 	}
 }
+EXPORT_SYMBOL_GPL(skb_clone_tx_timestamp);
 
 void skb_complete_tx_timestamp(struct sk_buff *skb,
 			       struct skb_shared_hwtstamps *hwtstamps)
@@ -121,6 +122,7 @@ bool skb_defer_rx_timestamp(struct sk_buff *skb)
 
 	return false;
 }
+EXPORT_SYMBOL_GPL(skb_defer_rx_timestamp);
 
 void __init skb_timestamping_init(void)
 {
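Two techniques in the merge above deserve a brief illustration. First, the dev.c, net_namespace.c, and rtnetlink.c changes maintain a per-namespace generation counter: dev_base_seq_inc() bumps net->dev_base_seq (skipping 0) on every device-list change, rtnl_dump_ifinfo() snapshots it into cb->seq, and nl_dump_check_consistent() flags multi-part dumps that raced with a change. Below is a minimal user-space sketch of that pattern; all names are hypothetical and it is not kernel code:

/*
 * Sketch of the generation-counter pattern (cf. dev_base_seq_inc()
 * and nl_dump_check_consistent() above). User-space C with made-up
 * names, for illustration only.
 */
#include <stdio.h>

static unsigned int list_gen = 1;	/* 0 is reserved for "unset" */

/* Writer: bump the generation on every mutation, skipping 0
 * exactly as dev_base_seq_inc() does. */
static void list_changed(void)
{
	while (++list_gen == 0)
		;
}

/* Reader: snapshot the generation when the dump starts... */
static unsigned int dump_start(void)
{
	return list_gen;
}

/* ...and recheck it before each emitted chunk; a mismatch means the
 * dump may be inconsistent and should be flagged or restarted. */
static int dump_is_consistent(unsigned int snapshot)
{
	return snapshot == list_gen;
}

int main(void)
{
	unsigned int seq = dump_start();

	list_changed();		/* a concurrent modification */

	printf("consistent: %s\n", dump_is_consistent(seq) ? "yes" : "no");
	return 0;
}

Second, the neighbour.c rework replaces hash_mask with hash_shift: buckets are indexed by the top bits of the 32-bit hash via hash >> (32 - shift), and hash_rnd is forced odd so it behaves as a multiplicative-hash multiplier whose high bits are well mixed. A small sketch of that indexing, with a made-up hash value:

#include <stdio.h>

/*
 * Bucket index from the TOP bits of a 32-bit hash, as in the
 * neigh_lookup()/neigh_hash_grow() changes above. shift = 3 gives
 * 1 << 3 = 8 buckets, matching the initial neigh_hash_alloc(3).
 */
static unsigned int bucket_of(unsigned int hash, unsigned int shift)
{
	return hash >> (32 - shift);
}

int main(void)
{
	unsigned int hash = 0xdeadbeefu;	/* made-up hash value */

	printf("bucket %u of %u\n", bucket_of(hash, 3), 1u << 3);
	return 0;
}

Note that growing the table (hash_shift + 1) simply exposes one more high bit of the same hash, so entries split cleanly between old and new buckets.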