Diffstat (limited to 'net/core')
-rw-r--r--  net/core/dev.c            |  56
-rw-r--r--  net/core/dst.c            |  23
-rw-r--r--  net/core/ethtool.c        | 313
-rw-r--r--  net/core/fib_rules.c      |   6
-rw-r--r--  net/core/neighbour.c      | 191
-rw-r--r--  net/core/net-sysfs.c      |  25
-rw-r--r--  net/core/net-traces.c     |   2
-rw-r--r--  net/core/net_namespace.c  |  76
-rw-r--r--  net/core/netpoll.c        |  20
-rw-r--r--  net/core/rtnetlink.c      |  69
-rw-r--r--  net/core/skbuff.c         |  84
-rw-r--r--  net/core/sock.c           |   5
-rw-r--r--  net/core/timestamping.c   |   2
13 files changed, 351 insertions, 521 deletions
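One recurring change in the net/core/neighbour.c hunks below is the switch from mask-based bucket indexing (hash & hash_mask) to shift-based indexing that takes the top bits of the 32-bit hash (hash >> (32 - hash_shift)). A minimal, standalone sketch of the two indexing schemes, outside the kernel and with hypothetical helper names, assuming a power-of-two table of 1 << shift buckets:

#include <stdio.h>
#include <stdint.h>

/* Illustrative only: both helpers map a 32-bit hash onto a
 * power-of-two table. The shift form describes the table size by
 * its shift alone, which is what the hash_shift field replaces
 * hash_mask with in the neighbour-table changes below.
 */
static unsigned int bucket_by_shift(uint32_t hash, unsigned int shift)
{
	return hash >> (32 - shift);      /* top 'shift' bits */
}

static unsigned int bucket_by_mask(uint32_t hash, unsigned int mask)
{
	return hash & mask;               /* low bits, mask = size - 1 */
}

int main(void)
{
	uint32_t hash = 0xdeadbeef;
	unsigned int shift = 3;           /* 8 buckets */

	printf("top-bits bucket: %u\n", bucket_by_shift(hash, shift));
	printf("low-bits bucket: %u\n", bucket_by_mask(hash, (1u << shift) - 1));
	return 0;
}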
diff --git a/net/core/dev.c b/net/core/dev.c index c7e305d13b7..9444c5cb413 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -199,6 +199,11 @@ static struct list_head ptype_all __read_mostly; /* Taps */ DEFINE_RWLOCK(dev_base_lock); EXPORT_SYMBOL(dev_base_lock); +static inline void dev_base_seq_inc(struct net *net) +{ + while (++net->dev_base_seq == 0); +} + static inline struct hlist_head *dev_name_hash(struct net *net, const char *name) { unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ)); @@ -237,6 +242,9 @@ static int list_netdevice(struct net_device *dev) hlist_add_head_rcu(&dev->index_hlist, dev_index_hash(net, dev->ifindex)); write_unlock_bh(&dev_base_lock); + + dev_base_seq_inc(net); + return 0; } @@ -253,6 +261,8 @@ static void unlist_netdevice(struct net_device *dev) hlist_del_rcu(&dev->name_hlist); hlist_del_rcu(&dev->index_hlist); write_unlock_bh(&dev_base_lock); + + dev_base_seq_inc(dev_net(dev)); } /* @@ -2096,6 +2106,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, { const struct net_device_ops *ops = dev->netdev_ops; int rc = NETDEV_TX_OK; + unsigned int skb_len; if (likely(!skb->next)) { u32 features; @@ -2146,8 +2157,9 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, } } + skb_len = skb->len; rc = ops->ndo_start_xmit(skb, dev); - trace_net_dev_xmit(skb, rc); + trace_net_dev_xmit(skb, rc, dev, skb_len); if (rc == NETDEV_TX_OK) txq_trans_update(txq); return rc; @@ -2167,8 +2179,9 @@ gso: if (dev->priv_flags & IFF_XMIT_DST_RELEASE) skb_dst_drop(nskb); + skb_len = nskb->len; rc = ops->ndo_start_xmit(nskb, dev); - trace_net_dev_xmit(nskb, rc); + trace_net_dev_xmit(nskb, rc, dev, skb_len); if (unlikely(rc != NETDEV_TX_OK)) { if (rc & ~NETDEV_TX_MASK) goto out_kfree_gso_skb; @@ -2529,7 +2542,7 @@ __u32 __skb_get_rxhash(struct sk_buff *skb) goto done; ip = (const struct iphdr *) (skb->data + nhoff); - if (ip->frag_off & htons(IP_MF | IP_OFFSET)) + if (ip_is_fragment(ip)) ip_proto = 0; else ip_proto = ip->protocol; @@ -3111,7 +3124,7 @@ static int __netif_receive_skb(struct sk_buff *skb) skb_reset_network_header(skb); skb_reset_transport_header(skb); - skb->mac_len = skb->network_header - skb->mac_header; + skb_reset_mac_len(skb); pt_prev = NULL; @@ -5196,7 +5209,7 @@ static void rollback_registered(struct net_device *dev) list_del(&single); } -u32 netdev_fix_features(struct net_device *dev, u32 features) +static u32 netdev_fix_features(struct net_device *dev, u32 features) { /* Fix illegal checksum combinations */ if ((features & NETIF_F_HW_CSUM) && @@ -5255,7 +5268,6 @@ u32 netdev_fix_features(struct net_device *dev, u32 features) return features; } -EXPORT_SYMBOL(netdev_fix_features); int __netdev_update_features(struct net_device *dev) { @@ -5475,11 +5487,9 @@ int register_netdevice(struct net_device *dev) dev->features |= NETIF_F_NOCACHE_COPY; } - /* Enable GRO and NETIF_F_HIGHDMA for vlans by default, - * vlan_dev_init() will do the dev->features check, so these features - * are enabled only if supported by underlying device. + /* Make NETIF_F_HIGHDMA inheritable to VLAN devices. 
*/ - dev->vlan_features |= (NETIF_F_GRO | NETIF_F_HIGHDMA); + dev->vlan_features |= NETIF_F_HIGHDMA; ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); ret = notifier_to_errno(ret); @@ -5864,8 +5874,6 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev->gso_max_size = GSO_MAX_SIZE; - INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list); - dev->ethtool_ntuple_list.count = 0; INIT_LIST_HEAD(&dev->napi_list); INIT_LIST_HEAD(&dev->unreg_list); INIT_LIST_HEAD(&dev->link_watch_list); @@ -5929,9 +5937,6 @@ void free_netdev(struct net_device *dev) /* Flush device addresses */ dev_addr_flush(dev); - /* Clear ethtool n-tuple list */ - ethtool_ntuple_flush(dev); - list_for_each_entry_safe(p, n, &dev->napi_list, dev_list) netif_napi_del(p); @@ -6175,6 +6180,11 @@ static int dev_cpu_callback(struct notifier_block *nfb, oldsd->output_queue = NULL; oldsd->output_queue_tailp = &oldsd->output_queue; } + /* Append NAPI poll list from offline CPU. */ + if (!list_empty(&oldsd->poll_list)) { + list_splice_init(&oldsd->poll_list, &sd->poll_list); + raise_softirq_irqoff(NET_RX_SOFTIRQ); + } raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_enable(); @@ -6261,29 +6271,23 @@ err_name: /** * netdev_drivername - network driver for the device * @dev: network device - * @buffer: buffer for resulting name - * @len: size of buffer * * Determine network driver for device. */ -char *netdev_drivername(const struct net_device *dev, char *buffer, int len) +const char *netdev_drivername(const struct net_device *dev) { const struct device_driver *driver; const struct device *parent; - - if (len <= 0 || !buffer) - return buffer; - buffer[0] = 0; + const char *empty = ""; parent = dev->dev.parent; - if (!parent) - return buffer; + return empty; driver = parent->driver; if (driver && driver->name) - strlcpy(buffer, driver->name, len); - return buffer; + return driver->name; + return empty; } static int __netdev_printk(const char *level, const struct net_device *dev, diff --git a/net/core/dst.c b/net/core/dst.c index 9ccca038444..14b33baf073 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -171,8 +171,7 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev, dst_init_metrics(dst, dst_default_metrics, true); dst->expires = 0UL; dst->path = dst; - dst->neighbour = NULL; - dst->hh = NULL; + dst->_neighbour = NULL; #ifdef CONFIG_XFRM dst->xfrm = NULL; #endif @@ -190,7 +189,8 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev, dst->lastuse = jiffies; dst->flags = flags; dst->next = NULL; - dst_entries_add(ops, 1); + if (!(flags & DST_NOCOUNT)) + dst_entries_add(ops, 1); return dst; } EXPORT_SYMBOL(dst_alloc); @@ -225,25 +225,20 @@ struct dst_entry *dst_destroy(struct dst_entry * dst) { struct dst_entry *child; struct neighbour *neigh; - struct hh_cache *hh; smp_rmb(); again: - neigh = dst->neighbour; - hh = dst->hh; + neigh = dst->_neighbour; child = dst->child; - dst->hh = NULL; - if (hh) - hh_cache_put(hh); - if (neigh) { - dst->neighbour = NULL; + dst->_neighbour = NULL; neigh_release(neigh); } - dst_entries_add(dst->ops, -1); + if (!(dst->flags & DST_NOCOUNT)) + dst_entries_add(dst->ops, -1); if (dst->ops->destroy) dst->ops->destroy(dst); @@ -368,8 +363,8 @@ static void dst_ifdown(struct dst_entry *dst, struct net_device *dev, dst->dev = dev_net(dst->dev)->loopback_dev; dev_hold(dst->dev); dev_put(dev); - if (dst->neighbour && dst->neighbour->dev == dev) { - dst->neighbour->dev = dst->dev; + if (dst->_neighbour && dst->_neighbour->dev == dev) { + dst->_neighbour->dev = dst->dev; 
dev_hold(dst->dev); dev_put(dev); } diff --git a/net/core/ethtool.c b/net/core/ethtool.c index fd14116ad7f..6cdba5fc2be 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -169,18 +169,6 @@ int ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported) } EXPORT_SYMBOL(ethtool_op_set_flags); -void ethtool_ntuple_flush(struct net_device *dev) -{ - struct ethtool_rx_ntuple_flow_spec_container *fsc, *f; - - list_for_each_entry_safe(fsc, f, &dev->ethtool_ntuple_list.list, list) { - list_del(&fsc->list); - kfree(fsc); - } - dev->ethtool_ntuple_list.count = 0; -} -EXPORT_SYMBOL(ethtool_ntuple_flush); - /* Handlers for each ethtool command */ #define ETHTOOL_DEV_FEATURE_WORDS 1 @@ -865,34 +853,6 @@ out: return ret; } -static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list, - struct ethtool_rx_ntuple_flow_spec *spec, - struct ethtool_rx_ntuple_flow_spec_container *fsc) -{ - - /* don't add filters forever */ - if (list->count >= ETHTOOL_MAX_NTUPLE_LIST_ENTRY) { - /* free the container */ - kfree(fsc); - return; - } - - /* Copy the whole filter over */ - fsc->fs.flow_type = spec->flow_type; - memcpy(&fsc->fs.h_u, &spec->h_u, sizeof(spec->h_u)); - memcpy(&fsc->fs.m_u, &spec->m_u, sizeof(spec->m_u)); - - fsc->fs.vlan_tag = spec->vlan_tag; - fsc->fs.vlan_tag_mask = spec->vlan_tag_mask; - fsc->fs.data = spec->data; - fsc->fs.data_mask = spec->data_mask; - fsc->fs.action = spec->action; - - /* add to the list */ - list_add_tail_rcu(&fsc->list, &list->list); - list->count++; -} - /* * ethtool does not (or did not) set masks for flow parameters that are * not specified, so if both value and mask are 0 then this must be @@ -930,8 +890,6 @@ static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev, { struct ethtool_rx_ntuple cmd; const struct ethtool_ops *ops = dev->ethtool_ops; - struct ethtool_rx_ntuple_flow_spec_container *fsc = NULL; - int ret; if (!ops->set_rx_ntuple) return -EOPNOTSUPP; @@ -944,269 +902,7 @@ static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev, rx_ntuple_fix_masks(&cmd.fs); - /* - * Cache filter in dev struct for GET operation only if - * the underlying driver doesn't have its own GET operation, and - * only if the filter was added successfully. First make sure we - * can allocate the filter, then continue if successful. 
- */ - if (!ops->get_rx_ntuple) { - fsc = kmalloc(sizeof(*fsc), GFP_ATOMIC); - if (!fsc) - return -ENOMEM; - } - - ret = ops->set_rx_ntuple(dev, &cmd); - if (ret) { - kfree(fsc); - return ret; - } - - if (!ops->get_rx_ntuple) - __rx_ntuple_filter_add(&dev->ethtool_ntuple_list, &cmd.fs, fsc); - - return ret; -} - -static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr) -{ - struct ethtool_gstrings gstrings; - const struct ethtool_ops *ops = dev->ethtool_ops; - struct ethtool_rx_ntuple_flow_spec_container *fsc; - u8 *data; - char *p; - int ret, i, num_strings = 0; - - if (!ops->get_sset_count) - return -EOPNOTSUPP; - - if (copy_from_user(&gstrings, useraddr, sizeof(gstrings))) - return -EFAULT; - - ret = ops->get_sset_count(dev, gstrings.string_set); - if (ret < 0) - return ret; - - gstrings.len = ret; - - data = kzalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER); - if (!data) - return -ENOMEM; - - if (ops->get_rx_ntuple) { - /* driver-specific filter grab */ - ret = ops->get_rx_ntuple(dev, gstrings.string_set, data); - goto copy; - } - - /* default ethtool filter grab */ - i = 0; - p = (char *)data; - list_for_each_entry(fsc, &dev->ethtool_ntuple_list.list, list) { - sprintf(p, "Filter %d:\n", i); - p += ETH_GSTRING_LEN; - num_strings++; - - switch (fsc->fs.flow_type) { - case TCP_V4_FLOW: - sprintf(p, "\tFlow Type: TCP\n"); - p += ETH_GSTRING_LEN; - num_strings++; - break; - case UDP_V4_FLOW: - sprintf(p, "\tFlow Type: UDP\n"); - p += ETH_GSTRING_LEN; - num_strings++; - break; - case SCTP_V4_FLOW: - sprintf(p, "\tFlow Type: SCTP\n"); - p += ETH_GSTRING_LEN; - num_strings++; - break; - case AH_ESP_V4_FLOW: - sprintf(p, "\tFlow Type: AH ESP\n"); - p += ETH_GSTRING_LEN; - num_strings++; - break; - case ESP_V4_FLOW: - sprintf(p, "\tFlow Type: ESP\n"); - p += ETH_GSTRING_LEN; - num_strings++; - break; - case IP_USER_FLOW: - sprintf(p, "\tFlow Type: Raw IP\n"); - p += ETH_GSTRING_LEN; - num_strings++; - break; - case IPV4_FLOW: - sprintf(p, "\tFlow Type: IPv4\n"); - p += ETH_GSTRING_LEN; - num_strings++; - break; - default: - sprintf(p, "\tFlow Type: Unknown\n"); - p += ETH_GSTRING_LEN; - num_strings++; - goto unknown_filter; - } - - /* now the rest of the filters */ - switch (fsc->fs.flow_type) { - case TCP_V4_FLOW: - case UDP_V4_FLOW: - case SCTP_V4_FLOW: - sprintf(p, "\tSrc IP addr: 0x%x\n", - fsc->fs.h_u.tcp_ip4_spec.ip4src); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tSrc IP mask: 0x%x\n", - fsc->fs.m_u.tcp_ip4_spec.ip4src); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tDest IP addr: 0x%x\n", - fsc->fs.h_u.tcp_ip4_spec.ip4dst); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tDest IP mask: 0x%x\n", - fsc->fs.m_u.tcp_ip4_spec.ip4dst); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tSrc Port: %d, mask: 0x%x\n", - fsc->fs.h_u.tcp_ip4_spec.psrc, - fsc->fs.m_u.tcp_ip4_spec.psrc); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tDest Port: %d, mask: 0x%x\n", - fsc->fs.h_u.tcp_ip4_spec.pdst, - fsc->fs.m_u.tcp_ip4_spec.pdst); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tTOS: %d, mask: 0x%x\n", - fsc->fs.h_u.tcp_ip4_spec.tos, - fsc->fs.m_u.tcp_ip4_spec.tos); - p += ETH_GSTRING_LEN; - num_strings++; - break; - case AH_ESP_V4_FLOW: - case ESP_V4_FLOW: - sprintf(p, "\tSrc IP addr: 0x%x\n", - fsc->fs.h_u.ah_ip4_spec.ip4src); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tSrc IP mask: 0x%x\n", - fsc->fs.m_u.ah_ip4_spec.ip4src); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tDest IP addr: 0x%x\n", - 
fsc->fs.h_u.ah_ip4_spec.ip4dst); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tDest IP mask: 0x%x\n", - fsc->fs.m_u.ah_ip4_spec.ip4dst); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tSPI: %d, mask: 0x%x\n", - fsc->fs.h_u.ah_ip4_spec.spi, - fsc->fs.m_u.ah_ip4_spec.spi); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tTOS: %d, mask: 0x%x\n", - fsc->fs.h_u.ah_ip4_spec.tos, - fsc->fs.m_u.ah_ip4_spec.tos); - p += ETH_GSTRING_LEN; - num_strings++; - break; - case IP_USER_FLOW: - sprintf(p, "\tSrc IP addr: 0x%x\n", - fsc->fs.h_u.usr_ip4_spec.ip4src); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tSrc IP mask: 0x%x\n", - fsc->fs.m_u.usr_ip4_spec.ip4src); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tDest IP addr: 0x%x\n", - fsc->fs.h_u.usr_ip4_spec.ip4dst); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tDest IP mask: 0x%x\n", - fsc->fs.m_u.usr_ip4_spec.ip4dst); - p += ETH_GSTRING_LEN; - num_strings++; - break; - case IPV4_FLOW: - sprintf(p, "\tSrc IP addr: 0x%x\n", - fsc->fs.h_u.usr_ip4_spec.ip4src); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tSrc IP mask: 0x%x\n", - fsc->fs.m_u.usr_ip4_spec.ip4src); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tDest IP addr: 0x%x\n", - fsc->fs.h_u.usr_ip4_spec.ip4dst); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tDest IP mask: 0x%x\n", - fsc->fs.m_u.usr_ip4_spec.ip4dst); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tL4 bytes: 0x%x, mask: 0x%x\n", - fsc->fs.h_u.usr_ip4_spec.l4_4_bytes, - fsc->fs.m_u.usr_ip4_spec.l4_4_bytes); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tTOS: %d, mask: 0x%x\n", - fsc->fs.h_u.usr_ip4_spec.tos, - fsc->fs.m_u.usr_ip4_spec.tos); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tIP Version: %d, mask: 0x%x\n", - fsc->fs.h_u.usr_ip4_spec.ip_ver, - fsc->fs.m_u.usr_ip4_spec.ip_ver); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tProtocol: %d, mask: 0x%x\n", - fsc->fs.h_u.usr_ip4_spec.proto, - fsc->fs.m_u.usr_ip4_spec.proto); - p += ETH_GSTRING_LEN; - num_strings++; - break; - } - sprintf(p, "\tVLAN: %d, mask: 0x%x\n", - fsc->fs.vlan_tag, fsc->fs.vlan_tag_mask); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tUser-defined: 0x%Lx\n", fsc->fs.data); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tUser-defined mask: 0x%Lx\n", fsc->fs.data_mask); - p += ETH_GSTRING_LEN; - num_strings++; - if (fsc->fs.action == ETHTOOL_RXNTUPLE_ACTION_DROP) - sprintf(p, "\tAction: Drop\n"); - else - sprintf(p, "\tAction: Direct to queue %d\n", - fsc->fs.action); - p += ETH_GSTRING_LEN; - num_strings++; -unknown_filter: - i++; - } -copy: - /* indicate to userspace how many strings we actually have */ - gstrings.len = num_strings; - ret = -EFAULT; - if (copy_to_user(useraddr, &gstrings, sizeof(gstrings))) - goto out; - useraddr += sizeof(gstrings); - if (copy_to_user(useraddr, data, gstrings.len * ETH_GSTRING_LEN)) - goto out; - ret = 0; - -out: - kfree(data); - return ret; + return ops->set_rx_ntuple(dev, &cmd); } static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) @@ -1227,7 +923,7 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) regs.len = reglen; regbuf = vzalloc(reglen); - if (!regbuf) + if (reglen && !regbuf) return -ENOMEM; ops->get_regs(dev, ®s, regbuf); @@ -1236,7 +932,7 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) if (copy_to_user(useraddr, ®s, sizeof(regs))) goto out; useraddr += offsetof(struct ethtool_regs, data); - 
if (copy_to_user(useraddr, regbuf, regs.len)) + if (regbuf && copy_to_user(useraddr, regbuf, regs.len)) goto out; ret = 0; @@ -2101,9 +1797,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_SRXNTUPLE: rc = ethtool_set_rx_ntuple(dev, useraddr); break; - case ETHTOOL_GRXNTUPLE: - rc = ethtool_get_rx_ntuple(dev, useraddr); - break; case ETHTOOL_GSSET_INFO: rc = ethtool_get_sset_info(dev, useraddr); break; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 008dc70b064..e7ab0c0285b 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -740,9 +740,9 @@ static struct pernet_operations fib_rules_net_ops = { static int __init fib_rules_init(void) { int err; - rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL); - rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL); - rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule); + rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL, NULL); + rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL, NULL); + rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule, NULL); err = register_pernet_subsys(&fib_rules_net_ops); if (err < 0) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 799f06e03a2..8fab9b0bb20 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -98,7 +98,7 @@ static const struct file_operations neigh_stat_seq_fops; static DEFINE_RWLOCK(neigh_tbl_lock); -static int neigh_blackhole(struct sk_buff *skb) +static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb) { kfree_skb(skb); return -ENETDOWN; @@ -137,7 +137,7 @@ static int neigh_forced_gc(struct neigh_table *tbl) write_lock_bh(&tbl->lock); nht = rcu_dereference_protected(tbl->nht, lockdep_is_held(&tbl->lock)); - for (i = 0; i <= nht->hash_mask; i++) { + for (i = 0; i < (1 << nht->hash_shift); i++) { struct neighbour *n; struct neighbour __rcu **np; @@ -210,7 +210,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev) nht = rcu_dereference_protected(tbl->nht, lockdep_is_held(&tbl->lock)); - for (i = 0; i <= nht->hash_mask; i++) { + for (i = 0; i < (1 << nht->hash_shift); i++) { struct neighbour *n; struct neighbour __rcu **np = &nht->hash_buckets[i]; @@ -297,6 +297,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl) n->updated = n->used = now; n->nud_state = NUD_NONE; n->output = neigh_blackhole; + seqlock_init(&n->hh.hh_lock); n->parms = neigh_parms_clone(&tbl->parms); setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n); @@ -312,9 +313,9 @@ out_entries: goto out; } -static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries) +static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift) { - size_t size = entries * sizeof(struct neighbour *); + size_t size = (1 << shift) * sizeof(struct neighbour *); struct neigh_hash_table *ret; struct neighbour __rcu **buckets; @@ -332,8 +333,9 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries) return NULL; } ret->hash_buckets = buckets; - ret->hash_mask = entries - 1; + ret->hash_shift = shift; get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd)); + ret->hash_rnd |= 1; return ret; } @@ -342,7 +344,7 @@ static void neigh_hash_free_rcu(struct rcu_head *head) struct neigh_hash_table *nht = container_of(head, struct neigh_hash_table, rcu); - size_t size = (nht->hash_mask + 1) * sizeof(struct neighbour *); + size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *); struct neighbour __rcu **buckets = nht->hash_buckets; if (size <= PAGE_SIZE) @@ 
-353,21 +355,20 @@ static void neigh_hash_free_rcu(struct rcu_head *head) } static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl, - unsigned long new_entries) + unsigned long new_shift) { unsigned int i, hash; struct neigh_hash_table *new_nht, *old_nht; NEIGH_CACHE_STAT_INC(tbl, hash_grows); - BUG_ON(!is_power_of_2(new_entries)); old_nht = rcu_dereference_protected(tbl->nht, lockdep_is_held(&tbl->lock)); - new_nht = neigh_hash_alloc(new_entries); + new_nht = neigh_hash_alloc(new_shift); if (!new_nht) return old_nht; - for (i = 0; i <= old_nht->hash_mask; i++) { + for (i = 0; i < (1 << old_nht->hash_shift); i++) { struct neighbour *n, *next; for (n = rcu_dereference_protected(old_nht->hash_buckets[i], @@ -377,7 +378,7 @@ static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl, hash = tbl->hash(n->primary_key, n->dev, new_nht->hash_rnd); - hash &= new_nht->hash_mask; + hash >>= (32 - new_nht->hash_shift); next = rcu_dereference_protected(n->next, lockdep_is_held(&tbl->lock)); @@ -406,7 +407,7 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, rcu_read_lock_bh(); nht = rcu_dereference_bh(tbl->nht); - hash_val = tbl->hash(pkey, dev, nht->hash_rnd) & nht->hash_mask; + hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift); for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]); n != NULL; @@ -436,7 +437,7 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net, rcu_read_lock_bh(); nht = rcu_dereference_bh(tbl->nht); - hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) & nht->hash_mask; + hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift); for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]); n != NULL; @@ -492,10 +493,10 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey, nht = rcu_dereference_protected(tbl->nht, lockdep_is_held(&tbl->lock)); - if (atomic_read(&tbl->entries) > (nht->hash_mask + 1)) - nht = neigh_hash_grow(tbl, (nht->hash_mask + 1) << 1); + if (atomic_read(&tbl->entries) > (1 << nht->hash_shift)) + nht = neigh_hash_grow(tbl, nht->hash_shift + 1); - hash_val = tbl->hash(pkey, dev, nht->hash_rnd) & nht->hash_mask; + hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift); if (n->parms->dead) { rc = ERR_PTR(-EINVAL); @@ -688,8 +689,6 @@ static void neigh_destroy_rcu(struct rcu_head *head) */ void neigh_destroy(struct neighbour *neigh) { - struct hh_cache *hh; - NEIGH_CACHE_STAT_INC(neigh->tbl, destroys); if (!neigh->dead) { @@ -702,16 +701,6 @@ void neigh_destroy(struct neighbour *neigh) if (neigh_del_timer(neigh)) printk(KERN_WARNING "Impossible event.\n"); - while ((hh = neigh->hh) != NULL) { - neigh->hh = hh->hh_next; - hh->hh_next = NULL; - - write_seqlock_bh(&hh->hh_lock); - hh->hh_output = neigh_blackhole; - write_sequnlock_bh(&hh->hh_lock); - hh_cache_put(hh); - } - skb_queue_purge(&neigh->arp_queue); dev_put(neigh->dev); @@ -731,14 +720,9 @@ EXPORT_SYMBOL(neigh_destroy); */ static void neigh_suspect(struct neighbour *neigh) { - struct hh_cache *hh; - NEIGH_PRINTK2("neigh %p is suspected.\n", neigh); neigh->output = neigh->ops->output; - - for (hh = neigh->hh; hh; hh = hh->hh_next) - hh->hh_output = neigh->ops->output; } /* Neighbour state is OK; @@ -748,14 +732,9 @@ static void neigh_suspect(struct neighbour *neigh) */ static void neigh_connect(struct neighbour *neigh) { - struct hh_cache *hh; - NEIGH_PRINTK2("neigh %p is connected.\n", neigh); neigh->output = neigh->ops->connected_output; - - for (hh = 
neigh->hh; hh; hh = hh->hh_next) - hh->hh_output = neigh->ops->hh_output; } static void neigh_periodic_work(struct work_struct *work) @@ -784,7 +763,7 @@ static void neigh_periodic_work(struct work_struct *work) neigh_rand_reach_time(p->base_reachable_time); } - for (i = 0 ; i <= nht->hash_mask; i++) { + for (i = 0 ; i < (1 << nht->hash_shift); i++) { np = &nht->hash_buckets[i]; while ((n = rcu_dereference_protected(*np, @@ -1015,7 +994,7 @@ out_unlock_bh: } EXPORT_SYMBOL(__neigh_event_send); -static void neigh_update_hhs(const struct neighbour *neigh) +static void neigh_update_hhs(struct neighbour *neigh) { struct hh_cache *hh; void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *) @@ -1025,7 +1004,8 @@ static void neigh_update_hhs(const struct neighbour *neigh) update = neigh->dev->header_ops->cache_update; if (update) { - for (hh = neigh->hh; hh; hh = hh->hh_next) { + hh = &neigh->hh; + if (hh->hh_len) { write_seqlock_bh(&hh->hh_lock); update(hh, neigh->dev, neigh->ha); write_sequnlock_bh(&hh->hh_lock); @@ -1173,12 +1153,13 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, while (neigh->nud_state & NUD_VALID && (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) { - struct neighbour *n1 = neigh; + struct dst_entry *dst = skb_dst(skb); + struct neighbour *n2, *n1 = neigh; write_unlock_bh(&neigh->lock); /* On shaper/eql skb->dst->neighbour != neigh :( */ - if (skb_dst(skb) && skb_dst(skb)->neighbour) - n1 = skb_dst(skb)->neighbour; - n1->output(skb); + if (dst && (n2 = dst_get_neighbour(dst)) != NULL) + n1 = n2; + n1->output(n1, skb); write_lock_bh(&neigh->lock); } skb_queue_purge(&neigh->arp_queue); @@ -1211,67 +1192,21 @@ struct neighbour *neigh_event_ns(struct neigh_table *tbl, } EXPORT_SYMBOL(neigh_event_ns); -static inline bool neigh_hh_lookup(struct neighbour *n, struct dst_entry *dst, - __be16 protocol) -{ - struct hh_cache *hh; - - smp_rmb(); /* paired with smp_wmb() in neigh_hh_init() */ - for (hh = n->hh; hh; hh = hh->hh_next) { - if (hh->hh_type == protocol) { - atomic_inc(&hh->hh_refcnt); - if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL)) - hh_cache_put(hh); - return true; - } - } - return false; -} - /* called with read_lock_bh(&n->lock); */ -static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst, - __be16 protocol) +static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst) { - struct hh_cache *hh; struct net_device *dev = dst->dev; - - if (likely(neigh_hh_lookup(n, dst, protocol))) - return; - - /* slow path */ - hh = kzalloc(sizeof(*hh), GFP_ATOMIC); - if (!hh) - return; - - seqlock_init(&hh->hh_lock); - hh->hh_type = protocol; - atomic_set(&hh->hh_refcnt, 2); - - if (dev->header_ops->cache(n, hh)) { - kfree(hh); - return; - } + __be16 prot = dst->ops->protocol; + struct hh_cache *hh = &n->hh; write_lock_bh(&n->lock); - /* must check if another thread already did the insert */ - if (neigh_hh_lookup(n, dst, protocol)) { - kfree(hh); - goto end; - } - - if (n->nud_state & NUD_CONNECTED) - hh->hh_output = n->ops->hh_output; - else - hh->hh_output = n->ops->output; - - hh->hh_next = n->hh; - smp_wmb(); /* paired with smp_rmb() in neigh_hh_lookup() */ - n->hh = hh; + /* Only one thread can come in here and initialize the + * hh_cache entry. + */ + if (!hh->hh_len) + dev->header_ops->cache(n, hh, prot); - if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL)) - hh_cache_put(hh); -end: write_unlock_bh(&n->lock); } @@ -1280,7 +1215,7 @@ end: * but resolution is not made yet. 
*/ -int neigh_compat_output(struct sk_buff *skb) +int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb) { struct net_device *dev = skb->dev; @@ -1297,13 +1232,12 @@ EXPORT_SYMBOL(neigh_compat_output); /* Slow and careful. */ -int neigh_resolve_output(struct sk_buff *skb) +int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); - struct neighbour *neigh; int rc = 0; - if (!dst || !(neigh = dst->neighbour)) + if (!dst) goto discard; __skb_pull(skb, skb_network_offset(skb)); @@ -1313,10 +1247,8 @@ int neigh_resolve_output(struct sk_buff *skb) struct net_device *dev = neigh->dev; unsigned int seq; - if (dev->header_ops->cache && - !dst->hh && - !(dst->flags & DST_NOCACHE)) - neigh_hh_init(neigh, dst, dst->ops->protocol); + if (dev->header_ops->cache && !neigh->hh.hh_len) + neigh_hh_init(neigh, dst); do { seq = read_seqbegin(&neigh->ha_lock); @@ -1325,7 +1257,7 @@ int neigh_resolve_output(struct sk_buff *skb) } while (read_seqretry(&neigh->ha_lock, seq)); if (err >= 0) - rc = neigh->ops->queue_xmit(skb); + rc = dev_queue_xmit(skb); else goto out_kfree_skb; } @@ -1333,7 +1265,7 @@ out: return rc; discard: NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n", - dst, dst ? dst->neighbour : NULL); + dst, neigh); out_kfree_skb: rc = -EINVAL; kfree_skb(skb); @@ -1343,13 +1275,11 @@ EXPORT_SYMBOL(neigh_resolve_output); /* As fast as possible without hh cache */ -int neigh_connected_output(struct sk_buff *skb) +int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb) { - int err; - struct dst_entry *dst = skb_dst(skb); - struct neighbour *neigh = dst->neighbour; struct net_device *dev = neigh->dev; unsigned int seq; + int err; |