aboutsummaryrefslogtreecommitdiff
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/dev.c56
-rw-r--r--net/core/dst.c23
-rw-r--r--net/core/ethtool.c313
-rw-r--r--net/core/fib_rules.c6
-rw-r--r--net/core/neighbour.c191
-rw-r--r--net/core/net-sysfs.c25
-rw-r--r--net/core/net-traces.c2
-rw-r--r--net/core/net_namespace.c76
-rw-r--r--net/core/netpoll.c20
-rw-r--r--net/core/rtnetlink.c69
-rw-r--r--net/core/skbuff.c84
-rw-r--r--net/core/sock.c5
-rw-r--r--net/core/timestamping.c2
13 files changed, 351 insertions, 521 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index c7e305d13b7..9444c5cb413 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -199,6 +199,11 @@ static struct list_head ptype_all __read_mostly; /* Taps */
DEFINE_RWLOCK(dev_base_lock);
EXPORT_SYMBOL(dev_base_lock);
+static inline void dev_base_seq_inc(struct net *net)
+{
+ while (++net->dev_base_seq == 0);
+}
+
static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
{
unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
@@ -237,6 +242,9 @@ static int list_netdevice(struct net_device *dev)
hlist_add_head_rcu(&dev->index_hlist,
dev_index_hash(net, dev->ifindex));
write_unlock_bh(&dev_base_lock);
+
+ dev_base_seq_inc(net);
+
return 0;
}
@@ -253,6 +261,8 @@ static void unlist_netdevice(struct net_device *dev)
hlist_del_rcu(&dev->name_hlist);
hlist_del_rcu(&dev->index_hlist);
write_unlock_bh(&dev_base_lock);
+
+ dev_base_seq_inc(dev_net(dev));
}
/*
@@ -2096,6 +2106,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
{
const struct net_device_ops *ops = dev->netdev_ops;
int rc = NETDEV_TX_OK;
+ unsigned int skb_len;
if (likely(!skb->next)) {
u32 features;
@@ -2146,8 +2157,9 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
}
}
+ skb_len = skb->len;
rc = ops->ndo_start_xmit(skb, dev);
- trace_net_dev_xmit(skb, rc);
+ trace_net_dev_xmit(skb, rc, dev, skb_len);
if (rc == NETDEV_TX_OK)
txq_trans_update(txq);
return rc;
@@ -2167,8 +2179,9 @@ gso:
if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
skb_dst_drop(nskb);
+ skb_len = nskb->len;
rc = ops->ndo_start_xmit(nskb, dev);
- trace_net_dev_xmit(nskb, rc);
+ trace_net_dev_xmit(nskb, rc, dev, skb_len);
if (unlikely(rc != NETDEV_TX_OK)) {
if (rc & ~NETDEV_TX_MASK)
goto out_kfree_gso_skb;
@@ -2529,7 +2542,7 @@ __u32 __skb_get_rxhash(struct sk_buff *skb)
goto done;
ip = (const struct iphdr *) (skb->data + nhoff);
- if (ip->frag_off & htons(IP_MF | IP_OFFSET))
+ if (ip_is_fragment(ip))
ip_proto = 0;
else
ip_proto = ip->protocol;
@@ -3111,7 +3124,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
skb_reset_network_header(skb);
skb_reset_transport_header(skb);
- skb->mac_len = skb->network_header - skb->mac_header;
+ skb_reset_mac_len(skb);
pt_prev = NULL;
@@ -5196,7 +5209,7 @@ static void rollback_registered(struct net_device *dev)
list_del(&single);
}
-u32 netdev_fix_features(struct net_device *dev, u32 features)
+static u32 netdev_fix_features(struct net_device *dev, u32 features)
{
/* Fix illegal checksum combinations */
if ((features & NETIF_F_HW_CSUM) &&
@@ -5255,7 +5268,6 @@ u32 netdev_fix_features(struct net_device *dev, u32 features)
return features;
}
-EXPORT_SYMBOL(netdev_fix_features);
int __netdev_update_features(struct net_device *dev)
{
@@ -5475,11 +5487,9 @@ int register_netdevice(struct net_device *dev)
dev->features |= NETIF_F_NOCACHE_COPY;
}
- /* Enable GRO and NETIF_F_HIGHDMA for vlans by default,
- * vlan_dev_init() will do the dev->features check, so these features
- * are enabled only if supported by underlying device.
+ /* Make NETIF_F_HIGHDMA inheritable to VLAN devices.
*/
- dev->vlan_features |= (NETIF_F_GRO | NETIF_F_HIGHDMA);
+ dev->vlan_features |= NETIF_F_HIGHDMA;
ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
ret = notifier_to_errno(ret);
@@ -5864,8 +5874,6 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev->gso_max_size = GSO_MAX_SIZE;
- INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list);
- dev->ethtool_ntuple_list.count = 0;
INIT_LIST_HEAD(&dev->napi_list);
INIT_LIST_HEAD(&dev->unreg_list);
INIT_LIST_HEAD(&dev->link_watch_list);
@@ -5929,9 +5937,6 @@ void free_netdev(struct net_device *dev)
/* Flush device addresses */
dev_addr_flush(dev);
- /* Clear ethtool n-tuple list */
- ethtool_ntuple_flush(dev);
-
list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
netif_napi_del(p);
@@ -6175,6 +6180,11 @@ static int dev_cpu_callback(struct notifier_block *nfb,
oldsd->output_queue = NULL;
oldsd->output_queue_tailp = &oldsd->output_queue;
}
+ /* Append NAPI poll list from offline CPU. */
+ if (!list_empty(&oldsd->poll_list)) {
+ list_splice_init(&oldsd->poll_list, &sd->poll_list);
+ raise_softirq_irqoff(NET_RX_SOFTIRQ);
+ }
raise_softirq_irqoff(NET_TX_SOFTIRQ);
local_irq_enable();
@@ -6261,29 +6271,23 @@ err_name:
/**
* netdev_drivername - network driver for the device
* @dev: network device
- * @buffer: buffer for resulting name
- * @len: size of buffer
*
* Determine network driver for device.
*/
-char *netdev_drivername(const struct net_device *dev, char *buffer, int len)
+const char *netdev_drivername(const struct net_device *dev)
{
const struct device_driver *driver;
const struct device *parent;
-
- if (len <= 0 || !buffer)
- return buffer;
- buffer[0] = 0;
+ const char *empty = "";
parent = dev->dev.parent;
-
if (!parent)
- return buffer;
+ return empty;
driver = parent->driver;
if (driver && driver->name)
- strlcpy(buffer, driver->name, len);
- return buffer;
+ return driver->name;
+ return empty;
}
static int __netdev_printk(const char *level, const struct net_device *dev,
diff --git a/net/core/dst.c b/net/core/dst.c
index 9ccca038444..14b33baf073 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -171,8 +171,7 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
dst_init_metrics(dst, dst_default_metrics, true);
dst->expires = 0UL;
dst->path = dst;
- dst->neighbour = NULL;
- dst->hh = NULL;
+ dst->_neighbour = NULL;
#ifdef CONFIG_XFRM
dst->xfrm = NULL;
#endif
@@ -190,7 +189,8 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
dst->lastuse = jiffies;
dst->flags = flags;
dst->next = NULL;
- dst_entries_add(ops, 1);
+ if (!(flags & DST_NOCOUNT))
+ dst_entries_add(ops, 1);
return dst;
}
EXPORT_SYMBOL(dst_alloc);
@@ -225,25 +225,20 @@ struct dst_entry *dst_destroy(struct dst_entry * dst)
{
struct dst_entry *child;
struct neighbour *neigh;
- struct hh_cache *hh;
smp_rmb();
again:
- neigh = dst->neighbour;
- hh = dst->hh;
+ neigh = dst->_neighbour;
child = dst->child;
- dst->hh = NULL;
- if (hh)
- hh_cache_put(hh);
-
if (neigh) {
- dst->neighbour = NULL;
+ dst->_neighbour = NULL;
neigh_release(neigh);
}
- dst_entries_add(dst->ops, -1);
+ if (!(dst->flags & DST_NOCOUNT))
+ dst_entries_add(dst->ops, -1);
if (dst->ops->destroy)
dst->ops->destroy(dst);
@@ -368,8 +363,8 @@ static void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
dst->dev = dev_net(dst->dev)->loopback_dev;
dev_hold(dst->dev);
dev_put(dev);
- if (dst->neighbour && dst->neighbour->dev == dev) {
- dst->neighbour->dev = dst->dev;
+ if (dst->_neighbour && dst->_neighbour->dev == dev) {
+ dst->_neighbour->dev = dst->dev;
dev_hold(dst->dev);
dev_put(dev);
}
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index fd14116ad7f..6cdba5fc2be 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -169,18 +169,6 @@ int ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported)
}
EXPORT_SYMBOL(ethtool_op_set_flags);
-void ethtool_ntuple_flush(struct net_device *dev)
-{
- struct ethtool_rx_ntuple_flow_spec_container *fsc, *f;
-
- list_for_each_entry_safe(fsc, f, &dev->ethtool_ntuple_list.list, list) {
- list_del(&fsc->list);
- kfree(fsc);
- }
- dev->ethtool_ntuple_list.count = 0;
-}
-EXPORT_SYMBOL(ethtool_ntuple_flush);
-
/* Handlers for each ethtool command */
#define ETHTOOL_DEV_FEATURE_WORDS 1
@@ -865,34 +853,6 @@ out:
return ret;
}
-static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list,
- struct ethtool_rx_ntuple_flow_spec *spec,
- struct ethtool_rx_ntuple_flow_spec_container *fsc)
-{
-
- /* don't add filters forever */
- if (list->count >= ETHTOOL_MAX_NTUPLE_LIST_ENTRY) {
- /* free the container */
- kfree(fsc);
- return;
- }
-
- /* Copy the whole filter over */
- fsc->fs.flow_type = spec->flow_type;
- memcpy(&fsc->fs.h_u, &spec->h_u, sizeof(spec->h_u));
- memcpy(&fsc->fs.m_u, &spec->m_u, sizeof(spec->m_u));
-
- fsc->fs.vlan_tag = spec->vlan_tag;
- fsc->fs.vlan_tag_mask = spec->vlan_tag_mask;
- fsc->fs.data = spec->data;
- fsc->fs.data_mask = spec->data_mask;
- fsc->fs.action = spec->action;
-
- /* add to the list */
- list_add_tail_rcu(&fsc->list, &list->list);
- list->count++;
-}
-
/*
* ethtool does not (or did not) set masks for flow parameters that are
* not specified, so if both value and mask are 0 then this must be
@@ -930,8 +890,6 @@ static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev,
{
struct ethtool_rx_ntuple cmd;
const struct ethtool_ops *ops = dev->ethtool_ops;
- struct ethtool_rx_ntuple_flow_spec_container *fsc = NULL;
- int ret;
if (!ops->set_rx_ntuple)
return -EOPNOTSUPP;
@@ -944,269 +902,7 @@ static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev,
rx_ntuple_fix_masks(&cmd.fs);
- /*
- * Cache filter in dev struct for GET operation only if
- * the underlying driver doesn't have its own GET operation, and
- * only if the filter was added successfully. First make sure we
- * can allocate the filter, then continue if successful.
- */
- if (!ops->get_rx_ntuple) {
- fsc = kmalloc(sizeof(*fsc), GFP_ATOMIC);
- if (!fsc)
- return -ENOMEM;
- }
-
- ret = ops->set_rx_ntuple(dev, &cmd);
- if (ret) {
- kfree(fsc);
- return ret;
- }
-
- if (!ops->get_rx_ntuple)
- __rx_ntuple_filter_add(&dev->ethtool_ntuple_list, &cmd.fs, fsc);
-
- return ret;
-}
-
-static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr)
-{
- struct ethtool_gstrings gstrings;
- const struct ethtool_ops *ops = dev->ethtool_ops;
- struct ethtool_rx_ntuple_flow_spec_container *fsc;
- u8 *data;
- char *p;
- int ret, i, num_strings = 0;
-
- if (!ops->get_sset_count)
- return -EOPNOTSUPP;
-
- if (copy_from_user(&gstrings, useraddr, sizeof(gstrings)))
- return -EFAULT;
-
- ret = ops->get_sset_count(dev, gstrings.string_set);
- if (ret < 0)
- return ret;
-
- gstrings.len = ret;
-
- data = kzalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER);
- if (!data)
- return -ENOMEM;
-
- if (ops->get_rx_ntuple) {
- /* driver-specific filter grab */
- ret = ops->get_rx_ntuple(dev, gstrings.string_set, data);
- goto copy;
- }
-
- /* default ethtool filter grab */
- i = 0;
- p = (char *)data;
- list_for_each_entry(fsc, &dev->ethtool_ntuple_list.list, list) {
- sprintf(p, "Filter %d:\n", i);
- p += ETH_GSTRING_LEN;
- num_strings++;
-
- switch (fsc->fs.flow_type) {
- case TCP_V4_FLOW:
- sprintf(p, "\tFlow Type: TCP\n");
- p += ETH_GSTRING_LEN;
- num_strings++;
- break;
- case UDP_V4_FLOW:
- sprintf(p, "\tFlow Type: UDP\n");
- p += ETH_GSTRING_LEN;
- num_strings++;
- break;
- case SCTP_V4_FLOW:
- sprintf(p, "\tFlow Type: SCTP\n");
- p += ETH_GSTRING_LEN;
- num_strings++;
- break;
- case AH_ESP_V4_FLOW:
- sprintf(p, "\tFlow Type: AH ESP\n");
- p += ETH_GSTRING_LEN;
- num_strings++;
- break;
- case ESP_V4_FLOW:
- sprintf(p, "\tFlow Type: ESP\n");
- p += ETH_GSTRING_LEN;
- num_strings++;
- break;
- case IP_USER_FLOW:
- sprintf(p, "\tFlow Type: Raw IP\n");
- p += ETH_GSTRING_LEN;
- num_strings++;
- break;
- case IPV4_FLOW:
- sprintf(p, "\tFlow Type: IPv4\n");
- p += ETH_GSTRING_LEN;
- num_strings++;
- break;
- default:
- sprintf(p, "\tFlow Type: Unknown\n");
- p += ETH_GSTRING_LEN;
- num_strings++;
- goto unknown_filter;
- }
-
- /* now the rest of the filters */
- switch (fsc->fs.flow_type) {
- case TCP_V4_FLOW:
- case UDP_V4_FLOW:
- case SCTP_V4_FLOW:
- sprintf(p, "\tSrc IP addr: 0x%x\n",
- fsc->fs.h_u.tcp_ip4_spec.ip4src);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tSrc IP mask: 0x%x\n",
- fsc->fs.m_u.tcp_ip4_spec.ip4src);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tDest IP addr: 0x%x\n",
- fsc->fs.h_u.tcp_ip4_spec.ip4dst);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tDest IP mask: 0x%x\n",
- fsc->fs.m_u.tcp_ip4_spec.ip4dst);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tSrc Port: %d, mask: 0x%x\n",
- fsc->fs.h_u.tcp_ip4_spec.psrc,
- fsc->fs.m_u.tcp_ip4_spec.psrc);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tDest Port: %d, mask: 0x%x\n",
- fsc->fs.h_u.tcp_ip4_spec.pdst,
- fsc->fs.m_u.tcp_ip4_spec.pdst);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tTOS: %d, mask: 0x%x\n",
- fsc->fs.h_u.tcp_ip4_spec.tos,
- fsc->fs.m_u.tcp_ip4_spec.tos);
- p += ETH_GSTRING_LEN;
- num_strings++;
- break;
- case AH_ESP_V4_FLOW:
- case ESP_V4_FLOW:
- sprintf(p, "\tSrc IP addr: 0x%x\n",
- fsc->fs.h_u.ah_ip4_spec.ip4src);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tSrc IP mask: 0x%x\n",
- fsc->fs.m_u.ah_ip4_spec.ip4src);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tDest IP addr: 0x%x\n",
- fsc->fs.h_u.ah_ip4_spec.ip4dst);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tDest IP mask: 0x%x\n",
- fsc->fs.m_u.ah_ip4_spec.ip4dst);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tSPI: %d, mask: 0x%x\n",
- fsc->fs.h_u.ah_ip4_spec.spi,
- fsc->fs.m_u.ah_ip4_spec.spi);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tTOS: %d, mask: 0x%x\n",
- fsc->fs.h_u.ah_ip4_spec.tos,
- fsc->fs.m_u.ah_ip4_spec.tos);
- p += ETH_GSTRING_LEN;
- num_strings++;
- break;
- case IP_USER_FLOW:
- sprintf(p, "\tSrc IP addr: 0x%x\n",
- fsc->fs.h_u.usr_ip4_spec.ip4src);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tSrc IP mask: 0x%x\n",
- fsc->fs.m_u.usr_ip4_spec.ip4src);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tDest IP addr: 0x%x\n",
- fsc->fs.h_u.usr_ip4_spec.ip4dst);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tDest IP mask: 0x%x\n",
- fsc->fs.m_u.usr_ip4_spec.ip4dst);
- p += ETH_GSTRING_LEN;
- num_strings++;
- break;
- case IPV4_FLOW:
- sprintf(p, "\tSrc IP addr: 0x%x\n",
- fsc->fs.h_u.usr_ip4_spec.ip4src);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tSrc IP mask: 0x%x\n",
- fsc->fs.m_u.usr_ip4_spec.ip4src);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tDest IP addr: 0x%x\n",
- fsc->fs.h_u.usr_ip4_spec.ip4dst);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tDest IP mask: 0x%x\n",
- fsc->fs.m_u.usr_ip4_spec.ip4dst);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tL4 bytes: 0x%x, mask: 0x%x\n",
- fsc->fs.h_u.usr_ip4_spec.l4_4_bytes,
- fsc->fs.m_u.usr_ip4_spec.l4_4_bytes);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tTOS: %d, mask: 0x%x\n",
- fsc->fs.h_u.usr_ip4_spec.tos,
- fsc->fs.m_u.usr_ip4_spec.tos);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tIP Version: %d, mask: 0x%x\n",
- fsc->fs.h_u.usr_ip4_spec.ip_ver,
- fsc->fs.m_u.usr_ip4_spec.ip_ver);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tProtocol: %d, mask: 0x%x\n",
- fsc->fs.h_u.usr_ip4_spec.proto,
- fsc->fs.m_u.usr_ip4_spec.proto);
- p += ETH_GSTRING_LEN;
- num_strings++;
- break;
- }
- sprintf(p, "\tVLAN: %d, mask: 0x%x\n",
- fsc->fs.vlan_tag, fsc->fs.vlan_tag_mask);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tUser-defined: 0x%Lx\n", fsc->fs.data);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tUser-defined mask: 0x%Lx\n", fsc->fs.data_mask);
- p += ETH_GSTRING_LEN;
- num_strings++;
- if (fsc->fs.action == ETHTOOL_RXNTUPLE_ACTION_DROP)
- sprintf(p, "\tAction: Drop\n");
- else
- sprintf(p, "\tAction: Direct to queue %d\n",
- fsc->fs.action);
- p += ETH_GSTRING_LEN;
- num_strings++;
-unknown_filter:
- i++;
- }
-copy:
- /* indicate to userspace how many strings we actually have */
- gstrings.len = num_strings;
- ret = -EFAULT;
- if (copy_to_user(useraddr, &gstrings, sizeof(gstrings)))
- goto out;
- useraddr += sizeof(gstrings);
- if (copy_to_user(useraddr, data, gstrings.len * ETH_GSTRING_LEN))
- goto out;
- ret = 0;
-
-out:
- kfree(data);
- return ret;
+ return ops->set_rx_ntuple(dev, &cmd);
}
static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
@@ -1227,7 +923,7 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
regs.len = reglen;
regbuf = vzalloc(reglen);
- if (!regbuf)
+ if (reglen && !regbuf)
return -ENOMEM;
ops->get_regs(dev, &regs, regbuf);
@@ -1236,7 +932,7 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
if (copy_to_user(useraddr, &regs, sizeof(regs)))
goto out;
useraddr += offsetof(struct ethtool_regs, data);
- if (copy_to_user(useraddr, regbuf, regs.len))
+ if (regbuf && copy_to_user(useraddr, regbuf, regs.len))
goto out;
ret = 0;
@@ -2101,9 +1797,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_SRXNTUPLE:
rc = ethtool_set_rx_ntuple(dev, useraddr);
break;
- case ETHTOOL_GRXNTUPLE:
- rc = ethtool_get_rx_ntuple(dev, useraddr);
- break;
case ETHTOOL_GSSET_INFO:
rc = ethtool_get_sset_info(dev, useraddr);
break;
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 008dc70b064..e7ab0c0285b 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -740,9 +740,9 @@ static struct pernet_operations fib_rules_net_ops = {
static int __init fib_rules_init(void)
{
int err;
- rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL);
- rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL);
- rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule);
+ rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL, NULL);
+ rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL, NULL);
+ rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule, NULL);
err = register_pernet_subsys(&fib_rules_net_ops);
if (err < 0)
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 799f06e03a2..8fab9b0bb20 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -98,7 +98,7 @@ static const struct file_operations neigh_stat_seq_fops;
static DEFINE_RWLOCK(neigh_tbl_lock);
-static int neigh_blackhole(struct sk_buff *skb)
+static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
{
kfree_skb(skb);
return -ENETDOWN;
@@ -137,7 +137,7 @@ static int neigh_forced_gc(struct neigh_table *tbl)
write_lock_bh(&tbl->lock);
nht = rcu_dereference_protected(tbl->nht,
lockdep_is_held(&tbl->lock));
- for (i = 0; i <= nht->hash_mask; i++) {
+ for (i = 0; i < (1 << nht->hash_shift); i++) {
struct neighbour *n;
struct neighbour __rcu **np;
@@ -210,7 +210,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
nht = rcu_dereference_protected(tbl->nht,
lockdep_is_held(&tbl->lock));
- for (i = 0; i <= nht->hash_mask; i++) {
+ for (i = 0; i < (1 << nht->hash_shift); i++) {
struct neighbour *n;
struct neighbour __rcu **np = &nht->hash_buckets[i];
@@ -297,6 +297,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl)
n->updated = n->used = now;
n->nud_state = NUD_NONE;
n->output = neigh_blackhole;
+ seqlock_init(&n->hh.hh_lock);
n->parms = neigh_parms_clone(&tbl->parms);
setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);
@@ -312,9 +313,9 @@ out_entries:
goto out;
}
-static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries)
+static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
{
- size_t size = entries * sizeof(struct neighbour *);
+ size_t size = (1 << shift) * sizeof(struct neighbour *);
struct neigh_hash_table *ret;
struct neighbour __rcu **buckets;
@@ -332,8 +333,9 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries)
return NULL;
}
ret->hash_buckets = buckets;
- ret->hash_mask = entries - 1;
+ ret->hash_shift = shift;
get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd));
+ ret->hash_rnd |= 1;
return ret;
}
@@ -342,7 +344,7 @@ static void neigh_hash_free_rcu(struct rcu_head *head)
struct neigh_hash_table *nht = container_of(head,
struct neigh_hash_table,
rcu);
- size_t size = (nht->hash_mask + 1) * sizeof(struct neighbour *);
+ size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
struct neighbour __rcu **buckets = nht->hash_buckets;
if (size <= PAGE_SIZE)
@@ -353,21 +355,20 @@ static void neigh_hash_free_rcu(struct rcu_head *head)
}
static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
- unsigned long new_entries)
+ unsigned long new_shift)
{
unsigned int i, hash;
struct neigh_hash_table *new_nht, *old_nht;
NEIGH_CACHE_STAT_INC(tbl, hash_grows);
- BUG_ON(!is_power_of_2(new_entries));
old_nht = rcu_dereference_protected(tbl->nht,
lockdep_is_held(&tbl->lock));
- new_nht = neigh_hash_alloc(new_entries);
+ new_nht = neigh_hash_alloc(new_shift);
if (!new_nht)
return old_nht;
- for (i = 0; i <= old_nht->hash_mask; i++) {
+ for (i = 0; i < (1 << old_nht->hash_shift); i++) {
struct neighbour *n, *next;
for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
@@ -377,7 +378,7 @@ static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
hash = tbl->hash(n->primary_key, n->dev,
new_nht->hash_rnd);
- hash &= new_nht->hash_mask;
+ hash >>= (32 - new_nht->hash_shift);
next = rcu_dereference_protected(n->next,
lockdep_is_held(&tbl->lock));
@@ -406,7 +407,7 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
rcu_read_lock_bh();
nht = rcu_dereference_bh(tbl->nht);
- hash_val = tbl->hash(pkey, dev, nht->hash_rnd) & nht->hash_mask;
+ hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
n != NULL;
@@ -436,7 +437,7 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
rcu_read_lock_bh();
nht = rcu_dereference_bh(tbl->nht);
- hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) & nht->hash_mask;
+ hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);
for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
n != NULL;
@@ -492,10 +493,10 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
nht = rcu_dereference_protected(tbl->nht,
lockdep_is_held(&tbl->lock));
- if (atomic_read(&tbl->entries) > (nht->hash_mask + 1))
- nht = neigh_hash_grow(tbl, (nht->hash_mask + 1) << 1);
+ if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
+ nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
- hash_val = tbl->hash(pkey, dev, nht->hash_rnd) & nht->hash_mask;
+ hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
if (n->parms->dead) {
rc = ERR_PTR(-EINVAL);
@@ -688,8 +689,6 @@ static void neigh_destroy_rcu(struct rcu_head *head)
*/
void neigh_destroy(struct neighbour *neigh)
{
- struct hh_cache *hh;
-
NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
if (!neigh->dead) {
@@ -702,16 +701,6 @@ void neigh_destroy(struct neighbour *neigh)
if (neigh_del_timer(neigh))
printk(KERN_WARNING "Impossible event.\n");
- while ((hh = neigh->hh) != NULL) {
- neigh->hh = hh->hh_next;
- hh->hh_next = NULL;
-
- write_seqlock_bh(&hh->hh_lock);
- hh->hh_output = neigh_blackhole;
- write_sequnlock_bh(&hh->hh_lock);
- hh_cache_put(hh);
- }
-
skb_queue_purge(&neigh->arp_queue);
dev_put(neigh->dev);
@@ -731,14 +720,9 @@ EXPORT_SYMBOL(neigh_destroy);
*/
static void neigh_suspect(struct neighbour *neigh)
{
- struct hh_cache *hh;
-
NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
neigh->output = neigh->ops->output;
-
- for (hh = neigh->hh; hh; hh = hh->hh_next)
- hh->hh_output = neigh->ops->output;
}
/* Neighbour state is OK;
@@ -748,14 +732,9 @@ static void neigh_suspect(struct neighbour *neigh)
*/
static void neigh_connect(struct neighbour *neigh)
{
- struct hh_cache *hh;
-
NEIGH_PRINTK2("neigh %p is connected.\n", neigh);
neigh->output = neigh->ops->connected_output;
-
- for (hh = neigh->hh; hh; hh = hh->hh_next)
- hh->hh_output = neigh->ops->hh_output;
}
static void neigh_periodic_work(struct work_struct *work)
@@ -784,7 +763,7 @@ static void neigh_periodic_work(struct work_struct *work)
neigh_rand_reach_time(p->base_reachable_time);
}
- for (i = 0 ; i <= nht->hash_mask; i++) {
+ for (i = 0 ; i < (1 << nht->hash_shift); i++) {
np = &nht->hash_buckets[i];
while ((n = rcu_dereference_protected(*np,
@@ -1015,7 +994,7 @@ out_unlock_bh:
}
EXPORT_SYMBOL(__neigh_event_send);
-static void neigh_update_hhs(const struct neighbour *neigh)
+static void neigh_update_hhs(struct neighbour *neigh)
{
struct hh_cache *hh;
void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
@@ -1025,7 +1004,8 @@ static void neigh_update_hhs(const struct neighbour *neigh)
update = neigh->dev->header_ops->cache_update;
if (update) {
- for (hh = neigh->hh; hh; hh = hh->hh_next) {
+ hh = &neigh->hh;
+ if (hh->hh_len) {
write_seqlock_bh(&hh->hh_lock);
update(hh, neigh->dev, neigh->ha);
write_sequnlock_bh(&hh->hh_lock);
@@ -1173,12 +1153,13 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
while (neigh->nud_state & NUD_VALID &&
(skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
- struct neighbour *n1 = neigh;
+ struct dst_entry *dst = skb_dst(skb);
+ struct neighbour *n2, *n1 = neigh;
write_unlock_bh(&neigh->lock);
/* On shaper/eql skb->dst->neighbour != neigh :( */
- if (skb_dst(skb) && skb_dst(skb)->neighbour)
- n1 = skb_dst(skb)->neighbour;
- n1->output(skb);
+ if (dst && (n2 = dst_get_neighbour(dst)) != NULL)
+ n1 = n2;
+ n1->output(n1, skb);
write_lock_bh(&neigh->lock);
}
skb_queue_purge(&neigh->arp_queue);
@@ -1211,67 +1192,21 @@ struct neighbour *neigh_event_ns(struct neigh_table *tbl,
}
EXPORT_SYMBOL(neigh_event_ns);
-static inline bool neigh_hh_lookup(struct neighbour *n, struct dst_entry *dst,
- __be16 protocol)
-{
- struct hh_cache *hh;
-
- smp_rmb(); /* paired with smp_wmb() in neigh_hh_init() */
- for (hh = n->hh; hh; hh = hh->hh_next) {
- if (hh->hh_type == protocol) {
- atomic_inc(&hh->hh_refcnt);
- if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL))
- hh_cache_put(hh);
- return true;
- }
- }
- return false;
-}
-
/* called with read_lock_bh(&n->lock); */
-static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
- __be16 protocol)
+static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
{
- struct hh_cache *hh;
struct net_device *dev = dst->dev;
-
- if (likely(neigh_hh_lookup(n, dst, protocol)))
- return;
-
- /* slow path */
- hh = kzalloc(sizeof(*hh), GFP_ATOMIC);
- if (!hh)
- return;
-
- seqlock_init(&hh->hh_lock);
- hh->hh_type = protocol;
- atomic_set(&hh->hh_refcnt, 2);
-
- if (dev->header_ops->cache(n, hh)) {
- kfree(hh);
- return;
- }
+ __be16 prot = dst->ops->protocol;
+ struct hh_cache *hh = &n->hh;
write_lock_bh(&n->lock);
- /* must check if another thread already did the insert */
- if (neigh_hh_lookup(n, dst, protocol)) {
- kfree(hh);
- goto end;
- }
-
- if (n->nud_state & NUD_CONNECTED)
- hh->hh_output = n->ops->hh_output;
- else
- hh->hh_output = n->ops->output;
-
- hh->hh_next = n->hh;
- smp_wmb(); /* paired with smp_rmb() in neigh_hh_lookup() */
- n->hh = hh;
+ /* Only one thread can come in here and initialize the
+ * hh_cache entry.
+ */
+ if (!hh->hh_len)
+ dev->header_ops->cache(n, hh, prot);
- if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL))
- hh_cache_put(hh);
-end:
write_unlock_bh(&n->lock);
}
@@ -1280,7 +1215,7 @@ end:
* but resolution is not made yet.
*/
-int neigh_compat_output(struct sk_buff *skb)
+int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
@@ -1297,13 +1232,12 @@ EXPORT_SYMBOL(neigh_compat_output);
/* Slow and careful. */
-int neigh_resolve_output(struct sk_buff *skb)
+int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
- struct neighbour *neigh;
int rc = 0;
- if (!dst || !(neigh = dst->neighbour))
+ if (!dst)
goto discard;
__skb_pull(skb, skb_network_offset(skb));
@@ -1313,10 +1247,8 @@ int neigh_resolve_output(struct sk_buff *skb)
struct net_device *dev = neigh->dev;
unsigned int seq;
- if (dev->header_ops->cache &&
- !dst->hh &&
- !(dst->flags & DST_NOCACHE))
- neigh_hh_init(neigh, dst, dst->ops->protocol);
+ if (dev->header_ops->cache && !neigh->hh.hh_len)
+ neigh_hh_init(neigh, dst);
do {
seq = read_seqbegin(&neigh->ha_lock);
@@ -1325,7 +1257,7 @@ int neigh_resolve_output(struct sk_buff *skb)
} while (read_seqretry(&neigh->ha_lock, seq));
if (err >= 0)
- rc = neigh->ops->queue_xmit(skb);
+ rc = dev_queue_xmit(skb);
else
goto out_kfree_skb;
}
@@ -1333,7 +1265,7 @@ out:
return rc;
discard:
NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
- dst, dst ? dst->neighbour : NULL);
+ dst, neigh);
out_kfree_skb:
rc = -EINVAL;
kfree_skb(skb);
@@ -1343,13 +1275,11 @@ EXPORT_SYMBOL(neigh_resolve_output);
/* As fast as possible without hh cache */
-int neigh_connected_output(struct sk_buff *skb)
+int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
{
- int err;
- struct dst_entry *dst = skb_dst(skb);
- struct neighbour *neigh = dst->neighbour;
struct net_device *dev = neigh->dev;
unsigned int seq;
+ int err;