aboutsummaryrefslogtreecommitdiff
path: root/net/core
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2011-07-22 14:43:13 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2011-07-22 14:43:13 -0700
commit951cc93a7493a81a47e20231441bc6cf17c98a37 (patch)
treef53934f0f225e0215a85c8c59af4c6513e89e3f1 /net/core
parenta7e1aabb28e8154ce987b622fd78d80a1ca39361 (diff)
parent415b3334a21aa67806c52d1acf4e72e14f7f402f (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1287 commits) icmp: Fix regression in nexthop resolution during replies. net: Fix ppc64 BPF JIT dependencies. acenic: include NET_SKB_PAD headroom to incoming skbs ixgbe: convert to ndo_fix_features ixgbe: only enable WoL for magic packet by default ixgbe: remove ifdef check for non-existent define ixgbe: Pass staterr instead of re-reading status and error bits from descriptor ixgbe: Move interrupt related values out of ring and into q_vector ixgbe: add structure for containing RX/TX rings to q_vector ixgbe: inline the ixgbe_maybe_stop_tx function ixgbe: Update ATR to use recorded TX queues instead of CPU for routing igb: Fix for DH89xxCC near end loopback test e1000: always call e1000_check_for_link() on e1000_ce4100 MACs. netxen: add fw version compatibility check be2net: request native mode each time the card is reset ipv4: Constrain UFO fragment sizes to multiples of 8 bytes virtio_net: Fix panic in virtnet_remove ipv6: make fragment identifications less predictable ipv6: unshare inetpeers can: make function can_get_bittiming static ...
Diffstat (limited to 'net/core')
-rw-r--r--net/core/dev.c26
-rw-r--r--net/core/dst.c17
-rw-r--r--net/core/ethtool.c313
-rw-r--r--net/core/fib_rules.c6
-rw-r--r--net/core/neighbour.c191
-rw-r--r--net/core/net-sysfs.c2
-rw-r--r--net/core/net-traces.c2
-rw-r--r--net/core/net_namespace.c1
-rw-r--r--net/core/netpoll.c13
-rw-r--r--net/core/rtnetlink.c64
-rw-r--r--net/core/skbuff.c84
-rw-r--r--net/core/sock.c11
-rw-r--r--net/core/timestamping.c2
13 files changed, 241 insertions, 491 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 9c58c1ec41a..9444c5cb413 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -199,6 +199,11 @@ static struct list_head ptype_all __read_mostly; /* Taps */
DEFINE_RWLOCK(dev_base_lock);
EXPORT_SYMBOL(dev_base_lock);
+static inline void dev_base_seq_inc(struct net *net)
+{
+ while (++net->dev_base_seq == 0);
+}
+
static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
{
unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
@@ -237,6 +242,9 @@ static int list_netdevice(struct net_device *dev)
hlist_add_head_rcu(&dev->index_hlist,
dev_index_hash(net, dev->ifindex));
write_unlock_bh(&dev_base_lock);
+
+ dev_base_seq_inc(net);
+
return 0;
}
@@ -253,6 +261,8 @@ static void unlist_netdevice(struct net_device *dev)
hlist_del_rcu(&dev->name_hlist);
hlist_del_rcu(&dev->index_hlist);
write_unlock_bh(&dev_base_lock);
+
+ dev_base_seq_inc(dev_net(dev));
}
/*
@@ -2532,7 +2542,7 @@ __u32 __skb_get_rxhash(struct sk_buff *skb)
goto done;
ip = (const struct iphdr *) (skb->data + nhoff);
- if (ip->frag_off & htons(IP_MF | IP_OFFSET))
+ if (ip_is_fragment(ip))
ip_proto = 0;
else
ip_proto = ip->protocol;
@@ -5199,7 +5209,7 @@ static void rollback_registered(struct net_device *dev)
list_del(&single);
}
-u32 netdev_fix_features(struct net_device *dev, u32 features)
+static u32 netdev_fix_features(struct net_device *dev, u32 features)
{
/* Fix illegal checksum combinations */
if ((features & NETIF_F_HW_CSUM) &&
@@ -5258,7 +5268,6 @@ u32 netdev_fix_features(struct net_device *dev, u32 features)
return features;
}
-EXPORT_SYMBOL(netdev_fix_features);
int __netdev_update_features(struct net_device *dev)
{
@@ -5478,11 +5487,9 @@ int register_netdevice(struct net_device *dev)
dev->features |= NETIF_F_NOCACHE_COPY;
}
- /* Enable GRO and NETIF_F_HIGHDMA for vlans by default,
- * vlan_dev_init() will do the dev->features check, so these features
- * are enabled only if supported by underlying device.
+ /* Make NETIF_F_HIGHDMA inheritable to VLAN devices.
*/
- dev->vlan_features |= (NETIF_F_GRO | NETIF_F_HIGHDMA);
+ dev->vlan_features |= NETIF_F_HIGHDMA;
ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
ret = notifier_to_errno(ret);
@@ -5867,8 +5874,6 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev->gso_max_size = GSO_MAX_SIZE;
- INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list);
- dev->ethtool_ntuple_list.count = 0;
INIT_LIST_HEAD(&dev->napi_list);
INIT_LIST_HEAD(&dev->unreg_list);
INIT_LIST_HEAD(&dev->link_watch_list);
@@ -5932,9 +5937,6 @@ void free_netdev(struct net_device *dev)
/* Flush device addresses */
dev_addr_flush(dev);
- /* Clear ethtool n-tuple list */
- ethtool_ntuple_flush(dev);
-
list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
netif_napi_del(p);
diff --git a/net/core/dst.c b/net/core/dst.c
index 6135f367169..14b33baf073 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -171,8 +171,7 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
dst_init_metrics(dst, dst_default_metrics, true);
dst->expires = 0UL;
dst->path = dst;
- dst->neighbour = NULL;
- dst->hh = NULL;
+ dst->_neighbour = NULL;
#ifdef CONFIG_XFRM
dst->xfrm = NULL;
#endif
@@ -226,21 +225,15 @@ struct dst_entry *dst_destroy(struct dst_entry * dst)
{
struct dst_entry *child;
struct neighbour *neigh;
- struct hh_cache *hh;
smp_rmb();
again:
- neigh = dst->neighbour;
- hh = dst->hh;
+ neigh = dst->_neighbour;
child = dst->child;
- dst->hh = NULL;
- if (hh)
- hh_cache_put(hh);
-
if (neigh) {
- dst->neighbour = NULL;
+ dst->_neighbour = NULL;
neigh_release(neigh);
}
@@ -370,8 +363,8 @@ static void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
dst->dev = dev_net(dst->dev)->loopback_dev;
dev_hold(dst->dev);
dev_put(dev);
- if (dst->neighbour && dst->neighbour->dev == dev) {
- dst->neighbour->dev = dst->dev;
+ if (dst->_neighbour && dst->_neighbour->dev == dev) {
+ dst->_neighbour->dev = dst->dev;
dev_hold(dst->dev);
dev_put(dev);
}
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index fd14116ad7f..6cdba5fc2be 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -169,18 +169,6 @@ int ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported)
}
EXPORT_SYMBOL(ethtool_op_set_flags);
-void ethtool_ntuple_flush(struct net_device *dev)
-{
- struct ethtool_rx_ntuple_flow_spec_container *fsc, *f;
-
- list_for_each_entry_safe(fsc, f, &dev->ethtool_ntuple_list.list, list) {
- list_del(&fsc->list);
- kfree(fsc);
- }
- dev->ethtool_ntuple_list.count = 0;
-}
-EXPORT_SYMBOL(ethtool_ntuple_flush);
-
/* Handlers for each ethtool command */
#define ETHTOOL_DEV_FEATURE_WORDS 1
@@ -865,34 +853,6 @@ out:
return ret;
}
-static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list,
- struct ethtool_rx_ntuple_flow_spec *spec,
- struct ethtool_rx_ntuple_flow_spec_container *fsc)
-{
-
- /* don't add filters forever */
- if (list->count >= ETHTOOL_MAX_NTUPLE_LIST_ENTRY) {
- /* free the container */
- kfree(fsc);
- return;
- }
-
- /* Copy the whole filter over */
- fsc->fs.flow_type = spec->flow_type;
- memcpy(&fsc->fs.h_u, &spec->h_u, sizeof(spec->h_u));
- memcpy(&fsc->fs.m_u, &spec->m_u, sizeof(spec->m_u));
-
- fsc->fs.vlan_tag = spec->vlan_tag;
- fsc->fs.vlan_tag_mask = spec->vlan_tag_mask;
- fsc->fs.data = spec->data;
- fsc->fs.data_mask = spec->data_mask;
- fsc->fs.action = spec->action;
-
- /* add to the list */
- list_add_tail_rcu(&fsc->list, &list->list);
- list->count++;
-}
-
/*
* ethtool does not (or did not) set masks for flow parameters that are
* not specified, so if both value and mask are 0 then this must be
@@ -930,8 +890,6 @@ static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev,
{
struct ethtool_rx_ntuple cmd;
const struct ethtool_ops *ops = dev->ethtool_ops;
- struct ethtool_rx_ntuple_flow_spec_container *fsc = NULL;
- int ret;
if (!ops->set_rx_ntuple)
return -EOPNOTSUPP;
@@ -944,269 +902,7 @@ static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev,
rx_ntuple_fix_masks(&cmd.fs);
- /*
- * Cache filter in dev struct for GET operation only if
- * the underlying driver doesn't have its own GET operation, and
- * only if the filter was added successfully. First make sure we
- * can allocate the filter, then continue if successful.
- */
- if (!ops->get_rx_ntuple) {
- fsc = kmalloc(sizeof(*fsc), GFP_ATOMIC);
- if (!fsc)
- return -ENOMEM;
- }
-
- ret = ops->set_rx_ntuple(dev, &cmd);
- if (ret) {
- kfree(fsc);
- return ret;
- }
-
- if (!ops->get_rx_ntuple)
- __rx_ntuple_filter_add(&dev->ethtool_ntuple_list, &cmd.fs, fsc);
-
- return ret;
-}
-
-static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr)
-{
- struct ethtool_gstrings gstrings;
- const struct ethtool_ops *ops = dev->ethtool_ops;
- struct ethtool_rx_ntuple_flow_spec_container *fsc;
- u8 *data;
- char *p;
- int ret, i, num_strings = 0;
-
- if (!ops->get_sset_count)
- return -EOPNOTSUPP;
-
- if (copy_from_user(&gstrings, useraddr, sizeof(gstrings)))
- return -EFAULT;
-
- ret = ops->get_sset_count(dev, gstrings.string_set);
- if (ret < 0)
- return ret;
-
- gstrings.len = ret;
-
- data = kzalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER);
- if (!data)
- return -ENOMEM;
-
- if (ops->get_rx_ntuple) {
- /* driver-specific filter grab */
- ret = ops->get_rx_ntuple(dev, gstrings.string_set, data);
- goto copy;
- }
-
- /* default ethtool filter grab */
- i = 0;
- p = (char *)data;
- list_for_each_entry(fsc, &dev->ethtool_ntuple_list.list, list) {
- sprintf(p, "Filter %d:\n", i);
- p += ETH_GSTRING_LEN;
- num_strings++;
-
- switch (fsc->fs.flow_type) {
- case TCP_V4_FLOW:
- sprintf(p, "\tFlow Type: TCP\n");
- p += ETH_GSTRING_LEN;
- num_strings++;
- break;
- case UDP_V4_FLOW:
- sprintf(p, "\tFlow Type: UDP\n");
- p += ETH_GSTRING_LEN;
- num_strings++;
- break;
- case SCTP_V4_FLOW:
- sprintf(p, "\tFlow Type: SCTP\n");
- p += ETH_GSTRING_LEN;
- num_strings++;
- break;
- case AH_ESP_V4_FLOW:
- sprintf(p, "\tFlow Type: AH ESP\n");
- p += ETH_GSTRING_LEN;
- num_strings++;
- break;
- case ESP_V4_FLOW:
- sprintf(p, "\tFlow Type: ESP\n");
- p += ETH_GSTRING_LEN;
- num_strings++;
- break;
- case IP_USER_FLOW:
- sprintf(p, "\tFlow Type: Raw IP\n");
- p += ETH_GSTRING_LEN;
- num_strings++;
- break;
- case IPV4_FLOW:
- sprintf(p, "\tFlow Type: IPv4\n");
- p += ETH_GSTRING_LEN;
- num_strings++;
- break;
- default:
- sprintf(p, "\tFlow Type: Unknown\n");
- p += ETH_GSTRING_LEN;
- num_strings++;
- goto unknown_filter;
- }
-
- /* now the rest of the filters */
- switch (fsc->fs.flow_type) {
- case TCP_V4_FLOW:
- case UDP_V4_FLOW:
- case SCTP_V4_FLOW:
- sprintf(p, "\tSrc IP addr: 0x%x\n",
- fsc->fs.h_u.tcp_ip4_spec.ip4src);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tSrc IP mask: 0x%x\n",
- fsc->fs.m_u.tcp_ip4_spec.ip4src);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tDest IP addr: 0x%x\n",
- fsc->fs.h_u.tcp_ip4_spec.ip4dst);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tDest IP mask: 0x%x\n",
- fsc->fs.m_u.tcp_ip4_spec.ip4dst);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tSrc Port: %d, mask: 0x%x\n",
- fsc->fs.h_u.tcp_ip4_spec.psrc,
- fsc->fs.m_u.tcp_ip4_spec.psrc);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tDest Port: %d, mask: 0x%x\n",
- fsc->fs.h_u.tcp_ip4_spec.pdst,
- fsc->fs.m_u.tcp_ip4_spec.pdst);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tTOS: %d, mask: 0x%x\n",
- fsc->fs.h_u.tcp_ip4_spec.tos,
- fsc->fs.m_u.tcp_ip4_spec.tos);
- p += ETH_GSTRING_LEN;
- num_strings++;
- break;
- case AH_ESP_V4_FLOW:
- case ESP_V4_FLOW:
- sprintf(p, "\tSrc IP addr: 0x%x\n",
- fsc->fs.h_u.ah_ip4_spec.ip4src);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tSrc IP mask: 0x%x\n",
- fsc->fs.m_u.ah_ip4_spec.ip4src);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tDest IP addr: 0x%x\n",
- fsc->fs.h_u.ah_ip4_spec.ip4dst);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tDest IP mask: 0x%x\n",
- fsc->fs.m_u.ah_ip4_spec.ip4dst);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tSPI: %d, mask: 0x%x\n",
- fsc->fs.h_u.ah_ip4_spec.spi,
- fsc->fs.m_u.ah_ip4_spec.spi);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tTOS: %d, mask: 0x%x\n",
- fsc->fs.h_u.ah_ip4_spec.tos,
- fsc->fs.m_u.ah_ip4_spec.tos);
- p += ETH_GSTRING_LEN;
- num_strings++;
- break;
- case IP_USER_FLOW:
- sprintf(p, "\tSrc IP addr: 0x%x\n",
- fsc->fs.h_u.usr_ip4_spec.ip4src);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tSrc IP mask: 0x%x\n",
- fsc->fs.m_u.usr_ip4_spec.ip4src);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tDest IP addr: 0x%x\n",
- fsc->fs.h_u.usr_ip4_spec.ip4dst);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tDest IP mask: 0x%x\n",
- fsc->fs.m_u.usr_ip4_spec.ip4dst);
- p += ETH_GSTRING_LEN;
- num_strings++;
- break;
- case IPV4_FLOW:
- sprintf(p, "\tSrc IP addr: 0x%x\n",
- fsc->fs.h_u.usr_ip4_spec.ip4src);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tSrc IP mask: 0x%x\n",
- fsc->fs.m_u.usr_ip4_spec.ip4src);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tDest IP addr: 0x%x\n",
- fsc->fs.h_u.usr_ip4_spec.ip4dst);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tDest IP mask: 0x%x\n",
- fsc->fs.m_u.usr_ip4_spec.ip4dst);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tL4 bytes: 0x%x, mask: 0x%x\n",
- fsc->fs.h_u.usr_ip4_spec.l4_4_bytes,
- fsc->fs.m_u.usr_ip4_spec.l4_4_bytes);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tTOS: %d, mask: 0x%x\n",
- fsc->fs.h_u.usr_ip4_spec.tos,
- fsc->fs.m_u.usr_ip4_spec.tos);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tIP Version: %d, mask: 0x%x\n",
- fsc->fs.h_u.usr_ip4_spec.ip_ver,
- fsc->fs.m_u.usr_ip4_spec.ip_ver);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tProtocol: %d, mask: 0x%x\n",
- fsc->fs.h_u.usr_ip4_spec.proto,
- fsc->fs.m_u.usr_ip4_spec.proto);
- p += ETH_GSTRING_LEN;
- num_strings++;
- break;
- }
- sprintf(p, "\tVLAN: %d, mask: 0x%x\n",
- fsc->fs.vlan_tag, fsc->fs.vlan_tag_mask);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tUser-defined: 0x%Lx\n", fsc->fs.data);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tUser-defined mask: 0x%Lx\n", fsc->fs.data_mask);
- p += ETH_GSTRING_LEN;
- num_strings++;
- if (fsc->fs.action == ETHTOOL_RXNTUPLE_ACTION_DROP)
- sprintf(p, "\tAction: Drop\n");
- else
- sprintf(p, "\tAction: Direct to queue %d\n",
- fsc->fs.action);
- p += ETH_GSTRING_LEN;
- num_strings++;
-unknown_filter:
- i++;
- }
-copy:
- /* indicate to userspace how many strings we actually have */
- gstrings.len = num_strings;
- ret = -EFAULT;
- if (copy_to_user(useraddr, &gstrings, sizeof(gstrings)))
- goto out;
- useraddr += sizeof(gstrings);
- if (copy_to_user(useraddr, data, gstrings.len * ETH_GSTRING_LEN))
- goto out;
- ret = 0;
-
-out:
- kfree(data);
- return ret;
+ return ops->set_rx_ntuple(dev, &cmd);
}
static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
@@ -1227,7 +923,7 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
regs.len = reglen;
regbuf = vzalloc(reglen);
- if (!regbuf)
+ if (reglen && !regbuf)
return -ENOMEM;
ops->get_regs(dev, &regs, regbuf);
@@ -1236,7 +932,7 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
if (copy_to_user(useraddr, &regs, sizeof(regs)))
goto out;
useraddr += offsetof(struct ethtool_regs, data);
- if (copy_to_user(useraddr, regbuf, regs.len))
+ if (regbuf && copy_to_user(useraddr, regbuf, regs.len))
goto out;
ret = 0;
@@ -2101,9 +1797,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_SRXNTUPLE:
rc = ethtool_set_rx_ntuple(dev, useraddr);
break;
- case ETHTOOL_GRXNTUPLE:
- rc = ethtool_get_rx_ntuple(dev, useraddr);
- break;
case ETHTOOL_GSSET_INFO:
rc = ethtool_get_sset_info(dev, useraddr);
break;
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 008dc70b064..e7ab0c0285b 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -740,9 +740,9 @@ static struct pernet_operations fib_rules_net_ops = {
static int __init fib_rules_init(void)
{
int err;
- rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL);
- rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL);
- rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule);
+ rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL, NULL);
+ rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL, NULL);
+ rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule, NULL);
err = register_pernet_subsys(&fib_rules_net_ops);
if (err < 0)
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 799f06e03a2..8fab9b0bb20 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -98,7 +98,7 @@ static const struct file_operations neigh_stat_seq_fops;
static DEFINE_RWLOCK(neigh_tbl_lock);
-static int neigh_blackhole(struct sk_buff *skb)
+static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
{
kfree_skb(skb);
return -ENETDOWN;
@@ -137,7 +137,7 @@ static int neigh_forced_gc(struct neigh_table *tbl)
write_lock_bh(&tbl->lock);
nht = rcu_dereference_protected(tbl->nht,
lockdep_is_held(&tbl->lock));
- for (i = 0; i <= nht->hash_mask; i++) {
+ for (i = 0; i < (1 << nht->hash_shift); i++) {
struct neighbour *n;
struct neighbour __rcu **np;
@@ -210,7 +210,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
nht = rcu_dereference_protected(tbl->nht,
lockdep_is_held(&tbl->lock));
- for (i = 0; i <= nht->hash_mask; i++) {
+ for (i = 0; i < (1 << nht->hash_shift); i++) {
struct neighbour *n;
struct neighbour __rcu **np = &nht->hash_buckets[i];
@@ -297,6 +297,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl)
n->updated = n->used = now;
n->nud_state = NUD_NONE;
n->output = neigh_blackhole;
+ seqlock_init(&n->hh.hh_lock);
n->parms = neigh_parms_clone(&tbl->parms);
setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);
@@ -312,9 +313,9 @@ out_entries:
goto out;
}
-static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries)
+static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
{
- size_t size = entries * sizeof(struct neighbour *);
+ size_t size = (1 << shift) * sizeof(struct neighbour *);
struct neigh_hash_table *ret;
struct neighbour __rcu **buckets;
@@ -332,8 +333,9 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries)
return NULL;
}
ret->hash_buckets = buckets;
- ret->hash_mask = entries - 1;
+ ret->hash_shift = shift;
get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd));
+ ret->hash_rnd |= 1;
return ret;
}
@@ -342,7 +344,7 @@ static void neigh_hash_free_rcu(struct rcu_head *head)
struct neigh_hash_table *nht = container_of(head,
struct neigh_hash_table,
rcu);
- size_t size = (nht->hash_mask + 1) * sizeof(struct neighbour *);
+ size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
struct neighbour __rcu **buckets = nht->hash_buckets;
if (size <= PAGE_SIZE)
@@ -353,21 +355,20 @@ static void neigh_hash_free_rcu(struct rcu_head *head)
}
static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
- unsigned long new_entries)
+ unsigned long new_shift)
{
unsigned int i, hash;
struct neigh_hash_table *new_nht, *old_nht;
NEIGH_CACHE_STAT_INC(tbl, hash_grows);
- BUG_ON(!is_power_of_2(new_entries));
old_nht = rcu_dereference_protected(tbl->nht,
lockdep_is_held(&tbl->lock));
- new_nht = neigh_hash_alloc(new_entries);
+ new_nht = neigh_hash_alloc(new_shift);
if (!new_nht)
return old_nht;
- for (i = 0; i <= old_nht->hash_mask; i++) {
+ for (i = 0; i < (1 << old_nht->hash_shift); i++) {
struct neighbour *n, *next;
for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
@@ -377,7 +378,7 @@ static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
hash = tbl->hash(n->primary_key, n->dev,
new_nht->hash_rnd);
- hash &= new_nht->hash_mask;
+ hash >>= (32 - new_nht->hash_shift);
next = rcu_dereference_protected(n->next,
lockdep_is_held(&tbl->lock));
@@ -406,7 +407,7 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
rcu_read_lock_bh();
nht = rcu_dereference_bh(tbl->nht);
- hash_val = tbl->hash(pkey, dev, nht->hash_rnd) & nht->hash_mask;
+ hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
n != NULL;
@@ -436,7 +437,7 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
rcu_read_lock_bh();
nht = rcu_dereference_bh(tbl->nht);
- hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) & nht->hash_mask;
+ hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);
for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
n != NULL;
@@ -492,10 +493,10 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
nht = rcu_dereference_protected(tbl->nht,
lockdep_is_held(&tbl->lock));
- if (atomic_read(&tbl->entries) > (nht->hash_mask + 1))
- nht = neigh_hash_grow(tbl, (nht->hash_mask + 1) << 1);
+ if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
+ nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
- hash_val = tbl->hash(pkey, dev, nht->hash_rnd) & nht->hash_mask;
+ hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
if (n->parms->dead) {
rc = ERR_PTR(-EINVAL);
@@ -688,8 +689,6 @@ static void neigh_destroy_rcu(struct rcu_head *head)
*/
void neigh_destroy(struct neighbour *neigh)
{
- struct hh_cache *hh;
-
NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
if (!neigh->dead) {
@@ -702,16 +701,6 @@ void neigh_destroy(struct neighbour *neigh)
if (neigh_del_timer(neigh))
printk(KERN_WARNING "Impossible event.\n");
- while ((hh = neigh->hh) != NULL) {
- neigh->hh = hh->hh_next;
- hh->hh_next = NULL;
-
- write_seqlock_bh(&hh->hh_lock);
- hh->hh_output = neigh_blackhole;
- write_sequnlock_bh(&hh->hh_lock);
- hh_cache_put(hh);
- }
-
skb_queue_purge(&neigh->arp_queue);
dev_put(neigh->dev);
@@ -731,14 +720,9 @@ EXPORT_SYMBOL(neigh_destroy);
*/
static void neigh_suspect(struct neighbour *neigh)
{
- struct hh_cache *hh;
-
NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
neigh->output = neigh->ops->output;
-
- for (hh = neigh->hh; hh; hh = hh->hh_next)
- hh->hh_output = neigh->ops->output;
}
/* Neighbour state is OK;
@@ -748,14 +732,9 @@ static void neigh_suspect(struct neighbour *neigh)
*/
static void neigh_connect(struct neighbour *neigh)
{
- struct hh_cache *hh;
-
NEIGH_PRINTK2("neigh %p is connected.\n", neigh);
neigh->output = neigh->ops->connected_output;
-
- for (hh = neigh->hh; hh; hh = hh->hh_next)
- hh->hh_output = neigh->ops->hh_output;
}
static void neigh_periodic_work(struct work_struct *work)
@@ -784,7 +763,7 @@ static void neigh_periodic_work(struct work_struct *work)
neigh_rand_reach_time(p->base_reachable_time);
}
- for (i = 0 ; i <= nht->hash_mask; i++) {
+ for (i = 0 ; i < (1 << nht->hash_shift); i++) {
np = &nht->hash_buckets[i];
while ((n = rcu_dereference_protected(*np,
@@ -1015,7 +994,7 @@ out_unlock_bh:
}
EXPORT_SYMBOL(__neigh_event_send);
-static void neigh_update_hhs(const struct neighbour *neigh)
+static void neigh_update_hhs(struct neighbour *neigh)
{
struct hh_cache *hh;
void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
@@ -1025,7 +1004,8 @@ static void neigh_update_hhs(const struct neighbour *neigh)
update = neigh->dev->header_ops->cache_update;
if (update) {
- for (hh = neigh->hh; hh; hh = hh->hh_next) {
+ hh = &neigh->hh;
+ if (hh->hh_len) {
write_seqlock_bh(&hh->hh_lock);
update(hh, neigh->dev, neigh->ha);
write_sequnlock_bh(&hh->hh_lock);
@@ -1173,12 +1153,13 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
while (neigh->nud_state & NUD_VALID &&
(skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
- struct neighbour *n1 = neigh;
+ struct dst_entry *dst = skb_dst(skb);
+ struct neighbour *n2, *n1 = neigh;
write_unlock_bh(&neigh->lock);
/* On shaper/eql skb->dst->neighbour != neigh :( */
- if (skb_dst(skb) && skb_dst(skb)->neighbour)
- n1 = skb_dst(skb)->neighbour;
- n1->output(skb);
+ if (dst && (n2 = dst_get_neighbour(dst)) != NULL)
+ n1 = n2;
+ n1->output(n1, skb);
write_lock_bh(&neigh->lock);
}
skb_queue_purge(&neigh->arp_queue);
@@ -1211,67 +1192,21 @@ struct neighbour *neigh_event_ns(struct neigh_table *tbl,
}
EXPORT_SYMBOL(neigh_event_ns);
-static inline bool neigh_hh_lookup(struct neighbour *n, struct dst_entry *dst,
- __be16 protocol)
-{
- struct hh_cache *hh;
-
- smp_rmb(); /* paired with smp_wmb() in neigh_hh_init() */
- for (hh = n->hh; hh; hh = hh->hh_next) {
- if (hh->hh_type == protocol) {
- atomic_inc(&hh->hh_refcnt);
- if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL))
- hh_cache_put(hh);
- return true;
- }
- }
- return false;
-}
-
/* called with read_lock_bh(&n->lock); */
-static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
- __be16 protocol)
+static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
{
- struct hh_cache *hh;
struct net_device *dev = dst->dev;
-
- if (likely(neigh_hh_lookup(n, dst, protocol)))
- return;
-
- /* slow path */
- hh = kzalloc(sizeof(*hh), GFP_ATOMIC);
- if (!hh)
- return;
-
- seqlock_init(&hh->hh_lock);
- hh->hh_type = protocol;
- atomic_set(&hh->hh_refcnt, 2);
-
- if (dev->header_ops->cache(n, hh)) {
- kfree(hh);
- return;
- }
+ __be16 prot = dst->ops->protocol;
+ struct hh_cache *hh = &n->hh;
write_lock_bh(&n->lock);
- /* must check if another thread already did the insert */
- if (neigh_hh_lookup(n, dst, protocol)) {
- kfree(hh);
- goto end;
- }
-
- if (n->nud_state & NUD_CONNECTED)
- hh->hh_output = n->ops->hh_output;
- else
- hh->hh_output = n->ops->output;
-
- hh->hh_next = n->hh;
- smp_wmb(); /* paired with smp_rmb() in neigh_hh_lookup() */
- n->hh = hh;
+ /* Only one thread can come in here and initialize the
+ * hh_cache entry.
+ */
+ if (!hh->hh_len)
+ dev->header_ops->cache(n, hh, prot);
- if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL))
- hh_cache_put(hh);
-end:
write_unlock_bh(&n->lock);
}
@@ -1280,7 +1215,7 @@ end:
* but resolution is not made yet.
*/
-int neigh_compat_output(struct sk_buff *skb)
+int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
@@ -1297,13 +1232,12 @@ EXPORT_SYMBOL(neigh_compat_output);
/* Slow and careful. */
-int neigh_resolve_output(struct sk_buff *skb)
+int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
- struct neighbour *neigh;
int rc = 0;
- if (!dst || !(neigh = dst->neighbour))
+ if (!dst)
goto discard;
__skb_pull(skb, skb_network_offset(skb));
@@ -1313,10 +1247,8 @@ int neigh_resolve_output(struct sk_buff *skb)
struct net_device *dev = neigh->dev;
unsigned int seq;
- if (dev->header_ops->cache &&
- !dst->hh &&
- !(dst->flags & DST_NOCACHE))
- neigh_hh_init(neigh, dst, dst->ops->protocol);
+ if (dev->header_ops->cache && !neigh->hh.hh_len)
+ neigh_hh_init(neigh, dst);
do {
seq = read_seqbegin(&neigh->ha_lock);
@@ -1325,7 +1257,7 @@ int neigh_resolve_output(struct sk_buff *skb)
} while (read_seqretry(&neigh->ha_lock, seq));
if (err >= 0)
- rc = neigh->ops->queue_xmit(skb);
+ rc = dev_queue_xmit(skb);
else
goto out_kfree_skb;
}
@@ -1333,7 +1265,7 @@ out:
return rc;
discard:
NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
- dst, dst ? dst->neighbour : NULL);
+ dst, neigh);
out_kfree_skb:
rc = -EINVAL;
kfree_skb(skb);
@@ -1343,13 +1275,11 @@ EXPORT_SYMBOL(neigh_resolve_output);
/* As fast as possible without hh cache */
-int neigh_connected_output(struct sk_buff *skb)
+int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
{
- int err;
- struct dst_entry *dst = skb_dst(skb);
- struct neighbour *neigh = dst->neighbour;
struct net_device *dev = neigh->dev;
unsigned int seq;
+ int err;
__skb_pull(skb, skb_network_offset(skb));
@@ -1360,7 +1290,7 @@ int neigh_connected_output(struct sk_buff *skb)
} while (read_seqretry(&neigh->ha_lock, seq));
if (err >= 0)
- err = neigh->ops->queue_xmit(skb);
+ err = dev_queue_xmit(skb);
else {
err = -EINVAL;
kfree_skb(skb);
@@ -1369,6 +1299,12 @@ int neigh_connected_output(struct sk_buff *skb)
}
EXPORT_SYMBOL(neigh_connected_output);
+int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
+{
+ return dev_queue_xmit(skb);
+}
+EXPORT_SYMBOL(neigh_direct_output);
+
static void neigh_proxy_process(unsigned long arg)
{
struct neigh_table *tbl = (struct neigh_table *)arg;
@@ -1540,7 +1476,7 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl)
panic("cannot create neighbour proc dir entry");
#endif
- RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(8));
+ RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));
phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
@@ -1857,7 +1793,7 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
rcu_read_lock_bh();
nht = rcu_dereference_bh(tbl->nht);
ndc.ndtc_hash_rnd = nht->hash_rnd;
- ndc.ndtc_hash_mask = nht->hash_mask;
+ ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
rcu_read_unlock_bh();
NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc);
@@ -2200,7 +2136,7 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
rcu_read_lock_bh();
nht = rcu_dereference_bh(tbl->nht);
- for (h = 0; h <= nht->hash_mask; h++) {
+ for (h = 0; h < (1 << nht->hash_shift); h++) {
if (h < s_h)
continue;
if (h > s_h)
@@ -2264,7 +2200,7 @@ void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void
nht = rcu_dereference_bh(tbl->nht);
read_lock(&tbl->lock); /* avoid resizes */
- for (chain = 0; chain <= nht->hash_mask; chain++) {
+ for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
struct neighbour *n;
for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
@@ -2286,7 +2222,7 @@ void __neigh_for_each_release(struct neigh_table *tbl,
nht = rcu_dereference_protected(tbl->nht,
lockdep_is_held(&tbl->lock));
- for (chain = 0; chain <= nht->hash_mask; chain++) {
+ for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
struct neighbour *n;
struct neighbour __rcu **np;
@@ -2323,7 +2259,7 @@ static struct neighbour *neigh_get_first(struct seq_file *seq)
int bucket = state->bucket;
state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
- for (bucket = 0; bucket <= nht->hash_mask; bucket++) {
+ for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
n = rcu_dereference_bh(nht->hash_buckets[bucket]);
while (n) {
@@ -2390,7 +2326,7 @@ next:
if (n)
break;
- if (++state->bucket > nht->hash_mask)
+ if (++state->bucket >= (1 << nht->hash_shift))
break;
n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
@@ -2909,12 +2845,13 @@ EXPORT_SYMBOL(neigh_sysctl_unregister);
static int __init neigh_init(void)
{
- rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL);
- rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL);
- rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info);
+ rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
+ rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
+ rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);
- rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info);
- rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL);
+ rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
+ NULL);
+ rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);
return 0;
}
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 33d2a1fba13..1683e5db2f2 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -100,7 +100,6 @@ NETDEVICE_SHOW(addr_assign_type, fmt_dec);
NETDEVICE_SHOW(addr_len, fmt_dec);
NETDEVICE_SHOW(iflink, fmt_dec);
NETDEVICE_SHOW(ifindex, fmt_dec);
-NETDEVICE_SHOW(features, fmt_hex);
NETDEVICE_SHOW(type, fmt_dec);
NETDEVICE_SHOW(link_mode, fmt_dec);
@@ -312,7 +311,6 @@ static struct device_attribute net_class_attributes[] = {
__ATTR(ifalias, S_IRUGO | S_IWUSR, show_ifalias, store_ifalias),
__ATTR(iflink, S_IRUGO, show_iflink, NULL),
__ATTR(ifindex, S_IRUGO, show_ifindex, NULL),
- __ATTR(features, S_IRUGO, show_features, NULL),
__ATTR(type, S_IRUGO, show_type, NULL),
__ATTR(link_mode, S_IRUGO, show_link_mode, NULL),
__ATTR(address, S_IRUGO, show_address, NULL),
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index 7f1bb2aba03..52380b1d552 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -28,6 +28,8 @@
#include <trace/events/skb.h>
#include <trace/events/net.h>
#include <trace/events/napi.h>
+#include <trace/events/sock.h>
+#include <trace/events/udp.h>
EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index ea489db1bc2..5bbdbf0d366 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -129,6 +129,7 @@ static __net_init int setup_net(struct net *net)
atomic_set(&net->count, 1);
atomic_set(&net->passive, 1);
+ net->dev_base_seq = 1;
#ifdef NETNS_REFCNT_DEBUG
atomic_set(&net->use_count, 0);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 18d9cbda3a3..adf84dd8c7b 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -177,7 +177,7 @@ static void service_arp_queue(struct netpoll_info *npi)
}
}
-void netpoll_poll_dev(struct net_device *dev)
+static void netpoll_poll_dev(struct net_device *dev)
{
const struct net_device_ops *ops;
@@ -208,13 +208,6 @@ void netpoll_poll_dev(struct net_device *dev)
zap_completion_queue();
}
-EXPORT_SYMBOL(netpoll_poll_dev);
-
-void netpoll_poll(struct netpoll *np)
-{
- netpoll_poll_dev(np->dev);
-}
-EXPORT_SYMBOL(netpoll_poll);
static void refill_skbs(void)
{
@@ -275,7 +268,7 @@ repeat:
if (!skb) {
if (++count < 10) {
- netpoll_poll(np);
+ netpoll_poll_dev(np->dev);
goto repeat;
}
return NULL;
@@ -336,7 +329,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
}
/* tickle device maybe there is some cleanup */
- netpoll_poll(np);
+ netpoll_poll_dev(np->dev);
udelay(USEC_PER_POLL);
}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index abd936d8a71..99d9e953fe3 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -56,9 +56,11 @@
struct rtnl_link {
rtnl_doit_func doit;
rtnl_dumpit_func dumpit;
+ rtnl_calcit_func calcit;
};
static DEFINE_MUTEX(rtnl_mutex);
+static u16 min_ifinfo_dump_size;
void rtnl_lock(void)
{
@@ -144,12 +146,28 @@ static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex)
return tab ? tab[msgindex].dumpit : NULL;
}
+static rtnl_calcit_func rtnl_get_calcit(int protocol, int msgindex)
+{
+ struct rtnl_link *tab;
+
+ if (protocol <= RTNL_FAMILY_MAX)
+ tab = rtnl_msg_handlers[protocol];
+ else
+ tab = NULL;
+
+ if (tab == NULL || tab[msgindex].calcit == NULL)
+ tab = rtnl_msg_handlers[PF_UNSPEC];
+
+ return tab ? tab[msgindex].calcit : NULL;
+}
+
/**
* __rtnl_register - Register a rtnetlink message type
* @protocol: Protocol family or PF_UNSPEC
* @msgtype: rtnetlink message type
* @doit: Function pointer called for each request message
* @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message
+ * @calcit: Function pointer to calc size of dump message
*
* Registers the specified function pointers (at least one of them has
* to be non-NULL) to be called whenever a request message for the
@@ -162,7 +180,8 @@ static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex)
* Returns 0 on success or a negative error code.
*/
int __rtnl_register(int protocol, int msgtype,
- rtnl_doit_func doit, rtnl_dumpit_func dumpit)
+ rtnl_doit_func doit, rtnl_dumpit_func dumpit,
+ rtnl_calcit_func calcit)
{
struct rtnl_link *tab;
int msgindex;
@@ -185,6 +204,9 @@ int __rtnl_register(int protocol, int msgtype,
if (dumpit)
tab[msgindex].dumpit = dumpit;
+ if (calcit)
+ tab[msgindex].calcit = calcit;
+
return 0;
}
EXPORT_SYMBOL_GPL(__rtnl_register);
@@ -199,9 +221,10 @@ EXPORT_SYMBOL_GPL(__rtnl_register);
* of memory implies no sense in continuing.
*/
void rtnl_register(int protocol, int msgtype,
- rtnl_doit_func doit, rtnl_dumpit_func dumpit)
+ rtnl_doit_func doit, rtnl_dumpit_func dumpit,
+ rtnl_calcit_func calcit)
{
- if (__rtnl_register(protocol, msgtype, doit, dumpit) < 0)
+ if (__rtnl_register(protocol, msgtype, doit, dumpit, calcit) < 0)
panic("Unable to register rtnetlink message handler, "
"protocol = %d, message type = %d\n",
protocol, msgtype);
@@ -1009,6 +1032,8 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
s_idx = cb->args[1];
rcu_read_lock();
+ cb->seq = net->dev_base_seq;
+
for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
idx = 0;
head = &net->dev_index_head[h];
@@ -1020,6 +1045,8 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
cb->nlh->nlmsg_seq, 0,
NLM_F_MULTI) <= 0)
goto out;
+
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
idx++;
}
@@ -1818,6 +1845,11 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
return err;
}
+static u16 rtnl_calcit(struct sk_buff *skb)
+{
+ return min_ifinfo_dump_size;
+}
+
static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
{
int idx;
@@ -1847,11 +1879,14 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
struct net *net = dev_net(dev);
struct sk_buff *skb;
int err = -ENOBUFS;
+ size_t if_info_size;
- skb = nlmsg_new(if_nlmsg_size(dev), GFP_KERNEL);
+ skb = nlmsg_new((if_info_size = if_nlmsg_size(dev)), GFP_KERNEL);
if (skb == NULL)
goto errout;
+ min_ifinfo_dump_size = max_t(u16, if_info_size, min_ifinfo_dump_size);
+
err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0);
if (err < 0) {
/* -EMSGSIZE implies BUG in if_nlmsg_size() */
@@ -1902,14 +1937,20 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
struct sock *rtnl;
rtnl_dumpit_func dumpit;
+ rtnl_calcit_func calcit;
+ u16 min_dump_alloc = 0;
dumpit = rtnl_get_dumpit(family, type);
if (dumpit == NULL)
return -EOPNOTSUPP;
+ calcit = rtnl_get_calcit(family, type);
+ if (calcit)
+ min_dump_alloc = calcit(skb);
__rtnl_unlock();
rtnl = net->rtnl;
- err = netlink_dump_start(rtnl, skb, nlh, dumpit, NULL);
+ err = netlink_dump_start(rtnl, skb, nlh, dumpit,
+ NULL, min_dump_alloc);
rtnl_lock();
return err;
}
@@ -2019,12 +2060,13 @@ void __init rtnetlink_init(void)
netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV);
register_netdevice_notifier(&rtnetlink_dev_notifier);
- rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink, rtnl_dump_ifinfo);
- rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL);
- rtnl_register(PF_UNSPEC, RTM_NEWLINK, rtnl_newlink, NULL);
- rtnl_register(PF_UNSPEC, RTM_DELLINK, rtnl_dellink, NULL);
+ rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink,
+ rtnl_dump_ifinfo, rtnl_calcit);
+ rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL, NULL);
+ rtnl_register(PF_UNSPEC, RTM_NEWLINK, rtnl_newlink, NULL, NULL);
+ rtnl_register(PF_UNSPEC, RTM_DELLINK, rtnl_dellink, NULL, NULL);
- rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all);
- rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all);
+ rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all, NULL);
+ rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all, NULL);
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 46cbd28f40f..2beda824636 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -329,6 +329,18 @@ static void skb_release_data(struct sk_buff *skb)
put_page(skb_shinfo(skb)->frags[i].page);
}
+ /*
+ * If skb buf is from userspace, we need to notify the caller
+ * the lower device DMA has done;
+ */
+ if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
+ struct ubuf_info *uarg;
+
+ uarg = skb_shinfo(skb)->destructor_arg;
+ if (uarg->callback)
+ uarg->callback(uarg);
+ }
+
if (skb_has_frag_list(skb))
skb_drop_fraglist(skb);
@@ -481,6 +493,9 @@ bool skb_recycle_check(struct sk_buff *skb, int skb_size)
if (irqs_disabled())
return false;
+ if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY)
+ return false;
+
if (skb_is_nonlinear(skb) || skb->fclone != SKB_FCLONE_UNAVAILABLE)
return false;
@@ -596,6 +611,51 @@ struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src)
}
EXPORT_SYMBOL_GPL(skb_morph);
+/* skb frags copy userspace buffers to kernel */
+static int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
+{
+ int i;
+ int num_frags = skb_shinfo(skb)->nr_frags;
+ struct page *page, *head = NULL;
+ struct ubuf_info *uarg = skb_shinfo(skb)->destructor_arg;
+
+ for (i = 0; i < num_frags; i++) {
+ u8 *vaddr;
+ skb_frag_t *f = &skb_shinfo(skb)->frags[i];
+
+ page = alloc_page(GFP_ATOMIC);
+ if (!page) {
+ while (head) {
+ struct page *next = (struct page *)head->private;
+ put_page(head);
+ head = next;
+ }
+ return -ENOMEM;
+ }
+ vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
+ memcpy(page_address(page),
+ vaddr + f->page_offset, f->size);
+ kunmap_skb_frag(vaddr);
+ page->private = (unsigned long)head;
+ head = page;
+ }
+
+ /* skb frags release userspace buffers */
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+ put_page(skb_shinfo(skb)->frags[i].page);
+
+ uarg->callback(uarg);
+
+ /* skb frags point to kernel buffers */
+ for (i = skb_shinfo(skb)->nr_frags; i > 0; i--) {
+ skb_shinfo(skb)->frags[i - 1].page_offset = 0;
+ skb_shinfo(skb)->frags[i - 1].page = head;
+ head = (struct page *)head->private;
+ }
+ return 0;
+}
+
+
/**
* skb_clone - duplicate an sk_buff
* @skb: buffer to clone
@@ -614,6 +674,12 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
{
struct sk_buff *n;
+ if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
+ if (skb_copy_ubufs(skb, gfp_mask))
+ return NULL;
+ skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
+ }
+
n = skb + 1;
if (skb->fclone == SKB_FCLONE_ORIG &&
n->fclone == SKB_FCLONE_UNAVAILABLE) {
@@ -731,6 +797,14 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
if (skb_shinfo(skb)->nr_frags) {
int i;
+ if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
+ if (skb_copy_ubufs(skb, gfp_mask)) {
+ kfree_skb(n);
+ n = NULL;
+ goto out;
+ }
+ skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
+ }
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
get_page(skb_shinfo(n)->frags[i].page);
@@ -788,7 +862,6 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
fastpath = true;
else {
int delta = skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1;
-
fastpath = atomic_read(&skb_shinfo(skb)->dataref) == delta;
}
@@ -819,6 +892,12 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
if (fastpath) {
kfree(skb->head);
} else {
+ /* copy this zero copy skb frags */
+ if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
+ if (skb_copy_ubufs(skb, gfp_mask))
+ goto nofrags;
+ skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
+ }
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
get_page(skb_shinfo(skb)->frags[i].page);
@@ -853,6 +932,8 @@ adjust_others:
atomic_set(&skb_shinfo(skb)->dataref, 1);
return 0;
+nofrags:
+ kfree(data);
nodata:
return -ENOMEM;
}
@@ -1354,6 +1435,7 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
}
start = end;
}
+
if (!len)
return 0;
diff --git a/net/core/sock.c b/net/core/sock.c
index 6e819780c23..bc745d00ea4 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -128,6 +128,8 @@
#include <linux/filter.h>
+#include <trace/events/sock.h>
+
#ifdef CONFIG_INET
#include <net/tcp.h>
#endif
@@ -158,7 +160,7 @@ static const char *const af_family_key_strings[AF_MAX+1] = {
"sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" ,
"sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" ,
"sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" ,
- "sk_lock-AF_MAX"
+ "sk_lock-AF_NFC" , "sk_lock-AF_MAX"
};
static const char *const af_family_slock_key_strings[AF_MAX+1] = {
"slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" ,
@@ -174,7 +176,7 @@ static const char *const af_family_slock_key_strings[AF_MAX+1] = {
"slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" ,
"slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" ,
"slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" ,
- "slock-AF_MAX"
+ "slock-AF_NFC" , "slock-AF_MAX"
};
static const char *const af_family_clock_key_strings[AF_MAX+1] = {
"clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" ,
@@ -190,7 +192,7 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = {
"clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" ,
"clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" ,
"clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" ,
- "clock-AF_MAX"
+ "clock-AF_NFC" , "clock-AF_MAX"
};
/*
@@ -292,6 +294,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
(unsigned)sk->sk_rcvbuf) {
atomic_inc(&sk->sk_drops);
+ trace_sock_rcvqueue_full(sk, skb);
return -ENOMEM;
}
@@ -1736,6 +1739,8 @@ suppress_allocation:
return 1;
}
+ trace_sock_exceed_buf_limit(sk, prot, allocated);
+
/* Alas. Undo changes. */
sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
atomic_long_sub(amt, prot->memory_allocated);
diff --git a/net/core/timestamping.c b/net/core/timestamping.c
index 7e7ca375d43..98a52640e7c 100644
--- a/net/core/timestamping.c
+++ b/net/core/timestamping.c
@@ -68,6 +68,7 @@ void skb_clone_tx_timestamp(struct sk_buff *skb)
break;
}
}
+EXPORT_SYMBOL_GPL(skb_clone_tx_timestamp);
void skb_complete_tx_timestamp(struct sk_buff *skb,
struct skb_shared_hwtstamps *hwtstamps)
@@ -121,6 +122,7 @@ bool skb_defer_rx_timestamp(struct sk_buff *skb)
return false;
}
+EXPORT_SYMBOL_GPL(skb_defer_rx_timestamp);
void __init skb_timestamping_init(void)
{