aboutsummaryrefslogtreecommitdiff
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/datagram.c5
-rw-r--r--net/core/dev.c263
-rw-r--r--net/core/drop_monitor.c4
-rw-r--r--net/core/dst.c2
-rw-r--r--net/core/ethtool.c54
-rw-r--r--net/core/fib_rules.c4
-rw-r--r--net/core/flow.c4
-rw-r--r--net/core/gen_estimator.c12
-rw-r--r--net/core/gen_stats.c22
-rw-r--r--net/core/link_watch.c3
-rw-r--r--net/core/neighbour.c63
-rw-r--r--net/core/net-procfs.c16
-rw-r--r--net/core/netpoll.c17
-rw-r--r--net/core/netprio_cgroup.c2
-rw-r--r--net/core/pktgen.c81
-rw-r--r--net/core/rtnetlink.c32
-rw-r--r--net/core/skbuff.c78
-rw-r--r--net/core/sock.c26
-rw-r--r--net/core/sysctl_net_core.c145
-rw-r--r--net/core/utils.c22
20 files changed, 626 insertions, 229 deletions
diff --git a/net/core/datagram.c b/net/core/datagram.c
index b71423db778..8ab48cd8955 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -56,6 +56,7 @@
#include <net/sock.h>
#include <net/tcp_states.h>
#include <trace/events/skb.h>
+#include <net/busy_poll.h>
/*
* Is a socket 'connection oriented' ?
@@ -207,6 +208,10 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
}
spin_unlock_irqrestore(&queue->lock, cpu_flags);
+ if (sk_can_busy_loop(sk) &&
+ sk_busy_loop(sk, flags & MSG_DONTWAIT))
+ continue;
+
/* User doesn't want to wait */
error = -EAGAIN;
if (!timeo)
diff --git a/net/core/dev.c b/net/core/dev.c
index faebb398fb4..26755dd40da 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -129,6 +129,8 @@
#include <linux/inetdevice.h>
#include <linux/cpu_rmap.h>
#include <linux/static_key.h>
+#include <linux/hashtable.h>
+#include <linux/vmalloc.h>
#include "net-sysfs.h"
@@ -166,6 +168,12 @@ static struct list_head offload_base __read_mostly;
DEFINE_RWLOCK(dev_base_lock);
EXPORT_SYMBOL(dev_base_lock);
+/* protects napi_hash addition/deletion and napi_gen_id */
+static DEFINE_SPINLOCK(napi_hash_lock);
+
+static unsigned int napi_gen_id;
+static DEFINE_HASHTABLE(napi_hash, 8);
+
seqcount_t devnet_rename_seq;
static inline void dev_base_seq_inc(struct net *net)
@@ -1232,9 +1240,7 @@ static int __dev_open(struct net_device *dev)
* If we don't do this there is a chance ndo_poll_controller
* or ndo_poll may be running while we open the device
*/
- ret = netpoll_rx_disable(dev);
- if (ret)
- return ret;
+ netpoll_rx_disable(dev);
ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
ret = notifier_to_errno(ret);
@@ -1343,9 +1349,7 @@ static int __dev_close(struct net_device *dev)
LIST_HEAD(single);
/* Temporarily disable netpoll until the interface is down */
- retval = netpoll_rx_disable(dev);
- if (retval)
- return retval;
+ netpoll_rx_disable(dev);
list_add(&dev->unreg_list, &single);
retval = __dev_close_many(&single);
@@ -1387,14 +1391,11 @@ static int dev_close_many(struct list_head *head)
*/
int dev_close(struct net_device *dev)
{
- int ret = 0;
if (dev->flags & IFF_UP) {
LIST_HEAD(single);
/* Block netpoll rx while the interface is going down */
- ret = netpoll_rx_disable(dev);
- if (ret)
- return ret;
+ netpoll_rx_disable(dev);
list_add(&dev->unreg_list, &single);
dev_close_many(&single);
@@ -1402,7 +1403,7 @@ int dev_close(struct net_device *dev)
netpoll_rx_enable(dev);
}
- return ret;
+ return 0;
}
EXPORT_SYMBOL(dev_close);
@@ -1432,6 +1433,14 @@ void dev_disable_lro(struct net_device *dev)
}
EXPORT_SYMBOL(dev_disable_lro);
+static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val,
+ struct net_device *dev)
+{
+ struct netdev_notifier_info info;
+
+ netdev_notifier_info_init(&info, dev);
+ return nb->notifier_call(nb, val, &info);
+}
static int dev_boot_phase = 1;
@@ -1464,7 +1473,7 @@ int register_netdevice_notifier(struct notifier_block *nb)
goto unlock;
for_each_net(net) {
for_each_netdev(net, dev) {
- err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
+ err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev);
err = notifier_to_errno(err);
if (err)
goto rollback;
@@ -1472,7 +1481,7 @@ int register_netdevice_notifier(struct notifier_block *nb)
if (!(dev->flags & IFF_UP))
continue;
- nb->notifier_call(nb, NETDEV_UP, dev);
+ call_netdevice_notifier(nb, NETDEV_UP, dev);
}
}
@@ -1488,10 +1497,11 @@ rollback:
goto outroll;
if (dev->flags & IFF_UP) {
- nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
- nb->notifier_call(nb, NETDEV_DOWN, dev);
+ call_netdevice_notifier(nb, NETDEV_GOING_DOWN,
+ dev);
+ call_netdevice_notifier(nb, NETDEV_DOWN, dev);
}
- nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
+ call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
}
}
@@ -1529,10 +1539,11 @@ int unregister_netdevice_notifier(struct notifier_block *nb)
for_each_net(net) {
for_each_netdev(net, dev) {
if (dev->flags & IFF_UP) {
- nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
- nb->notifier_call(nb, NETDEV_DOWN, dev);
+ call_netdevice_notifier(nb, NETDEV_GOING_DOWN,
+ dev);
+ call_netdevice_notifier(nb, NETDEV_DOWN, dev);
}
- nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
+ call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
}
}
unlock:
@@ -1542,6 +1553,25 @@ unlock:
EXPORT_SYMBOL(unregister_netdevice_notifier);
/**
+ * call_netdevice_notifiers_info - call all network notifier blocks
+ * @val: value passed unmodified to notifier function
+ * @dev: net_device pointer passed unmodified to notifier function
+ * @info: notifier information data
+ *
+ * Call all network notifier blocks. Parameters and return value
+ * are as for raw_notifier_call_chain().
+ */
+
+int call_netdevice_notifiers_info(unsigned long val, struct net_device *dev,
+ struct netdev_notifier_info *info)
+{
+ ASSERT_RTNL();
+ netdev_notifier_info_init(info, dev);
+ return raw_notifier_call_chain(&netdev_chain, val, info);
+}
+EXPORT_SYMBOL(call_netdevice_notifiers_info);
+
+/**
* call_netdevice_notifiers - call all network notifier blocks
* @val: value passed unmodified to notifier function
* @dev: net_device pointer passed unmodified to notifier function
@@ -1552,8 +1582,9 @@ EXPORT_SYMBOL(unregister_netdevice_notifier);
int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
{
- ASSERT_RTNL();
- return raw_notifier_call_chain(&netdev_chain, val, dev);
+ struct netdev_notifier_info info;
+
+ return call_netdevice_notifiers_info(val, dev, &info);
}
EXPORT_SYMBOL(call_netdevice_notifiers);
@@ -1655,23 +1686,19 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
}
}
- skb_orphan(skb);
-
if (unlikely(!is_skb_forwardable(dev, skb))) {
atomic_long_inc(&dev->rx_dropped);
kfree_skb(skb);
return NET_RX_DROP;
}
- skb->skb_iif = 0;
- skb->dev = dev;
- skb_dst_drop(skb);
- skb->tstamp.tv64 = 0;
- skb->pkt_type = PACKET_HOST;
+ skb_scrub_packet(skb);
skb->protocol = eth_type_trans(skb, dev);
- skb->mark = 0;
- secpath_reset(skb);
- nf_reset(skb);
- nf_reset_trace(skb);
+
+ /* eth_type_trans() can set pkt_type.
+ * clear pkt_type _after_ calling eth_type_trans()
+ */
+ skb->pkt_type = PACKET_HOST;
+
return netif_rx(skb);
}
EXPORT_SYMBOL_GPL(dev_forward_skb);
@@ -1736,7 +1763,7 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
skb_reset_mac_header(skb2);
if (skb_network_header(skb2) < skb2->data ||
- skb2->network_header > skb2->tail) {
+ skb_network_header(skb2) > skb_tail_pointer(skb2)) {
net_crit_ratelimited("protocol %04x is buggy, dev %s\n",
ntohs(skb2->protocol),
dev->name);
@@ -2454,10 +2481,10 @@ static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
}
static netdev_features_t harmonize_features(struct sk_buff *skb,
- __be16 protocol, netdev_features_t features)
+ netdev_features_t features)
{
if (skb->ip_summed != CHECKSUM_NONE &&
- !can_checksum_protocol(features, protocol)) {
+ !can_checksum_protocol(features, skb_network_protocol(skb))) {
features &= ~NETIF_F_ALL_CSUM;
} else if (illegal_highdma(skb->dev, skb)) {
features &= ~NETIF_F_SG;
@@ -2478,20 +2505,18 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)
struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
protocol = veh->h_vlan_encapsulated_proto;
} else if (!vlan_tx_tag_present(skb)) {
- return harmonize_features(skb, protocol, features);
+ return harmonize_features(skb, features);
}
features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_HW_VLAN_STAG_TX);
- if (protocol != htons(ETH_P_8021Q) && protocol != htons(ETH_P_8021AD)) {
- return harmonize_features(skb, protocol, features);
- } else {
+ if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD))
features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST |
NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_HW_VLAN_STAG_TX;
- return harmonize_features(skb, protocol, features);
- }
+
+ return harmonize_features(skb, features);
}
EXPORT_SYMBOL(netif_skb_features);
@@ -3099,6 +3124,46 @@ static int rps_ipi_queued(struct softnet_data *sd)
return 0;
}
+#ifdef CONFIG_NET_FLOW_LIMIT
+int netdev_flow_limit_table_len __read_mostly = (1 << 12);
+#endif
+
+static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen)
+{
+#ifdef CONFIG_NET_FLOW_LIMIT
+ struct sd_flow_limit *fl;
+ struct softnet_data *sd;
+ unsigned int old_flow, new_flow;
+
+ if (qlen < (netdev_max_backlog >> 1))
+ return false;
+
+ sd = &__get_cpu_var(softnet_data);
+
+ rcu_read_lock();
+ fl = rcu_dereference(sd->flow_limit);
+ if (fl) {
+ new_flow = skb_get_rxhash(skb) & (fl->num_buckets - 1);
+ old_flow = fl->history[fl->history_head];
+ fl->history[fl->history_head] = new_flow;
+
+ fl->history_head++;
+ fl->history_head &= FLOW_LIMIT_HISTORY - 1;
+
+ if (likely(fl->buckets[old_flow]))
+ fl->buckets[old_flow]--;
+
+ if (++fl->buckets[new_flow] > (FLOW_LIMIT_HISTORY >> 1)) {
+ fl->count++;
+ rcu_read_unlock();
+ return true;
+ }
+ }
+ rcu_read_unlock();
+#endif
+ return false;
+}
+
/*
* enqueue_to_backlog is called to queue an skb to a per CPU backlog
* queue (may be a remote CPU queue).
@@ -3108,13 +3173,15 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
{
struct softnet_data *sd;
unsigned long flags;
+ unsigned int qlen;
sd = &per_cpu(softnet_data, cpu);
local_irq_save(flags);
rps_lock(sd);
- if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) {
+ qlen = skb_queue_len(&sd->input_pkt_queue);
+ if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) {
if (skb_queue_len(&sd->input_pkt_queue)) {
enqueue:
__skb_queue_tail(&sd->input_pkt_queue, skb);
@@ -3513,8 +3580,15 @@ ncls:
}
}
- if (vlan_tx_nonzero_tag_present(skb))
- skb->pkt_type = PACKET_OTHERHOST;
+ if (unlikely(vlan_tx_tag_present(skb))) {
+ if (vlan_tx_tag_get_id(skb))
+ skb->pkt_type = PACKET_OTHERHOST;
+ /* Note: we might in the future use prio bits
+ * and set skb->priority like in vlan_do_receive()
+ * For the time being, just ignore Priority Code Point
+ */
+ skb->vlan_tci = 0;
+ }
/* deliver only exact match when indicated */
null_or_dev = deliver_exact ? skb->dev : NULL;
@@ -3862,7 +3936,7 @@ static void skb_gro_reset_offset(struct sk_buff *skb)
NAPI_GRO_CB(skb)->frag0 = NULL;
NAPI_GRO_CB(skb)->frag0_len = 0;
- if (skb->mac_header == skb->tail &&
+ if (skb_mac_header(skb) == skb_tail_pointer(skb) &&
pinfo->nr_frags &&
!PageHighMem(skb_frag_page(frag0))) {
NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
@@ -4106,6 +4180,58 @@ void napi_complete(struct napi_struct *n)
}
EXPORT_SYMBOL(napi_complete);
+/* must be called under rcu_read_lock(), as we dont take a reference */
+struct napi_struct *napi_by_id(unsigned int napi_id)
+{
+ unsigned int hash = napi_id % HASH_SIZE(napi_hash);
+ struct napi_struct *napi;
+
+ hlist_for_each_entry_rcu(napi, &napi_hash[hash], napi_hash_node)
+ if (napi->napi_id == napi_id)
+ return napi;
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(napi_by_id);
+
+void napi_hash_add(struct napi_struct *napi)
+{
+ if (!test_and_set_bit(NAPI_STATE_HASHED, &napi->state)) {
+
+ spin_lock(&napi_hash_lock);
+
+ /* 0 is not a valid id, we also skip an id that is taken
+ * we expect both events to be extremely rare
+ */
+ napi->napi_id = 0;
+ while (!napi->napi_id) {
+ napi->napi_id = ++napi_gen_id;
+ if (napi_by_id(napi->napi_id))
+ napi->napi_id = 0;
+ }
+
+ hlist_add_head_rcu(&napi->napi_hash_node,
+ &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]);
+
+ spin_unlock(&napi_hash_lock);
+ }
+}
+EXPORT_SYMBOL_GPL(napi_hash_add);
+
+/* Warning : caller is responsible to make sure rcu grace period
+ * is respected before freeing memory containing @napi
+ */
+void napi_hash_del(struct napi_struct *napi)
+{
+ spin_lock(&napi_hash_lock);
+
+ if (test_and_clear_bit(NAPI_STATE_HASHED, &napi->state))
+ hlist_del_rcu(&napi->napi_hash_node);
+
+ spin_unlock(&napi_hash_lock);
+}
+EXPORT_SYMBOL_GPL(napi_hash_del);
+
void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
int (*poll)(struct napi_struct *, int), int weight)
{
@@ -4404,7 +4530,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
else
list_add_tail_rcu(&upper->list, &dev->upper_dev_list);
dev_hold(upper_dev);
-
+ call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev);
return 0;
}
@@ -4464,6 +4590,7 @@ void netdev_upper_dev_unlink(struct net_device *dev,
list_del_rcu(&upper->list);
dev_put(upper_dev);
kfree_rcu(upper, rcu);
+ call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev);
}
EXPORT_SYMBOL(netdev_upper_dev_unlink);
@@ -4734,8 +4861,13 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags)
}
if (dev->flags & IFF_UP &&
- (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE)))
- call_netdevice_notifiers(NETDEV_CHANGE, dev);
+ (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) {
+ struct netdev_notifier_change_info change_info;
+
+ change_info.flags_changed = changes;
+ call_netdevice_notifiers_info(NETDEV_CHANGE, dev,
+ &change_info.info);
+ }
}
/**
@@ -5158,17 +5290,28 @@ static void netdev_init_one_queue(struct net_device *dev,
#endif
}
+static void netif_free_tx_queues(struct net_device *dev)
+{
+ if (is_vmalloc_addr(dev->_tx))
+ vfree(dev->_tx);
+ else
+ kfree(dev->_tx);
+}
+
static int netif_alloc_netdev_queues(struct net_device *dev)
{
unsigned int count = dev->num_tx_queues;
struct netdev_queue *tx;
+ size_t sz = count * sizeof(*tx);
- BUG_ON(count < 1);
-
- tx = kcalloc(count, sizeof(struct netdev_queue), GFP_KERNEL);
- if (!tx)
- return -ENOMEM;
+ BUG_ON(count < 1 || count > 0xffff);
+ tx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
+ if (!tx) {
+ tx = vzalloc(sz);
+ if (!tx)
+ return -ENOMEM;
+ }
dev->_tx = tx;
netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
@@ -5269,6 +5412,10 @@ int register_netdevice(struct net_device *dev)
*/
dev->hw_enc_features |= NETIF_F_SG;
+ /* Make NETIF_F_SG inheritable to MPLS.
+ */
+ dev->mpls_features |= NETIF_F_SG;
+
ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
ret = notifier_to_errno(ret);
if (ret)
@@ -5712,7 +5859,7 @@ free_all:
free_pcpu:
free_percpu(dev->pcpu_refcnt);
- kfree(dev->_tx);
+ netif_free_tx_queues(dev);
#ifdef CONFIG_RPS
kfree(dev->_rx);
#endif
@@ -5737,7 +5884,7 @@ void free_netdev(struct net_device *dev)
release_net(dev_net(dev));
- kfree(dev->_tx);
+ netif_free_tx_queues(dev);
#ifdef CONFIG_RPS
kfree(dev->_rx);
#endif
@@ -6048,7 +6195,7 @@ netdev_features_t netdev_increment_features(netdev_features_t all,
}
EXPORT_SYMBOL(netdev_increment_features);
-static struct hlist_head *netdev_create_hash(void)
+static struct hlist_head * __net_init netdev_create_hash(void)
{
int i;
struct hlist_head *hash;
@@ -6304,6 +6451,10 @@ static int __init net_dev_init(void)
sd->backlog.weight = weight_p;
sd->backlog.gro_list = NULL;
sd->backlog.gro_count = 0;
+
+#ifdef CONFIG_NET_FLOW_LIMIT
+ sd->flow_limit = NULL;
+#endif
}
dev_boot_phase = 0;
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index d23b6682f4e..5e78d44333b 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -295,9 +295,9 @@ static int net_dm_cmd_trace(struct sk_buff *skb,
}
static int dropmon_net_event(struct notifier_block *ev_block,
- unsigned long event, void *ptr)
+ unsigned long event, void *ptr)
{
- struct net_device *dev = ptr;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct dm_hw_stat_delta *new_stat = NULL;
struct dm_hw_stat_delta *tmp;
diff --git a/net/core/dst.c b/net/core/dst.c
index df9cc810ec8..ca4231ec734 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -372,7 +372,7 @@ static void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
static int dst_dev_event(struct notifier_block *this, unsigned long event,
void *ptr)
{
- struct net_device *dev = ptr;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct dst_entry *dst, *last = NULL;
switch (event) {
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index ce91766eeca..78e9d9223e4 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -82,6 +82,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
[NETIF_F_FSO_BIT] = "tx-fcoe-segmentation",
[NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation",
[NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation",
+ [NETIF_F_GSO_MPLS_BIT] = "tx-mpls-segmentation",
[NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc",
[NETIF_F_SCTP_CSUM_BIT] = "tx-checksum-sctp",
@@ -278,11 +279,16 @@ static u32 __ethtool_get_flags(struct net_device *dev)
{
u32 flags = 0;
- if (dev->features & NETIF_F_LRO) flags |= ETH_FLAG_LRO;
- if (dev->features & NETIF_F_HW_VLAN_CTAG_RX) flags |= ETH_FLAG_RXVLAN;
- if (dev->features & NETIF_F_HW_VLAN_CTAG_TX) flags |= ETH_FLAG_TXVLAN;
- if (dev->features & NETIF_F_NTUPLE) flags |= ETH_FLAG_NTUPLE;
- if (dev->features & NETIF_F_RXHASH) flags |= ETH_FLAG_RXHASH;
+ if (dev->features & NETIF_F_LRO)
+ flags |= ETH_FLAG_LRO;
+ if (dev->features & NETIF_F_HW_VLAN_CTAG_RX)
+ flags |= ETH_FLAG_RXVLAN;
+ if (dev->features & NETIF_F_HW_VLAN_CTAG_TX)
+ flags |= ETH_FLAG_TXVLAN;
+ if (dev->features & NETIF_F_NTUPLE)
+ flags |= ETH_FLAG_NTUPLE;
+ if (dev->features & NETIF_F_RXHASH)
+ flags |= ETH_FLAG_RXHASH;
return flags;
}
@@ -294,11 +300,16 @@ static int __ethtool_set_flags(struct net_device *dev, u32 data)
if (data & ~ETH_ALL_FLAGS)
return -EINVAL;
- if (data & ETH_FLAG_LRO) features |= NETIF_F_LRO;
- if (data & ETH_FLAG_RXVLAN) features |= NETIF_F_HW_VLAN_CTAG_RX;
- if (data & ETH_FLAG_TXVLAN) features |= NETIF_F_HW_VLAN_CTAG_TX;
- if (data & ETH_FLAG_NTUPLE) features |= NETIF_F_NTUPLE;
- if (data & ETH_FLAG_RXHASH) features |= NETIF_F_RXHASH;
+ if (data & ETH_FLAG_LRO)
+ features |= NETIF_F_LRO;
+ if (data & ETH_FLAG_RXVLAN)
+ features |= NETIF_F_HW_VLAN_CTAG_RX;
+ if (data & ETH_FLAG_TXVLAN)
+ features |= NETIF_F_HW_VLAN_CTAG_TX;
+ if (data & ETH_FLAG_NTUPLE)
+ features |= NETIF_F_NTUPLE;
+ if (data & ETH_FLAG_RXHASH)
+ features |= NETIF_F_RXHASH;
/* allow changing only bits set in hw_features */
changed = (features ^ dev->features) & ETH_ALL_FEATURES;
@@ -1319,10 +1330,19 @@ static int ethtool_get_dump_data(struct net_device *dev,
if (ret)
return ret;
- len = (tmp.len > dump.len) ? dump.len : tmp.len;
+ len = min(tmp.len, dump.len);
if (!len)
return -EFAULT;
+ /* Don't ever let the driver think there's more space available
+ * than it requested with .get_dump_flag().
+ */
+ dump.len = len;
+
+ /* Always allocate enough space to hold the whole thing so that the
+ * driver does not need to check the length and bother with partial
+ * dumping.
+ */
data = vzalloc(tmp.len);
if (!data)
return -ENOMEM;
@@ -1330,6 +1350,16 @@ static int ethtool_get_dump_data(struct net_device *dev,
if (ret)
goto out;
+ /* There are two sane possibilities:
+ * 1. The driver's .get_dump_data() does not touch dump.len.
+ * 2. Or it may set dump.len to how much it really writes, which
+ * should be tmp.len (or len if it can do a partial dump).
+ * In any case respond to userspace with the actual length of data
+ * it's receiving.
+ */
+ WARN_ON(dump.len != len && dump.len != tmp.len);
+ dump.len = len;
+
if (copy_to_user(useraddr, &dump, sizeof(dump))) {
ret = -EFAULT;
goto out;
@@ -1413,7 +1443,7 @@ static int ethtool_get_module_eeprom(struct net_device *dev,
modinfo.eeprom_len);
}
-/* The main entry point in this file. Called from net/core/dev.c */
+/* The main entry point in this file. Called from net/core/dev_ioctl.c */
int dev_ethtool(struct net *net, struct ifreq *ifr)
{
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index d5a9f8ead0d..21735440c44 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -705,9 +705,9 @@ static void detach_rules(struct list_head *rules, struct net_device *dev)
static int fib_rules_event(struct notifier_block *this, unsigned long event,
- void *ptr)
+ void *ptr)
{
- struct net_device *dev = ptr;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct net *net = dev_net(dev);
struct fib_rules_ops *ops;
diff --git a/net/core/flow.c b/net/core/flow.c
index 7102f166482..dfa602ceb8c 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -403,7 +403,7 @@ void flow_cache_flush_deferred(void)
schedule_work(&flow_cache_flush_work);
}
-static int __cpuinit flow_cache_cpu_prepare(struct flow_cache *fc, int cpu)
+static int flow_cache_cpu_prepare(struct flow_cache *fc, int cpu)
{
struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
size_t sz = sizeof(struct hlist_head) * flow_cache_hash_size(fc);
@@ -421,7 +421,7 @@ static int __cpuinit flow_cache_cpu_prepare(struct flow_cache *fc, int cpu)
return 0;
}
-static int __cpuinit flow_cache_cpu(struct notifier_block *nfb,
+static int flow_cache_cpu(struct notifier_block *nfb,
unsigned long action,
void *hcpu)
{
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index d9d198aa9fe..6b5b6e7013c 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -82,7 +82,7 @@ struct gen_estimator
{
struct list_head list;
struct gnet_stats_basic_packed *bstats;
- struct gnet_stats_rate_est *rate_est;
+ struct gnet_stats_rate_est64 *rate_est;
spinlock_t *stats_lock;
int ewma_log;
u64 last_bytes;
@@ -167,7 +167,7 @@ static void gen_add_node(struct gen_estimator *est)
static
struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats,
- const struct gnet_stats_rate_est *rate_est)
+ const struct gnet_stats_rate_est64 *rate_est)
{
struct rb_node *p = est_root.rb_node;
@@ -203,7 +203,7 @@ struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats
*
*/
int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
- struct gnet_stats_rate_est *rate_est,
+ struct gnet_stats_rate_est64 *rate_est,
spinlock_t *stats_lock,
struct nlattr *opt)
{
@@ -258,7 +258,7 @@ EXPORT_SYMBOL(gen_new_estimator);
* Note : Caller should respect an RCU grace period before freeing stats_lock
*/
void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
- struct gnet_stats_rate_est *rate_est)
+ struct gnet_stats_rate_est64 *rate_est)
{
struct gen_estimator *e;
@@ -290,7 +290,7 @@ EXPORT_SYMBOL(gen_kill_estimator);
* Returns 0 on success or a negative error code.
*/
int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
- struct gnet_stats_rate_est *rate_est,
+ struct gnet_stats_rate_est64 *rate_est,
spinlock_t *stats_lock, struct nlattr *opt)
{
gen_kill_estimator(bstats, rate_est);
@@ -306,7 +306,7 @@ EXPORT_SYMBOL(gen_replace_estimator);
* Returns true if estimator is active, and false if not.
*/
bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats,
- const struct gnet_stats_rate_est *rate_est)
+ const struct gnet_stats_rate_est64 *rate_est)
{
bool res;
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index ddedf211e58..9d3d9e78397 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -143,18 +143,30 @@ EXPORT_SYMBOL(gnet_stats_copy_basic);
int
gnet_stats_copy_rate_est(struct gnet_dump *d,
const struct gnet_stats_basic_packed *b,
- struct gnet_stats_rate_est *r)
+ struct gnet_stats_rate_est64 *r)
{
+ struct gnet_stats_rate_est est;
+ int res;
+
if (b && !gen_estimator_active(b, r))
return 0;
+ est.bps = min_t(u64, UINT_MAX, r->bps);
+ /* we have some time before reaching 2^32 packets per second */
+ est.pps = r->pps;
+
if (d->compat_tc_stats) {
- d->tc_stats.bps = r->bps;
- d->tc_stats.pps = r->pps;
+ d->tc_stats.bps = est.bps;
+ d->tc_stats.pps = est.pps;
}
- if (d->tail)
- return gnet_stats_copy(d, TCA_STATS_RATE_EST, r, sizeof(*r));
+ if (d->tail) {
+ res = gnet_stats_copy(d, TCA_STATS_RATE_EST, &est, sizeof(est));
+ if (res < 0 || est.bps == r->bps)
+ return res;
+ /* emit 64bit stats only if needed */
+ return gnet_stats_copy(d, TCA_STATS_RATE_EST64, r, sizeof(*r));
+ }
return 0;
}
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 8f82a5cc385..9c3a839322b 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -92,6 +92,9 @@ static bool linkwatch_urgent_event(struct net_device *dev)
if (dev->ifindex != dev->iflink)
return true;
+ if (dev->priv_flags & IFF_TEAM_PORT)
+ return true;
+
return netif_carrier_ok(dev) && qdisc_tx_changing(dev);
}
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 5c56b217b99..9232c68941a 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -231,7 +231,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
we must kill timers etc. and move
it to safe state.
*/
- skb_queue_purge(&n->arp_queue);
+ __skb_queue_purge(&n->arp_queue);
n->arp_queue_len_bytes = 0;
n->output = neigh_blackhole;
if (n->nud_state & NUD_VALID)
@@ -286,7 +286,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device
if (!n)
goto out_entries;
- skb_queue_head_init(&n->arp_queue);
+ __skb_queue_head_init(&n->arp_queue);
rwlock_init(&n->lock);
seqlock_init(&n->ha_lock);
n->updated = n->used = now;
@@ -708,7 +708,9 @@ void neigh_destroy(struct neighbour *neigh)
if (neigh_del_timer(neigh))
pr_warn("Impossible event\n");
- skb_queue_purge(&neigh->arp_queue);
+ write_lock_bh(&neigh->lock);
+ __skb_queue_purge(&neigh->arp_queue);
+ write_unlock_bh(&neigh->lock);
neigh->arp_queue_len_bytes = 0;
if (dev->netdev_ops->ndo_neigh_destroy)
@@ -858,7 +860,7 @@ static void neigh_invalidate(struct neighbour *neigh)
neigh->ops->error_report(neigh, skb);
write_lock(&neigh->lock);
}
- skb_queue_purge(&neigh->arp_queue);
+ __skb_queue_purge(&neigh->arp_queue);
neigh->arp_queue_len_bytes = 0;
}
@@ -1210,7 +1212,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
write_lock_bh(&neigh->lock);
}
- skb_queue_purge(&neigh->arp_queue);
+ __skb_queue_purge(&neigh->arp_queue);
neigh->arp_queue_len_bytes = 0;
}
out:
@@ -1419,7 +1421,7 @@ static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
for (p = &tbl->parms; p; p = p->next) {
if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
- (!p->dev && !ifindex))
+ (!p->dev && !ifindex && net_eq(net, &init_net)))
return p;
}
@@ -1429,15 +1431,11 @@ static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
struct neigh_table *tbl)
{
- struct neigh_parms *p, *ref;
+ struct neigh_parms *p;
struct net *net = dev_net(dev);
const struct net_device_ops *ops = dev->netdev_ops;
- ref = lookup_neigh_parms(tbl, net, 0);
- if (!ref)
- return NULL;
-
- p = kmemdup(ref, sizeof(*p), GFP_KERNEL);
+ p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
if (p) {
p->tbl = tbl;
atomic_set(&p->refcnt, 1);
@@ -2053,6 +2051,12 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
}
}
+ err = -ENOENT;
+ if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
+ tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
+ !net_eq(net, &init_net))
+ goto errout_tbl_lock;
+
if (tb[NDTA_THRESH1])
tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
@@ -2763,13 +2767,14 @@ EXPORT_SYMBOL(neigh_app_ns);
#ifdef CONFIG_SYSCTL
static int zero;
+static int int_max = INT_MAX;
static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
-static int proc_unres_qlen(ctl_table *ctl, int write, void __user *buffer,
- size_t *lenp, loff_t *ppos)
+static int proc_unres_qlen(struct ctl_table *ctl, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
{
int size, ret;
- ctl_table tmp = *ctl;
+ struct ctl_table tmp = *ctl;
tmp.extra1 = &zero;
tmp.extra2 = &unres_qlen_max;
@@ -2815,19 +2820,25 @@ static struct neigh_sysctl_table {
.procname = "mcast_solicit",
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .extra1 = &zero,
+ .extra2 = &int_max,
+ .proc_handler = proc_dointvec_minmax,
},
[NEIGH_VAR_UCAST_PROBE] = {
.procname = "ucast_solicit",
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .extra1 = &zero,
+ .extra2 = &int_max,
+ .proc_handler = proc_dointvec_minmax,
},
[NEIGH_VAR_APP_PROBE] = {
.procname = "app_solicit",
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .extra1 = &zero,
+ .extra2 = &int_max,
+ .proc_handler = proc_dointvec_minmax,
},
[NEIGH_VAR_RETRANS_TIME] = {
.procname = "retrans_time",
@@ -2870,7 +2881,9 @@ s