aboutsummaryrefslogtreecommitdiff
path: root/net/core
diff options
context:
space:
mode:
authorBenjamin Herrenschmidt <benh@kernel.crashing.org>2009-03-30 14:04:53 +1100
committerBenjamin Herrenschmidt <benh@kernel.crashing.org>2009-03-30 14:04:53 +1100
commit9ff9a26b786c35ee8d2a66222924a807ec851a9f (patch)
treedb432a17bccca1ca2c16907f0ee83ac449ed4012 /net/core
parent0a3108beea9143225119d5e7c72a8e2c64f3eb7d (diff)
parent0d34fb8e93ceba7b6dad0062dbb4a0813bacd75b (diff)
Merge commit 'origin/master' into next
Manual merge of: arch/powerpc/include/asm/elf.h drivers/i2c/busses/i2c-mpc.c
Diffstat (limited to 'net/core')
-rw-r--r--net/core/Makefile3
-rw-r--r--net/core/datagram.c2
-rw-r--r--net/core/dev.c271
-rw-r--r--net/core/drop_monitor.c263
-rw-r--r--net/core/ethtool.c58
-rw-r--r--net/core/fib_rules.c3
-rw-r--r--net/core/neighbour.c15
-rw-r--r--net/core/net-sysfs.c2
-rw-r--r--net/core/net-traces.c29
-rw-r--r--net/core/pktgen.c18
-rw-r--r--net/core/rtnetlink.c9
-rw-r--r--net/core/skbuff.c235
-rw-r--r--net/core/sock.c95
-rw-r--r--net/core/sysctl_net_core.c1
-rw-r--r--net/core/utils.c1
15 files changed, 775 insertions, 230 deletions
diff --git a/net/core/Makefile b/net/core/Makefile
index 26a37cb3192..796f46eece5 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -17,3 +17,6 @@ obj-$(CONFIG_NET_PKTGEN) += pktgen.o
obj-$(CONFIG_NETPOLL) += netpoll.o
obj-$(CONFIG_NET_DMA) += user_dma.o
obj-$(CONFIG_FIB_RULES) += fib_rules.o
+obj-$(CONFIG_TRACEPOINTS) += net-traces.o
+obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o
+
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 5e2ac0c4b07..d0de644b378 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -208,7 +208,7 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
{
- kfree_skb(skb);
+ consume_skb(skb);
sk_mem_reclaim_partial(sk);
}
diff --git a/net/core/dev.c b/net/core/dev.c
index e3fe5c70560..52fea5b28ca 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1457,7 +1457,9 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol)
((features & NETIF_F_IP_CSUM) &&
protocol == htons(ETH_P_IP)) ||
((features & NETIF_F_IPV6_CSUM) &&
- protocol == htons(ETH_P_IPV6)));
+ protocol == htons(ETH_P_IPV6)) ||
+ ((features & NETIF_F_FCOE_CRC) &&
+ protocol == htons(ETH_P_FCOE)));
}
static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
@@ -1668,8 +1670,8 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
struct netdev_queue *txq)
{
const struct net_device_ops *ops = dev->netdev_ops;
+ int rc;
- prefetch(&dev->netdev_ops->ndo_start_xmit);
if (likely(!skb->next)) {
if (!list_empty(&ptype_all))
dev_queue_xmit_nit(skb, dev);
@@ -1681,13 +1683,27 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
goto gso;
}
- return ops->ndo_start_xmit(skb, dev);
+ rc = ops->ndo_start_xmit(skb, dev);
+ /*
+ * TODO: if skb_orphan() was called by
+ * dev->hard_start_xmit() (for example, the unmodified
+ * igb driver does that; bnx2 doesn't), then
+ * skb_tx_software_timestamp() will be unable to send
+ * back the time stamp.
+ *
+ * How can this be prevented? Always create another
+ * reference to the socket before calling
+ * dev->hard_start_xmit()? Prevent that skb_orphan()
+ * does anything in dev->hard_start_xmit() by clearing
+ * the skb destructor before the call and restoring it
+ * afterwards, then doing the skb_orphan() ourselves?
+ */
+ return rc;
}
gso:
do {
struct sk_buff *nskb = skb->next;
- int rc;
skb->next = nskb->next;
nskb->next = NULL;
@@ -1708,59 +1724,24 @@ out_kfree_skb:
return 0;
}
-static u32 simple_tx_hashrnd;
-static int simple_tx_hashrnd_initialized = 0;
+static u32 skb_tx_hashrnd;
-static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb)
+u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
{
- u32 addr1, addr2, ports;
- u32 hash, ihl;
- u8 ip_proto = 0;
-
- if (unlikely(!simple_tx_hashrnd_initialized)) {
- get_random_bytes(&simple_tx_hashrnd, 4);
- simple_tx_hashrnd_initialized = 1;
- }
+ u32 hash;
- switch (skb->protocol) {
- case htons(ETH_P_IP):
- if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)))
- ip_proto = ip_hdr(skb)->protocol;
- addr1 = ip_hdr(skb)->saddr;
- addr2 = ip_hdr(skb)->daddr;
- ihl = ip_hdr(skb)->ihl;
- break;
- case htons(ETH_P_IPV6):
- ip_proto = ipv6_hdr(skb)->nexthdr;
- addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3];
- addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3];
- ihl = (40 >> 2);
- break;
- default:
- return 0;
- }
-
-
- switch (ip_proto) {
- case IPPROTO_TCP:
- case IPPROTO_UDP:
- case IPPROTO_DCCP:
- case IPPROTO_ESP:
- case IPPROTO_AH:
- case IPPROTO_SCTP:
- case IPPROTO_UDPLITE:
- ports = *((u32 *) (skb_network_header(skb) + (ihl * 4)));
- break;
-
- default:
- ports = 0;
- break;
- }
+ if (skb_rx_queue_recorded(skb)) {
+ hash = skb_get_rx_queue(skb);
+ } else if (skb->sk && skb->sk->sk_hash) {
+ hash = skb->sk->sk_hash;
+ } else
+ hash = skb->protocol;
- hash = jhash_3words(addr1, addr2, ports, simple_tx_hashrnd);
+ hash = jhash_1word(hash, skb_tx_hashrnd);
return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
}
+EXPORT_SYMBOL(skb_tx_hash);
static struct netdev_queue *dev_pick_tx(struct net_device *dev,
struct sk_buff *skb)
@@ -1771,7 +1752,7 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev,
if (ops->ndo_select_queue)
queue_index = ops->ndo_select_queue(dev, skb);
else if (dev->real_num_tx_queues > 1)
- queue_index = simple_tx_hash(dev, skb);
+ queue_index = skb_tx_hash(dev, skb);
skb_set_queue_mapping(skb, queue_index);
return netdev_get_tx_queue(dev, queue_index);
@@ -2297,6 +2278,8 @@ ncls:
if (!skb)
goto out;
+ skb_orphan(skb);
+
type = skb->protocol;
list_for_each_entry_rcu(ptype,
&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
@@ -2366,7 +2349,6 @@ static int napi_gro_complete(struct sk_buff *skb)
out:
skb_shinfo(skb)->gso_size = 0;
- __skb_push(skb, -skb_network_offset(skb));
return netif_receive_skb(skb);
}
@@ -2380,20 +2362,40 @@ void napi_gro_flush(struct napi_struct *napi)
napi_gro_complete(skb);
}
+ napi->gro_count = 0;
napi->gro_list = NULL;
}
EXPORT_SYMBOL(napi_gro_flush);
+void *skb_gro_header(struct sk_buff *skb, unsigned int hlen)
+{
+ unsigned int offset = skb_gro_offset(skb);
+
+ hlen += offset;
+ if (hlen <= skb_headlen(skb))
+ return skb->data + offset;
+
+ if (unlikely(!skb_shinfo(skb)->nr_frags ||
+ skb_shinfo(skb)->frags[0].size <=
+ hlen - skb_headlen(skb) ||
+ PageHighMem(skb_shinfo(skb)->frags[0].page)))
+ return pskb_may_pull(skb, hlen) ? skb->data + offset : NULL;
+
+ return page_address(skb_shinfo(skb)->frags[0].page) +
+ skb_shinfo(skb)->frags[0].page_offset +
+ offset - skb_headlen(skb);
+}
+EXPORT_SYMBOL(skb_gro_header);
+
int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
struct sk_buff **pp = NULL;
struct packet_type *ptype;
__be16 type = skb->protocol;
struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
- int count = 0;
int same_flow;
int mac_len;
- int free;
+ int ret;
if (!(skb->dev->features & NETIF_F_GRO))
goto normal;
@@ -2403,30 +2405,16 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
rcu_read_lock();
list_for_each_entry_rcu(ptype, head, list) {
- struct sk_buff *p;
-
if (ptype->type != type || ptype->dev || !ptype->gro_receive)
continue;
- skb_reset_network_header(skb);
+ skb_set_network_header(skb, skb_gro_offset(skb));
mac_len = skb->network_header - skb->mac_header;
skb->mac_len = mac_len;
NAPI_GRO_CB(skb)->same_flow = 0;
NAPI_GRO_CB(skb)->flush = 0;
NAPI_GRO_CB(skb)->free = 0;
- for (p = napi->gro_list; p; p = p->next) {
- count++;
-
- if (!NAPI_GRO_CB(p)->same_flow)
- continue;
-
- if (p->mac_len != mac_len ||
- memcmp(skb_mac_header(p), skb_mac_header(skb),
- mac_len))
- NAPI_GRO_CB(p)->same_flow = 0;
- }
-
pp = ptype->gro_receive(&napi->gro_list, skb);
break;
}
@@ -2436,7 +2424,7 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
goto normal;
same_flow = NAPI_GRO_CB(skb)->same_flow;
- free = NAPI_GRO_CB(skb)->free;
+ ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
if (pp) {
struct sk_buff *nskb = *pp;
@@ -2444,27 +2432,35 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
*pp = nskb->next;
nskb->next = NULL;
napi_gro_complete(nskb);
- count--;
+ napi->gro_count--;
}
if (same_flow)
goto ok;
- if (NAPI_GRO_CB(skb)->flush || count >= MAX_GRO_SKBS) {
- __skb_push(skb, -skb_network_offset(skb));
+ if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS)
goto normal;
- }
+ napi->gro_count++;
NAPI_GRO_CB(skb)->count = 1;
- skb_shinfo(skb)->gso_size = skb->len;
+ skb_shinfo(skb)->gso_size = skb_gro_len(skb);
skb->next = napi->gro_list;
napi->gro_list = skb;
+ ret = GRO_HELD;
+
+pull:
+ if (unlikely(!pskb_may_pull(skb, skb_gro_offset(skb)))) {
+ if (napi->gro_list == skb)
+ napi->gro_list = skb->next;
+ ret = GRO_DROP;
+ }
ok:
- return free;
+ return ret;
normal:
- return -1;
+ ret = GRO_NORMAL;
+ goto pull;
}
EXPORT_SYMBOL(dev_gro_receive);
@@ -2472,29 +2468,44 @@ static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
struct sk_buff *p;
+ if (netpoll_rx_on(skb))
+ return GRO_NORMAL;
+
for (p = napi->gro_list; p; p = p->next) {
- NAPI_GRO_CB(p)->same_flow = 1;
+ NAPI_GRO_CB(p)->same_flow = !compare_ether_header(
+ skb_mac_header(p), skb_gro_mac_header(skb));
NAPI_GRO_CB(p)->flush = 0;
}
return dev_gro_receive(napi, skb);
}
-int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+int napi_skb_finish(int ret, struct sk_buff *skb)
{
- if (netpoll_receive_skb(skb))
- return NET_RX_DROP;
+ int err = NET_RX_SUCCESS;
- switch (__napi_gro_receive(napi, skb)) {
- case -1:
+ switch (ret) {
+ case GRO_NORMAL:
return netif_receive_skb(skb);
- case 1:
+ case GRO_DROP:
+ err = NET_RX_DROP;
+ /* fall through */
+
+ case GRO_MERGED_FREE:
kfree_skb(skb);
break;
}
- return NET_RX_SUCCESS;
+ return err;
+}
+EXPORT_SYMBOL(napi_skb_finish);
+
+int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+{
+ skb_gro_reset_offset(skb);
+
+ return napi_skb_finish(__napi_gro_receive(napi, skb), skb);
}
EXPORT_SYMBOL(napi_gro_receive);
@@ -2512,6 +2523,9 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi,
{
struct net_device *dev = napi->dev;
struct sk_buff *skb = napi->skb;
+ struct ethhdr *eth;
+ skb_frag_t *frag;
+ int i;
napi->skb = NULL;
@@ -2524,20 +2538,36 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi,
}
BUG_ON(info->nr_frags > MAX_SKB_FRAGS);
+ frag = &info->frags[info->nr_frags - 1];
+
+ for (i = skb_shinfo(skb)->nr_frags; i < info->nr_frags; i++) {
+ skb_fill_page_desc(skb, i, frag->page, frag->page_offset,
+ frag->size);
+ frag++;
+ }
skb_shinfo(skb)->nr_frags = info->nr_frags;
- memcpy(skb_shinfo(skb)->frags, info->frags, sizeof(info->frags));
skb->data_len = info->len;
skb->len += info->len;
skb->truesize += info->len;
- if (!pskb_may_pull(skb, ETH_HLEN)) {
+ skb_reset_mac_header(skb);
+ skb_gro_reset_offset(skb);
+
+ eth = skb_gro_header(skb, sizeof(*eth));
+ if (!eth) {
napi_reuse_skb(napi, skb);
skb = NULL;
goto out;
}
- skb->protocol = eth_type_trans(skb, dev);
+ skb_gro_pull(skb, sizeof(*eth));
+
+ /*
+ * This works because the only protocols we care about don't require
+ * special handling. We'll fix it up properly at the end.
+ */
+ skb->protocol = eth->h_proto;
skb->ip_summed = info->ip_summed;
skb->csum = info->csum;
@@ -2547,32 +2577,43 @@ out:
}
EXPORT_SYMBOL(napi_fraginfo_skb);
-int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info)
+int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret)
{
- struct sk_buff *skb = napi_fraginfo_skb(napi, info);
- int err = NET_RX_DROP;
+ int err = NET_RX_SUCCESS;
- if (!skb)
- goto out;
+ switch (ret) {
+ case GRO_NORMAL:
+ case GRO_HELD:
+ skb->protocol = eth_type_trans(skb, napi->dev);
- if (netpoll_receive_skb(skb))
- goto out;
+ if (ret == GRO_NORMAL)
+ return netif_receive_skb(skb);
- err = NET_RX_SUCCESS;
+ skb_gro_pull(skb, -ETH_HLEN);
+ break;
- switch (__napi_gro_receive(napi, skb)) {
- case -1:
- return netif_receive_skb(skb);
+ case GRO_DROP:
+ err = NET_RX_DROP;
+ /* fall through */
- case 0:
- goto out;
+ case GRO_MERGED_FREE:
+ napi_reuse_skb(napi, skb);
+ break;
}
- napi_reuse_skb(napi, skb);
-
-out:
return err;
}
+EXPORT_SYMBOL(napi_frags_finish);
+
+int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info)
+{
+ struct sk_buff *skb = napi_fraginfo_skb(napi, info);
+
+ if (!skb)
+ return NET_RX_DROP;
+
+ return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb));
+}
EXPORT_SYMBOL(napi_gro_frags);
static int process_backlog(struct napi_struct *napi, int quota)
@@ -2588,18 +2629,15 @@ static int process_backlog(struct napi_struct *napi, int quota)
local_irq_disable();
skb = __skb_dequeue(&queue->input_pkt_queue);
if (!skb) {
+ __napi_complete(napi);
local_irq_enable();
- napi_complete(napi);
- goto out;
+ break;
}
local_irq_enable();
- napi_gro_receive(napi, skb);
+ netif_receive_skb(skb);
} while (++work < quota && jiffies == start_time);
- napi_gro_flush(napi);
-
-out:
return work;
}
@@ -2653,6 +2691,7 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
int (*poll)(struct napi_struct *, int), int weight)
{
INIT_LIST_HEAD(&napi->poll_list);
+ napi->gro_count = 0;
napi->gro_list = NULL;
napi->skb = NULL;
napi->poll = poll;
@@ -2681,6 +2720,7 @@ void netif_napi_del(struct napi_struct *napi)
}
napi->gro_list = NULL;
+ napi->gro_count = 0;
}
EXPORT_SYMBOL(netif_napi_del);
@@ -3949,6 +3989,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
cmd == SIOCSMIIREG ||
cmd == SIOCBRADDIF ||
cmd == SIOCBRDELIF ||
+ cmd == SIOCSHWTSTAMP ||
cmd == SIOCWANDEV) {
err = -EOPNOTSUPP;
if (ops->ndo_do_ioctl) {
@@ -4103,6 +4144,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
case SIOCBONDCHANGEACTIVE:
case SIOCBRADDIF:
case SIOCBRDELIF:
+ case SIOCSHWTSTAMP:
if (!capable(CAP_NET_ADMIN))
return -EPERM;
/* fall through */
@@ -5199,6 +5241,7 @@ static int __init net_dev_init(void)
queue->backlog.poll = process_backlog;
queue->backlog.weight = weight_p;
queue->backlog.gro_list = NULL;
+ queue->backlog.gro_count = 0;
}
dev_boot_phase = 0;
@@ -5231,6 +5274,14 @@ out:
subsys_initcall(net_dev_init);
+static int __init initialize_hashrnd(void)
+{
+ get_random_bytes(&skb_tx_hashrnd, sizeof(skb_tx_hashrnd));
+ return 0;
+}
+
+late_initcall_sync(initialize_hashrnd);
+
EXPORT_SYMBOL(__dev_get_by_index);
EXPORT_SYMBOL(__dev_get_by_name);
EXPORT_SYMBOL(__dev_remove_pack);
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
new file mode 100644
index 00000000000..9fd0dc3cca9
--- /dev/null
+++ b/net/core/drop_monitor.c
@@ -0,0 +1,263 @@
+/*
+ * Monitoring code for network dropped packet alerts
+ *
+ * Copyright (C) 2009 Neil Horman <nhorman@tuxdriver.com>
+ */
+
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/string.h>
+#include <linux/if_arp.h>
+#include <linux/inetdevice.h>
+#include <linux/inet.h>
+#include <linux/interrupt.h>
+#include <linux/netpoll.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+#include <linux/netlink.h>
+#include <linux/net_dropmon.h>
+#include <linux/percpu.h>
+#include <linux/timer.h>
+#include <linux/bitops.h>
+#include <net/genetlink.h>
+
+#include <trace/skb.h>
+
+#include <asm/unaligned.h>
+
+#define TRACE_ON 1
+#define TRACE_OFF 0
+
+static void send_dm_alert(struct work_struct *unused);
+
+
+/*
+ * Globals, our netlink socket pointer
+ * and the work handle that will send up
+ * netlink alerts
+ */
+struct sock *dm_sock;
+
+struct per_cpu_dm_data {
+ struct work_struct dm_alert_work;
+ struct sk_buff *skb;
+ atomic_t dm_hit_count;
+ struct timer_list send_timer;
+};
+
+static struct genl_family net_drop_monitor_family = {
+ .id = GENL_ID_GENERATE,
+ .hdrsize = 0,
+ .name = "NET_DM",
+ .version = 1,
+ .maxattr = NET_DM_CMD_MAX,
+};
+
+static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data);
+
+static int dm_hit_limit = 64;
+static int dm_delay = 1;
+
+
+static void reset_per_cpu_data(struct per_cpu_dm_data *data)
+{
+ size_t al;
+ struct net_dm_alert_msg *msg;
+
+ al = sizeof(struct net_dm_alert_msg);
+ al += dm_hit_limit * sizeof(struct net_dm_drop_point);
+ data->skb = genlmsg_new(al, GFP_KERNEL);
+ genlmsg_put(data->skb, 0, 0, &net_drop_monitor_family,
+ 0, NET_DM_CMD_ALERT);
+ msg = __nla_reserve_nohdr(data->skb, sizeof(struct net_dm_alert_msg));
+ memset(msg, 0, al);
+ atomic_set(&data->dm_hit_count, dm_hit_limit);
+}
+
+static void send_dm_alert(struct work_struct *unused)
+{
+ struct sk_buff *skb;
+ struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data);
+
+ /*
+ * Grab the skb we're about to send
+ */
+ skb = data->skb;
+
+ /*
+ * Replace it with a new one
+ */
+ reset_per_cpu_data(data);
+
+ /*
+ * Ship it!
+ */
+ genlmsg_multicast(skb, 0, NET_DM_GRP_ALERT, GFP_KERNEL);
+
+}
+
+/*
+ * This is the timer function to delay the sending of an alert
+ * in the event that more drops will arrive during the
+ * hysteresis period. Note that it operates under the timer interrupt
+ * so we don't need to disable preemption here
+ */
+static void sched_send_work(unsigned long unused)
+{
+ struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data);
+
+ schedule_work(&data->dm_alert_work);
+}
+
+static void trace_kfree_skb_hit(struct sk_buff *skb, void *location)
+{
+ struct net_dm_alert_msg *msg;
+ struct nlmsghdr *nlh;
+ int i;
+ struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data);
+
+
+ if (!atomic_add_unless(&data->dm_hit_count, -1, 0)) {
+ /*
+ * we're already at zero, discard this hit
+ */
+ goto out;
+ }
+
+ nlh = (struct nlmsghdr *)data->skb->data;
+ msg = genlmsg_data(nlmsg_data(nlh));
+ for (i = 0; i < msg->entries; i++) {
+ if (!memcmp(&location, msg->points[i].pc, sizeof(void *))) {
+ msg->points[i].count++;
+ goto out;
+ }
+ }
+
+ /*
+ * We need to create a new entry
+ */
+ __nla_reserve_nohdr(data->skb, sizeof(struct net_dm_drop_point));
+ memcpy(msg->points[msg->entries].pc, &location, sizeof(void *));
+ msg->points[msg->entries].count = 1;
+ msg->entries++;
+
+ if (!timer_pending(&data->send_timer)) {
+ data->send_timer.expires = jiffies + dm_delay * HZ;
+ add_timer_on(&data->send_timer, smp_processor_id());
+ }
+
+out:
+ return;
+}
+
+static int set_all_monitor_traces(int state)
+{
+ int rc = 0;
+
+ switch (state) {
+ case TRACE_ON:
+ rc |= register_trace_kfree_skb(trace_kfree_skb_hit);
+ break;
+ case TRACE_OFF:
+ rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit);
+
+ tracepoint_synchronize_unregister();
+ break;
+ default:
+ rc = 1;
+ break;
+ }
+
+ if (rc)
+ return -EINPROGRESS;
+ return rc;
+}
+
+
+static int net_dm_cmd_config(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ return -ENOTSUPP;
+}
+
+static int net_dm_cmd_trace(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ switch (info->genlhdr->cmd) {
+ case NET_DM_CMD_START:
+ return set_all_monitor_traces(TRACE_ON);
+ break;
+ case NET_DM_CMD_STOP:
+ return set_all_monitor_traces(TRACE_OFF);
+ break;
+ }
+
+ return -ENOTSUPP;
+}
+
+
+static struct genl_ops dropmon_ops[] = {
+ {
+ .cmd = NET_DM_CMD_CONFIG,
+ .doit = net_dm_cmd_config,
+ },
+ {
+ .cmd = NET_DM_CMD_START,
+ .doit = net_dm_cmd_trace,
+ },
+ {
+ .cmd = NET_DM_CMD_STOP,
+ .doit = net_dm_cmd_trace,
+ },
+};
+
+static int __init init_net_drop_monitor(void)
+{
+ int cpu;
+ int rc, i, ret;
+ struct per_cpu_dm_data *data;
+ printk(KERN_INFO "Initalizing network drop monitor service\n");
+
+ if (sizeof(void *) > 8) {
+ printk(KERN_ERR "Unable to store program counters on this arch, Drop monitor failed\n");
+ return -ENOSPC;
+ }
+
+ if (genl_register_family(&net_drop_monitor_family) < 0) {
+ printk(KERN_ERR "Could not create drop monitor netlink family\n");
+ return -EFAULT;
+ }
+
+ rc = -EFAULT;
+
+ for (i = 0; i < ARRAY_SIZE(dropmon_ops); i++) {
+ ret = genl_register_ops(&net_drop_monitor_family,
+ &dropmon_ops[i]);
+ if (ret) {
+ printk(KERN_CRIT "failed to register operation %d\n",
+ dropmon_ops[i].cmd);
+ goto out_unreg;
+ }
+ }
+
+ rc = 0;
+
+ for_each_present_cpu(cpu) {
+ data = &per_cpu(dm_cpu_data, cpu);
+ reset_per_cpu_data(data);
+ INIT_WORK(&data->dm_alert_work, send_dm_alert);
+ init_timer(&data->send_timer);
+ data->send_timer.data = cpu;
+ data->send_timer.function = sched_send_work;
+ }
+ goto out;
+
+out_unreg:
+ genl_unregister_family(&net_drop_monitor_family);
+out:
+ return rc;
+}
+
+late_initcall(init_net_drop_monitor);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 947710a36ce..244ca56dffa 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -209,34 +209,62 @@ static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr)
return 0;
}
-static int ethtool_set_rxhash(struct net_device *dev, void __user *useraddr)
+static int ethtool_set_rxnfc(struct net_device *dev, void __user *useraddr)
{
struct ethtool_rxnfc cmd;
- if (!dev->ethtool_ops->set_rxhash)
+ if (!dev->ethtool_ops->set_rxnfc)
return -EOPNOTSUPP;
if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
return -EFAULT;
- return dev->ethtool_ops->set_rxhash(dev, &cmd);
+ return dev->ethtool_ops->set_rxnfc(dev, &cmd);
}
-static int ethtool_get_rxhash(struct net_device *dev, void __user *useraddr)
+static int ethtool_get_rxnfc(struct net_device *dev, void __user *useraddr)
{
struct ethtool_rxnfc info;
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ int ret;
+ void *rule_buf = NULL;
- if (!dev->ethtool_ops->get_rxhash)
+ if (!ops->get_rxnfc)
return -EOPNOTSUPP;
if (copy_from_user(&info, useraddr, sizeof(info)))
return -EFAULT;
- dev->ethtool_ops->get_rxhash(dev, &info);
+ if (info.cmd == ETHTOOL_GRXCLSRLALL) {
+ if (info.rule_cnt > 0) {
+ rule_buf = kmalloc(info.rule_cnt * sizeof(u32),
+ GFP_USER);
+ if (!rule_buf)
+ return -ENOMEM;
+ }
+ }
+ ret = ops->get_rxnfc(dev, &info, rule_buf);
+ if (ret < 0)
+ goto err_out;
+
+ ret = -EFAULT;
if (copy_to_user(useraddr, &info, sizeof(info)))
- return -EFAULT;
- return 0;
+ goto err_out;
+
+ if (rule_buf) {
+ useraddr += offsetof(struct ethtool_rxnfc, rule_locs);
+ if (copy_to_user(useraddr, rule_buf,
+ info.rule_cnt * sizeof(u32)))
+ goto err_out;
+ }
+ ret = 0;
+
+err_out:
+ if (rule_buf)
+ kfree(rule_buf);
+
+ return ret;
}
static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
@@ -901,6 +929,10 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_GFLAGS:
case ETHTOOL_GPFLAGS:
case ETHTOOL_GRXFH:
+ case ETHTOOL_GRXRINGS:
+ case ETHTOOL_GRXCLSRLCNT:
+ case ETHTOOL_GRXCLSRULE:
+ case ETHTOOL_GRXCLSRLALL:
break;
default:
if (!capable(CAP_NET_ADMIN))
@@ -1052,10 +1084,16 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
dev->ethtool_ops->set_priv_flags);
break;
case ETHTOOL_GRXFH:
- rc = ethtool_get_rxhash(dev, useraddr);
+ case ETHTOOL_GRXRINGS:
+ case ETHTOOL_GRXCLSRLCNT:
+ case ETHTOOL_GRXCLSRULE:
+ case ETHTOOL_GRXCLSRLALL:
+ rc = ethtool_get_rxnfc(dev, useraddr);
break;
case ETHTOOL_SRXFH:
- rc = ethtool_set_rxhash(dev, useraddr);
+ case ETHTOOL_SRXCLSRLDEL:
+ case ETHTOOL_SRXCLSRLINS:
+ rc = ethtool_set_rxnfc(dev, useraddr);
break;
case ETHTOOL_GGRO:
rc = ethtool_get_gro(dev, useraddr);
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 32b3a0152d7..98691e1466b 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -588,7 +588,8 @@ static void notify_rule_change(int event, struct fib_rule *rule,
goto errout;
}
- err = rtnl_notify(skb, net, pid, ops->nlgroup, nlh, GFP_KERNEL);
+ rtnl_notify(skb, net, pid, ops->nlgroup, nlh, GFP_KERNEL);
+ return;
errout:
if (err < 0)
rtnl_set_sk_err(net, ops->nlgroup, err);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 278a142d104..a1cbce7fdae 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -871,8 +871,7 @@ static void neigh_timer_handler(unsigned long arg)
write_unlock(&neigh->lock);
neigh->ops->solicit(neigh, skb);
atomic_inc(&neigh->probes);
- if (skb)
- kfree_skb(skb);
+ kfree_skb(skb);
} else {
out:
write_unlock(&neigh->lock);
@@ -908,8 +907,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
neigh->updated = jiffies;
write_unlock_bh(&neigh->lock);
- if (skb)
- kfree_skb(skb);
+ kfree_skb(skb);
return 1;
}
} else if (neigh->nud_state & NUD_STALE) {
@@ -1656,7 +1654,11 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
flags &= ~NEIGH_UPDATE_F_OVERRIDE;
}
- err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
+ if (ndm->ndm_flags & NTF_USE) {
+ neigh_event_send(neigh, NULL);
+ err = 0;
+ } else
+ err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
neigh_release(neigh);
goto out_dev_put;
}
@@ -2534,7 +2536,8 @@ static void __neigh_notify(struct neighbour *n, int type, int flags)
kfree_skb(skb);
goto errout;
}
- err = rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
+ rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
+ return;
errout:
if (err < 0)
rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 484f58750eb..2da59a0ac4a 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -498,7 +498,7 @@ int netdev_register_kobject(struct net_device *net)
dev->groups = groups;
BUILD_BUG_ON(BUS_ID_SIZE < IFNAMSIZ);
- dev_set_name(dev, net->name);
+ dev_set_name(dev, "%s", net->name);
#ifdef CONFIG_SYSFS
*groups++ = &netstat_group;
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
new file mode 100644
index 00000000000..c8fb45665e4
--- /dev/null
+++ b/net/core/net-traces.c
@@ -0,0 +1,29 @@
+/*
+ * consolidates trace point definitions
+ *
+ * Copyright (C) 2009 Neil Horman <nhorman@tuxdriver.com>
+ */
+
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/string.h>
+#include <linux/if_arp.h>
+#include <linux/inetdevice.h>
+#include <linux/inet.h>
+#include <linux/interrupt.h>
+#include <linux/netpoll.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/rcupdate.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+#include <linux/netlink.h>
+#include <linux/net_dropmon.h>
+#include <trace/skb.h>
+
+#include <asm/unaligned.h>
+#include <asm/bitops.h>
+
+
+DEFINE_TRACE(kfree_skb);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 65498483325..32d419f5ac9 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3275,8 +3275,7 @@ static void pktgen_stop(struct pktgen_thread *t)
list_for_each_entry(pkt_dev, &t->if_list, list) {
pktgen_stop_device(pkt_dev);
- if (pkt_dev->skb)
- kfree_skb(pkt_dev->skb);
+ kfree_skb(pkt_dev->skb);
pkt_dev->skb = NULL;
}
@@ -3303,8 +3302,7 @@ static void pktgen_rem_one_if(struct pktgen_thread *t)
if (!cur->removal_mark)
continue;
- if (cur->skb)
- kfree_skb(cur->skb);
+ kfree_skb(cur->skb);
cur->skb = NULL;
pktgen_remove_device(t, cur);
@@ -3328,8 +3326,7 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t)
list_for_each_safe(q, n, &t->if_list) {
cur = list_entry(q, struct pktgen_dev, list);
- if (cur->skb)
- kfree_skb(cur->skb);
+ kfree_skb(cur->skb);
cur->skb = NULL;
pktgen_remove_device(t, cur);
@@ -3393,8 +3390,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
if (!netif_running(odev)) {
pktgen_stop_device(pkt