diff options
author | Benjamin Herrenschmidt <benh@kernel.crashing.org> | 2009-03-30 14:04:53 +1100 |
---|---|---|
committer | Benjamin Herrenschmidt <benh@kernel.crashing.org> | 2009-03-30 14:04:53 +1100 |
commit | 9ff9a26b786c35ee8d2a66222924a807ec851a9f (patch) | |
tree | db432a17bccca1ca2c16907f0ee83ac449ed4012 /net/core | |
parent | 0a3108beea9143225119d5e7c72a8e2c64f3eb7d (diff) | |
parent | 0d34fb8e93ceba7b6dad0062dbb4a0813bacd75b (diff) |
Merge commit 'origin/master' into next
Manual merge of:
arch/powerpc/include/asm/elf.h
drivers/i2c/busses/i2c-mpc.c
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/Makefile | 3 | ||||
-rw-r--r-- | net/core/datagram.c | 2 | ||||
-rw-r--r-- | net/core/dev.c | 271 | ||||
-rw-r--r-- | net/core/drop_monitor.c | 263 | ||||
-rw-r--r-- | net/core/ethtool.c | 58 | ||||
-rw-r--r-- | net/core/fib_rules.c | 3 | ||||
-rw-r--r-- | net/core/neighbour.c | 15 | ||||
-rw-r--r-- | net/core/net-sysfs.c | 2 | ||||
-rw-r--r-- | net/core/net-traces.c | 29 | ||||
-rw-r--r-- | net/core/pktgen.c | 18 | ||||
-rw-r--r-- | net/core/rtnetlink.c | 9 | ||||
-rw-r--r-- | net/core/skbuff.c | 235 | ||||
-rw-r--r-- | net/core/sock.c | 95 | ||||
-rw-r--r-- | net/core/sysctl_net_core.c | 1 | ||||
-rw-r--r-- | net/core/utils.c | 1 |
15 files changed, 775 insertions, 230 deletions
diff --git a/net/core/Makefile b/net/core/Makefile index 26a37cb3192..796f46eece5 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -17,3 +17,6 @@ obj-$(CONFIG_NET_PKTGEN) += pktgen.o obj-$(CONFIG_NETPOLL) += netpoll.o obj-$(CONFIG_NET_DMA) += user_dma.o obj-$(CONFIG_FIB_RULES) += fib_rules.o +obj-$(CONFIG_TRACEPOINTS) += net-traces.o +obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o + diff --git a/net/core/datagram.c b/net/core/datagram.c index 5e2ac0c4b07..d0de644b378 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -208,7 +208,7 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, void skb_free_datagram(struct sock *sk, struct sk_buff *skb) { - kfree_skb(skb); + consume_skb(skb); sk_mem_reclaim_partial(sk); } diff --git a/net/core/dev.c b/net/core/dev.c index e3fe5c70560..52fea5b28ca 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1457,7 +1457,9 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol) ((features & NETIF_F_IP_CSUM) && protocol == htons(ETH_P_IP)) || ((features & NETIF_F_IPV6_CSUM) && - protocol == htons(ETH_P_IPV6))); + protocol == htons(ETH_P_IPV6)) || + ((features & NETIF_F_FCOE_CRC) && + protocol == htons(ETH_P_FCOE))); } static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb) @@ -1668,8 +1670,8 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq) { const struct net_device_ops *ops = dev->netdev_ops; + int rc; - prefetch(&dev->netdev_ops->ndo_start_xmit); if (likely(!skb->next)) { if (!list_empty(&ptype_all)) dev_queue_xmit_nit(skb, dev); @@ -1681,13 +1683,27 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, goto gso; } - return ops->ndo_start_xmit(skb, dev); + rc = ops->ndo_start_xmit(skb, dev); + /* + * TODO: if skb_orphan() was called by + * dev->hard_start_xmit() (for example, the unmodified + * igb driver does that; bnx2 doesn't), then + * skb_tx_software_timestamp() will be unable to send + * back the time stamp. + * + * How can this be prevented? Always create another + * reference to the socket before calling + * dev->hard_start_xmit()? Prevent that skb_orphan() + * does anything in dev->hard_start_xmit() by clearing + * the skb destructor before the call and restoring it + * afterwards, then doing the skb_orphan() ourselves? + */ + return rc; } gso: do { struct sk_buff *nskb = skb->next; - int rc; skb->next = nskb->next; nskb->next = NULL; @@ -1708,59 +1724,24 @@ out_kfree_skb: return 0; } -static u32 simple_tx_hashrnd; -static int simple_tx_hashrnd_initialized = 0; +static u32 skb_tx_hashrnd; -static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb) +u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb) { - u32 addr1, addr2, ports; - u32 hash, ihl; - u8 ip_proto = 0; - - if (unlikely(!simple_tx_hashrnd_initialized)) { - get_random_bytes(&simple_tx_hashrnd, 4); - simple_tx_hashrnd_initialized = 1; - } + u32 hash; - switch (skb->protocol) { - case htons(ETH_P_IP): - if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET))) - ip_proto = ip_hdr(skb)->protocol; - addr1 = ip_hdr(skb)->saddr; - addr2 = ip_hdr(skb)->daddr; - ihl = ip_hdr(skb)->ihl; - break; - case htons(ETH_P_IPV6): - ip_proto = ipv6_hdr(skb)->nexthdr; - addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3]; - addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3]; - ihl = (40 >> 2); - break; - default: - return 0; - } - - - switch (ip_proto) { - case IPPROTO_TCP: - case IPPROTO_UDP: - case IPPROTO_DCCP: - case IPPROTO_ESP: - case IPPROTO_AH: - case IPPROTO_SCTP: - case IPPROTO_UDPLITE: - ports = *((u32 *) (skb_network_header(skb) + (ihl * 4))); - break; - - default: - ports = 0; - break; - } + if (skb_rx_queue_recorded(skb)) { + hash = skb_get_rx_queue(skb); + } else if (skb->sk && skb->sk->sk_hash) { + hash = skb->sk->sk_hash; + } else + hash = skb->protocol; - hash = jhash_3words(addr1, addr2, ports, simple_tx_hashrnd); + hash = jhash_1word(hash, skb_tx_hashrnd); return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32); } +EXPORT_SYMBOL(skb_tx_hash); static struct netdev_queue *dev_pick_tx(struct net_device *dev, struct sk_buff *skb) @@ -1771,7 +1752,7 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev, if (ops->ndo_select_queue) queue_index = ops->ndo_select_queue(dev, skb); else if (dev->real_num_tx_queues > 1) - queue_index = simple_tx_hash(dev, skb); + queue_index = skb_tx_hash(dev, skb); skb_set_queue_mapping(skb, queue_index); return netdev_get_tx_queue(dev, queue_index); @@ -2297,6 +2278,8 @@ ncls: if (!skb) goto out; + skb_orphan(skb); + type = skb->protocol; list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { @@ -2366,7 +2349,6 @@ static int napi_gro_complete(struct sk_buff *skb) out: skb_shinfo(skb)->gso_size = 0; - __skb_push(skb, -skb_network_offset(skb)); return netif_receive_skb(skb); } @@ -2380,20 +2362,40 @@ void napi_gro_flush(struct napi_struct *napi) napi_gro_complete(skb); } + napi->gro_count = 0; napi->gro_list = NULL; } EXPORT_SYMBOL(napi_gro_flush); +void *skb_gro_header(struct sk_buff *skb, unsigned int hlen) +{ + unsigned int offset = skb_gro_offset(skb); + + hlen += offset; + if (hlen <= skb_headlen(skb)) + return skb->data + offset; + + if (unlikely(!skb_shinfo(skb)->nr_frags || + skb_shinfo(skb)->frags[0].size <= + hlen - skb_headlen(skb) || + PageHighMem(skb_shinfo(skb)->frags[0].page))) + return pskb_may_pull(skb, hlen) ? skb->data + offset : NULL; + + return page_address(skb_shinfo(skb)->frags[0].page) + + skb_shinfo(skb)->frags[0].page_offset + + offset - skb_headlen(skb); +} +EXPORT_SYMBOL(skb_gro_header); + int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { struct sk_buff **pp = NULL; struct packet_type *ptype; __be16 type = skb->protocol; struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK]; - int count = 0; int same_flow; int mac_len; - int free; + int ret; if (!(skb->dev->features & NETIF_F_GRO)) goto normal; @@ -2403,30 +2405,16 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { - struct sk_buff *p; - if (ptype->type != type || ptype->dev || !ptype->gro_receive) continue; - skb_reset_network_header(skb); + skb_set_network_header(skb, skb_gro_offset(skb)); mac_len = skb->network_header - skb->mac_header; skb->mac_len = mac_len; NAPI_GRO_CB(skb)->same_flow = 0; NAPI_GRO_CB(skb)->flush = 0; NAPI_GRO_CB(skb)->free = 0; - for (p = napi->gro_list; p; p = p->next) { - count++; - - if (!NAPI_GRO_CB(p)->same_flow) - continue; - - if (p->mac_len != mac_len || - memcmp(skb_mac_header(p), skb_mac_header(skb), - mac_len)) - NAPI_GRO_CB(p)->same_flow = 0; - } - pp = ptype->gro_receive(&napi->gro_list, skb); break; } @@ -2436,7 +2424,7 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) goto normal; same_flow = NAPI_GRO_CB(skb)->same_flow; - free = NAPI_GRO_CB(skb)->free; + ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED; if (pp) { struct sk_buff *nskb = *pp; @@ -2444,27 +2432,35 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) *pp = nskb->next; nskb->next = NULL; napi_gro_complete(nskb); - count--; + napi->gro_count--; } if (same_flow) goto ok; - if (NAPI_GRO_CB(skb)->flush || count >= MAX_GRO_SKBS) { - __skb_push(skb, -skb_network_offset(skb)); + if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS) goto normal; - } + napi->gro_count++; NAPI_GRO_CB(skb)->count = 1; - skb_shinfo(skb)->gso_size = skb->len; + skb_shinfo(skb)->gso_size = skb_gro_len(skb); skb->next = napi->gro_list; napi->gro_list = skb; + ret = GRO_HELD; + +pull: + if (unlikely(!pskb_may_pull(skb, skb_gro_offset(skb)))) { + if (napi->gro_list == skb) + napi->gro_list = skb->next; + ret = GRO_DROP; + } ok: - return free; + return ret; normal: - return -1; + ret = GRO_NORMAL; + goto pull; } EXPORT_SYMBOL(dev_gro_receive); @@ -2472,29 +2468,44 @@ static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { struct sk_buff *p; + if (netpoll_rx_on(skb)) + return GRO_NORMAL; + for (p = napi->gro_list; p; p = p->next) { - NAPI_GRO_CB(p)->same_flow = 1; + NAPI_GRO_CB(p)->same_flow = !compare_ether_header( + skb_mac_header(p), skb_gro_mac_header(skb)); NAPI_GRO_CB(p)->flush = 0; } return dev_gro_receive(napi, skb); } -int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) +int napi_skb_finish(int ret, struct sk_buff *skb) { - if (netpoll_receive_skb(skb)) - return NET_RX_DROP; + int err = NET_RX_SUCCESS; - switch (__napi_gro_receive(napi, skb)) { - case -1: + switch (ret) { + case GRO_NORMAL: return netif_receive_skb(skb); - case 1: + case GRO_DROP: + err = NET_RX_DROP; + /* fall through */ + + case GRO_MERGED_FREE: kfree_skb(skb); break; } - return NET_RX_SUCCESS; + return err; +} +EXPORT_SYMBOL(napi_skb_finish); + +int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) +{ + skb_gro_reset_offset(skb); + + return napi_skb_finish(__napi_gro_receive(napi, skb), skb); } EXPORT_SYMBOL(napi_gro_receive); @@ -2512,6 +2523,9 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi, { struct net_device *dev = napi->dev; struct sk_buff *skb = napi->skb; + struct ethhdr *eth; + skb_frag_t *frag; + int i; napi->skb = NULL; @@ -2524,20 +2538,36 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi, } BUG_ON(info->nr_frags > MAX_SKB_FRAGS); + frag = &info->frags[info->nr_frags - 1]; + + for (i = skb_shinfo(skb)->nr_frags; i < info->nr_frags; i++) { + skb_fill_page_desc(skb, i, frag->page, frag->page_offset, + frag->size); + frag++; + } skb_shinfo(skb)->nr_frags = info->nr_frags; - memcpy(skb_shinfo(skb)->frags, info->frags, sizeof(info->frags)); skb->data_len = info->len; skb->len += info->len; skb->truesize += info->len; - if (!pskb_may_pull(skb, ETH_HLEN)) { + skb_reset_mac_header(skb); + skb_gro_reset_offset(skb); + + eth = skb_gro_header(skb, sizeof(*eth)); + if (!eth) { napi_reuse_skb(napi, skb); skb = NULL; goto out; } - skb->protocol = eth_type_trans(skb, dev); + skb_gro_pull(skb, sizeof(*eth)); + + /* + * This works because the only protocols we care about don't require + * special handling. We'll fix it up properly at the end. + */ + skb->protocol = eth->h_proto; skb->ip_summed = info->ip_summed; skb->csum = info->csum; @@ -2547,32 +2577,43 @@ out: } EXPORT_SYMBOL(napi_fraginfo_skb); -int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info) +int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret) { - struct sk_buff *skb = napi_fraginfo_skb(napi, info); - int err = NET_RX_DROP; + int err = NET_RX_SUCCESS; - if (!skb) - goto out; + switch (ret) { + case GRO_NORMAL: + case GRO_HELD: + skb->protocol = eth_type_trans(skb, napi->dev); - if (netpoll_receive_skb(skb)) - goto out; + if (ret == GRO_NORMAL) + return netif_receive_skb(skb); - err = NET_RX_SUCCESS; + skb_gro_pull(skb, -ETH_HLEN); + break; - switch (__napi_gro_receive(napi, skb)) { - case -1: - return netif_receive_skb(skb); + case GRO_DROP: + err = NET_RX_DROP; + /* fall through */ - case 0: - goto out; + case GRO_MERGED_FREE: + napi_reuse_skb(napi, skb); + break; } - napi_reuse_skb(napi, skb); - -out: return err; } +EXPORT_SYMBOL(napi_frags_finish); + +int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info) +{ + struct sk_buff *skb = napi_fraginfo_skb(napi, info); + + if (!skb) + return NET_RX_DROP; + + return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb)); +} EXPORT_SYMBOL(napi_gro_frags); static int process_backlog(struct napi_struct *napi, int quota) @@ -2588,18 +2629,15 @@ static int process_backlog(struct napi_struct *napi, int quota) local_irq_disable(); skb = __skb_dequeue(&queue->input_pkt_queue); if (!skb) { + __napi_complete(napi); local_irq_enable(); - napi_complete(napi); - goto out; + break; } local_irq_enable(); - napi_gro_receive(napi, skb); + netif_receive_skb(skb); } while (++work < quota && jiffies == start_time); - napi_gro_flush(napi); - -out: return work; } @@ -2653,6 +2691,7 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), int weight) { INIT_LIST_HEAD(&napi->poll_list); + napi->gro_count = 0; napi->gro_list = NULL; napi->skb = NULL; napi->poll = poll; @@ -2681,6 +2720,7 @@ void netif_napi_del(struct napi_struct *napi) } napi->gro_list = NULL; + napi->gro_count = 0; } EXPORT_SYMBOL(netif_napi_del); @@ -3949,6 +3989,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) cmd == SIOCSMIIREG || cmd == SIOCBRADDIF || cmd == SIOCBRDELIF || + cmd == SIOCSHWTSTAMP || cmd == SIOCWANDEV) { err = -EOPNOTSUPP; if (ops->ndo_do_ioctl) { @@ -4103,6 +4144,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) case SIOCBONDCHANGEACTIVE: case SIOCBRADDIF: case SIOCBRDELIF: + case SIOCSHWTSTAMP: if (!capable(CAP_NET_ADMIN)) return -EPERM; /* fall through */ @@ -5199,6 +5241,7 @@ static int __init net_dev_init(void) queue->backlog.poll = process_backlog; queue->backlog.weight = weight_p; queue->backlog.gro_list = NULL; + queue->backlog.gro_count = 0; } dev_boot_phase = 0; @@ -5231,6 +5274,14 @@ out: subsys_initcall(net_dev_init); +static int __init initialize_hashrnd(void) +{ + get_random_bytes(&skb_tx_hashrnd, sizeof(skb_tx_hashrnd)); + return 0; +} + +late_initcall_sync(initialize_hashrnd); + EXPORT_SYMBOL(__dev_get_by_index); EXPORT_SYMBOL(__dev_get_by_name); EXPORT_SYMBOL(__dev_remove_pack); diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c new file mode 100644 index 00000000000..9fd0dc3cca9 --- /dev/null +++ b/net/core/drop_monitor.c @@ -0,0 +1,263 @@ +/* + * Monitoring code for network dropped packet alerts + * + * Copyright (C) 2009 Neil Horman <nhorman@tuxdriver.com> + */ + +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/string.h> +#include <linux/if_arp.h> +#include <linux/inetdevice.h> +#include <linux/inet.h> +#include <linux/interrupt.h> +#include <linux/netpoll.h> +#include <linux/sched.h> +#include <linux/delay.h> +#include <linux/types.h> +#include <linux/workqueue.h> +#include <linux/netlink.h> +#include <linux/net_dropmon.h> +#include <linux/percpu.h> +#include <linux/timer.h> +#include <linux/bitops.h> +#include <net/genetlink.h> + +#include <trace/skb.h> + +#include <asm/unaligned.h> + +#define TRACE_ON 1 +#define TRACE_OFF 0 + +static void send_dm_alert(struct work_struct *unused); + + +/* + * Globals, our netlink socket pointer + * and the work handle that will send up + * netlink alerts + */ +struct sock *dm_sock; + +struct per_cpu_dm_data { + struct work_struct dm_alert_work; + struct sk_buff *skb; + atomic_t dm_hit_count; + struct timer_list send_timer; +}; + +static struct genl_family net_drop_monitor_family = { + .id = GENL_ID_GENERATE, + .hdrsize = 0, + .name = "NET_DM", + .version = 1, + .maxattr = NET_DM_CMD_MAX, +}; + +static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data); + +static int dm_hit_limit = 64; +static int dm_delay = 1; + + +static void reset_per_cpu_data(struct per_cpu_dm_data *data) +{ + size_t al; + struct net_dm_alert_msg *msg; + + al = sizeof(struct net_dm_alert_msg); + al += dm_hit_limit * sizeof(struct net_dm_drop_point); + data->skb = genlmsg_new(al, GFP_KERNEL); + genlmsg_put(data->skb, 0, 0, &net_drop_monitor_family, + 0, NET_DM_CMD_ALERT); + msg = __nla_reserve_nohdr(data->skb, sizeof(struct net_dm_alert_msg)); + memset(msg, 0, al); + atomic_set(&data->dm_hit_count, dm_hit_limit); +} + +static void send_dm_alert(struct work_struct *unused) +{ + struct sk_buff *skb; + struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data); + + /* + * Grab the skb we're about to send + */ + skb = data->skb; + + /* + * Replace it with a new one + */ + reset_per_cpu_data(data); + + /* + * Ship it! + */ + genlmsg_multicast(skb, 0, NET_DM_GRP_ALERT, GFP_KERNEL); + +} + +/* + * This is the timer function to delay the sending of an alert + * in the event that more drops will arrive during the + * hysteresis period. Note that it operates under the timer interrupt + * so we don't need to disable preemption here + */ +static void sched_send_work(unsigned long unused) +{ + struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data); + + schedule_work(&data->dm_alert_work); +} + +static void trace_kfree_skb_hit(struct sk_buff *skb, void *location) +{ + struct net_dm_alert_msg *msg; + struct nlmsghdr *nlh; + int i; + struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data); + + + if (!atomic_add_unless(&data->dm_hit_count, -1, 0)) { + /* + * we're already at zero, discard this hit + */ + goto out; + } + + nlh = (struct nlmsghdr *)data->skb->data; + msg = genlmsg_data(nlmsg_data(nlh)); + for (i = 0; i < msg->entries; i++) { + if (!memcmp(&location, msg->points[i].pc, sizeof(void *))) { + msg->points[i].count++; + goto out; + } + } + + /* + * We need to create a new entry + */ + __nla_reserve_nohdr(data->skb, sizeof(struct net_dm_drop_point)); + memcpy(msg->points[msg->entries].pc, &location, sizeof(void *)); + msg->points[msg->entries].count = 1; + msg->entries++; + + if (!timer_pending(&data->send_timer)) { + data->send_timer.expires = jiffies + dm_delay * HZ; + add_timer_on(&data->send_timer, smp_processor_id()); + } + +out: + return; +} + +static int set_all_monitor_traces(int state) +{ + int rc = 0; + + switch (state) { + case TRACE_ON: + rc |= register_trace_kfree_skb(trace_kfree_skb_hit); + break; + case TRACE_OFF: + rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit); + + tracepoint_synchronize_unregister(); + break; + default: + rc = 1; + break; + } + + if (rc) + return -EINPROGRESS; + return rc; +} + + +static int net_dm_cmd_config(struct sk_buff *skb, + struct genl_info *info) +{ + return -ENOTSUPP; +} + +static int net_dm_cmd_trace(struct sk_buff *skb, + struct genl_info *info) +{ + switch (info->genlhdr->cmd) { + case NET_DM_CMD_START: + return set_all_monitor_traces(TRACE_ON); + break; + case NET_DM_CMD_STOP: + return set_all_monitor_traces(TRACE_OFF); + break; + } + + return -ENOTSUPP; +} + + +static struct genl_ops dropmon_ops[] = { + { + .cmd = NET_DM_CMD_CONFIG, + .doit = net_dm_cmd_config, + }, + { + .cmd = NET_DM_CMD_START, + .doit = net_dm_cmd_trace, + }, + { + .cmd = NET_DM_CMD_STOP, + .doit = net_dm_cmd_trace, + }, +}; + +static int __init init_net_drop_monitor(void) +{ + int cpu; + int rc, i, ret; + struct per_cpu_dm_data *data; + printk(KERN_INFO "Initalizing network drop monitor service\n"); + + if (sizeof(void *) > 8) { + printk(KERN_ERR "Unable to store program counters on this arch, Drop monitor failed\n"); + return -ENOSPC; + } + + if (genl_register_family(&net_drop_monitor_family) < 0) { + printk(KERN_ERR "Could not create drop monitor netlink family\n"); + return -EFAULT; + } + + rc = -EFAULT; + + for (i = 0; i < ARRAY_SIZE(dropmon_ops); i++) { + ret = genl_register_ops(&net_drop_monitor_family, + &dropmon_ops[i]); + if (ret) { + printk(KERN_CRIT "failed to register operation %d\n", + dropmon_ops[i].cmd); + goto out_unreg; + } + } + + rc = 0; + + for_each_present_cpu(cpu) { + data = &per_cpu(dm_cpu_data, cpu); + reset_per_cpu_data(data); + INIT_WORK(&data->dm_alert_work, send_dm_alert); + init_timer(&data->send_timer); + data->send_timer.data = cpu; + data->send_timer.function = sched_send_work; + } + goto out; + +out_unreg: + genl_unregister_family(&net_drop_monitor_family); +out: + return rc; +} + +late_initcall(init_net_drop_monitor); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 947710a36ce..244ca56dffa 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -209,34 +209,62 @@ static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr) return 0; } -static int ethtool_set_rxhash(struct net_device *dev, void __user *useraddr) +static int ethtool_set_rxnfc(struct net_device *dev, void __user *useraddr) { struct ethtool_rxnfc cmd; - if (!dev->ethtool_ops->set_rxhash) + if (!dev->ethtool_ops->set_rxnfc) return -EOPNOTSUPP; if (copy_from_user(&cmd, useraddr, sizeof(cmd))) return -EFAULT; - return dev->ethtool_ops->set_rxhash(dev, &cmd); + return dev->ethtool_ops->set_rxnfc(dev, &cmd); } -static int ethtool_get_rxhash(struct net_device *dev, void __user *useraddr) +static int ethtool_get_rxnfc(struct net_device *dev, void __user *useraddr) { struct ethtool_rxnfc info; + const struct ethtool_ops *ops = dev->ethtool_ops; + int ret; + void *rule_buf = NULL; - if (!dev->ethtool_ops->get_rxhash) + if (!ops->get_rxnfc) return -EOPNOTSUPP; if (copy_from_user(&info, useraddr, sizeof(info))) return -EFAULT; - dev->ethtool_ops->get_rxhash(dev, &info); + if (info.cmd == ETHTOOL_GRXCLSRLALL) { + if (info.rule_cnt > 0) { + rule_buf = kmalloc(info.rule_cnt * sizeof(u32), + GFP_USER); + if (!rule_buf) + return -ENOMEM; + } + } + ret = ops->get_rxnfc(dev, &info, rule_buf); + if (ret < 0) + goto err_out; + + ret = -EFAULT; if (copy_to_user(useraddr, &info, sizeof(info))) - return -EFAULT; - return 0; + goto err_out; + + if (rule_buf) { + useraddr += offsetof(struct ethtool_rxnfc, rule_locs); + if (copy_to_user(useraddr, rule_buf, + info.rule_cnt * sizeof(u32))) + goto err_out; + } + ret = 0; + +err_out: + if (rule_buf) + kfree(rule_buf); + + return ret; } static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) @@ -901,6 +929,10 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GFLAGS: case ETHTOOL_GPFLAGS: case ETHTOOL_GRXFH: + case ETHTOOL_GRXRINGS: + case ETHTOOL_GRXCLSRLCNT: + case ETHTOOL_GRXCLSRULE: + case ETHTOOL_GRXCLSRLALL: break; default: if (!capable(CAP_NET_ADMIN)) @@ -1052,10 +1084,16 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) dev->ethtool_ops->set_priv_flags); break; case ETHTOOL_GRXFH: - rc = ethtool_get_rxhash(dev, useraddr); + case ETHTOOL_GRXRINGS: + case ETHTOOL_GRXCLSRLCNT: + case ETHTOOL_GRXCLSRULE: + case ETHTOOL_GRXCLSRLALL: + rc = ethtool_get_rxnfc(dev, useraddr); break; case ETHTOOL_SRXFH: - rc = ethtool_set_rxhash(dev, useraddr); + case ETHTOOL_SRXCLSRLDEL: + case ETHTOOL_SRXCLSRLINS: + rc = ethtool_set_rxnfc(dev, useraddr); break; case ETHTOOL_GGRO: rc = ethtool_get_gro(dev, useraddr); diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 32b3a0152d7..98691e1466b 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -588,7 +588,8 @@ static void notify_rule_change(int event, struct fib_rule *rule, goto errout; } - err = rtnl_notify(skb, net, pid, ops->nlgroup, nlh, GFP_KERNEL); + rtnl_notify(skb, net, pid, ops->nlgroup, nlh, GFP_KERNEL); + return; errout: if (err < 0) rtnl_set_sk_err(net, ops->nlgroup, err); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 278a142d104..a1cbce7fdae 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -871,8 +871,7 @@ static void neigh_timer_handler(unsigned long arg) write_unlock(&neigh->lock); neigh->ops->solicit(neigh, skb); atomic_inc(&neigh->probes); - if (skb) - kfree_skb(skb); + kfree_skb(skb); } else { out: write_unlock(&neigh->lock); @@ -908,8 +907,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) neigh->updated = jiffies; write_unlock_bh(&neigh->lock); - if (skb) - kfree_skb(skb); + kfree_skb(skb); return 1; } } else if (neigh->nud_state & NUD_STALE) { @@ -1656,7 +1654,11 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) flags &= ~NEIGH_UPDATE_F_OVERRIDE; } - err = neigh_update(neigh, lladdr, ndm->ndm_state, flags); + if (ndm->ndm_flags & NTF_USE) { + neigh_event_send(neigh, NULL); + err = 0; + } else + err = neigh_update(neigh, lladdr, ndm->ndm_state, flags); neigh_release(neigh); goto out_dev_put; } @@ -2534,7 +2536,8 @@ static void __neigh_notify(struct neighbour *n, int type, int flags) kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); + rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); + return; errout: if (err < 0) rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 484f58750eb..2da59a0ac4a 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -498,7 +498,7 @@ int netdev_register_kobject(struct net_device *net) dev->groups = groups; BUILD_BUG_ON(BUS_ID_SIZE < IFNAMSIZ); - dev_set_name(dev, net->name); + dev_set_name(dev, "%s", net->name); #ifdef CONFIG_SYSFS *groups++ = &netstat_group; diff --git a/net/core/net-traces.c b/net/core/net-traces.c new file mode 100644 index 00000000000..c8fb45665e4 --- /dev/null +++ b/net/core/net-traces.c @@ -0,0 +1,29 @@ +/* + * consolidates trace point definitions + * + * Copyright (C) 2009 Neil Horman <nhorman@tuxdriver.com> + */ + +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/string.h> +#include <linux/if_arp.h> +#include <linux/inetdevice.h> +#include <linux/inet.h> +#include <linux/interrupt.h> +#include <linux/netpoll.h> +#include <linux/sched.h> +#include <linux/delay.h> +#include <linux/rcupdate.h> +#include <linux/types.h> +#include <linux/workqueue.h> +#include <linux/netlink.h> +#include <linux/net_dropmon.h> +#include <trace/skb.h> + +#include <asm/unaligned.h> +#include <asm/bitops.h> + + +DEFINE_TRACE(kfree_skb); +EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 65498483325..32d419f5ac9 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3275,8 +3275,7 @@ static void pktgen_stop(struct pktgen_thread *t) list_for_each_entry(pkt_dev, &t->if_list, list) { pktgen_stop_device(pkt_dev); - if (pkt_dev->skb) - kfree_skb(pkt_dev->skb); + kfree_skb(pkt_dev->skb); pkt_dev->skb = NULL; } @@ -3303,8 +3302,7 @@ static void pktgen_rem_one_if(struct pktgen_thread *t) if (!cur->removal_mark) continue; - if (cur->skb) - kfree_skb(cur->skb); + kfree_skb(cur->skb); cur->skb = NULL; pktgen_remove_device(t, cur); @@ -3328,8 +3326,7 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t) list_for_each_safe(q, n, &t->if_list) { cur = list_entry(q, struct pktgen_dev, list); - if (cur->skb) - kfree_skb(cur->skb); + kfree_skb(cur->skb); cur->skb = NULL; pktgen_remove_device(t, cur); @@ -3393,8 +3390,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) if (!netif_running(odev)) { pktgen_stop_device(pkt |