diff options
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/Makefile | 3 | ||||
-rw-r--r-- | net/core/datagram.c | 2 | ||||
-rw-r--r-- | net/core/dev.c | 325 | ||||
-rw-r--r-- | net/core/drop_monitor.c | 263 | ||||
-rw-r--r-- | net/core/ethtool.c | 58 | ||||
-rw-r--r-- | net/core/fib_rules.c | 3 | ||||
-rw-r--r-- | net/core/neighbour.c | 15 | ||||
-rw-r--r-- | net/core/net-sysfs.c | 6 | ||||
-rw-r--r-- | net/core/net-traces.c | 29 | ||||
-rw-r--r-- | net/core/net_namespace.c | 3 | ||||
-rw-r--r-- | net/core/pktgen.c | 18 | ||||
-rw-r--r-- | net/core/rtnetlink.c | 9 | ||||
-rw-r--r-- | net/core/skbuff.c | 235 | ||||
-rw-r--r-- | net/core/sock.c | 95 | ||||
-rw-r--r-- | net/core/sysctl_net_core.c | 1 | ||||
-rw-r--r-- | net/core/utils.c | 1 |
16 files changed, 808 insertions, 258 deletions
diff --git a/net/core/Makefile b/net/core/Makefile index 26a37cb3192..796f46eece5 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -17,3 +17,6 @@ obj-$(CONFIG_NET_PKTGEN) += pktgen.o obj-$(CONFIG_NETPOLL) += netpoll.o obj-$(CONFIG_NET_DMA) += user_dma.o obj-$(CONFIG_FIB_RULES) += fib_rules.o +obj-$(CONFIG_TRACEPOINTS) += net-traces.o +obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o + diff --git a/net/core/datagram.c b/net/core/datagram.c index 5e2ac0c4b07..d0de644b378 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -208,7 +208,7 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, void skb_free_datagram(struct sock *sk, struct sk_buff *skb) { - kfree_skb(skb); + consume_skb(skb); sk_mem_reclaim_partial(sk); } diff --git a/net/core/dev.c b/net/core/dev.c index 3d3670640c2..52fea5b28ca 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1670,8 +1670,8 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq) { const struct net_device_ops *ops = dev->netdev_ops; + int rc; - prefetch(&dev->netdev_ops->ndo_start_xmit); if (likely(!skb->next)) { if (!list_empty(&ptype_all)) dev_queue_xmit_nit(skb, dev); @@ -1683,13 +1683,27 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, goto gso; } - return ops->ndo_start_xmit(skb, dev); + rc = ops->ndo_start_xmit(skb, dev); + /* + * TODO: if skb_orphan() was called by + * dev->hard_start_xmit() (for example, the unmodified + * igb driver does that; bnx2 doesn't), then + * skb_tx_software_timestamp() will be unable to send + * back the time stamp. + * + * How can this be prevented? Always create another + * reference to the socket before calling + * dev->hard_start_xmit()? Prevent that skb_orphan() + * does anything in dev->hard_start_xmit() by clearing + * the skb destructor before the call and restoring it + * afterwards, then doing the skb_orphan() ourselves? + */ + return rc; } gso: do { struct sk_buff *nskb = skb->next; - int rc; skb->next = nskb->next; nskb->next = NULL; @@ -1710,59 +1724,24 @@ out_kfree_skb: return 0; } -static u32 simple_tx_hashrnd; -static int simple_tx_hashrnd_initialized = 0; +static u32 skb_tx_hashrnd; -static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb) +u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb) { - u32 addr1, addr2, ports; - u32 hash, ihl; - u8 ip_proto = 0; - - if (unlikely(!simple_tx_hashrnd_initialized)) { - get_random_bytes(&simple_tx_hashrnd, 4); - simple_tx_hashrnd_initialized = 1; - } - - switch (skb->protocol) { - case htons(ETH_P_IP): - if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET))) - ip_proto = ip_hdr(skb)->protocol; - addr1 = ip_hdr(skb)->saddr; - addr2 = ip_hdr(skb)->daddr; - ihl = ip_hdr(skb)->ihl; - break; - case htons(ETH_P_IPV6): - ip_proto = ipv6_hdr(skb)->nexthdr; - addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3]; - addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3]; - ihl = (40 >> 2); - break; - default: - return 0; - } - - - switch (ip_proto) { - case IPPROTO_TCP: - case IPPROTO_UDP: - case IPPROTO_DCCP: - case IPPROTO_ESP: - case IPPROTO_AH: - case IPPROTO_SCTP: - case IPPROTO_UDPLITE: - ports = *((u32 *) (skb_network_header(skb) + (ihl * 4))); - break; + u32 hash; - default: - ports = 0; - break; - } + if (skb_rx_queue_recorded(skb)) { + hash = skb_get_rx_queue(skb); + } else if (skb->sk && skb->sk->sk_hash) { + hash = skb->sk->sk_hash; + } else + hash = skb->protocol; - hash = jhash_3words(addr1, addr2, ports, simple_tx_hashrnd); + hash = jhash_1word(hash, skb_tx_hashrnd); return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32); } +EXPORT_SYMBOL(skb_tx_hash); static struct netdev_queue *dev_pick_tx(struct net_device *dev, struct sk_buff *skb) @@ -1773,7 +1752,7 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev, if (ops->ndo_select_queue) queue_index = ops->ndo_select_queue(dev, skb); else if (dev->real_num_tx_queues > 1) - queue_index = simple_tx_hash(dev, skb); + queue_index = skb_tx_hash(dev, skb); skb_set_queue_mapping(skb, queue_index); return netdev_get_tx_queue(dev, queue_index); @@ -2269,12 +2248,6 @@ int netif_receive_skb(struct sk_buff *skb) rcu_read_lock(); - /* Don't receive packets in an exiting network namespace */ - if (!net_alive(dev_net(skb->dev))) { - kfree_skb(skb); - goto out; - } - #ifdef CONFIG_NET_CLS_ACT if (skb->tc_verd & TC_NCLS) { skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); @@ -2305,6 +2278,8 @@ ncls: if (!skb) goto out; + skb_orphan(skb); + type = skb->protocol; list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { @@ -2374,7 +2349,6 @@ static int napi_gro_complete(struct sk_buff *skb) out: skb_shinfo(skb)->gso_size = 0; - __skb_push(skb, -skb_network_offset(skb)); return netif_receive_skb(skb); } @@ -2388,20 +2362,40 @@ void napi_gro_flush(struct napi_struct *napi) napi_gro_complete(skb); } + napi->gro_count = 0; napi->gro_list = NULL; } EXPORT_SYMBOL(napi_gro_flush); +void *skb_gro_header(struct sk_buff *skb, unsigned int hlen) +{ + unsigned int offset = skb_gro_offset(skb); + + hlen += offset; + if (hlen <= skb_headlen(skb)) + return skb->data + offset; + + if (unlikely(!skb_shinfo(skb)->nr_frags || + skb_shinfo(skb)->frags[0].size <= + hlen - skb_headlen(skb) || + PageHighMem(skb_shinfo(skb)->frags[0].page))) + return pskb_may_pull(skb, hlen) ? skb->data + offset : NULL; + + return page_address(skb_shinfo(skb)->frags[0].page) + + skb_shinfo(skb)->frags[0].page_offset + + offset - skb_headlen(skb); +} +EXPORT_SYMBOL(skb_gro_header); + int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { struct sk_buff **pp = NULL; struct packet_type *ptype; __be16 type = skb->protocol; struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK]; - int count = 0; int same_flow; int mac_len; - int free; + int ret; if (!(skb->dev->features & NETIF_F_GRO)) goto normal; @@ -2411,30 +2405,16 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { - struct sk_buff *p; - if (ptype->type != type || ptype->dev || !ptype->gro_receive) continue; - skb_reset_network_header(skb); + skb_set_network_header(skb, skb_gro_offset(skb)); mac_len = skb->network_header - skb->mac_header; skb->mac_len = mac_len; NAPI_GRO_CB(skb)->same_flow = 0; NAPI_GRO_CB(skb)->flush = 0; NAPI_GRO_CB(skb)->free = 0; - for (p = napi->gro_list; p; p = p->next) { - count++; - - if (!NAPI_GRO_CB(p)->same_flow) - continue; - - if (p->mac_len != mac_len || - memcmp(skb_mac_header(p), skb_mac_header(skb), - mac_len)) - NAPI_GRO_CB(p)->same_flow = 0; - } - pp = ptype->gro_receive(&napi->gro_list, skb); break; } @@ -2444,7 +2424,7 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) goto normal; same_flow = NAPI_GRO_CB(skb)->same_flow; - free = NAPI_GRO_CB(skb)->free; + ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED; if (pp) { struct sk_buff *nskb = *pp; @@ -2452,27 +2432,35 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) *pp = nskb->next; nskb->next = NULL; napi_gro_complete(nskb); - count--; + napi->gro_count--; } if (same_flow) goto ok; - if (NAPI_GRO_CB(skb)->flush || count >= MAX_GRO_SKBS) { - __skb_push(skb, -skb_network_offset(skb)); + if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS) goto normal; - } + napi->gro_count++; NAPI_GRO_CB(skb)->count = 1; - skb_shinfo(skb)->gso_size = skb->len; + skb_shinfo(skb)->gso_size = skb_gro_len(skb); skb->next = napi->gro_list; napi->gro_list = skb; + ret = GRO_HELD; + +pull: + if (unlikely(!pskb_may_pull(skb, skb_gro_offset(skb)))) { + if (napi->gro_list == skb) + napi->gro_list = skb->next; + ret = GRO_DROP; + } ok: - return free; + return ret; normal: - return -1; + ret = GRO_NORMAL; + goto pull; } EXPORT_SYMBOL(dev_gro_receive); @@ -2480,29 +2468,44 @@ static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { struct sk_buff *p; + if (netpoll_rx_on(skb)) + return GRO_NORMAL; + for (p = napi->gro_list; p; p = p->next) { - NAPI_GRO_CB(p)->same_flow = 1; + NAPI_GRO_CB(p)->same_flow = !compare_ether_header( + skb_mac_header(p), skb_gro_mac_header(skb)); NAPI_GRO_CB(p)->flush = 0; } return dev_gro_receive(napi, skb); } -int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) +int napi_skb_finish(int ret, struct sk_buff *skb) { - if (netpoll_receive_skb(skb)) - return NET_RX_DROP; + int err = NET_RX_SUCCESS; - switch (__napi_gro_receive(napi, skb)) { - case -1: + switch (ret) { + case GRO_NORMAL: return netif_receive_skb(skb); - case 1: + case GRO_DROP: + err = NET_RX_DROP; + /* fall through */ + + case GRO_MERGED_FREE: kfree_skb(skb); break; } - return NET_RX_SUCCESS; + return err; +} +EXPORT_SYMBOL(napi_skb_finish); + +int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) +{ + skb_gro_reset_offset(skb); + + return napi_skb_finish(__napi_gro_receive(napi, skb), skb); } EXPORT_SYMBOL(napi_gro_receive); @@ -2520,6 +2523,9 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi, { struct net_device *dev = napi->dev; struct sk_buff *skb = napi->skb; + struct ethhdr *eth; + skb_frag_t *frag; + int i; napi->skb = NULL; @@ -2532,20 +2538,36 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi, } BUG_ON(info->nr_frags > MAX_SKB_FRAGS); + frag = &info->frags[info->nr_frags - 1]; + + for (i = skb_shinfo(skb)->nr_frags; i < info->nr_frags; i++) { + skb_fill_page_desc(skb, i, frag->page, frag->page_offset, + frag->size); + frag++; + } skb_shinfo(skb)->nr_frags = info->nr_frags; - memcpy(skb_shinfo(skb)->frags, info->frags, sizeof(info->frags)); skb->data_len = info->len; skb->len += info->len; skb->truesize += info->len; - if (!pskb_may_pull(skb, ETH_HLEN)) { + skb_reset_mac_header(skb); + skb_gro_reset_offset(skb); + + eth = skb_gro_header(skb, sizeof(*eth)); + if (!eth) { napi_reuse_skb(napi, skb); skb = NULL; goto out; } - skb->protocol = eth_type_trans(skb, dev); + skb_gro_pull(skb, sizeof(*eth)); + + /* + * This works because the only protocols we care about don't require + * special handling. We'll fix it up properly at the end. + */ + skb->protocol = eth->h_proto; skb->ip_summed = info->ip_summed; skb->csum = info->csum; @@ -2555,32 +2577,43 @@ out: } EXPORT_SYMBOL(napi_fraginfo_skb); -int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info) +int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret) { - struct sk_buff *skb = napi_fraginfo_skb(napi, info); - int err = NET_RX_DROP; + int err = NET_RX_SUCCESS; - if (!skb) - goto out; + switch (ret) { + case GRO_NORMAL: + case GRO_HELD: + skb->protocol = eth_type_trans(skb, napi->dev); - if (netpoll_receive_skb(skb)) - goto out; + if (ret == GRO_NORMAL) + return netif_receive_skb(skb); - err = NET_RX_SUCCESS; + skb_gro_pull(skb, -ETH_HLEN); + break; - switch (__napi_gro_receive(napi, skb)) { - case -1: - return netif_receive_skb(skb); + case GRO_DROP: + err = NET_RX_DROP; + /* fall through */ - case 0: - goto out; + case GRO_MERGED_FREE: + napi_reuse_skb(napi, skb); + break; } - napi_reuse_skb(napi, skb); - -out: return err; } +EXPORT_SYMBOL(napi_frags_finish); + +int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info) +{ + struct sk_buff *skb = napi_fraginfo_skb(napi, info); + + if (!skb) + return NET_RX_DROP; + + return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb)); +} EXPORT_SYMBOL(napi_gro_frags); static int process_backlog(struct napi_struct *napi, int quota) @@ -2602,11 +2635,9 @@ static int process_backlog(struct napi_struct *napi, int quota) } local_irq_enable(); - napi_gro_receive(napi, skb); + netif_receive_skb(skb); } while (++work < quota && jiffies == start_time); - napi_gro_flush(napi); - return work; } @@ -2660,6 +2691,7 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), int weight) { INIT_LIST_HEAD(&napi->poll_list); + napi->gro_count = 0; napi->gro_list = NULL; napi->skb = NULL; napi->poll = poll; @@ -2679,7 +2711,7 @@ void netif_napi_del(struct napi_struct *napi) struct sk_buff *skb, *next; list_del_init(&napi->dev_list); - kfree(napi->skb); + kfree_skb(napi->skb); for (skb = napi->gro_list; skb; skb = next) { next = skb->next; @@ -2688,6 +2720,7 @@ void netif_napi_del(struct napi_struct *napi) } napi->gro_list = NULL; + napi->gro_count = 0; } EXPORT_SYMBOL(netif_napi_del); @@ -3956,6 +3989,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) cmd == SIOCSMIIREG || cmd == SIOCBRADDIF || cmd == SIOCBRDELIF || + cmd == SIOCSHWTSTAMP || cmd == SIOCWANDEV) { err = -EOPNOTSUPP; if (ops->ndo_do_ioctl) { @@ -4110,6 +4144,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) case SIOCBONDCHANGEACTIVE: case SIOCBRADDIF: case SIOCBRDELIF: + case SIOCSHWTSTAMP: if (!capable(CAP_NET_ADMIN)) return -EPERM; /* fall through */ @@ -4290,6 +4325,39 @@ unsigned long netdev_fix_features(unsigned long features, const char *name) } EXPORT_SYMBOL(netdev_fix_features); +/* Some devices need to (re-)set their netdev_ops inside + * ->init() or similar. If that happens, we have to setup + * the compat pointers again. + */ +void netdev_resync_ops(struct net_device *dev) +{ +#ifdef CONFIG_COMPAT_NET_DEV_OPS + const struct net_device_ops *ops = dev->netdev_ops; + + dev->init = ops->ndo_init; + dev->uninit = ops->ndo_uninit; + dev->open = ops->ndo_open; + dev->change_rx_flags = ops->ndo_change_rx_flags; + dev->set_rx_mode = ops->ndo_set_rx_mode; + dev->set_multicast_list = ops->ndo_set_multicast_list; + dev->set_mac_address = ops->ndo_set_mac_address; + dev->validate_addr = ops->ndo_validate_addr; + dev->do_ioctl = ops->ndo_do_ioctl; + dev->set_config = ops->ndo_set_config; + dev->change_mtu = ops->ndo_change_mtu; + dev->neigh_setup = ops->ndo_neigh_setup; + dev->tx_timeout = ops->ndo_tx_timeout; + dev->get_stats = ops->ndo_get_stats; + dev->vlan_rx_register = ops->ndo_vlan_rx_register; + dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid; + dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid; +#ifdef CONFIG_NET_POLL_CONTROLLER + dev->poll_controller = ops->ndo_poll_controller; +#endif +#endif +} +EXPORT_SYMBOL(netdev_resync_ops); + /** * register_netdevice - register a network device * @dev: device to register @@ -4334,27 +4402,7 @@ int register_netdevice(struct net_device *dev) * This is temporary until all network devices are converted. */ if (dev->netdev_ops) { - const struct net_device_ops *ops = dev->netdev_ops; - - dev->init = ops->ndo_init; - dev->uninit = ops->ndo_uninit; - dev->open = ops->ndo_open; - dev->change_rx_flags = ops->ndo_change_rx_flags; - dev->set_rx_mode = ops->ndo_set_rx_mode; - dev->set_multicast_list = ops->ndo_set_multicast_list; - dev->set_mac_address = ops->ndo_set_mac_address; - dev->validate_addr = ops->ndo_validate_addr; - dev->do_ioctl = ops->ndo_do_ioctl; - dev->set_config = ops->ndo_set_config; - dev->change_mtu = ops->ndo_change_mtu; - dev->tx_timeout = ops->ndo_tx_timeout; - dev->get_stats = ops->ndo_get_stats; - dev->vlan_rx_register = ops->ndo_vlan_rx_register; - dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid; - dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid; -#ifdef CONFIG_NET_POLL_CONTROLLER - dev->poll_controller = ops->ndo_poll_controller; -#endif + netdev_resync_ops(dev); } else { char drivername[64]; pr_info("%s (%s): not using net_device_ops yet\n", @@ -5193,6 +5241,7 @@ static int __init net_dev_init(void) queue->backlog.poll = process_backlog; queue->backlog.weight = weight_p; queue->backlog.gro_list = NULL; + queue->backlog.gro_count = 0; } dev_boot_phase = 0; @@ -5225,6 +5274,14 @@ out: subsys_initcall(net_dev_init); +static int __init initialize_hashrnd(void) +{ + get_random_bytes(&skb_tx_hashrnd, sizeof(skb_tx_hashrnd)); + return 0; +} + +late_initcall_sync(initialize_hashrnd); + EXPORT_SYMBOL(__dev_get_by_index); EXPORT_SYMBOL(__dev_get_by_name); EXPORT_SYMBOL(__dev_remove_pack); diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c new file mode 100644 index 00000000000..9fd0dc3cca9 --- /dev/null +++ b/net/core/drop_monitor.c @@ -0,0 +1,263 @@ +/* + * Monitoring code for network dropped packet alerts + * + * Copyright (C) 2009 Neil Horman <nhorman@tuxdriver.com> + */ + +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/string.h> +#include <linux/if_arp.h> +#include <linux/inetdevice.h> +#include <linux/inet.h> +#include <linux/interrupt.h> +#include <linux/netpoll.h> +#include <linux/sched.h> +#include <linux/delay.h> +#include <linux/types.h> +#include <linux/workqueue.h> +#include <linux/netlink.h> +#include <linux/net_dropmon.h> +#include <linux/percpu.h> +#include <linux/timer.h> +#include <linux/bitops.h> +#include <net/genetlink.h> + +#include <trace/skb.h> + +#include <asm/unaligned.h> + +#define TRACE_ON 1 +#define TRACE_OFF 0 + +static void send_dm_alert(struct work_struct *unused); + + +/* + * Globals, our netlink socket pointer + * and the work handle that will send up + * netlink alerts + */ +struct sock *dm_sock; + +struct per_cpu_dm_data { + struct work_struct dm_alert_work; + struct sk_buff *skb; + atomic_t dm_hit_count; + struct timer_list send_timer; +}; + +static struct genl_family net_drop_monitor_family = { + .id = GENL_ID_GENERATE, + .hdrsize = 0, + .name = "NET_DM", + .version = 1, + .maxattr = NET_DM_CMD_MAX, +}; + +static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data); + +static int dm_hit_limit = 64; +static int dm_delay = 1; + + +static void reset_per_cpu_data(struct per_cpu_dm_data *data) +{ + size_t al; + struct net_dm_alert_msg *msg; + + al = sizeof(struct net_dm_alert_msg); + al += dm_hit_limit * sizeof(struct net_dm_drop_point); + data->skb = genlmsg_new(al, GFP_KERNEL); + genlmsg_put(data->skb, 0, 0, &net_drop_monitor_family, + 0, NET_DM_CMD_ALERT); + msg = __nla_reserve_nohdr(data->skb, sizeof(struct net_dm_alert_msg)); + memset(msg, 0, al); + atomic_set(&data->dm_hit_count, dm_hit_limit); +} + +static void send_dm_alert(struct work_struct *unused) +{ + struct sk_buff *skb; + struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data); + + /* + * Grab the skb we're about to send + */ + skb = data->skb; + + /* + * Replace it with a new one + */ + reset_per_cpu_data(data); + + /* + * Ship it! + */ + genlmsg_multicast(skb, 0, NET_DM_GRP_ALERT, GFP_KERNEL); + +} + +/* + * This is the timer function to delay the sending of an alert + * in the event that more drops will arrive during the + * hysteresis period. Note that it operates under the timer interrupt + * so we don't need to disable preemption here + */ +static void sched_send_work(unsigned long unused) +{ + struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data); + + schedule_work(&data->dm_alert_work); +} + +static void trace_kfree_skb_hit(struct sk_buff *skb, void *location) +{ + struct net_dm_alert_msg *msg; + struct nlmsghdr *nlh; + int i; + struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data); + + + if (!atomic_add_unless(&data->dm_hit_count, -1, 0)) { + /* + * we're already at zero, discard this hit + */ + goto out; + } + + nlh = (struct nlmsghdr *)data->skb->data; + msg = genlmsg_data(nlmsg_data(nlh)); + for (i = 0; i < msg->entries; i++) { + if (!memcmp(&location, msg->points[i].pc, sizeof(void *))) { + msg->points[i].count++; + goto out; + } + } + + /* + * We need to create a new entry + */ + __nla_reserve_nohdr(data->skb, sizeof(struct net_dm_drop_point)); + memcpy(msg->points[msg->entries].pc, &location, sizeof(void *)); + msg->points[msg->entries].count = 1; + msg->entries++; + + if (!timer_pending(&data->send_timer)) { + data->send_timer.expires = jiffies + dm_delay * HZ; + add_timer_on(&data->send_timer, smp_processor_id()); + } + +out: + return; +} + +static int set_all_monitor_traces(int state) +{ + int rc = 0; + + switch (state) { + case TRACE_ON: + rc |= register_trace_kfree_skb(trace_kfree_skb_hit); + break; + case TRACE_OFF: + rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit); + + tracepoint_synchronize_unregister(); + break; + default: + rc = 1; + break; + } + + if (rc) + return -EINPROGRESS; + return rc; +} + + +static int net_dm_cmd_config(struct sk_buff *skb, + struct genl_info *info) +{ + return -ENOTSUPP; +} + +static int net_dm_cmd_trace(struct sk_buff *skb, + struct genl_info *info) +{ + switch (info->genlhdr->cmd) { + case NET_DM_CMD_START: + return set_all_monitor_traces(TRACE_ON); + break; + case NET_DM_CMD_STOP: + return set_all_monitor_traces(TRACE_OFF); + break; + } + + return -ENOTSUPP; +} + + +static struct genl_ops dropmon_ops[] = { + { + .cmd = NET_DM_CMD_CONFIG, + .doit = net_dm_cmd_config, + }, + { + .cmd = NET_DM_CMD_START, + .doit = net_dm_cmd_trace, + }, + { + .cmd = NET_DM_CMD_STOP, + .doit = net_dm_cmd_trace, + }, +}; + +static int __init init_net_drop_monitor(void) +{ + int cpu; + int rc, i, ret; + struct per_cpu_dm_data *data; + printk(KERN_INFO "Initalizing network drop monitor service\n"); + + if (sizeof(void *) > 8) { + printk(KERN_ERR "Unable to store program counters on this arch, Drop monitor failed\n"); + return -ENOSPC; + } + + if (genl_register_family(&net_drop_monitor_family) < 0) { + printk(KERN_ERR "Could not create drop monitor netlink family\n"); + return -EFAULT; + } + + rc = -EFAULT; + + for (i = 0; i < ARRAY_SIZE(dropmon_ops); i++) { + ret = genl_register_ops(&net_drop_monitor_family, + &dropmon_ops[i]); + if (ret) { + printk(KERN_CRIT "failed to register operation %d\n", + dropmon_ops[i].cmd); + goto out_unreg; + } + } + + rc = 0; + + for_each_present_cpu(cpu) { + data = &per_cpu(dm_cpu_data, cpu); + reset_per_cpu_data(data); + INIT_WORK(&data->dm_alert_work, send_dm_alert); + init_timer(&data->send_timer); + data->send_timer.data = cpu; + data->send_timer.function = sched_send_work; + } + goto out; + +out_unreg: + genl_unregister_family(&net_drop_monitor_family); +out: + return rc; +} + +late_initcall(init_net_drop_monitor); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 947710a36ce..244ca56dffa 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -209,34 +209,62 @@ static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr) return 0; } -static int ethtool_set_rxhash(struct net_device *dev, void __user *useraddr) +static int ethtool_set_rxnfc(struct net_device *dev, void __user *useraddr) { struct ethtool_rxnfc cmd; - if (!dev->ethtool_ops->set_rxhash) + if (!dev->ethtool_ops->set_rxnfc) return -EOPNOTSUPP; if (copy_from_user(&cmd, useraddr, sizeof(cmd))) return -EFAULT; - return dev->ethtool_ops->set_rxhash(dev, &cmd); + return dev->ethtool_ops->set_rxnfc(dev, &cmd); } -static int ethtool_get_rxhash(struct net_device *dev, void __user *useraddr) +static int ethtool_get_rxnfc(struct net_device *dev, void __user *useraddr) { struct ethtool_rxnfc info; + const struct ethtool_ops *ops = dev->ethtool_ops; + int ret; + void *rule_buf = NULL; - if (!dev->ethtool_ops->get_rxhash) + if (!ops->get_rxnfc) return -EOPNOTSUPP; if (copy_from_user(&info, useraddr, sizeof(info))) return -EFAULT; - dev->ethtool_ops->get_rxhash(dev, &info); + if (info.cmd == ETHTOOL_GRXCLSRLALL) { + if (info.rule_cnt > 0) { + rule_buf = kmalloc(info.rule_cnt * sizeof(u32), + GFP_USER); + if (!rule_buf) + return -ENOMEM; + } + } + ret = ops->get_rxnfc(dev, &info, rule_buf); + if (ret < 0) + goto err_out; + + ret = -EFAULT; if (copy_to_user(useraddr, &info, sizeof(info))) - return -EFAULT; - return 0; + goto err_out; + + if (rule_buf) { + useraddr += offsetof(struct ethtool_rxnfc, rule_locs); + if (copy_to_user(useraddr, rule_buf, + info.rule_cnt * sizeof(u32))) + goto err_out; + } + ret = 0; + +err_out: + if (rule_buf) + kfree(rule_buf); + + return ret; } static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) @@ -901,6 +929,10 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GFLAGS: case ETHTOOL_GPFLAGS: case ETHTOOL_GRXFH: + case ETHTOOL_GRXRINGS: + case ETHTOOL_GRXCLSRLCNT: + case ETHTOOL_GRXCLSRULE: + case ETHTOOL_GRXCLSRLALL: break; default: if (!capable(CAP_NET_ADMIN)) @@ -1052,10 +1084,16 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) dev->ethtool_ops->set_priv_flags); break; case ETHTOOL_GRXFH: - rc = ethtool_get_rxhash(dev, useraddr); + case ETHTOOL_GRXRINGS: + case ETHTOOL_GRXCLSRLCNT: + case ETHTOOL_GRXCLSRULE: + case ETHTOOL_GRXCLSRLALL: + rc = ethtool_get_rxnfc(dev, useraddr); break; case ETHTOOL_SRXFH: - rc = ethtool_set_rxhash(dev, useraddr); + case ETHTOOL_SRXCLSRLDEL: + case ETHTOOL_SRXCLSRLINS: + rc = ethtool_set_rxnfc(dev, useraddr); break; case ETHTOOL_GGRO: rc = ethtool_get_gro(dev, useraddr); diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 32b3a0152d7..98691e1466b 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -588,7 +588,8 @@ static void notify_rule_change(int event, struct fib_rule *rule, goto errout; } - err = rtnl_notify(skb, net, pid, ops->nlgroup, nlh, GFP_KERNEL); + rtnl_notify(skb, net, pid, ops->nlgroup, nlh, GFP_KERNEL); + return; errout: if (err < 0) rtnl_set_sk_err(net, ops->nlgroup, err); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 278a142d104..a1cbce7fdae 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -871,8 +871,7 @@ static void neigh_timer_handler(unsigned long arg) write_unlock(&neigh->lock); neigh->ops->solicit(neigh, skb); atomic_inc(&neigh->probes); - if (skb) - kfree_skb(skb); + kfree_skb(skb); } else { out: write_unlock(&neigh->lock); @@ -908,8 +907,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) neigh->updated = jiffies; write_unlock_bh(&neigh->lock); - if (skb) - kfree_skb(skb); + kfree_skb(skb); return 1; } } else if (neigh->nud_state & NUD_STALE) { @@ -1656,7 +1654,11 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) flags &= ~NEIGH_UPDATE_F_OVERRIDE; } - err = neigh_update(neigh, lladdr, ndm->ndm_state, flags); + if (ndm->ndm_flags & NTF_USE) { + neigh_event_send(neigh, NULL); + err = 0; + } else + err = neigh_update(neigh, lladdr, ndm->ndm_state, flags); neigh_release(neigh); goto out_dev_put; } @@ -2534,7 +2536,8 @@ static void __neigh_notify(struct neighbour *n, int type, int flags) kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); + rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); + return; errout: if (err < 0) rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 6ac29a46e23..2da59a0ac4a 100644 --- a/net/core/net-sysfs.c +++ b/ |