From 122bdf67f15a22bcabf6c2cab3a545d17ccf68dc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 14 Mar 2012 21:13:11 +0000 Subject: ipv6: fix icmp6_dst_alloc() commit 87a115783 ( ipv6: Move xfrm_lookup() call down into icmp6_dst_alloc().) forgot to convert one error path, leading to crashes in mld_sendpack() Many thanks to Dave Jones for providing a very complete bug report. Reported-by: Dave Jones Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 8c2e3ab58f2..22b766407de 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1077,7 +1077,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct net *net = dev_net(dev); if (unlikely(!idev)) - return NULL; + return ERR_PTR(-ENODEV); rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0); if (unlikely(!rt)) { -- cgit v1.2.3-18-g5258 From cc34eb672eedb5ff248ac3bf9971a76f141fd141 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 13 Mar 2012 18:04:25 +0000 Subject: sch_sfq: revert dont put new flow at the end of flows This reverts commit d47a0ac7b6 (sch_sfq: dont put new flow at the end of flows) As Jesper found out, patch sounded great but has bad side effects. In stress situation, pushing new flows in front of the queue can prevent old flows doing any progress. Packets can stay in SFQ queue for unlimited amount of time. It's possible to add heuristics to limit this problem, but this would add complexity outside of SFQ scope. A more sensible answer to Dave Taht concerns (who reported the issued I tried to solve in original commit) is probably to use a qdisc hierarchy so that high prio packets dont enter a potentially crowded SFQ qdisc. Reported-by: Jesper Dangaard Brouer Cc: Dave Taht Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_sfq.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 60d47180f04..02a21abea65 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -469,11 +469,15 @@ enqueue: if (slot->qlen == 1) { /* The flow is new */ if (q->tail == NULL) { /* It is the first flow */ slot->next = x; - q->tail = slot; } else { slot->next = q->tail->next; q->tail->next = x; } + /* We put this flow at the end of our flow list. + * This might sound unfair for a new flow to wait after old ones, + * but we could endup servicing new flows only, and freeze old ones. + */ + q->tail = slot; /* We could use a bigger initial quantum for new flows */ slot->allot = q->scaled_quantum; } -- cgit v1.2.3-18-g5258 From 127d0a198a310970b31866af8bbb6d4b1068e546 Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Thu, 15 Mar 2012 14:08:28 +0000 Subject: bnx2x: fix a crash on corrupt firmware file If the requested firmware is deemed corrupt and then released, reset the pointer to NULL in order to avoid double-freeing it in bnx2x_release_firmware() or dereferencing it in bnx2x_init_firmware(). Signed-off-by: Michal Schmidt Acked-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 25452131915..00ff62f9285 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -10901,6 +10901,7 @@ init_ops_alloc_err: kfree(bp->init_data); request_firmware_exit: release_firmware(bp->firmware); + bp->firmware = NULL; return rc; } -- cgit v1.2.3-18-g5258 From c0ea452e422a1fc78ec8c639df64012d0b8dbb4a Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Thu, 15 Mar 2012 14:08:29 +0000 Subject: bnx2x: fix memory leak in bnx2x_init_firmware() When cycling the interface down and up, bnx2x_init_firmware() knows that the firmware is already loaded, but nevertheless it allocates certain arrays anew (init_data, init_ops, init_ops_offsets, iro_arr). The old arrays are leaked. Fix the leaks by returning early if the firmware was already loaded. Because if the firmware is loaded, so are the arrays. Signed-off-by: Michal Schmidt Acked-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 50 ++++++++++++------------ 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 00ff62f9285..b69f8762b33 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -10824,38 +10824,36 @@ do { \ int bnx2x_init_firmware(struct bnx2x *bp) { + const char *fw_file_name; struct bnx2x_fw_file_hdr *fw_hdr; int rc; + if (bp->firmware) + return 0; - if (!bp->firmware) { - const char *fw_file_name; - - if (CHIP_IS_E1(bp)) - fw_file_name = FW_FILE_NAME_E1; - else if (CHIP_IS_E1H(bp)) - fw_file_name = FW_FILE_NAME_E1H; - else if (!CHIP_IS_E1x(bp)) - fw_file_name = FW_FILE_NAME_E2; - else { - BNX2X_ERR("Unsupported chip revision\n"); - return -EINVAL; - } - BNX2X_DEV_INFO("Loading %s\n", fw_file_name); + if (CHIP_IS_E1(bp)) + fw_file_name = FW_FILE_NAME_E1; + else if (CHIP_IS_E1H(bp)) + fw_file_name = FW_FILE_NAME_E1H; + else if (!CHIP_IS_E1x(bp)) + fw_file_name = FW_FILE_NAME_E2; + else { + BNX2X_ERR("Unsupported chip revision\n"); + return -EINVAL; + } + BNX2X_DEV_INFO("Loading %s\n", fw_file_name); - rc = request_firmware(&bp->firmware, fw_file_name, - &bp->pdev->dev); - if (rc) { - BNX2X_ERR("Can't load firmware file %s\n", - fw_file_name); - goto request_firmware_exit; - } + rc = request_firmware(&bp->firmware, fw_file_name, &bp->pdev->dev); + if (rc) { + BNX2X_ERR("Can't load firmware file %s\n", + fw_file_name); + goto request_firmware_exit; + } - rc = bnx2x_check_firmware(bp); - if (rc) { - BNX2X_ERR("Corrupt firmware file %s\n", fw_file_name); - goto request_firmware_exit; - } + rc = bnx2x_check_firmware(bp); + if (rc) { + BNX2X_ERR("Corrupt firmware file %s\n", fw_file_name); + goto request_firmware_exit; } fw_hdr = (struct bnx2x_fw_file_hdr *)bp->firmware->data; -- cgit v1.2.3-18-g5258 From 7bdd402706cf26bfef9050dfee3f229b7f33ee4f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 14 Mar 2012 06:56:25 +0000 Subject: net/usbnet: reserve headroom on rx skbs network drivers should reserve some headroom on incoming skbs so that we dont need expensive reallocations, eg forwarding packets in tunnels. This NET_SKB_PAD padding is done in various helpers, like __netdev_alloc_skb_ip_align() in this patch, combining NET_SKB_PAD and NET_IP_ALIGN magic. Signed-off-by: Eric Dumazet Cc: Oliver Neukum Cc: Greg Kroah-Hartman Acked-by: Oliver Neukum Signed-off-by: David S. Miller --- drivers/net/usb/usbnet.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 81b96e30375..59681f01a54 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -328,13 +328,13 @@ static int rx_submit (struct usbnet *dev, struct urb *urb, gfp_t flags) unsigned long lockflags; size_t size = dev->rx_urb_size; - if ((skb = alloc_skb (size + NET_IP_ALIGN, flags)) == NULL) { + skb = __netdev_alloc_skb_ip_align(dev->net, size, flags); + if (!skb) { netif_dbg(dev, rx_err, dev->net, "no rx skb\n"); usbnet_defer_kevent (dev, EVENT_RX_MEMORY); usb_free_urb (urb); return -ENOMEM; } - skb_reserve (skb, NET_IP_ALIGN); entry = (struct skb_data *) skb->cb; entry->urb = urb; -- cgit v1.2.3-18-g5258 From bb6d5e76fb4fba9aa36726db41404512f3286c0f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 14 Mar 2012 08:53:34 +0000 Subject: net/hyperv: fix erroneous NETDEV_TX_BUSY use A driver start_xmit() method cannot free skb and return NETDEV_TX_BUSY, since caller is going to reuse freed skb. This is mostly a revert of commit bf769375c (staging: hv: fix the return status of netvsc_start_xmit()) In fact netif_tx_stop_queue() / netif_stop_queue() is needed before returning NETDEV_TX_BUSY or you can trigger a ksoftirqd fatal loop. In case of memory allocation error, only safe way is to drop the packet and return NETDEV_TX_OK Signed-off-by: Eric Dumazet Cc: "K. Y. Srinivasan" Cc: Haiyang Zhang Cc: Greg Kroah-Hartman Reviewed-by: Haiyang Zhang Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index bf01841bda5..610860f2896 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -166,7 +166,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) dev_kfree_skb(skb); net->stats.tx_dropped++; - return NETDEV_TX_BUSY; + return NETDEV_TX_OK; } packet->extension = (void *)(unsigned long)packet + @@ -226,7 +226,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) dev_kfree_skb_any(skb); } - return ret ? NETDEV_TX_BUSY : NETDEV_TX_OK; + return NETDEV_TX_OK; } /* -- cgit v1.2.3-18-g5258 From b8fbaef586176f6abe0eb7887ddae66e99898b79 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 14 Mar 2012 09:21:44 +0000 Subject: wimax/i2400m: fix erroneous NETDEV_TX_BUSY use A driver start_xmit() method cannot free skb and return NETDEV_TX_BUSY, since caller is going to reuse freed skb. In fact netif_tx_stop_queue() / netif_stop_queue() is needed before returning NETDEV_TX_BUSY or you can trigger a ksoftirqd fatal loop. In case of memory allocation error, only safe way is to drop the packet and return NETDEV_TX_OK Also increments tx_dropped counter Signed-off-by: Eric Dumazet Cc: Inaky Perez-Gonzalez Signed-off-by: David S. Miller --- drivers/net/wimax/i2400m/netdev.c | 30 ++++++++++-------------------- 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/drivers/net/wimax/i2400m/netdev.c b/drivers/net/wimax/i2400m/netdev.c index 64a110604ad..63e4b709efa 100644 --- a/drivers/net/wimax/i2400m/netdev.c +++ b/drivers/net/wimax/i2400m/netdev.c @@ -367,38 +367,28 @@ netdev_tx_t i2400m_hard_start_xmit(struct sk_buff *skb, { struct i2400m *i2400m = net_dev_to_i2400m(net_dev); struct device *dev = i2400m_dev(i2400m); - int result; + int result = -1; d_fnstart(3, dev, "(skb %p net_dev %p)\n", skb, net_dev); - if (skb_header_cloned(skb)) { - /* - * Make tcpdump/wireshark happy -- if they are - * running, the skb is cloned and we will overwrite - * the mac fields in i2400m_tx_prep_header. Expand - * seems to fix this... - */ - result = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); - if (result) { - result = NETDEV_TX_BUSY; - goto error_expand; - } - } + + if (skb_header_cloned(skb) && + pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + goto drop; if (i2400m->state == I2400M_SS_IDLE) result = i2400m_net_wake_tx(i2400m, net_dev, skb); else result = i2400m_net_tx(i2400m, net_dev, skb); - if (result < 0) + if (result < 0) { +drop: net_dev->stats.tx_dropped++; - else { + } else { net_dev->stats.tx_packets++; net_dev->stats.tx_bytes += skb->len; } - result = NETDEV_TX_OK; -error_expand: - kfree_skb(skb); + dev_kfree_skb(skb); d_fnend(3, dev, "(skb %p net_dev %p) = %d\n", skb, net_dev, result); - return result; + return NETDEV_TX_OK; } -- cgit v1.2.3-18-g5258 From c577923756b7fe9071f28a76b66b83b306d1d001 Mon Sep 17 00:00:00 2001 From: "RongQing.Li" Date: Thu, 15 Mar 2012 22:54:14 +0000 Subject: ipv6: Don't dev_hold(dev) in ip6_mc_find_dev_rcu. ip6_mc_find_dev_rcu() is called with rcu_read_lock(), so don't need to dev_hold(). With dev_hold(), not corresponding dev_put(), will lead to leak. [ bug introduced in 96b52e61be1 (ipv6: mcast: RCU conversions) ] Signed-off-by: RongQing.Li Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/mcast.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index b853f06cc14..16c33e30812 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -257,7 +257,6 @@ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net, if (rt) { dev = rt->dst.dev; - dev_hold(dev); dst_release(&rt->dst); } } else -- cgit v1.2.3-18-g5258 From a16a1647fa6b6783c2e91623e72e86f0c2adac5e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 16 Mar 2012 02:00:34 +0000 Subject: netfilter: ctnetlink: fix race between delete and timeout expiration Kerin Millar reported hardlockups while running `conntrackd -c' in a busy firewall. That system (with several processors) was acting as backup in a primary-backup setup. After several tries, I found a race condition between the deletion operation of ctnetlink and timeout expiration. This patch fixes this problem. Tested-by: Kerin Millar Reported-by: Kerin Millar Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_netlink.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 10687692831..b49da6c925b 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -943,20 +943,21 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, } } - if (nf_conntrack_event_report(IPCT_DESTROY, ct, - NETLINK_CB(skb).pid, - nlmsg_report(nlh)) < 0) { + if (del_timer(&ct->timeout)) { + if (nf_conntrack_event_report(IPCT_DESTROY, ct, + NETLINK_CB(skb).pid, + nlmsg_report(nlh)) < 0) { + nf_ct_delete_from_lists(ct); + /* we failed to report the event, try later */ + nf_ct_insert_dying_list(ct); + nf_ct_put(ct); + return 0; + } + /* death_by_timeout would report the event again */ + set_bit(IPS_DYING_BIT, &ct->status); nf_ct_delete_from_lists(ct); - /* we failed to report the event, try later */ - nf_ct_insert_dying_list(ct); nf_ct_put(ct); - return 0; } - - /* death_by_timeout would report the event again */ - set_bit(IPS_DYING_BIT, &ct->status); - - nf_ct_kill(ct); nf_ct_put(ct); return 0; -- cgit v1.2.3-18-g5258