From ca0a10672dad94aa1f89645f89eb6047b7bf2a19 Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Mon, 30 Sep 2013 22:05:07 +0200
Subject: netfilter: ebt_ulog: fix info leaks

The ulog messages leak heap bytes by the means of padding bytes and
incompletely filled string arrays. Fix those by memset(0)'ing the
whole struct before filling it.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/bridge/netfilter/ebt_ulog.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index 518093802d1..7c470c371e1 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -181,6 +181,7 @@ static void ebt_ulog_packet(struct net *net, unsigned int hooknr,
 	ub->qlen++;
 
 	pm = nlmsg_data(nlh);
+	memset(pm, 0, sizeof(*pm));
 
 	/* Fill in the ulog data */
 	pm->version = EBT_ULOG_VERSION;
@@ -193,8 +194,6 @@ static void ebt_ulog_packet(struct net *net, unsigned int hooknr,
 	pm->hook = hooknr;
 	if (uloginfo->prefix != NULL)
 		strcpy(pm->prefix, uloginfo->prefix);
-	else
-		*(pm->prefix) = '\0';
 
 	if (in) {
 		strcpy(pm->physindev, in->name);
@@ -204,16 +203,14 @@ static void ebt_ulog_packet(struct net *net, unsigned int hooknr,
 			strcpy(pm->indev, br_port_get_rcu(in)->br->dev->name);
 		else
 			strcpy(pm->indev, in->name);
-	} else
-		pm->indev[0] = pm->physindev[0] = '\0';
+	}
 
 	if (out) {
 		/* If out exists, then out is a bridge port */
 		strcpy(pm->physoutdev, out->name);
 		/* rcu_read_lock()ed by nf_hook_slow */
 		strcpy(pm->outdev, br_port_get_rcu(out)->br->dev->name);
-	} else
-		pm->outdev[0] = pm->physoutdev[0] = '\0';
+	}
 
 	if (skb_copy_bits(skb, -ETH_HLEN, pm->data, copy_len) < 0)
 		BUG();
-- 
cgit v1.2.3-18-g5258


From 278f2b3e2af5f32ea1afe34fa12a2518153e6e49 Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Mon, 30 Sep 2013 22:05:08 +0200
Subject: netfilter: ipt_ULOG: fix info leaks

The ulog messages leak heap bytes by the means of padding bytes and
incompletely filled string arrays. Fix those by memset(0)'ing the
whole struct before filling it.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/ipt_ULOG.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index cbc22158af4..9cb993cd224 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -220,6 +220,7 @@ static void ipt_ulog_packet(struct net *net,
 	ub->qlen++;
 
 	pm = nlmsg_data(nlh);
+	memset(pm, 0, sizeof(*pm));
 
 	/* We might not have a timestamp, get one */
 	if (skb->tstamp.tv64 == 0)
@@ -238,8 +239,6 @@ static void ipt_ulog_packet(struct net *net,
 	}
 	else if (loginfo->prefix[0] != '\0')
 		strncpy(pm->prefix, loginfo->prefix, sizeof(pm->prefix));
-	else
-		*(pm->prefix) = '\0';
 
 	if (in && in->hard_header_len > 0 &&
 	    skb->mac_header != skb->network_header &&
@@ -251,13 +250,9 @@ static void ipt_ulog_packet(struct net *net,
 
 	if (in)
 		strncpy(pm->indev_name, in->name, sizeof(pm->indev_name));
-	else
-		pm->indev_name[0] = '\0';
 
 	if (out)
 		strncpy(pm->outdev_name, out->name, sizeof(pm->outdev_name));
-	else
-		pm->outdev_name[0] = '\0';
 
 	/* copy_len <= skb->len, so can't fail. */
 	if (skb_copy_bits(skb, 0, pm->payload, copy_len) < 0)
-- 
cgit v1.2.3-18-g5258


From b07c26511e94ab856f3700c56d582c0da36d5b4d Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Tue, 15 Oct 2013 14:54:11 -0700
Subject: openvswitch: fix vport-netdev unregister

The combination of two commits:
commit 8e4e1713e4
("openvswitch: Simplify datapath locking.")
commit 2537b4dd0a
("openvswitch:: link upper device for port devices")

introduced a bug where upper_dev wasn't unlinked upon
netdev_unregister notification

The following steps:

  modprobe openvswitch
  ovs-dpctl add-dp test
  ip tuntap add dev tap1 mode tap
  ovs-dpctl add-if test tap1
  ip tuntap del dev tap1 mode tap

are causing multiple warnings:

[   62.747557] gre: GRE over IPv4 demultiplexor driver
[   62.749579] openvswitch: Open vSwitch switching datapath
[   62.755087] device test entered promiscuous mode
[   62.765911] device tap1 entered promiscuous mode
[   62.766033] IPv6: ADDRCONF(NETDEV_UP): tap1: link is not ready
[   62.769017] ------------[ cut here ]------------
[   62.769022] WARNING: CPU: 1 PID: 3267 at net/core/dev.c:5501 rollback_registered_many+0x20f/0x240()
[   62.769023] Modules linked in: openvswitch gre vxlan ip_tunnel libcrc32c ip6table_filter ip6_tables ebtable_nat ebtables nf_conntrack_ipv4 nf_defrag_ipv4 xt_state nf_conntrack xt_CHECKSUM iptable_mangle ipt_REJECT xt_tcpudp iptable_filter ip_tables x_tables bridge stp llc vhost_net macvtap macvlan vhost kvm_intel kvm dm_crypt iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi hid_generic mxm_wmi eeepc_wmi asus_wmi sparse_keymap dm_multipath psmouse serio_raw usbhid hid parport_pc ppdev firewire_ohci lpc_ich firewire_core e1000e crc_itu_t binfmt_misc igb dca ptp pps_core mac_hid wmi lp parport i2o_config i2o_block video
[   62.769051] CPU: 1 PID: 3267 Comm: ip Not tainted 3.12.0-rc3+ #60
[   62.769052] Hardware name: System manufacturer System Product Name/P8Z77 WS, BIOS 3007 07/26/2012
[   62.769053]  0000000000000009 ffff8807f25cbd28 ffffffff8175e575 0000000000000006
[   62.769055]  0000000000000000 ffff8807f25cbd68 ffffffff8105314c ffff8807f25cbd58
[   62.769057]  ffff8807f2634000 ffff8807f25cbdc8 ffff8807f25cbd88 ffff8807f25cbdc8
[   62.769059] Call Trace:
[   62.769062]  [<ffffffff8175e575>] dump_stack+0x55/0x76
[   62.769065]  [<ffffffff8105314c>] warn_slowpath_common+0x8c/0xc0
[   62.769067]  [<ffffffff8105319a>] warn_slowpath_null+0x1a/0x20
[   62.769069]  [<ffffffff8162a04f>] rollback_registered_many+0x20f/0x240
[   62.769071]  [<ffffffff8162a101>] rollback_registered+0x31/0x40
[   62.769073]  [<ffffffff8162a488>] unregister_netdevice_queue+0x58/0x90
[   62.769075]  [<ffffffff8154f900>] __tun_detach+0x140/0x340
[   62.769077]  [<ffffffff8154fb36>] tun_chr_close+0x36/0x60
[   62.769080]  [<ffffffff811bddaf>] __fput+0xff/0x260
[   62.769082]  [<ffffffff811bdf5e>] ____fput+0xe/0x10
[   62.769084]  [<ffffffff8107b515>] task_work_run+0xb5/0xe0
[   62.769087]  [<ffffffff810029b9>] do_notify_resume+0x59/0x80
[   62.769089]  [<ffffffff813a41fe>] ? trace_hardirqs_on_thunk+0x3a/0x3f
[   62.769091]  [<ffffffff81770f5a>] int_signal+0x12/0x17
[   62.769093] ---[ end trace 838756c62e156ffb ]---
[   62.769481] ------------[ cut here ]------------
[   62.769485] WARNING: CPU: 1 PID: 92 at fs/sysfs/inode.c:325 sysfs_hash_and_remove+0xa9/0xb0()
[   62.769486] sysfs: can not remove 'master', no directory
[   62.769486] Modules linked in: openvswitch gre vxlan ip_tunnel libcrc32c ip6table_filter ip6_tables ebtable_nat ebtables nf_conntrack_ipv4 nf_defrag_ipv4 xt_state nf_conntrack xt_CHECKSUM iptable_mangle ipt_REJECT xt_tcpudp iptable_filter ip_tables x_tables bridge stp llc vhost_net macvtap macvlan vhost kvm_intel kvm dm_crypt iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi hid_generic mxm_wmi eeepc_wmi asus_wmi sparse_keymap dm_multipath psmouse serio_raw usbhid hid parport_pc ppdev firewire_ohci lpc_ich firewire_core e1000e crc_itu_t binfmt_misc igb dca ptp pps_core mac_hid wmi lp parport i2o_config i2o_block video
[   62.769514] CPU: 1 PID: 92 Comm: kworker/1:2 Tainted: G        W    3.12.0-rc3+ #60
[   62.769515] Hardware name: System manufacturer System Product Name/P8Z77 WS, BIOS 3007 07/26/2012
[   62.769518] Workqueue: events ovs_dp_notify_wq [openvswitch]
[   62.769519]  0000000000000009 ffff880807ad3ac8 ffffffff8175e575 0000000000000006
[   62.769521]  ffff880807ad3b18 ffff880807ad3b08 ffffffff8105314c ffff880807ad3b28
[   62.769523]  0000000000000000 ffffffff81a87a1f ffff8807f2634000 ffff880037038500
[   62.769525] Call Trace:
[   62.769528]  [<ffffffff8175e575>] dump_stack+0x55/0x76
[   62.769529]  [<ffffffff8105314c>] warn_slowpath_common+0x8c/0xc0
[   62.769531]  [<ffffffff81053236>] warn_slowpath_fmt+0x46/0x50
[   62.769533]  [<ffffffff8123e7e9>] sysfs_hash_and_remove+0xa9/0xb0
[   62.769535]  [<ffffffff81240e96>] sysfs_remove_link+0x26/0x30
[   62.769538]  [<ffffffff81631ef7>] __netdev_adjacent_dev_remove+0xf7/0x150
[   62.769540]  [<ffffffff81632037>] __netdev_adjacent_dev_unlink_lists+0x27/0x50
[   62.769542]  [<ffffffff8163213a>] __netdev_adjacent_dev_unlink_neighbour+0x3a/0x50
[   62.769544]  [<ffffffff8163218d>] netdev_upper_dev_unlink+0x3d/0x140
[   62.769548]  [<ffffffffa033c2db>] netdev_destroy+0x4b/0x80 [openvswitch]
[   62.769550]  [<ffffffffa033b696>] ovs_vport_del+0x46/0x60 [openvswitch]
[   62.769552]  [<ffffffffa0335314>] ovs_dp_detach_port+0x44/0x60 [openvswitch]
[   62.769555]  [<ffffffffa0336574>] ovs_dp_notify_wq+0xb4/0x150 [openvswitch]
[   62.769557]  [<ffffffff81075c28>] process_one_work+0x1d8/0x6a0
[   62.769559]  [<ffffffff81075bc8>] ? process_one_work+0x178/0x6a0
[   62.769562]  [<ffffffff8107659b>] worker_thread+0x11b/0x370
[   62.769564]  [<ffffffff81076480>] ? rescuer_thread+0x350/0x350
[   62.769566]  [<ffffffff8107f44a>] kthread+0xea/0xf0
[   62.769568]  [<ffffffff8107f360>] ? flush_kthread_worker+0x150/0x150
[   62.769570]  [<ffffffff81770bac>] ret_from_fork+0x7c/0xb0
[   62.769572]  [<ffffffff8107f360>] ? flush_kthread_worker+0x150/0x150
[   62.769573] ---[ end trace 838756c62e156ffc ]---
[   62.769574] ------------[ cut here ]------------
[   62.769576] WARNING: CPU: 1 PID: 92 at fs/sysfs/inode.c:325 sysfs_hash_and_remove+0xa9/0xb0()
[   62.769577] sysfs: can not remove 'upper_test', no directory
[   62.769577] Modules linked in: openvswitch gre vxlan ip_tunnel libcrc32c ip6table_filter ip6_tables ebtable_nat ebtables nf_conntrack_ipv4 nf_defrag_ipv4 xt_state nf_conntrack xt_CHECKSUM iptable_mangle ipt_REJECT xt_tcpudp iptable_filter ip_tables x_tables bridge stp llc vhost_net macvtap macvlan vhost kvm_intel kvm dm_crypt iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi hid_generic mxm_wmi eeepc_wmi asus_wmi sparse_keymap dm_multipath psmouse serio_raw usbhid hid parport_pc ppdev firewire_ohci lpc_ich firewire_core e1000e crc_itu_t binfmt_misc igb dca ptp pps_core mac_hid wmi lp parport i2o_config i2o_block video
[   62.769603] CPU: 1 PID: 92 Comm: kworker/1:2 Tainted: G        W    3.12.0-rc3+ #60
[   62.769604] Hardware name: System manufacturer System Product Name/P8Z77 WS, BIOS 3007 07/26/2012
[   62.769606] Workqueue: events ovs_dp_notify_wq [openvswitch]
[   62.769607]  0000000000000009 ffff880807ad3ac8 ffffffff8175e575 0000000000000006
[   62.769609]  ffff880807ad3b18 ffff880807ad3b08 ffffffff8105314c ffff880807ad3b58
[   62.769611]  0000000000000000 ffff880807ad3bd9 ffff8807f2634000 ffff880037038500
[   62.769613] Call Trace:
[   62.769615]  [<ffffffff8175e575>] dump_stack+0x55/0x76
[   62.769617]  [<ffffffff8105314c>] warn_slowpath_common+0x8c/0xc0
[   62.769619]  [<ffffffff81053236>] warn_slowpath_fmt+0x46/0x50
[   62.769621]  [<ffffffff8123e7e9>] sysfs_hash_and_remove+0xa9/0xb0
[   62.769622]  [<ffffffff81240e96>] sysfs_remove_link+0x26/0x30
[   62.769624]  [<ffffffff81631f22>] __netdev_adjacent_dev_remove+0x122/0x150
[   62.769627]  [<ffffffff81632037>] __netdev_adjacent_dev_unlink_lists+0x27/0x50
[   62.769629]  [<ffffffff8163213a>] __netdev_adjacent_dev_unlink_neighbour+0x3a/0x50
[   62.769631]  [<ffffffff8163218d>] netdev_upper_dev_unlink+0x3d/0x140
[   62.769633]  [<ffffffffa033c2db>] netdev_destroy+0x4b/0x80 [openvswitch]
[   62.769636]  [<ffffffffa033b696>] ovs_vport_del+0x46/0x60 [openvswitch]
[   62.769638]  [<ffffffffa0335314>] ovs_dp_detach_port+0x44/0x60 [openvswitch]
[   62.769640]  [<ffffffffa0336574>] ovs_dp_notify_wq+0xb4/0x150 [openvswitch]
[   62.769642]  [<ffffffff81075c28>] process_one_work+0x1d8/0x6a0
[   62.769644]  [<ffffffff81075bc8>] ? process_one_work+0x178/0x6a0
[   62.769646]  [<ffffffff8107659b>] worker_thread+0x11b/0x370
[   62.769648]  [<ffffffff81076480>] ? rescuer_thread+0x350/0x350
[   62.769650]  [<ffffffff8107f44a>] kthread+0xea/0xf0
[   62.769652]  [<ffffffff8107f360>] ? flush_kthread_worker+0x150/0x150
[   62.769654]  [<ffffffff81770bac>] ret_from_fork+0x7c/0xb0
[   62.769656]  [<ffffffff8107f360>] ? flush_kthread_worker+0x150/0x150
[   62.769657] ---[ end trace 838756c62e156ffd ]---
[   62.769724] device tap1 left promiscuous mode

This patch also affects moving devices between net namespaces.

OVS used to ignore netns move notifications which caused problems.
Like:
  ovs-dpctl add-if test tap1
  ip link set tap1 netns 3512
and then removing tap1 inside the namespace will cause hang on missing dev_put.

With this patch OVS will detach dev upon receiving netns move event.

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>
---
 net/openvswitch/dp_notify.c    |  7 +++++--
 net/openvswitch/vport-netdev.c | 16 +++++++++++++---
 net/openvswitch/vport-netdev.h |  1 +
 3 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c
index c3235675f35..5c2dab27610 100644
--- a/net/openvswitch/dp_notify.c
+++ b/net/openvswitch/dp_notify.c
@@ -65,8 +65,7 @@ void ovs_dp_notify_wq(struct work_struct *work)
 					continue;
 
 				netdev_vport = netdev_vport_priv(vport);
-				if (netdev_vport->dev->reg_state == NETREG_UNREGISTERED ||
-				    netdev_vport->dev->reg_state == NETREG_UNREGISTERING)
+				if (!(netdev_vport->dev->priv_flags & IFF_OVS_DATAPATH))
 					dp_detach_port_notify(vport);
 			}
 		}
@@ -88,6 +87,10 @@ static int dp_device_event(struct notifier_block *unused, unsigned long event,
 		return NOTIFY_DONE;
 
 	if (event == NETDEV_UNREGISTER) {
+		/* upper_dev_unlink and decrement promisc immediately */
+		ovs_netdev_detach_dev(vport);
+
+		/* schedule vport destroy, dev_put and genl notification */
 		ovs_net = net_generic(dev_net(dev), ovs_net_id);
 		queue_work(system_wq, &ovs_net->dp_notify_work);
 	}
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index 09d93c13cfd..d21f77d875b 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -150,15 +150,25 @@ static void free_port_rcu(struct rcu_head *rcu)
 	ovs_vport_free(vport_from_priv(netdev_vport));
 }
 
-static void netdev_destroy(struct vport *vport)
+void ovs_netdev_detach_dev(struct vport *vport)
 {
 	struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
 
-	rtnl_lock();
+	ASSERT_RTNL();
 	netdev_vport->dev->priv_flags &= ~IFF_OVS_DATAPATH;
 	netdev_rx_handler_unregister(netdev_vport->dev);
-	netdev_upper_dev_unlink(netdev_vport->dev, get_dpdev(vport->dp));
+	netdev_upper_dev_unlink(netdev_vport->dev,
+				netdev_master_upper_dev_get(netdev_vport->dev));
 	dev_set_promiscuity(netdev_vport->dev, -1);
+}
+
+static void netdev_destroy(struct vport *vport)
+{
+	struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
+
+	rtnl_lock();
+	if (netdev_vport->dev->priv_flags & IFF_OVS_DATAPATH)
+		ovs_netdev_detach_dev(vport);
 	rtnl_unlock();
 
 	call_rcu(&netdev_vport->rcu, free_port_rcu);
diff --git a/net/openvswitch/vport-netdev.h b/net/openvswitch/vport-netdev.h
index dd298b5c5cd..8df01c1127e 100644
--- a/net/openvswitch/vport-netdev.h
+++ b/net/openvswitch/vport-netdev.h
@@ -39,5 +39,6 @@ netdev_vport_priv(const struct vport *vport)
 }
 
 const char *ovs_netdev_get_name(const struct vport *);
+void ovs_netdev_detach_dev(struct vport *);
 
 #endif /* vport_netdev.h */
-- 
cgit v1.2.3-18-g5258


From 12e3594698f6c3ab6ebacc79f2fb2ad2bb5952b5 Mon Sep 17 00:00:00 2001
From: Michal Kubecek <mkubecek@suse.cz>
Date: Thu, 17 Oct 2013 15:07:40 +0200
Subject: xfrm: prevent ipcomp scratch buffer race condition

In ipcomp_compress(), sortirq is enabled too early, allowing the
per-cpu scratch buffer to be rewritten by ipcomp_decompress()
(called on the same CPU in softirq context) between populating
the buffer and copying the compressed data to the skb.

v2: as pointed out by Steffen Klassert, if we also move the
local_bh_disable() before reading the per-cpu pointers, we can
get rid of get_cpu()/put_cpu().

v3: removed ipcomp_decompress part (as explained by Herbert Xu,
it cannot be called from process context), get rid of cpu
variable (thanks to Eric Dumazet)

Signed-off-by: Michal Kubecek <mkubecek@suse.cz>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_ipcomp.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c
index 2906d520eea..3be02b68026 100644
--- a/net/xfrm/xfrm_ipcomp.c
+++ b/net/xfrm/xfrm_ipcomp.c
@@ -141,14 +141,14 @@ static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb)
 	const int plen = skb->len;
 	int dlen = IPCOMP_SCRATCH_SIZE;
 	u8 *start = skb->data;
-	const int cpu = get_cpu();
-	u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu);
-	struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu);
+	struct crypto_comp *tfm;
+	u8 *scratch;
 	int err;
 
 	local_bh_disable();
+	scratch = *this_cpu_ptr(ipcomp_scratches);
+	tfm = *this_cpu_ptr(ipcd->tfms);
 	err = crypto_comp_compress(tfm, start, plen, scratch, &dlen);
-	local_bh_enable();
 	if (err)
 		goto out;
 
@@ -158,13 +158,13 @@ static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb)
 	}
 
 	memcpy(start + sizeof(struct ip_comp_hdr), scratch, dlen);
-	put_cpu();
+	local_bh_enable();
 
 	pskb_trim(skb, dlen + sizeof(struct ip_comp_hdr));
 	return 0;
 
 out:
-	put_cpu();
+	local_bh_enable();
 	return err;
 }
 
-- 
cgit v1.2.3-18-g5258


From b416c144f46af1a30ddfa4e4319a8f077381ad63 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Mon, 21 Oct 2013 13:14:53 +0100
Subject: netfilter: x_tables: fix ordering of jumpstack allocation and table
 update

During kernel stability testing on an SMP ARMv7 system, Yalin Wang
reported the following panic from the netfilter code:

  1fe0: 0000001c 5e2d3b10 4007e779 4009e110 60000010 00000032 ff565656 ff545454
  [<c06c48dc>] (ipt_do_table+0x448/0x584) from [<c0655ef0>] (nf_iterate+0x48/0x7c)
  [<c0655ef0>] (nf_iterate+0x48/0x7c) from [<c0655f7c>] (nf_hook_slow+0x58/0x104)
  [<c0655f7c>] (nf_hook_slow+0x58/0x104) from [<c0683bbc>] (ip_local_deliver+0x88/0xa8)
  [<c0683bbc>] (ip_local_deliver+0x88/0xa8) from [<c0683718>] (ip_rcv_finish+0x418/0x43c)
  [<c0683718>] (ip_rcv_finish+0x418/0x43c) from [<c062b1c4>] (__netif_receive_skb+0x4cc/0x598)
  [<c062b1c4>] (__netif_receive_skb+0x4cc/0x598) from [<c062b314>] (process_backlog+0x84/0x158)
  [<c062b314>] (process_backlog+0x84/0x158) from [<c062de84>] (net_rx_action+0x70/0x1dc)
  [<c062de84>] (net_rx_action+0x70/0x1dc) from [<c0088230>] (__do_softirq+0x11c/0x27c)
  [<c0088230>] (__do_softirq+0x11c/0x27c) from [<c008857c>] (do_softirq+0x44/0x50)
  [<c008857c>] (do_softirq+0x44/0x50) from [<c0088614>] (local_bh_enable_ip+0x8c/0xd0)
  [<c0088614>] (local_bh_enable_ip+0x8c/0xd0) from [<c06b0330>] (inet_stream_connect+0x164/0x298)
  [<c06b0330>] (inet_stream_connect+0x164/0x298) from [<c061d68c>] (sys_connect+0x88/0xc8)
  [<c061d68c>] (sys_connect+0x88/0xc8) from [<c000e340>] (ret_fast_syscall+0x0/0x30)
  Code: 2a000021 e59d2028 e59de01c e59f011c (e7824103)
  ---[ end trace da227214a82491bd ]---
  Kernel panic - not syncing: Fatal exception in interrupt

This comes about because CPU1 is executing xt_replace_table in response
to a setsockopt syscall, resulting in:

	ret = xt_jumpstack_alloc(newinfo);
		--> newinfo->jumpstack = kzalloc(size, GFP_KERNEL);

	[...]

	table->private = newinfo;
	newinfo->initial_entries = private->initial_entries;

Meanwhile, CPU0 is handling the network receive path and ends up in
ipt_do_table, resulting in:

	private = table->private;

	[...]

	jumpstack  = (struct ipt_entry **)private->jumpstack[cpu];

On weakly ordered memory architectures, the writes to table->private
and newinfo->jumpstack from CPU1 can be observed out of order by CPU0.
Furthermore, on architectures which don't respect ordering of address
dependencies (i.e. Alpha), the reads from CPU0 can also be re-ordered.

This patch adds an smp_wmb() before the assignment to table->private
(which is essentially publishing newinfo) to ensure that all writes to
newinfo will be observed before plugging it into the table structure.
A dependent-read barrier is also added on the consumer sides, to ensure
the same ordering requirements are also respected there.

Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reported-by: Wang, Yalin <Yalin.Wang@sonymobile.com>
Tested-by: Wang, Yalin <Yalin.Wang@sonymobile.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/arp_tables.c | 5 +++++
 net/ipv4/netfilter/ip_tables.c  | 5 +++++
 net/ipv6/netfilter/ip6_tables.c | 5 +++++
 net/netfilter/x_tables.c        | 7 ++++++-
 4 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 85a4f21aac1..59da7cde072 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -271,6 +271,11 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 	local_bh_disable();
 	addend = xt_write_recseq_begin();
 	private = table->private;
+	/*
+	 * Ensure we load private-> members after we've fetched the base
+	 * pointer.
+	 */
+	smp_read_barrier_depends();
 	table_base = private->entries[smp_processor_id()];
 
 	e = get_entry(table_base, private->hook_entry[hook]);
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index d23118d95ff..718dfbd30cb 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -327,6 +327,11 @@ ipt_do_table(struct sk_buff *skb,
 	addend = xt_write_recseq_begin();
 	private = table->private;
 	cpu        = smp_processor_id();
+	/*
+	 * Ensure we load private-> members after we've fetched the base
+	 * pointer.
+	 */
+	smp_read_barrier_depends();
 	table_base = private->entries[cpu];
 	jumpstack  = (struct ipt_entry **)private->jumpstack[cpu];
 	stackptr   = per_cpu_ptr(private->stackptr, cpu);
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 44400c216dc..710238f58aa 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -349,6 +349,11 @@ ip6t_do_table(struct sk_buff *skb,
 	local_bh_disable();
 	addend = xt_write_recseq_begin();
 	private = table->private;
+	/*
+	 * Ensure we load private-> members after we've fetched the base
+	 * pointer.
+	 */
+	smp_read_barrier_depends();
 	cpu        = smp_processor_id();
 	table_base = private->entries[cpu];
 	jumpstack  = (struct ip6t_entry **)private->jumpstack[cpu];
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 8b03028cca6..227aa11e840 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -845,8 +845,13 @@ xt_replace_table(struct xt_table *table,
 		return NULL;
 	}
 
-	table->private = newinfo;
 	newinfo->initial_entries = private->initial_entries;
+	/*
+	 * Ensure contents of newinfo are visible before assigning to
+	 * private.
+	 */
+	smp_wmb();
+	table->private = newinfo;
 
 	/*
 	 * Even though table entries have now been swapped, other CPU's
-- 
cgit v1.2.3-18-g5258


From fecda03493646b53f53892fa3c38c75ba9310374 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Tue, 22 Oct 2013 18:34:56 +0200
Subject: net: sctp: fix ASCONF to allow non SCTP_ADDR_SRC addresses in ipv6

Commit 8a07eb0a50 ("sctp: Add ASCONF operation on the single-homed host")
implemented possible use of IPv4 addresses with non SCTP_ADDR_SRC state
as source address when sending ASCONF (ADD) packets, but IPv6 part for
that was not implemented in 8a07eb0a50. Therefore, as this is not restricted
to IPv4-only, fix this up to allow the same for IPv6 addresses in SCTP.

Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Cc: Michio Honda <micchie@sfc.wide.ad.jp>
Acked-by: Michio Honda <micchie@sfc.wide.ad.jp>
Acked-by: Vlad Yasevich <vyasevich@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/ipv6.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index e7b2d4fe2b6..96a55910262 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -279,7 +279,9 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
 		sctp_v6_to_addr(&dst_saddr, &fl6->saddr, htons(bp->port));
 		rcu_read_lock();
 		list_for_each_entry_rcu(laddr, &bp->address_list, list) {
-			if (!laddr->valid || (laddr->state != SCTP_ADDR_SRC))
+			if (!laddr->valid || laddr->state == SCTP_ADDR_DEL ||
+			    (laddr->state != SCTP_ADDR_SRC &&
+			     !asoc->src_out_of_asoc_ok))
 				continue;
 
 			/* Do not compare against v4 addrs */
-- 
cgit v1.2.3-18-g5258


From 0e67d9903d71ce3c5889fa2e1788d4335794a0f6 Mon Sep 17 00:00:00 2001
From: Sebastian Siewior <sebastian@breakpoint.cc>
Date: Tue, 22 Oct 2013 20:36:25 +0200
Subject: net: wan: sbni: remove assembly crc32 code

There is also a C function doing the same thing. Unless the asm code is
110% faster we could stick to the C function.

Signed-off-by: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/sbni.c | 89 --------------------------------------------------
 1 file changed, 89 deletions(-)

diff --git a/drivers/net/wan/sbni.c b/drivers/net/wan/sbni.c
index 5bbcb5e3ee0..388ddf60a66 100644
--- a/drivers/net/wan/sbni.c
+++ b/drivers/net/wan/sbni.c
@@ -148,10 +148,6 @@ static int  enslave( struct net_device *, struct net_device * );
 static int  emancipate( struct net_device * );
 #endif
 
-#ifdef __i386__
-#define ASM_CRC 1
-#endif
-
 static const char  version[] =
 	"Granch SBNI12 driver ver 5.0.1  Jun 22 2001  Denis I.Timofeev.\n";
 
@@ -1551,88 +1547,6 @@ __setup( "sbni=", sbni_setup );
 
 /* -------------------------------------------------------------------------- */
 
-#ifdef ASM_CRC
-
-static u32
-calc_crc32( u32  crc,  u8  *p,  u32  len )
-{
-	register u32  _crc;
-	_crc = crc;
-	
-	__asm__ __volatile__ (
-		"xorl	%%ebx, %%ebx\n"
-		"movl	%2, %%esi\n" 
-		"movl	%3, %%ecx\n" 
-		"movl	$crc32tab, %%edi\n"
-		"shrl	$2, %%ecx\n"
-		"jz	1f\n"
-
-		".align 4\n"
-	"0:\n"
-		"movb	%%al, %%bl\n"
-		"movl	(%%esi), %%edx\n"
-		"shrl	$8, %%eax\n"
-		"xorb	%%dl, %%bl\n"
-		"shrl	$8, %%edx\n"
-		"xorl	(%%edi,%%ebx,4), %%eax\n"
-
-		"movb	%%al, %%bl\n"
-		"shrl	$8, %%eax\n"
-		"xorb	%%dl, %%bl\n"
-		"shrl	$8, %%edx\n"
-		"xorl	(%%edi,%%ebx,4), %%eax\n"
-
-		"movb	%%al, %%bl\n"
-		"shrl	$8, %%eax\n"
-		"xorb	%%dl, %%bl\n"
-		"movb	%%dh, %%dl\n" 
-		"xorl	(%%edi,%%ebx,4), %%eax\n"
-
-		"movb	%%al, %%bl\n"
-		"shrl	$8, %%eax\n"
-		"xorb	%%dl, %%bl\n"
-		"addl	$4, %%esi\n"
-		"xorl	(%%edi,%%ebx,4), %%eax\n"
-
-		"decl	%%ecx\n"
-		"jnz	0b\n"
-
-	"1:\n"
-		"movl	%3, %%ecx\n"
-		"andl	$3, %%ecx\n"
-		"jz	2f\n"
-
-		"movb	%%al, %%bl\n"
-		"shrl	$8, %%eax\n"
-		"xorb	(%%esi), %%bl\n"
-		"xorl	(%%edi,%%ebx,4), %%eax\n"
-
-		"decl	%%ecx\n"
-		"jz	2f\n"
-
-		"movb	%%al, %%bl\n"
-		"shrl	$8, %%eax\n"
-		"xorb	1(%%esi), %%bl\n"
-		"xorl	(%%edi,%%ebx,4), %%eax\n"
-
-		"decl	%%ecx\n"
-		"jz	2f\n"
-
-		"movb	%%al, %%bl\n"
-		"shrl	$8, %%eax\n"
-		"xorb	2(%%esi), %%bl\n"
-		"xorl	(%%edi,%%ebx,4), %%eax\n"
-	"2:\n"
-		: "=a" (_crc)
-		: "0" (_crc), "g" (p), "g" (len)
-		: "bx", "cx", "dx", "si", "di"
-	);
-
-	return  _crc;
-}
-
-#else	/* ASM_CRC */
-
 static u32
 calc_crc32( u32  crc,  u8  *p,  u32  len )
 {
@@ -1642,9 +1556,6 @@ calc_crc32( u32  crc,  u8  *p,  u32  len )
 	return  crc;
 }
 
-#endif	/* ASM_CRC */
-
-
 static u32  crc32tab[] __attribute__ ((aligned(8))) = {
 	0xD202EF8D,  0xA505DF1B,  0x3C0C8EA1,  0x4B0BBE37,
 	0xD56F2B94,  0xA2681B02,  0x3B614AB8,  0x4C667A2E,
-- 
cgit v1.2.3-18-g5258


From 45e526e80e6fdc796d3bc05716d5c930a427df4d Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@redhat.com>
Date: Wed, 23 Oct 2013 15:04:49 +0200
Subject: netconsole: fix NULL pointer dereference

We need to disable the netconsole (enabled = 0) before setting nt->np.dev
to NULL because otherwise we might still have users after the
netpoll_cleanup() since nt->enabled is set afterwards and we can
have a message which will result in a NULL pointer dereference.
It is very easy to hit dereferences all over the netpoll_send_udp function
by running the following two loops in parallel:
while [ 1 ]; do echo 1 > enabled; echo 0 > enabled; done;
while [ 1 ]; do echo 00:11:22:33:44:55 > remote_mac; done;
(the second loop is to generate messages, it can be done by anything)

We're safe to set nt->np.dev = NULL and nt->enabled = 0 with the spinlock
since it's required in the write_msg() function.

Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com>
Reviewed-by: Veacelsav Falico <vfalico@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/netconsole.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c
index adeee615dd1..1505dcb950f 100644
--- a/drivers/net/netconsole.c
+++ b/drivers/net/netconsole.c
@@ -310,6 +310,7 @@ static ssize_t store_enabled(struct netconsole_target *nt,
 			     const char *buf,
 			     size_t count)
 {
+	unsigned long flags;
 	int enabled;
 	int err;
 
@@ -342,6 +343,13 @@ static ssize_t store_enabled(struct netconsole_target *nt,
 		printk(KERN_INFO "netconsole: network logging started\n");
 
 	} else {	/* 0 */
+		/* We need to disable the netconsole before cleaning it up
+		 * otherwise we might end up in write_msg() with
+		 * nt->np.dev == NULL and nt->enabled == 1
+		 */
+		spin_lock_irqsave(&target_list_lock, flags);
+		nt->enabled = 0;
+		spin_unlock_irqrestore(&target_list_lock, flags);
 		netpoll_cleanup(&nt->np);
 	}
 
-- 
cgit v1.2.3-18-g5258


From c7c6effdeffcafd792b9f880ad52e48689eea4ad Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@redhat.com>
Date: Thu, 24 Oct 2013 12:09:24 +0200
Subject: netconsole: fix multiple race conditions

In every netconsole option that can be set through configfs there's a
race when checking for nt->enabled since it can be modified at the same
time. Probably the most damage can be done by store_enabled when racing
with another instance of itself. Fix all the races with one stone by
moving the mutex lock around the ->store call for all options.

Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/netconsole.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c
index 1505dcb950f..c9a15925a1f 100644
--- a/drivers/net/netconsole.c
+++ b/drivers/net/netconsole.c
@@ -325,9 +325,7 @@ static ssize_t store_enabled(struct netconsole_target *nt,
 		return -EINVAL;
 	}
 
-	mutex_lock(&nt->mutex);
 	if (enabled) {	/* 1 */
-
 		/*
 		 * Skip netpoll_parse_options() -- all the attributes are
 		 * already configured via configfs. Just print them out.
@@ -335,13 +333,10 @@ static ssize_t store_enabled(struct netconsole_target *nt,
 		netpoll_print_options(&nt->np);
 
 		err = netpoll_setup(&nt->np);
-		if (err) {
-			mutex_unlock(&nt->mutex);
+		if (err)
 			return err;
-		}
 
 		printk(KERN_INFO "netconsole: network logging started\n");
-
 	} else {	/* 0 */
 		/* We need to disable the netconsole before cleaning it up
 		 * otherwise we might end up in write_msg() with
@@ -354,7 +349,6 @@ static ssize_t store_enabled(struct netconsole_target *nt,
 	}
 
 	nt->enabled = enabled;
-	mutex_unlock(&nt->mutex);
 
 	return strnlen(buf, count);
 }
@@ -571,8 +565,10 @@ static ssize_t netconsole_target_attr_store(struct config_item *item,
 	struct netconsole_target_attr *na =
 		container_of(attr, struct netconsole_target_attr, attr);
 
+	mutex_lock(&nt->mutex);
 	if (na->store)
 		ret = na->store(nt, buf, count);
+	mutex_unlock(&nt->mutex);
 
 	return ret;
 }
-- 
cgit v1.2.3-18-g5258


From 8fb479a47c869820966e7298f38038aa334d889c Mon Sep 17 00:00:00 2001
From: Antonio Quartulli <antonio@meshcoding.com>
Date: Wed, 23 Oct 2013 23:36:30 +0200
Subject: netpoll: fix rx_hook() interface by passing the skb

Right now skb->data is passed to rx_hook() even if the skb
has not been linearised and without giving rx_hook() a way
to linearise it.

Change the rx_hook() interface and make it accept the skb
and the offset to the UDP payload as arguments. rx_hook() is
also renamed to rx_skb_hook() to ensure that out of the tree
users notice the API change.

In this way any rx_skb_hook() implementation can perform all
the needed operations to properly (and safely) access the
skb data.

Signed-off-by: Antonio Quartulli <antonio@meshcoding.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netpoll.h |  5 +++--
 net/core/netpoll.c      | 31 ++++++++++++++++++-------------
 2 files changed, 21 insertions(+), 15 deletions(-)

diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index f3c7c24bec1..fbfdb9d8d3a 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -24,7 +24,8 @@ struct netpoll {
 	struct net_device *dev;
 	char dev_name[IFNAMSIZ];
 	const char *name;
-	void (*rx_hook)(struct netpoll *, int, char *, int);
+	void (*rx_skb_hook)(struct netpoll *np, int source, struct sk_buff *skb,
+			    int offset, int len);
 
 	union inet_addr local_ip, remote_ip;
 	bool ipv6;
@@ -41,7 +42,7 @@ struct netpoll_info {
 	unsigned long rx_flags;
 	spinlock_t rx_lock;
 	struct semaphore dev_lock;
-	struct list_head rx_np; /* netpolls that registered an rx_hook */
+	struct list_head rx_np; /* netpolls that registered an rx_skb_hook */
 
 	struct sk_buff_head neigh_tx; /* list of neigh requests to reply to */
 	struct sk_buff_head txq;
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index fc75c9e461b..8f971990677 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -636,8 +636,9 @@ static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo
 
 			netpoll_send_skb(np, send_skb);
 
-			/* If there are several rx_hooks for the same address,
-			   we're fine by sending a single reply */
+			/* If there are several rx_skb_hooks for the same
+			 * address we're fine by sending a single reply
+			 */
 			break;
 		}
 		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
@@ -719,8 +720,9 @@ static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo
 
 			netpoll_send_skb(np, send_skb);
 
-			/* If there are several rx_hooks for the same address,
-			   we're fine by sending a single reply */
+			/* If there are several rx_skb_hooks for the same
+			 * address, we're fine by sending a single reply
+			 */
 			break;
 		}
 		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
@@ -756,11 +758,12 @@ static bool pkt_is_ns(struct sk_buff *skb)
 
 int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
 {
-	int proto, len, ulen;
-	int hits = 0;
+	int proto, len, ulen, data_len;
+	int hits = 0, offset;
 	const struct iphdr *iph;
 	struct udphdr *uh;
 	struct netpoll *np, *tmp;
+	uint16_t source;
 
 	if (list_empty(&npinfo->rx_np))
 		goto out;
@@ -820,7 +823,10 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
 
 		len -= iph->ihl*4;
 		uh = (struct udphdr *)(((char *)iph) + iph->ihl*4);
+		offset = (unsigned char *)(uh + 1) - skb->data;
 		ulen = ntohs(uh->len);
+		data_len = skb->len - offset;
+		source = ntohs(uh->source);
 
 		if (ulen != len)
 			goto out;
@@ -834,9 +840,7 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
 			if (np->local_port && np->local_port != ntohs(uh->dest))
 				continue;
 
-			np->rx_hook(np, ntohs(uh->source),
-				       (char *)(uh+1),
-				       ulen - sizeof(struct udphdr));
+			np->rx_skb_hook(np, source, skb, offset, data_len);
 			hits++;
 		}
 	} else {
@@ -859,7 +863,10 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
 		if (!pskb_may_pull(skb, sizeof(struct udphdr)))
 			goto out;
 		uh = udp_hdr(skb);
+		offset = (unsigned char *)(uh + 1) - skb->data;
 		ulen = ntohs(uh->len);
+		data_len = skb->len - offset;
+		source = ntohs(uh->source);
 		if (ulen != skb->len)
 			goto out;
 		if (udp6_csum_init(skb, uh, IPPROTO_UDP))
@@ -872,9 +879,7 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
 			if (np->local_port && np->local_port != ntohs(uh->dest))
 				continue;
 
-			np->rx_hook(np, ntohs(uh->source),
-				       (char *)(uh+1),
-				       ulen - sizeof(struct udphdr));
+			np->rx_skb_hook(np, source, skb, offset, data_len);
 			hits++;
 		}
 #endif
@@ -1062,7 +1067,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp)
 
 	npinfo->netpoll = np;
 
-	if (np->rx_hook) {
+	if (np->rx_skb_hook) {
 		spin_lock_irqsave(&npinfo->rx_lock, flags);
 		npinfo->rx_flags |= NETPOLL_RX_ENABLED;
 		list_add_tail(&np->rx, &npinfo->rx_np);
-- 
cgit v1.2.3-18-g5258


From 01ba16d6ec85a1ec4669c75513a76b61ec53ee50 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Thu, 24 Oct 2013 10:14:27 +0200
Subject: ipv6: reset dst.expires value when clearing expire flag

On receiving a packet too big icmp error we update the expire value by
calling rt6_update_expires. This function uses dst_set_expires which is
implemented that it can only reduce the expiration value of the dst entry.

If we insert new routing non-expiry information into the ipv6 fib where
we already have a matching rt6_info we only clear the RTF_EXPIRES flag
in rt6i_flags and leave the dst.expires value as is.

When new mtu information arrives for that cached dst_entry we again
call dst_set_expires. This time it won't update the dst.expire value
because we left the dst.expire value intact from the last update. So
dst_set_expires won't touch dst.expires.

Fix this by resetting dst.expires when clearing the RTF_EXPIRE flag.
dst_set_expires checks for a zero expiration and updates the
dst.expires.

In the past this (not updating dst.expires) was necessary because
dst.expire was placed in a union with the dst_entry *from reference
and rt6_clean_expires did assign NULL to it. This split happend in
ecd9883724b78cc72ed92c98bcb1a46c764fff21 ("ipv6: fix race condition
regarding dst->expires and dst->from").

Reported-by: Steinar H. Gunderson <sgunderson@bigfoot.com>
Reported-by: Valentijn Sessink <valentyn@blub.net>
Cc: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Acked-by: Eric Dumazet <edumazet@google.com>
Tested-by: Valentijn Sessink <valentyn@blub.net>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_fib.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 48ec25a7fcb..5e661a97969 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -165,6 +165,7 @@ static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst)
 static inline void rt6_clean_expires(struct rt6_info *rt)
 {
 	rt->rt6i_flags &= ~RTF_EXPIRES;
+	rt->dst.expires = 0;
 }
 
 static inline void rt6_set_expires(struct rt6_info *rt, unsigned long expires)
-- 
cgit v1.2.3-18-g5258


From e3bc10bd95d7fcc3f2ac690c6ff22833ea6781d6 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Thu, 24 Oct 2013 07:48:24 +0200
Subject: ipv6: ip6_dst_check needs to check for expired dst_entries

On receiving a packet too big icmp error we check if our current cached
dst_entry in the socket is still valid. This validation check did not
care about the expiration of the (cached) route.

The error path I traced down:
The socket receives a packet too big mtu notification. It still has a
valid dst_entry and thus issues the ip6_rt_pmtu_update on this dst_entry,
setting RTF_EXPIRE and updates the dst.expiration value (which could
fail because of not up-to-date expiration values, see previous patch).

In some seldom cases we race with a) the ip6_fib gc or b) another routing
lookup which would result in a recreation of the cached rt6_info from its
parent non-cached rt6_info. While copying the rt6_info we reinitialize the
metrics store by copying it over from the parent thus invalidating the
just installed pmtu update (both dsts use the same key to the inetpeer
storage). The dst_entry with the just invalidated metrics data would
just get its RTF_EXPIRES flag cleared and would continue to stay valid
for the socket.

We should have not issued the pmtu update on the already expired dst_entry
in the first placed. By checking the expiration on the dst entry and
doing a relookup in case it is out of date we close the race because
we would install a new rt6_info into the fib before we issue the pmtu
update, thus closing this race.

Not reliably updating the dst.expire value was fixed by the patch "ipv6:
reset dst.expires value when clearing expire flag".

Reported-by: Steinar H. Gunderson <sgunderson@bigfoot.com>
Reported-by: Valentijn Sessink <valentyn@blub.net>
Cc: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Tested-by: Valentijn Sessink <valentyn@blub.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index f54e3a10109..04e17b3309f 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1087,10 +1087,13 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
 	if (rt->rt6i_genid != rt_genid_ipv6(dev_net(rt->dst.dev)))
 		return NULL;
 
-	if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
-		return dst;
+	if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
+		return NULL;
 
-	return NULL;
+	if (rt6_check_expired(rt))
+		return NULL;
+
+	return dst;
 }
 
 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
-- 
cgit v1.2.3-18-g5258


From 4c8e84b22aa1bfee40b047d7810ba08615235c05 Mon Sep 17 00:00:00 2001
From: Freddy Xin <freddy@asix.com.tw>
Date: Thu, 24 Oct 2013 14:58:25 +0800
Subject: ax88179_178a: Remove AX_MEDIUM_ALWAYS_ONE bit in
 AX_MEDIUM_STATUS_MODE register to avoid TX throttling

Remove AX_MEDIUM_ALWAYS_ONE in AX_MEDIUM_STATUS_MODE register.
Setting this bit may cause TX throttling in Half-Duplex mode.

Signed-off-by: Freddy Xin <freddy@asix.com.tw>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/ax88179_178a.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c
index 846cc19c04f..8e8d0fcd497 100644
--- a/drivers/net/usb/ax88179_178a.c
+++ b/drivers/net/usb/ax88179_178a.c
@@ -78,7 +78,6 @@
 #define AX_MEDIUM_STATUS_MODE			0x22
 	#define AX_MEDIUM_GIGAMODE	0x01
 	#define AX_MEDIUM_FULL_DUPLEX	0x02
-	#define AX_MEDIUM_ALWAYS_ONE	0x04
 	#define AX_MEDIUM_EN_125MHZ	0x08
 	#define AX_MEDIUM_RXFLOW_CTRLEN	0x10
 	#define AX_MEDIUM_TXFLOW_CTRLEN	0x20
@@ -1065,8 +1064,8 @@ static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf)
 
 	/* Configure default medium type => giga */
 	*tmp16 = AX_MEDIUM_RECEIVE_EN | AX_MEDIUM_TXFLOW_CTRLEN |
-		 AX_MEDIUM_RXFLOW_CTRLEN | AX_MEDIUM_ALWAYS_ONE |
-		 AX_MEDIUM_FULL_DUPLEX | AX_MEDIUM_GIGAMODE;
+		 AX_MEDIUM_RXFLOW_CTRLEN | AX_MEDIUM_FULL_DUPLEX |
+		 AX_MEDIUM_GIGAMODE;
 	ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_MEDIUM_STATUS_MODE,
 			  2, 2, tmp16);
 
@@ -1225,7 +1224,7 @@ static int ax88179_link_reset(struct usbnet *dev)
 	}
 
 	mode = AX_MEDIUM_RECEIVE_EN | AX_MEDIUM_TXFLOW_CTRLEN |
-	       AX_MEDIUM_RXFLOW_CTRLEN | AX_MEDIUM_ALWAYS_ONE;
+	       AX_MEDIUM_RXFLOW_CTRLEN;
 
 	ax88179_read_cmd(dev, AX_ACCESS_MAC, PHYSICAL_LINK_STATUS,
 			 1, 1, &link_sts);
@@ -1339,8 +1338,8 @@ static int ax88179_reset(struct usbnet *dev)
 
 	/* Configure default medium type => giga */
 	*tmp16 = AX_MEDIUM_RECEIVE_EN | AX_MEDIUM_TXFLOW_CTRLEN |
-		 AX_MEDIUM_RXFLOW_CTRLEN | AX_MEDIUM_ALWAYS_ONE |
-		 AX_MEDIUM_FULL_DUPLEX | AX_MEDIUM_GIGAMODE;
+		 AX_MEDIUM_RXFLOW_CTRLEN | AX_MEDIUM_FULL_DUPLEX |
+		 AX_MEDIUM_GIGAMODE;
 	ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_MEDIUM_STATUS_MODE,
 			  2, 2, tmp16);
 
-- 
cgit v1.2.3-18-g5258


From 598c45b309eb401510653fed45fe74efae93be4e Mon Sep 17 00:00:00 2001
From: Shahed Shaikh <shahed.shaikh@qlogic.com>
Date: Fri, 25 Oct 2013 10:38:36 -0400
Subject: qlcnic: Do not force adapter to perform LRO without destination IP
 check

Forcing adapter to perform LRO without destination IP check
degrades the performance.

Signed-off-by: Shahed Shaikh <shahed.shaikh@qlogic.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c
index f8adc7b01f1..b64e2bef942 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c
@@ -785,8 +785,6 @@ void qlcnic_82xx_config_intr_coalesce(struct qlcnic_adapter *adapter)
 
 #define QLCNIC_ENABLE_IPV4_LRO		1
 #define QLCNIC_ENABLE_IPV6_LRO		2
-#define QLCNIC_NO_DEST_IPV4_CHECK	(1 << 8)
-#define QLCNIC_NO_DEST_IPV6_CHECK	(2 << 8)
 
 int qlcnic_82xx_config_hw_lro(struct qlcnic_adapter *adapter, int enable)
 {
@@ -806,11 +804,10 @@ int qlcnic_82xx_config_hw_lro(struct qlcnic_adapter *adapter, int enable)
 
 	word = 0;
 	if (enable) {
-		word = QLCNIC_ENABLE_IPV4_LRO | QLCNIC_NO_DEST_IPV4_CHECK;
+		word = QLCNIC_ENABLE_IPV4_LRO;
 		if (adapter->ahw->extra_capability[0] &
 		    QLCNIC_FW_CAP2_HW_LRO_IPV6)
-			word |= QLCNIC_ENABLE_IPV6_LRO |
-				QLCNIC_NO_DEST_IPV6_CHECK;
+			word |= QLCNIC_ENABLE_IPV6_LRO;
 	}
 
 	req.words[0] = cpu_to_le64(word);
-- 
cgit v1.2.3-18-g5258


From d6994ca798f5897a4342f727b21d77e01d92f093 Mon Sep 17 00:00:00 2001
From: Shahed Shaikh <shahed.shaikh@qlogic.com>
Date: Fri, 25 Oct 2013 10:38:37 -0400
Subject: qlcnic: Do not read QLCNIC_FW_CAPABILITY_MORE_CAPS bit for 83xx
 adapter

Only 82xx adapter advertises QLCNIC_FW_CAPABILITY_MORE_CAPS bit.
Reading this bit from 83xx adapter causes the driver to skip
extra capabilities registers.

Because of this, driver was not issuing qlcnic_fw_cmd_set_drv_version()
for 83xx adapter.

This bug was introduced in commit 8af3f33db05c6d0146ad14905145a5c923770856
 ("qlcnic: Add support for 'set driver version' in 83XX").

Signed-off-by: Shahed Shaikh <shahed.shaikh@qlogic.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c | 6 +++---
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c    | 6 ++++--
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
index 3ca00e05f23..ace217c447d 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
@@ -2276,9 +2276,9 @@ int qlcnic_83xx_get_nic_info(struct qlcnic_adapter *adapter,
 		temp = (cmd.rsp.arg[8] & 0x7FFE0000) >> 17;
 		npar_info->max_linkspeed_reg_offset = temp;
 	}
-	if (npar_info->capabilities & QLCNIC_FW_CAPABILITY_MORE_CAPS)
-		memcpy(ahw->extra_capability, &cmd.rsp.arg[16],
-		       sizeof(ahw->extra_capability));
+
+	memcpy(ahw->extra_capability, &cmd.rsp.arg[16],
+	       sizeof(ahw->extra_capability));
 
 out:
 	qlcnic_free_mbx_args(&cmd);
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index 9e61eb86745..d8f4897e9e8 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -1131,7 +1131,10 @@ qlcnic_initialize_nic(struct qlcnic_adapter *adapter)
 		if (err == -EIO)
 			return err;
 		adapter->ahw->extra_capability[0] = temp;
+	} else {
+		adapter->ahw->extra_capability[0] = 0;
 	}
+
 	adapter->ahw->max_mac_filters = nic_info.max_mac_filters;
 	adapter->ahw->max_mtu = nic_info.max_mtu;
 
@@ -2159,8 +2162,7 @@ void qlcnic_set_drv_version(struct qlcnic_adapter *adapter)
 	else if (qlcnic_83xx_check(adapter))
 		fw_cmd = QLCNIC_CMD_83XX_SET_DRV_VER;
 
-	if ((ahw->capabilities & QLCNIC_FW_CAPABILITY_MORE_CAPS) &&
-	    (ahw->extra_capability[0] & QLCNIC_FW_CAPABILITY_SET_DRV_VER))
+	if (ahw->extra_capability[0] & QLCNIC_FW_CAPABILITY_SET_DRV_VER)
 		qlcnic_fw_cmd_set_drv_version(adapter, fw_cmd);
 }
 
-- 
cgit v1.2.3-18-g5258


From e9e2a904ef0a4f46ee5c845f3ae04e62b917bb6d Mon Sep 17 00:00:00 2001
From: Somnath Kotur <somnath.kotur@emulex.com>
Date: Thu, 24 Oct 2013 14:37:53 +0530
Subject: be2net: Warn users of possible broken functionality on BE2 cards with
 very old FW versions with latest driver

On very old FW versions < 4.0, the mailbox command to set interrupts
on the card succeeds even though it is not supported and should have
failed, leading to a scenario where interrupts do not work.
Hence warn users to upgrade to a suitable FW version to avoid seeing
broken functionality.

Signed-off-by: Somnath Kotur <somnath.kotur@emulex.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/emulex/benet/be.h      | 9 +++++++++
 drivers/net/ethernet/emulex/benet/be_main.c | 6 ++++++
 2 files changed, 15 insertions(+)

diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h
index db020230bd0..c99dac6a9dd 100644
--- a/drivers/net/ethernet/emulex/benet/be.h
+++ b/drivers/net/ethernet/emulex/benet/be.h
@@ -696,6 +696,15 @@ static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
 	return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
 }
 
+static inline int fw_major_num(const char *fw_ver)
+{
+	int fw_major = 0;
+
+	sscanf(fw_ver, "%d.", &fw_major);
+
+	return fw_major;
+}
+
 extern void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm,
 		u16 num_popped);
 extern void be_link_status_update(struct be_adapter *adapter, u8 link_status);
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 2c38cc40211..53ed58b492c 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -3247,6 +3247,12 @@ static int be_setup(struct be_adapter *adapter)
 
 	be_cmd_get_fw_ver(adapter, adapter->fw_ver, adapter->fw_on_flash);
 
+	if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
+		dev_err(dev, "Firmware on card is old(%s), IRQs may not work.",
+			adapter->fw_ver);
+		dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
+	}
+
 	if (adapter->vlans_added)
 		be_vid_config(adapter);
 
-- 
cgit v1.2.3-18-g5258


From bc15afa39ecc16f01c3389d15d8f6015a427fe85 Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Thu, 24 Oct 2013 08:44:25 -0700
Subject: tcp: fix SYNACK RTT estimation in Fast Open

tp->lsndtime may not always be the SYNACK timestamp if a passive
Fast Open socket sends data before handshake completes. And if the
remote acknowledges both the data and the SYNACK, the RTT sample
is already taken in tcp_ack(), so no need to call
tcp_update_ack_rtt() in tcp_synack_rtt_meas() aagain.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a16b01b537b..305cd0526a7 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2871,14 +2871,19 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
 }
 
 /* Compute time elapsed between (last) SYNACK and the ACK completing 3WHS. */
-static void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req)
+static void tcp_synack_rtt_meas(struct sock *sk, const u32 synack_stamp)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	s32 seq_rtt = -1;
 
-	if (tp->lsndtime && !tp->total_retrans)
-		seq_rtt = tcp_time_stamp - tp->lsndtime;
-	tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt, -1);
+	if (synack_stamp && !tp->total_retrans)
+		seq_rtt = tcp_time_stamp - synack_stamp;
+
+	/* If the ACK acks both the SYNACK and the (Fast Open'd) data packets
+	 * sent in SYN_RECV, SYNACK RTT is the smooth RTT computed in tcp_ack()
+	 */
+	if (!tp->srtt)
+		tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt, -1);
 }
 
 static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
@@ -5587,6 +5592,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 	struct request_sock *req;
 	int queued = 0;
 	bool acceptable;
+	u32 synack_stamp;
 
 	tp->rx_opt.saw_tstamp = 0;
 
@@ -5669,9 +5675,11 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		 * so release it.
 		 */
 		if (req) {
+			synack_stamp = tcp_rsk(req)->snt_synack;
 			tp->total_retrans = req->num_retrans;
 			reqsk_fastopen_remove(sk, req, false);
 		} else {
+			synack_stamp = tp->lsndtime;
 			/* Make sure socket is routed, for correct metrics. */
 			icsk->icsk_af_ops->rebuild_header(sk);
 			tcp_init_congestion_control(sk);
@@ -5694,7 +5702,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
 		tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale;
 		tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
-		tcp_synack_rtt_meas(sk, req);
+		tcp_synack_rtt_meas(sk, synack_stamp);
 
 		if (tp->rx_opt.tstamp_ok)
 			tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
-- 
cgit v1.2.3-18-g5258


From 2909d874f34eae157aecab0af27c6dc4a1751f8f Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Thu, 24 Oct 2013 08:55:25 -0700
Subject: tcp: only take RTT from timestamps if new data is acked

Patch ed08495c3 "tcp: use RTT from SACK for RTO" has a bug that
it does not check if the ACK acknowledge new data before taking
the RTT sample from TCP timestamps. This patch adds the check
back as required by the RFC.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 305cd0526a7..6ffe41a82c0 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2856,7 +2856,8 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
 	 * left edge of the send window.
 	 * See draft-ietf-tcplw-high-performance-00, section 3.3.
 	 */
-	if (seq_rtt < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
+	if (seq_rtt < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
+	    flag & FLAG_ACKED)
 		seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
 
 	if (seq_rtt < 0)
-- 
cgit v1.2.3-18-g5258


From 2f715c1dde6e1760f3101358dc26f8c9489be0bf Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Thu, 24 Oct 2013 08:59:27 -0700
Subject: tcp: do not rearm RTO when future data are sacked

Patch ed08495c3 "tcp: use RTT from SACK for RTO" always re-arms RTO upon
obtaining a RTT sample from newly sacked data.

But technically RTO should only be re-armed when the data sent before
the last (re)transmission of write queue head are (s)acked. Otherwise
the RTO may continue to extend during loss recovery on data sent
in the future.

Note that RTTs from ACK or timestamps do not have this problem, as the RTT
source must be from data sent before.

The new RTO re-arm policy is
1) Always re-arm RTO if SND.UNA is advanced
2) Re-arm RTO if sack RTT is available, provided the sacked data was
   sent before the last time write_queue_head was sent.

Signed-off-by: Larry Brakmo <brakmo@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 6ffe41a82c0..068c8fb0d15 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2987,6 +2987,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 	s32 seq_rtt = -1;
 	s32 ca_seq_rtt = -1;
 	ktime_t last_ackt = net_invalid_timestamp();
+	bool rtt_update;
 
 	while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
 		struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
@@ -3063,14 +3064,13 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 	if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
 		flag |= FLAG_SACK_RENEGING;
 
-	if (tcp_ack_update_rtt(sk, flag, seq_rtt, sack_rtt) ||
-	    (flag & FLAG_ACKED))
-		tcp_rearm_rto(sk);
+	rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt, sack_rtt);
 
 	if (flag & FLAG_ACKED) {
 		const struct tcp_congestion_ops *ca_ops
 			= inet_csk(sk)->icsk_ca_ops;
 
+		tcp_rearm_rto(sk);
 		if (unlikely(icsk->icsk_mtup.probe_size &&
 			     !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) {
 			tcp_mtup_probe_success(sk);
@@ -3109,6 +3109,13 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 
 			ca_ops->pkts_acked(sk, pkts_acked, rtt_us);
 		}
+	} else if (skb && rtt_update && sack_rtt >= 0 &&
+		   sack_rtt > (s32)(now - TCP_SKB_CB(skb)->when)) {
+		/* Do not re-arm RTO if the sack RTT is measured from data sent
+		 * after when the head was last (re)transmitted. Otherwise the
+		 * timeout may continue to extend in loss recovery.
+		 */
+		tcp_rearm_rto(sk);
 	}
 
 #if FASTRETRANS_DEBUG > 0
-- 
cgit v1.2.3-18-g5258


From ad86de802d0ea6776eccd0a2526ba31101d89267 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Thu, 24 Oct 2013 18:56:57 -0700
Subject: Documentation/networking: netdev-FAQ typo corrections

Various typo fixes to netdev-FAQ.txt:
- capitalize Linux
- hyphenate dual-word adjectives
- minor punctuation fixes

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Acked-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/netdev-FAQ.txt | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/Documentation/networking/netdev-FAQ.txt b/Documentation/networking/netdev-FAQ.txt
index d9112f01c44..3a2c5860a78 100644
--- a/Documentation/networking/netdev-FAQ.txt
+++ b/Documentation/networking/netdev-FAQ.txt
@@ -4,23 +4,23 @@ Information you need to know about netdev
 
 Q: What is netdev?
 
-A: It is a mailing list for all network related linux stuff.  This includes
+A: It is a mailing list for all network-related Linux stuff.  This includes
    anything found under net/  (i.e. core code like IPv6) and drivers/net
-   (i.e. hardware specific drivers) in the linux source tree.
+   (i.e. hardware specific drivers) in the Linux source tree.
 
    Note that some subsystems (e.g. wireless drivers) which have a high volume
    of traffic have their own specific mailing lists.
 
-   The netdev list is managed (like many other linux mailing lists) through
+   The netdev list is managed (like many other Linux mailing lists) through
    VGER ( http://vger.kernel.org/ ) and archives can be found below:
 
 	http://marc.info/?l=linux-netdev
 	http://www.spinics.net/lists/netdev/
 
-   Aside from subsystems like that mentioned above, all network related linux
-   development (i.e. RFC, review, comments, etc) takes place on netdev.
+   Aside from subsystems like that mentioned above, all network-related Linux
+   development (i.e. RFC, review, comments, etc.) takes place on netdev.
 
-Q: How do the changes posted to netdev make their way into linux?
+Q: How do the changes posted to netdev make their way into Linux?
 
 A: There are always two trees (git repositories) in play.  Both are driven
    by David Miller, the main network maintainer.  There is the "net" tree,
@@ -35,7 +35,7 @@ A: There are always two trees (git repositories) in play.  Both are driven
 Q: How often do changes from these trees make it to the mainline Linus tree?
 
 A: To understand this, you need to know a bit of background information
-   on the cadence of linux development.  Each new release starts off with
+   on the cadence of Linux development.  Each new release starts off with
    a two week "merge window" where the main maintainers feed their new
    stuff to Linus for merging into the mainline tree.  After the two weeks,
    the merge window is closed, and it is called/tagged "-rc1".  No new
@@ -46,7 +46,7 @@ A: To understand this, you need to know a bit of background information
    things are in a state of churn), and a week after the last vX.Y-rcN
    was done, the official "vX.Y" is released.
 
-   Relating that to netdev:  At the beginning of the 2 week merge window,
+   Relating that to netdev:  At the beginning of the 2-week merge window,
    the net-next tree will be closed - no new changes/features.  The
    accumulated new content of the past ~10 weeks will be passed onto
    mainline/Linus via a pull request for vX.Y -- at the same time,
@@ -59,12 +59,12 @@ A: To understand this, you need to know a bit of background information
    IMPORTANT:  Do not send new net-next content to netdev during the
    period during which net-next tree is closed.
 
-   Shortly after the two weeks have passed, (and vX.Y-rc1 is released) the
+   Shortly after the two weeks have passed (and vX.Y-rc1 is released), the
    tree for net-next reopens to collect content for the next (vX.Y+1) release.
 
    If you aren't subscribed to netdev and/or are simply unsure if net-next
    has re-opened yet, simply check the net-next git repository link above for
-   any new networking related commits.
+   any new networking-related commits.
 
    The "net" tree continues to collect fixes for the vX.Y content, and
    is fed back to Linus at regular (~weekly) intervals.  Meaning that the
@@ -217,7 +217,7 @@ A: Attention to detail.  Re-read your own work as if you were the
    to why it happens, and then if necessary, explain why the fix proposed
    is the best way to get things done.   Don't mangle whitespace, and as
    is common, don't mis-indent function arguments that span multiple lines.
-   If it is your 1st patch, mail it to yourself so you can test apply
+   If it is your first patch, mail it to yourself so you can test apply
    it to an unpatched tree to confirm infrastructure didn't mangle it.
 
    Finally, go back and read Documentation/SubmittingPatches to be
-- 
cgit v1.2.3-18-g5258


From fc59d5bdf1e3dca0336d155e55d812286db075ad Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 27 Oct 2013 16:26:43 -0700
Subject: pkt_sched: fq: clear time_next_packet for reused flows

When a socket is freed/reallocated, we need to clear time_next_packet
or else we can inherit a prior value and delay first packets of the
new flow.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_fq.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index a9dfdda9ed1..fdc041c5785 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -255,6 +255,7 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
 				     f->socket_hash != sk->sk_hash)) {
 				f->credit = q->initial_quantum;
 				f->socket_hash = sk->sk_hash;
+				f->time_next_packet = 0ULL;
 			}
 			return f;
 		}
-- 
cgit v1.2.3-18-g5258


From eeb1b73378b560e00ff1da2ef09fed9254f4e128 Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Fri, 25 Oct 2013 10:21:32 +0200
Subject: xfrm: Increase the garbage collector threshold

With the removal of the routing cache, we lost the
option to tweak the garbage collector threshold
along with the maximum routing cache size. So git
commit 703fb94ec ("xfrm: Fix the gc threshold value
for ipv4") moved back to a static threshold.

It turned out that the current threshold before we
start garbage collecting is much to small for some
workloads, so increase it from 1024 to 32768. This
means that we start the garbage collector if we have
more than 32768 dst entries in the system and refuse
new allocations if we are above 65536.

Reported-by: Wolfgang Walter <linux@stwm.de>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/ipv4/xfrm4_policy.c | 2 +-
 net/ipv6/xfrm6_policy.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index ccde54248c8..4764ee48447 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -236,7 +236,7 @@ static struct dst_ops xfrm4_dst_ops = {
 	.destroy =		xfrm4_dst_destroy,
 	.ifdown =		xfrm4_dst_ifdown,
 	.local_out =		__ip_local_out,
-	.gc_thresh =		1024,
+	.gc_thresh =		32768,
 };
 
 static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 08ed2772b7a..dd503a3535f 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -285,7 +285,7 @@ static struct dst_ops xfrm6_dst_ops = {
 	.destroy =		xfrm6_dst_destroy,
 	.ifdown =		xfrm6_dst_ifdown,
 	.local_out =		__ip6_local_out,
-	.gc_thresh =		1024,
+	.gc_thresh =		32768,
 };
 
 static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
-- 
cgit v1.2.3-18-g5258


From 0d08c42cf9a71530fef5ebcfe368f38f2dd0476f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 25 Oct 2013 17:26:17 -0700
Subject: tcp: gso: fix truesize tracking

commit 6ff50cd55545 ("tcp: gso: do not generate out of order packets")
had an heuristic that can trigger a warning in skb_try_coalesce(),
because skb->truesize of the gso segments were exactly set to mss.

This breaks the requirement that

skb->truesize >= skb->len + truesizeof(struct sk_buff);

It can trivially be reproduced by :

ifconfig lo mtu 1500
ethtool -K lo tso off
netperf

As the skbs are looped into the TCP networking stack, skb_try_coalesce()
warns us of these skb under-estimating their truesize.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_offload.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 3a7525e6c08..533c58a5cfb 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -18,6 +18,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
 				netdev_features_t features)
 {
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
+	unsigned int sum_truesize = 0;
 	struct tcphdr *th;
 	unsigned int thlen;
 	unsigned int seq;
@@ -102,13 +103,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
 		if (copy_destructor) {
 			skb->destructor = gso_skb->destructor;
 			skb->sk = gso_skb->sk;
-			/* {tcp|sock}_wfree() use exact truesize accounting :
-			 * sum(skb->truesize) MUST be exactly be gso_skb->truesize
-			 * So we account mss bytes of 'true size' for each segment.
-			 * The last segment will contain the remaining.
-			 */
-			skb->truesize = mss;
-			gso_skb->truesize -= mss;
+			sum_truesize += skb->truesize;
 		}
 		skb = skb->next;
 		th = tcp_hdr(skb);
@@ -125,7 +120,9 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
 	if (copy_destructor) {
 		swap(gso_skb->sk, skb->sk);
 		swap(gso_skb->destructor, skb->destructor);
-		swap(gso_skb->truesize, skb->truesize);
+		sum_truesize += skb->truesize;
+		atomic_add(sum_truesize - gso_skb->truesize,
+			   &skb->sk->sk_wmem_alloc);
 	}
 
 	delta = htonl(oldlen + (skb_tail_pointer(skb) -
-- 
cgit v1.2.3-18-g5258


From e3ed4eaef4932fd3867465784d11a36deaa6d22c Mon Sep 17 00:00:00 2001
From: Dmitry Kravkov <dmitry@broadcom.com>
Date: Sun, 27 Oct 2013 13:07:00 +0200
Subject: bnx2x: prevent FW assert on low mem during unload

Buffers for FW statistics were allocated at an inappropriate time; In a machine
where the driver encounters problems allocating all of its queues, the driver
would still create FW requests for the statistics of the non-existing queues.
The wrong order of memory allocation could lead to zeroed statistics messages
being sent, leading to fw assert in case function 0 was down.

This changes the order of allocations, guaranteeing that statistic requests will
only be generated for actual queues.

Signed-off-by: Dmitry Kravkov <dmitry@broadcom.com>
Signed-off-by: Yuval Mintz <yuvalmin@broadcom.com>
Signed-off-by: Ariel Elior <ariele@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 4ab4c89c60c..74d6486fccf 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -2545,10 +2545,6 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
 		}
 	}
 
-	/* Allocated memory for FW statistics  */
-	if (bnx2x_alloc_fw_stats_mem(bp))
-		LOAD_ERROR_EXIT(bp, load_error0);
-
 	/* need to be done after alloc mem, since it's self adjusting to amount
 	 * of memory available for RSS queues
 	 */
@@ -2558,6 +2554,10 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
 		LOAD_ERROR_EXIT(bp, load_error0);
 	}
 
+	/* Allocated memory for FW statistics  */
+	if (bnx2x_alloc_fw_stats_mem(bp))
+		LOAD_ERROR_EXIT(bp, load_error0);
+
 	/* request pf to initialize status blocks */
 	if (IS_VF(bp)) {
 		rc = bnx2x_vfpf_init(bp);
@@ -2812,8 +2812,8 @@ load_error1:
 	if (IS_PF(bp))
 		bnx2x_clear_pf_load(bp);
 load_error0:
-	bnx2x_free_fp_mem(bp);
 	bnx2x_free_fw_stats_mem(bp);
+	bnx2x_free_fp_mem(bp);
 	bnx2x_free_mem(bp);
 
 	return rc;
-- 
cgit v1.2.3-18-g5258


From 826cb7b43b5bd8995f84edeacbbf569946a58f7c Mon Sep 17 00:00:00 2001
From: Ariel Elior <ariele@broadcom.com>
Date: Sun, 27 Oct 2013 13:07:01 +0200
Subject: bnx2x: Disable VF access on PF removal

When the bnx2x driver is rmmoded, if VFs of a given PF will be assigned
to a VM then that PF will be unable to call `pci_disable_sriov()'.

If for that same PF there would also exist unassigned VFs in the hypervisor,
the result will be that after the removal there will still be virtual PCI
functions on the hypervisor.
If the bnx2x module were to be re-inserted, the result will be that the VFs
on the hypervisor will be re-probed directly following the PF's probe, even
though that in regular loading flow sriov is only enabled once PF is loaded.
The probed VF will then try to access its bar, causing a PCI error as the HW
is not in a state enabling such a request.

This patch adds a missing disablement procedure to the PF's removal, one that
sets registers viewable to the VF to indicate that the VFs have no permission
to access the bar, thus resulting in probe errors instead of PCI errors.

Signed-off-by: Ariel Elior <ariele@broadcom.com>
Signed-off-by: Yuval Mintz <yuvalmin@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index bf08ad68b40..5e07efb6ec1 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -2018,6 +2018,8 @@ failed:
 
 void bnx2x_iov_remove_one(struct bnx2x *bp)
 {
+	int vf_idx;
+
 	/* if SRIOV is not enabled there's nothing to do */
 	if (!IS_SRIOV(bp))
 		return;
@@ -2026,6 +2028,18 @@ void bnx2x_iov_remove_one(struct bnx2x *bp)
 	pci_disable_sriov(bp->pdev);
 	DP(BNX2X_MSG_IOV, "sriov disabled\n");
 
+	/* disable access to all VFs */
+	for (vf_idx = 0; vf_idx < bp->vfdb->sriov.total; vf_idx++) {
+		bnx2x_pretend_func(bp,
+				   HW_VF_HANDLE(bp,
+						bp->vfdb->sriov.first_vf_in_pf +
+						vf_idx));
+		DP(BNX2X_MSG_IOV, "disabling internal access for vf %d\n",
+		   bp->vfdb->sriov.first_vf_in_pf + vf_idx);
+		bnx2x_vf_enable_internal(bp, 0);
+		bnx2x_pretend_func(bp, BP_ABS_FUNC(bp));
+	}
+
 	/* free vf database */
 	__bnx2x_iov_free_vfdb(bp);
 }
@@ -3197,7 +3211,7 @@ int bnx2x_enable_sriov(struct bnx2x *bp)
 	 * the "acquire" messages to appear on the VF PF channel.
 	 */
 	DP(BNX2X_MSG_IOV, "about to call enable sriov\n");
-	pci_disable_sriov(bp->pdev);
+	bnx2x_disable_sriov(bp);
 	rc = pci_enable_sriov(bp->pdev, req_vfs);
 	if (rc) {
 		BNX2X_ERR("pci_enable_sriov failed with %d\n", rc);
-- 
cgit v1.2.3-18-g5258


From 262e827fe745642589450ae241b7afd3912c3f25 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Sun, 27 Oct 2013 21:02:39 +0000
Subject: cxgb3: Fix length calculation in write_ofld_wr() on 32-bit
 architectures

The length calculation here is now invalid on 32-bit architectures,
since sk_buff::tail is a pointer and sk_buff::transport_header is
an integer offset:

drivers/net/ethernet/chelsio/cxgb3/sge.c: In function 'write_ofld_wr':
drivers/net/ethernet/chelsio/cxgb3/sge.c:1603:9: warning: passing argument 4 of 'make_sgl' makes integer from pointer without a cast [enabled by default]
         adap->pdev);
         ^
drivers/net/ethernet/chelsio/cxgb3/sge.c:964:28: note: expected 'unsigned int' but argument is of type 'sk_buff_data_t'
 static inline unsigned int make_sgl(const struct sk_buff *skb,
                            ^

Use the appropriate skb accessor functions.

Compile-tested only.

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Fixes: 1a37e412a022 ('net: Use 16bits for *_headers fields of struct skbuff')
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb3/sge.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb3/sge.c b/drivers/net/ethernet/chelsio/cxgb3/sge.c
index 9c89dc8fe10..632b318eb38 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/sge.c
@@ -1599,7 +1599,8 @@ static void write_ofld_wr(struct adapter *adap, struct sk_buff *skb,
 	flits = skb_transport_offset(skb) / 8;
 	sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
 	sgl_flits = make_sgl(skb, sgp, skb_transport_header(skb),
-			     skb->tail - skb->transport_header,
+			     skb_tail_pointer(skb) -
+			     skb_transport_header(skb),
 			     adap->pdev);
 	if (need_skb_unmap()) {
 		setup_deferred_unmapping(skb, adap->pdev, sgp, sgl_flits);
-- 
cgit v1.2.3-18-g5258


From 059dfa6a93b779516321e5112db9d7621b1367ba Mon Sep 17 00:00:00 2001
From: Wei Liu <wei.liu2@citrix.com>
Date: Mon, 28 Oct 2013 12:07:57 +0000
Subject: xen-netback: use jiffies_64 value to calculate credit timeout

time_after_eq() only works if the delta is < MAX_ULONG/2.

For a 32bit Dom0, if netfront sends packets at a very low rate, the time
between subsequent calls to tx_credit_exceeded() may exceed MAX_ULONG/2
and the test for timer_after_eq() will be incorrect. Credit will not be
replenished and the guest may become unable to send packets (e.g., if
prior to the long gap, all credit was exhausted).

Use jiffies_64 variant to mitigate this problem for 32bit Dom0.

Suggested-by: Jan Beulich <jbeulich@suse.com>
Signed-off-by: Wei Liu <wei.liu2@citrix.com>
Reviewed-by: David Vrabel <david.vrabel@citrix.com>
Cc: Ian Campbell <ian.campbell@citrix.com>
Cc: Jason Luan <jianhai.luan@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/xen-netback/common.h    |  1 +
 drivers/net/xen-netback/interface.c |  3 +--
 drivers/net/xen-netback/netback.c   | 10 +++++-----
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 5715318d6ba..400fea1de08 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -163,6 +163,7 @@ struct xenvif {
 	unsigned long   credit_usec;
 	unsigned long   remaining_credit;
 	struct timer_list credit_timeout;
+	u64 credit_window_start;
 
 	/* Statistics */
 	unsigned long rx_gso_checksum_fixup;
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 01bb854c7f6..459935a6bfa 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -312,8 +312,7 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
 	vif->credit_bytes = vif->remaining_credit = ~0UL;
 	vif->credit_usec  = 0UL;
 	init_timer(&vif->credit_timeout);
-	/* Initialize 'expires' now: it's used to track the credit window. */
-	vif->credit_timeout.expires = jiffies;
+	vif->credit_window_start = get_jiffies_64();
 
 	dev->netdev_ops	= &xenvif_netdev_ops;
 	dev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index f3e591c611d..900da4b243a 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -1185,9 +1185,8 @@ out:
 
 static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
 {
-	unsigned long now = jiffies;
-	unsigned long next_credit =
-		vif->credit_timeout.expires +
+	u64 now = get_jiffies_64();
+	u64 next_credit = vif->credit_window_start +
 		msecs_to_jiffies(vif->credit_usec / 1000);
 
 	/* Timer could already be pending in rare cases. */
@@ -1195,8 +1194,8 @@ static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
 		return true;
 
 	/* Passed the point where we can replenish credit? */
-	if (time_after_eq(now, next_credit)) {
-		vif->credit_timeout.expires = now;
+	if (time_after_eq64(now, next_credit)) {
+		vif->credit_window_start = now;
 		tx_add_credit(vif);
 	}
 
@@ -1208,6 +1207,7 @@ static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
 			tx_credit_callback;
 		mod_timer(&vif->credit_timeout,
 			  next_credit);
+		vif->credit_window_start = next_credit;
 
 		return true;
 	}
-- 
cgit v1.2.3-18-g5258


From d954777324ffcba0b2f8119c102237426c654eeb Mon Sep 17 00:00:00 2001
From: Holger Eitzenberger <holger@eitzenberger.org>
Date: Mon, 28 Oct 2013 14:42:33 +0100
Subject: netfilter: xt_NFQUEUE: fix --queue-bypass regression

V3 of the NFQUEUE target ignores the --queue-bypass flag,
causing packets to be dropped when the userspace listener
isn't running.

Regression is in since 8746ddcf12bb26 ("netfilter: xt_NFQUEUE:
introduce CPU fanout").

Reported-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Holger Eitzenberger <holger@eitzenberger.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/xt_NFQUEUE.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index 1e2fae32f81..ed00fef5899 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -147,6 +147,7 @@ nfqueue_tg_v3(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_NFQ_info_v3 *info = par->targinfo;
 	u32 queue = info->queuenum;
+	int ret;
 
 	if (info->queues_total > 1) {
 		if (info->flags & NFQ_FLAG_CPU_FANOUT) {
@@ -157,7 +158,11 @@ nfqueue_tg_v3(struct sk_buff *skb, const struct xt_action_param *par)
 			queue = nfqueue_hash(skb, par);
 	}
 
-	return NF_QUEUE_NR(queue);
+	ret = NF_QUEUE_NR(queue);
+	if (info->flags & NFQ_FLAG_BYPASS)
+		ret |= NF_VERDICT_FLAG_QUEUE_BYPASS;
+
+	return ret;
 }
 
 static struct xt_target nfqueue_tg_reg[] __read_mostly = {
-- 
cgit v1.2.3-18-g5258


From 706e282b6975305285980ddd903bcd246d2e4fcd Mon Sep 17 00:00:00 2001
From: Michael Drüing <michael@drueing.de>
Date: Mon, 28 Oct 2013 18:33:12 +0100
Subject: net: x25: Fix dead URLs in Kconfig
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Update the URLs in the Kconfig file to the new pages at sangoma.com and cisco.com

Signed-off-by: Michael Drüing <michael@drueing.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/x25/Kconfig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/x25/Kconfig b/net/x25/Kconfig
index c959312c45e..e2fa133f9fb 100644
--- a/net/x25/Kconfig
+++ b/net/x25/Kconfig
@@ -16,8 +16,8 @@ config X25
 	  if you want that) and the lower level data link layer protocol LAPB
 	  (say Y to "LAPB Data Link Driver" below if you want that).
 
-	  You can read more about X.25 at <http://www.sangoma.com/x25.htm> and
-	  <http://www.cisco.com/univercd/cc/td/doc/product/software/ios11/cbook/cx25.htm>.
+	  You can read more about X.25 at <http://www.sangoma.com/tutorials/x25/> and
+	  <http://docwiki.cisco.com/wiki/X.25>.
 	  Information about X.25 for Linux is contained in the files
 	  <file:Documentation/networking/x25.txt> and
 	  <file:Documentation/networking/x25-iface.txt>.
-- 
cgit v1.2.3-18-g5258


From 06499098a02b9ed906a7b6060f2c60fb813918d4 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vyasevic@redhat.com>
Date: Mon, 28 Oct 2013 15:45:07 -0400
Subject: bridge: pass correct vlan id to multicast code

Currently multicast code attempts to extrace the vlan id from
the skb even when vlan filtering is disabled.  This can lead
to mdb entries being created with the wrong vlan id.
Pass the already extracted vlan id to the multicast
filtering code to make the correct id is used in
creation as well as lookup.

Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
Acked-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_device.c    |  2 +-
 net/bridge/br_input.c     |  2 +-
 net/bridge/br_multicast.c | 44 +++++++++++++++++++-------------------------
 net/bridge/br_private.h   |  6 ++++--
 4 files changed, 25 insertions(+), 29 deletions(-)

diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index ca04163635d..e6b7fecb3af 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -64,7 +64,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 			br_flood_deliver(br, skb, false);
 			goto out;
 		}
-		if (br_multicast_rcv(br, NULL, skb)) {
+		if (br_multicast_rcv(br, NULL, skb, vid)) {
 			kfree_skb(skb);
 			goto out;
 		}
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index a2fd37ec35f..7e73c32e205 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -80,7 +80,7 @@ int br_handle_frame_finish(struct sk_buff *skb)
 		br_fdb_update(br, p, eth_hdr(skb)->h_source, vid);
 
 	if (!is_broadcast_ether_addr(dest) && is_multicast_ether_addr(dest) &&
-	    br_multicast_rcv(br, p, skb))
+	    br_multicast_rcv(br, p, skb, vid))
 		goto drop;
 
 	if (p->state == BR_STATE_LEARNING)
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 8b0b610ca2c..686284ff3d6 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -947,7 +947,8 @@ void br_multicast_disable_port(struct net_bridge_port *port)
 
 static int br_ip4_multicast_igmp3_report(struct net_bridge *br,
 					 struct net_bridge_port *port,
-					 struct sk_buff *skb)
+					 struct sk_buff *skb,
+					 u16 vid)
 {
 	struct igmpv3_report *ih;
 	struct igmpv3_grec *grec;
@@ -957,12 +958,10 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br,
 	int type;
 	int err = 0;
 	__be32 group;
-	u16 vid = 0;
 
 	if (!pskb_may_pull(skb, sizeof(*ih)))
 		return -EINVAL;
 
-	br_vlan_get_tag(skb, &vid);
 	ih = igmpv3_report_hdr(skb);
 	num = ntohs(ih->ngrec);
 	len = sizeof(*ih);
@@ -1005,7 +1004,8 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br,
 #if IS_ENABLED(CONFIG_IPV6)
 static int br_ip6_multicast_mld2_report(struct net_bridge *br,
 					struct net_bridge_port *port,
-					struct sk_buff *skb)
+					struct sk_buff *skb,
+					u16 vid)
 {
 	struct icmp6hdr *icmp6h;
 	struct mld2_grec *grec;
@@ -1013,12 +1013,10 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br,
 	int len;
 	int num;
 	int err = 0;
-	u16 vid = 0;
 
 	if (!pskb_may_pull(skb, sizeof(*icmp6h)))
 		return -EINVAL;
 
-	br_vlan_get_tag(skb, &vid);
 	icmp6h = icmp6_hdr(skb);
 	num = ntohs(icmp6h->icmp6_dataun.un_data16[1]);
 	len = sizeof(*icmp6h);
@@ -1141,7 +1139,8 @@ static void br_multicast_query_received(struct net_bridge *br,
 
 static int br_ip4_multicast_query(struct net_bridge *br,
 				  struct net_bridge_port *port,
-				  struct sk_buff *skb)
+				  struct sk_buff *skb,
+				  u16 vid)
 {
 	const struct iphdr *iph = ip_hdr(skb);
 	struct igmphdr *ih = igmp_hdr(skb);
@@ -1153,7 +1152,6 @@ static int br_ip4_multicast_query(struct net_bridge *br,
 	unsigned long now = jiffies;
 	__be32 group;
 	int err = 0;
-	u16 vid = 0;
 
 	spin_lock(&br->multicast_lock);
 	if (!netif_running(br->dev) ||
@@ -1189,7 +1187,6 @@ static int br_ip4_multicast_query(struct net_bridge *br,
 	if (!group)
 		goto out;
 
-	br_vlan_get_tag(skb, &vid);
 	mp = br_mdb_ip4_get(mlock_dereference(br->mdb, br), group, vid);
 	if (!mp)
 		goto out;
@@ -1219,7 +1216,8 @@ out:
 #if IS_ENABLED(CONFIG_IPV6)
 static int br_ip6_multicast_query(struct net_bridge *br,
 				  struct net_bridge_port *port,
-				  struct sk_buff *skb)
+				  struct sk_buff *skb,
+				  u16 vid)
 {
 	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
 	struct mld_msg *mld;
@@ -1231,7 +1229,6 @@ static int br_ip6_multicast_query(struct net_bridge *br,
 	unsigned long now = jiffies;
 	const struct in6_addr *group = NULL;
 	int err = 0;
-	u16 vid = 0;
 
 	spin_lock(&br->multicast_lock);
 	if (!netif_running(br->dev) ||
@@ -1265,7 +1262,6 @@ static int br_ip6_multicast_query(struct net_bridge *br,
 	if (!group)
 		goto out;
 
-	br_vlan_get_tag(skb, &vid);
 	mp = br_mdb_ip6_get(mlock_dereference(br->mdb, br), group, vid);
 	if (!mp)
 		goto out;
@@ -1439,7 +1435,8 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br,
 
 static int br_multicast_ipv4_rcv(struct net_bridge *br,
 				 struct net_bridge_port *port,
-				 struct sk_buff *skb)
+				 struct sk_buff *skb,
+				 u16 vid)
 {
 	struct sk_buff *skb2 = skb;
 	const struct iphdr *iph;
@@ -1447,7 +1444,6 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
 	unsigned int len;
 	unsigned int offset;
 	int err;
-	u16 vid = 0;
 
 	/* We treat OOM as packet loss for now. */
 	if (!pskb_may_pull(skb, sizeof(*iph)))
@@ -1508,7 +1504,6 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
 
 	err = 0;
 
-	br_vlan_get_tag(skb2, &vid);
 	BR_INPUT_SKB_CB(skb)->igmp = 1;
 	ih = igmp_hdr(skb2);
 
@@ -1519,10 +1514,10 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
 		err = br_ip4_multicast_add_group(br, port, ih->group, vid);
 		break;
 	case IGMPV3_HOST_MEMBERSHIP_REPORT:
-		err = br_ip4_multicast_igmp3_report(br, port, skb2);
+		err = br_ip4_multicast_igmp3_report(br, port, skb2, vid);
 		break;
 	case IGMP_HOST_MEMBERSHIP_QUERY:
-		err = br_ip4_multicast_query(br, port, skb2);
+		err = br_ip4_multicast_query(br, port, skb2, vid);
 		break;
 	case IGMP_HOST_LEAVE_MESSAGE:
 		br_ip4_multicast_leave_group(br, port, ih->group, vid);
@@ -1540,7 +1535,8 @@ err_out:
 #if IS_ENABLED(CONFIG_IPV6)
 static int br_multicast_ipv6_rcv(struct net_bridge *br,
 				 struct net_bridge_port *port,
-				 struct sk_buff *skb)
+				 struct sk_buff *skb,
+				 u16 vid)
 {
 	struct sk_buff *skb2;
 	const struct ipv6hdr *ip6h;
@@ -1550,7 +1546,6 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
 	unsigned int len;
 	int offset;
 	int err;
-	u16 vid = 0;
 
 	if (!pskb_may_pull(skb, sizeof(*ip6h)))
 		return -EINVAL;
@@ -1640,7 +1635,6 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
 
 	err = 0;
 
-	br_vlan_get_tag(skb, &vid);
 	BR_INPUT_SKB_CB(skb)->igmp = 1;
 
 	switch (icmp6_type) {
@@ -1657,10 +1651,10 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
 		break;
 	    }
 	case ICMPV6_MLD2_REPORT:
-		err = br_ip6_multicast_mld2_report(br, port, skb2);
+		err = br_ip6_multicast_mld2_report(br, port, skb2, vid);
 		break;
 	case ICMPV6_MGM_QUERY:
-		err = br_ip6_multicast_query(br, port, skb2);
+		err = br_ip6_multicast_query(br, port, skb2, vid);
 		break;
 	case ICMPV6_MGM_REDUCTION:
 	    {
@@ -1681,7 +1675,7 @@ out:
 #endif
 
 int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port,
-		     struct sk_buff *skb)
+		     struct sk_buff *skb, u16 vid)
 {
 	BR_INPUT_SKB_CB(skb)->igmp = 0;
 	BR_INPUT_SKB_CB(skb)->mrouters_only = 0;
@@ -1691,10 +1685,10 @@ int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port,
 
 	switch (skb->protocol) {
 	case htons(ETH_P_IP):
-		return br_multicast_ipv4_rcv(br, port, skb);
+		return br_multicast_ipv4_rcv(br, port, skb, vid);
 #if IS_ENABLED(CONFIG_IPV6)
 	case htons(ETH_P_IPV6):
-		return br_multicast_ipv6_rcv(br, port, skb);
+		return br_multicast_ipv6_rcv(br, port, skb, vid);
 #endif
 	}
 
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index e14c33b42f7..2e8244efb26 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -451,7 +451,8 @@ extern int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __us
 extern unsigned int br_mdb_rehash_seq;
 extern int br_multicast_rcv(struct net_bridge *br,
 			    struct net_bridge_port *port,
-			    struct sk_buff *skb);
+			    struct sk_buff *skb,
+			    u16 vid);
 extern struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
 					       struct sk_buff *skb, u16 vid);
 extern void br_multicast_add_port(struct net_bridge_port *port);
@@ -522,7 +523,8 @@ static inline bool br_multicast_querier_exists(struct net_bridge *br,
 #else
 static inline int br_multicast_rcv(struct net_bridge *br,
 				   struct net_bridge_port *port,
-				   struct sk_buff *skb)
+				   struct sk_buff *skb,
+				   u16 vid)
 {
 	return 0;
 }
-- 
cgit v1.2.3-18-g5258


From ec9debbd9a88d8ea86c488d6ffcac419ee7d46d9 Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Tue, 29 Oct 2013 15:11:07 +0800
Subject: virtio-net: correctly handle cpu hotplug notifier during resuming

commit 3ab098df35f8b98b6553edc2e40234af512ba877 (virtio-net: don't respond to
cpu hotplug notifier if we're not ready) tries to bypass the cpu hotplug
notifier by checking the config_enable and does nothing is it was false. So it
need to try to hold the config_lock mutex which may happen in atomic
environment which leads the following warnings:

[  622.944441] CPU0 attaching NULL sched-domain.
[  622.944446] CPU1 attaching NULL sched-domain.
[  622.944485] CPU0 attaching NULL sched-domain.
[  622.950795] BUG: sleeping function called from invalid context at kernel/mutex.c:616
[  622.950796] in_atomic(): 1, irqs_disabled(): 1, pid: 10, name: migration/1
[  622.950796] no locks held by migration/1/10.
[  622.950798] CPU: 1 PID: 10 Comm: migration/1 Not tainted 3.12.0-rc5-wl-01249-gb91e82d #317
[  622.950799] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
[  622.950802]  0000000000000000 ffff88001d42dba0 ffffffff81a32f22 ffff88001bfb9c70
[  622.950803]  ffff88001d42dbb0 ffffffff810edb02 ffff88001d42dc38 ffffffff81a396ed
[  622.950805]  0000000000000046 ffff88001d42dbe8 ffffffff810e861d 0000000000000000
[  622.950805] Call Trace:
[  622.950810]  [<ffffffff81a32f22>] dump_stack+0x54/0x74
[  622.950815]  [<ffffffff810edb02>] __might_sleep+0x112/0x114
[  622.950817]  [<ffffffff81a396ed>] mutex_lock_nested+0x3c/0x3c6
[  622.950818]  [<ffffffff810e861d>] ? up+0x39/0x3e
[  622.950821]  [<ffffffff8153ea7c>] ? acpi_os_signal_semaphore+0x21/0x2d
[  622.950824]  [<ffffffff81565ed1>] ? acpi_ut_release_mutex+0x5e/0x62
[  622.950828]  [<ffffffff816d04ec>] virtnet_cpu_callback+0x33/0x87
[  622.950830]  [<ffffffff81a42576>] notifier_call_chain+0x3c/0x5e
[  622.950832]  [<ffffffff810e86a8>] __raw_notifier_call_chain+0xe/0x10
[  622.950835]  [<ffffffff810c5556>] __cpu_notify+0x20/0x37
[  622.950836]  [<ffffffff810c5580>] cpu_notify+0x13/0x15
[  622.950838]  [<ffffffff81a237cd>] take_cpu_down+0x27/0x3a
[  622.950841]  [<ffffffff81136289>] stop_machine_cpu_stop+0x93/0xf1
[  622.950842]  [<ffffffff81136167>] cpu_stopper_thread+0xa0/0x12f
[  622.950844]  [<ffffffff811361f6>] ? cpu_stopper_thread+0x12f/0x12f
[  622.950847]  [<ffffffff81119710>] ? lock_release_holdtime.part.7+0xa3/0xa8
[  622.950848]  [<ffffffff81135e4b>] ? cpu_stop_should_run+0x3f/0x47
[  622.950850]  [<ffffffff810ea9b0>] smpboot_thread_fn+0x1c5/0x1e3
[  622.950852]  [<ffffffff810ea7eb>] ? lg_global_unlock+0x67/0x67
[  622.950854]  [<ffffffff810e36b7>] kthread+0xd8/0xe0
[  622.950857]  [<ffffffff81a3bfad>] ? wait_for_common+0x12f/0x164
[  622.950859]  [<ffffffff810e35df>] ? kthread_create_on_node+0x124/0x124
[  622.950861]  [<ffffffff81a45ffc>] ret_from_fork+0x7c/0xb0
[  622.950862]  [<ffffffff810e35df>] ? kthread_create_on_node+0x124/0x124
[  622.950876] smpboot: CPU 1 is now offline
[  623.194556] SMP alternatives: lockdep: fixing up alternatives
[  623.194559] smpboot: Booting Node 0 Processor 1 APIC 0x1
...

A correct fix is to unregister the hotcpu notifier during restore and register a
new one in resume.

Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Tested-by: Fengguang Wu <fengguang.wu@intel.com>
Cc: Wanlong Gao <gaowanlong@cn.fujitsu.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Wanlong Gao <gaowanlong@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/virtio_net.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 9fbdfcd1e1a..bbc9cb84ec1 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1118,11 +1118,6 @@ static int virtnet_cpu_callback(struct notifier_block *nfb,
 {
 	struct virtnet_info *vi = container_of(nfb, struct virtnet_info, nb);
 
-	mutex_lock(&vi->config_lock);
-
-	if (!vi->config_enable)
-		goto done;
-
 	switch(action & ~CPU_TASKS_FROZEN) {
 	case CPU_ONLINE:
 	case CPU_DOWN_FAILED:
@@ -1136,8 +1131,6 @@ static int virtnet_cpu_callback(struct notifier_block *nfb,
 		break;
 	}
 
-done:
-	mutex_unlock(&vi->config_lock);
 	return NOTIFY_OK;
 }
 
@@ -1699,6 +1692,8 @@ static int virtnet_freeze(struct virtio_device *vdev)
 	struct virtnet_info *vi = vdev->priv;
 	int i;
 
+	unregister_hotcpu_notifier(&vi->nb);
+
 	/* Prevent config work handler from accessing the device */
 	mutex_lock(&vi->config_lock);
 	vi->config_enable = false;
@@ -1747,6 +1742,10 @@ static int virtnet_restore(struct virtio_device *vdev)
 	virtnet_set_queues(vi, vi->curr_queue_pairs);
 	rtnl_unlock();
 
+	err = register_hotcpu_notifier(&vi->nb);
+	if (err)
+		return err;
+
 	return 0;
 }
 #endif
-- 
cgit v1.2.3-18-g5258


From b4dfd326c29c241c2bb8463167217eb2438b7c3d Mon Sep 17 00:00:00 2001
From: Alistair Popple <alistair@popple.id.au>
Date: Wed, 30 Oct 2013 10:50:37 +1100
Subject: ibm emac: Don't call napi_complete if napi_reschedule failed

This patch fixes a bug which would trigger the BUG_ON() at
net/core/dev.c:4156. It was found that this was due to continuing
processing in the current poll call even when the call to
napi_reschedule failed, indicating the device was already on the
polling list. This resulted in an extra call to napi_complete which
triggered the BUG_ON().

This patch ensures that we only contine processing rotting packets in
the current mal_poll call if we are not already on the polling list.

Signed-off-by: Alistair Popple <alistair@popple.id.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/emac/mal.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/ibm/emac/mal.c b/drivers/net/ethernet/ibm/emac/mal.c
index dac564c2544..909f9b6698b 100644
--- a/drivers/net/ethernet/ibm/emac/mal.c
+++ b/drivers/net/ethernet/ibm/emac/mal.c
@@ -442,15 +442,11 @@ static int mal_poll(struct napi_struct *napi, int budget)
 		if (unlikely(mc->ops->peek_rx(mc->dev) ||
 			     test_bit(MAL_COMMAC_RX_STOPPED, &mc->flags))) {
 			MAL_DBG2(mal, "rotting packet" NL);
-			if (napi_reschedule(napi))
-				mal_disable_eob_irq(mal);
-			else
-				MAL_DBG2(mal, "already in poll list" NL);
-
-			if (budget > 0)
-				goto again;
-			else
+			if (!napi_reschedule(napi))
 				goto more_work;
+
+			mal_disable_eob_irq(mal);
+			goto again;
 		}
 		mc->ops->poll_tx(mc->dev);
 	}
-- 
cgit v1.2.3-18-g5258


From 32663b8b8948cc05f812ab82c1c7db2db3ddf717 Mon Sep 17 00:00:00 2001
From: Alistair Popple <alistair@popple.id.au>
Date: Wed, 30 Oct 2013 10:50:38 +1100
Subject: ibm emac: Fix locking for enable/disable eob irq

Calls to mal_enable_eob_irq perform a read-write-modify of a dcr to
enable device irqs which is protected by a spin lock. However calls to
mal_disable_eob_irq do not take the corresponding lock.

This patch resolves the problem by ensuring that calls to
mal_disable_eob_irq also take the lock.

Signed-off-by: Alistair Popple <alistair@popple.id.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/emac/mal.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/ibm/emac/mal.c b/drivers/net/ethernet/ibm/emac/mal.c
index 909f9b6698b..e7847510eda 100644
--- a/drivers/net/ethernet/ibm/emac/mal.c
+++ b/drivers/net/ethernet/ibm/emac/mal.c
@@ -263,7 +263,9 @@ static inline void mal_schedule_poll(struct mal_instance *mal)
 {
 	if (likely(napi_schedule_prep(&mal->napi))) {
 		MAL_DBG2(mal, "schedule_poll" NL);
+		spin_lock(&mal->lock);
 		mal_disable_eob_irq(mal);
+		spin_unlock(&mal->lock);
 		__napi_schedule(&mal->napi);
 	} else
 		MAL_DBG2(mal, "already in poll" NL);
@@ -445,7 +447,9 @@ static int mal_poll(struct napi_struct *napi, int budget)
 			if (!napi_reschedule(napi))
 				goto more_work;
 
+			spin_lock_irqsave(&mal->lock, flags);
 			mal_disable_eob_irq(mal);
+			spin_unlock_irqrestore(&mal->lock, flags);
 			goto again;
 		}
 		mc->ops->poll_tx(mc->dev);
-- 
cgit v1.2.3-18-g5258


From b757a62e9f0df5c997c666dca4ad81197b5d8917 Mon Sep 17 00:00:00 2001
From: Nathan Hintz <nlhintz@hotmail.com>
Date: Tue, 29 Oct 2013 19:32:01 -0700
Subject: bgmac: don't update slot on skb alloc/dma mapping error
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Don't update the slot in "bgmac_dma_rx_skb_for_slot" unless both the
skb alloc and dma mapping are successful; and free the newly allocated
skb if a dma mapping error occurs.  This relieves the caller of the need
to deduce/execute the appropriate cleanup action required when an error
occurs.

Signed-off-by: Nathan Hintz <nlhintz@hotmail.com>
Acked-by: Rafał Miłecki <zajec5@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bgmac.c | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
index 249468f9536..9e8a3e024e0 100644
--- a/drivers/net/ethernet/broadcom/bgmac.c
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -244,25 +244,33 @@ static int bgmac_dma_rx_skb_for_slot(struct bgmac *bgmac,
 				     struct bgmac_slot_info *slot)
 {
 	struct device *dma_dev = bgmac->core->dma_dev;
+	struct sk_buff *skb;
+	dma_addr_t dma_addr;
 	struct bgmac_rx_header *rx;
 
 	/* Alloc skb */
-	slot->skb = netdev_alloc_skb(bgmac->net_dev, BGMAC_RX_BUF_SIZE);
-	if (!slot->skb)
+	skb = netdev_alloc_skb(bgmac->net_dev, BGMAC_RX_BUF_SIZE);
+	if (!skb)
 		return -ENOMEM;
 
 	/* Poison - if everything goes fine, hardware will overwrite it */
-	rx = (struct bgmac_rx_header *)slot->skb->data;
+	rx = (struct bgmac_rx_header *)skb->data;
 	rx->len = cpu_to_le16(0xdead);
 	rx->flags = cpu_to_le16(0xbeef);
 
 	/* Map skb for the DMA */
-	slot->dma_addr = dma_map_single(dma_dev, slot->skb->data,
-					BGMAC_RX_BUF_SIZE, DMA_FROM_DEVICE);
-	if (dma_mapping_error(dma_dev, slot->dma_addr)) {
+	dma_addr = dma_map_single(dma_dev, skb->data,
+				  BGMAC_RX_BUF_SIZE, DMA_FROM_DEVICE);
+	if (dma_mapping_error(dma_dev, dma_addr)) {
 		bgmac_err(bgmac, "DMA mapping error\n");
+		dev_kfree_skb(skb);
 		return -ENOMEM;
 	}
+
+	/* Update the slot */
+	slot->skb = skb;
+	slot->dma_addr = dma_addr;
+
 	if (slot->dma_addr & 0xC0000000)
 		bgmac_warn(bgmac, "DMA address using 0xC0000000 bit(s), it may need translation trick\n");
 
-- 
cgit v1.2.3-18-g5258


From c17cb8b55b104c549aa20a72fa44141ad2c65ec2 Mon Sep 17 00:00:00 2001
From: Masanari Iida <standby24x7@gmail.com>
Date: Wed, 30 Oct 2013 16:46:15 +0900
Subject: doc:net: Fix typo in Documentation/networking

Correct spelling typo in Documentation/networking

Signed-off-by: Masanari Iida <standby24x7@gmail.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/dccp.txt         | 4 ++--
 Documentation/networking/e100.txt         | 2 +-
 Documentation/networking/ieee802154.txt   | 4 ++--
 Documentation/networking/l2tp.txt         | 2 +-
 Documentation/networking/netdev-FAQ.txt   | 2 +-
 Documentation/networking/netlink_mmap.txt | 6 +++---
 Documentation/networking/operstates.txt   | 4 ++--
 Documentation/networking/rxrpc.txt        | 2 +-
 Documentation/networking/stmmac.txt       | 8 ++++----
 Documentation/networking/vortex.txt       | 4 ++--
 Documentation/networking/x25-iface.txt    | 2 +-
 11 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/Documentation/networking/dccp.txt b/Documentation/networking/dccp.txt
index d718bc2ff1c..bf5dbe3ab8c 100644
--- a/Documentation/networking/dccp.txt
+++ b/Documentation/networking/dccp.txt
@@ -18,8 +18,8 @@ Introduction
 Datagram Congestion Control Protocol (DCCP) is an unreliable, connection
 oriented protocol designed to solve issues present in UDP and TCP, particularly
 for real-time and multimedia (streaming) traffic.
-It divides into a base protocol (RFC 4340) and plugable congestion control
-modules called CCIDs. Like plugable TCP congestion control, at least one CCID
+It divides into a base protocol (RFC 4340) and pluggable congestion control
+modules called CCIDs. Like pluggable TCP congestion control, at least one CCID
 needs to be enabled in order for the protocol to function properly. In the Linux
 implementation, this is the TCP-like CCID2 (RFC 4341). Additional CCIDs, such as
 the TCP-friendly CCID3 (RFC 4342), are optional.
diff --git a/Documentation/networking/e100.txt b/Documentation/networking/e100.txt
index 13a32124bca..f862cf3aff3 100644
--- a/Documentation/networking/e100.txt
+++ b/Documentation/networking/e100.txt
@@ -103,7 +103,7 @@ Additional Configurations
   PRO/100 Family of Adapters is e100.
 
   As an example, if you install the e100 driver for two PRO/100 adapters
-  (eth0 and eth1), add the following to a configuraton file in /etc/modprobe.d/
+  (eth0 and eth1), add the following to a configuration file in /etc/modprobe.d/
 
        alias eth0 e100
        alias eth1 e100
diff --git a/Documentation/networking/ieee802154.txt b/Documentation/networking/ieee802154.txt
index 09eb57329f1..22bbc7225f8 100644
--- a/Documentation/networking/ieee802154.txt
+++ b/Documentation/networking/ieee802154.txt
@@ -4,7 +4,7 @@
 
 Introduction
 ============
-The IEEE 802.15.4 working group focuses on standartization of bottom
+The IEEE 802.15.4 working group focuses on standardization of bottom
 two layers: Medium Access Control (MAC) and Physical (PHY). And there
 are mainly two options available for upper layers:
  - ZigBee - proprietary protocol from ZigBee Alliance
@@ -66,7 +66,7 @@ net_device, with .type = ARPHRD_IEEE802154. Data is exchanged with socket family
 code via plain sk_buffs. On skb reception skb->cb must contain additional
 info as described in the struct ieee802154_mac_cb. During packet transmission
 the skb->cb is used to provide additional data to device's header_ops->create
-function. Be aware, that this data can be overriden later (when socket code
+function. Be aware that this data can be overridden later (when socket code
 submits skb to qdisc), so if you need something from that cb later, you should
 store info in the skb->data on your own.
 
diff --git a/Documentation/networking/l2tp.txt b/Documentation/networking/l2tp.txt
index e63fc1f7bf8..c74434de2fa 100644
--- a/Documentation/networking/l2tp.txt
+++ b/Documentation/networking/l2tp.txt
@@ -197,7 +197,7 @@ state information because the file format is subject to change. It is
 implemented to provide extra debug information to help diagnose
 problems.) Users should use the netlink API.
 
-/proc/net/pppol2tp is also provided for backwards compaibility with
+/proc/net/pppol2tp is also provided for backwards compatibility with
 the original pppol2tp driver. It lists information about L2TPv2
 tunnels and sessions only. Its use is discouraged.
 
diff --git a/Documentation/networking/netdev-FAQ.txt b/Documentation/networking/netdev-FAQ.txt
index 3a2c5860a78..0fe1c6e0dbc 100644
--- a/Documentation/networking/netdev-FAQ.txt
+++ b/Documentation/networking/netdev-FAQ.txt
@@ -68,7 +68,7 @@ A: To understand this, you need to know a bit of background information
 
    The "net" tree continues to collect fixes for the vX.Y content, and
    is fed back to Linus at regular (~weekly) intervals.  Meaning that the
-   focus for "net" is on stablilization and bugfixes.
+   focus for "net" is on stabilization and bugfixes.
 
    Finally, the vX.Y gets released, and the whole cycle starts over.
 
diff --git a/Documentation/networking/netlink_mmap.txt b/Documentation/networking/netlink_mmap.txt
index 53337883954..b2612297352 100644
--- a/Documentation/networking/netlink_mmap.txt
+++ b/Documentation/networking/netlink_mmap.txt
@@ -45,7 +45,7 @@ processing.
 
 Conversion of the reception path involves calling poll() on the file
 descriptor, once the socket is readable the frames from the ring are
-processsed in order until no more messages are available, as indicated by
+processed in order until no more messages are available, as indicated by
 a status word in the frame header.
 
 On kernel side, in order to make use of memory mapped I/O on receive, the
@@ -56,7 +56,7 @@ Dumps of kernel databases automatically support memory mapped I/O.
 
 Conversion of the transmit path involves changing message construction to
 use memory from the TX ring instead of (usually) a buffer declared on the
-stack and setting up the frame header approriately. Optionally poll() can
+stack and setting up the frame header appropriately. Optionally poll() can
 be used to wait for free frames in the TX ring.
 
 Structured and definitions for using memory mapped I/O are contained in
@@ -231,7 +231,7 @@ Ring setup:
 	if (setsockopt(fd, NETLINK_TX_RING, &req, sizeof(req)) < 0)
 		exit(1)
 
-	/* Calculate size of each invididual ring */
+	/* Calculate size of each individual ring */
 	ring_size = req.nm_block_nr * req.nm_block_size;
 
 	/* Map RX/TX rings. The TX ring is located after the RX ring */
diff --git a/Documentation/networking/operstates.txt b/Documentation/networking/operstates.txt
index 97694572338..355c6d8ef8a 100644
--- a/Documentation/networking/operstates.txt
+++ b/Documentation/networking/operstates.txt
@@ -89,8 +89,8 @@ packets. The name 'carrier' and the inversion are historical, think of
 it as lower layer.
 
 Note that for certain kind of soft-devices, which are not managing any
-real hardware, there is possible to set this bit from userpsace.
-One should use TVL IFLA_CARRIER to do so.
+real hardware, it is possible to set this bit from userspace.  One
+should use TVL IFLA_CARRIER to do so.
 
 netif_carrier_ok() can be used to query that bit.
 
diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt
index 60d05eb77c6..b89bc82eed4 100644
--- a/Documentation/networking/rxrpc.txt
+++ b/Documentation/networking/rxrpc.txt
@@ -144,7 +144,7 @@ An overview of the RxRPC protocol:
  (*) Calls use ACK packets to handle reliability.  Data packets are also
      explicitly sequenced per call.
 
- (*) There are two types of positive acknowledgement: hard-ACKs and soft-ACKs.
+ (*) There are two types of positive acknowledgment: hard-ACKs and soft-ACKs.
      A hard-ACK indicates to the far side that all the data received to a point
      has been received and processed; a soft-ACK indicates that the data has
      been received but may yet be discarded and re-requested.  The sender may
diff --git a/Documentation/networking/stmmac.txt b/Documentation/networking/stmmac.txt
index 457b8bbafb0..cdd916da838 100644
--- a/Documentation/networking/stmmac.txt
+++ b/Documentation/networking/stmmac.txt
@@ -160,7 +160,7 @@ Where:
  o pmt: core has the embedded power module (optional).
  o force_sf_dma_mode: force DMA to use the Store and Forward mode
 		     instead of the Threshold.
- o force_thresh_dma_mode: force DMA to use the Shreshold mode other than
+ o force_thresh_dma_mode: force DMA to use the Threshold mode other than
 		     the Store and Forward mode.
  o riwt_off: force to disable the RX watchdog feature and switch to NAPI mode.
  o fix_mac_speed: this callback is used for modifying some syscfg registers
@@ -175,7 +175,7 @@ Where:
 	     registers.
  o custom_cfg/custom_data: this is a custom configuration that can be passed
 			   while initializing the resources.
- o bsp_priv: another private poiter.
+ o bsp_priv: another private pointer.
 
 For MDIO bus The we have:
 
@@ -271,7 +271,7 @@ reset procedure etc).
  o dwmac1000_dma.c:  dma functions for the GMAC chip;
  o dwmac1000.h: specific header file for the GMAC;
  o dwmac100_core: MAC 100 core and dma code;
- o dwmac100_dma.c: dma funtions for the MAC chip;
+ o dwmac100_dma.c: dma functions for the MAC chip;
  o dwmac1000.h: specific header file for the MAC;
  o dwmac_lib.c: generic DMA functions shared among chips;
  o enh_desc.c: functions for handling enhanced descriptors;
@@ -364,4 +364,4 @@ Auto-negotiated Link Parter Ability.
 10) TODO:
  o XGMAC is not supported.
  o Complete the TBI & RTBI support.
- o extened VLAN support for 3.70a SYNP GMAC.
+ o extend VLAN support for 3.70a SYNP GMAC.
diff --git a/Documentation/networking/vortex.txt b/Documentation/networking/vortex.txt
index 9a8041dcbb5..97282da82b7 100644
--- a/Documentation/networking/vortex.txt
+++ b/Documentation/networking/vortex.txt
@@ -68,7 +68,7 @@ Module parameters
 
 There are several parameters which may be provided to the driver when
 its module is loaded.  These are usually placed in /etc/modprobe.d/*.conf
-configuretion files.  Example:
+configuration files.  Example:
 
 options 3c59x debug=3 rx_copybreak=300
 
@@ -178,7 +178,7 @@ max_interrupt_work=N
 
   The driver's interrupt service routine can handle many receive and
   transmit packets in a single invocation.  It does this in a loop. 
-  The value of max_interrupt_work governs how mnay times the interrupt
+  The value of max_interrupt_work governs how many times the interrupt
   service routine will loop.  The default value is 32 loops.  If this
   is exceeded the interrupt service routine gives up and generates a
   warning message "eth0: Too much work in interrupt".
diff --git a/Documentation/networking/x25-iface.txt b/Documentation/networking/x25-iface.txt
index 78f662ee062..7f213b556e8 100644
--- a/Documentation/networking/x25-iface.txt
+++ b/Documentation/networking/x25-iface.txt
@@ -105,7 +105,7 @@ reduced by the following measures or a combination thereof:
     later.
     The lapb module interface was modified to support this. Its
     data_indication() method should now transparently pass the
-    netif_rx() return value to the (lapb mopdule) caller.
+    netif_rx() return value to the (lapb module) caller.
 (2) Drivers for kernel versions 2.2.x should always check the global
     variable netdev_dropping when a new frame is received. The driver
     should only call netif_rx() if netdev_dropping is zero. Otherwise
-- 
cgit v1.2.3-18-g5258


From 5d0f801a2ccec3b1fdabc3392c8d99ed0413d216 Mon Sep 17 00:00:00 2001
From: Markus Pargmann <mpa@pengutronix.de>
Date: Mon, 28 Oct 2013 09:54:40 +0100
Subject: can: c_can: Fix RX message handling, handle lost message before EOB

If we handle end of block messages with higher priority than a lost message,
we can run into an endless interrupt loop.

This is reproducable with a am335x processor and "cansequence -r" at 1Mbit.
As soon as we loose a packet we can't escape from an interrupt loop.

This patch fixes the problem by handling lost packets before EOB packets.

Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Markus Pargmann <mpa@pengutronix.de>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/c_can/c_can.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c
index a668cd491cb..e3fc07cf2f6 100644
--- a/drivers/net/can/c_can/c_can.c
+++ b/drivers/net/can/c_can/c_can.c
@@ -814,9 +814,6 @@ static int c_can_do_rx_poll(struct net_device *dev, int quota)
 			msg_ctrl_save = priv->read_reg(priv,
 					C_CAN_IFACE(MSGCTRL_REG, 0));
 
-			if (msg_ctrl_save & IF_MCONT_EOB)
-				return num_rx_pkts;
-
 			if (msg_ctrl_save & IF_MCONT_MSGLST) {
 				c_can_handle_lost_msg_obj(dev, 0, msg_obj);
 				num_rx_pkts++;
@@ -824,6 +821,9 @@ static int c_can_do_rx_poll(struct net_device *dev, int quota)
 				continue;
 			}
 
+			if (msg_ctrl_save & IF_MCONT_EOB)
+				return num_rx_pkts;
+
 			if (!(msg_ctrl_save & IF_MCONT_NEWDAT))
 				continue;
 
-- 
cgit v1.2.3-18-g5258


From 896e23bd04ea50a146dffd342e2f96180f0812a5 Mon Sep 17 00:00:00 2001
From: Olivier Sobrie <olivier@sobrie.be>
Date: Sun, 27 Oct 2013 22:07:53 +0100
Subject: can: kvaser_usb: fix usb endpoints detection

Some devices, like the Kvaser Memorator Professional, have several bulk in
endpoints. Only the first one found must be used by the driver. The same holds
for the bulk out endpoint. The official Kvaser driver (leaf) was used as
reference for this patch.

Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Olivier Sobrie <olivier@sobrie.be>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/kvaser_usb.c | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c
index 3b954658824..4b2d5ed62b1 100644
--- a/drivers/net/can/usb/kvaser_usb.c
+++ b/drivers/net/can/usb/kvaser_usb.c
@@ -1544,9 +1544,9 @@ static int kvaser_usb_init_one(struct usb_interface *intf,
 	return 0;
 }
 
-static void kvaser_usb_get_endpoints(const struct usb_interface *intf,
-				     struct usb_endpoint_descriptor **in,
-				     struct usb_endpoint_descriptor **out)
+static int kvaser_usb_get_endpoints(const struct usb_interface *intf,
+				    struct usb_endpoint_descriptor **in,
+				    struct usb_endpoint_descriptor **out)
 {
 	const struct usb_host_interface *iface_desc;
 	struct usb_endpoint_descriptor *endpoint;
@@ -1557,12 +1557,18 @@ static void kvaser_usb_get_endpoints(const struct usb_interface *intf,
 	for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) {
 		endpoint = &iface_desc->endpoint[i].desc;
 
-		if (usb_endpoint_is_bulk_in(endpoint))
+		if (!*in && usb_endpoint_is_bulk_in(endpoint))
 			*in = endpoint;
 
-		if (usb_endpoint_is_bulk_out(endpoint))
+		if (!*out && usb_endpoint_is_bulk_out(endpoint))
 			*out = endpoint;
+
+		/* use first bulk endpoint for in and out */
+		if (*in && *out)
+			return 0;
 	}
+
+	return -ENODEV;
 }
 
 static int kvaser_usb_probe(struct usb_interface *intf,
@@ -1576,8 +1582,8 @@ static int kvaser_usb_probe(struct usb_interface *intf,
 	if (!dev)
 		return -ENOMEM;
 
-	kvaser_usb_get_endpoints(intf, &dev->bulk_in, &dev->bulk_out);
-	if (!dev->bulk_in || !dev->bulk_out) {
+	err = kvaser_usb_get_endpoints(intf, &dev->bulk_in, &dev->bulk_out);
+	if (err) {
 		dev_err(&intf->dev, "Cannot get usb endpoint(s)");
 		return err;
 	}
-- 
cgit v1.2.3-18-g5258


From 84502b5ef9849a9694673b15c31bd3ac693010ae Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Wed, 30 Oct 2013 11:16:28 +0100
Subject: xfrm: Fix null pointer dereference when decoding sessions

On some codepaths the skb does not have a dst entry
when xfrm_decode_session() is called. So check for
a valid skb_dst() before dereferencing the device
interface index. We use 0 as the device index if
there is no valid skb_dst(), or at reverse decoding
we use skb_iif as device interface index.

Bug was introduced with git commit bafd4bd4dc
("xfrm: Decode sessions with output interface.").

Reported-by: Meelis Roos <mroos@linux.ee>
Tested-by: Meelis Roos <mroos@linux.ee>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/ipv4/xfrm4_policy.c | 6 +++++-
 net/ipv6/xfrm6_policy.c | 6 +++++-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 4764ee48447..e1a63930a96 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -104,10 +104,14 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
 	const struct iphdr *iph = ip_hdr(skb);
 	u8 *xprth = skb_network_header(skb) + iph->ihl * 4;
 	struct flowi4 *fl4 = &fl->u.ip4;
+	int oif = 0;
+
+	if (skb_dst(skb))
+		oif = skb_dst(skb)->dev->ifindex;
 
 	memset(fl4, 0, sizeof(struct flowi4));
 	fl4->flowi4_mark = skb->mark;
-	fl4->flowi4_oif = skb_dst(skb)->dev->ifindex;
+	fl4->flowi4_oif = reverse ? skb->skb_iif : oif;
 
 	if (!ip_is_fragment(iph)) {
 		switch (iph->protocol) {
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index dd503a3535f..5f8e128c512 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -135,10 +135,14 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
 	struct ipv6_opt_hdr *exthdr;
 	const unsigned char *nh = skb_network_header(skb);
 	u8 nexthdr = nh[IP6CB(skb)->nhoff];
+	int oif = 0;
+
+	if (skb_dst(skb))
+		oif = skb_dst(skb)->dev->ifindex;
 
 	memset(fl6, 0, sizeof(struct flowi6));
 	fl6->flowi6_mark = skb->mark;
-	fl6->flowi6_oif = skb_dst(skb)->dev->ifindex;
+	fl6->flowi6_oif = reverse ? skb->skb_iif : oif;
 
 	fl6->daddr = reverse ? hdr->saddr : hdr->daddr;
 	fl6->saddr = reverse ? hdr->daddr : hdr->saddr;
-- 
cgit v1.2.3-18-g5258


From 6f092343855a71e03b8d209815d8c45bf3a27fcd Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Fri, 1 Nov 2013 15:01:10 +0800
Subject: net: flow_dissector: fail on evil iph->ihl

We don't validate iph->ihl which may lead a dead loop if we meet a IPIP
skb whose iph->ihl is zero. Fix this by failing immediately when iph->ihl
is evil (less than 5).

This issue were introduced by commit ec5efe7946280d1e84603389a1030ccec0a767ae
(rps: support IPIP encapsulation).

Cc: Eric Dumazet <edumazet@google.com>
Cc: Petr Matousek <pmatouse@redhat.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/flow_dissector.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 8d7d0dd72db..143b6fdb964 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -40,7 +40,7 @@ again:
 		struct iphdr _iph;
 ip:
 		iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
-		if (!iph)
+		if (!iph || iph->ihl < 5)
 			return false;
 
 		if (ip_is_fragment(iph))
-- 
cgit v1.2.3-18-g5258


From 7926c1d5be0b7cbe5b8d5c788d7d39237e7b212c Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Thu, 31 Oct 2013 09:13:32 +0100
Subject: net: sctp: do not trigger BUG_ON in sctp_cmd_delete_tcb

Introduced in f9e42b853523 ("net: sctp: sideeffect: throw BUG if
primary_path is NULL"), we intended to find a buggy assoc that's
part of the assoc hash table with a primary_path that is NULL.
However, we better remove the BUG_ON for now and find a more
suitable place to assert for these things as Mark reports that
this also triggers the bug when duplication cookie processing
happens, and the assoc is not part of the hash table (so all
good in this case). Such a situation can for example easily be
reproduced by:

  tc qdisc add dev eth0 root handle 1: prio bands 2 priomap 1 1 1 1 1 1
  tc qdisc add dev eth0 parent 1:2 handle 20: netem loss 20%
  tc filter add dev eth0 protocol ip parent 1: prio 2 u32 match ip \
            protocol 132 0xff match u8 0x0b 0xff at 32 flowid 1:2

This drops 20% of COOKIE-ACK packets. After some follow-up
discussion with Vlad we came to the conclusion that for now we
should still better remove this BUG_ON() assertion, and come up
with two follow-ups later on, that is, i) find a more suitable
place for this assertion, and possibly ii) have a special
allocator/initializer for such kind of temporary assocs.

Reported-by: Mark Thomas <Mark.Thomas@metaswitch.com>
Signed-off-by: Vlad Yasevich <vyasevich@gmail.com>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/sm_sideeffect.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 666c6684279..1a6eef39ab2 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -860,7 +860,6 @@ static void sctp_cmd_delete_tcb(sctp_cmd_seq_t *cmds,
 	    (!asoc->temp) && (sk->sk_shutdown != SHUTDOWN_MASK))
 		return;
 
-	BUG_ON(asoc->peer.primary_path == NULL);
 	sctp_unhash_established(asoc);
 	sctp_association_free(asoc);
 }
-- 
cgit v1.2.3-18-g5258


From c32b7dfbb1dfb3f0a68f250deff65103c8bb704a Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Sun, 3 Nov 2013 10:04:07 +0200
Subject: net/mlx4_core: Fix call to __mlx4_unregister_mac

In function mlx4_master_deactivate_admin_state() __mlx4_unregister_mac was
called using the MAC index. It should be called with the value of the MAC itself.

Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/cmd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index ea20182c696..bb11624a1f3 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -1691,7 +1691,7 @@ static void mlx4_master_deactivate_admin_state(struct mlx4_priv *priv, int slave
 			vp_oper->vlan_idx = NO_INDX;
 		}
 		if (NO_INDX != vp_oper->mac_idx) {
-			__mlx4_unregister_mac(&priv->dev, port, vp_oper->mac_idx);
+			__mlx4_unregister_mac(&priv->dev, port, vp_oper->state.mac);
 			vp_oper->mac_idx = NO_INDX;
 		}
 	}
-- 
cgit v1.2.3-18-g5258