From 8142b227ef43119e19acf6122a9eea1a82492645 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Mon, 31 Mar 2014 18:14:18 +0400 Subject: netfilter: nf_conntrack: flush net_gre->keymap_list only from gre helper nf_ct_gre_keymap_flush() removes a nf_ct_gre_keymap object from net_gre->keymap_list and frees the object. But it doesn't clean a reference on this object from ct_pptp_info->keymap[dir]. Then nf_ct_gre_keymap_destroy() may release the same object again. So nf_ct_gre_keymap_flush() can be called only when we are sure that when nf_ct_gre_keymap_destroy will not be called. nf_ct_gre_keymap is created by nf_ct_gre_keymap_add() and the right way to destroy it is to call nf_ct_gre_keymap_destroy(). This patch marks nf_ct_gre_keymap_flush() as static, so this patch can break compilation of third party modules, which use nf_ct_gre_keymap_flush. I'm not sure this is the right way to deprecate this function. [ 226.540793] general protection fault: 0000 [#1] SMP [ 226.541750] Modules linked in: nf_nat_pptp nf_nat_proto_gre nf_conntrack_pptp nf_conntrack_proto_gre ip_gre ip_tunnel gre ppp_deflate bsd_comp ppp_async crc_ccitt ppp_generic slhc xt_nat iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack veth tun bridge stp llc ppdev microcode joydev pcspkr serio_raw virtio_console virtio_balloon floppy parport_pc parport pvpanic i2c_piix4 virtio_net drm_kms_helper ttm ata_generic virtio_pci virtio_ring virtio drm i2c_core pata_acpi [last unloaded: ip_tunnel] [ 226.541776] CPU: 0 PID: 49 Comm: kworker/u4:2 Not tainted 3.14.0-rc8+ #101 [ 226.541776] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 [ 226.541776] Workqueue: netns cleanup_net [ 226.541776] task: ffff8800371e0000 ti: ffff88003730c000 task.ti: ffff88003730c000 [ 226.541776] RIP: 0010:[] [] __list_del_entry+0x29/0xd0 [ 226.541776] RSP: 0018:ffff88003730dbd0 EFLAGS: 00010a83 [ 226.541776] RAX: 6b6b6b6b6b6b6b6b RBX: ffff8800374e6c40 RCX: dead000000200200 [ 226.541776] RDX: 6b6b6b6b6b6b6b6b RSI: ffff8800371e07d0 RDI: ffff8800374e6c40 [ 226.541776] RBP: ffff88003730dbd0 R08: 0000000000000000 R09: 0000000000000000 [ 226.541776] R10: 0000000000000001 R11: ffff88003730d92e R12: 0000000000000002 [ 226.541776] R13: ffff88007a4c42d0 R14: ffff88007aef0000 R15: ffff880036cf0018 [ 226.541776] FS: 0000000000000000(0000) GS:ffff88007fc00000(0000) knlGS:0000000000000000 [ 226.541776] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 226.541776] CR2: 00007f07f643f7d0 CR3: 0000000036fd2000 CR4: 00000000000006f0 [ 226.541776] Stack: [ 226.541776] ffff88003730dbe8 ffffffff81389c5d ffff8800374ffbe4 ffff88003730dc28 [ 226.541776] ffffffffa0162a43 ffffffffa01627c5 ffff88007a4c42d0 ffff88007aef0000 [ 226.541776] ffffffffa01651c0 ffff88007a4c45e0 ffff88007aef0000 ffff88003730dc40 [ 226.541776] Call Trace: [ 226.541776] [] list_del+0xd/0x30 [ 226.541776] [] nf_ct_gre_keymap_destroy+0x283/0x2d0 [nf_conntrack_proto_gre] [ 226.541776] [] ? nf_ct_gre_keymap_destroy+0x5/0x2d0 [nf_conntrack_proto_gre] [ 226.541776] [] gre_destroy+0x27/0x70 [nf_conntrack_proto_gre] [ 226.541776] [] destroy_conntrack+0x83/0x200 [nf_conntrack] [ 226.541776] [] ? destroy_conntrack+0x27/0x200 [nf_conntrack] [ 226.541776] [] ? nf_conntrack_hash_check_insert+0x2e0/0x2e0 [nf_conntrack] [ 226.541776] [] nf_conntrack_destroy+0x72/0x180 [ 226.541776] [] ? nf_conntrack_destroy+0x5/0x180 [ 226.541776] [] ? 
kill_l3proto+0x20/0x20 [nf_conntrack] [ 226.541776] [] nf_ct_iterate_cleanup+0x14e/0x170 [nf_conntrack] [ 226.541776] [] nf_ct_l4proto_pernet_unregister+0x5b/0x90 [nf_conntrack] [ 226.541776] [] proto_gre_net_exit+0x19/0x30 [nf_conntrack_proto_gre] [ 226.541776] [] ops_exit_list.isra.1+0x39/0x60 [ 226.541776] [] cleanup_net+0x100/0x1d0 [ 226.541776] [] process_one_work+0x1ea/0x4f0 [ 226.541776] [] ? process_one_work+0x188/0x4f0 [ 226.541776] [] worker_thread+0x11b/0x3a0 [ 226.541776] [] ? process_one_work+0x4f0/0x4f0 [ 226.541776] [] kthread+0xed/0x110 [ 226.541776] [] ? _raw_spin_unlock_irq+0x2c/0x40 [ 226.541776] [] ? kthread_create_on_node+0x200/0x200 [ 226.541776] [] ret_from_fork+0x7c/0xb0 [ 226.541776] [] ? kthread_create_on_node+0x200/0x200 [ 226.541776] Code: 00 00 55 48 8b 17 48 b9 00 01 10 00 00 00 ad de 48 8b 47 08 48 89 e5 48 39 ca 74 29 48 b9 00 02 20 00 00 00 ad de 48 39 c8 74 7a <4c> 8b 00 4c 39 c7 75 53 4c 8b 42 08 4c 39 c7 75 2b 48 89 42 08 [ 226.541776] RIP [] __list_del_entry+0x29/0xd0 [ 226.541776] RSP [ 226.612193] ---[ end trace 985ae23ddfcc357c ]--- Cc: Pablo Neira Ayuso Cc: Patrick McHardy Cc: Jozsef Kadlecsik Cc: "David S. Miller" Signed-off-by: Andrey Vagin Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_conntrack_proto_gre.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfilter/nf_conntrack_proto_gre.h b/include/linux/netfilter/nf_conntrack_proto_gre.h index ec2ffaf418c..df78dc2b552 100644 --- a/include/linux/netfilter/nf_conntrack_proto_gre.h +++ b/include/linux/netfilter/nf_conntrack_proto_gre.h @@ -87,7 +87,6 @@ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir, /* delete keymap entries */ void nf_ct_gre_keymap_destroy(struct nf_conn *ct); -void nf_ct_gre_keymap_flush(struct net *net); void nf_nat_need_gre(void); #endif /* __KERNEL__ */ -- cgit v1.2.3-70-g09d2 From 732d50b431dca2f3f78fc21ba9b7ed9d06bb01ce Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 7 Apr 2014 10:33:45 -0400 Subject: drm/dp/i2c: Update comments about common i2c over dp assumptions (v3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If you are using the common dp over i2c functionality, it is assumed that the aux transfer function does not modify anything in the msg structure other than the reply field. Doing so breaks the logic in the common code. v2: update struct drm_dp_aux comments about assumptions v3 (chk): rebased on upstream changes Signed-off-by: Alex Deucher Cc: Ville Syrjälä Cc: Jani Nikula Cc: Thierry Reding Reviewed-by: Ville Syrjälä Signed-off-by: Christian König --- drivers/gpu/drm/drm_dp_helper.c | 4 +++- include/drm/drm_dp_helper.h | 4 ++++ 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_dp_helper.c b/drivers/gpu/drm/drm_dp_helper.c index bcfb0d07718..4b6e6f3ba0a 100644 --- a/drivers/gpu/drm/drm_dp_helper.c +++ b/drivers/gpu/drm/drm_dp_helper.c @@ -577,7 +577,9 @@ static u32 drm_dp_i2c_functionality(struct i2c_adapter *adapter) /* * Transfer a single I2C-over-AUX message and handle various error conditions, - * retrying the transaction as appropriate. + * retrying the transaction as appropriate. It is assumed that the + * aux->transfer function does not modify anything in the msg other than the + * reply field.
*/ static int drm_dp_i2c_do_msg(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) { diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h index b4f58914bf7..cfcacec5b89 100644 --- a/include/drm/drm_dp_helper.h +++ b/include/drm/drm_dp_helper.h @@ -456,6 +456,10 @@ struct drm_dp_aux_msg { * transactions. The drm_dp_aux_register_i2c_bus() function registers an * I2C adapter that can be passed to drm_probe_ddc(). Upon removal, drivers * should call drm_dp_aux_unregister_i2c_bus() to remove the I2C adapter. + * + * Note that the aux helper code assumes that the .transfer() function + * only modifies the reply field of the drm_dp_aux_msg structure. The + * retry logic and i2c helpers assume this is the case. */ struct drm_dp_aux { const char *name; -- cgit v1.2.3-70-g09d2 From f360d88a2efddf2d2a2d01a8ac76fded34d624b4 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 2 Apr 2014 00:10:16 +0300 Subject: IB/mlx5: Add block multicast loopback support Add support for the block multicast loopback QP creation flag along the proper firmware API for that. Signed-off-by: Eli Cohen Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/main.c | 2 ++ drivers/infiniband/hw/mlx5/qp.c | 12 ++++++++++++ include/linux/mlx5/device.h | 1 + include/linux/mlx5/qp.h | 1 + 4 files changed, 16 insertions(+) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index fa6dc870ada..364d4b6937f 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -282,6 +282,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->sig_guard_cap = IB_GUARD_T10DIF_CRC | IB_GUARD_T10DIF_CSUM; } + if (flags & MLX5_DEV_CAP_FLAG_BLOCK_MCAST) + props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK; props->vendor_id = be32_to_cpup((__be32 *)(out_mad->data + 36)) & 0xffffff; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index ae788d27b93..dc930ed21ec 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -807,6 +807,15 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, spin_lock_init(&qp->sq.lock); spin_lock_init(&qp->rq.lock); + if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) { + if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_BLOCK_MCAST)) { + mlx5_ib_dbg(dev, "block multicast loopback isn't supported\n"); + return -EINVAL; + } else { + qp->flags |= MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK; + } + } + if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE; @@ -878,6 +887,9 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (qp->wq_sig) in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_ENABLE_SIG); + if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK) + in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_BLOCK_MCAST); + if (qp->scat_cqe && is_connected(init_attr->qp_type)) { int rcqe_sz; int scqe_sz; diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 407bdb67fd4..3406cfb1267 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -179,6 +179,7 @@ enum { MLX5_DEV_CAP_FLAG_BAD_QKEY_CNTR = 1LL << 9, MLX5_DEV_CAP_FLAG_APM = 1LL << 17, MLX5_DEV_CAP_FLAG_ATOMIC = 1LL << 18, + MLX5_DEV_CAP_FLAG_BLOCK_MCAST = 1LL << 23, MLX5_DEV_CAP_FLAG_ON_DMND_PG = 1LL << 24, MLX5_DEV_CAP_FLAG_CQ_MODER = 1LL << 29, MLX5_DEV_CAP_FLAG_RESIZE_CQ = 1LL << 30, diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 
f829ad80ff2..9709b30e2d6 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -146,6 +146,7 @@ enum { enum { MLX5_QP_LAT_SENSITIVE = 1 << 28, + MLX5_QP_BLOCK_MCAST = 1 << 30, MLX5_QP_ENABLE_SIG = 1 << 31, }; -- cgit v1.2.3-70-g09d2 From b855d416dc17061ebb271ea7ef1201d100531770 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 12 Apr 2014 13:17:57 +0200 Subject: netfilter: nf_tables: fix nft_cmp_fast failure on big endian for size < 4 nft_cmp_fast is used for equality comparisons of size <= 4. For comparisons of size < 4 bytes, a mask is calculated that is applied to both the data from userspace (during initialization) and the register value (during runtime). Both values are stored using (in effect) memcpy to a memory area that is then interpreted as u32 by nft_cmp_fast. This works fine on little endian since smaller types have the same base address, however on big endian this is not true and the smaller types are interpreted as a big number with trailing zero bytes. On big endian the mask must therefore cover the higher bytes rather than the lower ones. Add a helper function that does a cpu_to_le32 to switch the bytes on big endian. Since we're dealing with a mask of just consecutive bits, this works out fine. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_core.h | 10 ++++++++++ net/netfilter/nf_tables_core.c | 3 +-- net/netfilter/nft_cmp.c | 2 +- 3 files changed, 12 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index cf2b7ae2b9d..a75fc8e27cd 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -13,6 +13,16 @@ struct nft_cmp_fast_expr { u8 len; }; +/* Calculate the mask for the nft_cmp_fast expression. On big endian the + * mask needs to include the *upper* bytes when interpreting that data as + * something smaller than the full u32, therefore a cpu_to_le32 is done.
+ */ +static inline u32 nft_cmp_fast_mask(unsigned int len) +{ + return cpu_to_le32(~0U >> (FIELD_SIZEOF(struct nft_cmp_fast_expr, + data) * BITS_PER_BYTE - len)); +} + extern const struct nft_expr_ops nft_cmp_fast_ops; int nft_cmp_module_init(void); diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 90998a6ff8b..804105391b9 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -25,9 +25,8 @@ static void nft_cmp_fast_eval(const struct nft_expr *expr, struct nft_data data[NFT_REG_MAX + 1]) { const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr); - u32 mask; + u32 mask = nft_cmp_fast_mask(priv->len); - mask = ~0U >> (sizeof(priv->data) * BITS_PER_BYTE - priv->len); if ((data[priv->sreg].data[0] & mask) == priv->data) return; data[NFT_REG_VERDICT].verdict = NFT_BREAK; diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index 954925db414..e2b3f51c81f 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -128,7 +128,7 @@ static int nft_cmp_fast_init(const struct nft_ctx *ctx, BUG_ON(err < 0); desc.len *= BITS_PER_BYTE; - mask = ~0U >> (sizeof(priv->data) * BITS_PER_BYTE - desc.len); + mask = nft_cmp_fast_mask(desc.len); priv->data = data.data[0] & mask; priv->len = desc.len; return 0; -- cgit v1.2.3-70-g09d2 From 30f78d8ebf7f514801e71b88a10c948275168518 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 10 Apr 2014 21:23:36 -0700 Subject: ipv6: Limit mtu to 65575 bytes Francois reported that setting big mtu on loopback device could prevent tcp sessions making progress. We do not support (yet ?) IPv6 Jumbograms and cook corrupted packets. We must limit the IPv6 MTU to (65535 + 40) bytes in theory. Tested: ifconfig lo mtu 70000 netperf -H ::1 Before patch : Throughput : 0.05 Mbits After patch : Throughput : 35484 Mbits Reported-by: Francois WELLENREITER Signed-off-by: Eric Dumazet Acked-by: YOSHIFUJI Hideaki Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/ip6_route.h | 5 +++++ net/ipv6/route.c | 5 +++-- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 3c3bb184eb8..6c4f5eac98e 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -32,6 +32,11 @@ struct route_info { #define RT6_LOOKUP_F_SRCPREF_PUBLIC 0x00000010 #define RT6_LOOKUP_F_SRCPREF_COA 0x00000020 +/* We do not (yet ?) 
support IPv6 jumbograms (RFC 2675) + * Unlike IPv4, hdr->seg_len doesn't include the IPv6 header + */ +#define IP6_MAX_MTU (0xFFFF + sizeof(struct ipv6hdr)) + /* * rt6_srcprefs2flags() and rt6_flags2srcprefs() translate * between IPV6_ADDR_PREFERENCES socket option values diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 5015c50a5ba..5ea462eacd9 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1338,7 +1338,7 @@ static unsigned int ip6_mtu(const struct dst_entry *dst) unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); if (mtu) - return mtu; + goto out; mtu = IPV6_MIN_MTU; @@ -1348,7 +1348,8 @@ static unsigned int ip6_mtu(const struct dst_entry *dst) mtu = idev->cnf.mtu6; rcu_read_unlock(); - return mtu; +out: + return min_t(unsigned int, mtu, IP6_MAX_MTU); } static struct dst_entry *icmp6_dst_gc_list; -- cgit v1.2.3-70-g09d2 From 8c482cdc358ef931ee02262e0a4ef0f29946aa0c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 14 Apr 2014 21:20:12 +0200 Subject: net: filter: seccomp: fix wrong decoding of BPF_S_ANC_SECCOMP_LD_W While reviewing seccomp code, we found that BPF_S_ANC_SECCOMP_LD_W has been wrongly decoded by commit a8fc927780 ("sk-filter: Add ability to get socket filter program (v2)") into the opcode BPF_LD|BPF_B|BPF_ABS although it should have been decoded as BPF_LD|BPF_W|BPF_ABS. In practice, this should not have much side-effect though, as such conversion is/was being done through prctl(2) PR_SET_SECCOMP. Reverse operation PR_GET_SECCOMP will only return the current seccomp mode, but not the filter itself. Since the transition to the new BPF infrastructure, it's also not used anymore, so we can simply remove this as it's unreachable. Fixes: a8fc927780 ("sk-filter: Add ability to get socket filter program (v2)") Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Cc: Pavel Emelyanov Signed-off-by: David S. Miller --- include/linux/filter.h | 1 - net/core/filter.c | 1 - 2 files changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 262dcbb75ff..024fd03e5d1 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -220,7 +220,6 @@ enum { BPF_S_ANC_RXHASH, BPF_S_ANC_CPU, BPF_S_ANC_ALU_XOR_X, - BPF_S_ANC_SECCOMP_LD_W, BPF_S_ANC_VLAN_TAG, BPF_S_ANC_VLAN_TAG_PRESENT, BPF_S_ANC_PAY_OFFSET, diff --git a/net/core/filter.c b/net/core/filter.c index 0e0856f5d70..cd58614660c 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1743,7 +1743,6 @@ void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to) [BPF_S_ANC_RXHASH] = BPF_LD|BPF_B|BPF_ABS, [BPF_S_ANC_CPU] = BPF_LD|BPF_B|BPF_ABS, [BPF_S_ANC_ALU_XOR_X] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_SECCOMP_LD_W] = BPF_LD|BPF_B|BPF_ABS, [BPF_S_ANC_VLAN_TAG] = BPF_LD|BPF_B|BPF_ABS, [BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS, [BPF_S_ANC_PAY_OFFSET] = BPF_LD|BPF_B|BPF_ABS, -- cgit v1.2.3-70-g09d2 From 362d52040c71f6e8d8158be48c812d7729cb8df1 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 14 Apr 2014 21:45:17 +0200 Subject: Revert "net: sctp: Fix a_rwnd/rwnd management to reflect real state of the receiver's buffer" This reverts commit ef2820a735f7 ("net: sctp: Fix a_rwnd/rwnd management to reflect real state of the receiver's buffer") as it introduced a serious performance regression on SCTP over IPv4 and IPv6, though a not as dramatic on the latter. Measurements are on 10Gbit/s with ixgbe NICs. 
Current state: [root@Lab200slot2 ~]# iperf3 --sctp -4 -c 192.168.241.3 -V -l 1452 -t 60 iperf version 3.0.1 (10 January 2014) Linux Lab200slot2 3.14.0 #1 SMP Thu Apr 3 23:18:29 EDT 2014 x86_64 Time: Fri, 11 Apr 2014 17:56:21 GMT Connecting to host 192.168.241.3, port 5201 Cookie: Lab200slot2.1397238981.812898.548918 [ 4] local 192.168.241.2 port 38616 connected to 192.168.241.3 port 5201 Starting Test: protocol: SCTP, 1 streams, 1452 byte blocks, omitting 0 seconds, 60 second test [ ID] Interval Transfer Bandwidth [ 4] 0.00-1.09 sec 20.8 MBytes 161 Mbits/sec [ 4] 1.09-2.13 sec 10.8 MBytes 86.8 Mbits/sec [ 4] 2.13-3.15 sec 3.57 MBytes 29.5 Mbits/sec [ 4] 3.15-4.16 sec 4.33 MBytes 35.7 Mbits/sec [ 4] 4.16-6.21 sec 10.4 MBytes 42.7 Mbits/sec [ 4] 6.21-6.21 sec 0.00 Bytes 0.00 bits/sec [ 4] 6.21-7.35 sec 34.6 MBytes 253 Mbits/sec [ 4] 7.35-11.45 sec 22.0 MBytes 45.0 Mbits/sec [ 4] 11.45-11.45 sec 0.00 Bytes 0.00 bits/sec [ 4] 11.45-11.45 sec 0.00 Bytes 0.00 bits/sec [ 4] 11.45-11.45 sec 0.00 Bytes 0.00 bits/sec [ 4] 11.45-12.51 sec 16.0 MBytes 126 Mbits/sec [ 4] 12.51-13.59 sec 20.3 MBytes 158 Mbits/sec [ 4] 13.59-14.65 sec 13.4 MBytes 107 Mbits/sec [ 4] 14.65-16.79 sec 33.3 MBytes 130 Mbits/sec [ 4] 16.79-16.79 sec 0.00 Bytes 0.00 bits/sec [ 4] 16.79-17.82 sec 5.94 MBytes 48.7 Mbits/sec (etc) [root@Lab200slot2 ~]# iperf3 --sctp -6 -c 2001:db8:0:f101::1 -V -l 1400 -t 60 iperf version 3.0.1 (10 January 2014) Linux Lab200slot2 3.14.0 #1 SMP Thu Apr 3 23:18:29 EDT 2014 x86_64 Time: Fri, 11 Apr 2014 19:08:41 GMT Connecting to host 2001:db8:0:f101::1, port 5201 Cookie: Lab200slot2.1397243321.714295.2b3f7c [ 4] local 2001:db8:0:f101::2 port 55804 connected to 2001:db8:0:f101::1 port 5201 Starting Test: protocol: SCTP, 1 streams, 1400 byte blocks, omitting 0 seconds, 60 second test [ ID] Interval Transfer Bandwidth [ 4] 0.00-1.00 sec 169 MBytes 1.42 Gbits/sec [ 4] 1.00-2.00 sec 201 MBytes 1.69 Gbits/sec [ 4] 2.00-3.00 sec 188 MBytes 1.58 Gbits/sec [ 4] 3.00-4.00 sec 174 MBytes 1.46 Gbits/sec [ 4] 4.00-5.00 sec 165 MBytes 1.39 Gbits/sec [ 4] 5.00-6.00 sec 199 MBytes 1.67 Gbits/sec [ 4] 6.00-7.00 sec 163 MBytes 1.36 Gbits/sec [ 4] 7.00-8.00 sec 174 MBytes 1.46 Gbits/sec [ 4] 8.00-9.00 sec 193 MBytes 1.62 Gbits/sec [ 4] 9.00-10.00 sec 196 MBytes 1.65 Gbits/sec [ 4] 10.00-11.00 sec 157 MBytes 1.31 Gbits/sec [ 4] 11.00-12.00 sec 175 MBytes 1.47 Gbits/sec [ 4] 12.00-13.00 sec 192 MBytes 1.61 Gbits/sec [ 4] 13.00-14.00 sec 199 MBytes 1.67 Gbits/sec (etc) After patch: [root@Lab200slot2 ~]# iperf3 --sctp -4 -c 192.168.240.3 -V -l 1452 -t 60 iperf version 3.0.1 (10 January 2014) Linux Lab200slot2 3.14.0+ #1 SMP Mon Apr 14 12:06:40 EDT 2014 x86_64 Time: Mon, 14 Apr 2014 16:40:48 GMT Connecting to host 192.168.240.3, port 5201 Cookie: Lab200slot2.1397493648.413274.65e131 [ 4] local 192.168.240.2 port 50548 connected to 192.168.240.3 port 5201 Starting Test: protocol: SCTP, 1 streams, 1452 byte blocks, omitting 0 seconds, 60 second test [ ID] Interval Transfer Bandwidth [ 4] 0.00-1.00 sec 240 MBytes 2.02 Gbits/sec [ 4] 1.00-2.00 sec 239 MBytes 2.01 Gbits/sec [ 4] 2.00-3.00 sec 240 MBytes 2.01 Gbits/sec [ 4] 3.00-4.00 sec 239 MBytes 2.00 Gbits/sec [ 4] 4.00-5.00 sec 245 MBytes 2.05 Gbits/sec [ 4] 5.00-6.00 sec 240 MBytes 2.01 Gbits/sec [ 4] 6.00-7.00 sec 240 MBytes 2.02 Gbits/sec [ 4] 7.00-8.00 sec 239 MBytes 2.01 Gbits/sec With the reverted patch applied, the SCTP/IPv4 performance is back to normal on latest upstream for IPv4 and IPv6 and has same throughput as 3.4.2 test kernel, steady and interval reports are 
smooth again. Fixes: ef2820a735f7 ("net: sctp: Fix a_rwnd/rwnd management to reflect real state of the receiver's buffer") Reported-by: Peter Butler Reported-by: Dongsheng Song Reported-by: Fengguang Wu Tested-by: Peter Butler Signed-off-by: Daniel Borkmann Cc: Matija Glavinic Pecotic Cc: Alexander Sverdlin Cc: Vlad Yasevich Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 14 +++++++- net/sctp/associola.c | 82 ++++++++++++++++++++++++++++++++++++---------- net/sctp/sm_statefuns.c | 2 +- net/sctp/socket.c | 6 ++++ net/sctp/ulpevent.c | 8 ++--- 5 files changed, 87 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 6ee76c80489..d992ca3145f 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1653,6 +1653,17 @@ struct sctp_association { /* This is the last advertised value of rwnd over a SACK chunk. */ __u32 a_rwnd; + /* Number of bytes by which the rwnd has slopped. The rwnd is allowed + * to slop over a maximum of the association's frag_point. + */ + __u32 rwnd_over; + + /* Keeps treack of rwnd pressure. This happens when we have + * a window, but not recevie buffer (i.e small packets). This one + * is releases slowly (1 PMTU at a time ). + */ + __u32 rwnd_press; + /* This is the sndbuf size in use for the association. * This corresponds to the sndbuf size for the association, * as specified in the sk->sndbuf. @@ -1881,7 +1892,8 @@ void sctp_assoc_update(struct sctp_association *old, __u32 sctp_association_get_next_tsn(struct sctp_association *); void sctp_assoc_sync_pmtu(struct sock *, struct sctp_association *); -void sctp_assoc_rwnd_update(struct sctp_association *, bool); +void sctp_assoc_rwnd_increase(struct sctp_association *, unsigned int); +void sctp_assoc_rwnd_decrease(struct sctp_association *, unsigned int); void sctp_assoc_set_primary(struct sctp_association *, struct sctp_transport *); void sctp_assoc_del_nonprimary_peers(struct sctp_association *, diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 4f6d6f9d127..39579c3e0d1 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1395,35 +1395,44 @@ static inline bool sctp_peer_needs_update(struct sctp_association *asoc) return false; } -/* Update asoc's rwnd for the approximated state in the buffer, - * and check whether SACK needs to be sent. - */ -void sctp_assoc_rwnd_update(struct sctp_association *asoc, bool update_peer) +/* Increase asoc's rwnd by len and send any window update SACK if needed. */ +void sctp_assoc_rwnd_increase(struct sctp_association *asoc, unsigned int len) { - int rx_count; struct sctp_chunk *sack; struct timer_list *timer; - if (asoc->ep->rcvbuf_policy) - rx_count = atomic_read(&asoc->rmem_alloc); - else - rx_count = atomic_read(&asoc->base.sk->sk_rmem_alloc); + if (asoc->rwnd_over) { + if (asoc->rwnd_over >= len) { + asoc->rwnd_over -= len; + } else { + asoc->rwnd += (len - asoc->rwnd_over); + asoc->rwnd_over = 0; + } + } else { + asoc->rwnd += len; + } - if ((asoc->base.sk->sk_rcvbuf - rx_count) > 0) - asoc->rwnd = (asoc->base.sk->sk_rcvbuf - rx_count) >> 1; - else - asoc->rwnd = 0; + /* If we had window pressure, start recovering it + * once our rwnd had reached the accumulated pressure + * threshold. The idea is to recover slowly, but up + * to the initial advertised window. 
+ */ + if (asoc->rwnd_press && asoc->rwnd >= asoc->rwnd_press) { + int change = min(asoc->pathmtu, asoc->rwnd_press); + asoc->rwnd += change; + asoc->rwnd_press -= change; + } - pr_debug("%s: asoc:%p rwnd=%u, rx_count=%d, sk_rcvbuf=%d\n", - __func__, asoc, asoc->rwnd, rx_count, - asoc->base.sk->sk_rcvbuf); + pr_debug("%s: asoc:%p rwnd increased by %d to (%u, %u) - %u\n", + __func__, asoc, len, asoc->rwnd, asoc->rwnd_over, + asoc->a_rwnd); /* Send a window update SACK if the rwnd has increased by at least the * minimum of the association's PMTU and half of the receive buffer. * The algorithm used is similar to the one described in * Section 4.2.3.3 of RFC 1122. */ - if (update_peer && sctp_peer_needs_update(asoc)) { + if (sctp_peer_needs_update(asoc)) { asoc->a_rwnd = asoc->rwnd; pr_debug("%s: sending window update SACK- asoc:%p rwnd:%u " @@ -1445,6 +1454,45 @@ void sctp_assoc_rwnd_update(struct sctp_association *asoc, bool update_peer) } } +/* Decrease asoc's rwnd by len. */ +void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned int len) +{ + int rx_count; + int over = 0; + + if (unlikely(!asoc->rwnd || asoc->rwnd_over)) + pr_debug("%s: association:%p has asoc->rwnd:%u, " + "asoc->rwnd_over:%u!\n", __func__, asoc, + asoc->rwnd, asoc->rwnd_over); + + if (asoc->ep->rcvbuf_policy) + rx_count = atomic_read(&asoc->rmem_alloc); + else + rx_count = atomic_read(&asoc->base.sk->sk_rmem_alloc); + + /* If we've reached or overflowed our receive buffer, announce + * a 0 rwnd if rwnd would still be positive. Store the + * the potential pressure overflow so that the window can be restored + * back to original value. + */ + if (rx_count >= asoc->base.sk->sk_rcvbuf) + over = 1; + + if (asoc->rwnd >= len) { + asoc->rwnd -= len; + if (over) { + asoc->rwnd_press += asoc->rwnd; + asoc->rwnd = 0; + } + } else { + asoc->rwnd_over = len - asoc->rwnd; + asoc->rwnd = 0; + } + + pr_debug("%s: asoc:%p rwnd decreased by %d to (%u, %u, %u)\n", + __func__, asoc, len, asoc->rwnd, asoc->rwnd_over, + asoc->rwnd_press); +} /* Build the bind address list for the association based on info from the * local endpoint and the remote peer. diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 01e002430c8..ae9fbeba40b 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -6178,7 +6178,7 @@ static int sctp_eat_data(const struct sctp_association *asoc, * PMTU. In cases, such as loopback, this might be a rather * large spill over. */ - if ((!chunk->data_accepted) && (!asoc->rwnd || + if ((!chunk->data_accepted) && (!asoc->rwnd || asoc->rwnd_over || (datalen > asoc->rwnd + asoc->frag_point))) { /* If this is the next TSN, consider reneging to make diff --git a/net/sctp/socket.c b/net/sctp/socket.c index e13519e9df8..ff20e2dbbbc 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2115,6 +2115,12 @@ static int sctp_recvmsg(struct kiocb *iocb, struct sock *sk, sctp_skb_pull(skb, copied); skb_queue_head(&sk->sk_receive_queue, skb); + /* When only partial message is copied to the user, increase + * rwnd by that amount. If all the data in the skb is read, + * rwnd is updated when the event is freed. 
+ */ + if (!sctp_ulpevent_is_notification(event)) + sctp_assoc_rwnd_increase(event->asoc, copied); goto out; } else if ((event->msg_flags & MSG_NOTIFICATION) || (event->msg_flags & MSG_EOR)) diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index 8d198ae0360..85c64658bd0 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -989,7 +989,7 @@ static void sctp_ulpevent_receive_data(struct sctp_ulpevent *event, skb = sctp_event2skb(event); /* Set the owner and charge rwnd for bytes received. */ sctp_ulpevent_set_owner(event, asoc); - sctp_assoc_rwnd_update(asoc, false); + sctp_assoc_rwnd_decrease(asoc, skb_headlen(skb)); if (!skb->data_len) return; @@ -1011,7 +1011,6 @@ static void sctp_ulpevent_release_data(struct sctp_ulpevent *event) { struct sk_buff *skb, *frag; unsigned int len; - struct sctp_association *asoc; /* Current stack structures assume that the rcv buffer is * per socket. For UDP style sockets this is not true as @@ -1036,11 +1035,8 @@ static void sctp_ulpevent_release_data(struct sctp_ulpevent *event) } done: - asoc = event->asoc; - sctp_association_hold(asoc); + sctp_assoc_rwnd_increase(event->asoc, len); sctp_ulpevent_release_owner(event); - sctp_assoc_rwnd_update(asoc, true); - sctp_association_put(asoc); } static void sctp_ulpevent_release_frag_data(struct sctp_ulpevent *event) -- cgit v1.2.3-70-g09d2 From b0270e91014dabfceaf37f5b40ad51bbf21a1302 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 15 Apr 2014 12:58:34 -0400 Subject: ipv4: add a sock pointer to ip_queue_xmit() ip_queue_xmit() assumes the skb it has to transmit is attached to an inet socket. Commit 31c70d5956fc ("l2tp: keep original skb ownership") changed l2tp to not change skb ownership and thus broke this assumption. One fix is to add a new 'struct sock *sk' parameter to ip_queue_xmit(), so that we do not assume skb->sk points to the socket used by l2tp tunnel. Fixes: 31c70d5956fc ("l2tp: keep original skb ownership") Reported-by: Zhan Jianyu Tested-by: Zhan Jianyu Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet6_connection_sock.h | 2 +- include/net/inet_connection_sock.h | 2 +- include/net/ip.h | 2 +- net/dccp/output.c | 2 +- net/ipv4/ip_output.c | 5 +++-- net/ipv4/tcp_output.c | 2 +- net/ipv6/inet6_connection_sock.c | 3 +-- net/l2tp/l2tp_core.c | 4 ++-- net/l2tp/l2tp_ip.c | 2 +- net/sctp/protocol.c | 2 +- 10 files changed, 13 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/net/inet6_connection_sock.h b/include/net/inet6_connection_sock.h index f981ba7adee..74af137304b 100644 --- a/include/net/inet6_connection_sock.h +++ b/include/net/inet6_connection_sock.h @@ -40,7 +40,7 @@ void inet6_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr); -int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl); +int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl); struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu); #endif /* _INET6_CONNECTION_SOCK_H */ diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index c55aeed41ac..7a431388756 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -36,7 +36,7 @@ struct tcp_congestion_ops; * (i.e. 
things that depend on the address family) */ struct inet_connection_sock_af_ops { - int (*queue_xmit)(struct sk_buff *skb, struct flowi *fl); + int (*queue_xmit)(struct sock *sk, struct sk_buff *skb, struct flowi *fl); void (*send_check)(struct sock *sk, struct sk_buff *skb); int (*rebuild_header)(struct sock *sk); void (*sk_rx_dst_set)(struct sock *sk, const struct sk_buff *skb); diff --git a/include/net/ip.h b/include/net/ip.h index 25064c28e05..77e73d293e0 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -111,7 +111,7 @@ int ip_do_nat(struct sk_buff *skb); void ip_send_check(struct iphdr *ip); int __ip_local_out(struct sk_buff *skb); int ip_local_out(struct sk_buff *skb); -int ip_queue_xmit(struct sk_buff *skb, struct flowi *fl); +int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl); void ip_init(void); int ip_append_data(struct sock *sk, struct flowi4 *fl4, int getfrag(void *from, char *to, int offset, int len, diff --git a/net/dccp/output.c b/net/dccp/output.c index 8876078859d..0248e8a3460 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -138,7 +138,7 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) DCCP_INC_STATS(DCCP_MIB_OUTSEGS); - err = icsk->icsk_af_ops->queue_xmit(skb, &inet->cork.fl); + err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl); return net_xmit_eval(err); } return -ENOBUFS; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 1a0755fea49..7ad68b86093 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -315,9 +315,9 @@ static void ip_copy_addrs(struct iphdr *iph, const struct flowi4 *fl4) sizeof(fl4->saddr) + sizeof(fl4->daddr)); } -int ip_queue_xmit(struct sk_buff *skb, struct flowi *fl) +/* Note: skb->sk can be different from sk, in case of tunnels */ +int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl) { - struct sock *sk = skb->sk; struct inet_sock *inet = inet_sk(sk); struct ip_options_rcu *inet_opt; struct flowi4 *fl4; @@ -389,6 +389,7 @@ packet_routed: ip_select_ident_more(skb, &rt->dst, sk, (skb_shinfo(skb)->gso_segs ?: 1) - 1); + /* TODO : should we use skb->sk here instead of sk ? 
*/ skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 699fb102e97..025e2509398 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -981,7 +981,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, tcp_skb_pcount(skb)); - err = icsk->icsk_af_ops->queue_xmit(skb, &inet->cork.fl); + err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl); if (likely(err <= 0)) return err; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index c9138189415..d4ade34ab37 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -224,9 +224,8 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk, return dst; } -int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused) +int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused) { - struct sock *sk = skb->sk; struct ipv6_pinfo *np = inet6_sk(sk); struct flowi6 fl6; struct dst_entry *dst; diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 47f7a549055..a4e37d7158d 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1131,10 +1131,10 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, skb->local_df = 1; #if IS_ENABLED(CONFIG_IPV6) if (tunnel->sock->sk_family == PF_INET6 && !tunnel->v4mapped) - error = inet6_csk_xmit(skb, NULL); + error = inet6_csk_xmit(tunnel->sock, skb, NULL); else #endif - error = ip_queue_xmit(skb, fl); + error = ip_queue_xmit(tunnel->sock, skb, fl); /* Update stats */ if (error >= 0) { diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 0b44d855269..3397fe6897c 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -487,7 +487,7 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m xmit: /* Queue the packet to IP for output */ - rc = ip_queue_xmit(skb, &inet->cork.fl); + rc = ip_queue_xmit(sk, skb, &inet->cork.fl); rcu_read_unlock(); error: diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 4e1d0fcb028..c09757fbf80 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -957,7 +957,7 @@ static inline int sctp_v4_xmit(struct sk_buff *skb, SCTP_INC_STATS(sock_net(&inet->sk), SCTP_MIB_OUTSCTPPACKS); - return ip_queue_xmit(skb, &transport->fl); + return ip_queue_xmit(&inet->sk, skb, &transport->fl); } static struct sctp_af sctp_af_inet; -- cgit v1.2.3-70-g09d2 From aad88724c9d54acb1a9737cb6069d8470fa85f74 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 15 Apr 2014 13:47:15 -0400 Subject: ipv4: add a sock pointer to dst->output() path. In the dst->output() path for ipv4, the code assumes the skb it has to transmit is attached to an inet socket, specifically via ip_mc_output() : The sk_mc_loop() test triggers a WARN_ON() when the provider of the packet is an AF_PACKET socket. The dst->output() method gets an additional 'struct sock *sk' parameter. This needs a cascade of changes so that this parameter can be propagated from vxlan to final consumer. Fixes: 8f646c922d55 ("vxlan: keep original skb ownership") Reported-by: lucien xin Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- drivers/net/vxlan.c | 4 ++-- include/net/dst.h | 14 +++++++++++--- include/net/ip.h | 11 ++++++++--- include/net/ip_tunnels.h | 2 +- include/net/ipv6.h | 2 +- include/net/xfrm.h | 6 +++--- net/core/dst.c | 15 +++++++++------ net/decnet/dn_route.c | 16 ++++++++++++++-- net/ipv4/ip_output.c | 11 +++++------ net/ipv4/ip_tunnel.c | 2 +- net/ipv4/ip_tunnel_core.c | 4 ++-- net/ipv4/route.c | 4 ++-- net/ipv4/xfrm4_output.c | 2 +- net/ipv6/ip6_output.c | 2 +- net/ipv6/route.c | 14 +++++++------- net/ipv6/sit.c | 5 +++-- net/ipv6/xfrm6_output.c | 2 +- net/openvswitch/vport-gre.c | 2 +- net/xfrm/xfrm_policy.c | 2 +- 19 files changed, 74 insertions(+), 46 deletions(-) (limited to 'include') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index c55e316373a..82355d5d155 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1755,8 +1755,8 @@ int vxlan_xmit_skb(struct vxlan_sock *vs, if (err) return err; - return iptunnel_xmit(rt, skb, src, dst, IPPROTO_UDP, tos, ttl, df, - false); + return iptunnel_xmit(vs->sock->sk, rt, skb, src, dst, IPPROTO_UDP, + tos, ttl, df, false); } EXPORT_SYMBOL_GPL(vxlan_xmit_skb); diff --git a/include/net/dst.h b/include/net/dst.h index 46ed958e0c6..71c60f42be4 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -45,7 +45,7 @@ struct dst_entry { void *__pad1; #endif int (*input)(struct sk_buff *); - int (*output)(struct sk_buff *); + int (*output)(struct sock *sk, struct sk_buff *skb); unsigned short flags; #define DST_HOST 0x0001 @@ -367,7 +367,11 @@ static inline struct dst_entry *skb_dst_pop(struct sk_buff *skb) return child; } -int dst_discard(struct sk_buff *skb); +int dst_discard_sk(struct sock *sk, struct sk_buff *skb); +static inline int dst_discard(struct sk_buff *skb) +{ + return dst_discard_sk(skb->sk, skb); +} void *dst_alloc(struct dst_ops *ops, struct net_device *dev, int initial_ref, int initial_obsolete, unsigned short flags); void __dst_free(struct dst_entry *dst); @@ -449,9 +453,13 @@ static inline void dst_set_expires(struct dst_entry *dst, int timeout) } /* Output packet to network from transport. */ +static inline int dst_output_sk(struct sock *sk, struct sk_buff *skb) +{ + return skb_dst(skb)->output(sk, skb); +} static inline int dst_output(struct sk_buff *skb) { - return skb_dst(skb)->output(skb); + return dst_output_sk(skb->sk, skb); } /* Input packet from network to transport. 
*/ diff --git a/include/net/ip.h b/include/net/ip.h index 77e73d293e0..3ec2b0fb9d8 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -104,13 +104,18 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); int ip_local_deliver(struct sk_buff *skb); int ip_mr_input(struct sk_buff *skb); -int ip_output(struct sk_buff *skb); -int ip_mc_output(struct sk_buff *skb); +int ip_output(struct sock *sk, struct sk_buff *skb); +int ip_mc_output(struct sock *sk, struct sk_buff *skb); int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)); int ip_do_nat(struct sk_buff *skb); void ip_send_check(struct iphdr *ip); int __ip_local_out(struct sk_buff *skb); -int ip_local_out(struct sk_buff *skb); +int ip_local_out_sk(struct sock *sk, struct sk_buff *skb); +static inline int ip_local_out(struct sk_buff *skb) +{ + return ip_local_out_sk(skb->sk, skb); +} + int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl); void ip_init(void); int ip_append_data(struct sock *sk, struct flowi4 *fl4, diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index e77c10405d5..a4daf9eb856 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -153,7 +153,7 @@ static inline u8 ip_tunnel_ecn_encap(u8 tos, const struct iphdr *iph, } int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto); -int iptunnel_xmit(struct rtable *rt, struct sk_buff *skb, +int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, __u8 proto, __u8 tos, __u8 ttl, __be16 df, bool xnet); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 4f541f11ce6..d640925bc45 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -731,7 +731,7 @@ struct dst_entry *ip6_blackhole_route(struct net *net, * skb processing functions */ -int ip6_output(struct sk_buff *skb); +int ip6_output(struct sock *sk, struct sk_buff *skb); int ip6_forward(struct sk_buff *skb); int ip6_input(struct sk_buff *skb); int ip6_mc_input(struct sk_buff *skb); diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 32682ae47b3..116e9c7e19c 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -333,7 +333,7 @@ struct xfrm_state_afinfo { const xfrm_address_t *saddr); int (*tmpl_sort)(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n); int (*state_sort)(struct xfrm_state **dst, struct xfrm_state **src, int n); - int (*output)(struct sk_buff *skb); + int (*output)(struct sock *sk, struct sk_buff *skb); int (*output_finish)(struct sk_buff *skb); int (*extract_input)(struct xfrm_state *x, struct sk_buff *skb); @@ -1540,7 +1540,7 @@ static inline int xfrm4_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi) int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb); int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb); -int xfrm4_output(struct sk_buff *skb); +int xfrm4_output(struct sock *sk, struct sk_buff *skb); int xfrm4_output_finish(struct sk_buff *skb); int xfrm4_rcv_cb(struct sk_buff *skb, u8 protocol, int err); int xfrm4_protocol_register(struct xfrm4_protocol *handler, unsigned char protocol); @@ -1565,7 +1565,7 @@ __be32 xfrm6_tunnel_alloc_spi(struct net *net, xfrm_address_t *saddr); __be32 xfrm6_tunnel_spi_lookup(struct net *net, const xfrm_address_t *saddr); int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb); int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb); -int xfrm6_output(struct sk_buff *skb); +int xfrm6_output(struct sock 
*sk, struct sk_buff *skb); int xfrm6_output_finish(struct sk_buff *skb); int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb, u8 **prevhdr); diff --git a/net/core/dst.c b/net/core/dst.c index ca4231ec734..80d6286c8b6 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -142,12 +142,12 @@ loop: mutex_unlock(&dst_gc_mutex); } -int dst_discard(struct sk_buff *skb) +int dst_discard_sk(struct sock *sk, struct sk_buff *skb) { kfree_skb(skb); return 0; } -EXPORT_SYMBOL(dst_discard); +EXPORT_SYMBOL(dst_discard_sk); const u32 dst_default_metrics[RTAX_MAX + 1] = { /* This initializer is needed to force linker to place this variable @@ -184,7 +184,7 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev, dst->xfrm = NULL; #endif dst->input = dst_discard; - dst->output = dst_discard; + dst->output = dst_discard_sk; dst->error = 0; dst->obsolete = initial_obsolete; dst->header_len = 0; @@ -209,8 +209,10 @@ static void ___dst_free(struct dst_entry *dst) /* The first case (dev==NULL) is required, when protocol module is unloaded. */ - if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) - dst->input = dst->output = dst_discard; + if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) { + dst->input = dst_discard; + dst->output = dst_discard_sk; + } dst->obsolete = DST_OBSOLETE_DEAD; } @@ -361,7 +363,8 @@ static void dst_ifdown(struct dst_entry *dst, struct net_device *dev, return; if (!unregister) { - dst->input = dst->output = dst_discard; + dst->input = dst_discard; + dst->output = dst_discard_sk; } else { dst->dev = dev_net(dst->dev)->loopback_dev; dev_hold(dst->dev); diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index ce0cbbfe0f4..daccc4a36d8 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -752,7 +752,7 @@ static int dn_to_neigh_output(struct sk_buff *skb) return n->output(n, skb); } -static int dn_output(struct sk_buff *skb) +static int dn_output(struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct dn_route *rt = (struct dn_route *)dst; @@ -838,6 +838,18 @@ drop: * Used to catch bugs. This should never normally get * called. 
*/ +static int dn_rt_bug_sk(struct sock *sk, struct sk_buff *skb) +{ + struct dn_skb_cb *cb = DN_SKB_CB(skb); + + net_dbg_ratelimited("dn_rt_bug: skb from:%04x to:%04x\n", + le16_to_cpu(cb->src), le16_to_cpu(cb->dst)); + + kfree_skb(skb); + + return NET_RX_DROP; +} + static int dn_rt_bug(struct sk_buff *skb) { struct dn_skb_cb *cb = DN_SKB_CB(skb); @@ -1463,7 +1475,7 @@ make_route: rt->n = neigh; rt->dst.lastuse = jiffies; - rt->dst.output = dn_rt_bug; + rt->dst.output = dn_rt_bug_sk; switch (res.type) { case RTN_UNICAST: rt->dst.input = dn_forward; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 7ad68b86093..1cbeba5edff 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -101,17 +101,17 @@ int __ip_local_out(struct sk_buff *skb) skb_dst(skb)->dev, dst_output); } -int ip_local_out(struct sk_buff *skb) +int ip_local_out_sk(struct sock *sk, struct sk_buff *skb) { int err; err = __ip_local_out(skb); if (likely(err == 1)) - err = dst_output(skb); + err = dst_output_sk(sk, skb); return err; } -EXPORT_SYMBOL_GPL(ip_local_out); +EXPORT_SYMBOL_GPL(ip_local_out_sk); static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst) { @@ -226,9 +226,8 @@ static int ip_finish_output(struct sk_buff *skb) return ip_finish_output2(skb); } -int ip_mc_output(struct sk_buff *skb) +int ip_mc_output(struct sock *sk, struct sk_buff *skb) { - struct sock *sk = skb->sk; struct rtable *rt = skb_rtable(skb); struct net_device *dev = rt->dst.dev; @@ -287,7 +286,7 @@ int ip_mc_output(struct sk_buff *skb) !(IPCB(skb)->flags & IPSKB_REROUTED)); } -int ip_output(struct sk_buff *skb) +int ip_output(struct sock *sk, struct sk_buff *skb) { struct net_device *dev = skb_dst(skb)->dev; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index e77381d1df9..484d0ce27ef 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -670,7 +670,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, return; } - err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, protocol, + err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl, df, !net_eq(tunnel->net, dev_net(dev))); iptunnel_xmit_stats(err, &dev->stats, dev->tstats); diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index e0c2b1d2ea4..bcf206c7900 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -46,7 +46,7 @@ #include #include -int iptunnel_xmit(struct rtable *rt, struct sk_buff *skb, +int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, __u8 proto, __u8 tos, __u8 ttl, __be16 df, bool xnet) { @@ -76,7 +76,7 @@ int iptunnel_xmit(struct rtable *rt, struct sk_buff *skb, iph->ttl = ttl; __ip_select_ident(iph, &rt->dst, (skb_shinfo(skb)->gso_segs ?: 1) - 1); - err = ip_local_out(skb); + err = ip_local_out_sk(sk, skb); if (unlikely(net_xmit_eval(err))) pkt_len = 0; return pkt_len; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 20a59c388e6..1485aafcad5 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1129,7 +1129,7 @@ static void ipv4_link_failure(struct sk_buff *skb) dst_set_expires(&rt->dst, 0); } -static int ip_rt_bug(struct sk_buff *skb) +static int ip_rt_bug(struct sock *sk, struct sk_buff *skb) { pr_debug("%s: %pI4 -> %pI4, %s\n", __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, @@ -2218,7 +2218,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or new->__use = 1; new->input = dst_discard; - new->output = dst_discard; + new->output = dst_discard_sk; new->dev 
= ort->dst.dev; if (new->dev) diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index baa0f63731f..40e701f2e1e 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -86,7 +86,7 @@ int xfrm4_output_finish(struct sk_buff *skb) return xfrm_output(skb); } -int xfrm4_output(struct sk_buff *skb) +int xfrm4_output(struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct xfrm_state *x = dst->xfrm; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 3284d61577c..40e7581374f 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -132,7 +132,7 @@ static int ip6_finish_output(struct sk_buff *skb) return ip6_finish_output2(skb); } -int ip6_output(struct sk_buff *skb) +int ip6_output(struct sock *sk, struct sk_buff *skb) { struct net_device *dev = skb_dst(skb)->dev; struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 5ea462eacd9..4011617cca6 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -84,9 +84,9 @@ static void ip6_dst_ifdown(struct dst_entry *, static int ip6_dst_gc(struct dst_ops *ops); static int ip6_pkt_discard(struct sk_buff *skb); -static int ip6_pkt_discard_out(struct sk_buff *skb); +static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb); static int ip6_pkt_prohibit(struct sk_buff *skb); -static int ip6_pkt_prohibit_out(struct sk_buff *skb); +static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb); static void ip6_link_failure(struct sk_buff *skb); static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu); @@ -290,7 +290,7 @@ static const struct rt6_info ip6_blk_hole_entry_template = { .obsolete = DST_OBSOLETE_FORCE_CHK, .error = -EINVAL, .input = dst_discard, - .output = dst_discard, + .output = dst_discard_sk, }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), .rt6i_protocol = RTPROT_KERNEL, @@ -1058,7 +1058,7 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori new->__use = 1; new->input = dst_discard; - new->output = dst_discard; + new->output = dst_discard_sk; if (dst_metrics_read_only(&ort->dst)) new->_metrics = ort->dst._metrics; @@ -1577,7 +1577,7 @@ int ip6_route_add(struct fib6_config *cfg) switch (cfg->fc_type) { case RTN_BLACKHOLE: rt->dst.error = -EINVAL; - rt->dst.output = dst_discard; + rt->dst.output = dst_discard_sk; rt->dst.input = dst_discard; break; case RTN_PROHIBIT: @@ -2129,7 +2129,7 @@ static int ip6_pkt_discard(struct sk_buff *skb) return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES); } -static int ip6_pkt_discard_out(struct sk_buff *skb) +static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb) { skb->dev = skb_dst(skb)->dev; return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES); @@ -2140,7 +2140,7 @@ static int ip6_pkt_prohibit(struct sk_buff *skb) return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES); } -static int ip6_pkt_prohibit_out(struct sk_buff *skb) +static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb) { skb->dev = skb_dst(skb)->dev; return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 1693c8d885f..8da8268d65f 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -974,8 +974,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, goto out; } - err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, IPPROTO_IPV6, tos, - ttl, df, !net_eq(tunnel->net, dev_net(dev))); + err = 
iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, + IPPROTO_IPV6, tos, ttl, df, + !net_eq(tunnel->net, dev_net(dev))); iptunnel_xmit_stats(err, &dev->stats, dev->tstats); return NETDEV_TX_OK; diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 6cd625e3770..19ef329bdbf 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -163,7 +163,7 @@ static int __xfrm6_output(struct sk_buff *skb) return x->outer_mode->afinfo->output_finish(skb); } -int xfrm6_output(struct sk_buff *skb) +int xfrm6_output(struct sock *sk, struct sk_buff *skb) { return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, skb_dst(skb)->dev, __xfrm6_output); diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c index a3d6951602d..ebb6e244255 100644 --- a/net/openvswitch/vport-gre.c +++ b/net/openvswitch/vport-gre.c @@ -174,7 +174,7 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb) skb->local_df = 1; - return iptunnel_xmit(rt, skb, fl.saddr, + return iptunnel_xmit(skb->sk, rt, skb, fl.saddr, OVS_CB(skb)->tun_key->ipv4_dst, IPPROTO_GRE, OVS_CB(skb)->tun_key->ipv4_tos, OVS_CB(skb)->tun_key->ipv4_ttl, df, false); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index f02f511b710..c08fbd11cef 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1842,7 +1842,7 @@ purge_queue: xfrm_pol_put(pol); } -static int xdst_queue_output(struct sk_buff *skb) +static int xdst_queue_output(struct sock *sk, struct sk_buff *skb) { unsigned long sched_next; struct dst_entry *dst = skb_dst(skb); -- cgit v1.2.3-70-g09d2 From 5be44a6fb1edb57d7d2d77151870dcd79c8c0e58 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 4 Apr 2014 08:41:26 +0200 Subject: x86: Remove the PCI reboot method from the default chain Steve reported a reboot hang and bisected it back to this commit: a4f1987e4c54 x86, reboot: Add EFI and CF9 reboot methods into the default list He heroically tested all reboot methods and found the following: reboot=t # triple fault ok reboot=k # keyboard ctrl FAIL reboot=b # BIOS ok reboot=a # ACPI FAIL reboot=e # EFI FAIL [system has no EFI] reboot=p # PCI 0xcf9 FAIL And I think it's pretty obvious that we should only try PCI 0xcf9 as a last resort - if at all. The other observation is that (on this box) we should never try the PCI reboot method, but close with either the 'triple fault' or the 'BIOS' (terminal!) reboot methods. Thirdly, CF9_COND is a total misnomer - it should be something like CF9_SAFE or CF9_CAREFUL, and 'CF9' should be 'CF9_FORCE' ... So this patch fixes the worst problems: - it orders the actual reboot logic to follow the reboot ordering pattern - it was in a pretty random order before for no good reason. - it fixes the CF9 misnomers and uses BOOT_CF9_FORCE and BOOT_CF9_SAFE flags to make the code more obvious. - it tries the BIOS reboot method before the PCI reboot method. (Since 'BIOS' is a terminal reboot method resulting in a hang if it does not work, this is essentially equivalent to removing the PCI reboot method from the default reboot chain.) - just for the miraculous possibility of terminal (resulting in hang) reboot methods of triple fault or BIOS returning without having done their job, there's an ordering between them as well. 
Reported-and-bisected-and-tested-by: Steven Rostedt Cc: Li Aubrey Cc: Linus Torvalds Cc: Matthew Garrett Link: http://lkml.kernel.org/r/20140404064120.GB11877@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/reboot.c | 72 +++++++++++++++++++++++++----------------------- include/linux/reboot.h | 14 +++++----- 2 files changed, 44 insertions(+), 42 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 654b46574b9..3399d3a9973 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -114,8 +114,8 @@ EXPORT_SYMBOL(machine_real_restart); */ static int __init set_pci_reboot(const struct dmi_system_id *d) { - if (reboot_type != BOOT_CF9) { - reboot_type = BOOT_CF9; + if (reboot_type != BOOT_CF9_FORCE) { + reboot_type = BOOT_CF9_FORCE; pr_info("%s series board detected. Selecting %s-method for reboots.\n", d->ident, "PCI"); } @@ -458,20 +458,23 @@ void __attribute__((weak)) mach_reboot_fixups(void) } /* - * Windows compatible x86 hardware expects the following on reboot: + * To the best of our knowledge Windows compatible x86 hardware expects + * the following on reboot: * * 1) If the FADT has the ACPI reboot register flag set, try it * 2) If still alive, write to the keyboard controller * 3) If still alive, write to the ACPI reboot register again * 4) If still alive, write to the keyboard controller again * 5) If still alive, call the EFI runtime service to reboot - * 6) If still alive, write to the PCI IO port 0xCF9 to reboot - * 7) If still alive, inform BIOS to do a proper reboot + * 6) If no EFI runtime service, call the BIOS to do a reboot * - * If the machine is still alive at this stage, it gives up. We default to - * following the same pattern, except that if we're still alive after (7) we'll - * try to force a triple fault and then cycle between hitting the keyboard - * controller and doing that + * We default to following the same pattern. We also have + * two other reboot methods: 'triple fault' and 'PCI', which + * can be triggered via the reboot= kernel boot option or + * via quirks. + * + * This means that this function can never return, it can misbehave + * by not rebooting properly and hanging. */ static void native_machine_emergency_restart(void) { @@ -492,6 +495,11 @@ static void native_machine_emergency_restart(void) for (;;) { /* Could also try the reset bit in the Hammer NB */ switch (reboot_type) { + case BOOT_ACPI: + acpi_reboot(); + reboot_type = BOOT_KBD; + break; + case BOOT_KBD: mach_reboot_fixups(); /* For board specific fixups */ @@ -509,43 +517,29 @@ static void native_machine_emergency_restart(void) } break; - case BOOT_TRIPLE: - load_idt(&no_idt); - __asm__ __volatile__("int3"); - - /* We're probably dead after this, but... */ - reboot_type = BOOT_KBD; - break; - - case BOOT_BIOS: - machine_real_restart(MRR_BIOS); - - /* We're probably dead after this, but... */ - reboot_type = BOOT_TRIPLE; - break; - - case BOOT_ACPI: - acpi_reboot(); - reboot_type = BOOT_KBD; - break; - case BOOT_EFI: if (efi_enabled(EFI_RUNTIME_SERVICES)) efi.reset_system(reboot_mode == REBOOT_WARM ? EFI_RESET_WARM : EFI_RESET_COLD, EFI_SUCCESS, 0, NULL); - reboot_type = BOOT_CF9_COND; + reboot_type = BOOT_BIOS; + break; + + case BOOT_BIOS: + machine_real_restart(MRR_BIOS); + + /* We're probably dead after this, but... 
*/ + reboot_type = BOOT_CF9_SAFE; break; - case BOOT_CF9: + case BOOT_CF9_FORCE: port_cf9_safe = true; /* Fall through */ - case BOOT_CF9_COND: + case BOOT_CF9_SAFE: if (port_cf9_safe) { - u8 reboot_code = reboot_mode == REBOOT_WARM ? - 0x06 : 0x0E; + u8 reboot_code = reboot_mode == REBOOT_WARM ? 0x06 : 0x0E; u8 cf9 = inb(0xcf9) & ~reboot_code; outb(cf9|2, 0xcf9); /* Request hard reset */ udelay(50); @@ -553,7 +547,15 @@ static void native_machine_emergency_restart(void) outb(cf9|reboot_code, 0xcf9); udelay(50); } - reboot_type = BOOT_BIOS; + reboot_type = BOOT_TRIPLE; + break; + + case BOOT_TRIPLE: + load_idt(&no_idt); + __asm__ __volatile__("int3"); + + /* We're probably dead after this, but... */ + reboot_type = BOOT_KBD; break; } } diff --git a/include/linux/reboot.h b/include/linux/reboot.h index 9e7db9e73cc..48bf152761c 100644 --- a/include/linux/reboot.h +++ b/include/linux/reboot.h @@ -20,13 +20,13 @@ enum reboot_mode { extern enum reboot_mode reboot_mode; enum reboot_type { - BOOT_TRIPLE = 't', - BOOT_KBD = 'k', - BOOT_BIOS = 'b', - BOOT_ACPI = 'a', - BOOT_EFI = 'e', - BOOT_CF9 = 'p', - BOOT_CF9_COND = 'q', + BOOT_TRIPLE = 't', + BOOT_KBD = 'k', + BOOT_BIOS = 'b', + BOOT_ACPI = 'a', + BOOT_EFI = 'e', + BOOT_CF9_FORCE = 'p', + BOOT_CF9_SAFE = 'q', }; extern enum reboot_type reboot_type; -- cgit v1.2.3-70-g09d2 From cbfbbabb89b37f6bad05f478d906a385149f288d Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Wed, 9 Apr 2014 14:26:59 +0200 Subject: drm/tegra: Remove gratuitous pad field The version of the drm_tegra_submit structure that was merged all the way back in 3.10 contains a pad field that was originally intended to properly pad the following __u64 field. Unfortunately it seems like a different field was dropped during review that caused this padding to become unnecessary, but the pad field wasn't removed at that time. One possible side-effect of this is that since the __u64 following the pad is now no longer properly aligned, the compiler may (or may not) introduce padding itself, which results in no predictable ABI. Rectify this by removing the pad field so that all fields are again naturally aligned. Technically this is breaking existing userspace ABI, but given that there aren't any (released) userspace drivers that make use of this yet, the fallout should be minimal. Fixes: d43f81cbaf43 ("drm/tegra: Add gr2d device") Cc: # 3.10 Signed-off-by: Thierry Reding --- include/uapi/drm/tegra_drm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/uapi/drm/tegra_drm.h b/include/uapi/drm/tegra_drm.h index b042b48495d..b7548211242 100644 --- a/include/uapi/drm/tegra_drm.h +++ b/include/uapi/drm/tegra_drm.h @@ -120,7 +120,6 @@ struct drm_tegra_submit { __u32 num_waitchks; __u32 waitchk_mask; __u32 timeout; - __u32 pad; __u64 syncpts; __u64 cmdbufs; __u64 relocs; -- cgit v1.2.3-70-g09d2 From b9d12085f2f531fdea67f0361564e0812696227c Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Tue, 15 Apr 2014 15:50:19 +0200 Subject: net: phy: add minimal support for QSGMII PHY This commit adds the necessary definitions for the PHY layer to recognize "qsgmii" as a valid PHY interface. A QSGMII interface, as defined at http://en.wikipedia.org/wiki/Media_Independent_Interface#Quad_Serial_Gigabit_Media_Independent_Interface, "is a method of combining four SGMII lines into a 5Gbit/s interface. QSGMII, like SGMII, uses LVDS signalling for the TX and RX data and a single LVDS clock signal. QSGMII uses significantly fewer signal lines than four SGMII busses."
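As a rough sketch of what a MAC driver gains from the new mode, it can key its lane or serdes setup off the interface value reported by the attached PHY. The fragment below is hypothetical and not part of this patch; only PHY_INTERFACE_MODE_QSGMII comes from it, and example_serdes_setup() is a made-up placeholder:

	#include <linux/phy.h>

	/* Placeholder for whatever lane configuration the MAC needs. */
	static int example_serdes_setup(phy_interface_t mode)
	{
		return 0;
	}

	/* Hypothetical MAC driver fragment: handle a PHY attached in
	 * "qsgmii" mode the same way the driver handles "sgmii", apart
	 * from the serdes configuration. */
	static int example_mac_config_interface(struct phy_device *phydev)
	{
		switch (phydev->interface) {
		case PHY_INTERFACE_MODE_SGMII:
		case PHY_INTERFACE_MODE_QSGMII:
			return example_serdes_setup(phydev->interface);
		default:
			return -EINVAL;
		}
	}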
This type of MAC <-> PHY connection might require special handling on the MAC driver side, so it should be possible to express this type of MAC <-> PHY connection, for example in the Device Tree. Signed-off-by: Thomas Petazzoni Cc: devicetree@vger.kernel.org Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/ethernet.txt | 2 +- include/linux/phy.h | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/Documentation/devicetree/bindings/net/ethernet.txt b/Documentation/devicetree/bindings/net/ethernet.txt index 9ecd43d8792..3fc360523bc 100644 --- a/Documentation/devicetree/bindings/net/ethernet.txt +++ b/Documentation/devicetree/bindings/net/ethernet.txt @@ -10,7 +10,7 @@ The following properties are common to the Ethernet controllers: - max-frame-size: number, maximum transfer unit (IEEE defined MTU), rather than the maximum frame size (there's contradiction in ePAPR). - phy-mode: string, operation mode of the PHY interface; supported values are - "mii", "gmii", "sgmii", "tbi", "rev-mii", "rmii", "rgmii", "rgmii-id", + "mii", "gmii", "sgmii", "qsgmii", "tbi", "rev-mii", "rmii", "rgmii", "rgmii-id", "rgmii-rxid", "rgmii-txid", "rtbi", "smii", "xgmii"; this is now a de-facto standard property; - phy-connection-type: the same as "phy-mode" property but described in ePAPR; diff --git a/include/linux/phy.h b/include/linux/phy.h index 24126c4b27b..4d0221fd068 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -75,6 +75,7 @@ typedef enum { PHY_INTERFACE_MODE_SMII, PHY_INTERFACE_MODE_XGMII, PHY_INTERFACE_MODE_MOCA, + PHY_INTERFACE_MODE_QSGMII, PHY_INTERFACE_MODE_MAX, } phy_interface_t; @@ -116,6 +117,8 @@ static inline const char *phy_modes(phy_interface_t interface) return "xgmii"; case PHY_INTERFACE_MODE_MOCA: return "moca"; + case PHY_INTERFACE_MODE_QSGMII: + return "qsgmii"; default: return "unknown"; } -- cgit v1.2.3-70-g09d2 From 33ac1257ff0dee2e9c7f009b1c1914b7990217b2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 10 Jan 2014 08:57:31 -0500 Subject: sysfs, driver-core: remove unused {sysfs|device}_schedule_callback_owner() All device_schedule_callback_owner() users are converted to use device_remove_file_self(). Remove now unused {sysfs|device}_schedule_callback_owner(). Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 33 ------------------ fs/sysfs/file.c | 92 -------------------------------------------------- include/linux/device.h | 11 +----- include/linux/sysfs.h | 9 ----- 4 files changed, 1 insertion(+), 144 deletions(-) (limited to 'include') diff --git a/drivers/base/core.c b/drivers/base/core.c index 0dd65281cc6..20da3ad1696 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -614,39 +614,6 @@ void device_remove_bin_file(struct device *dev, } EXPORT_SYMBOL_GPL(device_remove_bin_file); -/** - * device_schedule_callback_owner - helper to schedule a callback for a device - * @dev: device. - * @func: callback function to invoke later. - * @owner: module owning the callback routine - * - * Attribute methods must not unregister themselves or their parent device - * (which would amount to the same thing). Attempts to do so will deadlock, - * since unregistration is mutually exclusive with driver callbacks. - * - * Instead methods can call this routine, which will attempt to allocate - * and schedule a workqueue request to call back @func with @dev as its - * argument in the workqueue's process context. 
@dev will be pinned until - * @func returns. - * - * This routine is usually called via the inline device_schedule_callback(), - * which automatically sets @owner to THIS_MODULE. - * - * Returns 0 if the request was submitted, -ENOMEM if storage could not - * be allocated, -ENODEV if a reference to @owner isn't available. - * - * NOTE: This routine won't work if CONFIG_SYSFS isn't set! It uses an - * underlying sysfs routine (since it is intended for use by attribute - * methods), and if sysfs isn't available you'll get nothing but -ENOSYS. - */ -int device_schedule_callback_owner(struct device *dev, - void (*func)(struct device *), struct module *owner) -{ - return sysfs_schedule_callback(&dev->kobj, - (void (*)(void *)) func, dev, owner); -} -EXPORT_SYMBOL_GPL(device_schedule_callback_owner); - static void klist_children_get(struct klist_node *n) { struct device_private *p = to_device_private_parent(n); diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 1b8b91b67fd..28cc1acd543 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -453,95 +453,3 @@ void sysfs_remove_bin_file(struct kobject *kobj, kernfs_remove_by_name(kobj->sd, attr->attr.name); } EXPORT_SYMBOL_GPL(sysfs_remove_bin_file); - -struct sysfs_schedule_callback_struct { - struct list_head workq_list; - struct kobject *kobj; - void (*func)(void *); - void *data; - struct module *owner; - struct work_struct work; -}; - -static struct workqueue_struct *sysfs_workqueue; -static DEFINE_MUTEX(sysfs_workq_mutex); -static LIST_HEAD(sysfs_workq); -static void sysfs_schedule_callback_work(struct work_struct *work) -{ - struct sysfs_schedule_callback_struct *ss = container_of(work, - struct sysfs_schedule_callback_struct, work); - - (ss->func)(ss->data); - kobject_put(ss->kobj); - module_put(ss->owner); - mutex_lock(&sysfs_workq_mutex); - list_del(&ss->workq_list); - mutex_unlock(&sysfs_workq_mutex); - kfree(ss); -} - -/** - * sysfs_schedule_callback - helper to schedule a callback for a kobject - * @kobj: object we're acting for. - * @func: callback function to invoke later. - * @data: argument to pass to @func. - * @owner: module owning the callback code - * - * sysfs attribute methods must not unregister themselves or their parent - * kobject (which would amount to the same thing). Attempts to do so will - * deadlock, since unregistration is mutually exclusive with driver - * callbacks. - * - * Instead methods can call this routine, which will attempt to allocate - * and schedule a workqueue request to call back @func with @data as its - * argument in the workqueue's process context. @kobj will be pinned - * until @func returns. - * - * Returns 0 if the request was submitted, -ENOMEM if storage could not - * be allocated, -ENODEV if a reference to @owner isn't available, - * -EAGAIN if a callback has already been scheduled for @kobj. 
- */ -int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *), - void *data, struct module *owner) -{ - struct sysfs_schedule_callback_struct *ss, *tmp; - - if (!try_module_get(owner)) - return -ENODEV; - - mutex_lock(&sysfs_workq_mutex); - list_for_each_entry_safe(ss, tmp, &sysfs_workq, workq_list) - if (ss->kobj == kobj) { - module_put(owner); - mutex_unlock(&sysfs_workq_mutex); - return -EAGAIN; - } - mutex_unlock(&sysfs_workq_mutex); - - if (sysfs_workqueue == NULL) { - sysfs_workqueue = create_singlethread_workqueue("sysfsd"); - if (sysfs_workqueue == NULL) { - module_put(owner); - return -ENOMEM; - } - } - - ss = kmalloc(sizeof(*ss), GFP_KERNEL); - if (!ss) { - module_put(owner); - return -ENOMEM; - } - kobject_get(kobj); - ss->kobj = kobj; - ss->func = func; - ss->data = data; - ss->owner = owner; - INIT_WORK(&ss->work, sysfs_schedule_callback_work); - INIT_LIST_HEAD(&ss->workq_list); - mutex_lock(&sysfs_workq_mutex); - list_add_tail(&ss->workq_list, &sysfs_workq); - mutex_unlock(&sysfs_workq_mutex); - queue_work(sysfs_workqueue, &ss->work); - return 0; -} -EXPORT_SYMBOL_GPL(sysfs_schedule_callback); diff --git a/include/linux/device.h b/include/linux/device.h index 233bbbeb768..d1d1c055b48 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -566,12 +566,6 @@ extern int __must_check device_create_bin_file(struct device *dev, const struct bin_attribute *attr); extern void device_remove_bin_file(struct device *dev, const struct bin_attribute *attr); -extern int device_schedule_callback_owner(struct device *dev, - void (*func)(struct device *dev), struct module *owner); - -/* This is a macro to avoid include problems with THIS_MODULE */ -#define device_schedule_callback(dev, func) \ - device_schedule_callback_owner(dev, func, THIS_MODULE) /* device resource management */ typedef void (*dr_release_t)(struct device *dev, void *res); @@ -932,10 +926,7 @@ extern int device_online(struct device *dev); extern struct device *__root_device_register(const char *name, struct module *owner); -/* - * This is a macro to avoid include problems with THIS_MODULE, - * just as per what is done for device_schedule_callback() above. 
- */ +/* This is a macro to avoid include problems with THIS_MODULE */ #define root_device_register(name) \ __root_device_register(name, THIS_MODULE) diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 084354b0e81..5ffaa344371 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -179,9 +179,6 @@ struct sysfs_ops { #ifdef CONFIG_SYSFS -int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *), - void *data, struct module *owner); - int __must_check sysfs_create_dir_ns(struct kobject *kobj, const void *ns); void sysfs_remove_dir(struct kobject *kobj); int __must_check sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name, @@ -255,12 +252,6 @@ static inline void sysfs_enable_ns(struct kernfs_node *kn) #else /* CONFIG_SYSFS */ -static inline int sysfs_schedule_callback(struct kobject *kobj, - void (*func)(void *), void *data, struct module *owner) -{ - return -ENOSYS; -} - static inline int sysfs_create_dir_ns(struct kobject *kobj, const void *ns) { return 0; -- cgit v1.2.3-70-g09d2 From 6a662719c9868b3d6c7d26b3a085f0cd3cc15e64 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 15 Apr 2014 16:25:34 -0700 Subject: ipv4, fib: pass LOOPBACK_IFINDEX instead of 0 to flowi4_iif MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As suggested by Julian: Simply, flowi4_iif must not contain 0, it does not look logical to ignore all ip rules with specified iif. because in fib_rule_match() we do: if (rule->iifindex && (rule->iifindex != fl->flowi_iif)) goto out; flowi4_iif should be LOOPBACK_IFINDEX by default. We need to move LOOPBACK_IFINDEX to include/net/flow.h: 1) It is mostly used by flowi_iif 2) Fix the following compile error if we use it in flow.h by the patches latter: In file included from include/linux/netfilter.h:277:0, from include/net/netns/netfilter.h:5, from include/net/net_namespace.h:21, from include/linux/netdevice.h:43, from include/linux/icmpv6.h:12, from include/linux/ipv6.h:61, from include/net/ipv6.h:16, from include/linux/sunrpc/clnt.h:27, from include/linux/nfs_fs.h:30, from init/do_mounts.c:32: include/net/flow.h: In function ‘flowi4_init_output’: include/net/flow.h:84:32: error: ‘LOOPBACK_IFINDEX’ undeclared (first use in this function) Cc: Eric Biederman Cc: Julian Anastasov Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: Cong Wang Signed-off-by: David S. 
Miller --- include/net/flow.h | 10 +++++++++- include/net/net_namespace.h | 9 +-------- net/ipv4/fib_frontend.c | 2 +- net/ipv4/fib_semantics.c | 1 + net/ipv4/ipmr.c | 2 +- net/ipv4/netfilter/ipt_rpfilter.c | 5 +---- net/ipv6/ip6mr.c | 2 +- 7 files changed, 15 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/net/flow.h b/include/net/flow.h index 64fd2483665..8109a159d1b 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -11,6 +11,14 @@ #include #include +/* + * ifindex generation is per-net namespace, and loopback is + * always the 1st device in ns (see net_dev_init), thus any + * loopback device should get ifindex 1 + */ + +#define LOOPBACK_IFINDEX 1 + struct flowi_common { int flowic_oif; int flowic_iif; @@ -80,7 +88,7 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif, __be16 dport, __be16 sport) { fl4->flowi4_oif = oif; - fl4->flowi4_iif = 0; + fl4->flowi4_iif = LOOPBACK_IFINDEX; fl4->flowi4_mark = mark; fl4->flowi4_tos = tos; fl4->flowi4_scope = scope; diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 79387f73f87..5f9eb260990 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -131,14 +132,6 @@ struct net { atomic_t fnhe_genid; }; -/* - * ifindex generation is per-net namespace, and loopback is - * always the 1st device in ns (see net_dev_init), thus any - * loopback device should get ifindex 1 - */ - -#define LOOPBACK_IFINDEX 1 - #include /* Init's network namespace */ diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 1a629f87027..255aa9946fe 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -250,7 +250,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, bool dev_match; fl4.flowi4_oif = 0; - fl4.flowi4_iif = oif; + fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX; fl4.daddr = src; fl4.saddr = dst; fl4.flowi4_tos = tos; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index b53f0bf84dc..8a043f03c88 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -631,6 +631,7 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, .daddr = nh->nh_gw, .flowi4_scope = cfg->fc_scope + 1, .flowi4_oif = nh->nh_oif, + .flowi4_iif = LOOPBACK_IFINDEX, }; /* It is not necessary, but requires a bit of thinking */ diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 28863570dd6..d84dc8d4c91 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -455,7 +455,7 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) struct mr_table *mrt; struct flowi4 fl4 = { .flowi4_oif = dev->ifindex, - .flowi4_iif = skb->skb_iif, + .flowi4_iif = skb->skb_iif ? 
: LOOPBACK_IFINDEX, .flowi4_mark = skb->mark, }; int err; diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index c49dcd0284a..4bfaedf9b34 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -89,11 +89,8 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) if (ipv4_is_multicast(iph->daddr)) { if (ipv4_is_zeronet(iph->saddr)) return ipv4_is_local_multicast(iph->daddr) ^ invert; - flow.flowi4_iif = 0; - } else { - flow.flowi4_iif = LOOPBACK_IFINDEX; } - + flow.flowi4_iif = LOOPBACK_IFINDEX; flow.daddr = iph->saddr; flow.saddr = rpfilter_get_saddr(iph->daddr); flow.flowi4_oif = 0; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 8737400af0a..8659067da28 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -700,7 +700,7 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct mr6_table *mrt; struct flowi6 fl6 = { .flowi6_oif = dev->ifindex, - .flowi6_iif = skb->skb_iif, + .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX, .flowi6_mark = skb->mark, }; int err; -- cgit v1.2.3-70-g09d2 From 1d2514818a2c3d94dd250e6027cb928a4e192548 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 15 Apr 2014 19:16:41 -0700 Subject: net: mdio-gpio: Add support for active low gpio pins Some systems using mdio-gpio may use active-low gpio pins (eg with inverters or FETs connected to all or some of the gpio pins). Signed-off-by: Guenter Roeck Signed-off-by: David S. Miller --- drivers/net/phy/mdio-gpio.c | 19 +++++++++++++------ include/linux/mdio-gpio.h | 3 +++ 2 files changed, 16 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/net/phy/mdio-gpio.c b/drivers/net/phy/mdio-gpio.c index e853066c805..fac211a5001 100644 --- a/drivers/net/phy/mdio-gpio.c +++ b/drivers/net/phy/mdio-gpio.c @@ -33,28 +33,32 @@ struct mdio_gpio_info { struct mdiobb_ctrl ctrl; int mdc, mdio; + int mdc_active_low, mdio_active_low; }; static void *mdio_gpio_of_get_data(struct platform_device *pdev) { struct device_node *np = pdev->dev.of_node; struct mdio_gpio_platform_data *pdata; + enum of_gpio_flags flags; int ret; pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL); if (!pdata) return NULL; - ret = of_get_gpio(np, 0); + ret = of_get_gpio_flags(np, 0, &flags); if (ret < 0) return NULL; pdata->mdc = ret; + pdata->mdc_active_low = flags & OF_GPIO_ACTIVE_LOW; - ret = of_get_gpio(np, 1); + ret = of_get_gpio_flags(np, 1, &flags); if (ret < 0) return NULL; pdata->mdio = ret; + pdata->mdio_active_low = flags & OF_GPIO_ACTIVE_LOW; return pdata; } @@ -65,7 +69,8 @@ static void mdio_dir(struct mdiobb_ctrl *ctrl, int dir) container_of(ctrl, struct mdio_gpio_info, ctrl); if (dir) - gpio_direction_output(bitbang->mdio, 1); + gpio_direction_output(bitbang->mdio, + 1 ^ bitbang->mdio_active_low); else gpio_direction_input(bitbang->mdio); } @@ -75,7 +80,7 @@ static int mdio_get(struct mdiobb_ctrl *ctrl) struct mdio_gpio_info *bitbang = container_of(ctrl, struct mdio_gpio_info, ctrl); - return gpio_get_value(bitbang->mdio); + return gpio_get_value(bitbang->mdio) ^ bitbang->mdio_active_low; } static void mdio_set(struct mdiobb_ctrl *ctrl, int what) @@ -83,7 +88,7 @@ static void mdio_set(struct mdiobb_ctrl *ctrl, int what) struct mdio_gpio_info *bitbang = container_of(ctrl, struct mdio_gpio_info, ctrl); - gpio_set_value(bitbang->mdio, what); + gpio_set_value(bitbang->mdio, what ^ bitbang->mdio_active_low); } static void mdc_set(struct mdiobb_ctrl *ctrl, int what) @@ -91,7 +96,7 @@ static void mdc_set(struct 
mdiobb_ctrl *ctrl, int what) struct mdio_gpio_info *bitbang = container_of(ctrl, struct mdio_gpio_info, ctrl); - gpio_set_value(bitbang->mdc, what); + gpio_set_value(bitbang->mdc, what ^ bitbang->mdc_active_low); } static struct mdiobb_ops mdio_gpio_ops = { @@ -117,7 +122,9 @@ static struct mii_bus *mdio_gpio_bus_init(struct device *dev, bitbang->ctrl.ops = &mdio_gpio_ops; bitbang->ctrl.reset = pdata->reset; bitbang->mdc = pdata->mdc; + bitbang->mdc_active_low = pdata->mdc_active_low; bitbang->mdio = pdata->mdio; + bitbang->mdio_active_low = pdata->mdio_active_low; new_bus = alloc_mdio_bitbang(&bitbang->ctrl); if (!new_bus) diff --git a/include/linux/mdio-gpio.h b/include/linux/mdio-gpio.h index 7c9fe3c2be7..57e57fe6055 100644 --- a/include/linux/mdio-gpio.h +++ b/include/linux/mdio-gpio.h @@ -18,6 +18,9 @@ struct mdio_gpio_platform_data { unsigned int mdc; unsigned int mdio; + bool mdc_active_low; + bool mdio_active_low; + unsigned int phy_mask; int irqs[PHY_MAX_ADDR]; /* reset callback */ -- cgit v1.2.3-70-g09d2 From f1d54c47502f42f5c4b89dacbe845ecd87ca002e Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 15 Apr 2014 19:16:42 -0700 Subject: net: mdio-gpio: Add support for separate MDI and MDO gpio pins This is for a system with fixed assignments of input and output pins (various variants of Kontron COMe). Signed-off-by: Guenter Roeck Signed-off-by: David S. Miller --- drivers/net/phy/mdio-gpio.c | 34 +++++++++++++++++++++++++++++++--- include/linux/mdio-gpio.h | 2 ++ 2 files changed, 33 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/net/phy/mdio-gpio.c b/drivers/net/phy/mdio-gpio.c index fac211a5001..9c4defdec67 100644 --- a/drivers/net/phy/mdio-gpio.c +++ b/drivers/net/phy/mdio-gpio.c @@ -32,8 +32,8 @@ struct mdio_gpio_info { struct mdiobb_ctrl ctrl; - int mdc, mdio; - int mdc_active_low, mdio_active_low; + int mdc, mdio, mdo; + int mdc_active_low, mdio_active_low, mdo_active_low; }; static void *mdio_gpio_of_get_data(struct platform_device *pdev) @@ -60,6 +60,12 @@ static void *mdio_gpio_of_get_data(struct platform_device *pdev) pdata->mdio = ret; pdata->mdio_active_low = flags & OF_GPIO_ACTIVE_LOW; + ret = of_get_gpio_flags(np, 2, &flags); + if (ret > 0) { + pdata->mdo = ret; + pdata->mdo_active_low = flags & OF_GPIO_ACTIVE_LOW; + } + return pdata; } @@ -68,6 +74,16 @@ static void mdio_dir(struct mdiobb_ctrl *ctrl, int dir) struct mdio_gpio_info *bitbang = container_of(ctrl, struct mdio_gpio_info, ctrl); + if (bitbang->mdo) { + /* Separate output pin. Always set its value to high + * when changing direction. If direction is input, + * assume the pin serves as pull-up. If direction is + * output, the default value is high. 
+ */ + gpio_set_value(bitbang->mdo, 1 ^ bitbang->mdo_active_low); + return; + } + if (dir) gpio_direction_output(bitbang->mdio, 1 ^ bitbang->mdio_active_low); @@ -88,7 +104,10 @@ static void mdio_set(struct mdiobb_ctrl *ctrl, int what) struct mdio_gpio_info *bitbang = container_of(ctrl, struct mdio_gpio_info, ctrl); - gpio_set_value(bitbang->mdio, what ^ bitbang->mdio_active_low); + if (bitbang->mdo) + gpio_set_value(bitbang->mdo, what ^ bitbang->mdo_active_low); + else + gpio_set_value(bitbang->mdio, what ^ bitbang->mdio_active_low); } static void mdc_set(struct mdiobb_ctrl *ctrl, int what) @@ -125,6 +144,8 @@ static struct mii_bus *mdio_gpio_bus_init(struct device *dev, bitbang->mdc_active_low = pdata->mdc_active_low; bitbang->mdio = pdata->mdio; bitbang->mdio_active_low = pdata->mdio_active_low; + bitbang->mdo = pdata->mdo; + bitbang->mdo_active_low = pdata->mdo_active_low; new_bus = alloc_mdio_bitbang(&bitbang->ctrl); if (!new_bus) @@ -151,6 +172,13 @@ static struct mii_bus *mdio_gpio_bus_init(struct device *dev, if (devm_gpio_request(dev, bitbang->mdio, "mdio")) goto out_free_bus; + if (bitbang->mdo) { + if (devm_gpio_request(dev, bitbang->mdo, "mdo")) + goto out_free_bus; + gpio_direction_output(bitbang->mdo, 1); + gpio_direction_input(bitbang->mdio); + } + gpio_direction_output(bitbang->mdc, 0); dev_set_drvdata(dev, new_bus); diff --git a/include/linux/mdio-gpio.h b/include/linux/mdio-gpio.h index 57e57fe6055..66c30a763b1 100644 --- a/include/linux/mdio-gpio.h +++ b/include/linux/mdio-gpio.h @@ -17,9 +17,11 @@ struct mdio_gpio_platform_data { /* GPIO numbers for bus pins */ unsigned int mdc; unsigned int mdio; + unsigned int mdo; bool mdc_active_low; bool mdio_active_low; + bool mdo_active_low; unsigned int phy_mask; int irqs[PHY_MAX_ADDR]; -- cgit v1.2.3-70-g09d2 From 314672a2c2780212fb770bb02d2fffaa1019823f Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Wed, 9 Apr 2014 17:24:16 -0700 Subject: Tools: hv: Handle the case when the target file exists correctly Return the appropriate error code and handle the case when the target file exists correctly. This fixes a bug. Signed-off-by: K. Y. Srinivasan Cc: [3.14] Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/hyperv.h | 1 + tools/hv/hv_fcopy_daemon.c | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/hyperv.h b/include/uapi/linux/hyperv.h index 9beb7c99163..78e4a86030d 100644 --- a/include/uapi/linux/hyperv.h +++ b/include/uapi/linux/hyperv.h @@ -305,6 +305,7 @@ enum hv_kvp_exchg_pool { #define HV_ERROR_DEVICE_NOT_CONNECTED 0x8007048F #define HV_INVALIDARG 0x80070057 #define HV_GUID_NOTFOUND 0x80041002 +#define HV_ERROR_ALREADY_EXISTS 0x80070050 #define ADDR_FAMILY_NONE 0x00 #define ADDR_FAMILY_IPV4 0x01 diff --git a/tools/hv/hv_fcopy_daemon.c b/tools/hv/hv_fcopy_daemon.c index 4ecc4fd0bc1..fba1c75aa48 100644 --- a/tools/hv/hv_fcopy_daemon.c +++ b/tools/hv/hv_fcopy_daemon.c @@ -82,8 +82,10 @@ static int hv_start_fcopy(struct hv_start_fcopy *smsg) if (!access(target_fname, F_OK)) { syslog(LOG_INFO, "File: %s exists", target_fname); - if (!smsg->copy_flags & OVER_WRITE) + if (!(smsg->copy_flags & OVER_WRITE)) { + error = HV_ERROR_ALREADY_EXISTS; goto done; + } } target_fd = open(target_fname, O_RDWR | O_CREAT | O_CLOEXEC, 0744); -- cgit v1.2.3-70-g09d2 From 03367ef5ea811475187a0732aada068919e14d61 Mon Sep 17 00:00:00 2001 From: "K. Y. 
Srinivasan" Date: Thu, 3 Apr 2014 18:02:45 -0700 Subject: Drivers: hv: vmbus: Negotiate version 3.0 when running on ws2012r2 hosts Only ws2012r2 hosts support the ability to reconnect to the host on VMBUS. This functionality is needed by kexec in Linux. To use this functionality we need to negotiate version 3.0 of the VMBUS protocol. Signed-off-by: K. Y. Srinivasan Cc: [3.9+] Signed-off-by: Greg Kroah-Hartman --- drivers/hv/connection.c | 5 ++++- include/linux/hyperv.h | 4 +++- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c index f2d7bf90c9f..2e7801af466 100644 --- a/drivers/hv/connection.c +++ b/drivers/hv/connection.c @@ -55,6 +55,9 @@ static __u32 vmbus_get_next_version(__u32 current_version) case (VERSION_WIN8): return VERSION_WIN7; + case (VERSION_WIN8_1): + return VERSION_WIN8; + case (VERSION_WS2008): default: return VERSION_INVAL; @@ -77,7 +80,7 @@ static int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, msg->interrupt_page = virt_to_phys(vmbus_connection.int_page); msg->monitor_page1 = virt_to_phys(vmbus_connection.monitor_pages[0]); msg->monitor_page2 = virt_to_phys(vmbus_connection.monitor_pages[1]); - if (version == VERSION_WIN8) + if (version == VERSION_WIN8_1) msg->target_vcpu = hv_context.vp_index[smp_processor_id()]; /* diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index ab7359fde98..2d7b4f139c3 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -147,15 +147,17 @@ hv_get_ringbuffer_availbytes(struct hv_ring_buffer_info *rbi, * 0 . 13 (Windows Server 2008) * 1 . 1 (Windows 7) * 2 . 4 (Windows 8) + * 3 . 0 (Windows 8 R2) */ #define VERSION_WS2008 ((0 << 16) | (13)) #define VERSION_WIN7 ((1 << 16) | (1)) #define VERSION_WIN8 ((2 << 16) | (4)) +#define VERSION_WIN8_1 ((3 << 16) | (0)) #define VERSION_INVAL -1 -#define VERSION_CURRENT VERSION_WIN8 +#define VERSION_CURRENT VERSION_WIN8_1 /* Make maximum size of pipe payload of 16K */ #define MAX_PIPE_DATA_PAYLOAD (sizeof(u8) * 16384) -- cgit v1.2.3-70-g09d2 From 89986496de141213206d49450ffdd36098d41209 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Mon, 14 Apr 2014 09:46:54 -0500 Subject: ipmi: Turn off all activity on an idle ipmi interface The IPMI driver would wake up periodically looking for events and watchdog pretimeouts. If there is nothing waiting for these events, it's really kind of pointless to be checking for them. So modify the driver so the message handler can pass down if it needs the lower layer to be waiting for these. Modify the system interface lower layer to turn off all timer and thread activity if the upper layer doesn't need anything and it is not currently handling messages. And modify the message handler to not restart the timer if its timer is not needed. The timers and kthread will still be enabled if: - the SI interface is handling a message. - a user has enabled watching for events. - the IPMI watchdog timer is in use (since it uses pretimeouts). - the message handler is waiting on a remote response. - a user has registered to receive commands. This mostly affects interfaces without interrupts. Interfaces with interrupts already don't use CPU in the system interface when the interface is idle. 
Signed-off-by: Corey Minyard Signed-off-by: Linus Torvalds --- drivers/char/ipmi/ipmi_msghandler.c | 219 ++++++++++++++++++++++-------------- drivers/char/ipmi/ipmi_si_intf.c | 51 +++++++-- include/linux/ipmi.h | 2 +- include/linux/ipmi_smi.h | 7 ++ 4 files changed, 182 insertions(+), 97 deletions(-) (limited to 'include') diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index ec4e10fcf1a..872c4ec79bf 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -55,6 +55,7 @@ static struct ipmi_recv_msg *ipmi_alloc_recv_msg(void); static int ipmi_init_msghandler(void); static void smi_recv_tasklet(unsigned long); static void handle_new_recv_msgs(ipmi_smi_t intf); +static void need_waiter(ipmi_smi_t intf); static int initialized; @@ -73,6 +74,20 @@ static struct proc_dir_entry *proc_ipmi_root; */ #define MAX_MSG_TIMEOUT 60000 +/* Call every ~1000 ms. */ +#define IPMI_TIMEOUT_TIME 1000 + +/* How many jiffies does it take to get to the timeout time. */ +#define IPMI_TIMEOUT_JIFFIES ((IPMI_TIMEOUT_TIME * HZ) / 1000) + +/* + * Request events from the queue every second (this is the number of + * IPMI_TIMEOUT_TIMES between event requests). Hopefully, in the + * future, IPMI will add a way to know immediately if an event is in + * the queue and this silliness can go away. + */ +#define IPMI_REQUEST_EV_TIME (1000 / (IPMI_TIMEOUT_TIME)) + /* * The main "user" data structure. */ @@ -92,7 +107,7 @@ struct ipmi_user { ipmi_smi_t intf; /* Does this interface receive IPMI events? */ - int gets_events; + bool gets_events; }; struct cmd_rcvr { @@ -383,6 +398,9 @@ struct ipmi_smi { unsigned int waiting_events_count; /* How many events in queue? */ char delivering_events; char event_msg_printed; + atomic_t event_waiters; + unsigned int ticks_to_req_ev; + int last_needs_timer; /* * The event receiver for my BMC, only really used at panic @@ -451,7 +469,6 @@ static DEFINE_MUTEX(ipmi_interfaces_mutex); static LIST_HEAD(smi_watchers); static DEFINE_MUTEX(smi_watchers_mutex); - #define ipmi_inc_stat(intf, stat) \ atomic_inc(&(intf)->stats[IPMI_STAT_ ## stat]) #define ipmi_get_stat(intf, stat) \ @@ -772,6 +789,7 @@ static int intf_next_seq(ipmi_smi_t intf, *seq = i; *seqid = intf->seq_table[i].seqid; intf->curr_seq = (i+1)%IPMI_IPMB_NUM_SEQ; + need_waiter(intf); } else { rv = -EAGAIN; } @@ -941,7 +959,7 @@ int ipmi_create_user(unsigned int if_num, new_user->handler = handler; new_user->handler_data = handler_data; new_user->intf = intf; - new_user->gets_events = 0; + new_user->gets_events = false; if (!try_module_get(intf->handlers->owner)) { rv = -ENODEV; @@ -966,6 +984,11 @@ int ipmi_create_user(unsigned int if_num, spin_lock_irqsave(&intf->seq_lock, flags); list_add_rcu(&new_user->link, &intf->users); spin_unlock_irqrestore(&intf->seq_lock, flags); + if (handler->ipmi_watchdog_pretimeout) { + /* User wants pretimeouts, so make sure to watch for them. */ + if (atomic_inc_return(&intf->event_waiters) == 1) + need_waiter(intf); + } *user = new_user; return 0; @@ -1021,6 +1044,12 @@ int ipmi_destroy_user(ipmi_user_t user) user->valid = 0; + if (user->handler->ipmi_watchdog_pretimeout) + atomic_dec(&intf->event_waiters); + + if (user->gets_events) + atomic_dec(&intf->event_waiters); + /* Remove the user from the interface's sequence table. 
*/ spin_lock_irqsave(&intf->seq_lock, flags); list_del_rcu(&user->link); @@ -1184,7 +1213,7 @@ int ipmi_set_maintenance_mode(ipmi_user_t user, int mode) } EXPORT_SYMBOL(ipmi_set_maintenance_mode); -int ipmi_set_gets_events(ipmi_user_t user, int val) +int ipmi_set_gets_events(ipmi_user_t user, bool val) { unsigned long flags; ipmi_smi_t intf = user->intf; @@ -1194,8 +1223,18 @@ int ipmi_set_gets_events(ipmi_user_t user, int val) INIT_LIST_HEAD(&msgs); spin_lock_irqsave(&intf->events_lock, flags); + if (user->gets_events == val) + goto out; + user->gets_events = val; + if (val) { + if (atomic_inc_return(&intf->event_waiters) == 1) + need_waiter(intf); + } else { + atomic_dec(&intf->event_waiters); + } + if (intf->delivering_events) /* * Another thread is delivering events for this, so @@ -1289,6 +1328,9 @@ int ipmi_register_for_cmd(ipmi_user_t user, goto out_unlock; } + if (atomic_inc_return(&intf->event_waiters) == 1) + need_waiter(intf); + list_add_rcu(&rcvr->link, &intf->cmd_rcvrs); out_unlock: @@ -1330,6 +1372,7 @@ int ipmi_unregister_for_cmd(ipmi_user_t user, mutex_unlock(&intf->cmd_rcvrs_mutex); synchronize_rcu(); while (rcvrs) { + atomic_dec(&intf->event_waiters); rcvr = rcvrs; rcvrs = rcvr->next; kfree(rcvr); @@ -2876,6 +2919,8 @@ int ipmi_register_smi(struct ipmi_smi_handlers *handlers, (unsigned long) intf); atomic_set(&intf->watchdog_pretimeouts_to_deliver, 0); spin_lock_init(&intf->events_lock); + atomic_set(&intf->event_waiters, 0); + intf->ticks_to_req_ev = IPMI_REQUEST_EV_TIME; INIT_LIST_HEAD(&intf->waiting_events); intf->waiting_events_count = 0; mutex_init(&intf->cmd_rcvrs_mutex); @@ -3965,7 +4010,8 @@ smi_from_recv_msg(ipmi_smi_t intf, struct ipmi_recv_msg *recv_msg, static void check_msg_timeout(ipmi_smi_t intf, struct seq_table *ent, struct list_head *timeouts, long timeout_period, - int slot, unsigned long *flags) + int slot, unsigned long *flags, + unsigned int *waiting_msgs) { struct ipmi_recv_msg *msg; struct ipmi_smi_handlers *handlers; @@ -3977,8 +4023,10 @@ static void check_msg_timeout(ipmi_smi_t intf, struct seq_table *ent, return; ent->timeout -= timeout_period; - if (ent->timeout > 0) + if (ent->timeout > 0) { + (*waiting_msgs)++; return; + } if (ent->retries_left == 0) { /* The message has used all its retries. */ @@ -3995,6 +4043,8 @@ static void check_msg_timeout(ipmi_smi_t intf, struct seq_table *ent, struct ipmi_smi_msg *smi_msg; /* More retries, send again. */ + (*waiting_msgs)++; + /* * Start with the max timer, set to normal timer after * the message is sent. @@ -4040,117 +4090,118 @@ static void check_msg_timeout(ipmi_smi_t intf, struct seq_table *ent, } } -static void ipmi_timeout_handler(long timeout_period) +static unsigned int ipmi_timeout_handler(ipmi_smi_t intf, long timeout_period) { - ipmi_smi_t intf; struct list_head timeouts; struct ipmi_recv_msg *msg, *msg2; unsigned long flags; int i; + unsigned int waiting_msgs = 0; - rcu_read_lock(); - list_for_each_entry_rcu(intf, &ipmi_interfaces, link) { - tasklet_schedule(&intf->recv_tasklet); - - /* - * Go through the seq table and find any messages that - * have timed out, putting them in the timeouts - * list. - */ - INIT_LIST_HEAD(&timeouts); - spin_lock_irqsave(&intf->seq_lock, flags); - for (i = 0; i < IPMI_IPMB_NUM_SEQ; i++) - check_msg_timeout(intf, &(intf->seq_table[i]), - &timeouts, timeout_period, i, - &flags); - spin_unlock_irqrestore(&intf->seq_lock, flags); + /* + * Go through the seq table and find any messages that + * have timed out, putting them in the timeouts + * list. 
+ */ + INIT_LIST_HEAD(&timeouts); + spin_lock_irqsave(&intf->seq_lock, flags); + for (i = 0; i < IPMI_IPMB_NUM_SEQ; i++) + check_msg_timeout(intf, &(intf->seq_table[i]), + &timeouts, timeout_period, i, + &flags, &waiting_msgs); + spin_unlock_irqrestore(&intf->seq_lock, flags); - list_for_each_entry_safe(msg, msg2, &timeouts, link) - deliver_err_response(msg, IPMI_TIMEOUT_COMPLETION_CODE); + list_for_each_entry_safe(msg, msg2, &timeouts, link) + deliver_err_response(msg, IPMI_TIMEOUT_COMPLETION_CODE); - /* - * Maintenance mode handling. Check the timeout - * optimistically before we claim the lock. It may - * mean a timeout gets missed occasionally, but that - * only means the timeout gets extended by one period - * in that case. No big deal, and it avoids the lock - * most of the time. - */ + /* + * Maintenance mode handling. Check the timeout + * optimistically before we claim the lock. It may + * mean a timeout gets missed occasionally, but that + * only means the timeout gets extended by one period + * in that case. No big deal, and it avoids the lock + * most of the time. + */ + if (intf->auto_maintenance_timeout > 0) { + spin_lock_irqsave(&intf->maintenance_mode_lock, flags); if (intf->auto_maintenance_timeout > 0) { - spin_lock_irqsave(&intf->maintenance_mode_lock, flags); - if (intf->auto_maintenance_timeout > 0) { - intf->auto_maintenance_timeout - -= timeout_period; - if (!intf->maintenance_mode - && (intf->auto_maintenance_timeout <= 0)) { - intf->maintenance_mode_enable = 0; - maintenance_mode_update(intf); - } + intf->auto_maintenance_timeout + -= timeout_period; + if (!intf->maintenance_mode + && (intf->auto_maintenance_timeout <= 0)) { + intf->maintenance_mode_enable = 0; + maintenance_mode_update(intf); } - spin_unlock_irqrestore(&intf->maintenance_mode_lock, - flags); } + spin_unlock_irqrestore(&intf->maintenance_mode_lock, + flags); } - rcu_read_unlock(); + + tasklet_schedule(&intf->recv_tasklet); + + return waiting_msgs; } -static void ipmi_request_event(void) +static void ipmi_request_event(ipmi_smi_t intf) { - ipmi_smi_t intf; struct ipmi_smi_handlers *handlers; - rcu_read_lock(); - /* - * Called from the timer, no need to check if handlers is - * valid. - */ - list_for_each_entry_rcu(intf, &ipmi_interfaces, link) { - /* No event requests when in maintenance mode. */ - if (intf->maintenance_mode_enable) - continue; + /* No event requests when in maintenance mode. */ + if (intf->maintenance_mode_enable) + return; - handlers = intf->handlers; - if (handlers) - handlers->request_events(intf->send_info); - } - rcu_read_unlock(); + handlers = intf->handlers; + if (handlers) + handlers->request_events(intf->send_info); } static struct timer_list ipmi_timer; -/* Call every ~1000 ms. */ -#define IPMI_TIMEOUT_TIME 1000 - -/* How many jiffies does it take to get to the timeout time. */ -#define IPMI_TIMEOUT_JIFFIES ((IPMI_TIMEOUT_TIME * HZ) / 1000) - -/* - * Request events from the queue every second (this is the number of - * IPMI_TIMEOUT_TIMES between event requests). Hopefully, in the - * future, IPMI will add a way to know immediately if an event is in - * the queue and this silliness can go away. 
- */ -#define IPMI_REQUEST_EV_TIME (1000 / (IPMI_TIMEOUT_TIME)) - static atomic_t stop_operation; -static unsigned int ticks_to_req_ev = IPMI_REQUEST_EV_TIME; static void ipmi_timeout(unsigned long data) { + ipmi_smi_t intf; + int nt = 0; + if (atomic_read(&stop_operation)) return; - ticks_to_req_ev--; - if (ticks_to_req_ev == 0) { - ipmi_request_event(); - ticks_to_req_ev = IPMI_REQUEST_EV_TIME; - } + rcu_read_lock(); + list_for_each_entry_rcu(intf, &ipmi_interfaces, link) { + int lnt = 0; + + if (atomic_read(&intf->event_waiters)) { + intf->ticks_to_req_ev--; + if (intf->ticks_to_req_ev == 0) { + ipmi_request_event(intf); + intf->ticks_to_req_ev = IPMI_REQUEST_EV_TIME; + } + lnt++; + } - ipmi_timeout_handler(IPMI_TIMEOUT_TIME); + lnt += ipmi_timeout_handler(intf, IPMI_TIMEOUT_TIME); - mod_timer(&ipmi_timer, jiffies + IPMI_TIMEOUT_JIFFIES); + lnt = !!lnt; + if (lnt != intf->last_needs_timer && + intf->handlers->set_need_watch) + intf->handlers->set_need_watch(intf->send_info, lnt); + intf->last_needs_timer = lnt; + + nt += lnt; + } + rcu_read_unlock(); + + if (nt) + mod_timer(&ipmi_timer, jiffies + IPMI_TIMEOUT_JIFFIES); } +static void need_waiter(ipmi_smi_t intf) +{ + /* Racy, but worst case we start the timer twice. */ + if (!timer_pending(&ipmi_timer)) + mod_timer(&ipmi_timer, jiffies + IPMI_TIMEOUT_JIFFIES); +} static atomic_t smi_msg_inuse_count = ATOMIC_INIT(0); static atomic_t recv_msg_inuse_count = ATOMIC_INIT(0); diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 25a1436a429..444ea548dfe 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -257,6 +257,9 @@ struct smi_info { /* Used to gracefully stop the timer without race conditions. */ atomic_t stop_operation; + /* Are we waiting for the events, pretimeouts, received msgs? */ + atomic_t need_watch; + /* * The driver will disable interrupts when it gets into a * situation where it cannot handle messages due to lack of @@ -862,6 +865,19 @@ static enum si_sm_result smi_event_handler(struct smi_info *smi_info, return si_sm_result; } +static void check_start_timer_thread(struct smi_info *smi_info) +{ + if (smi_info->si_state == SI_NORMAL && smi_info->curr_msg == NULL) { + smi_mod_timer(smi_info, jiffies + SI_TIMEOUT_JIFFIES); + + if (smi_info->thread) + wake_up_process(smi_info->thread); + + start_next_msg(smi_info); + smi_event_handler(smi_info, 0); + } +} + static void sender(void *send_info, struct ipmi_smi_msg *msg, int priority) @@ -915,15 +931,7 @@ static void sender(void *send_info, else list_add_tail(&msg->link, &smi_info->xmit_msgs); - if (smi_info->si_state == SI_NORMAL && smi_info->curr_msg == NULL) { - smi_mod_timer(smi_info, jiffies + SI_TIMEOUT_JIFFIES); - - if (smi_info->thread) - wake_up_process(smi_info->thread); - - start_next_msg(smi_info); - smi_event_handler(smi_info, 0); - } + check_start_timer_thread(smi_info); spin_unlock_irqrestore(&smi_info->si_lock, flags); } @@ -1023,9 +1031,15 @@ static int ipmi_thread(void *data) ; /* do nothing */ else if (smi_result == SI_SM_CALL_WITH_DELAY && busy_wait) schedule(); - else if (smi_result == SI_SM_IDLE) - schedule_timeout_interruptible(100); - else + else if (smi_result == SI_SM_IDLE) { + if (atomic_read(&smi_info->need_watch)) { + schedule_timeout_interruptible(100); + } else { + /* Wait to be woken up when we are needed. 
*/ + __set_current_state(TASK_INTERRUPTIBLE); + schedule(); + } + } else schedule_timeout_interruptible(1); } return 0; @@ -1061,6 +1075,17 @@ static void request_events(void *send_info) atomic_set(&smi_info->req_events, 1); } +static void set_need_watch(void *send_info, int enable) +{ + struct smi_info *smi_info = send_info; + unsigned long flags; + + atomic_set(&smi_info->need_watch, enable); + spin_lock_irqsave(&smi_info->si_lock, flags); + check_start_timer_thread(smi_info); + spin_unlock_irqrestore(&smi_info->si_lock, flags); +} + static int initialized; static void smi_timeout(unsigned long data) @@ -1212,6 +1237,7 @@ static struct ipmi_smi_handlers handlers = { .get_smi_info = get_smi_info, .sender = sender, .request_events = request_events, + .set_need_watch = set_need_watch, .set_maintenance_mode = set_maintenance_mode, .set_run_to_completion = set_run_to_completion, .poll = poll, @@ -3352,6 +3378,7 @@ static int try_smi_init(struct smi_info *new_smi) new_smi->interrupt_disabled = 1; atomic_set(&new_smi->stop_operation, 0); + atomic_set(&new_smi->need_watch, 0); new_smi->intf_num = smi_num; smi_num++; diff --git a/include/linux/ipmi.h b/include/linux/ipmi.h index 1f9f56e2885..76d2acbfa7c 100644 --- a/include/linux/ipmi.h +++ b/include/linux/ipmi.h @@ -237,7 +237,7 @@ int ipmi_set_maintenance_mode(ipmi_user_t user, int mode); * The first user that sets this to TRUE will receive all events that * have been queued while no one was waiting for events. */ -int ipmi_set_gets_events(ipmi_user_t user, int val); +int ipmi_set_gets_events(ipmi_user_t user, bool val); /* * Called when a new SMI is registered. This will also be called on diff --git a/include/linux/ipmi_smi.h b/include/linux/ipmi_smi.h index 8ea3fe0b975..2a7ff302d99 100644 --- a/include/linux/ipmi_smi.h +++ b/include/linux/ipmi_smi.h @@ -109,6 +109,13 @@ struct ipmi_smi_handlers { events from the BMC we are attached to. */ void (*request_events)(void *send_info); + /* Called by the upper layer when some user requires that the + interface watch for events, received messages, watchdog + pretimeouts, or not. Used by the SMI to know if it should + watch for these. This may be NULL if the SMI does not + implement it. */ + void (*set_need_watch)(void *send_info, int enable); + /* Called when the interface should go into "run to completion" mode. If this call sets the value to true, the interface should make sure that all messages are flushed -- cgit v1.2.3-70-g09d2 From 7aefac26fc67158cb8826a5f5bfc2a5086a7d962 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Mon, 14 Apr 2014 09:46:56 -0500 Subject: ipmi: boolify some things Convert some ints to bools. Signed-off-by: Corey Minyard Signed-off-by: Linus Torvalds --- drivers/char/ipmi/ipmi_msghandler.c | 22 ++++++++-------- drivers/char/ipmi/ipmi_si_intf.c | 50 ++++++++++++++++++------------------- include/linux/ipmi_smi.h | 6 ++--- 3 files changed, 38 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index 872c4ec79bf..e6db9381b2c 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -94,8 +94,8 @@ static struct proc_dir_entry *proc_ipmi_root; struct ipmi_user { struct list_head link; - /* Set to "0" when the user is destroyed. */ - int valid; + /* Set to false when the user is destroyed. */ + bool valid; struct kref refcount; @@ -413,7 +413,7 @@ struct ipmi_smi { /* For handling of maintenance mode. 
*/ int maintenance_mode; - int maintenance_mode_enable; + bool maintenance_mode_enable; int auto_maintenance_timeout; spinlock_t maintenance_mode_lock; /* Used in a timer... */ @@ -980,7 +980,7 @@ int ipmi_create_user(unsigned int if_num, */ mutex_unlock(&ipmi_interfaces_mutex); - new_user->valid = 1; + new_user->valid = true; spin_lock_irqsave(&intf->seq_lock, flags); list_add_rcu(&new_user->link, &intf->users); spin_unlock_irqrestore(&intf->seq_lock, flags); @@ -1042,7 +1042,7 @@ int ipmi_destroy_user(ipmi_user_t user) struct cmd_rcvr *rcvr; struct cmd_rcvr *rcvrs = NULL; - user->valid = 0; + user->valid = false; if (user->handler->ipmi_watchdog_pretimeout) atomic_dec(&intf->event_waiters); @@ -1184,25 +1184,23 @@ int ipmi_set_maintenance_mode(ipmi_user_t user, int mode) if (intf->maintenance_mode != mode) { switch (mode) { case IPMI_MAINTENANCE_MODE_AUTO: - intf->maintenance_mode = mode; intf->maintenance_mode_enable = (intf->auto_maintenance_timeout > 0); break; case IPMI_MAINTENANCE_MODE_OFF: - intf->maintenance_mode = mode; - intf->maintenance_mode_enable = 0; + intf->maintenance_mode_enable = false; break; case IPMI_MAINTENANCE_MODE_ON: - intf->maintenance_mode = mode; - intf->maintenance_mode_enable = 1; + intf->maintenance_mode_enable = true; break; default: rv = -EINVAL; goto out_unlock; } + intf->maintenance_mode = mode; maintenance_mode_update(intf); } @@ -1578,7 +1576,7 @@ static int i_ipmi_request(ipmi_user_t user, = IPMI_MAINTENANCE_MODE_TIMEOUT; if (!intf->maintenance_mode && !intf->maintenance_mode_enable) { - intf->maintenance_mode_enable = 1; + intf->maintenance_mode_enable = true; maintenance_mode_update(intf); } spin_unlock_irqrestore(&intf->maintenance_mode_lock, @@ -4129,7 +4127,7 @@ static unsigned int ipmi_timeout_handler(ipmi_smi_t intf, long timeout_period) -= timeout_period; if (!intf->maintenance_mode && (intf->auto_maintenance_timeout <= 0)) { - intf->maintenance_mode_enable = 0; + intf->maintenance_mode_enable = false; maintenance_mode_update(intf); } } diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 444ea548dfe..1c4bb4f6ce9 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -217,7 +217,7 @@ struct smi_info { unsigned char msg_flags; /* Does the BMC have an event buffer? */ - char has_event_buffer; + bool has_event_buffer; /* * If set to true, this will request events the next time the @@ -230,7 +230,7 @@ struct smi_info { * call. Generally used after a panic to make sure stuff goes * out. */ - int run_to_completion; + bool run_to_completion; /* The I/O port of an SI interface. */ int port; @@ -266,7 +266,7 @@ struct smi_info { * memory. Once that situation clears up, it will re-enable * interrupts. */ - int interrupt_disabled; + bool interrupt_disabled; /* From the get device id response... */ struct ipmi_device_id device_id; @@ -279,7 +279,7 @@ struct smi_info { * True if we allocated the device, false if it came from * someplace else (like PCI). */ - int dev_registered; + bool dev_registered; /* Slave address, could be reported from DMI. 
*/ unsigned char slave_addr; @@ -303,19 +303,19 @@ struct smi_info { static int force_kipmid[SI_MAX_PARMS]; static int num_force_kipmid; #ifdef CONFIG_PCI -static int pci_registered; +static bool pci_registered; #endif #ifdef CONFIG_ACPI -static int pnp_registered; +static bool pnp_registered; #endif #ifdef CONFIG_PARISC -static int parisc_registered; +static bool parisc_registered; #endif static unsigned int kipmid_max_busy_us[SI_MAX_PARMS]; static int num_max_busy_us; -static int unload_when_empty = 1; +static bool unload_when_empty = true; static int add_smi(struct smi_info *smi); static int try_smi_init(struct smi_info *smi); @@ -457,7 +457,7 @@ static inline void disable_si_irq(struct smi_info *smi_info) { if ((smi_info->irq) && (!smi_info->interrupt_disabled)) { start_disable_irq(smi_info); - smi_info->interrupt_disabled = 1; + smi_info->interrupt_disabled = true; if (!atomic_read(&smi_info->stop_operation)) smi_mod_timer(smi_info, jiffies + SI_TIMEOUT_JIFFIES); } @@ -467,7 +467,7 @@ static inline void enable_si_irq(struct smi_info *smi_info) { if ((smi_info->irq) && (smi_info->interrupt_disabled)) { start_enable_irq(smi_info); - smi_info->interrupt_disabled = 0; + smi_info->interrupt_disabled = false; } } @@ -712,7 +712,7 @@ static void handle_transaction_done(struct smi_info *smi_info) dev_warn(smi_info->dev, "Maybe ok, but ipmi might run very slowly.\n"); } else - smi_info->interrupt_disabled = 0; + smi_info->interrupt_disabled = false; smi_info->si_state = SI_NORMAL; break; } @@ -935,7 +935,7 @@ static void sender(void *send_info, spin_unlock_irqrestore(&smi_info->si_lock, flags); } -static void set_run_to_completion(void *send_info, int i_run_to_completion) +static void set_run_to_completion(void *send_info, bool i_run_to_completion) { struct smi_info *smi_info = send_info; enum si_sm_result result; @@ -1050,7 +1050,7 @@ static void poll(void *send_info) { struct smi_info *smi_info = send_info; unsigned long flags = 0; - int run_to_completion = smi_info->run_to_completion; + bool run_to_completion = smi_info->run_to_completion; /* * Make sure there is some delay in the poll loop so we can @@ -1075,7 +1075,7 @@ static void request_events(void *send_info) atomic_set(&smi_info->req_events, 1); } -static void set_need_watch(void *send_info, int enable) +static void set_need_watch(void *send_info, bool enable) { struct smi_info *smi_info = send_info; unsigned long flags; @@ -1223,7 +1223,7 @@ static int get_smi_info(void *send_info, struct ipmi_smi_info *data) return 0; } -static void set_maintenance_mode(void *send_info, int enable) +static void set_maintenance_mode(void *send_info, bool enable) { struct smi_info *smi_info = send_info; @@ -1364,7 +1364,7 @@ module_param_array(force_kipmid, int, &num_force_kipmid, 0); MODULE_PARM_DESC(force_kipmid, "Force the kipmi daemon to be enabled (1) or" " disabled(0). Normally the IPMI driver auto-detects" " this, but the value may be overridden by this parm."); -module_param(unload_when_empty, int, 0); +module_param(unload_when_empty, bool, 0); MODULE_PARM_DESC(unload_when_empty, "Unload the module if no interfaces are" " specified or found, default is 1. 
Setting to 0" " is useful for hot add of devices using hotmod."); @@ -3372,11 +3372,11 @@ static int try_smi_init(struct smi_info *new_smi) INIT_LIST_HEAD(&(new_smi->hp_xmit_msgs)); new_smi->curr_msg = NULL; atomic_set(&new_smi->req_events, 0); - new_smi->run_to_completion = 0; + new_smi->run_to_completion = false; for (i = 0; i < SI_NUM_STATS; i++) atomic_set(&new_smi->stats[i], 0); - new_smi->interrupt_disabled = 1; + new_smi->interrupt_disabled = true; atomic_set(&new_smi->stop_operation, 0); atomic_set(&new_smi->need_watch, 0); new_smi->intf_num = smi_num; @@ -3384,7 +3384,7 @@ static int try_smi_init(struct smi_info *new_smi) rv = try_enable_event_buffer(new_smi); if (rv == 0) - new_smi->has_event_buffer = 1; + new_smi->has_event_buffer = true; /* * Start clearing the flags before we enable interrupts or the @@ -3418,7 +3418,7 @@ static int try_smi_init(struct smi_info *new_smi) rv); goto out_err; } - new_smi->dev_registered = 1; + new_smi->dev_registered = true; } rv = ipmi_register_smi(&handlers, @@ -3467,7 +3467,7 @@ static int try_smi_init(struct smi_info *new_smi) wait_for_timer_and_thread(new_smi); out_err: - new_smi->interrupt_disabled = 1; + new_smi->interrupt_disabled = true; if (new_smi->intf) { ipmi_unregister_smi(new_smi->intf); @@ -3503,7 +3503,7 @@ static int try_smi_init(struct smi_info *new_smi) if (new_smi->dev_registered) { platform_device_unregister(new_smi->pdev); - new_smi->dev_registered = 0; + new_smi->dev_registered = false; } return rv; @@ -3558,14 +3558,14 @@ static int init_ipmi_si(void) printk(KERN_ERR PFX "Unable to register " "PCI driver: %d\n", rv); else - pci_registered = 1; + pci_registered = true; } #endif #ifdef CONFIG_ACPI if (si_tryacpi) { pnp_register_driver(&ipmi_pnp_driver); - pnp_registered = 1; + pnp_registered = true; } #endif @@ -3581,7 +3581,7 @@ static int init_ipmi_si(void) #ifdef CONFIG_PARISC register_parisc_driver(&ipmi_parisc_driver); - parisc_registered = 1; + parisc_registered = true; /* poking PC IO addresses will crash machine, don't do it */ si_trydefaults = 0; #endif diff --git a/include/linux/ipmi_smi.h b/include/linux/ipmi_smi.h index 2a7ff302d99..bd349240d50 100644 --- a/include/linux/ipmi_smi.h +++ b/include/linux/ipmi_smi.h @@ -114,14 +114,14 @@ struct ipmi_smi_handlers { pretimeouts, or not. Used by the SMI to know if it should watch for these. This may be NULL if the SMI does not implement it. */ - void (*set_need_watch)(void *send_info, int enable); + void (*set_need_watch)(void *send_info, bool enable); /* Called when the interface should go into "run to completion" mode. If this call sets the value to true, the interface should make sure that all messages are flushed out and that none are pending, and any new requests are run to completion immediately. */ - void (*set_run_to_completion)(void *send_info, int run_to_completion); + void (*set_run_to_completion)(void *send_info, bool run_to_completion); /* Called to poll for work to do. This is so upper layers can poll for operations during things like crash dumps. */ @@ -132,7 +132,7 @@ struct ipmi_smi_handlers { setting. The message handler does the mode handling. Note that this is called from interrupt context, so it cannot block. */ - void (*set_maintenance_mode)(void *send_info, int enable); + void (*set_maintenance_mode)(void *send_info, bool enable); /* Tell the handler that we are using it/not using it. 
The message handler get the modules that this handler belongs -- cgit v1.2.3-70-g09d2 From 8d754544202c0e4ef02e9c1abdf379bcf7ef9384 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 10 Apr 2014 10:51:11 +0200 Subject: drm: Split out drm_probe_helper.c from drm_crtc_helper.c This is leftover stuff from my previous doc round which I kinda wanted to do but didn't yet due to rebase hell. The modeset helpers and the probing helpers a independent and e.g. i915 uses the probing stuff but has its own modeset infrastructure. It hence makes to split this up. While at it add a DOC: comment for the probing libraray. It would be rather neat to pull some of the DocBook documenting these two helpers into in-line DOC: comments. But unfortunately kerneldoc doesn't support markdown or something similar to make nice-looking documentation, so the current state is better. Signed-off-by: Daniel Vetter Signed-off-by: Dave Airlie --- Documentation/DocBook/drm.tmpl | 5 + drivers/gpu/drm/Makefile | 2 +- drivers/gpu/drm/drm_crtc_helper.c | 370 -------------------------------- drivers/gpu/drm/drm_probe_helper.c | 426 +++++++++++++++++++++++++++++++++++++ include/drm/drm_crtc_helper.h | 6 +- 5 files changed, 437 insertions(+), 372 deletions(-) create mode 100644 drivers/gpu/drm/drm_probe_helper.c (limited to 'include') diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl index 702c4474919..677a02553ec 100644 --- a/Documentation/DocBook/drm.tmpl +++ b/Documentation/DocBook/drm.tmpl @@ -2285,6 +2285,11 @@ void intel_crt_init(struct drm_device *dev) Modeset Helper Functions Reference !Edrivers/gpu/drm/drm_crtc_helper.c + + + Output Probing Helper Functions Reference +!Pdrivers/gpu/drm/drm_probe_helper.c output probing helper overview +!Edrivers/gpu/drm/drm_probe_helper.c fbdev Helper Functions Reference diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 9d25dbbe677..48e38ba2278 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -23,7 +23,7 @@ drm-$(CONFIG_DRM_PANEL) += drm_panel.o drm-usb-y := drm_usb.o -drm_kms_helper-y := drm_crtc_helper.o drm_dp_helper.o +drm_kms_helper-y := drm_crtc_helper.o drm_dp_helper.o drm_probe_helper.o drm_kms_helper-$(CONFIG_DRM_LOAD_EDID_FIRMWARE) += drm_edid_load.o drm_kms_helper-$(CONFIG_DRM_KMS_FB_HELPER) += drm_fb_helper.o drm_kms_helper-$(CONFIG_DRM_KMS_CMA_HELPER) += drm_fb_cma_helper.o diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c index c43825e8f5c..df281b54db0 100644 --- a/drivers/gpu/drm/drm_crtc_helper.c +++ b/drivers/gpu/drm/drm_crtc_helper.c @@ -72,147 +72,6 @@ void drm_helper_move_panel_connectors_to_head(struct drm_device *dev) } EXPORT_SYMBOL(drm_helper_move_panel_connectors_to_head); -static bool drm_kms_helper_poll = true; -module_param_named(poll, drm_kms_helper_poll, bool, 0600); - -static void drm_mode_validate_flag(struct drm_connector *connector, - int flags) -{ - struct drm_display_mode *mode; - - if (flags == (DRM_MODE_FLAG_DBLSCAN | DRM_MODE_FLAG_INTERLACE | - DRM_MODE_FLAG_3D_MASK)) - return; - - list_for_each_entry(mode, &connector->modes, head) { - if ((mode->flags & DRM_MODE_FLAG_INTERLACE) && - !(flags & DRM_MODE_FLAG_INTERLACE)) - mode->status = MODE_NO_INTERLACE; - if ((mode->flags & DRM_MODE_FLAG_DBLSCAN) && - !(flags & DRM_MODE_FLAG_DBLSCAN)) - mode->status = MODE_NO_DBLESCAN; - if ((mode->flags & DRM_MODE_FLAG_3D_MASK) && - !(flags & DRM_MODE_FLAG_3D_MASK)) - mode->status = MODE_NO_STEREO; - } - - return; -} - -/** - * 
drm_helper_probe_single_connector_modes - get complete set of display modes - * @connector: connector to probe - * @maxX: max width for modes - * @maxY: max height for modes - * - * Based on the helper callbacks implemented by @connector try to detect all - * valid modes. Modes will first be added to the connector's probed_modes list, - * then culled (based on validity and the @maxX, @maxY parameters) and put into - * the normal modes list. - * - * Intended to be use as a generic implementation of the ->fill_modes() - * @connector vfunc for drivers that use the crtc helpers for output mode - * filtering and detection. - * - * Returns: - * The number of modes found on @connector. - */ -int drm_helper_probe_single_connector_modes(struct drm_connector *connector, - uint32_t maxX, uint32_t maxY) -{ - struct drm_device *dev = connector->dev; - struct drm_display_mode *mode; - struct drm_connector_helper_funcs *connector_funcs = - connector->helper_private; - int count = 0; - int mode_flags = 0; - bool verbose_prune = true; - - WARN_ON(!mutex_is_locked(&dev->mode_config.mutex)); - - DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", connector->base.id, - drm_get_connector_name(connector)); - /* set all modes to the unverified state */ - list_for_each_entry(mode, &connector->modes, head) - mode->status = MODE_UNVERIFIED; - - if (connector->force) { - if (connector->force == DRM_FORCE_ON) - connector->status = connector_status_connected; - else - connector->status = connector_status_disconnected; - if (connector->funcs->force) - connector->funcs->force(connector); - } else { - connector->status = connector->funcs->detect(connector, true); - } - - /* Re-enable polling in case the global poll config changed. */ - if (drm_kms_helper_poll != dev->mode_config.poll_running) - drm_kms_helper_poll_enable(dev); - - dev->mode_config.poll_running = drm_kms_helper_poll; - - if (connector->status == connector_status_disconnected) { - DRM_DEBUG_KMS("[CONNECTOR:%d:%s] disconnected\n", - connector->base.id, drm_get_connector_name(connector)); - drm_mode_connector_update_edid_property(connector, NULL); - verbose_prune = false; - goto prune; - } - -#ifdef CONFIG_DRM_LOAD_EDID_FIRMWARE - count = drm_load_edid_firmware(connector); - if (count == 0) -#endif - count = (*connector_funcs->get_modes)(connector); - - if (count == 0 && connector->status == connector_status_connected) - count = drm_add_modes_noedid(connector, 1024, 768); - if (count == 0) - goto prune; - - drm_mode_connector_list_update(connector); - - if (maxX && maxY) - drm_mode_validate_size(dev, &connector->modes, maxX, maxY); - - if (connector->interlace_allowed) - mode_flags |= DRM_MODE_FLAG_INTERLACE; - if (connector->doublescan_allowed) - mode_flags |= DRM_MODE_FLAG_DBLSCAN; - if (connector->stereo_allowed) - mode_flags |= DRM_MODE_FLAG_3D_MASK; - drm_mode_validate_flag(connector, mode_flags); - - list_for_each_entry(mode, &connector->modes, head) { - if (mode->status == MODE_OK) - mode->status = connector_funcs->mode_valid(connector, - mode); - } - -prune: - drm_mode_prune_invalid(dev, &connector->modes, verbose_prune); - - if (list_empty(&connector->modes)) - return 0; - - list_for_each_entry(mode, &connector->modes, head) - mode->vrefresh = drm_mode_vrefresh(mode); - - drm_mode_sort(&connector->modes); - - DRM_DEBUG_KMS("[CONNECTOR:%d:%s] probed modes :\n", connector->base.id, - drm_get_connector_name(connector)); - list_for_each_entry(mode, &connector->modes, head) { - drm_mode_set_crtcinfo(mode, CRTC_INTERLACE_HALVE_V); - drm_mode_debug_printmodeline(mode); - 
} - - return count; -} -EXPORT_SYMBOL(drm_helper_probe_single_connector_modes); - /** * drm_helper_encoder_in_use - check if a given encoder is in use * @encoder: encoder to check @@ -1020,232 +879,3 @@ void drm_helper_resume_force_mode(struct drm_device *dev) drm_modeset_unlock_all(dev); } EXPORT_SYMBOL(drm_helper_resume_force_mode); - -/** - * drm_kms_helper_hotplug_event - fire off KMS hotplug events - * @dev: drm_device whose connector state changed - * - * This function fires off the uevent for userspace and also calls the - * output_poll_changed function, which is most commonly used to inform the fbdev - * emulation code and allow it to update the fbcon output configuration. - * - * Drivers should call this from their hotplug handling code when a change is - * detected. Note that this function does not do any output detection of its - * own, like drm_helper_hpd_irq_event() does - this is assumed to be done by the - * driver already. - * - * This function must be called from process context with no mode - * setting locks held. - */ -void drm_kms_helper_hotplug_event(struct drm_device *dev) -{ - /* send a uevent + call fbdev */ - drm_sysfs_hotplug_event(dev); - if (dev->mode_config.funcs->output_poll_changed) - dev->mode_config.funcs->output_poll_changed(dev); -} -EXPORT_SYMBOL(drm_kms_helper_hotplug_event); - -#define DRM_OUTPUT_POLL_PERIOD (10*HZ) -static void output_poll_execute(struct work_struct *work) -{ - struct delayed_work *delayed_work = to_delayed_work(work); - struct drm_device *dev = container_of(delayed_work, struct drm_device, mode_config.output_poll_work); - struct drm_connector *connector; - enum drm_connector_status old_status; - bool repoll = false, changed = false; - - if (!drm_kms_helper_poll) - return; - - mutex_lock(&dev->mode_config.mutex); - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { - - /* Ignore forced connectors. */ - if (connector->force) - continue; - - /* Ignore HPD capable connectors and connectors where we don't - * want any hotplug detection at all for polling. */ - if (!connector->polled || connector->polled == DRM_CONNECTOR_POLL_HPD) - continue; - - repoll = true; - - old_status = connector->status; - /* if we are connected and don't want to poll for disconnect - skip it */ - if (old_status == connector_status_connected && - !(connector->polled & DRM_CONNECTOR_POLL_DISCONNECT)) - continue; - - connector->status = connector->funcs->detect(connector, false); - if (old_status != connector->status) { - const char *old, *new; - - old = drm_get_connector_status_name(old_status); - new = drm_get_connector_status_name(connector->status); - - DRM_DEBUG_KMS("[CONNECTOR:%d:%s] " - "status updated from %s to %s\n", - connector->base.id, - drm_get_connector_name(connector), - old, new); - - changed = true; - } - } - - mutex_unlock(&dev->mode_config.mutex); - - if (changed) - drm_kms_helper_hotplug_event(dev); - - if (repoll) - schedule_delayed_work(delayed_work, DRM_OUTPUT_POLL_PERIOD); -} - -/** - * drm_kms_helper_poll_disable - disable output polling - * @dev: drm_device - * - * This function disables the output polling work. - * - * Drivers can call this helper from their device suspend implementation. It is - * not an error to call this even when output polling isn't enabled or arlready - * disabled. 
- */ -void drm_kms_helper_poll_disable(struct drm_device *dev) -{ - if (!dev->mode_config.poll_enabled) - return; - cancel_delayed_work_sync(&dev->mode_config.output_poll_work); -} -EXPORT_SYMBOL(drm_kms_helper_poll_disable); - -/** - * drm_kms_helper_poll_enable - re-enable output polling. - * @dev: drm_device - * - * This function re-enables the output polling work. - * - * Drivers can call this helper from their device resume implementation. It is - * an error to call this when the output polling support has not yet been set - * up. - */ -void drm_kms_helper_poll_enable(struct drm_device *dev) -{ - bool poll = false; - struct drm_connector *connector; - - if (!dev->mode_config.poll_enabled || !drm_kms_helper_poll) - return; - - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { - if (connector->polled & (DRM_CONNECTOR_POLL_CONNECT | - DRM_CONNECTOR_POLL_DISCONNECT)) - poll = true; - } - - if (poll) - schedule_delayed_work(&dev->mode_config.output_poll_work, DRM_OUTPUT_POLL_PERIOD); -} -EXPORT_SYMBOL(drm_kms_helper_poll_enable); - -/** - * drm_kms_helper_poll_init - initialize and enable output polling - * @dev: drm_device - * - * This function intializes and then also enables output polling support for - * @dev. Drivers which do not have reliable hotplug support in hardware can use - * this helper infrastructure to regularly poll such connectors for changes in - * their connection state. - * - * Drivers can control which connectors are polled by setting the - * DRM_CONNECTOR_POLL_CONNECT and DRM_CONNECTOR_POLL_DISCONNECT flags. On - * connectors where probing live outputs can result in visual distortion drivers - * should not set the DRM_CONNECTOR_POLL_DISCONNECT flag to avoid this. - * Connectors which have no flag or only DRM_CONNECTOR_POLL_HPD set are - * completely ignored by the polling logic. - * - * Note that a connector can be both polled and probed from the hotplug handler, - * in case the hotplug interrupt is known to be unreliable. - */ -void drm_kms_helper_poll_init(struct drm_device *dev) -{ - INIT_DELAYED_WORK(&dev->mode_config.output_poll_work, output_poll_execute); - dev->mode_config.poll_enabled = true; - - drm_kms_helper_poll_enable(dev); -} -EXPORT_SYMBOL(drm_kms_helper_poll_init); - -/** - * drm_kms_helper_poll_fini - disable output polling and clean it up - * @dev: drm_device - */ -void drm_kms_helper_poll_fini(struct drm_device *dev) -{ - drm_kms_helper_poll_disable(dev); -} -EXPORT_SYMBOL(drm_kms_helper_poll_fini); - -/** - * drm_helper_hpd_irq_event - hotplug processing - * @dev: drm_device - * - * Drivers can use this helper function to run a detect cycle on all connectors - * which have the DRM_CONNECTOR_POLL_HPD flag set in their &polled member. All - * other connectors are ignored, which is useful to avoid reprobing fixed - * panels. - * - * This helper function is useful for drivers which can't or don't track hotplug - * interrupts for each connector. - * - * Drivers which support hotplug interrupts for each connector individually and - * which have a more fine-grained detect logic should bypass this code and - * directly call drm_kms_helper_hotplug_event() in case the connector state - * changed. - * - * This function must be called from process context with no mode - * setting locks held. - * - * Note that a connector can be both polled and probed from the hotplug handler, - * in case the hotplug interrupt is known to be unreliable. 
- */ -bool drm_helper_hpd_irq_event(struct drm_device *dev) -{ - struct drm_connector *connector; - enum drm_connector_status old_status; - bool changed = false; - - if (!dev->mode_config.poll_enabled) - return false; - - mutex_lock(&dev->mode_config.mutex); - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { - - /* Only handle HPD capable connectors. */ - if (!(connector->polled & DRM_CONNECTOR_POLL_HPD)) - continue; - - old_status = connector->status; - - connector->status = connector->funcs->detect(connector, false); - DRM_DEBUG_KMS("[CONNECTOR:%d:%s] status updated from %s to %s\n", - connector->base.id, - drm_get_connector_name(connector), - drm_get_connector_status_name(old_status), - drm_get_connector_status_name(connector->status)); - if (old_status != connector->status) - changed = true; - } - - mutex_unlock(&dev->mode_config.mutex); - - if (changed) - drm_kms_helper_hotplug_event(dev); - - return changed; -} -EXPORT_SYMBOL(drm_helper_hpd_irq_event); diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c new file mode 100644 index 00000000000..e70f54d4a58 --- /dev/null +++ b/drivers/gpu/drm/drm_probe_helper.c @@ -0,0 +1,426 @@ +/* + * Copyright (c) 2006-2008 Intel Corporation + * Copyright (c) 2007 Dave Airlie + * + * DRM core CRTC related functions + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that copyright + * notice and this permission notice appear in supporting documentation, and + * that the name of the copyright holders not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. The copyright holders make no representations + * about the suitability of this software for any purpose. It is provided "as + * is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THIS SOFTWARE. + * + * Authors: + * Keith Packard + * Eric Anholt + * Dave Airlie + * Jesse Barnes + */ + +#include +#include + +#include +#include +#include +#include +#include +#include + +/** + * DOC: output probing helper overview + * + * This library provides some helper code for output probing. It provides an + * implementation of the core connector->fill_modes interface with + * drm_helper_probe_single_connector_modes. + * + * It also provides support for polling connectors with a work item and for + * generic hotplug interrupt handling where the driver doesn't or cannot keep + * track of a per-connector hpd interrupt. + * + * This helper library can be used independently of the modeset helper library. + * Drivers can also overwrite different parts e.g. use their own hotplug + * handling code to avoid probing unrelated outputs. 
+ */ + +static bool drm_kms_helper_poll = true; +module_param_named(poll, drm_kms_helper_poll, bool, 0600); + +static void drm_mode_validate_flag(struct drm_connector *connector, + int flags) +{ + struct drm_display_mode *mode; + + if (flags == (DRM_MODE_FLAG_DBLSCAN | DRM_MODE_FLAG_INTERLACE | + DRM_MODE_FLAG_3D_MASK)) + return; + + list_for_each_entry(mode, &connector->modes, head) { + if ((mode->flags & DRM_MODE_FLAG_INTERLACE) && + !(flags & DRM_MODE_FLAG_INTERLACE)) + mode->status = MODE_NO_INTERLACE; + if ((mode->flags & DRM_MODE_FLAG_DBLSCAN) && + !(flags & DRM_MODE_FLAG_DBLSCAN)) + mode->status = MODE_NO_DBLESCAN; + if ((mode->flags & DRM_MODE_FLAG_3D_MASK) && + !(flags & DRM_MODE_FLAG_3D_MASK)) + mode->status = MODE_NO_STEREO; + } + + return; +} + +/** + * drm_helper_probe_single_connector_modes - get complete set of display modes + * @connector: connector to probe + * @maxX: max width for modes + * @maxY: max height for modes + * + * Based on the helper callbacks implemented by @connector try to detect all + * valid modes. Modes will first be added to the connector's probed_modes list, + * then culled (based on validity and the @maxX, @maxY parameters) and put into + * the normal modes list. + * + * Intended to be use as a generic implementation of the ->fill_modes() + * @connector vfunc for drivers that use the crtc helpers for output mode + * filtering and detection. + * + * Returns: + * The number of modes found on @connector. + */ +int drm_helper_probe_single_connector_modes(struct drm_connector *connector, + uint32_t maxX, uint32_t maxY) +{ + struct drm_device *dev = connector->dev; + struct drm_display_mode *mode; + struct drm_connector_helper_funcs *connector_funcs = + connector->helper_private; + int count = 0; + int mode_flags = 0; + bool verbose_prune = true; + + WARN_ON(!mutex_is_locked(&dev->mode_config.mutex)); + + DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", connector->base.id, + drm_get_connector_name(connector)); + /* set all modes to the unverified state */ + list_for_each_entry(mode, &connector->modes, head) + mode->status = MODE_UNVERIFIED; + + if (connector->force) { + if (connector->force == DRM_FORCE_ON) + connector->status = connector_status_connected; + else + connector->status = connector_status_disconnected; + if (connector->funcs->force) + connector->funcs->force(connector); + } else { + connector->status = connector->funcs->detect(connector, true); + } + + /* Re-enable polling in case the global poll config changed. 
*/ + if (drm_kms_helper_poll != dev->mode_config.poll_running) + drm_kms_helper_poll_enable(dev); + + dev->mode_config.poll_running = drm_kms_helper_poll; + + if (connector->status == connector_status_disconnected) { + DRM_DEBUG_KMS("[CONNECTOR:%d:%s] disconnected\n", + connector->base.id, drm_get_connector_name(connector)); + drm_mode_connector_update_edid_property(connector, NULL); + verbose_prune = false; + goto prune; + } + +#ifdef CONFIG_DRM_LOAD_EDID_FIRMWARE + count = drm_load_edid_firmware(connector); + if (count == 0) +#endif + count = (*connector_funcs->get_modes)(connector); + + if (count == 0 && connector->status == connector_status_connected) + count = drm_add_modes_noedid(connector, 1024, 768); + if (count == 0) + goto prune; + + drm_mode_connector_list_update(connector); + + if (maxX && maxY) + drm_mode_validate_size(dev, &connector->modes, maxX, maxY); + + if (connector->interlace_allowed) + mode_flags |= DRM_MODE_FLAG_INTERLACE; + if (connector->doublescan_allowed) + mode_flags |= DRM_MODE_FLAG_DBLSCAN; + if (connector->stereo_allowed) + mode_flags |= DRM_MODE_FLAG_3D_MASK; + drm_mode_validate_flag(connector, mode_flags); + + list_for_each_entry(mode, &connector->modes, head) { + if (mode->status == MODE_OK) + mode->status = connector_funcs->mode_valid(connector, + mode); + } + +prune: + drm_mode_prune_invalid(dev, &connector->modes, verbose_prune); + + if (list_empty(&connector->modes)) + return 0; + + list_for_each_entry(mode, &connector->modes, head) + mode->vrefresh = drm_mode_vrefresh(mode); + + drm_mode_sort(&connector->modes); + + DRM_DEBUG_KMS("[CONNECTOR:%d:%s] probed modes :\n", connector->base.id, + drm_get_connector_name(connector)); + list_for_each_entry(mode, &connector->modes, head) { + drm_mode_set_crtcinfo(mode, CRTC_INTERLACE_HALVE_V); + drm_mode_debug_printmodeline(mode); + } + + return count; +} +EXPORT_SYMBOL(drm_helper_probe_single_connector_modes); + +/** + * drm_kms_helper_hotplug_event - fire off KMS hotplug events + * @dev: drm_device whose connector state changed + * + * This function fires off the uevent for userspace and also calls the + * output_poll_changed function, which is most commonly used to inform the fbdev + * emulation code and allow it to update the fbcon output configuration. + * + * Drivers should call this from their hotplug handling code when a change is + * detected. Note that this function does not do any output detection of its + * own, like drm_helper_hpd_irq_event() does - this is assumed to be done by the + * driver already. + * + * This function must be called from process context with no mode + * setting locks held. + */ +void drm_kms_helper_hotplug_event(struct drm_device *dev) +{ + /* send a uevent + call fbdev */ + drm_sysfs_hotplug_event(dev); + if (dev->mode_config.funcs->output_poll_changed) + dev->mode_config.funcs->output_poll_changed(dev); +} +EXPORT_SYMBOL(drm_kms_helper_hotplug_event); + +#define DRM_OUTPUT_POLL_PERIOD (10*HZ) +static void output_poll_execute(struct work_struct *work) +{ + struct delayed_work *delayed_work = to_delayed_work(work); + struct drm_device *dev = container_of(delayed_work, struct drm_device, mode_config.output_poll_work); + struct drm_connector *connector; + enum drm_connector_status old_status; + bool repoll = false, changed = false; + + if (!drm_kms_helper_poll) + return; + + mutex_lock(&dev->mode_config.mutex); + list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + + /* Ignore forced connectors. 
*/ + if (connector->force) + continue; + + /* Ignore HPD capable connectors and connectors where we don't + * want any hotplug detection at all for polling. */ + if (!connector->polled || connector->polled == DRM_CONNECTOR_POLL_HPD) + continue; + + repoll = true; + + old_status = connector->status; + /* if we are connected and don't want to poll for disconnect + skip it */ + if (old_status == connector_status_connected && + !(connector->polled & DRM_CONNECTOR_POLL_DISCONNECT)) + continue; + + connector->status = connector->funcs->detect(connector, false); + if (old_status != connector->status) { + const char *old, *new; + + old = drm_get_connector_status_name(old_status); + new = drm_get_connector_status_name(connector->status); + + DRM_DEBUG_KMS("[CONNECTOR:%d:%s] " + "status updated from %s to %s\n", + connector->base.id, + drm_get_connector_name(connector), + old, new); + + changed = true; + } + } + + mutex_unlock(&dev->mode_config.mutex); + + if (changed) + drm_kms_helper_hotplug_event(dev); + + if (repoll) + schedule_delayed_work(delayed_work, DRM_OUTPUT_POLL_PERIOD); +} + +/** + * drm_kms_helper_poll_disable - disable output polling + * @dev: drm_device + * + * This function disables the output polling work. + * + * Drivers can call this helper from their device suspend implementation. It is + * not an error to call this even when output polling isn't enabled or arlready + * disabled. + */ +void drm_kms_helper_poll_disable(struct drm_device *dev) +{ + if (!dev->mode_config.poll_enabled) + return; + cancel_delayed_work_sync(&dev->mode_config.output_poll_work); +} +EXPORT_SYMBOL(drm_kms_helper_poll_disable); + +/** + * drm_kms_helper_poll_enable - re-enable output polling. + * @dev: drm_device + * + * This function re-enables the output polling work. + * + * Drivers can call this helper from their device resume implementation. It is + * an error to call this when the output polling support has not yet been set + * up. + */ +void drm_kms_helper_poll_enable(struct drm_device *dev) +{ + bool poll = false; + struct drm_connector *connector; + + if (!dev->mode_config.poll_enabled || !drm_kms_helper_poll) + return; + + list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + if (connector->polled & (DRM_CONNECTOR_POLL_CONNECT | + DRM_CONNECTOR_POLL_DISCONNECT)) + poll = true; + } + + if (poll) + schedule_delayed_work(&dev->mode_config.output_poll_work, DRM_OUTPUT_POLL_PERIOD); +} +EXPORT_SYMBOL(drm_kms_helper_poll_enable); + +/** + * drm_kms_helper_poll_init - initialize and enable output polling + * @dev: drm_device + * + * This function intializes and then also enables output polling support for + * @dev. Drivers which do not have reliable hotplug support in hardware can use + * this helper infrastructure to regularly poll such connectors for changes in + * their connection state. + * + * Drivers can control which connectors are polled by setting the + * DRM_CONNECTOR_POLL_CONNECT and DRM_CONNECTOR_POLL_DISCONNECT flags. On + * connectors where probing live outputs can result in visual distortion drivers + * should not set the DRM_CONNECTOR_POLL_DISCONNECT flag to avoid this. + * Connectors which have no flag or only DRM_CONNECTOR_POLL_HPD set are + * completely ignored by the polling logic. + * + * Note that a connector can be both polled and probed from the hotplug handler, + * in case the hotplug interrupt is known to be unreliable. 
+ */ +void drm_kms_helper_poll_init(struct drm_device *dev) +{ + INIT_DELAYED_WORK(&dev->mode_config.output_poll_work, output_poll_execute); + dev->mode_config.poll_enabled = true; + + drm_kms_helper_poll_enable(dev); +} +EXPORT_SYMBOL(drm_kms_helper_poll_init); + +/** + * drm_kms_helper_poll_fini - disable output polling and clean it up + * @dev: drm_device + */ +void drm_kms_helper_poll_fini(struct drm_device *dev) +{ + drm_kms_helper_poll_disable(dev); +} +EXPORT_SYMBOL(drm_kms_helper_poll_fini); + +/** + * drm_helper_hpd_irq_event - hotplug processing + * @dev: drm_device + * + * Drivers can use this helper function to run a detect cycle on all connectors + * which have the DRM_CONNECTOR_POLL_HPD flag set in their &polled member. All + * other connectors are ignored, which is useful to avoid reprobing fixed + * panels. + * + * This helper function is useful for drivers which can't or don't track hotplug + * interrupts for each connector. + * + * Drivers which support hotplug interrupts for each connector individually and + * which have a more fine-grained detect logic should bypass this code and + * directly call drm_kms_helper_hotplug_event() in case the connector state + * changed. + * + * This function must be called from process context with no mode + * setting locks held. + * + * Note that a connector can be both polled and probed from the hotplug handler, + * in case the hotplug interrupt is known to be unreliable. + */ +bool drm_helper_hpd_irq_event(struct drm_device *dev) +{ + struct drm_connector *connector; + enum drm_connector_status old_status; + bool changed = false; + + if (!dev->mode_config.poll_enabled) + return false; + + mutex_lock(&dev->mode_config.mutex); + list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + + /* Only handle HPD capable connectors. 
*/ + if (!(connector->polled & DRM_CONNECTOR_POLL_HPD)) + continue; + + old_status = connector->status; + + connector->status = connector->funcs->detect(connector, false); + DRM_DEBUG_KMS("[CONNECTOR:%d:%s] status updated from %s to %s\n", + connector->base.id, + drm_get_connector_name(connector), + drm_get_connector_status_name(old_status), + drm_get_connector_status_name(connector->status)); + if (old_status != connector->status) + changed = true; + } + + mutex_unlock(&dev->mode_config.mutex); + + if (changed) + drm_kms_helper_hotplug_event(dev); + + return changed; +} +EXPORT_SYMBOL(drm_helper_hpd_irq_event); diff --git a/include/drm/drm_crtc_helper.h b/include/drm/drm_crtc_helper.h index 0bb34ca2ad2..36a5febac2a 100644 --- a/include/drm/drm_crtc_helper.h +++ b/include/drm/drm_crtc_helper.h @@ -125,7 +125,6 @@ struct drm_connector_helper_funcs { struct drm_encoder *(*best_encoder)(struct drm_connector *connector); }; -extern int drm_helper_probe_single_connector_modes(struct drm_connector *connector, uint32_t maxX, uint32_t maxY); extern void drm_helper_disable_unused_functions(struct drm_device *dev); extern int drm_crtc_helper_set_config(struct drm_mode_set *set); extern bool drm_crtc_helper_set_mode(struct drm_crtc *crtc, @@ -161,6 +160,11 @@ static inline void drm_connector_helper_add(struct drm_connector *connector, } extern void drm_helper_resume_force_mode(struct drm_device *dev); + +/* drm_probe_helper.c */ +extern int drm_helper_probe_single_connector_modes(struct drm_connector + *connector, uint32_t maxX, + uint32_t maxY); extern void drm_kms_helper_poll_init(struct drm_device *dev); extern void drm_kms_helper_poll_fini(struct drm_device *dev); extern bool drm_helper_hpd_irq_event(struct drm_device *dev); -- cgit v1.2.3-70-g09d2 From 20cd477c398decd6a8461ac48609b6c0ab4c7567 Mon Sep 17 00:00:00 2001 From: Alexander Shiyan Date: Wed, 16 Apr 2014 10:49:20 +0400 Subject: of: add empty of_find_node_by_path() for !OF Add an empty version of of_find_node_by_path(). This fixes following build error for asoc tree: sound/soc/fsl/fsl_ssi.c: In function 'fsl_ssi_probe': sound/soc/fsl/fsl_ssi.c:1471:2: error: implicit declaration of function 'of_find_node_by_path' [-Werror=implicit-function-declaration] sprop = of_get_property(of_find_node_by_path("/"), "compatible", NULL); Reported-by: Stephen Rothwell Signed-off-by: Alexander Shiyan Signed-off-by: Rob Herring --- include/linux/of.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/of.h b/include/linux/of.h index 919bf211877..3bad8d106e0 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -374,6 +374,11 @@ static inline struct device_node *of_find_matching_node_and_match( return NULL; } +static inline struct device_node *of_find_node_by_path(const char *path) +{ + return NULL; +} + static inline struct device_node *of_get_parent(const struct device_node *node) { return NULL; -- cgit v1.2.3-70-g09d2 From b14878ccb7fac0242db82720b784ab62c467c0dc Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 17 Apr 2014 17:26:50 +0200 Subject: net: sctp: cache auth_enable per endpoint Currently, it is possible to create an SCTP socket, then switch auth_enable via sysctl setting to 1 and crash the system on connect: Oops[#1]: CPU: 0 PID: 0 Comm: swapper Not tainted 3.14.1-mipsgit-20140415 #1 task: ffffffff8056ce80 ti: ffffffff8055c000 task.ti: ffffffff8055c000 [...] 
Call Trace: [] sctp_auth_asoc_set_default_hmac+0x68/0x80 [] sctp_process_init+0x5e0/0x8a4 [] sctp_sf_do_5_1B_init+0x234/0x34c [] sctp_do_sm+0xb4/0x1e8 [] sctp_endpoint_bh_rcv+0x1c4/0x214 [] sctp_rcv+0x588/0x630 [] sctp6_rcv+0x10/0x24 [] ip6_input+0x2c0/0x440 [] __netif_receive_skb_core+0x4a8/0x564 [] process_backlog+0xb4/0x18c [] net_rx_action+0x12c/0x210 [] __do_softirq+0x17c/0x2ac [] irq_exit+0x54/0xb0 [] ret_from_irq+0x0/0x4 [] rm7k_wait_irqoff+0x24/0x48 [] cpu_startup_entry+0xc0/0x148 [] start_kernel+0x37c/0x398 Code: dd0900b8 000330f8 0126302d 50c0fff1 0047182a a48306a0 03e00008 00000000 ---[ end trace b530b0551467f2fd ]--- Kernel panic - not syncing: Fatal exception in interrupt What happens while auth_enable=0 in that case is, that ep->auth_hmacs is initialized to NULL in sctp_auth_init_hmacs() when endpoint is being created. After that point, if an admin switches over to auth_enable=1, the machine can crash due to NULL pointer dereference during reception of an INIT chunk. When we enter sctp_process_init() via sctp_sf_do_5_1B_init() in order to respond to an INIT chunk, the INIT verification succeeds and while we walk and process all INIT params via sctp_process_param() we find that net->sctp.auth_enable is set, therefore do not fall through, but invoke sctp_auth_asoc_set_default_hmac() instead, and thus, dereference what we have set to NULL during endpoint initialization phase. The fix is to make auth_enable immutable by caching its value during endpoint initialization, so that its original value is being carried along until destruction. The bug seems to originate from the very first days. Fix in joint work with Daniel Borkmann. Reported-by: Joshua Kinard Signed-off-by: Vlad Yasevich Signed-off-by: Daniel Borkmann Acked-by: Neil Horman Tested-by: Joshua Kinard Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 4 +++- net/sctp/auth.c | 17 ++++++--------- net/sctp/endpointola.c | 3 ++- net/sctp/sm_make_chunk.c | 32 ++++++++++++++------------- net/sctp/sm_statefuns.c | 6 +++--- net/sctp/socket.c | 54 ++++++++++++++++++++++------------------------ net/sctp/sysctl.c | 36 ++++++++++++++++++++++++++++++- 7 files changed, 92 insertions(+), 60 deletions(-) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index d992ca3145f..0dfcc92600e 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1241,6 +1241,7 @@ struct sctp_endpoint { /* SCTP-AUTH: endpoint shared keys */ struct list_head endpoint_shared_keys; __u16 active_key_id; + __u8 auth_enable; }; /* Recover the outter endpoint structure. 
*/ @@ -1269,7 +1270,8 @@ struct sctp_endpoint *sctp_endpoint_is_match(struct sctp_endpoint *, int sctp_has_association(struct net *net, const union sctp_addr *laddr, const union sctp_addr *paddr); -int sctp_verify_init(struct net *net, const struct sctp_association *asoc, +int sctp_verify_init(struct net *net, const struct sctp_endpoint *ep, + const struct sctp_association *asoc, sctp_cid_t, sctp_init_chunk_t *peer_init, struct sctp_chunk *chunk, struct sctp_chunk **err_chunk); int sctp_process_init(struct sctp_association *, struct sctp_chunk *chunk, diff --git a/net/sctp/auth.c b/net/sctp/auth.c index 683c7d1b130..0e8529113dc 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -386,14 +386,13 @@ nomem: */ int sctp_auth_asoc_init_active_key(struct sctp_association *asoc, gfp_t gfp) { - struct net *net = sock_net(asoc->base.sk); struct sctp_auth_bytes *secret; struct sctp_shared_key *ep_key; /* If we don't support AUTH, or peer is not capable * we don't need to do anything. */ - if (!net->sctp.auth_enable || !asoc->peer.auth_capable) + if (!asoc->ep->auth_enable || !asoc->peer.auth_capable) return 0; /* If the key_id is non-zero and we couldn't find an @@ -440,16 +439,16 @@ struct sctp_shared_key *sctp_auth_get_shkey( */ int sctp_auth_init_hmacs(struct sctp_endpoint *ep, gfp_t gfp) { - struct net *net = sock_net(ep->base.sk); struct crypto_hash *tfm = NULL; __u16 id; - /* if the transforms are already allocted, we are done */ - if (!net->sctp.auth_enable) { + /* If AUTH extension is disabled, we are done */ + if (!ep->auth_enable) { ep->auth_hmacs = NULL; return 0; } + /* If the transforms are already allocated, we are done */ if (ep->auth_hmacs) return 0; @@ -665,12 +664,10 @@ static int __sctp_auth_cid(sctp_cid_t chunk, struct sctp_chunks_param *param) /* Check if peer requested that this chunk is authenticated */ int sctp_auth_send_cid(sctp_cid_t chunk, const struct sctp_association *asoc) { - struct net *net; if (!asoc) return 0; - net = sock_net(asoc->base.sk); - if (!net->sctp.auth_enable || !asoc->peer.auth_capable) + if (!asoc->ep->auth_enable || !asoc->peer.auth_capable) return 0; return __sctp_auth_cid(chunk, asoc->peer.peer_chunks); @@ -679,12 +676,10 @@ int sctp_auth_send_cid(sctp_cid_t chunk, const struct sctp_association *asoc) /* Check if we requested that peer authenticate this chunk. */ int sctp_auth_recv_cid(sctp_cid_t chunk, const struct sctp_association *asoc) { - struct net *net; if (!asoc) return 0; - net = sock_net(asoc->base.sk); - if (!net->sctp.auth_enable) + if (!asoc->ep->auth_enable) return 0; return __sctp_auth_cid(chunk, diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 8e5fdea0521..3d9f429858d 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -68,7 +68,8 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, if (!ep->digest) return NULL; - if (net->sctp.auth_enable) { + ep->auth_enable = net->sctp.auth_enable; + if (ep->auth_enable) { /* Allocate space for HMACS and CHUNKS authentication * variables. There are arrays that we encode directly * into parameters to make the rest of the operations easier. 
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 3a1767ef320..fee5552ddf9 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -219,6 +219,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, gfp_t gfp, int vparam_len) { struct net *net = sock_net(asoc->base.sk); + struct sctp_endpoint *ep = asoc->ep; sctp_inithdr_t init; union sctp_params addrs; size_t chunksize; @@ -278,7 +279,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, chunksize += vparam_len; /* Account for AUTH related parameters */ - if (net->sctp.auth_enable) { + if (ep->auth_enable) { /* Add random parameter length*/ chunksize += sizeof(asoc->c.auth_random); @@ -363,7 +364,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, } /* Add SCTP-AUTH chunks to the parameter list */ - if (net->sctp.auth_enable) { + if (ep->auth_enable) { sctp_addto_chunk(retval, sizeof(asoc->c.auth_random), asoc->c.auth_random); if (auth_hmacs) @@ -2010,7 +2011,7 @@ static void sctp_process_ext_param(struct sctp_association *asoc, /* if the peer reports AUTH, assume that he * supports AUTH. */ - if (net->sctp.auth_enable) + if (asoc->ep->auth_enable) asoc->peer.auth_capable = 1; break; case SCTP_CID_ASCONF: @@ -2102,6 +2103,7 @@ static sctp_ierror_t sctp_process_unk_param(const struct sctp_association *asoc, * SCTP_IERROR_NO_ERROR - continue with the chunk */ static sctp_ierror_t sctp_verify_param(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, union sctp_params param, sctp_cid_t cid, @@ -2152,7 +2154,7 @@ static sctp_ierror_t sctp_verify_param(struct net *net, goto fallthrough; case SCTP_PARAM_RANDOM: - if (!net->sctp.auth_enable) + if (!ep->auth_enable) goto fallthrough; /* SCTP-AUTH: Secion 6.1 @@ -2169,7 +2171,7 @@ static sctp_ierror_t sctp_verify_param(struct net *net, break; case SCTP_PARAM_CHUNKS: - if (!net->sctp.auth_enable) + if (!ep->auth_enable) goto fallthrough; /* SCTP-AUTH: Section 3.2 @@ -2185,7 +2187,7 @@ static sctp_ierror_t sctp_verify_param(struct net *net, break; case SCTP_PARAM_HMAC_ALGO: - if (!net->sctp.auth_enable) + if (!ep->auth_enable) goto fallthrough; hmacs = (struct sctp_hmac_algo_param *)param.p; @@ -2220,10 +2222,9 @@ fallthrough: } /* Verify the INIT packet before we process it. */ -int sctp_verify_init(struct net *net, const struct sctp_association *asoc, - sctp_cid_t cid, - sctp_init_chunk_t *peer_init, - struct sctp_chunk *chunk, +int sctp_verify_init(struct net *net, const struct sctp_endpoint *ep, + const struct sctp_association *asoc, sctp_cid_t cid, + sctp_init_chunk_t *peer_init, struct sctp_chunk *chunk, struct sctp_chunk **errp) { union sctp_params param; @@ -2264,8 +2265,8 @@ int sctp_verify_init(struct net *net, const struct sctp_association *asoc, /* Verify all the variable length parameters */ sctp_walk_params(param, peer_init, init_hdr.params) { - - result = sctp_verify_param(net, asoc, param, cid, chunk, errp); + result = sctp_verify_param(net, ep, asoc, param, cid, + chunk, errp); switch (result) { case SCTP_IERROR_ABORT: case SCTP_IERROR_NOMEM: @@ -2497,6 +2498,7 @@ static int sctp_process_param(struct sctp_association *asoc, struct sctp_af *af; union sctp_addr_param *addr_param; struct sctp_transport *t; + struct sctp_endpoint *ep = asoc->ep; /* We maintain all INIT parameters in network byte order all the * time. 
This allows us to not worry about whether the parameters @@ -2636,7 +2638,7 @@ do_addr_param: goto fall_through; case SCTP_PARAM_RANDOM: - if (!net->sctp.auth_enable) + if (!ep->auth_enable) goto fall_through; /* Save peer's random parameter */ @@ -2649,7 +2651,7 @@ do_addr_param: break; case SCTP_PARAM_HMAC_ALGO: - if (!net->sctp.auth_enable) + if (!ep->auth_enable) goto fall_through; /* Save peer's HMAC list */ @@ -2665,7 +2667,7 @@ do_addr_param: break; case SCTP_PARAM_CHUNKS: - if (!net->sctp.auth_enable) + if (!ep->auth_enable) goto fall_through; asoc->peer.peer_chunks = kmemdup(param.p, diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index ae9fbeba40b..5170a1ff95a 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -357,7 +357,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net, /* Verify the INIT chunk before processing it. */ err_chunk = NULL; - if (!sctp_verify_init(net, asoc, chunk->chunk_hdr->type, + if (!sctp_verify_init(net, ep, asoc, chunk->chunk_hdr->type, (sctp_init_chunk_t *)chunk->chunk_hdr, chunk, &err_chunk)) { /* This chunk contains fatal error. It is to be discarded. @@ -524,7 +524,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net, /* Verify the INIT chunk before processing it. */ err_chunk = NULL; - if (!sctp_verify_init(net, asoc, chunk->chunk_hdr->type, + if (!sctp_verify_init(net, ep, asoc, chunk->chunk_hdr->type, (sctp_init_chunk_t *)chunk->chunk_hdr, chunk, &err_chunk)) { @@ -1430,7 +1430,7 @@ static sctp_disposition_t sctp_sf_do_unexpected_init( /* Verify the INIT chunk before processing it. */ err_chunk = NULL; - if (!sctp_verify_init(net, asoc, chunk->chunk_hdr->type, + if (!sctp_verify_init(net, ep, asoc, chunk->chunk_hdr->type, (sctp_init_chunk_t *)chunk->chunk_hdr, chunk, &err_chunk)) { /* This chunk contains fatal error. It is to be discarded. 
diff --git a/net/sctp/socket.c b/net/sctp/socket.c index ff20e2dbbbc..fee06b99a4d 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3321,10 +3321,10 @@ static int sctp_setsockopt_auth_chunk(struct sock *sk, char __user *optval, unsigned int optlen) { - struct net *net = sock_net(sk); + struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_authchunk val; - if (!net->sctp.auth_enable) + if (!ep->auth_enable) return -EACCES; if (optlen != sizeof(struct sctp_authchunk)) @@ -3341,7 +3341,7 @@ static int sctp_setsockopt_auth_chunk(struct sock *sk, } /* add this chunk id to the endpoint */ - return sctp_auth_ep_add_chunkid(sctp_sk(sk)->ep, val.sauth_chunk); + return sctp_auth_ep_add_chunkid(ep, val.sauth_chunk); } /* @@ -3354,12 +3354,12 @@ static int sctp_setsockopt_hmac_ident(struct sock *sk, char __user *optval, unsigned int optlen) { - struct net *net = sock_net(sk); + struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_hmacalgo *hmacs; u32 idents; int err; - if (!net->sctp.auth_enable) + if (!ep->auth_enable) return -EACCES; if (optlen < sizeof(struct sctp_hmacalgo)) @@ -3376,7 +3376,7 @@ static int sctp_setsockopt_hmac_ident(struct sock *sk, goto out; } - err = sctp_auth_ep_set_hmacs(sctp_sk(sk)->ep, hmacs); + err = sctp_auth_ep_set_hmacs(ep, hmacs); out: kfree(hmacs); return err; @@ -3392,12 +3392,12 @@ static int sctp_setsockopt_auth_key(struct sock *sk, char __user *optval, unsigned int optlen) { - struct net *net = sock_net(sk); + struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_authkey *authkey; struct sctp_association *asoc; int ret; - if (!net->sctp.auth_enable) + if (!ep->auth_enable) return -EACCES; if (optlen <= sizeof(struct sctp_authkey)) @@ -3418,7 +3418,7 @@ static int sctp_setsockopt_auth_key(struct sock *sk, goto out; } - ret = sctp_auth_set_key(sctp_sk(sk)->ep, asoc, authkey); + ret = sctp_auth_set_key(ep, asoc, authkey); out: kzfree(authkey); return ret; @@ -3434,11 +3434,11 @@ static int sctp_setsockopt_active_key(struct sock *sk, char __user *optval, unsigned int optlen) { - struct net *net = sock_net(sk); + struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_authkeyid val; struct sctp_association *asoc; - if (!net->sctp.auth_enable) + if (!ep->auth_enable) return -EACCES; if (optlen != sizeof(struct sctp_authkeyid)) @@ -3450,8 +3450,7 @@ static int sctp_setsockopt_active_key(struct sock *sk, if (!asoc && val.scact_assoc_id && sctp_style(sk, UDP)) return -EINVAL; - return sctp_auth_set_active_key(sctp_sk(sk)->ep, asoc, - val.scact_keynumber); + return sctp_auth_set_active_key(ep, asoc, val.scact_keynumber); } /* @@ -3463,11 +3462,11 @@ static int sctp_setsockopt_del_key(struct sock *sk, char __user *optval, unsigned int optlen) { - struct net *net = sock_net(sk); + struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_authkeyid val; struct sctp_association *asoc; - if (!net->sctp.auth_enable) + if (!ep->auth_enable) return -EACCES; if (optlen != sizeof(struct sctp_authkeyid)) @@ -3479,8 +3478,7 @@ static int sctp_setsockopt_del_key(struct sock *sk, if (!asoc && val.scact_assoc_id && sctp_style(sk, UDP)) return -EINVAL; - return sctp_auth_del_key_id(sctp_sk(sk)->ep, asoc, - val.scact_keynumber); + return sctp_auth_del_key_id(ep, asoc, val.scact_keynumber); } @@ -5387,16 +5385,16 @@ static int sctp_getsockopt_maxburst(struct sock *sk, int len, static int sctp_getsockopt_hmac_ident(struct sock *sk, int len, char __user *optval, int __user *optlen) { - struct net *net = sock_net(sk); + struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct 
sctp_hmacalgo __user *p = (void __user *)optval; struct sctp_hmac_algo_param *hmacs; __u16 data_len = 0; u32 num_idents; - if (!net->sctp.auth_enable) + if (!ep->auth_enable) return -EACCES; - hmacs = sctp_sk(sk)->ep->auth_hmacs_list; + hmacs = ep->auth_hmacs_list; data_len = ntohs(hmacs->param_hdr.length) - sizeof(sctp_paramhdr_t); if (len < sizeof(struct sctp_hmacalgo) + data_len) @@ -5417,11 +5415,11 @@ static int sctp_getsockopt_hmac_ident(struct sock *sk, int len, static int sctp_getsockopt_active_key(struct sock *sk, int len, char __user *optval, int __user *optlen) { - struct net *net = sock_net(sk); + struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_authkeyid val; struct sctp_association *asoc; - if (!net->sctp.auth_enable) + if (!ep->auth_enable) return -EACCES; if (len < sizeof(struct sctp_authkeyid)) @@ -5436,7 +5434,7 @@ static int sctp_getsockopt_active_key(struct sock *sk, int len, if (asoc) val.scact_keynumber = asoc->active_key_id; else - val.scact_keynumber = sctp_sk(sk)->ep->active_key_id; + val.scact_keynumber = ep->active_key_id; len = sizeof(struct sctp_authkeyid); if (put_user(len, optlen)) @@ -5450,7 +5448,7 @@ static int sctp_getsockopt_active_key(struct sock *sk, int len, static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len, char __user *optval, int __user *optlen) { - struct net *net = sock_net(sk); + struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_authchunks __user *p = (void __user *)optval; struct sctp_authchunks val; struct sctp_association *asoc; @@ -5458,7 +5456,7 @@ static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len, u32 num_chunks = 0; char __user *to; - if (!net->sctp.auth_enable) + if (!ep->auth_enable) return -EACCES; if (len < sizeof(struct sctp_authchunks)) @@ -5495,7 +5493,7 @@ num: static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len, char __user *optval, int __user *optlen) { - struct net *net = sock_net(sk); + struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_authchunks __user *p = (void __user *)optval; struct sctp_authchunks val; struct sctp_association *asoc; @@ -5503,7 +5501,7 @@ static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len, u32 num_chunks = 0; char __user *to; - if (!net->sctp.auth_enable) + if (!ep->auth_enable) return -EACCES; if (len < sizeof(struct sctp_authchunks)) @@ -5520,7 +5518,7 @@ static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len, if (asoc) ch = (struct sctp_chunks_param *)asoc->c.auth_chunks; else - ch = sctp_sk(sk)->ep->auth_chunk_list; + ch = ep->auth_chunk_list; if (!ch) goto num; diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 35c8923b555..c82fdc1eab7 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -64,6 +64,9 @@ static int proc_sctp_do_rto_min(struct ctl_table *ctl, int write, static int proc_sctp_do_rto_max(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos); +static int proc_sctp_do_auth(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos); static struct ctl_table sctp_table[] = { { @@ -266,7 +269,7 @@ static struct ctl_table sctp_net_table[] = { .data = &init_net.sctp.auth_enable, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_sctp_do_auth, }, { .procname = "addr_scope_policy", @@ -400,6 +403,37 @@ static int proc_sctp_do_rto_max(struct ctl_table *ctl, int write, return ret; } +static int proc_sctp_do_auth(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + 
loff_t *ppos) +{ + struct net *net = current->nsproxy->net_ns; + struct ctl_table tbl; + int new_value, ret; + + memset(&tbl, 0, sizeof(struct ctl_table)); + tbl.maxlen = sizeof(unsigned int); + + if (write) + tbl.data = &new_value; + else + tbl.data = &net->sctp.auth_enable; + + ret = proc_dointvec(&tbl, write, buffer, lenp, ppos); + + if (write) { + struct sock *sk = net->sctp.ctl_sock; + + net->sctp.auth_enable = new_value; + /* Update the value in the control socket */ + lock_sock(sk); + sctp_sk(sk)->ep->auth_enable = new_value; + release_sock(sk); + } + + return ret; +} + int sctp_sysctl_net_register(struct net *net) { struct ctl_table *table = sctp_net_table; -- cgit v1.2.3-70-g09d2 From 9cc236827fde5e254fd995a0023c05c5ee3a3ba6 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 18 Apr 2014 15:07:16 -0700 Subject: Shiraz has moved shiraz.hashim@st.com email-id doesn't exist anymore as he has left the company. Replace ST's id with shiraz.linux.kernel@gmail.com. It also updates .mailmap file to fix address for 'git shortlog'. Signed-off-by: Viresh Kumar Cc: Shiraz Hashim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- .mailmap | 1 + MAINTAINERS | 2 +- arch/arm/boot/dts/spear320-hmi.dts | 2 +- arch/arm/mach-spear/headsmp.S | 2 +- arch/arm/mach-spear/platsmp.c | 2 +- arch/arm/mach-spear/time.c | 2 +- drivers/gpio/gpio-spear-spics.c | 4 ++-- drivers/irqchip/spear-shirq.c | 2 +- drivers/mtd/devices/spear_smi.c | 4 ++-- drivers/pwm/pwm-spear.c | 4 ++-- include/linux/mtd/spear_smi.h | 2 +- 11 files changed, 14 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/.mailmap b/.mailmap index 658003aa944..df1baba43a6 100644 --- a/.mailmap +++ b/.mailmap @@ -99,6 +99,7 @@ Sachin P Sant Sam Ravnborg Sascha Hauer S.Çağlar Onur +Shiraz Hashim Simon Kelley Stéphane Witzmann Stephen Hemminger diff --git a/MAINTAINERS b/MAINTAINERS index 80399fff805..e67ea244204 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8315,7 +8315,7 @@ F: include/linux/compiler.h SPEAR PLATFORM SUPPORT M: Viresh Kumar -M: Shiraz Hashim +M: Shiraz Hashim L: spear-devel@list.st.com L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) W: http://www.st.com/spear diff --git a/arch/arm/boot/dts/spear320-hmi.dts b/arch/arm/boot/dts/spear320-hmi.dts index 3075d2d3a8b..0aa6fef5ce2 100644 --- a/arch/arm/boot/dts/spear320-hmi.dts +++ b/arch/arm/boot/dts/spear320-hmi.dts @@ -1,7 +1,7 @@ /* * DTS file for SPEAr320 Evaluation Baord * - * Copyright 2012 Shiraz Hashim + * Copyright 2012 Shiraz Hashim * * The code contained herein is licensed under the GNU General Public * License. You may obtain a copy of the GNU General Public License diff --git a/arch/arm/mach-spear/headsmp.S b/arch/arm/mach-spear/headsmp.S index ed85473a047..c52192dc3d9 100644 --- a/arch/arm/mach-spear/headsmp.S +++ b/arch/arm/mach-spear/headsmp.S @@ -3,7 +3,7 @@ * * Picked from realview * Copyright (c) 2012 ST Microelectronics Limited - * Shiraz Hashim + * Shiraz Hashim * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/arch/arm/mach-spear/platsmp.c b/arch/arm/mach-spear/platsmp.c index 5c4a19887b2..c19751fff2c 100644 --- a/arch/arm/mach-spear/platsmp.c +++ b/arch/arm/mach-spear/platsmp.c @@ -4,7 +4,7 @@ * based upon linux/arch/arm/mach-realview/platsmp.c * * Copyright (C) 2012 ST Microelectronics Ltd. 
- * Shiraz Hashim + * Shiraz Hashim * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/arch/arm/mach-spear/time.c b/arch/arm/mach-spear/time.c index 218ba5b67d9..64790353951 100644 --- a/arch/arm/mach-spear/time.c +++ b/arch/arm/mach-spear/time.c @@ -2,7 +2,7 @@ * arch/arm/plat-spear/time.c * * Copyright (C) 2010 ST Microelectronics - * Shiraz Hashim + * Shiraz Hashim * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/drivers/gpio/gpio-spear-spics.c b/drivers/gpio/gpio-spear-spics.c index e9a0415834e..30bcc539425 100644 --- a/drivers/gpio/gpio-spear-spics.c +++ b/drivers/gpio/gpio-spear-spics.c @@ -2,7 +2,7 @@ * SPEAr platform SPI chipselect abstraction over gpiolib * * Copyright (C) 2012 ST Microelectronics - * Shiraz Hashim + * Shiraz Hashim * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any @@ -205,6 +205,6 @@ static int __init spics_gpio_init(void) } subsys_initcall(spics_gpio_init); -MODULE_AUTHOR("Shiraz Hashim "); +MODULE_AUTHOR("Shiraz Hashim "); MODULE_DESCRIPTION("ST Microlectronics SPEAr SPI Chip Select Abstraction"); MODULE_LICENSE("GPL"); diff --git a/drivers/irqchip/spear-shirq.c b/drivers/irqchip/spear-shirq.c index 8527743b5ce..3fdda3a4026 100644 --- a/drivers/irqchip/spear-shirq.c +++ b/drivers/irqchip/spear-shirq.c @@ -5,7 +5,7 @@ * Viresh Kumar * * Copyright (C) 2012 ST Microelectronics - * Shiraz Hashim + * Shiraz Hashim * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/drivers/mtd/devices/spear_smi.c b/drivers/mtd/devices/spear_smi.c index 363da96e689..c4176b0f382 100644 --- a/drivers/mtd/devices/spear_smi.c +++ b/drivers/mtd/devices/spear_smi.c @@ -6,7 +6,7 @@ * * Copyright © 2010 STMicroelectronics. * Ashish Priyadarshi - * Shiraz Hashim + * Shiraz Hashim * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any @@ -1089,5 +1089,5 @@ static struct platform_driver spear_smi_driver = { module_platform_driver(spear_smi_driver); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Ashish Priyadarshi, Shiraz Hashim "); +MODULE_AUTHOR("Ashish Priyadarshi, Shiraz Hashim "); MODULE_DESCRIPTION("MTD SMI driver for serial nor flash chips"); diff --git a/drivers/pwm/pwm-spear.c b/drivers/pwm/pwm-spear.c index 8ad26b8bf41..cb2d4f0f971 100644 --- a/drivers/pwm/pwm-spear.c +++ b/drivers/pwm/pwm-spear.c @@ -2,7 +2,7 @@ * ST Microelectronics SPEAr Pulse Width Modulator driver * * Copyright (C) 2012 ST Microelectronics - * Shiraz Hashim + * Shiraz Hashim * * This file is licensed under the terms of the GNU General Public * License version 2. 
This program is licensed "as is" without any @@ -264,6 +264,6 @@ static struct platform_driver spear_pwm_driver = { module_platform_driver(spear_pwm_driver); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Shiraz Hashim "); +MODULE_AUTHOR("Shiraz Hashim "); MODULE_AUTHOR("Viresh Kumar "); MODULE_ALIAS("platform:spear-pwm"); diff --git a/include/linux/mtd/spear_smi.h b/include/linux/mtd/spear_smi.h index 8ae1726044c..581603ac127 100644 --- a/include/linux/mtd/spear_smi.h +++ b/include/linux/mtd/spear_smi.h @@ -1,6 +1,6 @@ /* * Copyright © 2010 ST Microelectronics - * Shiraz Hashim + * Shiraz Hashim * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any -- cgit v1.2.3-70-g09d2 From 8b32201de1f87878ace971bfdc2846a4f3a5bb2b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 18 Apr 2014 15:07:17 -0700 Subject: wait: explain the shadowing and type inconsistencies Stick in a comment before someone else tries to fix the sparse warning this generates. Suggested-by: Andrew Morton Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-o2ro6f3vkxklni0bc8f7m68s@git.kernel.org Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/wait.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/wait.h b/include/linux/wait.h index e7d9d9ed14f..bd68819f081 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -191,11 +191,23 @@ wait_queue_head_t *bit_waitqueue(void *, int); (!__builtin_constant_p(state) || \ state == TASK_INTERRUPTIBLE || state == TASK_KILLABLE) \ +/* + * The below macro ___wait_event() has an explicit shadow of the __ret + * variable when used from the wait_event_*() macros. + * + * This is so that both can use the ___wait_cond_timeout() construct + * to wrap the condition. + * + * The type inconsistency of the wait_event_*() __ret variable is also + * on purpose; we use long where we can return timeout values and int + * otherwise. + */ + #define ___wait_event(wq, condition, state, exclusive, ret, cmd) \ ({ \ __label__ __out; \ wait_queue_t __wait; \ - long __ret = ret; \ + long __ret = ret; /* explicit shadow */ \ \ INIT_LIST_HEAD(&__wait.task_list); \ if (exclusive) \ -- cgit v1.2.3-70-g09d2 From 29c7787075c92ca8af353acd5301481e6f37082f Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 18 Apr 2014 15:07:21 -0700 Subject: mm: use paravirt friendly ops for NUMA hinting ptes David Vrabel identified a regression when using automatic NUMA balancing under Xen whereby page table entries were getting corrupted due to the use of native PTE operations. Quoting him Xen PV guest page tables require that their entries use machine addresses if the preset bit (_PAGE_PRESENT) is set, and (for successful migration) non-present PTEs must use pseudo-physical addresses. This is because on migration MFNs in present PTEs are translated to PFNs (canonicalised) so they may be translated back to the new MFN in the destination domain (uncanonicalised). pte_mknonnuma(), pmd_mknonnuma(), pte_mknuma() and pmd_mknuma() set and clear the _PAGE_PRESENT bit using pte_set_flags(), pte_clear_flags(), etc. In a Xen PV guest, these functions must translate MFNs to PFNs when clearing _PAGE_PRESENT and translate PFNs to MFNs when setting _PAGE_PRESENT. His suggested fix converted p[te|md]_[set|clear]_flags to using paravirt-friendly ops but this is overkill. 
He suggested an alternative of using p[te|md]_modify in the NUMA page table operations but this is does more work than necessary and would require looking up a VMA for protections. This patch modifies the NUMA page table operations to use paravirt friendly operations to set/clear the flags of interest. Unfortunately this will take a performance hit when updating the PTEs on CONFIG_PARAVIRT but I do not see a way around it that does not break Xen. Signed-off-by: Mel Gorman Acked-by: David Vrabel Tested-by: David Vrabel Cc: Ingo Molnar Cc: Peter Anvin Cc: Fengguang Wu Cc: Linus Torvalds Cc: Steven Noonan Cc: Rik van Riel Cc: Peter Zijlstra Cc: Andrea Arcangeli Cc: Dave Hansen Cc: Srikar Dronamraju Cc: Cyrill Gorcunov Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/pgtable.h | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 1ec08c198b6..a8015a7a55b 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -693,24 +693,35 @@ static inline int pmd_numa(pmd_t pmd) #ifndef pte_mknonnuma static inline pte_t pte_mknonnuma(pte_t pte) { - pte = pte_clear_flags(pte, _PAGE_NUMA); - return pte_set_flags(pte, _PAGE_PRESENT|_PAGE_ACCESSED); + pteval_t val = pte_val(pte); + + val &= ~_PAGE_NUMA; + val |= (_PAGE_PRESENT|_PAGE_ACCESSED); + return __pte(val); } #endif #ifndef pmd_mknonnuma static inline pmd_t pmd_mknonnuma(pmd_t pmd) { - pmd = pmd_clear_flags(pmd, _PAGE_NUMA); - return pmd_set_flags(pmd, _PAGE_PRESENT|_PAGE_ACCESSED); + pmdval_t val = pmd_val(pmd); + + val &= ~_PAGE_NUMA; + val |= (_PAGE_PRESENT|_PAGE_ACCESSED); + + return __pmd(val); } #endif #ifndef pte_mknuma static inline pte_t pte_mknuma(pte_t pte) { - pte = pte_set_flags(pte, _PAGE_NUMA); - return pte_clear_flags(pte, _PAGE_PRESENT); + pteval_t val = pte_val(pte); + + val &= ~_PAGE_PRESENT; + val |= _PAGE_NUMA; + + return __pte(val); } #endif @@ -729,8 +740,12 @@ static inline void ptep_set_numa(struct mm_struct *mm, unsigned long addr, #ifndef pmd_mknuma static inline pmd_t pmd_mknuma(pmd_t pmd) { - pmd = pmd_set_flags(pmd, _PAGE_NUMA); - return pmd_clear_flags(pmd, _PAGE_PRESENT); + pmdval_t val = pmd_val(pmd); + + val &= ~_PAGE_PRESENT; + val |= _PAGE_NUMA; + + return __pmd(val); } #endif -- cgit v1.2.3-70-g09d2 From 7d568a8383bbb9c1f5167781075906acb2bb1550 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 9 Apr 2014 11:07:30 -0400 Subject: kernfs: implement kernfs_root->supers list Currently, there's no way to find out which super_blocks are associated with a given kernfs_root. Let's implement it - the planned inotify extension to kernfs_notify() needs it. Make kernfs_super_info point back to the super_block and chain it at kernfs_root->supers. 
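As an illustration only (not part of this patch), internal kernfs code could then walk every superblock attached to a root roughly as below. The helper name and the pr_debug() message are invented for this sketch; the locking rule (hold kernfs_mutex while traversing kernfs_root->supers) is the one the patch introduces.

/* Hypothetical sketch (fs/kernfs/ internal): visit each super_block of a
 * kernfs_root through the new supers list, under kernfs_mutex. */
static void kernfs_for_each_super(struct kernfs_root *root)
{
	struct kernfs_super_info *info;

	mutex_lock(&kernfs_mutex);
	list_for_each_entry(info, &root->supers, node)
		pr_debug("kernfs: root %p has super_block %p\n", root, info->sb);
	mutex_unlock(&kernfs_mutex);
}
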
Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/dir.c | 1 + fs/kernfs/kernfs-internal.h | 5 +++++ fs/kernfs/mount.c | 11 +++++++++++ include/linux/kernfs.h | 4 ++++ 4 files changed, 21 insertions(+) (limited to 'include') diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 78f3403300a..43aa97988c3 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -711,6 +711,7 @@ struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops, return ERR_PTR(-ENOMEM); ida_init(&root->ino_ida); + INIT_LIST_HEAD(&root->supers); kn = __kernfs_new_node(root, "", S_IFDIR | S_IRUGO | S_IXUGO, KERNFS_DIR); diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h index 8be13b2a079..dc84a3ef9ca 100644 --- a/fs/kernfs/kernfs-internal.h +++ b/fs/kernfs/kernfs-internal.h @@ -49,6 +49,8 @@ static inline struct kernfs_root *kernfs_root(struct kernfs_node *kn) * mount.c */ struct kernfs_super_info { + struct super_block *sb; + /* * The root associated with this super_block. Each super_block is * identified by the root and ns it's associated with. @@ -62,6 +64,9 @@ struct kernfs_super_info { * an array and compare kernfs_node tag against every entry. */ const void *ns; + + /* anchored at kernfs_root->supers, protected by kernfs_mutex */ + struct list_head node; }; #define kernfs_info(SB) ((struct kernfs_super_info *)(SB->s_fs_info)) diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index 6a5f04ac870..f25a7c0c3cd 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -68,6 +68,7 @@ static int kernfs_fill_super(struct super_block *sb) struct inode *inode; struct dentry *root; + info->sb = sb; sb->s_blocksize = PAGE_CACHE_SIZE; sb->s_blocksize_bits = PAGE_CACHE_SHIFT; sb->s_magic = SYSFS_MAGIC; @@ -166,12 +167,18 @@ struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags, *new_sb_created = !sb->s_root; if (!sb->s_root) { + struct kernfs_super_info *info = kernfs_info(sb); + error = kernfs_fill_super(sb); if (error) { deactivate_locked_super(sb); return ERR_PTR(error); } sb->s_flags |= MS_ACTIVE; + + mutex_lock(&kernfs_mutex); + list_add(&info->node, &root->supers); + mutex_unlock(&kernfs_mutex); } return dget(sb->s_root); @@ -190,6 +197,10 @@ void kernfs_kill_sb(struct super_block *sb) struct kernfs_super_info *info = kernfs_info(sb); struct kernfs_node *root_kn = sb->s_root->d_fsdata; + mutex_lock(&kernfs_mutex); + list_del(&info->node); + mutex_unlock(&kernfs_mutex); + /* * Remove the superblock from fs_supers/s_instances * so we can't find it, before freeing kernfs_super_info. diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index b0122dc6f96..589318b73e6 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -144,6 +144,10 @@ struct kernfs_root { /* private fields, do not use outside kernfs proper */ struct ida ino_ida; struct kernfs_syscall_ops *syscall_ops; + + /* list of kernfs_super_info of this root, protected by kernfs_mutex */ + struct list_head supers; + wait_queue_head_t deactivate_waitq; }; -- cgit v1.2.3-70-g09d2 From 86d56134f1b67d0c18025ba5cade95c048ed528d Mon Sep 17 00:00:00 2001 From: Michael Marineau Date: Thu, 10 Apr 2014 14:09:31 -0700 Subject: kobject: Make support for uevent_helper optional. Support for uevent_helper, aka hotplug, is not required on many systems these days but it can still be enabled via sysfs or sysctl. 
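To illustrate the sysfs/sysctl knobs mentioned above, here is a small hypothetical userspace sketch (not part of the patch) that sets or clears the helper path at runtime through /sys/kernel/uevent_helper; writing an empty string disables the helper, and the same could be done via /proc/sys/kernel/hotplug. It assumes a kernel built with CONFIG_UEVENT_HELPER, since otherwise the attribute is not created.

/*
 * Hypothetical utility: write a new uevent helper path, or an empty
 * string to disable the helper at runtime.
 *   ./set-uevent-helper /sbin/hotplug    enable the helper
 *   ./set-uevent-helper                  disable it
 */
#include <stdio.h>

int main(int argc, char **argv)
{
	const char *path = argc > 1 ? argv[1] : "";
	FILE *f = fopen("/sys/kernel/uevent_helper", "w");

	if (!f) {
		perror("/sys/kernel/uevent_helper");
		return 1;
	}
	/* The kernel's store handler strips a trailing newline. */
	fprintf(f, "%s\n", path);
	return fclose(f) ? 1 : 0;
}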
Reported-by: Darren Shepherd Signed-off-by: Michael Marineau Signed-off-by: Greg Kroah-Hartman --- drivers/base/Kconfig | 17 +++++++++++------ include/linux/kobject.h | 2 ++ kernel/ksysfs.c | 5 ++++- kernel/sysctl.c | 4 ++-- lib/kobject_uevent.c | 6 ++++++ 5 files changed, 25 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig index 8fa8deab644..4b7b4522b64 100644 --- a/drivers/base/Kconfig +++ b/drivers/base/Kconfig @@ -1,10 +1,10 @@ menu "Generic Driver Options" -config UEVENT_HELPER_PATH - string "path to uevent helper" - default "" +config UEVENT_HELPER + bool "Support for uevent helper" + default y help - Path to uevent helper program forked by the kernel for + The uevent helper program is forked by the kernel for every uevent. Before the switch to the netlink-based uevent source, this was used to hook hotplug scripts into kernel device events. It @@ -15,8 +15,13 @@ config UEVENT_HELPER_PATH that it creates a high system load, or on smaller systems it is known to create out-of-memory situations during bootup. - To disable user space helper program execution at early boot - time specify an empty string here. This setting can be altered +config UEVENT_HELPER_PATH + string "path to uevent helper" + depends on UEVENT_HELPER + default "" + help + To disable user space helper program execution at by default + specify an empty string here. This setting can still be altered via /proc/sys/kernel/hotplug or via /sys/kernel/uevent_helper later at runtime. diff --git a/include/linux/kobject.h b/include/linux/kobject.h index f896a33e834..2d61b909f41 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -32,8 +32,10 @@ #define UEVENT_NUM_ENVP 32 /* number of env pointers */ #define UEVENT_BUFFER_SIZE 2048 /* buffer for the variables */ +#ifdef CONFIG_UEVENT_HELPER /* path to the userspace helper executed on an event */ extern char uevent_helper[]; +#endif /* counter to tag the uevent, read only except for the kobject core */ extern u64 uevent_seqnum; diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index 2495a9b14ac..6683ccef9ff 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c @@ -37,6 +37,7 @@ static ssize_t uevent_seqnum_show(struct kobject *kobj, } KERNEL_ATTR_RO(uevent_seqnum); +#ifdef CONFIG_UEVENT_HELPER /* uevent helper program, used during early boot */ static ssize_t uevent_helper_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) @@ -56,7 +57,7 @@ static ssize_t uevent_helper_store(struct kobject *kobj, return count; } KERNEL_ATTR_RW(uevent_helper); - +#endif #ifdef CONFIG_PROFILING static ssize_t profiling_show(struct kobject *kobj, @@ -189,7 +190,9 @@ EXPORT_SYMBOL_GPL(kernel_kobj); static struct attribute * kernel_attrs[] = { &fscaps_attr.attr, &uevent_seqnum_attr.attr, +#ifdef CONFIG_UEVENT_HELPER &uevent_helper_attr.attr, +#endif #ifdef CONFIG_PROFILING &profiling_attr.attr, #endif diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 74f5b580fe3..bc966a8ffc3 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -643,7 +643,7 @@ static struct ctl_table kern_table[] = { .extra2 = &one, }, #endif - +#ifdef CONFIG_UEVENT_HELPER { .procname = "hotplug", .data = &uevent_helper, @@ -651,7 +651,7 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dostring, }, - +#endif #ifdef CONFIG_CHR_DEV_SG { .procname = "sg-big-buff", diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 4e3bd71bd94..9ebf9e20de5 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ 
-29,7 +29,9 @@ u64 uevent_seqnum; +#ifdef CONFIG_UEVENT_HELPER char uevent_helper[UEVENT_HELPER_PATH_LEN] = CONFIG_UEVENT_HELPER_PATH; +#endif #ifdef CONFIG_NET struct uevent_sock { struct list_head list; @@ -109,6 +111,7 @@ static int kobj_bcast_filter(struct sock *dsk, struct sk_buff *skb, void *data) } #endif +#ifdef CONFIG_UEVENT_HELPER static int kobj_usermode_filter(struct kobject *kobj) { const struct kobj_ns_type_operations *ops; @@ -147,6 +150,7 @@ static void cleanup_uevent_env(struct subprocess_info *info) { kfree(info->data); } +#endif /** * kobject_uevent_env - send an uevent with environmental data @@ -323,6 +327,7 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, #endif mutex_unlock(&uevent_sock_mutex); +#ifdef CONFIG_UEVENT_HELPER /* call uevent_helper, usually only enabled during early boot */ if (uevent_helper[0] && !kobj_usermode_filter(kobj)) { struct subprocess_info *info; @@ -347,6 +352,7 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, env = NULL; /* freed by cleanup_uevent_env */ } } +#endif exit: kfree(devpath); -- cgit v1.2.3-70-g09d2
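One follow-on consequence of the change above, shown here as a hypothetical sketch rather than anything in the patch: because uevent_helper is now only declared when CONFIG_UEVENT_HELPER is set, any other kernel code that inspects it needs the same guard the patch applies to kobject_uevent_env() and the sysctl/sysfs entries. The function name below is illustrative.

/*
 * Hypothetical guard pattern: report whether a userspace uevent helper
 * is configured.  Evaluates to false when CONFIG_UEVENT_HELPER is off,
 * since uevent_helper[] does not exist in that configuration.
 */
#include <linux/kobject.h>

static inline bool uevent_helper_enabled(void)
{
#ifdef CONFIG_UEVENT_HELPER
	return uevent_helper[0] != '\0';
#else
	return false;
#endif
}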