From 0c06a5d4b13cd66c833805a0d1db76b977944aac Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 10 Sep 2013 00:54:17 +0200
Subject: arm: Fix build error with context tracking calls

ad65782fba50 (context_tracking: Optimize main APIs off case
with static key) converted context tracking main APIs to inline
function and left ARM asm callers behind.

This can be easily fixed by making ARM calling the post static
keys context tracking function. We just need to replicate the
static key checks there. We'll remove these later when ARM will
support the context tracking static keys.

Reported-by: Guenter Roeck <linux@roeck-us.net>
Reported-by: Russell King <linux@arm.linux.org.uk>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Tested-by: Kevin Hilman <khilman@linaro.org>
Cc: Nicolas Pitre <nicolas.pitre@linaro.org>
Cc: Anil Kumar <anilk4.v@gmail.com>
Cc: Tony Lindgren <tony@atomide.com>
Cc: Benoit Cousson <b-cousson@ti.com>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Kevin Hilman <khilman@linaro.org>
---
 kernel/context_tracking.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'kernel')

diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index 247091bf058..859c8dfd78a 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -50,6 +50,15 @@ void context_tracking_user_enter(void)
 {
 	unsigned long flags;
 
+	/*
+	 * Repeat the user_enter() check here because some archs may be calling
+	 * this from asm and if no CPU needs context tracking, they shouldn't
+	 * go further. Repeat the check here until they support the static key
+	 * check.
+	 */
+	if (!static_key_false(&context_tracking_enabled))
+		return;
+
 	/*
 	 * Some contexts may involve an exception occuring in an irq,
 	 * leading to that nesting:
@@ -151,6 +160,9 @@ void context_tracking_user_exit(void)
 {
 	unsigned long flags;
 
+	if (!static_key_false(&context_tracking_enabled))
+		return;
+
 	if (in_interrupt())
 		return;
 
-- 
cgit v1.2.3-70-g09d2


From 3a126f85e015701e56240884f27f97543580d5f7 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Fri, 27 Sep 2013 13:17:39 -0700
Subject: kernel/params: fix handling of signed integer types

Commit 6072ddc8520b ("kernel: replace strict_strto*() with kstrto*()")
broke the handling of signed integer types, fix it.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Reported-by: Christian Kujau <lists@nerdbynature.de>
Tested-by: Christian Kujau <lists@nerdbynature.de>
Cc: Jingoo Han <jg1.han@samsung.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/params.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/params.c b/kernel/params.c
index 81c4e78c8f4..c00d5b502aa 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -254,11 +254,11 @@ int parse_args(const char *doing,
 
 
 STANDARD_PARAM_DEF(byte, unsigned char, "%hhu", unsigned long, kstrtoul);
-STANDARD_PARAM_DEF(short, short, "%hi", long, kstrtoul);
+STANDARD_PARAM_DEF(short, short, "%hi", long, kstrtol);
 STANDARD_PARAM_DEF(ushort, unsigned short, "%hu", unsigned long, kstrtoul);
-STANDARD_PARAM_DEF(int, int, "%i", long, kstrtoul);
+STANDARD_PARAM_DEF(int, int, "%i", long, kstrtol);
 STANDARD_PARAM_DEF(uint, unsigned int, "%u", unsigned long, kstrtoul);
-STANDARD_PARAM_DEF(long, long, "%li", long, kstrtoul);
+STANDARD_PARAM_DEF(long, long, "%li", long, kstrtol);
 STANDARD_PARAM_DEF(ulong, unsigned long, "%lu", unsigned long, kstrtoul);
 
 int param_set_charp(const char *val, const struct kernel_param *kp)
-- 
cgit v1.2.3-70-g09d2


From aab1728915420b5288cd0fc7b5bd320105b48983 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 30 Sep 2013 19:40:56 +0200
Subject: PM / hibernate: Fix user space driven resume regression

Recent commit 8fd37a4 (PM / hibernate: Create memory bitmaps after
freezing user space) broke the resume part of the user space driven
hibernation (s2disk), because I forgot that the resume utility
loaded the image into memory without freezing user space (it still
freezes tasks after loading the image).  This means that during user
space driven resume we need to create the memory bitmaps at the
"device open" time rather than at the "freeze tasks" time, so make
that happen (that's a special case anyway, so it needs to be treated
in a special way).

Reported-and-tested-by: Ronald <ronald645@gmail.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 kernel/power/snapshot.c | 5 ++++-
 kernel/power/user.c     | 8 ++++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 358a146fd4d..98c3b34a4cf 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -743,7 +743,10 @@ int create_basic_memory_bitmaps(void)
 	struct memory_bitmap *bm1, *bm2;
 	int error = 0;
 
-	BUG_ON(forbidden_pages_map || free_pages_map);
+	if (forbidden_pages_map && free_pages_map)
+		return 0;
+	else
+		BUG_ON(forbidden_pages_map || free_pages_map);
 
 	bm1 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL);
 	if (!bm1)
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 72e8f4fd616..957f06164ad 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -39,6 +39,7 @@ static struct snapshot_data {
 	char frozen;
 	char ready;
 	char platform_support;
+	bool free_bitmaps;
 } snapshot_state;
 
 atomic_t snapshot_device_available = ATOMIC_INIT(1);
@@ -82,6 +83,10 @@ static int snapshot_open(struct inode *inode, struct file *filp)
 		data->swap = -1;
 		data->mode = O_WRONLY;
 		error = pm_notifier_call_chain(PM_RESTORE_PREPARE);
+		if (!error) {
+			error = create_basic_memory_bitmaps();
+			data->free_bitmaps = !error;
+		}
 		if (error)
 			pm_notifier_call_chain(PM_POST_RESTORE);
 	}
@@ -111,6 +116,8 @@ static int snapshot_release(struct inode *inode, struct file *filp)
 		pm_restore_gfp_mask();
 		free_basic_memory_bitmaps();
 		thaw_processes();
+	} else if (data->free_bitmaps) {
+		free_basic_memory_bitmaps();
 	}
 	pm_notifier_call_chain(data->mode == O_RDONLY ?
 			PM_POST_HIBERNATION : PM_POST_RESTORE);
@@ -231,6 +238,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
 			break;
 		pm_restore_gfp_mask();
 		free_basic_memory_bitmaps();
+		data->free_bitmaps = false;
 		thaw_processes();
 		data->frozen = 0;
 		break;
-- 
cgit v1.2.3-70-g09d2


From 4c1c7be95c345cf2ad537a0c48e9aeadc7304527 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Mon, 30 Sep 2013 13:45:08 -0700
Subject: kernel/kmod.c: check for NULL in call_usermodehelper_exec()

If /proc/sys/kernel/core_pattern contains only "|", a NULL pointer
dereference happens upon core dump because argv_split("") returns
argv[0] == NULL.

This bug was once fixed by commit 264b83c07a84 ("usermodehelper: check
subprocess_info->path != NULL") but was by error reintroduced by commit
7f57cfa4e2aa ("usermodehelper: kill the sub_info->path[0] check").

This bug seems to exist since 2.6.19 (the version which core dump to
pipe was added).  Depending on kernel version and config, some side
effect might happen immediately after this oops (e.g.  kernel panic with
2.6.32-358.18.1.el6).

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Acked-by: Oleg Nesterov <oleg@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/kmod.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'kernel')

diff --git a/kernel/kmod.c b/kernel/kmod.c
index fb326365b69..b086006c59e 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -571,6 +571,10 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
 	DECLARE_COMPLETION_ONSTACK(done);
 	int retval = 0;
 
+	if (!sub_info->path) {
+		call_usermodehelper_freeinfo(sub_info);
+		return -EINVAL;
+	}
 	helper_lock();
 	if (!khelper_wq || usermodehelper_disabled) {
 		retval = -EBUSY;
-- 
cgit v1.2.3-70-g09d2


From 314a8ad0f18ac37887896b288939acd8cb17e208 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 30 Sep 2013 13:45:27 -0700
Subject: pidns: fix free_pid() to handle the first fork failure

"case 0" in free_pid() assumes that disable_pid_allocation() should
clear PIDNS_HASH_ADDING before the last pid goes away.

However this doesn't happen if the first fork() fails to create the
child reaper which should call disable_pid_allocation().

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: "Serge E. Hallyn" <serge@hallyn.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/pid.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'kernel')

diff --git a/kernel/pid.c b/kernel/pid.c
index ebe5e80b10f..9b9a2669814 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -273,6 +273,11 @@ void free_pid(struct pid *pid)
 			 */
 			wake_up_process(ns->child_reaper);
 			break;
+		case PIDNS_HASH_ADDING:
+			/* Handle a fork failure of the first process */
+			WARN_ON(ns->child_reaper);
+			ns->nr_hashed = 0;
+			/* fall through */
 		case 0:
 			schedule_work(&ns->proc_work);
 			break;
-- 
cgit v1.2.3-70-g09d2


From ded797547548a5b8e7b92383a41e4c0e6b0ecb7f Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 24 Sep 2013 00:50:25 +0200
Subject: irq: Force hardirq exit's softirq processing on its own stack

The commit facd8b80c67a3cf64a467c4a2ac5fb31f2e6745b
("irq: Sanitize invoke_softirq") converted irq exit
calls of do_softirq() to __do_softirq() on all architectures,
assuming it was only used there for its irq disablement
properties.

But as a side effect, the softirqs processed in the end
of the hardirq are always called on the inline current
stack that is used by irq_exit() instead of the softirq
stack provided by the archs that override do_softirq().

The result is mostly safe if the architecture runs irq_exit()
on a separate irq stack because then softirqs are processed
on that same stack that is near empty at this stage (assuming
hardirq aren't nesting).

Otherwise irq_exit() runs in the task stack and so does the softirq
too. The interrupted call stack can be randomly deep already and
the softirq can dig through it even further. To add insult to the
injury, this softirq can be interrupted by a new hardirq, maximizing
the chances for a stack overrun as reported in powerpc for example:

	do_IRQ: stack overflow: 1920
	CPU: 0 PID: 1602 Comm: qemu-system-ppc Not tainted 3.10.4-300.1.fc19.ppc64p7 #1
	Call Trace:
	[c0000000050a8740] .show_stack+0x130/0x200 (unreliable)
	[c0000000050a8810] .dump_stack+0x28/0x3c
	[c0000000050a8880] .do_IRQ+0x2b8/0x2c0
	[c0000000050a8930] hardware_interrupt_common+0x154/0x180
	--- Exception: 501 at .cp_start_xmit+0x3a4/0x820 [8139cp]
		LR = .cp_start_xmit+0x390/0x820 [8139cp]
	[c0000000050a8d40] .dev_hard_start_xmit+0x394/0x640
	[c0000000050a8e00] .sch_direct_xmit+0x110/0x260
	[c0000000050a8ea0] .dev_queue_xmit+0x260/0x630
	[c0000000050a8f40] .br_dev_queue_push_xmit+0xc4/0x130 [bridge]
	[c0000000050a8fc0] .br_dev_xmit+0x198/0x270 [bridge]
	[c0000000050a9070] .dev_hard_start_xmit+0x394/0x640
	[c0000000050a9130] .dev_queue_xmit+0x428/0x630
	[c0000000050a91d0] .ip_finish_output+0x2a4/0x550
	[c0000000050a9290] .ip_local_out+0x50/0x70
	[c0000000050a9310] .ip_queue_xmit+0x148/0x420
	[c0000000050a93b0] .tcp_transmit_skb+0x4e4/0xaf0
	[c0000000050a94a0] .__tcp_ack_snd_check+0x7c/0xf0
	[c0000000050a9520] .tcp_rcv_established+0x1e8/0x930
	[c0000000050a95f0] .tcp_v4_do_rcv+0x21c/0x570
	[c0000000050a96c0] .tcp_v4_rcv+0x734/0x930
	[c0000000050a97a0] .ip_local_deliver_finish+0x184/0x360
	[c0000000050a9840] .ip_rcv_finish+0x148/0x400
	[c0000000050a98d0] .__netif_receive_skb_core+0x4f8/0xb00
	[c0000000050a99d0] .netif_receive_skb+0x44/0x110
	[c0000000050a9a70] .br_handle_frame_finish+0x2bc/0x3f0 [bridge]
	[c0000000050a9b20] .br_nf_pre_routing_finish+0x2ac/0x420 [bridge]
	[c0000000050a9bd0] .br_nf_pre_routing+0x4dc/0x7d0 [bridge]
	[c0000000050a9c70] .nf_iterate+0x114/0x130
	[c0000000050a9d30] .nf_hook_slow+0xb4/0x1e0
	[c0000000050a9e00] .br_handle_frame+0x290/0x330 [bridge]
	[c0000000050a9ea0] .__netif_receive_skb_core+0x34c/0xb00
	[c0000000050a9fa0] .netif_receive_skb+0x44/0x110
	[c0000000050aa040] .napi_gro_receive+0xe8/0x120
	[c0000000050aa0c0] .cp_rx_poll+0x31c/0x590 [8139cp]
	[c0000000050aa1d0] .net_rx_action+0x1dc/0x310
	[c0000000050aa2b0] .__do_softirq+0x158/0x330
	[c0000000050aa3b0] .irq_exit+0xc8/0x110
	[c0000000050aa430] .do_IRQ+0xdc/0x2c0
	[c0000000050aa4e0] hardware_interrupt_common+0x154/0x180
	 --- Exception: 501 at .bad_range+0x1c/0x110
		 LR = .get_page_from_freelist+0x908/0xbb0
	[c0000000050aa7d0] .list_del+0x18/0x50 (unreliable)
	[c0000000050aa850] .get_page_from_freelist+0x908/0xbb0
	[c0000000050aa9e0] .__alloc_pages_nodemask+0x21c/0xae0
	[c0000000050aaba0] .alloc_pages_vma+0xd0/0x210
	[c0000000050aac60] .handle_pte_fault+0x814/0xb70
	[c0000000050aad50] .__get_user_pages+0x1a4/0x640
	[c0000000050aae60] .get_user_pages_fast+0xec/0x160
	[c0000000050aaf10] .__gfn_to_pfn_memslot+0x3b0/0x430 [kvm]
	[c0000000050aafd0] .kvmppc_gfn_to_pfn+0x64/0x130 [kvm]
	[c0000000050ab070] .kvmppc_mmu_map_page+0x94/0x530 [kvm]
	[c0000000050ab190] .kvmppc_handle_pagefault+0x174/0x610 [kvm]
	[c0000000050ab270] .kvmppc_handle_exit_pr+0x464/0x9b0 [kvm]
	[c0000000050ab320]  kvm_start_lightweight+0x1ec/0x1fc [kvm]
	[c0000000050ab4f0] .kvmppc_vcpu_run_pr+0x168/0x3b0 [kvm]
	[c0000000050ab9c0] .kvmppc_vcpu_run+0xc8/0xf0 [kvm]
	[c0000000050aba50] .kvm_arch_vcpu_ioctl_run+0x5c/0x1a0 [kvm]
	[c0000000050abae0] .kvm_vcpu_ioctl+0x478/0x730 [kvm]
	[c0000000050abc90] .do_vfs_ioctl+0x4ec/0x7c0
	[c0000000050abd80] .SyS_ioctl+0xd4/0xf0
	[c0000000050abe30] syscall_exit+0x0/0x98

Since this is a regression, this patch proposes a minimalistic
and low-risk solution by blindly forcing the hardirq exit processing of
softirqs on the softirq stack. This way we should reduce significantly
the opportunities for task stack overflow dug by softirqs.

Longer term solutions may involve extending the hardirq stack coverage to
irq_exit(), etc...

Reported-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: #3.9.. <stable@vger.kernel.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@au1.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul Mackerras <paulus@au1.ibm.com>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: James E.J. Bottomley <jejb@parisc-linux.org>
Cc: Helge Deller <deller@gmx.de>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Andrew Morton <akpm@linux-foundation.org>
---
 kernel/softirq.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/softirq.c b/kernel/softirq.c
index 53cc09ceb0b..d7d498d8cc4 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -328,10 +328,19 @@ void irq_enter(void)
 
 static inline void invoke_softirq(void)
 {
-	if (!force_irqthreads)
-		__do_softirq();
-	else
+	if (!force_irqthreads) {
+		/*
+		 * We can safely execute softirq on the current stack if
+		 * it is the irq stack, because it should be near empty
+		 * at this stage. But we have no way to know if the arch
+		 * calls irq_exit() on the irq stack. So call softirq
+		 * in its own stack to prevent from any overrun on top
+		 * of a potentially deep task stack.
+		 */
+		do_softirq();
+	} else {
 		wakeup_softirqd();
+	}
 }
 
 static inline void tick_irq_exit(void)
-- 
cgit v1.2.3-70-g09d2


From 9886167d20c0720dcfb01e62cdff4d906b226f43 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 3 Oct 2013 16:02:23 +0200
Subject: perf: Fix perf_pmu_migrate_context

While auditing the list_entry usage due to a trinity bug I found that
perf_pmu_migrate_context violates the rules for
perf_event::event_entry.

The problem is that perf_event::event_entry is a RCU list element, and
hence we must wait for a full RCU grace period before re-using the
element after deletion.

Therefore the usage in perf_pmu_migrate_context() which re-uses the
entry immediately is broken. For now introduce another list_head into
perf_event for this specific usage.

This doesn't actually fix the trinity report because that never goes
through this code.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/n/tip-mkj72lxagw1z8fvjm648iznw@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/perf_event.h | 24 +++++++++++++++++++++++-
 kernel/events/core.c       |  6 +++---
 2 files changed, 26 insertions(+), 4 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 866e85c5eb9..c8ba627c1d6 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -294,9 +294,31 @@ struct ring_buffer;
  */
 struct perf_event {
 #ifdef CONFIG_PERF_EVENTS
-	struct list_head		group_entry;
+	/*
+	 * entry onto perf_event_context::event_list;
+	 *   modifications require ctx->lock
+	 *   RCU safe iterations.
+	 */
 	struct list_head		event_entry;
+
+	/*
+	 * XXX: group_entry and sibling_list should be mutually exclusive;
+	 * either you're a sibling on a group, or you're the group leader.
+	 * Rework the code to always use the same list element.
+	 *
+	 * Locked for modification by both ctx->mutex and ctx->lock; holding
+	 * either sufficies for read.
+	 */
+	struct list_head		group_entry;
 	struct list_head		sibling_list;
+
+	/*
+	 * We need storage to track the entries in perf_pmu_migrate_context; we
+	 * cannot use the event_entry because of RCU and we want to keep the
+	 * group in tact which avoids us using the other two entries.
+	 */
+	struct list_head		migrate_entry;
+
 	struct hlist_node		hlist_entry;
 	int				nr_siblings;
 	int				group_flags;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index cb4238e85b3..d49a9d29334 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7234,15 +7234,15 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
 		perf_remove_from_context(event);
 		unaccount_event_cpu(event, src_cpu);
 		put_ctx(src_ctx);
-		list_add(&event->event_entry, &events);
+		list_add(&event->migrate_entry, &events);
 	}
 	mutex_unlock(&src_ctx->mutex);
 
 	synchronize_rcu();
 
 	mutex_lock(&dst_ctx->mutex);
-	list_for_each_entry_safe(event, tmp, &events, event_entry) {
-		list_del(&event->event_entry);
+	list_for_each_entry_safe(event, tmp, &events, migrate_entry) {
+		list_del(&event->migrate_entry);
 		if (event->state >= PERF_EVENT_STATE_OFF)
 			event->state = PERF_EVENT_STATE_INACTIVE;
 		account_event_cpu(event, dst_cpu);
-- 
cgit v1.2.3-70-g09d2