aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/perf_event.c5
-rw-r--r--kernel/power/Kconfig9
-rw-r--r--kernel/power/Makefile2
-rw-r--r--kernel/power/nvs.c (renamed from kernel/power/hibernate_nvs.c)24
-rw-r--r--kernel/power/suspend.c6
-rw-r--r--kernel/sched.c133
-rw-r--r--kernel/sched_fair.c2
-rw-r--r--kernel/time/tick-sched.c5
8 files changed, 105 insertions, 81 deletions
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 31d6afe9259..ff86c558af4 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -1507,6 +1507,9 @@ do { \
divisor = nsec * frequency;
}
+ if (!divisor)
+ return dividend;
+
return div64_u64(dividend, divisor);
}
@@ -1529,7 +1532,7 @@ static int perf_event_start(struct perf_event *event)
static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
{
struct hw_perf_event *hwc = &event->hw;
- u64 period, sample_period;
+ s64 period, sample_period;
s64 delta;
period = perf_calculate_period(event, nsec, count);
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 5c36ea9d55d..ca6066a6952 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -99,9 +99,13 @@ config PM_SLEEP_ADVANCED_DEBUG
depends on PM_ADVANCED_DEBUG
default n
+config SUSPEND_NVS
+ bool
+
config SUSPEND
bool "Suspend to RAM and standby"
depends on PM && ARCH_SUSPEND_POSSIBLE
+ select SUSPEND_NVS if HAS_IOMEM
default y
---help---
Allow the system to enter sleep states in which main memory is
@@ -130,13 +134,10 @@ config SUSPEND_FREEZER
Turning OFF this setting is NOT recommended! If in doubt, say Y.
-config HIBERNATION_NVS
- bool
-
config HIBERNATION
bool "Hibernation (aka 'suspend to disk')"
depends on PM && SWAP && ARCH_HIBERNATION_POSSIBLE
- select HIBERNATION_NVS if HAS_IOMEM
+ select SUSPEND_NVS if HAS_IOMEM
---help---
Enable the suspend to disk (STD) functionality, which is usually
called "hibernation" in user interfaces. STD checkpoints the
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index 524e058dcf0..f9063c6b185 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -10,6 +10,6 @@ obj-$(CONFIG_SUSPEND) += suspend.o
obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o
obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o \
block_io.o
-obj-$(CONFIG_HIBERNATION_NVS) += hibernate_nvs.o
+obj-$(CONFIG_SUSPEND_NVS) += nvs.o
obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o
diff --git a/kernel/power/hibernate_nvs.c b/kernel/power/nvs.c
index fdcad9ed5a7..1836db60bbb 100644
--- a/kernel/power/hibernate_nvs.c
+++ b/kernel/power/nvs.c
@@ -15,7 +15,7 @@
/*
* Platforms, like ACPI, may want us to save some memory used by them during
- * hibernation and to restore the contents of this memory during the subsequent
+ * suspend and to restore the contents of this memory during the subsequent
* resume. The code below implements a mechanism allowing us to do that.
*/
@@ -30,7 +30,7 @@ struct nvs_page {
static LIST_HEAD(nvs_list);
/**
- * hibernate_nvs_register - register platform NVS memory region to save
+ * suspend_nvs_register - register platform NVS memory region to save
* @start - physical address of the region
* @size - size of the region
*
@@ -38,7 +38,7 @@ static LIST_HEAD(nvs_list);
* things so that the data from page-aligned addresses in this region will
* be copied into separate RAM pages.
*/
-int hibernate_nvs_register(unsigned long start, unsigned long size)
+int suspend_nvs_register(unsigned long start, unsigned long size)
{
struct nvs_page *entry, *next;
@@ -68,9 +68,9 @@ int hibernate_nvs_register(unsigned long start, unsigned long size)
}
/**
- * hibernate_nvs_free - free data pages allocated for saving NVS regions
+ * suspend_nvs_free - free data pages allocated for saving NVS regions
*/
-void hibernate_nvs_free(void)
+void suspend_nvs_free(void)
{
struct nvs_page *entry;
@@ -86,16 +86,16 @@ void hibernate_nvs_free(void)
}
/**
- * hibernate_nvs_alloc - allocate memory necessary for saving NVS regions
+ * suspend_nvs_alloc - allocate memory necessary for saving NVS regions
*/
-int hibernate_nvs_alloc(void)
+int suspend_nvs_alloc(void)
{
struct nvs_page *entry;
list_for_each_entry(entry, &nvs_list, node) {
entry->data = (void *)__get_free_page(GFP_KERNEL);
if (!entry->data) {
- hibernate_nvs_free();
+ suspend_nvs_free();
return -ENOMEM;
}
}
@@ -103,9 +103,9 @@ int hibernate_nvs_alloc(void)
}
/**
- * hibernate_nvs_save - save NVS memory regions
+ * suspend_nvs_save - save NVS memory regions
*/
-void hibernate_nvs_save(void)
+void suspend_nvs_save(void)
{
struct nvs_page *entry;
@@ -119,12 +119,12 @@ void hibernate_nvs_save(void)
}
/**
- * hibernate_nvs_restore - restore NVS memory regions
+ * suspend_nvs_restore - restore NVS memory regions
*
* This function is going to be called with interrupts disabled, so it
* cannot iounmap the virtual addresses used to access the NVS region.
*/
-void hibernate_nvs_restore(void)
+void suspend_nvs_restore(void)
{
struct nvs_page *entry;
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 56e7dbb8b99..f37cb7dd440 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -16,6 +16,12 @@
#include <linux/cpu.h>
#include <linux/syscalls.h>
#include <linux/gfp.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/suspend.h>
#include "power.h"
diff --git a/kernel/sched.c b/kernel/sched.c
index f8b8996228d..cb816e36cc8 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -306,52 +306,6 @@ static int init_task_group_load = INIT_TASK_GROUP_LOAD;
*/
struct task_group init_task_group;
-/* return group to which a task belongs */
-static inline struct task_group *task_group(struct task_struct *p)
-{
- struct task_group *tg;
-
-#ifdef CONFIG_CGROUP_SCHED
- tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id),
- struct task_group, css);
-#else
- tg = &init_task_group;
-#endif
- return tg;
-}
-
-/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
-static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
-{
- /*
- * Strictly speaking this rcu_read_lock() is not needed since the
- * task_group is tied to the cgroup, which in turn can never go away
- * as long as there are tasks attached to it.
- *
- * However since task_group() uses task_subsys_state() which is an
- * rcu_dereference() user, this quiets CONFIG_PROVE_RCU.
- */
- rcu_read_lock();
-#ifdef CONFIG_FAIR_GROUP_SCHED
- p->se.cfs_rq = task_group(p)->cfs_rq[cpu];
- p->se.parent = task_group(p)->se[cpu];
-#endif
-
-#ifdef CONFIG_RT_GROUP_SCHED
- p->rt.rt_rq = task_group(p)->rt_rq[cpu];
- p->rt.parent = task_group(p)->rt_se[cpu];
-#endif
- rcu_read_unlock();
-}
-
-#else
-
-static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
-static inline struct task_group *task_group(struct task_struct *p)
-{
- return NULL;
-}
-
#endif /* CONFIG_CGROUP_SCHED */
/* CFS-related fields in a runqueue */
@@ -644,6 +598,49 @@ static inline int cpu_of(struct rq *rq)
#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
#define raw_rq() (&__raw_get_cpu_var(runqueues))
+#ifdef CONFIG_CGROUP_SCHED
+
+/*
+ * Return the group to which this tasks belongs.
+ *
+ * We use task_subsys_state_check() and extend the RCU verification
+ * with lockdep_is_held(&task_rq(p)->lock) because cpu_cgroup_attach()
+ * holds that lock for each task it moves into the cgroup. Therefore
+ * by holding that lock, we pin the task to the current cgroup.
+ */
+static inline struct task_group *task_group(struct task_struct *p)
+{
+ struct cgroup_subsys_state *css;
+
+ css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
+ lockdep_is_held(&task_rq(p)->lock));
+ return container_of(css, struct task_group, css);
+}
+
+/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
+static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
+{
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ p->se.cfs_rq = task_group(p)->cfs_rq[cpu];
+ p->se.parent = task_group(p)->se[cpu];
+#endif
+
+#ifdef CONFIG_RT_GROUP_SCHED
+ p->rt.rt_rq = task_group(p)->rt_rq[cpu];
+ p->rt.parent = task_group(p)->rt_se[cpu];
+#endif
+}
+
+#else /* CONFIG_CGROUP_SCHED */
+
+static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
+static inline struct task_group *task_group(struct task_struct *p)
+{
+ return NULL;
+}
+
+#endif /* CONFIG_CGROUP_SCHED */
+
inline void update_rq_clock(struct rq *rq)
{
if (!rq->skip_clock_update)
@@ -1257,6 +1254,12 @@ static void sched_avg_update(struct rq *rq)
s64 period = sched_avg_period();
while ((s64)(rq->clock - rq->age_stamp) > period) {
+ /*
+ * Inline assembly required to prevent the compiler
+ * optimising this loop into a divmod call.
+ * See __iter_div_u64_rem() for another example of this.
+ */
+ asm("" : "+rm" (rq->age_stamp));
rq->age_stamp += period;
rq->rt_avg /= 2;
}
@@ -1660,9 +1663,6 @@ static void update_shares(struct sched_domain *sd)
static void update_h_load(long cpu)
{
- if (root_task_group_empty())
- return;
-
walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
}
@@ -2494,7 +2494,16 @@ void sched_fork(struct task_struct *p, int clone_flags)
if (p->sched_class->task_fork)
p->sched_class->task_fork(p);
+ /*
+ * The child is not yet in the pid-hash so no cgroup attach races,
+ * and the cgroup is pinned to this child due to cgroup_fork()
+ * is ran before sched_fork().
+ *
+ * Silence PROVE_RCU.
+ */
+ rcu_read_lock();
set_task_cpu(p, cpu);
+ rcu_read_unlock();
#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
if (likely(sched_info_on()))
@@ -4465,16 +4474,6 @@ recheck:
}
if (user) {
-#ifdef CONFIG_RT_GROUP_SCHED
- /*
- * Do not allow realtime tasks into groups that have no runtime
- * assigned.
- */
- if (rt_bandwidth_enabled() && rt_policy(policy) &&
- task_group(p)->rt_bandwidth.rt_runtime == 0)
- return -EPERM;
-#endif
-
retval = security_task_setscheduler(p, policy, param);
if (retval)
return retval;
@@ -4490,6 +4489,22 @@ recheck:
* runqueue lock must be held.
*/
rq = __task_rq_lock(p);
+
+#ifdef CONFIG_RT_GROUP_SCHED
+ if (user) {
+ /*
+ * Do not allow realtime tasks into groups that have no runtime
+ * assigned.
+ */
+ if (rt_bandwidth_enabled() && rt_policy(policy) &&
+ task_group(p)->rt_bandwidth.rt_runtime == 0) {
+ __task_rq_unlock(rq);
+ raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+ return -EPERM;
+ }
+ }
+#endif
+
/* recheck policy now with rq lock held */
if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
policy = oldpolicy = -1;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index eed35eded60..a878b5332da 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1240,6 +1240,7 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
* effect of the currently running task from the load
* of the current CPU:
*/
+ rcu_read_lock();
if (sync) {
tg = task_group(current);
weight = current->se.load.weight;
@@ -1275,6 +1276,7 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
balanced = this_eff_load <= prev_eff_load;
} else
balanced = true;
+ rcu_read_unlock();
/*
* If the currently running task will sleep within
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 1d7b9bc1c03..783fbadf220 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -315,9 +315,6 @@ void tick_nohz_stop_sched_tick(int inidle)
goto end;
}
- if (nohz_ratelimit(cpu))
- goto end;
-
ts->idle_calls++;
/* Read jiffies and the time when jiffies were updated last */
do {
@@ -328,7 +325,7 @@ void tick_nohz_stop_sched_tick(int inidle)
} while (read_seqretry(&xtime_lock, seq));
if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) ||
- arch_needs_cpu(cpu)) {
+ arch_needs_cpu(cpu) || nohz_ratelimit(cpu)) {
next_jiffies = last_jiffies + 1;
delta_jiffies = 1;
} else {