Diffstat (limited to 'kernel')
-rw-r--r--  kernel/perf_event.c                                                5
-rw-r--r--  kernel/power/Kconfig                                               9
-rw-r--r--  kernel/power/Makefile                                              2
-rw-r--r--  kernel/power/nvs.c (renamed from kernel/power/hibernate_nvs.c)    24
-rw-r--r--  kernel/power/suspend.c                                             6
-rw-r--r--  kernel/sched.c                                                   133
-rw-r--r--  kernel/sched_fair.c                                                2
-rw-r--r--  kernel/time/tick-sched.c                                           5
8 files changed, 105 insertions, 81 deletions
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 31d6afe9259..ff86c558af4 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -1507,6 +1507,9 @@ do { \
                divisor = nsec * frequency;
        }

+       if (!divisor)
+               return dividend;
+
        return div64_u64(dividend, divisor);
}

@@ -1529,7 +1532,7 @@ static int perf_event_start(struct perf_event *event)
static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
{
        struct hw_perf_event *hwc = &event->hw;
-       u64 period, sample_period;
+       s64 period, sample_period;
        s64 delta;

        period = perf_calculate_period(event, nsec, count);
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 5c36ea9d55d..ca6066a6952 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -99,9 +99,13 @@ config PM_SLEEP_ADVANCED_DEBUG
        depends on PM_ADVANCED_DEBUG
        default n

+config SUSPEND_NVS
+       bool
+
config SUSPEND
        bool "Suspend to RAM and standby"
        depends on PM && ARCH_SUSPEND_POSSIBLE
+       select SUSPEND_NVS if HAS_IOMEM
        default y
        ---help---
          Allow the system to enter sleep states in which main memory is
@@ -130,13 +134,10 @@ config SUSPEND_FREEZER

          Turning OFF this setting is NOT recommended! If in doubt, say Y.

-config HIBERNATION_NVS
-       bool
-
config HIBERNATION
        bool "Hibernation (aka 'suspend to disk')"
        depends on PM && SWAP && ARCH_HIBERNATION_POSSIBLE
-       select HIBERNATION_NVS if HAS_IOMEM
+       select SUSPEND_NVS if HAS_IOMEM
        ---help---
          Enable the suspend to disk (STD) functionality, which is usually
          called "hibernation" in user interfaces. STD checkpoints the
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index 524e058dcf0..f9063c6b185 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -10,6 +10,6 @@ obj-$(CONFIG_SUSPEND) += suspend.o
obj-$(CONFIG_PM_TEST_SUSPEND)   += suspend_test.o
obj-$(CONFIG_HIBERNATION)       += hibernate.o snapshot.o swap.o user.o \
                                   block_io.o
-obj-$(CONFIG_HIBERNATION_NVS)  += hibernate_nvs.o
+obj-$(CONFIG_SUSPEND_NVS)      += nvs.o

obj-$(CONFIG_MAGIC_SYSRQ)       += poweroff.o
diff --git a/kernel/power/hibernate_nvs.c b/kernel/power/nvs.c
index fdcad9ed5a7..1836db60bbb 100644
--- a/kernel/power/hibernate_nvs.c
+++ b/kernel/power/nvs.c
@@ -15,7 +15,7 @@

/*
 * Platforms, like ACPI, may want us to save some memory used by them during
- * hibernation and to restore the contents of this memory during the subsequent
+ * suspend and to restore the contents of this memory during the subsequent
 * resume. The code below implements a mechanism allowing us to do that.
 */

@@ -30,7 +30,7 @@ struct nvs_page {
static LIST_HEAD(nvs_list);

/**
- *     hibernate_nvs_register - register platform NVS memory region to save
+ *     suspend_nvs_register - register platform NVS memory region to save
 *      @start - physical address of the region
 *      @size - size of the region
 *
@@ -38,7 +38,7 @@ static LIST_HEAD(nvs_list);
 *      things so that the data from page-aligned addresses in this region will
 *      be copied into separate RAM pages.
 */
-int hibernate_nvs_register(unsigned long start, unsigned long size)
+int suspend_nvs_register(unsigned long start, unsigned long size)
{
        struct nvs_page *entry, *next;

@@ -68,9 +68,9 @@ int hibernate_nvs_register(unsigned long start, unsigned long size)
}

/**
- *     hibernate_nvs_free - free data pages allocated for saving NVS regions
+ *     suspend_nvs_free - free data pages allocated for saving NVS regions
 */
-void hibernate_nvs_free(void)
+void suspend_nvs_free(void)
{
        struct nvs_page *entry;

@@ -86,16 +86,16 @@ void hibernate_nvs_free(void)
}

/**
- *     hibernate_nvs_alloc - allocate memory necessary for saving NVS regions
+ *     suspend_nvs_alloc - allocate memory necessary for saving NVS regions
 */
-int hibernate_nvs_alloc(void)
+int suspend_nvs_alloc(void)
{
        struct nvs_page *entry;

        list_for_each_entry(entry, &nvs_list, node) {
                entry->data = (void *)__get_free_page(GFP_KERNEL);
                if (!entry->data) {
-                       hibernate_nvs_free();
+                       suspend_nvs_free();
                        return -ENOMEM;
                }
        }
@@ -103,9 +103,9 @@ int hibernate_nvs_alloc(void)
}

/**
- *     hibernate_nvs_save - save NVS memory regions
+ *     suspend_nvs_save - save NVS memory regions
 */
-void hibernate_nvs_save(void)
+void suspend_nvs_save(void)
{
        struct nvs_page *entry;

@@ -119,12 +119,12 @@ void hibernate_nvs_save(void)
}

/**
- *     hibernate_nvs_restore - restore NVS memory regions
+ *     suspend_nvs_restore - restore NVS memory regions
 *
 *      This function is going to be called with interrupts disabled, so it
 *      cannot iounmap the virtual addresses used to access the NVS region.
 */
-void hibernate_nvs_restore(void)
+void suspend_nvs_restore(void)
{
        struct nvs_page *entry;
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 56e7dbb8b99..f37cb7dd440 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -16,6 +16,12 @@
#include <linux/cpu.h>
#include <linux/syscalls.h>
#include <linux/gfp.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/suspend.h>

#include "power.h"

diff --git a/kernel/sched.c b/kernel/sched.c
index f8b8996228d..cb816e36cc8 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -306,52 +306,6 @@ static int init_task_group_load = INIT_TASK_GROUP_LOAD;
 */
struct task_group init_task_group;

-/* return group to which a task belongs */
-static inline struct task_group *task_group(struct task_struct *p)
-{
-       struct task_group *tg;
-
-#ifdef CONFIG_CGROUP_SCHED
-       tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id),
-                               struct task_group, css);
-#else
-       tg = &init_task_group;
-#endif
-       return tg;
-}
-
-/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
-static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
-{
-       /*
-        * Strictly speaking this rcu_read_lock() is not needed since the
-        * task_group is tied to the cgroup, which in turn can never go away
-        * as long as there are tasks attached to it.
-        *
-        * However since task_group() uses task_subsys_state() which is an
-        * rcu_dereference() user, this quiets CONFIG_PROVE_RCU.
-        */
-       rcu_read_lock();
-#ifdef CONFIG_FAIR_GROUP_SCHED
-       p->se.cfs_rq = task_group(p)->cfs_rq[cpu];
-       p->se.parent = task_group(p)->se[cpu];
-#endif
-
-#ifdef CONFIG_RT_GROUP_SCHED
-       p->rt.rt_rq = task_group(p)->rt_rq[cpu];
-       p->rt.parent = task_group(p)->rt_se[cpu];
-#endif
-       rcu_read_unlock();
-}
-
-#else
-
-static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
-static inline struct task_group *task_group(struct task_struct *p)
-{
-       return NULL;
-}
-
#endif /* CONFIG_CGROUP_SCHED */

/* CFS-related fields in a runqueue */
@@ -644,6 +598,49 @@ static inline int cpu_of(struct rq *rq)
#define cpu_curr(cpu)          (cpu_rq(cpu)->curr)
#define raw_rq()               (&__raw_get_cpu_var(runqueues))

+#ifdef CONFIG_CGROUP_SCHED
+
+/*
+ * Return the group to which this tasks belongs.
+ *
+ * We use task_subsys_state_check() and extend the RCU verification
+ * with lockdep_is_held(&task_rq(p)->lock) because cpu_cgroup_attach()
+ * holds that lock for each task it moves into the cgroup. Therefore
+ * by holding that lock, we pin the task to the current cgroup.
+ */
+static inline struct task_group *task_group(struct task_struct *p)
+{
+       struct cgroup_subsys_state *css;
+
+       css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
+                       lockdep_is_held(&task_rq(p)->lock));
+       return container_of(css, struct task_group, css);
+}
+
+/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
+static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
+{
+#ifdef CONFIG_FAIR_GROUP_SCHED
+       p->se.cfs_rq = task_group(p)->cfs_rq[cpu];
+       p->se.parent = task_group(p)->se[cpu];
+#endif
+
+#ifdef CONFIG_RT_GROUP_SCHED
+       p->rt.rt_rq = task_group(p)->rt_rq[cpu];
+       p->rt.parent = task_group(p)->rt_se[cpu];
+#endif
+}
+
+#else /* CONFIG_CGROUP_SCHED */
+
+static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
+static inline struct task_group *task_group(struct task_struct *p)
+{
+       return NULL;
+}
+
+#endif /* CONFIG_CGROUP_SCHED */
+
inline void update_rq_clock(struct rq *rq)
{
        if (!rq->skip_clock_update)
@@ -1257,6 +1254,12 @@ static void sched_avg_update(struct rq *rq)
        s64 period = sched_avg_period();

        while ((s64)(rq->clock - rq->age_stamp) > period) {
+               /*
+                * Inline assembly required to prevent the compiler
+                * optimising this loop into a divmod call.
+                * See __iter_div_u64_rem() for another example of this.
+                */
+               asm("" : "+rm" (rq->age_stamp));
                rq->age_stamp += period;
                rq->rt_avg /= 2;
        }
@@ -1660,9 +1663,6 @@ static void update_shares(struct sched_domain *sd)

static void update_h_load(long cpu)
{
-       if (root_task_group_empty())
-               return;
-
        walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
}

@@ -2494,7 +2494,16 @@ void sched_fork(struct task_struct *p, int clone_flags)
        if (p->sched_class->task_fork)
                p->sched_class->task_fork(p);

+       /*
+        * The child is not yet in the pid-hash so no cgroup attach races,
+        * and the cgroup is pinned to this child due to cgroup_fork()
+        * is ran before sched_fork().
+        *
+        * Silence PROVE_RCU.
+        */
+       rcu_read_lock();
        set_task_cpu(p, cpu);
+       rcu_read_unlock();

#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
        if (likely(sched_info_on()))
@@ -4465,16 +4474,6 @@ recheck:
        }

        if (user) {
-#ifdef CONFIG_RT_GROUP_SCHED
-               /*
-                * Do not allow realtime tasks into groups that have no runtime
-                * assigned.
-                */
-               if (rt_bandwidth_enabled() && rt_policy(policy) &&
-                               task_group(p)->rt_bandwidth.rt_runtime == 0)
-                       return -EPERM;
-#endif
-
                retval = security_task_setscheduler(p, policy, param);
                if (retval)
                        return retval;
@@ -4490,6 +4489,22 @@ recheck:
         * runqueue lock must be held.
         */
        rq = __task_rq_lock(p);
+
+#ifdef CONFIG_RT_GROUP_SCHED
+       if (user) {
+               /*
+                * Do not allow realtime tasks into groups that have no runtime
+                * assigned.
+                */
+               if (rt_bandwidth_enabled() && rt_policy(policy) &&
+                               task_group(p)->rt_bandwidth.rt_runtime == 0) {
+                       __task_rq_unlock(rq);
+                       raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+                       return -EPERM;
+               }
+       }
+#endif
+
        /* recheck policy now with rq lock held */
        if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
                policy = oldpolicy = -1;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index eed35eded60..a878b5332da 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1240,6 +1240,7 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
         * effect of the currently running task from the load
         * of the current CPU:
         */
+       rcu_read_lock();
        if (sync) {
                tg = task_group(current);
                weight = current->se.load.weight;
@@ -1275,6 +1276,7 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
                balanced = this_eff_load <= prev_eff_load;
        } else
                balanced = true;
+       rcu_read_unlock();

        /*
         * If the currently running task will sleep within
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 1d7b9bc1c03..783fbadf220 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -315,9 +315,6 @@ void tick_nohz_stop_sched_tick(int inidle)
                goto end;
        }

-       if (nohz_ratelimit(cpu))
-               goto end;
-
        ts->idle_calls++;
        /* Read jiffies and the time when jiffies were updated last */
        do {
@@ -328,7 +325,7 @@ void tick_nohz_stop_sched_tick(int inidle)
        } while (read_seqretry(&xtime_lock, seq));

        if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) ||
-           arch_needs_cpu(cpu)) {
+           arch_needs_cpu(cpu) || nohz_ratelimit(cpu)) {
                next_jiffies = last_jiffies + 1;
                delta_jiffies = 1;
        } else {
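
The one change above that is not self-explanatory is the sched_avg_update() hunk, which adds an empty inline asm purely as an optimisation barrier so the compiler keeps the catch-up loop as repeated additions rather than collapsing it into a divide/modulo. The sketch below is a standalone illustration of that trick, not code from the kernel tree; the function and variable names (catch_up, age, now, period) are invented for the example.

#include <stdint.h>
#include <stdio.h>

/* Advance *age in whole periods until it is within one period of now. */
static void catch_up(uint64_t *age, uint64_t now, uint64_t period)
{
        while ((int64_t)(now - *age) > (int64_t)period) {
                /*
                 * The empty asm marks *age as read and written ("+rm"), so
                 * the compiler cannot reason about the loop's trip count and
                 * strength-reduce the whole loop into a division, mirroring
                 * the barrier added to sched_avg_update() above.
                 */
                __asm__("" : "+rm" (*age));
                *age += period;
        }
}

int main(void)
{
        uint64_t age = 0;

        catch_up(&age, 10500, 1000);
        printf("caught up to %llu\n", (unsigned long long)age); /* prints 9500 */
        return 0;
}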