diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-04-30 07:43:28 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-04-30 07:43:28 -0700 |
commit | 16fa94b532b1958f508e07eca1a9256351241fbc (patch) | |
tree | 90012a7b7fe2b8cf96f6f5ec12490e0c5e152291 | |
parent | e0972916e8fe943f342b0dd1c9d43dbf5bc261c2 (diff) | |
parent | 25f55d9d01ad7a7ad248fd5af1d22675ffd202c5 (diff) |
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler changes from Ingo Molnar:
"The main changes in this development cycle were:
- full dynticks preparatory work by Frederic Weisbecker
- factor out the cpu time accounting code better, by Li Zefan
- multi-CPU load balancer cleanups and improvements by Joonsoo Kim
- various smaller fixes and cleanups"
* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (45 commits)
sched: Fix init NOHZ_IDLE flag
sched: Prevent to re-select dst-cpu in load_balance()
sched: Rename load_balance_tmpmask to load_balance_mask
sched: Move up affinity check to mitigate useless redoing overhead
sched: Don't consider other cpus in our group in case of NEWLY_IDLE
sched: Explicitly cpu_idle_type checking in rebalance_domains()
sched: Change position of resched_cpu() in load_balance()
sched: Fix wrong rq's runnable_avg update with rt tasks
sched: Document task_struct::personality field
sched/cpuacct/UML: Fix header file dependency bug on the UML build
cgroup: Kill subsys.active flag
sched/cpuacct: No need to check subsys active state
sched/cpuacct: Initialize cpuacct subsystem earlier
sched/cpuacct: Initialize root cpuacct earlier
sched/cpuacct: Allocate per_cpu cpuusage for root cpuacct statically
sched/cpuacct: Clean up cpuacct.h
sched/cpuacct: Remove redundant NULL checks in cpuacct_acount_field()
sched/cpuacct: Remove redundant NULL checks in cpuacct_charge()
sched/cpuacct: Add cpuacct_acount_field()
sched/cpuacct: Add cpuacct_init()
...
-rw-r--r-- | arch/x86/include/asm/context_tracking.h | 21 | ||||
-rw-r--r-- | arch/x86/kernel/kvm.c | 8 | ||||
-rw-r--r-- | arch/x86/kernel/traps.c | 68 | ||||
-rw-r--r-- | arch/x86/mm/fault.c | 8 | ||||
-rw-r--r-- | include/linux/cgroup.h | 1 | ||||
-rw-r--r-- | include/linux/context_tracking.h | 24 | ||||
-rw-r--r-- | include/linux/math64.h | 19 | ||||
-rw-r--r-- | include/linux/sched.h | 204 | ||||
-rw-r--r-- | init/Kconfig | 1 | ||||
-rw-r--r-- | kernel/cgroup.c | 3 | ||||
-rw-r--r-- | kernel/fork.c | 2 | ||||
-rw-r--r-- | kernel/sched/Makefile | 1 | ||||
-rw-r--r-- | kernel/sched/core.c | 254 | ||||
-rw-r--r-- | kernel/sched/cpuacct.c | 296 | ||||
-rw-r--r-- | kernel/sched/cpuacct.h | 17 | ||||
-rw-r--r-- | kernel/sched/cputime.c | 214 | ||||
-rw-r--r-- | kernel/sched/fair.c | 148 | ||||
-rw-r--r-- | kernel/sched/idle_task.c | 16 | ||||
-rw-r--r-- | kernel/sched/sched.h | 219 | ||||
-rw-r--r-- | lib/div64.c | 19 |
20 files changed, 834 insertions, 709 deletions
diff --git a/arch/x86/include/asm/context_tracking.h b/arch/x86/include/asm/context_tracking.h index 1616562683e..1fe49704b14 100644 --- a/arch/x86/include/asm/context_tracking.h +++ b/arch/x86/include/asm/context_tracking.h @@ -1,31 +1,10 @@ #ifndef _ASM_X86_CONTEXT_TRACKING_H #define _ASM_X86_CONTEXT_TRACKING_H -#ifndef __ASSEMBLY__ -#include <linux/context_tracking.h> -#include <asm/ptrace.h> - -static inline void exception_enter(struct pt_regs *regs) -{ - user_exit(); -} - -static inline void exception_exit(struct pt_regs *regs) -{ -#ifdef CONFIG_CONTEXT_TRACKING - if (user_mode(regs)) - user_enter(); -#endif -} - -#else /* __ASSEMBLY__ */ - #ifdef CONFIG_CONTEXT_TRACKING # define SCHEDULE_USER call schedule_user #else # define SCHEDULE_USER call schedule #endif -#endif /* !__ASSEMBLY__ */ - #endif diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index b686a904d7c..cd6d9a5a42f 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -20,6 +20,7 @@ * Authors: Anthony Liguori <aliguori@us.ibm.com> */ +#include <linux/context_tracking.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/kvm_para.h> @@ -43,7 +44,6 @@ #include <asm/apicdef.h> #include <asm/hypervisor.h> #include <asm/kvm_guest.h> -#include <asm/context_tracking.h> static int kvmapf = 1; @@ -254,16 +254,18 @@ EXPORT_SYMBOL_GPL(kvm_read_and_reset_pf_reason); dotraplinkage void __kprobes do_async_page_fault(struct pt_regs *regs, unsigned long error_code) { + enum ctx_state prev_state; + switch (kvm_read_and_reset_pf_reason()) { default: do_page_fault(regs, error_code); break; case KVM_PV_REASON_PAGE_NOT_PRESENT: /* page is swapped out by the host. */ - exception_enter(regs); + prev_state = exception_enter(); exit_idle(); kvm_async_pf_task_wait((u32)read_cr2()); - exception_exit(regs); + exception_exit(prev_state); break; case KVM_PV_REASON_PAGE_READY: rcu_irq_enter(); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 68bda7a8415..ff6d2271cbe 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -12,6 +12,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/context_tracking.h> #include <linux/interrupt.h> #include <linux/kallsyms.h> #include <linux/spinlock.h> @@ -55,8 +56,6 @@ #include <asm/i387.h> #include <asm/fpu-internal.h> #include <asm/mce.h> -#include <asm/context_tracking.h> - #include <asm/mach_traps.h> #ifdef CONFIG_X86_64 @@ -176,34 +175,38 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, #define DO_ERROR(trapnr, signr, str, name) \ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ { \ - exception_enter(regs); \ + enum ctx_state prev_state; \ + \ + prev_state = exception_enter(); \ if (notify_die(DIE_TRAP, str, regs, error_code, \ trapnr, signr) == NOTIFY_STOP) { \ - exception_exit(regs); \ + exception_exit(prev_state); \ return; \ } \ conditional_sti(regs); \ do_trap(trapnr, signr, str, regs, error_code, NULL); \ - exception_exit(regs); \ + exception_exit(prev_state); \ } #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ { \ siginfo_t info; \ + enum ctx_state prev_state; \ + \ info.si_signo = signr; \ info.si_errno = 0; \ info.si_code = sicode; \ info.si_addr = (void __user *)siaddr; \ - exception_enter(regs); \ + prev_state = exception_enter(); \ if (notify_die(DIE_TRAP, str, regs, error_code, \ trapnr, signr) == NOTIFY_STOP) { \ - exception_exit(regs); \ + exception_exit(prev_state); \ return; \ } \ conditional_sti(regs); \ do_trap(trapnr, signr, str, regs, error_code, &info); \ - exception_exit(regs); \ + exception_exit(prev_state); \ } DO_ERROR_INFO(X86_TRAP_DE, SIGFPE, "divide error", divide_error, FPE_INTDIV, @@ -226,14 +229,16 @@ DO_ERROR_INFO(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check, /* Runs on IST stack */ dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code) { - exception_enter(regs); + enum ctx_state prev_state; + + prev_state = exception_enter(); if (notify_die(DIE_TRAP, "stack segment", regs, error_code, X86_TRAP_SS, SIGBUS) != NOTIFY_STOP) { preempt_conditional_sti(regs); do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL); preempt_conditional_cli(regs); } - exception_exit(regs); + exception_exit(prev_state); } dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) @@ -241,7 +246,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) static const char str[] = "double fault"; struct task_struct *tsk = current; - exception_enter(regs); + exception_enter(); /* Return not checked because double check cannot be ignored */ notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV); @@ -261,8 +266,9 @@ dotraplinkage void __kprobes do_general_protection(struct pt_regs *regs, long error_code) { struct task_struct *tsk; + enum ctx_state prev_state; - exception_enter(regs); + prev_state = exception_enter(); conditional_sti(regs); #ifdef CONFIG_X86_32 @@ -300,12 +306,14 @@ do_general_protection(struct pt_regs *regs, long error_code) force_sig(SIGSEGV, tsk); exit: - exception_exit(regs); + exception_exit(prev_state); } /* May run on IST stack. */ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_code) { + enum ctx_state prev_state; + #ifdef CONFIG_DYNAMIC_FTRACE /* * ftrace must be first, everything else may cause a recursive crash. @@ -315,7 +323,7 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co ftrace_int3_handler(regs)) return; #endif - exception_enter(regs); + prev_state = exception_enter(); #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, SIGTRAP) == NOTIFY_STOP) @@ -336,7 +344,7 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co preempt_conditional_cli(regs); debug_stack_usage_dec(); exit: - exception_exit(regs); + exception_exit(prev_state); } #ifdef CONFIG_X86_64 @@ -393,11 +401,12 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) { struct task_struct *tsk = current; + enum ctx_state prev_state; int user_icebp = 0; unsigned long dr6; int si_code; - exception_enter(regs); + prev_state = exception_enter(); get_debugreg(dr6, 6); @@ -467,7 +476,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) debug_stack_usage_dec(); exit: - exception_exit(regs); + exception_exit(prev_state); } /* @@ -561,17 +570,21 @@ void math_error(struct pt_regs *regs, int error_code, int trapnr) dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) { - exception_enter(regs); + enum ctx_state prev_state; + + prev_state = exception_enter(); math_error(regs, error_code, X86_TRAP_MF); - exception_exit(regs); + exception_exit(prev_state); } dotraplinkage void do_simd_coprocessor_error(struct pt_regs *regs, long error_code) { - exception_enter(regs); + enum ctx_state prev_state; + + prev_state = exception_enter(); math_error(regs, error_code, X86_TRAP_XF); - exception_exit(regs); + exception_exit(prev_state); } dotraplinkage void @@ -639,7 +652,9 @@ EXPORT_SYMBOL_GPL(math_state_restore); dotraplinkage void __kprobes do_device_not_available(struct pt_regs *regs, long error_code) { - exception_enter(regs); + enum ctx_state prev_state; + + prev_state = exception_enter(); BUG_ON(use_eager_fpu()); #ifdef CONFIG_MATH_EMULATION @@ -650,7 +665,7 @@ do_device_not_available(struct pt_regs *regs, long error_code) info.regs = regs; math_emulate(&info); - exception_exit(regs); + exception_exit(prev_state); return; } #endif @@ -658,15 +673,16 @@ do_device_not_available(struct pt_regs *regs, long error_code) #ifdef CONFIG_X86_32 conditional_sti(regs); #endif - exception_exit(regs); + exception_exit(prev_state); } #ifdef CONFIG_X86_32 dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) { siginfo_t info; + enum ctx_state prev_state; - exception_enter(regs); + prev_state = exception_enter(); local_irq_enable(); info.si_signo = SIGILL; @@ -678,7 +694,7 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code, &info); } - exception_exit(regs); + exception_exit(prev_state); } #endif diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 0e883364abb..022a9a0a3c6 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -13,12 +13,12 @@ #include <linux/perf_event.h> /* perf_sw_event */ #include <linux/hugetlb.h> /* hstate_index_to_shift */ #include <linux/prefetch.h> /* prefetchw */ +#include <linux/context_tracking.h> /* exception_enter(), ... */ #include <asm/traps.h> /* dotraplinkage, ... */ #include <asm/pgalloc.h> /* pgd_*(), ... */ #include <asm/kmemcheck.h> /* kmemcheck_*(), ... */ #include <asm/fixmap.h> /* VSYSCALL_START */ -#include <asm/context_tracking.h> /* exception_enter(), ... */ /* * Page fault error code bits: @@ -1224,7 +1224,9 @@ good_area: dotraplinkage void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) { - exception_enter(regs); + enum ctx_state prev_state; + + prev_state = exception_enter(); __do_page_fault(regs, error_code); - exception_exit(regs); + exception_exit(prev_state); } diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index d86e215ca2b..646ab9d15e4 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -586,7 +586,6 @@ struct cgroup_subsys { void (*bind)(struct cgroup *root); int subsys_id; - int active; int disabled; int early_init; /* diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index b28d161c109..365f4a61bf0 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -1,9 +1,9 @@ #ifndef _LINUX_CONTEXT_TRACKING_H #define _LINUX_CONTEXT_TRACKING_H -#ifdef CONFIG_CONTEXT_TRACKING #include <linux/sched.h> #include <linux/percpu.h> +#include <asm/ptrace.h> struct context_tracking { /* @@ -13,12 +13,13 @@ struct context_tracking { * may be further optimized using static keys. */ bool active; - enum { + enum ctx_state { IN_KERNEL = 0, IN_USER, } state; }; +#ifdef CONFIG_CONTEXT_TRACKING DECLARE_PER_CPU(struct context_tracking, context_tracking); static inline bool context_tracking_in_user(void) @@ -33,12 +34,31 @@ static inline bool context_tracking_active(void) extern void user_enter(void); extern void user_exit(void); + +static inline enum ctx_state exception_enter(void) +{ + enum ctx_state prev_ctx; + + prev_ctx = this_cpu_read(context_tracking.state); + user_exit(); + + return prev_ctx; +} + +static inline void exception_exit(enum ctx_state prev_ctx) +{ + if (prev_ctx == IN_USER) + user_enter(); +} + extern void context_tracking_task_switch(struct task_struct *prev, struct task_struct *next); #else static inline bool context_tracking_in_user(void) { return false; } static inline void user_enter(void) { } static inline void user_exit(void) { } +static inline enum ctx_state exception_enter(void) { return 0; } +static inline void exception_exit(enum ctx_state prev_ctx) { } static inline void context_tracking_task_switch(struct task_struct *prev, struct task_struct *next) { } #endif /* !CONFIG_CONTEXT_TRACKING */ diff --git a/include/linux/math64.h b/include/linux/math64.h index b8ba8554472..931a619407b 100644 --- a/include/linux/math64.h +++ b/include/linux/math64.h @@ -30,6 +30,15 @@ static inline s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder) } /** + * div64_u64_rem - unsigned 64bit divide with 64bit divisor + */ +static inline u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder) +{ + *remainder = dividend % divisor; + return dividend / divisor; +} + +/** * div64_u64 - unsigned 64bit divide with 64bit divisor */ static inline u64 div64_u64(u64 dividend, u64 divisor) @@ -61,8 +70,16 @@ static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder) extern s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder); #endif +#ifndef div64_u64_rem +extern u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder); +#endif + #ifndef div64_u64 -extern u64 div64_u64(u64 dividend, u64 divisor); +static inline u64 div64_u64(u64 dividend, u64 divisor) +{ + u64 remainder; + return div64_u64_rem(dividend, divisor, &remainder); +} #endif #ifndef div64_s64 diff --git a/include/linux/sched.h b/include/linux/sched.h index bcbc30397f2..01c7d85bcaa 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -127,18 +127,6 @@ extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m); extern void proc_sched_set_task(struct task_struct *p); extern void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq); -#else -static inline void -proc_sched_show_task(struct task_struct *p, struct seq_file *m) -{ -} -static inline void proc_sched_set_task(struct task_struct *p) -{ -} -static inline void -print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) -{ -} #endif /* @@ -570,7 +558,7 @@ struct signal_struct { cputime_t utime, stime, cutime, cstime; cputime_t gtime; cputime_t cgtime; -#ifndef CONFIG_VIRT_CPU_ACCOUNTING +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE struct cputime prev_cputime; #endif unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; @@ -768,31 +756,6 @@ enum cpu_idle_type { }; /* - * Increase resolution of nice-level calculations for 64-bit architectures. - * The extra resolution improves shares distribution and load balancing of - * low-weight task groups (eg. nice +19 on an autogroup), deeper taskgroup - * hierarchies, especially on larger systems. This is not a user-visible change - * and does not change the user-interface for setting shares/weights. - * - * We increase resolution only if we have enough bits to allow this increased - * resolution (i.e. BITS_PER_LONG > 32). The costs for increasing resolution - * when BITS_PER_LONG <= 32 are pretty high and the returns do not justify the - * increased costs. - */ -#if 0 /* BITS_PER_LONG > 32 -- currently broken: it increases power usage under light load */ -# define SCHED_LOAD_RESOLUTION 10 -# define scale_load(w) ((w) << SCHED_LOAD_RESOLUTION) -# define scale_load_down(w) ((w) >> SCHED_LOAD_RESOLUTION) -#else -# define SCHED_LOAD_RESOLUTION 0 -# define scale_load(w) (w) -# define scale_load_down(w) (w) -#endif - -#define SCHED_LOAD_SHIFT (10 + SCHED_LOAD_RESOLUTION) -#define SCHED_LOAD_SCALE (1L << SCHED_LOAD_SHIFT) - -/* * Increase resolution of cpu_power calculations */ #define SCHED_POWER_SHIFT 10 @@ -817,62 +780,6 @@ enum cpu_idle_type { extern int __weak arch_sd_sibiling_asym_packing(void); -struct sched_group_power { - atomic_t ref; - /* - * CPU power of this group, SCHED_LOAD_SCALE being max power for a - * single CPU. - */ - unsigned int power, power_orig; - unsigned long next_update; - /* - * Number of busy cpus in this group. - */ - atomic_t nr_busy_cpus; - - unsigned long cpumask[0]; /* iteration mask */ -}; - -struct sched_group { - struct sched_group *next; /* Must be a circular list */ - atomic_t ref; - - unsigned int group_weight; - struct sched_group_power *sgp; - - /* - * The CPUs this group covers. - * - * NOTE: this field is variable length. (Allocated dynamically - * by attaching extra space to the end of the structure, - * depending on how many CPUs the kernel has booted up with) - */ - unsigned long cpumask[0]; -}; - -static inline struct cpumask *sched_group_cpus(struct sched_group *sg) -{ - return to_cpumask(sg->cpumask); -} - -/* - * cpumask masking which cpus in the group are allowed to iterate up the domain - * tree. - */ -static inline struct cpumask *sched_group_mask(struct sched_group *sg) -{ - return to_cpumask(sg->sgp->cpumask); -} - -/** - * group_first_cpu - Returns the first cpu in the cpumask of a sched_group. - * @group: The group whose first cpu is to be returned. - */ -static inline unsigned int group_first_cpu(struct sched_group *group) -{ - return cpumask_first(sched_group_cpus(group)); -} - struct sched_domain_attr { int relax_domain_level; }; @@ -883,6 +790,8 @@ struct sched_domain_attr { extern int sched_domain_level_max; +struct sched_group; + struct sched_domain { /* These fields must be setup */ struct sched_domain *parent; /* top domain must be null terminated */ @@ -899,6 +808,8 @@ struct sched_domain { unsigned int wake_idx; unsigned int forkexec_idx; unsigned int smt_gain; + + int nohz_idle; /* NOHZ IDLE status */ int flags; /* See SD_* */ int level; @@ -971,18 +882,6 @@ extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], cpumask_var_t *alloc_sched_domains(unsigned int ndoms); void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms); -/* Test a flag in parent sched domain */ -static inline int test_sd_parent(struct sched_domain *sd, int flag) -{ - if (sd->parent && (sd->parent->flags & flag)) - return 1; - - return 0; -} - -unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu); -unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu); - bool cpus_share_cache(int this_cpu, int that_cpu); #else /* CONFIG_SMP */ @@ -1017,72 +916,6 @@ struct mempolicy; struct pipe_inode_info; struct uts_namespace; -struct rq; -struct sched_domain; - -/* - * wake flags - */ -#define WF_SYNC 0x01 /* waker goes to sleep after wakup */ -#define WF_FORK 0x02 /* child wakeup after fork */ -#define WF_MIGRATED 0x04 /* internal use, task got migrated */ - -#define ENQUEUE_WAKEUP 1 -#define ENQUEUE_HEAD 2 -#ifdef CONFIG_SMP -#define ENQUEUE_WAKING 4 /* sched_class::task_waking was called */ -#else -#define ENQUEUE_WAKING 0 -#endif - -#define DEQUEUE_SLEEP 1 - -struct sched_class { - const struct sched_class *next; - - void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags); - void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags); - void (*yield_task) (struct rq *rq); - bool (*yield_to_task) (struct rq *rq, struct task_struct *p, bool preempt); - - void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int flags); - - struct task_struct * (*pick_next_task) (struct rq *rq); - void (*put_prev_task) (struct rq *rq, struct task_struct *p); - -#ifdef CONFIG_SMP - int (*select_task_rq)(struct task_struct *p, int sd_flag, int flags); - void (*migrate_task_rq)(struct task_struct *p, int next_cpu); - - void (*pre_schedule) (struct rq *this_rq, struct task_struct *task); - void (*post_schedule) (struct rq *this_rq); - void (*task_waking) (struct task_struct *task); - void (*task_woken) (struct rq *this_rq, struct task_struct *task); - - void (*set_cpus_allowed)(struct task_struct *p, - const struct cpumask *newmask); - - void (*rq_online)(struct rq *rq); - void (*rq_offline)(struct rq *rq); -#endif - - void (*set_curr_task) (struct rq *rq); - void (*task_tick) (struct rq *rq, struct task_struct *p, int queued); - void (*task_fork) (struct task_struct *p); - - void (*switched_from) (struct rq *this_rq, struct task_struct *task); - void (*switched_to) (struct rq *this_rq, struct task_struct *task); - void (*prio_changed) (struct rq *this_rq, struct task_struct *task, - int oldprio); - - unsigned int (*get_rr_interval) (struct rq *rq, - struct task_struct *task); - -#ifdef CONFIG_FAIR_GROUP_SCHED - void (*task_move_group) (struct task_struct *p, int on_rq); -#endif -}; - struct load_weight { unsigned long weight, inv_weight; }; @@ -1274,8 +1107,10 @@ struct task_struct { int exit_code, exit_signal; int pdeath_signal; /* The signal sent when the parent dies */ unsigned int jobctl; /* JOBCTL_*, siglock protected */ - /* ??? */ + + /* Used for emulating ABI behavior of previous Linux versions */ unsigned int personality; + unsigned did_exec:1; unsigned in_execve:1; /* Tell the LSMs that the process is doing an * execve */ @@ -1327,7 +1162,7 @@ struct task_struct { cputime_t utime, stime, utimescaled, stimescaled; cputime_t gtime; -#ifndef CONFIG_VIRT_CPU_ACCOUNTING +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE struct cputime prev_cputime; #endif #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN @@ -2681,28 +2516,7 @@ extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask); extern long sched_getaffinity(pid_t pid, struct cpumask *mask); #ifdef CONFIG_CGROUP_SCHED - extern struct task_group root_task_group; - -extern struct task_group *sched_create_group(struct task_group *parent); -extern void sched_online_group(struct task_group *tg, - struct task_group *parent); -extern void sched_destroy_group(struct task_group *tg); -extern void sched_offline_group(struct task_group *tg); -extern void sched_move_task(struct task_struct *tsk); -#ifdef CONFIG_FAIR_GROUP_SCHED -extern int sched_group_set_shares(struct task_group *tg, unsigned long shares); -extern unsigned long sched_group_shares(struct task_group *tg); -#endif -#ifdef CONFIG_RT_GROUP_SCHED -extern int sched_group_set_rt_runtime(struct task_group *tg, - long rt_runtime_us); -extern long sched_group_rt_runtime(struct task_group *tg); -extern int sched_group_set_rt_period(struct task_group *tg, - long rt_period_us); -extern long sched_group_rt_period(struct task_group *tg); -extern int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk); -#endif #endif /* CONFIG_CGROUP_SCHED */ extern int task_can_switch_user(struct user_struct *up, diff --git a/init/Kconfig b/init/Kconfig index 71bb9e73011..4367e137900 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -505,6 +505,7 @@ config RCU_USER_QS config CONTEXT_TRACKING_FORCE bool "Force context tracking" depends on CONTEXT_TRACKING + default CONTEXT_TRACKING help Probe on user/kernel boundaries by default in order to test the features that rely on it such as userspace RCU extended diff --git a/kernel/cgroup.c b/kernel/cgroup.c index eeb7e49946b..d3abce2d645 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -4380,7 +4380,6 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss) * need to invoke fork callbacks here. */ BUG_ON(!list_empty(&init_task.tasks)); - ss->active = 1; BUG_ON(online_css(ss, dummytop)); mutex_unlock(&cgroup_mutex); @@ -4485,7 +4484,6 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss) } write_unlock(&css_set_lock); - ss->active = 1; ret = online_css(ss, dummytop); if (ret) goto err_unload; @@ -4526,7 +4524,6 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss) mutex_lock(&cgroup_mutex); offline_css(ss, dummytop); - ss->active = 0; if (ss->use_id) idr_destroy(&ss->idr); diff --git a/kernel/fork.c b/kernel/fork.c index 1766d324d5e..339f60dfd62 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1233,7 +1233,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->utime = p->stime = p->gtime = 0; p->utimescaled = p->stimescaled = 0; -#ifndef CONFIG_VIRT_CPU_ACCOUNTING +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE p->prev_cputime.utime = p->prev_cputime.stime = 0; #endif #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index f06d249e103..deaf90e4a1d 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -16,3 +16,4 @@ obj-$(CONFIG_SMP) += cpupri.o obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o obj-$(CONFIG_SCHEDSTATS) += stats.o obj-$(CONFIG_SCHED_DEBUG) += debug.o +obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o diff --git a/kernel/sched/core.c b/kernel/sched/core.c index d8285eb0cde..ebdb1954121 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1288,8 +1288,8 @@ static void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags) static void ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags) { - trace_sched_wakeup(p, true); check_preempt_curr(rq, p, wake_flags); + trace_sched_wakeup(p, true); p->state = TASK_RUNNING; #ifdef CONFIG_SMP @@ -3039,11 +3039,13 @@ EXPORT_SYMBOL(preempt_schedule); asmlinkage void __sched preempt_schedule_irq(void) { struct thread_info *ti = current_thread_info(); + enum ctx_state prev_state; /* Catch callers which need to be fixed */ BUG_ON(ti->preempt_count || !irqs_disabled()); - user_exit(); + prev_state = exception_enter(); + do { add_preempt_count(PREEMPT_ACTIVE); local_irq_enable(); @@ -3057,6 +3059,8 @@ asmlinkage void __sched preempt_schedule_irq(void) */ barrier(); } while (need_resched()); + + exception_exit(prev_state); } #endif /* CONFIG_PREEMPT */ @@ -6204,7 +6208,7 @@ static void sched_init_numa(void) * 'level' contains the number of unique distances, excluding the * identity distance node_distance(i,i). * - * The sched_domains_nume_distance[] array includes the actual distance + * The sched_domains_numa_distance[] array includes the actual distance * numbers. */ @@ -6817,11 +6821,15 @@ int in_sched_functions(unsigned long addr) } #ifdef CONFIG_CGROUP_SCHED +/* + * Default task group. + * Every task in system belongs to this group at bootup. + */ struct task_group root_task_group; LIST_HEAD(task_groups); #endif -DECLARE_PER_CPU(cpumask_var_t, load_balance_tmpmask); +DECLARE_PER_CPU(cpumask_var_t, load_balance_mask); void __init sched_init(void) { @@ -6858,7 +6866,7 @@ void __init sched_init(void) #endif /* CONFIG_RT_GROUP_SCHED */ #ifdef CONFIG_CPUMASK_OFFSTACK for_each_possible_cpu(i) { - per_cpu(load_balance_tmpmask, i) = (void *)ptr; + per_cpu(load_balance_mask, i) = (void *)ptr; ptr += cpumask_size(); } #endif /* CONFIG_CPUMASK_OFFSTACK */ @@ -6884,12 +6892,6 @@ void __init sched_init(void) #endif /* CONFIG_CGROUP_SCHED */ -#ifdef CONFIG_CGROUP_CPUACCT - root_cpuacct.cpustat = &kernel_cpustat; - root_cpuacct.cpuusage = alloc_percpu(u64); - /* Too early, not expected to fail */ - BUG_ON(!root_cpuacct.cpuusage); -#endif for_each_possible_cpu(i) { struct rq *rq; @@ -7411,7 +7413,7 @@ unlock: return err; } -int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us) +static int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us) { u64 rt_runtime, rt_period; @@ -7423,7 +7425,7 @@ int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us) return tg_set_rt_bandwidth(tg, rt_period, rt_runtime); } -long sched_group_rt_runtime(struct task_group *tg) +static long sched_group_rt_runtime(struct task_group *tg) { u64 rt_runtime_us; @@ -7435,7 +7437,7 @@ long sched_group_rt_runtime(struct task_group *tg) return rt_runtime_us; } -int sched_group_set_rt_period(struct task_group *tg, long rt_period_us) |