From 029632fbb7b7c9d85063cc9eb470de6c54873df3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 25 Oct 2011 10:00:11 +0200 Subject: sched: Make separate sched*.c translation units Since once needs to do something at conferences and fixing compile warnings doesn't actually require much if any attention I decided to break up the sched.c #include "*.c" fest. This further modularizes the scheduler code. Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-x0fcd3mnp8f9c99grcpewmhi@git.kernel.org Signed-off-by: Ingo Molnar --- include/linux/latencytop.h | 3 ++- include/linux/sched.h | 9 +++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/latencytop.h b/include/linux/latencytop.h index b0e99898527..e23121f9d82 100644 --- a/include/linux/latencytop.h +++ b/include/linux/latencytop.h @@ -10,6 +10,8 @@ #define _INCLUDE_GUARD_LATENCYTOP_H_ #include +struct task_struct; + #ifdef CONFIG_LATENCYTOP #define LT_SAVECOUNT 32 @@ -23,7 +25,6 @@ struct latency_record { }; -struct task_struct; extern int latencytop_enabled; void __account_scheduler_latency(struct task_struct *task, int usecs, int inter); diff --git a/include/linux/sched.h b/include/linux/sched.h index 68daf4f27e2..8db17b7622e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -925,6 +925,15 @@ static inline struct cpumask *sched_group_cpus(struct sched_group *sg) return to_cpumask(sg->cpumask); } +/** + * group_first_cpu - Returns the first cpu in the cpumask of a sched_group. + * @group: The group whose first cpu is to be returned. + */ +static inline unsigned int group_first_cpu(struct sched_group *group) +{ + return cpumask_first(sched_group_cpus(group)); +} + struct sched_domain_attr { int relax_domain_level; }; -- cgit v1.2.3-18-g5258 From b781a602ac745ee3d5d745276f1e1905a2c101f9 Mon Sep 17 00:00:00 2001 From: Andrew Vagin Date: Mon, 28 Nov 2011 12:03:35 +0300 Subject: events, sched: Add tracepoint for accounting blocked time This tracepoint shows how long a task is sleeping in uninterruptible state. E.g. it may show how long and where a mutex is waited for. Signed-off-by: Andrew Vagin Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1322471015-107825-8-git-send-email-avagin@openvz.org Signed-off-by: Ingo Molnar --- include/trace/events/sched.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 959ff18b63b..e33ed1bfa11 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -330,6 +330,13 @@ DEFINE_EVENT(sched_stat_template, sched_stat_iowait, TP_PROTO(struct task_struct *tsk, u64 delay), TP_ARGS(tsk, delay)); +/* + * Tracepoint for accounting blocked time (time the task is in uninterruptible). + */ +DEFINE_EVENT(sched_stat_template, sched_stat_blocked, + TP_PROTO(struct task_struct *tsk, u64 delay), + TP_ARGS(tsk, delay)); + /* * Tracepoint for accounting runtime (time the task is executing * on a CPU). -- cgit v1.2.3-18-g5258 From 69e1e811dcc436a6b129dbef273ad9ec22d095ce Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 1 Dec 2011 17:07:33 -0800 Subject: sched, nohz: Track nr_busy_cpus in the sched_group_power Introduce nr_busy_cpus in the struct sched_group_power [Not in sched_group because sched groups are duplicated for the SD_OVERLAP scheduler domain] and for each cpu that enters and exits idle, this parameter will be updated in each scheduler group of the scheduler domain that this cpu belongs to. To avoid the frequent update of this state as the cpu enters and exits idle, the update of the stat during idle exit is delayed to the first timer tick that happens after the cpu becomes busy. This is done using NOHZ_IDLE flag in the struct rq's nohz_flags. Signed-off-by: Suresh Siddha Signed-off-by: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20111202010832.555984323@sbsiddha-desk.sc.intel.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 8db17b7622e..295666cb5b8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -273,9 +273,11 @@ extern int runqueue_is_locked(int cpu); #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) extern void select_nohz_load_balancer(int stop_tick); +extern void set_cpu_sd_state_idle(void); extern int get_nohz_timer_target(void); #else static inline void select_nohz_load_balancer(int stop_tick) { } +static inline void set_cpu_sd_state_idle(void); #endif /* @@ -901,6 +903,10 @@ struct sched_group_power { * single CPU. */ unsigned int power, power_orig; + /* + * Number of busy cpus in this group. + */ + atomic_t nr_busy_cpus; }; struct sched_group { -- cgit v1.2.3-18-g5258 From 3292beb340c76884427faa1f5d6085719477d889 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 28 Nov 2011 14:45:17 -0200 Subject: sched/accounting: Change cpustat fields to an array This patch changes fields in cpustat from a structure, to an u64 array. Math gets easier, and the code is more flexible. Signed-off-by: Glauber Costa Reviewed-by: KAMEZAWA Hiroyuki Cc: Linus Torvalds Cc: Andrew Morton Cc: Paul Tuner Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1322498719-2255-2-git-send-email-glommer@parallels.com Signed-off-by: Ingo Molnar --- include/linux/kernel_stat.h | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 0cce2db580c..2fbd9053c2d 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -15,21 +16,25 @@ * used by rstatd/perfmeter */ -struct cpu_usage_stat { - cputime64_t user; - cputime64_t nice; - cputime64_t system; - cputime64_t softirq; - cputime64_t irq; - cputime64_t idle; - cputime64_t iowait; - cputime64_t steal; - cputime64_t guest; - cputime64_t guest_nice; +enum cpu_usage_stat { + CPUTIME_USER, + CPUTIME_NICE, + CPUTIME_SYSTEM, + CPUTIME_SOFTIRQ, + CPUTIME_IRQ, + CPUTIME_IDLE, + CPUTIME_IOWAIT, + CPUTIME_STEAL, + CPUTIME_GUEST, + CPUTIME_GUEST_NICE, + NR_STATS, +}; + +struct kernel_cpustat { + u64 cpustat[NR_STATS]; }; struct kernel_stat { - struct cpu_usage_stat cpustat; #ifndef CONFIG_GENERIC_HARDIRQS unsigned int irqs[NR_IRQS]; #endif @@ -38,10 +43,13 @@ struct kernel_stat { }; DECLARE_PER_CPU(struct kernel_stat, kstat); +DECLARE_PER_CPU(struct kernel_cpustat, kernel_cpustat); -#define kstat_cpu(cpu) per_cpu(kstat, cpu) /* Must have preemption disabled for this to be meaningful. */ -#define kstat_this_cpu __get_cpu_var(kstat) +#define kstat_this_cpu (&__get_cpu_var(kstat)) +#define kcpustat_this_cpu (&__get_cpu_var(kernel_cpustat)) +#define kstat_cpu(cpu) per_cpu(kstat, cpu) +#define kcpustat_cpu(cpu) per_cpu(kernel_cpustat, cpu) extern unsigned long long nr_context_switches(void); -- cgit v1.2.3-18-g5258 From fdaabd800bdd60652a448994eeb77442180db6c0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 6 Dec 2011 12:47:55 +0100 Subject: sched: Fix compile error for UP,!NOHZ Commit 69e1e811 ("sched, nohz: Track nr_busy_cpus in the sched_group_power") messed up the static inline function definition. Signed-off-by: Peter Zijlstra Cc: Suresh Siddha Link: http://lkml.kernel.org/n/tip-abjah8ctq5qrjjtdiabe8lph@git.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 295666cb5b8..64527c49962 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -277,7 +277,7 @@ extern void set_cpu_sd_state_idle(void); extern int get_nohz_timer_target(void); #else static inline void select_nohz_load_balancer(int stop_tick) { } -static inline void set_cpu_sd_state_idle(void); +static inline void set_cpu_sd_state_idle(void) { } #endif /* -- cgit v1.2.3-18-g5258 From abd63bc3a0f65ae9d85bc3b1bb067d3e3c2b2cc2 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 14 Dec 2011 14:39:26 -0800 Subject: sched: Mark parent and real_parent as __rcu The parent and real_parent pointers should be considered __rcu, since they should be held under either tasklist_lock or rcu_read_lock. Signed-off-by: Kees Cook Cc: Peter Zijlstra Cc: Paul E. McKenney Cc: Al Viro Link: http://lkml.kernel.org/r/20111214223925.GA27578@www.outflux.net Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index cc8c6206657..5ef09012a62 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1330,8 +1330,8 @@ struct task_struct { * older sibling, respectively. (p->father can be replaced with * p->real_parent->pid) */ - struct task_struct *real_parent; /* real parent process */ - struct task_struct *parent; /* recipient of SIGCHLD, wait4() reports */ + struct task_struct __rcu *real_parent; /* real parent process */ + struct task_struct __rcu *parent; /* recipient of SIGCHLD, wait4() reports */ /* * children/sibling forms the list of my natural children */ -- cgit v1.2.3-18-g5258 From 648616343cdbe904c585a6c12e323d3b3c72e46f Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 15 Dec 2011 14:56:09 +0100 Subject: [S390] cputime: add sparse checking and cleanup Make cputime_t and cputime64_t nocast to enable sparse checking to detect incorrect use of cputime. Drop the cputime macros for simple scalar operations. The conversion macros are still needed. Signed-off-by: Martin Schwidefsky --- include/asm-generic/cputime.h | 62 +++++++++++++++++++------------------------ include/linux/sched.h | 4 +-- 2 files changed, 30 insertions(+), 36 deletions(-) (limited to 'include') diff --git a/include/asm-generic/cputime.h b/include/asm-generic/cputime.h index 62ce6823c0f..77202e2c9fc 100644 --- a/include/asm-generic/cputime.h +++ b/include/asm-generic/cputime.h @@ -4,70 +4,64 @@ #include #include -typedef unsigned long cputime_t; +typedef unsigned long __nocast cputime_t; -#define cputime_zero (0UL) #define cputime_one_jiffy jiffies_to_cputime(1) -#define cputime_max ((~0UL >> 1) - 1) -#define cputime_add(__a, __b) ((__a) + (__b)) -#define cputime_sub(__a, __b) ((__a) - (__b)) -#define cputime_div(__a, __n) ((__a) / (__n)) -#define cputime_halve(__a) ((__a) >> 1) -#define cputime_eq(__a, __b) ((__a) == (__b)) -#define cputime_gt(__a, __b) ((__a) > (__b)) -#define cputime_ge(__a, __b) ((__a) >= (__b)) -#define cputime_lt(__a, __b) ((__a) < (__b)) -#define cputime_le(__a, __b) ((__a) <= (__b)) -#define cputime_to_jiffies(__ct) (__ct) +#define cputime_to_jiffies(__ct) (__force unsigned long)(__ct) #define cputime_to_scaled(__ct) (__ct) -#define jiffies_to_cputime(__hz) (__hz) +#define jiffies_to_cputime(__hz) (__force cputime_t)(__hz) -typedef u64 cputime64_t; +typedef u64 __nocast cputime64_t; -#define cputime64_zero (0ULL) -#define cputime64_add(__a, __b) ((__a) + (__b)) -#define cputime64_sub(__a, __b) ((__a) - (__b)) -#define cputime64_to_jiffies64(__ct) (__ct) -#define jiffies64_to_cputime64(__jif) (__jif) -#define cputime_to_cputime64(__ct) ((u64) __ct) -#define cputime64_gt(__a, __b) ((__a) > (__b)) +#define cputime64_to_jiffies64(__ct) (__force u64)(__ct) +#define jiffies64_to_cputime64(__jif) (__force cputime64_t)(__jif) -#define nsecs_to_cputime64(__ct) nsecs_to_jiffies64(__ct) +#define nsecs_to_cputime64(__ct) \ + jiffies64_to_cputime64(nsecs_to_jiffies64(__ct)) /* * Convert cputime to microseconds and back. */ -#define cputime_to_usecs(__ct) jiffies_to_usecs(__ct) -#define usecs_to_cputime(__msecs) usecs_to_jiffies(__msecs) +#define cputime_to_usecs(__ct) \ + jiffies_to_usecs(cputime_to_jiffies(__ct)); +#define usecs_to_cputime(__msecs) \ + jiffies_to_cputime(usecs_to_jiffies(__msecs)); /* * Convert cputime to seconds and back. */ -#define cputime_to_secs(jif) ((jif) / HZ) -#define secs_to_cputime(sec) ((sec) * HZ) +#define cputime_to_secs(jif) (cputime_to_jiffies(jif) / HZ) +#define secs_to_cputime(sec) jiffies_to_cputime((sec) * HZ) /* * Convert cputime to timespec and back. */ -#define timespec_to_cputime(__val) timespec_to_jiffies(__val) -#define cputime_to_timespec(__ct,__val) jiffies_to_timespec(__ct,__val) +#define timespec_to_cputime(__val) \ + jiffies_to_cputime(timespec_to_jiffies(__val)) +#define cputime_to_timespec(__ct,__val) \ + jiffies_to_timespec(cputime_to_jiffies(__ct),__val) /* * Convert cputime to timeval and back. */ -#define timeval_to_cputime(__val) timeval_to_jiffies(__val) -#define cputime_to_timeval(__ct,__val) jiffies_to_timeval(__ct,__val) +#define timeval_to_cputime(__val) \ + jiffies_to_cputime(timeval_to_jiffies(__val)) +#define cputime_to_timeval(__ct,__val) \ + jiffies_to_timeval(cputime_to_jiffies(__ct),__val) /* * Convert cputime to clock and back. */ -#define cputime_to_clock_t(__ct) jiffies_to_clock_t(__ct) -#define clock_t_to_cputime(__x) clock_t_to_jiffies(__x) +#define cputime_to_clock_t(__ct) \ + jiffies_to_clock_t(cputime_to_jiffies(__ct)) +#define clock_t_to_cputime(__x) \ + jiffies_to_cputime(clock_t_to_jiffies(__x)) /* * Convert cputime64 to clock. */ -#define cputime64_to_clock_t(__ct) jiffies_64_to_clock_t(__ct) +#define cputime64_to_clock_t(__ct) \ + jiffies_64_to_clock_t(cputime64_to_jiffies64(__ct)) #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index 1c4f3e9b9bc..5649032d73f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -483,8 +483,8 @@ struct task_cputime { #define INIT_CPUTIME \ (struct task_cputime) { \ - .utime = cputime_zero, \ - .stime = cputime_zero, \ + .utime = 0, \ + .stime = 0, \ .sum_exec_runtime = 0, \ } -- cgit v1.2.3-18-g5258 From 1ac9bc6943edf7d181b4b1cc734981350d4f6bae Mon Sep 17 00:00:00 2001 From: Arun Sharma Date: Wed, 21 Dec 2011 16:15:40 -0800 Subject: sched/tracing: Add a new tracepoint for sleeptime If CONFIG_SCHEDSTATS is defined, the kernel maintains information about how long the task was sleeping or in the case of iowait, blocking in the kernel before getting woken up. This will be useful for sleep time profiling. Note: this information is only provided for sched_fair. Other scheduling classes may choose to provide this in the future. Note: the delay includes the time spent on the runqueue as well. Signed-off-by: Arun Sharma Acked-by: Peter Zijlstra Cc: Steven Rostedt Cc: Mathieu Desnoyers Cc: Arnaldo Carvalho de Melo Cc: Andrew Vagin Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/1324512940-32060-2-git-send-email-asharma@fb.com Signed-off-by: Ingo Molnar --- include/trace/events/sched.h | 50 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) (limited to 'include') diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index e33ed1bfa11..6ba596b07a7 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -370,6 +370,56 @@ TRACE_EVENT(sched_stat_runtime, (unsigned long long)__entry->vruntime) ); +#ifdef CREATE_TRACE_POINTS +static inline u64 trace_get_sleeptime(struct task_struct *tsk) +{ +#ifdef CONFIG_SCHEDSTATS + u64 block, sleep; + + block = tsk->se.statistics.block_start; + sleep = tsk->se.statistics.sleep_start; + tsk->se.statistics.block_start = 0; + tsk->se.statistics.sleep_start = 0; + + return block ? block : sleep ? sleep : 0; +#else + return 0; +#endif +} +#endif + +/* + * Tracepoint for accounting sleeptime (time the task is sleeping + * or waiting for I/O). + */ +TRACE_EVENT(sched_stat_sleeptime, + + TP_PROTO(struct task_struct *tsk, u64 now), + + TP_ARGS(tsk, now), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( u64, sleeptime ) + ), + + TP_fast_assign( + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __entry->pid = tsk->pid; + __entry->sleeptime = trace_get_sleeptime(tsk); + __entry->sleeptime = __entry->sleeptime ? + now - __entry->sleeptime : 0; + ) + TP_perf_assign( + __perf_count(__entry->sleeptime); + ), + + TP_printk("comm=%s pid=%d sleeptime=%Lu [ns]", + __entry->comm, __entry->pid, + (unsigned long long)__entry->sleeptime) +); + /* * Tracepoint for showing priority inheritance modifying a tasks * priority. -- cgit v1.2.3-18-g5258