diff options
Diffstat (limited to 'include/trace/events/sched.h')
| -rw-r--r-- | include/trace/events/sched.h | 410 |
1 files changed, 244 insertions, 166 deletions
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 4069c43f418..0a68d5ae584 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -6,6 +6,7 @@ #include <linux/sched.h> #include <linux/tracepoint.h> +#include <linux/binfmts.h> /* * Tracepoint for calling kthread_stop, performed to end a kthread: @@ -26,7 +27,7 @@ TRACE_EVENT(sched_kthread_stop, __entry->pid = t->pid; ), - TP_printk("task %s:%d", __entry->comm, __entry->pid) + TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid) ); /* @@ -46,55 +47,24 @@ TRACE_EVENT(sched_kthread_stop_ret, __entry->ret = ret; ), - TP_printk("ret %d", __entry->ret) -); - -/* - * Tracepoint for waiting on task to unschedule: - * - * (NOTE: the 'rq' argument is not used by generic trace events, - * but used by the latency tracer plugin. ) - */ -TRACE_EVENT(sched_wait_task, - - TP_PROTO(struct rq *rq, struct task_struct *p), - - TP_ARGS(rq, p), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - ), - - TP_printk("task %s:%d [%d]", - __entry->comm, __entry->pid, __entry->prio) + TP_printk("ret=%d", __entry->ret) ); /* * Tracepoint for waking up a task: - * - * (NOTE: the 'rq' argument is not used by generic trace events, - * but used by the latency tracer plugin. ) */ -TRACE_EVENT(sched_wakeup, +DECLARE_EVENT_CLASS(sched_wakeup_template, - TP_PROTO(struct rq *rq, struct task_struct *p, int success), + TP_PROTO(struct task_struct *p, int success), - TP_ARGS(rq, p, success), + TP_ARGS(__perf_task(p), success), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) __field( pid_t, pid ) __field( int, prio ) __field( int, success ) - __field( int, cpu ) + __field( int, target_cpu ) ), TP_fast_assign( @@ -102,59 +72,51 @@ TRACE_EVENT(sched_wakeup, __entry->pid = p->pid; __entry->prio = p->prio; __entry->success = success; - __entry->cpu = task_cpu(p); + __entry->target_cpu = task_cpu(p); ), - TP_printk("task %s:%d [%d] success=%d [%03d]", + TP_printk("comm=%s pid=%d prio=%d success=%d target_cpu=%03d", __entry->comm, __entry->pid, __entry->prio, - __entry->success, __entry->cpu) + __entry->success, __entry->target_cpu) ); +DEFINE_EVENT(sched_wakeup_template, sched_wakeup, + TP_PROTO(struct task_struct *p, int success), + TP_ARGS(p, success)); + /* * Tracepoint for waking up a new task: - * - * (NOTE: the 'rq' argument is not used by generic trace events, - * but used by the latency tracer plugin. ) */ -TRACE_EVENT(sched_wakeup_new, - - TP_PROTO(struct rq *rq, struct task_struct *p, int success), - - TP_ARGS(rq, p, success), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - __field( int, success ) - __field( int, cpu ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - __entry->success = success; - __entry->cpu = task_cpu(p); - ), - - TP_printk("task %s:%d [%d] success=%d [%03d]", - __entry->comm, __entry->pid, __entry->prio, - __entry->success, __entry->cpu) -); +DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new, + TP_PROTO(struct task_struct *p, int success), + TP_ARGS(p, success)); + +#ifdef CREATE_TRACE_POINTS +static inline long __trace_sched_switch_state(struct task_struct *p) +{ + long state = p->state; + +#ifdef CONFIG_PREEMPT + /* + * For all intents and purposes a preempted task is a running task. + */ + if (task_preempt_count(p) & PREEMPT_ACTIVE) + state = TASK_RUNNING | TASK_STATE_MAX; +#endif + + return state; +} +#endif /* * Tracepoint for task switches, performed by the scheduler: - * - * (NOTE: the 'rq' argument is not used by generic trace events, - * but used by the latency tracer plugin. ) */ TRACE_EVENT(sched_switch, - TP_PROTO(struct rq *rq, struct task_struct *prev, + TP_PROTO(struct task_struct *prev, struct task_struct *next), - TP_ARGS(rq, prev, next), + TP_ARGS(prev, next), TP_STRUCT__entry( __array( char, prev_comm, TASK_COMM_LEN ) @@ -170,19 +132,20 @@ TRACE_EVENT(sched_switch, memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); __entry->prev_pid = prev->pid; __entry->prev_prio = prev->prio; - __entry->prev_state = prev->state; + __entry->prev_state = __trace_sched_switch_state(prev); memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); __entry->next_pid = next->pid; __entry->next_prio = next->prio; ), - TP_printk("task %s:%d [%d] (%s) ==> %s:%d [%d]", + TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d", __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, - __entry->prev_state ? - __print_flags(__entry->prev_state, "|", + __entry->prev_state & (TASK_STATE_MAX-1) ? + __print_flags(__entry->prev_state & (TASK_STATE_MAX-1), "|", { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" }, { 16, "Z" }, { 32, "X" }, { 64, "x" }, - { 128, "W" }) : "R", + { 128, "K" }, { 256, "W" }, { 512, "P" }) : "R", + __entry->prev_state & TASK_STATE_MAX ? "+" : "", __entry->next_comm, __entry->next_pid, __entry->next_prio) ); @@ -211,15 +174,12 @@ TRACE_EVENT(sched_migrate_task, __entry->dest_cpu = dest_cpu; ), - TP_printk("task %s:%d [%d] from: %d to: %d", + TP_printk("comm=%s pid=%d prio=%d orig_cpu=%d dest_cpu=%d", __entry->comm, __entry->pid, __entry->prio, __entry->orig_cpu, __entry->dest_cpu) ); -/* - * Tracepoint for freeing a task: - */ -TRACE_EVENT(sched_process_free, +DECLARE_EVENT_CLASS(sched_process_template, TP_PROTO(struct task_struct *p), @@ -237,34 +197,31 @@ TRACE_EVENT(sched_process_free, __entry->prio = p->prio; ), - TP_printk("task %s:%d [%d]", + TP_printk("comm=%s pid=%d prio=%d", __entry->comm, __entry->pid, __entry->prio) ); /* + * Tracepoint for freeing a task: + */ +DEFINE_EVENT(sched_process_template, sched_process_free, + TP_PROTO(struct task_struct *p), + TP_ARGS(p)); + + +/* * Tracepoint for a task exiting: */ -TRACE_EVENT(sched_process_exit, +DEFINE_EVENT(sched_process_template, sched_process_exit, + TP_PROTO(struct task_struct *p), + TP_ARGS(p)); +/* + * Tracepoint for waiting on task to unschedule: + */ +DEFINE_EVENT(sched_process_template, sched_wait_task, TP_PROTO(struct task_struct *p), - - TP_ARGS(p), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - ), - - TP_printk("task %s:%d [%d]", - __entry->comm, __entry->pid, __entry->prio) -); + TP_ARGS(p)); /* * Tracepoint for a waiting task: @@ -287,7 +244,7 @@ TRACE_EVENT(sched_process_wait, __entry->prio = current->prio; ), - TP_printk("task %s:%d [%d]", + TP_printk("comm=%s pid=%d prio=%d", __entry->comm, __entry->pid, __entry->prio) ); @@ -314,50 +271,46 @@ TRACE_EVENT(sched_process_fork, __entry->child_pid = child->pid; ), - TP_printk("parent %s:%d child %s:%d", + TP_printk("comm=%s pid=%d child_comm=%s child_pid=%d", __entry->parent_comm, __entry->parent_pid, __entry->child_comm, __entry->child_pid) ); /* - * Tracepoint for sending a signal: + * Tracepoint for exec: */ -TRACE_EVENT(sched_signal_send, +TRACE_EVENT(sched_process_exec, - TP_PROTO(int sig, struct task_struct *p), + TP_PROTO(struct task_struct *p, pid_t old_pid, + struct linux_binprm *bprm), - TP_ARGS(sig, p), + TP_ARGS(p, old_pid, bprm), TP_STRUCT__entry( - __field( int, sig ) - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) + __string( filename, bprm->filename ) + __field( pid_t, pid ) + __field( pid_t, old_pid ) ), TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->sig = sig; + __assign_str(filename, bprm->filename); + __entry->pid = p->pid; + __entry->old_pid = old_pid; ), - TP_printk("sig: %d task %s:%d", - __entry->sig, __entry->comm, __entry->pid) + TP_printk("filename=%s pid=%d old_pid=%d", __get_str(filename), + __entry->pid, __entry->old_pid) ); /* * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE * adding sched_stat support to SCHED_FIFO/RR would be welcome. */ - -/* - * Tracepoint for accounting wait time (time the task is runnable - * but not actually running due to scheduler contention). - */ -TRACE_EVENT(sched_stat_wait, +DECLARE_EVENT_CLASS(sched_stat_template, TP_PROTO(struct task_struct *tsk, u64 delay), - TP_ARGS(tsk, delay), + TP_ARGS(__perf_task(tsk), __perf_count(delay)), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) @@ -369,25 +322,54 @@ TRACE_EVENT(sched_stat_wait, memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); __entry->pid = tsk->pid; __entry->delay = delay; - ) - TP_perf_assign( - __perf_count(delay); ), - TP_printk("task: %s:%d wait: %Lu [ns]", + TP_printk("comm=%s pid=%d delay=%Lu [ns]", __entry->comm, __entry->pid, (unsigned long long)__entry->delay) ); + +/* + * Tracepoint for accounting wait time (time the task is runnable + * but not actually running due to scheduler contention). + */ +DEFINE_EVENT(sched_stat_template, sched_stat_wait, + TP_PROTO(struct task_struct *tsk, u64 delay), + TP_ARGS(tsk, delay)); + +/* + * Tracepoint for accounting sleep time (time the task is not runnable, + * including iowait, see below). + */ +DEFINE_EVENT(sched_stat_template, sched_stat_sleep, + TP_PROTO(struct task_struct *tsk, u64 delay), + TP_ARGS(tsk, delay)); + +/* + * Tracepoint for accounting iowait time (time the task is not runnable + * due to waiting on IO to complete). + */ +DEFINE_EVENT(sched_stat_template, sched_stat_iowait, + TP_PROTO(struct task_struct *tsk, u64 delay), + TP_ARGS(tsk, delay)); + +/* + * Tracepoint for accounting blocked time (time the task is in uninterruptible). + */ +DEFINE_EVENT(sched_stat_template, sched_stat_blocked, + TP_PROTO(struct task_struct *tsk, u64 delay), + TP_ARGS(tsk, delay)); + /* * Tracepoint for accounting runtime (time the task is executing * on a CPU). */ -TRACE_EVENT(sched_stat_runtime, +DECLARE_EVENT_CLASS(sched_stat_runtime, TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime), - TP_ARGS(tsk, runtime, vruntime), + TP_ARGS(tsk, __perf_count(runtime), vruntime), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) @@ -401,77 +383,173 @@ TRACE_EVENT(sched_stat_runtime, __entry->pid = tsk->pid; __entry->runtime = runtime; __entry->vruntime = vruntime; - ) - TP_perf_assign( - __perf_count(runtime); ), - TP_printk("task: %s:%d runtime: %Lu [ns], vruntime: %Lu [ns]", + TP_printk("comm=%s pid=%d runtime=%Lu [ns] vruntime=%Lu [ns]", __entry->comm, __entry->pid, (unsigned long long)__entry->runtime, (unsigned long long)__entry->vruntime) ); +DEFINE_EVENT(sched_stat_runtime, sched_stat_runtime, + TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime), + TP_ARGS(tsk, runtime, vruntime)); + /* - * Tracepoint for accounting sleep time (time the task is not runnable, - * including iowait, see below). + * Tracepoint for showing priority inheritance modifying a tasks + * priority. */ -TRACE_EVENT(sched_stat_sleep, +TRACE_EVENT(sched_pi_setprio, - TP_PROTO(struct task_struct *tsk, u64 delay), + TP_PROTO(struct task_struct *tsk, int newprio), - TP_ARGS(tsk, delay), + TP_ARGS(tsk, newprio), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) __field( pid_t, pid ) - __field( u64, delay ) + __field( int, oldprio ) + __field( int, newprio ) ), TP_fast_assign( memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); - __entry->pid = tsk->pid; - __entry->delay = delay; - ) - TP_perf_assign( - __perf_count(delay); + __entry->pid = tsk->pid; + __entry->oldprio = tsk->prio; + __entry->newprio = newprio; ), - TP_printk("task: %s:%d sleep: %Lu [ns]", + TP_printk("comm=%s pid=%d oldprio=%d newprio=%d", __entry->comm, __entry->pid, - (unsigned long long)__entry->delay) + __entry->oldprio, __entry->newprio) ); -/* - * Tracepoint for accounting iowait time (time the task is not runnable - * due to waiting on IO to complete). - */ -TRACE_EVENT(sched_stat_iowait, - - TP_PROTO(struct task_struct *tsk, u64 delay), - - TP_ARGS(tsk, delay), +#ifdef CONFIG_DETECT_HUNG_TASK +TRACE_EVENT(sched_process_hang, + TP_PROTO(struct task_struct *tsk), + TP_ARGS(tsk), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) __field( pid_t, pid ) - __field( u64, delay ) ), TP_fast_assign( memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); - __entry->pid = tsk->pid; - __entry->delay = delay; - ) - TP_perf_assign( - __perf_count(delay); + __entry->pid = tsk->pid; ), - TP_printk("task: %s:%d iowait: %Lu [ns]", - __entry->comm, __entry->pid, - (unsigned long long)__entry->delay) + TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid) ); +#endif /* CONFIG_DETECT_HUNG_TASK */ + +DECLARE_EVENT_CLASS(sched_move_task_template, + TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu), + + TP_ARGS(tsk, src_cpu, dst_cpu), + + TP_STRUCT__entry( + __field( pid_t, pid ) + __field( pid_t, tgid ) + __field( pid_t, ngid ) + __field( int, src_cpu ) + __field( int, src_nid ) + __field( int, dst_cpu ) + __field( int, dst_nid ) + ), + + TP_fast_assign( + __entry->pid = task_pid_nr(tsk); + __entry->tgid = task_tgid_nr(tsk); + __entry->ngid = task_numa_group_id(tsk); + __entry->src_cpu = src_cpu; + __entry->src_nid = cpu_to_node(src_cpu); + __entry->dst_cpu = dst_cpu; + __entry->dst_nid = cpu_to_node(dst_cpu); + ), + + TP_printk("pid=%d tgid=%d ngid=%d src_cpu=%d src_nid=%d dst_cpu=%d dst_nid=%d", + __entry->pid, __entry->tgid, __entry->ngid, + __entry->src_cpu, __entry->src_nid, + __entry->dst_cpu, __entry->dst_nid) +); + +/* + * Tracks migration of tasks from one runqueue to another. Can be used to + * detect if automatic NUMA balancing is bouncing between nodes + */ +DEFINE_EVENT(sched_move_task_template, sched_move_numa, + TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu), + + TP_ARGS(tsk, src_cpu, dst_cpu) +); + +DEFINE_EVENT(sched_move_task_template, sched_stick_numa, + TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu), + + TP_ARGS(tsk, src_cpu, dst_cpu) +); + +TRACE_EVENT(sched_swap_numa, + + TP_PROTO(struct task_struct *src_tsk, int src_cpu, + struct task_struct *dst_tsk, int dst_cpu), + + TP_ARGS(src_tsk, src_cpu, dst_tsk, dst_cpu), + + TP_STRUCT__entry( + __field( pid_t, src_pid ) + __field( pid_t, src_tgid ) + __field( pid_t, src_ngid ) + __field( int, src_cpu ) + __field( int, src_nid ) + __field( pid_t, dst_pid ) + __field( pid_t, dst_tgid ) + __field( pid_t, dst_ngid ) + __field( int, dst_cpu ) + __field( int, dst_nid ) + ), + + TP_fast_assign( + __entry->src_pid = task_pid_nr(src_tsk); + __entry->src_tgid = task_tgid_nr(src_tsk); + __entry->src_ngid = task_numa_group_id(src_tsk); + __entry->src_cpu = src_cpu; + __entry->src_nid = cpu_to_node(src_cpu); + __entry->dst_pid = task_pid_nr(dst_tsk); + __entry->dst_tgid = task_tgid_nr(dst_tsk); + __entry->dst_ngid = task_numa_group_id(dst_tsk); + __entry->dst_cpu = dst_cpu; + __entry->dst_nid = cpu_to_node(dst_cpu); + ), + + TP_printk("src_pid=%d src_tgid=%d src_ngid=%d src_cpu=%d src_nid=%d dst_pid=%d dst_tgid=%d dst_ngid=%d dst_cpu=%d dst_nid=%d", + __entry->src_pid, __entry->src_tgid, __entry->src_ngid, + __entry->src_cpu, __entry->src_nid, + __entry->dst_pid, __entry->dst_tgid, __entry->dst_ngid, + __entry->dst_cpu, __entry->dst_nid) +); + +/* + * Tracepoint for waking a polling cpu without an IPI. + */ +TRACE_EVENT(sched_wake_idle_without_ipi, + + TP_PROTO(int cpu), + + TP_ARGS(cpu), + + TP_STRUCT__entry( + __field( int, cpu ) + ), + + TP_fast_assign( + __entry->cpu = cpu; + ), + + TP_printk("cpu=%d", __entry->cpu) +); #endif /* _TRACE_SCHED_H */ /* This part must be outside protection */ |
