From 7e066fb870fcd1025ec3ba7bbde5d541094f4ce1 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 14 Nov 2008 17:47:47 -0500 Subject: tracepoints: add DECLARE_TRACE() and DEFINE_TRACE() Impact: API *CHANGE*. Must update all tracepoint users. Add DEFINE_TRACE() to tracepoints to let them declare the tracepoint structure in a single spot for all the kernel. It helps reducing memory consumption, especially when declaring a lot of tracepoints, e.g. for kmalloc tracing. *API CHANGE WARNING*: now, DECLARE_TRACE() must be used in headers for tracepoint declarations rather than DEFINE_TRACE(). This is the sane way to do it. The name previously used was misleading. Updates scheduler instrumentation to follow this API change. Signed-off-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar --- kernel/exit.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index ae2b92be5fa..f995d241866 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -54,6 +54,10 @@ #include #include +DEFINE_TRACE(sched_process_free); +DEFINE_TRACE(sched_process_exit); +DEFINE_TRACE(sched_process_wait); + static void exit_mm(struct task_struct * tsk); static inline int task_detached(struct task_struct *p) -- cgit v1.2.3-70-g09d2 From 2b5fe6de58276d0b5a7c884d5dbfc300ca47db78 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 17 Nov 2008 15:40:08 +0100 Subject: thread_group_cputime: move a couple of callsites outside of ->siglock Impact: relax the locking of cpu-time accounting calls ->siglock buys nothing for thread_group_cputime() in do_sys_times() and wait_task_zombie() (which btw takes the unrelated parent's ->siglock). Actually I think do_sys_times() doesn't need ->siglock at all. Signed-off-by: Oleg Nesterov Signed-off-by: Ingo Molnar --- kernel/exit.c | 2 +- kernel/sys.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index ae2b92be5fa..b9c4d8bb72e 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1330,10 +1330,10 @@ static int wait_task_zombie(struct task_struct *p, int options, * group, which consolidates times for all threads in the * group including the group leader. */ + thread_group_cputime(p, &cputime); spin_lock_irq(&p->parent->sighand->siglock); psig = p->parent->signal; sig = p->signal; - thread_group_cputime(p, &cputime); psig->cutime = cputime_add(psig->cutime, cputime_add(cputime.utime, diff --git a/kernel/sys.c b/kernel/sys.c index 31deba8f7d1..5fc3a0cfb99 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -858,8 +858,8 @@ void do_sys_times(struct tms *tms) struct task_cputime cputime; cputime_t cutime, cstime; - spin_lock_irq(¤t->sighand->siglock); thread_group_cputime(current, &cputime); + spin_lock_irq(¤t->sighand->siglock); cutime = current->signal->cutime; cstime = current->signal->cstime; spin_unlock_irq(¤t->sighand->siglock); -- cgit v1.2.3-70-g09d2 From f201ae2356c74bcae130b2177b3dca903ea98071 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 23 Nov 2008 06:22:56 +0100 Subject: tracing/function-return-tracer: store return stack into task_struct and allocate it dynamically Impact: use deeper function tracing depth safely Some tests showed that function return tracing needed a more deeper depth of function calls. But it could be unsafe to store these return addresses to the stack. So these arrays will now be allocated dynamically into task_struct of current only when the tracer is activated. Typical scheme when tracer is activated: - allocate a return stack for each task in global list. - fork: allocate the return stack for the newly created task - exit: free return stack of current - idle init: same as fork I chose a default depth of 50. I don't have overruns anymore. Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- arch/x86/include/asm/ftrace.h | 1 - arch/x86/include/asm/thread_info.h | 29 ------------ arch/x86/kernel/ftrace.c | 29 ++++++------ include/linux/ftrace.h | 5 ++ include/linux/sched.h | 23 +++++---- kernel/exit.c | 5 +- kernel/fork.c | 4 ++ kernel/sched.c | 3 ++ kernel/trace/ftrace.c | 96 +++++++++++++++++++++++++++++++++++++- 9 files changed, 137 insertions(+), 58 deletions(-) (limited to 'kernel/exit.c') diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 2bb43b433e0..754a3e082f9 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -29,7 +29,6 @@ struct dyn_arch_ftrace { #endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_FUNCTION_RET_TRACER -#define FTRACE_RET_STACK_SIZE 20 #ifndef __ASSEMBLY__ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index e90e81ef6ab..0921b4018c1 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -40,36 +40,8 @@ struct thread_info { */ __u8 supervisor_stack[0]; #endif - -#ifdef CONFIG_FUNCTION_RET_TRACER - /* Index of current stored adress in ret_stack */ - int curr_ret_stack; - /* Stack of return addresses for return function tracing */ - struct ftrace_ret_stack ret_stack[FTRACE_RET_STACK_SIZE]; - /* - * Number of functions that haven't been traced - * because of depth overrun. - */ - atomic_t trace_overrun; -#endif }; -#ifdef CONFIG_FUNCTION_RET_TRACER -#define INIT_THREAD_INFO(tsk) \ -{ \ - .task = &tsk, \ - .exec_domain = &default_exec_domain, \ - .flags = 0, \ - .cpu = 0, \ - .preempt_count = 1, \ - .addr_limit = KERNEL_DS, \ - .restart_block = { \ - .fn = do_no_restart_syscall, \ - }, \ - .curr_ret_stack = -1,\ - .trace_overrun = ATOMIC_INIT(0) \ -} -#else #define INIT_THREAD_INFO(tsk) \ { \ .task = &tsk, \ @@ -82,7 +54,6 @@ struct thread_info { .fn = do_no_restart_syscall, \ }, \ } -#endif #define init_thread_info (init_thread_union.thread_info) #define init_stack (init_thread_union.stack) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 356bb1eb6e9..bb137f7297e 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -350,19 +350,21 @@ static int push_return_trace(unsigned long ret, unsigned long long time, unsigned long func) { int index; - struct thread_info *ti = current_thread_info(); + + if (!current->ret_stack) + return -EBUSY; /* The return trace stack is full */ - if (ti->curr_ret_stack == FTRACE_RET_STACK_SIZE - 1) { - atomic_inc(&ti->trace_overrun); + if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) { + atomic_inc(¤t->trace_overrun); return -EBUSY; } - index = ++ti->curr_ret_stack; + index = ++current->curr_ret_stack; barrier(); - ti->ret_stack[index].ret = ret; - ti->ret_stack[index].func = func; - ti->ret_stack[index].calltime = time; + current->ret_stack[index].ret = ret; + current->ret_stack[index].func = func; + current->ret_stack[index].calltime = time; return 0; } @@ -373,13 +375,12 @@ static void pop_return_trace(unsigned long *ret, unsigned long long *time, { int index; - struct thread_info *ti = current_thread_info(); - index = ti->curr_ret_stack; - *ret = ti->ret_stack[index].ret; - *func = ti->ret_stack[index].func; - *time = ti->ret_stack[index].calltime; - *overrun = atomic_read(&ti->trace_overrun); - ti->curr_ret_stack--; + index = current->curr_ret_stack; + *ret = current->ret_stack[index].ret; + *func = current->ret_stack[index].func; + *time = current->ret_stack[index].calltime; + *overrun = atomic_read(¤t->trace_overrun); + current->curr_ret_stack--; } /* diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index f7ba4ea5e12..2ba259b2def 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -323,6 +323,8 @@ struct ftrace_retfunc { }; #ifdef CONFIG_FUNCTION_RET_TRACER +#define FTRACE_RETFUNC_DEPTH 50 +#define FTRACE_RETSTACK_ALLOC_SIZE 32 /* Type of a callback handler of tracing return function */ typedef void (*trace_function_return_t)(struct ftrace_retfunc *); @@ -330,6 +332,9 @@ extern int register_ftrace_return(trace_function_return_t func); /* The current handler in use */ extern trace_function_return_t ftrace_function_return; extern void unregister_ftrace_return(void); + +extern void ftrace_retfunc_init_task(struct task_struct *t); +extern void ftrace_retfunc_exit_task(struct task_struct *t); #endif #endif /* _LINUX_FTRACE_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index c8e0db46420..bee1e93c95a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1352,6 +1352,17 @@ struct task_struct { unsigned long default_timer_slack_ns; struct list_head *scm_work_list; +#ifdef CONFIG_FUNCTION_RET_TRACER + /* Index of current stored adress in ret_stack */ + int curr_ret_stack; + /* Stack of return addresses for return function tracing */ + struct ftrace_ret_stack *ret_stack; + /* + * Number of functions that haven't been traced + * because of depth overrun. + */ + atomic_t trace_overrun; +#endif }; /* @@ -2006,18 +2017,6 @@ static inline void setup_thread_stack(struct task_struct *p, struct task_struct { *task_thread_info(p) = *task_thread_info(org); task_thread_info(p)->task = p; - -#ifdef CONFIG_FUNCTION_RET_TRACER - /* - * When fork() creates a child process, this function is called. - * But the child task may not inherit the return adresses traced - * by the return function tracer because it will directly execute - * in userspace and will not return to kernel functions its parent - * used. - */ - task_thread_info(p)->curr_ret_stack = -1; - atomic_set(&task_thread_info(p)->trace_overrun, 0); -#endif } static inline unsigned long *end_of_stack(struct task_struct *p) diff --git a/kernel/exit.c b/kernel/exit.c index 35c8ec2ba03..b9d446329da 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include @@ -1127,7 +1128,9 @@ NORET_TYPE void do_exit(long code) preempt_disable(); /* causes final put_task_struct in finish_task_switch(). */ tsk->state = TASK_DEAD; - +#ifdef CONFIG_FUNCTION_RET_TRACER + ftrace_retfunc_exit_task(tsk); +#endif schedule(); BUG(); /* Avoid "noreturn function does return". */ diff --git a/kernel/fork.c b/kernel/fork.c index ac62f43ee43..d1eb30e69cc 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include #include @@ -1269,6 +1270,9 @@ static struct task_struct *copy_process(unsigned long clone_flags, total_forks++; spin_unlock(¤t->sighand->siglock); write_unlock_irq(&tasklist_lock); +#ifdef CONFIG_FUNCTION_RET_TRACER + ftrace_retfunc_init_task(p); +#endif proc_fork_connector(p); cgroup_post_fork(p); return p; diff --git a/kernel/sched.c b/kernel/sched.c index 4de56108c86..fb17205950d 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5901,6 +5901,9 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) * The idle tasks have their own, simple scheduling class: */ idle->sched_class = &idle_sched_class; +#ifdef CONFIG_FUNCTION_RET_TRACER + ftrace_retfunc_init_task(idle); +#endif } /* diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index f212da48668..90d99fb02ae 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1498,10 +1498,77 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, #ifdef CONFIG_FUNCTION_RET_TRACER +static atomic_t ftrace_retfunc_active; + /* The callback that hooks the return of a function */ trace_function_return_t ftrace_function_return = (trace_function_return_t)ftrace_stub; + +/* Try to assign a return stack array on FTRACE_RETSTACK_ALLOC_SIZE tasks. */ +static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list) +{ + int i; + int ret = 0; + unsigned long flags; + int start = 0, end = FTRACE_RETSTACK_ALLOC_SIZE; + struct task_struct *g, *t; + + for (i = 0; i < FTRACE_RETSTACK_ALLOC_SIZE; i++) { + ret_stack_list[i] = kmalloc(FTRACE_RETFUNC_DEPTH + * sizeof(struct ftrace_ret_stack), + GFP_KERNEL); + if (!ret_stack_list[i]) { + start = 0; + end = i; + ret = -ENOMEM; + goto free; + } + } + + read_lock_irqsave(&tasklist_lock, flags); + do_each_thread(g, t) { + if (start == end) { + ret = -EAGAIN; + goto unlock; + } + + if (t->ret_stack == NULL) { + t->ret_stack = ret_stack_list[start++]; + t->curr_ret_stack = -1; + atomic_set(&t->trace_overrun, 0); + } + } while_each_thread(g, t); + +unlock: + read_unlock_irqrestore(&tasklist_lock, flags); +free: + for (i = start; i < end; i++) + kfree(ret_stack_list[i]); + return ret; +} + +/* Allocate a return stack for each task */ +static int start_return_tracing(void) +{ + struct ftrace_ret_stack **ret_stack_list; + int ret; + + ret_stack_list = kmalloc(FTRACE_RETSTACK_ALLOC_SIZE * + sizeof(struct ftrace_ret_stack *), + GFP_KERNEL); + + if (!ret_stack_list) + return -ENOMEM; + + do { + ret = alloc_retstack_tasklist(ret_stack_list); + } while (ret == -EAGAIN); + + kfree(ret_stack_list); + return ret; +} + int register_ftrace_return(trace_function_return_t func) { int ret = 0; @@ -1516,7 +1583,12 @@ int register_ftrace_return(trace_function_return_t func) ret = -EBUSY; goto out; } - + atomic_inc(&ftrace_retfunc_active); + ret = start_return_tracing(); + if (ret) { + atomic_dec(&ftrace_retfunc_active); + goto out; + } ftrace_tracing_type = FTRACE_TYPE_RETURN; ftrace_function_return = func; ftrace_startup(); @@ -1530,6 +1602,7 @@ void unregister_ftrace_return(void) { mutex_lock(&ftrace_sysctl_lock); + atomic_dec(&ftrace_retfunc_active); ftrace_function_return = (trace_function_return_t)ftrace_stub; ftrace_shutdown(); /* Restore normal tracing type */ @@ -1537,6 +1610,27 @@ void unregister_ftrace_return(void) mutex_unlock(&ftrace_sysctl_lock); } + +/* Allocate a return stack for newly created task */ +void ftrace_retfunc_init_task(struct task_struct *t) +{ + if (atomic_read(&ftrace_retfunc_active)) { + t->ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH + * sizeof(struct ftrace_ret_stack), + GFP_KERNEL); + if (!t->ret_stack) + return; + t->curr_ret_stack = -1; + atomic_set(&t->trace_overrun, 0); + } else + t->ret_stack = NULL; +} + +void ftrace_retfunc_exit_task(struct task_struct *t) +{ + kfree(t->ret_stack); + t->ret_stack = NULL; +} #endif -- cgit v1.2.3-70-g09d2 From 82f60f0bc854aada696f27d863c03bef91f1509d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 23 Nov 2008 09:18:56 +0100 Subject: tracing/function-return-tracer: clean up task start/exit callbacks Impact: cleanup Eliminate #ifdefs in core code by using empty inline functions. Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 3 +++ kernel/exit.c | 2 -- kernel/fork.c | 2 -- kernel/sched.c | 2 -- 4 files changed, 3 insertions(+), 6 deletions(-) (limited to 'kernel/exit.c') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 2ba259b2def..938ca194264 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -335,6 +335,9 @@ extern void unregister_ftrace_return(void); extern void ftrace_retfunc_init_task(struct task_struct *t); extern void ftrace_retfunc_exit_task(struct task_struct *t); +#else +static inline void ftrace_retfunc_init_task(struct task_struct *t) { } +static inline void ftrace_retfunc_exit_task(struct task_struct *t) { } #endif #endif /* _LINUX_FTRACE_H */ diff --git a/kernel/exit.c b/kernel/exit.c index b9d446329da..ef04d03b328 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1128,9 +1128,7 @@ NORET_TYPE void do_exit(long code) preempt_disable(); /* causes final put_task_struct in finish_task_switch(). */ tsk->state = TASK_DEAD; -#ifdef CONFIG_FUNCTION_RET_TRACER ftrace_retfunc_exit_task(tsk); -#endif schedule(); BUG(); /* Avoid "noreturn function does return". */ diff --git a/kernel/fork.c b/kernel/fork.c index d1eb30e69cc..fbf4a4c0a62 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1270,9 +1270,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, total_forks++; spin_unlock(¤t->sighand->siglock); write_unlock_irq(&tasklist_lock); -#ifdef CONFIG_FUNCTION_RET_TRACER ftrace_retfunc_init_task(p); -#endif proc_fork_connector(p); cgroup_post_fork(p); return p; diff --git a/kernel/sched.c b/kernel/sched.c index fb17205950d..388d9db044a 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5901,9 +5901,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) * The idle tasks have their own, simple scheduling class: */ idle->sched_class = &idle_sched_class; -#ifdef CONFIG_FUNCTION_RET_TRACER ftrace_retfunc_init_task(idle); -#endif } /* -- cgit v1.2.3-70-g09d2 From 65afa5e603d507014580ead016ec887b49e1afa6 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 23 Nov 2008 18:43:39 +0100 Subject: tracing/function-return-tracer: free the return stack on free_task() Impact: avoid losing some traces when a task is freed do_exit() is not the last function called when a task finishes. There are still some functions which are to be called such as ree_task(). So we delay the freeing of the return stack to the last moment. Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- kernel/exit.c | 2 -- kernel/fork.c | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index ef04d03b328..e5ae36ebe8a 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -47,7 +47,6 @@ #include #include #include -#include #include #include @@ -1128,7 +1127,6 @@ NORET_TYPE void do_exit(long code) preempt_disable(); /* causes final put_task_struct in finish_task_switch(). */ tsk->state = TASK_DEAD; - ftrace_retfunc_exit_task(tsk); schedule(); BUG(); /* Avoid "noreturn function does return". */ diff --git a/kernel/fork.c b/kernel/fork.c index fbf4a4c0a62..d6e1a3205f6 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -140,6 +140,7 @@ void free_task(struct task_struct *tsk) prop_local_destroy_single(&tsk->dirties); free_thread_info(tsk->stack); rt_mutex_debug_task_free(tsk); + ftrace_retfunc_exit_task(tsk); free_task_struct(tsk); } EXPORT_SYMBOL(free_task); -- cgit v1.2.3-70-g09d2