From c47956d9ae3341d2d1998bff26620fa3338c01e4 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 23 Dec 2008 23:24:11 -0500 Subject: ftrace: remove obsolete print continue functionality Impact: cleanup, remove obsolete code Now that the ring buffer used by ftrace allows for variable length entries, we do not need the 'cont' feature of the buffer. This code makes other parts of ftrace more complex and by removing this it simplifies the ftrace code. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 61 ---------------------------------------------------- 1 file changed, 61 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f4bb3800318..fca0233f1d7 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1765,43 +1765,6 @@ static int task_state_char(unsigned long state) return bit < sizeof(state_to_char) - 1 ? state_to_char[bit] : '?'; } -/* - * The message is supposed to contain an ending newline. - * If the printing stops prematurely, try to add a newline of our own. - */ -void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter) -{ - struct trace_entry *ent; - struct trace_field_cont *cont; - bool ok = true; - - ent = peek_next_entry(iter, iter->cpu, NULL); - if (!ent || ent->type != TRACE_CONT) { - trace_seq_putc(s, '\n'); - return; - } - - do { - cont = (struct trace_field_cont *)ent; - if (ok) - ok = (trace_seq_printf(s, "%s", cont->buf) > 0); - - ftrace_disable_cpu(); - - if (iter->buffer_iter[iter->cpu]) - ring_buffer_read(iter->buffer_iter[iter->cpu], NULL); - else - ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL); - - ftrace_enable_cpu(); - - ent = peek_next_entry(iter, iter->cpu, NULL); - } while (ent && ent->type == TRACE_CONT); - - if (!ok) - trace_seq_putc(s, '\n'); -} - static void test_cpu_buff_start(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; @@ -1834,9 +1797,6 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) int S, T; int i; - if (entry->type == TRACE_CONT) - return TRACE_TYPE_HANDLED; - test_cpu_buff_start(iter); next_entry = find_next_entry(iter, NULL, &next_ts); @@ -1922,8 +1882,6 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) seq_print_ip_sym(s, field->ip, sym_flags); trace_seq_printf(s, ": %s", field->buf); - if (entry->flags & TRACE_FLAG_CONT) - trace_seq_print_cont(s, iter); break; } case TRACE_BRANCH: { @@ -1968,9 +1926,6 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) entry = iter->ent; - if (entry->type == TRACE_CONT) - return TRACE_TYPE_HANDLED; - test_cpu_buff_start(iter); comm = trace_find_cmdline(iter->ent->pid); @@ -2076,8 +2031,6 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) seq_print_ip_sym(s, field->ip, sym_flags); trace_seq_printf(s, ": %s", field->buf); - if (entry->flags & TRACE_FLAG_CONT) - trace_seq_print_cont(s, iter); break; } case TRACE_GRAPH_RET: { @@ -2124,9 +2077,6 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter) entry = iter->ent; - if (entry->type == TRACE_CONT) - return TRACE_TYPE_HANDLED; - ret = trace_seq_printf(s, "%d %d %llu ", entry->pid, iter->cpu, iter->ts); if (!ret) @@ -2187,8 +2137,6 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter) trace_assign_type(field, entry); trace_seq_printf(s, "# %lx %s", field->ip, field->buf); - if (entry->flags & TRACE_FLAG_CONT) - trace_seq_print_cont(s, iter); break; } } @@ -2217,9 +2165,6 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter) entry = iter->ent; - if (entry->type == TRACE_CONT) - return TRACE_TYPE_HANDLED; - SEQ_PUT_HEX_FIELD_RET(s, entry->pid); SEQ_PUT_HEX_FIELD_RET(s, iter->cpu); SEQ_PUT_HEX_FIELD_RET(s, iter->ts); @@ -2283,9 +2228,6 @@ static enum print_line_t print_printk_msg_only(struct trace_iterator *iter) if (!ret) return TRACE_TYPE_PARTIAL_LINE; - if (entry->flags & TRACE_FLAG_CONT) - trace_seq_print_cont(s, iter); - return TRACE_TYPE_HANDLED; } @@ -2296,9 +2238,6 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter) entry = iter->ent; - if (entry->type == TRACE_CONT) - return TRACE_TYPE_HANDLED; - SEQ_PUT_FIELD_RET(s, entry->pid); SEQ_PUT_FIELD_RET(s, entry->cpu); SEQ_PUT_FIELD_RET(s, iter->ts); -- cgit v1.2.3-18-g5258 From f0868d1e23a8efec33beb3aa688aab7fdb1ae093 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 23 Dec 2008 23:24:12 -0500 Subject: ftrace: set up trace event hash infrastructure Impact: simplify/generalize/refactor trace.c The trace.c file is becoming more difficult to maintain due to the growing number of events. There is several formats that an event may be printed. This patch sets up the infrastructure of an event hash to allow for events to register how they should be printed. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 275 +-------------------------------------------------- 1 file changed, 1 insertion(+), 274 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index fca0233f1d7..90ce0c1d437 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -38,6 +38,7 @@ #include #include "trace.h" +#include "trace_output.h" #define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE) @@ -330,132 +331,6 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) tracing_record_cmdline(current); } -/** - * trace_seq_printf - sequence printing of trace information - * @s: trace sequence descriptor - * @fmt: printf format string - * - * The tracer may use either sequence operations or its own - * copy to user routines. To simplify formating of a trace - * trace_seq_printf is used to store strings into a special - * buffer (@s). Then the output may be either used by - * the sequencer or pulled into another buffer. - */ -int -trace_seq_printf(struct trace_seq *s, const char *fmt, ...) -{ - int len = (PAGE_SIZE - 1) - s->len; - va_list ap; - int ret; - - if (!len) - return 0; - - va_start(ap, fmt); - ret = vsnprintf(s->buffer + s->len, len, fmt, ap); - va_end(ap); - - /* If we can't write it all, don't bother writing anything */ - if (ret >= len) - return 0; - - s->len += ret; - - return len; -} - -/** - * trace_seq_puts - trace sequence printing of simple string - * @s: trace sequence descriptor - * @str: simple string to record - * - * The tracer may use either the sequence operations or its own - * copy to user routines. This function records a simple string - * into a special buffer (@s) for later retrieval by a sequencer - * or other mechanism. - */ -static int -trace_seq_puts(struct trace_seq *s, const char *str) -{ - int len = strlen(str); - - if (len > ((PAGE_SIZE - 1) - s->len)) - return 0; - - memcpy(s->buffer + s->len, str, len); - s->len += len; - - return len; -} - -static int -trace_seq_putc(struct trace_seq *s, unsigned char c) -{ - if (s->len >= (PAGE_SIZE - 1)) - return 0; - - s->buffer[s->len++] = c; - - return 1; -} - -static int -trace_seq_putmem(struct trace_seq *s, void *mem, size_t len) -{ - if (len > ((PAGE_SIZE - 1) - s->len)) - return 0; - - memcpy(s->buffer + s->len, mem, len); - s->len += len; - - return len; -} - -#define MAX_MEMHEX_BYTES 8 -#define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1) - -static int -trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len) -{ - unsigned char hex[HEX_CHARS]; - unsigned char *data = mem; - int i, j; - -#ifdef __BIG_ENDIAN - for (i = 0, j = 0; i < len; i++) { -#else - for (i = len-1, j = 0; i >= 0; i--) { -#endif - hex[j++] = hex_asc_hi(data[i]); - hex[j++] = hex_asc_lo(data[i]); - } - hex[j++] = ' '; - - return trace_seq_putmem(s, hex, j); -} - -static int -trace_seq_path(struct trace_seq *s, struct path *path) -{ - unsigned char *p; - - if (s->len >= (PAGE_SIZE - 1)) - return 0; - p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len); - if (!IS_ERR(p)) { - p = mangle_path(s->buffer + s->len, p, "\n"); - if (p) { - s->len = p - s->buffer; - return 1; - } - } else { - s->buffer[s->len++] = '?'; - return 1; - } - - return 0; -} - static void trace_seq_reset(struct trace_seq *s) { @@ -1473,154 +1348,6 @@ static void s_stop(struct seq_file *m, void *p) mutex_unlock(&trace_types_lock); } -#ifdef CONFIG_KRETPROBES -static inline const char *kretprobed(const char *name) -{ - static const char tramp_name[] = "kretprobe_trampoline"; - int size = sizeof(tramp_name); - - if (strncmp(tramp_name, name, size) == 0) - return "[unknown/kretprobe'd]"; - return name; -} -#else -static inline const char *kretprobed(const char *name) -{ - return name; -} -#endif /* CONFIG_KRETPROBES */ - -static int -seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address) -{ -#ifdef CONFIG_KALLSYMS - char str[KSYM_SYMBOL_LEN]; - const char *name; - - kallsyms_lookup(address, NULL, NULL, NULL, str); - - name = kretprobed(str); - - return trace_seq_printf(s, fmt, name); -#endif - return 1; -} - -static int -seq_print_sym_offset(struct trace_seq *s, const char *fmt, - unsigned long address) -{ -#ifdef CONFIG_KALLSYMS - char str[KSYM_SYMBOL_LEN]; - const char *name; - - sprint_symbol(str, address); - name = kretprobed(str); - - return trace_seq_printf(s, fmt, name); -#endif - return 1; -} - -#ifndef CONFIG_64BIT -# define IP_FMT "%08lx" -#else -# define IP_FMT "%016lx" -#endif - -int -seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags) -{ - int ret; - - if (!ip) - return trace_seq_printf(s, "0"); - - if (sym_flags & TRACE_ITER_SYM_OFFSET) - ret = seq_print_sym_offset(s, "%s", ip); - else - ret = seq_print_sym_short(s, "%s", ip); - - if (!ret) - return 0; - - if (sym_flags & TRACE_ITER_SYM_ADDR) - ret = trace_seq_printf(s, " <" IP_FMT ">", ip); - return ret; -} - -static inline int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm, - unsigned long ip, unsigned long sym_flags) -{ - struct file *file = NULL; - unsigned long vmstart = 0; - int ret = 1; - - if (mm) { - const struct vm_area_struct *vma; - - down_read(&mm->mmap_sem); - vma = find_vma(mm, ip); - if (vma) { - file = vma->vm_file; - vmstart = vma->vm_start; - } - if (file) { - ret = trace_seq_path(s, &file->f_path); - if (ret) - ret = trace_seq_printf(s, "[+0x%lx]", ip - vmstart); - } - up_read(&mm->mmap_sem); - } - if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file)) - ret = trace_seq_printf(s, " <" IP_FMT ">", ip); - return ret; -} - -static int -seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s, - unsigned long sym_flags) -{ - struct mm_struct *mm = NULL; - int ret = 1; - unsigned int i; - - if (trace_flags & TRACE_ITER_SYM_USEROBJ) { - struct task_struct *task; - /* - * we do the lookup on the thread group leader, - * since individual threads might have already quit! - */ - rcu_read_lock(); - task = find_task_by_vpid(entry->ent.tgid); - if (task) - mm = get_task_mm(task); - rcu_read_unlock(); - } - - for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { - unsigned long ip = entry->caller[i]; - - if (ip == ULONG_MAX || !ret) - break; - if (i && ret) - ret = trace_seq_puts(s, " <- "); - if (!ip) { - if (ret) - ret = trace_seq_puts(s, "??"); - continue; - } - if (!ret) - break; - if (ret) - ret = seq_print_user_ip(s, mm, ip, sym_flags); - } - - if (mm) - mmput(mm); - return ret; -} - static void print_lat_help_header(struct seq_file *m) { seq_puts(m, "# _------=> CPU# \n"); -- cgit v1.2.3-18-g5258 From f633cef0200bbaec539e2dbb0bc4bed7f022f98b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 23 Dec 2008 23:24:13 -0500 Subject: ftrace: change trace.c to use registered events Impact: rework trace.c to use new event register API Almost every ftrace event has to implement its output display in trace.c through a different function. Some events did not handle all the formats (trace, latency-trace, raw, hex, binary), and this method does not scale well. This patch converts the format functions to use the event API to find the event and and print its format. Currently, we have a print function for trace, latency_trace, raw, hex and binary. A trace_nop_print is available if the event wants to avoid output on a particular format. Perhaps other tracers could use this in the future (like mmiotrace and function_graph). Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 402 +++++---------------------------------------------- 1 file changed, 38 insertions(+), 364 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 90ce0c1d437..3f0317586cf 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1483,15 +1483,6 @@ lat_print_timestamp(struct trace_seq *s, u64 abs_usecs, trace_seq_puts(s, " : "); } -static const char state_to_char[] = TASK_STATE_TO_CHAR_STR; - -static int task_state_char(unsigned long state) -{ - int bit = state ? __ffs(state) + 1 : 0; - - return bit < sizeof(state_to_char) - 1 ? state_to_char[bit] : '?'; -} - static void test_cpu_buff_start(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; @@ -1515,14 +1506,14 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) struct trace_seq *s = &iter->seq; unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); struct trace_entry *next_entry; + struct trace_event *event; unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE); struct trace_entry *entry = iter->ent; unsigned long abs_usecs; unsigned long rel_usecs; u64 next_ts; char *comm; - int S, T; - int i; + int ret; test_cpu_buff_start(iter); @@ -1547,94 +1538,16 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) lat_print_generic(s, entry, cpu); lat_print_timestamp(s, abs_usecs, rel_usecs); } - switch (entry->type) { - case TRACE_FN: { - struct ftrace_entry *field; - - trace_assign_type(field, entry); - - seq_print_ip_sym(s, field->ip, sym_flags); - trace_seq_puts(s, " ("); - seq_print_ip_sym(s, field->parent_ip, sym_flags); - trace_seq_puts(s, ")\n"); - break; - } - case TRACE_CTX: - case TRACE_WAKE: { - struct ctx_switch_entry *field; - - trace_assign_type(field, entry); - - T = task_state_char(field->next_state); - S = task_state_char(field->prev_state); - comm = trace_find_cmdline(field->next_pid); - trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n", - field->prev_pid, - field->prev_prio, - S, entry->type == TRACE_CTX ? "==>" : " +", - field->next_cpu, - field->next_pid, - field->next_prio, - T, comm); - break; - } - case TRACE_SPECIAL: { - struct special_entry *field; - - trace_assign_type(field, entry); - - trace_seq_printf(s, "# %ld %ld %ld\n", - field->arg1, - field->arg2, - field->arg3); - break; - } - case TRACE_STACK: { - struct stack_entry *field; - - trace_assign_type(field, entry); - - for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { - if (i) - trace_seq_puts(s, " <= "); - seq_print_ip_sym(s, field->caller[i], sym_flags); - } - trace_seq_puts(s, "\n"); - break; - } - case TRACE_PRINT: { - struct print_entry *field; - - trace_assign_type(field, entry); - seq_print_ip_sym(s, field->ip, sym_flags); - trace_seq_printf(s, ": %s", field->buf); - break; - } - case TRACE_BRANCH: { - struct trace_branch *field; - - trace_assign_type(field, entry); - - trace_seq_printf(s, "[%s] %s:%s:%d\n", - field->correct ? " ok " : " MISS ", - field->func, - field->file, - field->line); - break; + event = ftrace_find_event(entry->type); + if (event && event->latency_trace) { + ret = event->latency_trace(s, entry, sym_flags); + if (ret) + return ret; + return TRACE_TYPE_HANDLED; } - case TRACE_USER_STACK: { - struct userstack_entry *field; - trace_assign_type(field, entry); - - seq_print_userip_objs(field, s, sym_flags); - trace_seq_putc(s, '\n'); - break; - } - default: - trace_seq_printf(s, "Unknown type %d\n", entry->type); - } + trace_seq_printf(s, "Unknown type %d\n", entry->type); return TRACE_TYPE_HANDLED; } @@ -1643,13 +1556,12 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) struct trace_seq *s = &iter->seq; unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); struct trace_entry *entry; + struct trace_event *event; unsigned long usec_rem; unsigned long long t; unsigned long secs; char *comm; int ret; - int S, T; - int i; entry = iter->ent; @@ -1671,127 +1583,17 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) if (!ret) return TRACE_TYPE_PARTIAL_LINE; - switch (entry->type) { - case TRACE_FN: { - struct ftrace_entry *field; - - trace_assign_type(field, entry); - - ret = seq_print_ip_sym(s, field->ip, sym_flags); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - if ((sym_flags & TRACE_ITER_PRINT_PARENT) && - field->parent_ip) { - ret = trace_seq_printf(s, " <-"); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - ret = seq_print_ip_sym(s, - field->parent_ip, - sym_flags); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - } - ret = trace_seq_printf(s, "\n"); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - break; - } - case TRACE_CTX: - case TRACE_WAKE: { - struct ctx_switch_entry *field; - - trace_assign_type(field, entry); - - T = task_state_char(field->next_state); - S = task_state_char(field->prev_state); - ret = trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c\n", - field->prev_pid, - field->prev_prio, - S, - entry->type == TRACE_CTX ? "==>" : " +", - field->next_cpu, - field->next_pid, - field->next_prio, - T); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - break; - } - case TRACE_SPECIAL: { - struct special_entry *field; - - trace_assign_type(field, entry); - - ret = trace_seq_printf(s, "# %ld %ld %ld\n", - field->arg1, - field->arg2, - field->arg3); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - break; - } - case TRACE_STACK: { - struct stack_entry *field; - - trace_assign_type(field, entry); - - for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { - if (i) { - ret = trace_seq_puts(s, " <= "); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - } - ret = seq_print_ip_sym(s, field->caller[i], - sym_flags); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - } - ret = trace_seq_puts(s, "\n"); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - break; - } - case TRACE_PRINT: { - struct print_entry *field; - - trace_assign_type(field, entry); - - seq_print_ip_sym(s, field->ip, sym_flags); - trace_seq_printf(s, ": %s", field->buf); - break; - } - case TRACE_GRAPH_RET: { - return print_graph_function(iter); - } - case TRACE_GRAPH_ENT: { - return print_graph_function(iter); - } - case TRACE_BRANCH: { - struct trace_branch *field; - - trace_assign_type(field, entry); - - trace_seq_printf(s, "[%s] %s:%s:%d\n", - field->correct ? " ok " : " MISS ", - field->func, - field->file, - field->line); - break; + event = ftrace_find_event(entry->type); + if (event && event->trace) { + ret = event->trace(s, entry, sym_flags); + if (ret) + return ret; + return TRACE_TYPE_HANDLED; } - case TRACE_USER_STACK: { - struct userstack_entry *field; - - trace_assign_type(field, entry); + ret = trace_seq_printf(s, "Unknown type %d\n", entry->type); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; - ret = seq_print_userip_objs(field, s, sym_flags); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - ret = trace_seq_putc(s, '\n'); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - break; - } - } return TRACE_TYPE_HANDLED; } @@ -1799,8 +1601,8 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; struct trace_entry *entry; + struct trace_event *event; int ret; - int S, T; entry = iter->ent; @@ -1809,86 +1611,26 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter) if (!ret) return TRACE_TYPE_PARTIAL_LINE; - switch (entry->type) { - case TRACE_FN: { - struct ftrace_entry *field; - - trace_assign_type(field, entry); - - ret = trace_seq_printf(s, "%x %x\n", - field->ip, - field->parent_ip); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - break; - } - case TRACE_CTX: - case TRACE_WAKE: { - struct ctx_switch_entry *field; - - trace_assign_type(field, entry); - - T = task_state_char(field->next_state); - S = entry->type == TRACE_WAKE ? '+' : - task_state_char(field->prev_state); - ret = trace_seq_printf(s, "%d %d %c %d %d %d %c\n", - field->prev_pid, - field->prev_prio, - S, - field->next_cpu, - field->next_pid, - field->next_prio, - T); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - break; - } - case TRACE_SPECIAL: - case TRACE_USER_STACK: - case TRACE_STACK: { - struct special_entry *field; - - trace_assign_type(field, entry); - - ret = trace_seq_printf(s, "# %ld %ld %ld\n", - field->arg1, - field->arg2, - field->arg3); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - break; + event = ftrace_find_event(entry->type); + if (event && event->raw) { + ret = event->raw(s, entry, 0); + if (ret) + return ret; + return TRACE_TYPE_HANDLED; } - case TRACE_PRINT: { - struct print_entry *field; - - trace_assign_type(field, entry); + ret = trace_seq_printf(s, "%d ?\n", entry->type); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; - trace_seq_printf(s, "# %lx %s", field->ip, field->buf); - break; - } - } return TRACE_TYPE_HANDLED; } -#define SEQ_PUT_FIELD_RET(s, x) \ -do { \ - if (!trace_seq_putmem(s, &(x), sizeof(x))) \ - return 0; \ -} while (0) - -#define SEQ_PUT_HEX_FIELD_RET(s, x) \ -do { \ - BUILD_BUG_ON(sizeof(x) > MAX_MEMHEX_BYTES); \ - if (!trace_seq_putmem_hex(s, &(x), sizeof(x))) \ - return 0; \ -} while (0) - static enum print_line_t print_hex_fmt(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; unsigned char newline = '\n'; struct trace_entry *entry; - int S, T; + struct trace_event *event; entry = iter->ent; @@ -1896,47 +1638,10 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter) SEQ_PUT_HEX_FIELD_RET(s, iter->cpu); SEQ_PUT_HEX_FIELD_RET(s, iter->ts); - switch (entry->type) { - case TRACE_FN: { - struct ftrace_entry *field; - - trace_assign_type(field, entry); - - SEQ_PUT_HEX_FIELD_RET(s, field->ip); - SEQ_PUT_HEX_FIELD_RET(s, field->parent_ip); - break; - } - case TRACE_CTX: - case TRACE_WAKE: { - struct ctx_switch_entry *field; - - trace_assign_type(field, entry); - - T = task_state_char(field->next_state); - S = entry->type == TRACE_WAKE ? '+' : - task_state_char(field->prev_state); - SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid); - SEQ_PUT_HEX_FIELD_RET(s, field->prev_prio); - SEQ_PUT_HEX_FIELD_RET(s, S); - SEQ_PUT_HEX_FIELD_RET(s, field->next_cpu); - SEQ_PUT_HEX_FIELD_RET(s, field->next_pid); - SEQ_PUT_HEX_FIELD_RET(s, field->next_prio); - SEQ_PUT_HEX_FIELD_RET(s, T); - break; - } - case TRACE_SPECIAL: - case TRACE_USER_STACK: - case TRACE_STACK: { - struct special_entry *field; - - trace_assign_type(field, entry); + event = ftrace_find_event(entry->type); + if (event && event->hex) + event->hex(s, entry, 0); - SEQ_PUT_HEX_FIELD_RET(s, field->arg1); - SEQ_PUT_HEX_FIELD_RET(s, field->arg2); - SEQ_PUT_HEX_FIELD_RET(s, field->arg3); - break; - } - } SEQ_PUT_FIELD_RET(s, newline); return TRACE_TYPE_HANDLED; @@ -1962,6 +1667,7 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; struct trace_entry *entry; + struct trace_event *event; entry = iter->ent; @@ -1969,43 +1675,11 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter) SEQ_PUT_FIELD_RET(s, entry->cpu); SEQ_PUT_FIELD_RET(s, iter->ts); - switch (entry->type) { - case TRACE_FN: { - struct ftrace_entry *field; - - trace_assign_type(field, entry); + event = ftrace_find_event(entry->type); + if (event && event->binary) + event->binary(s, entry, 0); - SEQ_PUT_FIELD_RET(s, field->ip); - SEQ_PUT_FIELD_RET(s, field->parent_ip); - break; - } - case TRACE_CTX: { - struct ctx_switch_entry *field; - - trace_assign_type(field, entry); - - SEQ_PUT_FIELD_RET(s, field->prev_pid); - SEQ_PUT_FIELD_RET(s, field->prev_prio); - SEQ_PUT_FIELD_RET(s, field->prev_state); - SEQ_PUT_FIELD_RET(s, field->next_pid); - SEQ_PUT_FIELD_RET(s, field->next_prio); - SEQ_PUT_FIELD_RET(s, field->next_state); - break; - } - case TRACE_SPECIAL: - case TRACE_USER_STACK: - case TRACE_STACK: { - struct special_entry *field; - - trace_assign_type(field, entry); - - SEQ_PUT_FIELD_RET(s, field->arg1); - SEQ_PUT_FIELD_RET(s, field->arg2); - SEQ_PUT_FIELD_RET(s, field->arg3); - break; - } - } - return 1; + return TRACE_TYPE_HANDLED; } static int trace_empty(struct trace_iterator *iter) -- cgit v1.2.3-18-g5258 From dbd0b4b33074aa6b7832a9d9a5bd985eca5c1aa2 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 28 Dec 2008 20:44:51 -0800 Subject: tracing/ftrace: provide the base infrastructure for histogram tracing Impact: extend the tracing API The goal of this patch is to normalize and make more easy the implementation of statistical (histogram) tracing. It implements a trace_stat file into the /debugfs/tracing directory where one can print a one-shot output of statistics/histogram entries. A tracer has to provide two basic iterator callbacks: stat_start() => the first entry stat_next(prev, idx) => the next one. Note that it is adapted for arrays or hash tables or lists.... since it provides a pointer to the previous entry and the current index of the iterator. These two callbacks are called to get a snapshot of the statistics at each opening of the trace_stat file because. The values are so updated between two "cat trace_stat". And the tracer is free to lock its datas during the iteration to keep consistent values. Since it is almost always interesting to sort statisticals values to address the problems by priority, this infrastructure provides a "sorting" of the stat entries too if desired. A tracer has just to provide a stat_cmp callback to compare two entries and the stat tracing infrastructure will build a sorted list of the given entries. A last callback, called stat_headers, can be implemented by a tracer to output headers on its trace. If one of these callbacks is changed on runtime, it just have to signal it to the stat tracing API by calling the init_tracer_stat() helper. Changes in V2: - Fix a memory leak if the user opens multiple times the trace_stat file without closing it. Now we always free our list before rebuilding it. Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 3f0317586cf..b789c010512 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2354,6 +2354,7 @@ static int tracing_set_tracer(char *buf) if (ret) goto out; } + init_tracer_stat(t); trace_branch_enable(tr); out: @@ -3206,7 +3207,7 @@ __init static int tracer_alloc_buffers(void) #else current_trace = &nop_trace; #endif - + init_tracer_stat(current_trace); /* All seems OK, enable tracing */ tracing_disabled = 0; -- cgit v1.2.3-18-g5258 From 002bb86d8d42f18937aef396c3ecd65c7e02e21a Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 10 Jan 2009 11:34:13 -0800 Subject: tracing/ftrace: separate events tracing and stats tracing engine Impact: tracing's Api change Currently, the stat tracing depends on the events tracing. When you switch to a new tracer, the stats files of the previous tracer will disappear. But it's more scalable to separate those two engines. This way, we can keep the stat files of one or several tracers when we want, without bothering of multiple tracer stat files or tracer switching. To build/destroys its stats files, a tracer just have to call register_stat_tracer/unregister_stat_tracer everytimes it wants to. Signed-off-by: Frederic Weisbecker Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0418fc338b5..40217fb499e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2353,7 +2353,6 @@ static int tracing_set_tracer(char *buf) if (ret) goto out; } - init_tracer_stat(t); trace_branch_enable(tr); out: @@ -3218,7 +3217,6 @@ __init static int tracer_alloc_buffers(void) #else current_trace = &nop_trace; #endif - init_tracer_stat(current_trace); /* All seems OK, enable tracing */ tracing_disabled = 0; -- cgit v1.2.3-18-g5258 From 428aee1460a75197f0190534b4d610450ee59af1 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 14 Jan 2009 12:24:42 -0500 Subject: trace: print ftrace_dump at KERN_EMERG log level Impact: fix to print out ftrace_dump when expected I was debugging a hard race condition to only find out that after I hit the race, my log level was not at level to show KERN_INFO. The time it took to trigger the race was wasted because I did not capture the trace. Since ftrace_dump is only called from kernel oops (and only when it is set in the kernel command line to do so), or when a developer adds it to their own local tree, the log level of the print should be at KERN_EMERG to make sure the print appears. ftrace_dump is not called by a normal user setup, and will not add extra unwanted print out to the console. There is no reason it should be at KERN_INFO. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 40217fb499e..408c03f2b8a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3076,7 +3076,7 @@ static struct notifier_block trace_die_notifier = { * it if we decide to change what log level the ftrace dump * should be at. */ -#define KERN_TRACE KERN_INFO +#define KERN_TRACE KERN_EMERG static void trace_printk_seq(struct trace_seq *s) -- cgit v1.2.3-18-g5258 From 0ee6b6cf5bdb793b4c68507dd65adf16341aa4ca Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 14 Jan 2009 14:50:19 -0500 Subject: trace: stop all recording to ring buffer on ftrace_dump Impact: limit ftrace dump output Currently ftrace_dump only calls ftrace_kill that is a fast way to prevent the function tracer functions from being called (just sets a flag and clears the function to call, nothing else). It is better to also turn off any recording to the ring buffers as well. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 408c03f2b8a..dcb757f70d2 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3110,6 +3110,7 @@ void ftrace_dump(void) dump_ran = 1; /* No turning back! */ + tracing_off(); ftrace_kill(); for_each_tracing_cpu(cpu) { -- cgit v1.2.3-18-g5258 From 5361499101306cfb776c3cfa0f69d0479bc63868 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 15 Jan 2009 19:12:40 -0500 Subject: ftrace: add stack trace to function tracer Impact: new feature to stack trace any function Chris Mason asked about being able to pick and choose a function and get a stack trace from it. This feature enables his request. # echo io_schedule > /debug/tracing/set_ftrace_filter # echo function > /debug/tracing/current_tracer # echo func_stack_trace > /debug/tracing/trace_options Produces the following in /debug/tracing/trace: kjournald-702 [001] 135.673060: io_schedule <-sync_buffer kjournald-702 [002] 135.673671: <= sync_buffer <= __wait_on_bit <= out_of_line_wait_on_bit <= __wait_on_buffer <= sync_dirty_buffer <= journal_commit_transaction <= kjournald Note, be careful about turning this on without filtering the functions. You may find that you have a 10 second lag between typing and seeing what you typed. This is why the stack trace for the function tracer does not use the same stack_trace flag as the other tracers use. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index dcb757f70d2..3c54cb12522 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -835,10 +835,10 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data, trace_function(tr, data, ip, parent_ip, flags, pc); } -static void ftrace_trace_stack(struct trace_array *tr, - struct trace_array_cpu *data, - unsigned long flags, - int skip, int pc) +static void __ftrace_trace_stack(struct trace_array *tr, + struct trace_array_cpu *data, + unsigned long flags, + int skip, int pc) { #ifdef CONFIG_STACKTRACE struct ring_buffer_event *event; @@ -846,9 +846,6 @@ static void ftrace_trace_stack(struct trace_array *tr, struct stack_trace trace; unsigned long irq_flags; - if (!(trace_flags & TRACE_ITER_STACKTRACE)) - return; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), &irq_flags); if (!event) @@ -869,12 +866,23 @@ static void ftrace_trace_stack(struct trace_array *tr, #endif } +static void ftrace_trace_stack(struct trace_array *tr, + struct trace_array_cpu *data, + unsigned long flags, + int skip, int pc) +{ + if (!(trace_flags & TRACE_ITER_STACKTRACE)) + return; + + __ftrace_trace_stack(tr, data, flags, skip, pc); +} + void __trace_stack(struct trace_array *tr, struct trace_array_cpu *data, unsigned long flags, - int skip) + int skip, int pc) { - ftrace_trace_stack(tr, data, flags, skip, preempt_count()); + __ftrace_trace_stack(tr, data, flags, skip, pc); } static void ftrace_trace_userstack(struct trace_array *tr, -- cgit v1.2.3-18-g5258 From bb3c3c95f330f7bf16e33b002e48882616089db1 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 15 Jan 2009 20:40:23 -0500 Subject: ftrace: move function tracer functions out of trace.c Impact: clean up of trace.c The function tracer functions were put in trace.c because it needed to share static variables that were in trace.c. Since then, those variables have become global for various reasons. This patch moves the function tracer functions into trace_function.c where they belong. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 84 ---------------------------------------------------- 1 file changed, 84 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 3c54cb12522..2585ffb6c6b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1046,65 +1046,6 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) local_irq_restore(flags); } -#ifdef CONFIG_FUNCTION_TRACER -static void -function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip) -{ - struct trace_array *tr = &global_trace; - struct trace_array_cpu *data; - unsigned long flags; - long disabled; - int cpu, resched; - int pc; - - if (unlikely(!ftrace_function_enabled)) - return; - - pc = preempt_count(); - resched = ftrace_preempt_disable(); - local_save_flags(flags); - cpu = raw_smp_processor_id(); - data = tr->data[cpu]; - disabled = atomic_inc_return(&data->disabled); - - if (likely(disabled == 1)) - trace_function(tr, data, ip, parent_ip, flags, pc); - - atomic_dec(&data->disabled); - ftrace_preempt_enable(resched); -} - -static void -function_trace_call(unsigned long ip, unsigned long parent_ip) -{ - struct trace_array *tr = &global_trace; - struct trace_array_cpu *data; - unsigned long flags; - long disabled; - int cpu; - int pc; - - if (unlikely(!ftrace_function_enabled)) - return; - - /* - * Need to use raw, since this must be called before the - * recursive protection is performed. - */ - local_irq_save(flags); - cpu = raw_smp_processor_id(); - data = tr->data[cpu]; - disabled = atomic_inc_return(&data->disabled); - - if (likely(disabled == 1)) { - pc = preempt_count(); - trace_function(tr, data, ip, parent_ip, flags, pc); - } - - atomic_dec(&data->disabled); - local_irq_restore(flags); -} - #ifdef CONFIG_FUNCTION_GRAPH_TRACER int trace_graph_entry(struct ftrace_graph_ent *trace) { @@ -1162,31 +1103,6 @@ void trace_graph_return(struct ftrace_graph_ret *trace) } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ -static struct ftrace_ops trace_ops __read_mostly = -{ - .func = function_trace_call, -}; - -void tracing_start_function_trace(void) -{ - ftrace_function_enabled = 0; - - if (trace_flags & TRACE_ITER_PREEMPTONLY) - trace_ops.func = function_trace_call_preempt_only; - else - trace_ops.func = function_trace_call; - - register_ftrace_function(&trace_ops); - ftrace_function_enabled = 1; -} - -void tracing_stop_function_trace(void) -{ - ftrace_function_enabled = 0; - unregister_ftrace_function(&trace_ops); -} -#endif - enum trace_file_type { TRACE_FILE_LAT_FMT = 1, TRACE_FILE_ANNOTATE = 2, -- cgit v1.2.3-18-g5258 From a225cdd263f340c864febb1992802fb5b08bc328 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 15 Jan 2009 23:06:03 -0500 Subject: ftrace: remove static from function tracer functions Impact: clean up After reorganizing the functions in trace.c and trace_function.c, they no longer need to be in global context. This patch makes the functions and one variable into static. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2585ffb6c6b..7de6a94063d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -187,9 +187,6 @@ int tracing_is_enabled(void) return tracer_enabled; } -/* function tracing enabled */ -int ftrace_function_enabled; - /* * trace_buf_size is the size in bytes that is allocated * for a buffer. Note, the number of bytes is always rounded -- cgit v1.2.3-18-g5258 From 745b1626dd71ce9661a05ea4db57859ed5c773d2 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 15 Jan 2009 23:40:11 -0500 Subject: trace: set max latency variable to zero on default Impact: trace max latencies on start of latency tracing This patch sets the max latency to zero whenever one of the irq variant tracers or the wakeup tracer is set to current tracer. Most developers expect to see output when starting up a latency tracer. But since the max_latency is already set to max, and it takes a latency greater than max_latency to be recorded, there is no trace. This is not the expected behavior and has even confused myself. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 7de6a94063d..220c264e311 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -41,7 +41,7 @@ #define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE) -unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX; +unsigned long __read_mostly tracing_max_latency; unsigned long __read_mostly tracing_thresh; /* -- cgit v1.2.3-18-g5258 From 97b17efe4537e11bf6669106cfe4ee2c5331b267 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 21 Jan 2009 15:24:56 -0500 Subject: ring-buffer: do not swap if recording is disabled If the ring buffer recording has been disabled. Do not let swapping of ring buffers occur. Simply return -EAGAIN. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 220c264e311..757ae6f7e64 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -415,7 +415,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) ftrace_enable_cpu(); - WARN_ON_ONCE(ret); + WARN_ON_ONCE(ret && ret != -EAGAIN); __update_max_tr(tr, tsk, cpu); __raw_spin_unlock(&ftrace_max_lock); -- cgit v1.2.3-18-g5258 From b06a830183b610c0a88c29a92feb7991a867ab46 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 22 Jan 2009 14:26:15 -0500 Subject: trace: fix logic to start/stop counting The logic in the tracing_start/stop code prevents the WARN_ON from ever detecting if a start/stop pair was mismatched. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 757ae6f7e64..2129ab9d2a4 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -610,13 +610,12 @@ void tracing_start(void) return; spin_lock_irqsave(&tracing_start_lock, flags); - if (--trace_stop_count) - goto out; - - if (trace_stop_count < 0) { - /* Someone screwed up their debugging */ - WARN_ON_ONCE(1); - trace_stop_count = 0; + if (--trace_stop_count) { + if (trace_stop_count < 0) { + /* Someone screwed up their debugging */ + WARN_ON_ONCE(1); + trace_stop_count = 0; + } goto out; } -- cgit v1.2.3-18-g5258 From 9011262a37cb438f0fa9394b5e83840db8f9680a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 23 Jan 2009 12:06:23 -0200 Subject: ftrace: add ftrace_vprintk Impact: new helper function Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2129ab9d2a4..2f8ac1f008f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2951,6 +2951,15 @@ int __ftrace_printk(unsigned long ip, const char *fmt, ...) } EXPORT_SYMBOL_GPL(__ftrace_printk); +int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap) +{ + if (!(trace_flags & TRACE_ITER_PRINTK)) + return 0; + + return trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap); +} +EXPORT_SYMBOL_GPL(__ftrace_vprintk); + static int trace_panic_handler(struct notifier_block *this, unsigned long event, void *unused) { -- cgit v1.2.3-18-g5258 From b2821ae68b14480bfc85ea1629537163310bc5cd Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 2 Feb 2009 21:38:32 -0500 Subject: trace: fix default boot up tracer Peter Zijlstra started the functionality to start up a default tracing at bootup. This patch finishes the work. Now if you add 'ftrace=' to the command line, when that tracer is registered on bootup, that tracer is selected and starts tracing. Note, all selftests for tracers that are registered after this tracer is disabled. This prevents the selftests from disturbing the running tracer, or the running tracer from disturbing the selftest. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 54 insertions(+), 6 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2f8ac1f008f..2c720c79bc6 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -53,6 +53,11 @@ unsigned long __read_mostly tracing_thresh; */ static bool __read_mostly tracing_selftest_running; +/* + * If a tracer is running, we do not want to run SELFTEST. + */ +static bool __read_mostly tracing_selftest_disabled; + /* For tracers that don't implement custom flags */ static struct tracer_opt dummy_tracer_opt[] = { { } @@ -110,14 +115,19 @@ static cpumask_var_t __read_mostly tracing_buffer_mask; */ int ftrace_dump_on_oops; -static int tracing_set_tracer(char *buf); +static int tracing_set_tracer(const char *buf); + +#define BOOTUP_TRACER_SIZE 100 +static char bootup_tracer_buf[BOOTUP_TRACER_SIZE] __initdata; +static char *default_bootup_tracer; static int __init set_ftrace(char *str) { - tracing_set_tracer(str); + strncpy(bootup_tracer_buf, str, BOOTUP_TRACER_SIZE); + default_bootup_tracer = bootup_tracer_buf; return 1; } -__setup("ftrace", set_ftrace); +__setup("ftrace=", set_ftrace); static int __init set_ftrace_dump_on_oops(char *str) { @@ -468,7 +478,7 @@ int register_tracer(struct tracer *type) type->flags->opts = dummy_tracer_opt; #ifdef CONFIG_FTRACE_STARTUP_TEST - if (type->selftest) { + if (type->selftest && !tracing_selftest_disabled) { struct tracer *saved_tracer = current_trace; struct trace_array *tr = &global_trace; int i; @@ -510,8 +520,25 @@ int register_tracer(struct tracer *type) out: tracing_selftest_running = false; mutex_unlock(&trace_types_lock); - lock_kernel(); + if (!ret && default_bootup_tracer) { + if (!strncmp(default_bootup_tracer, type->name, + BOOTUP_TRACER_SIZE)) { + printk(KERN_INFO "Starting tracer '%s'\n", + type->name); + /* Do we want this tracer to start on bootup? */ + tracing_set_tracer(type->name); + default_bootup_tracer = NULL; + /* disable other selftests, since this will break it. */ + tracing_selftest_disabled = 1; +#ifdef CONFIG_FTRACE_STARTUP_TEST + printk(KERN_INFO "Disabling FTRACE selftests due" + " to running tracer '%s'\n", type->name); +#endif + } + } + + lock_kernel(); return ret; } @@ -2245,7 +2272,7 @@ tracing_set_trace_read(struct file *filp, char __user *ubuf, return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } -static int tracing_set_tracer(char *buf) +static int tracing_set_tracer(const char *buf) { struct trace_array *tr = &global_trace; struct tracer *t; @@ -3163,5 +3190,26 @@ out_free_buffer_mask: out: return ret; } + +__init static int clear_boot_tracer(void) +{ + /* + * The default tracer at boot buffer is an init section. + * This function is called in lateinit. If we did not + * find the boot tracer, then clear it out, to prevent + * later registration from accessing the buffer that is + * about to be freed. + */ + if (!default_bootup_tracer) + return 0; + + printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n", + default_bootup_tracer); + default_bootup_tracer = NULL; + + return 0; +} + early_initcall(tracer_alloc_buffers); fs_initcall(tracer_init_debugfs); +late_initcall(clear_boot_tracer); -- cgit v1.2.3-18-g5258 From 79fb0768fbd371f3b94d909f51f587b3a24ab272 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 2 Feb 2009 21:38:33 -0500 Subject: trace: let boot trace be chosen by command line Now that we have a working ftrace= function, make the boot tracer get activated by it. This way we can turn it on or off without recompiling the kernel, as well as keeping the selftests on. The selftests are disabled whenever a default tracer starts running. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2c720c79bc6..40edef4255c 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3167,12 +3167,9 @@ __init static int tracer_alloc_buffers(void) trace_init_cmdlines(); register_tracer(&nop_trace); + current_trace = &nop_trace; #ifdef CONFIG_BOOT_TRACER register_tracer(&boot_tracer); - current_trace = &boot_tracer; - current_trace->init(&global_trace); -#else - current_trace = &nop_trace; #endif /* All seems OK, enable tracing */ tracing_disabled = 0; -- cgit v1.2.3-18-g5258 From c4a8e8be2d43cc22b371e8e9c05c253409759d94 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 2 Feb 2009 20:29:21 -0200 Subject: trace: better manage the context info for events Impact: make trace_event more convenient for tracers All tracers (for the moment) that use the struct trace_event want to have the context info printed before their own output: the pid/cmdline, cpu, and timestamp. But some other tracers that want to implement their trace_event callbacks will not necessary need these information or they may want to format them as they want. This patch adds a new default-enabled trace option: TRACE_ITER_CONTEXT_INFO When disabled through: echo nocontext-info > /debugfs/tracing/trace_options The pid, cpu and timestamps headers will not be printed. IE with the sched_switch tracer with context-info (default): bash-2935 [001] 100.356561: 2935:120:S ==> [001] 0:140:R -0 [000] 100.412804: 0:140:R + [000] 11:115:S events/0 -0 [000] 100.412816: 0:140:R ==> [000] 11:115:R events/0 events/0-11 [000] 100.412829: 11:115:S ==> [000] 0:140:R Without context-info: 2935:120:S ==> [001] 0:140:R 0:140:R + [000] 11:115:S events/0 0:140:R ==> [000] 11:115:R events/0 11:115:S ==> [000] 0:140:R A tracer can disable it at runtime by clearing the bit TRACE_ITER_CONTEXT_INFO in trace_flags. The print routines were renamed to trace_print_context and trace_print_lat_context, so that they can be used by tracers if they want to use them for one of the trace_event callbacks. Signed-off-by: Frederic Weisbecker Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 149 ++++++++++++--------------------------------------- 1 file changed, 35 insertions(+), 114 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2f8ac1f008f..5ec49c3c159 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -227,7 +227,7 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait); /* trace_flags holds trace_options default values */ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | - TRACE_ITER_ANNOTATE; + TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO; /** * trace_wake_up - wake up tasks waiting for trace input @@ -285,6 +285,7 @@ static const char *trace_options[] = { "userstacktrace", "sym-userobj", "printk-msg-only", + "context-info", NULL }; @@ -1171,8 +1172,8 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts) } /* Find the next real entry, without updating the iterator itself */ -static struct trace_entry * -find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts) +struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, + int *ent_cpu, u64 *ent_ts) { return __find_next_entry(iter, ent_cpu, ent_ts); } @@ -1351,57 +1352,6 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter) seq_puts(m, "\n"); } -static void -lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu) -{ - int hardirq, softirq; - char *comm; - - comm = trace_find_cmdline(entry->pid); - - trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid); - trace_seq_printf(s, "%3d", cpu); - trace_seq_printf(s, "%c%c", - (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : - (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 'X' : '.', - ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.')); - - hardirq = entry->flags & TRACE_FLAG_HARDIRQ; - softirq = entry->flags & TRACE_FLAG_SOFTIRQ; - if (hardirq && softirq) { - trace_seq_putc(s, 'H'); - } else { - if (hardirq) { - trace_seq_putc(s, 'h'); - } else { - if (softirq) - trace_seq_putc(s, 's'); - else - trace_seq_putc(s, '.'); - } - } - - if (entry->preempt_count) - trace_seq_printf(s, "%x", entry->preempt_count); - else - trace_seq_puts(s, "."); -} - -unsigned long preempt_mark_thresh = 100; - -static void -lat_print_timestamp(struct trace_seq *s, u64 abs_usecs, - unsigned long rel_usecs) -{ - trace_seq_printf(s, " %4lldus", abs_usecs); - if (rel_usecs > preempt_mark_thresh) - trace_seq_puts(s, "!: "); - else if (rel_usecs > 1) - trace_seq_puts(s, "+: "); - else - trace_seq_puts(s, " : "); -} - static void test_cpu_buff_start(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; @@ -1419,46 +1369,24 @@ static void test_cpu_buff_start(struct trace_iterator *iter) trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu); } -static enum print_line_t -print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) +static enum print_line_t print_lat_fmt(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); - struct trace_entry *next_entry; struct trace_event *event; - unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE); struct trace_entry *entry = iter->ent; - unsigned long abs_usecs; - unsigned long rel_usecs; - u64 next_ts; - char *comm; int ret; test_cpu_buff_start(iter); - next_entry = find_next_entry(iter, NULL, &next_ts); - if (!next_entry) - next_ts = iter->ts; - rel_usecs = ns2usecs(next_ts - iter->ts); - abs_usecs = ns2usecs(iter->ts - iter->tr->time_start); - - if (verbose) { - comm = trace_find_cmdline(entry->pid); - trace_seq_printf(s, "%16s %5d %3d %d %08x %08x [%08lx]" - " %ld.%03ldms (+%ld.%03ldms): ", - comm, - entry->pid, cpu, entry->flags, - entry->preempt_count, trace_idx, - ns2usecs(iter->ts), - abs_usecs/1000, - abs_usecs % 1000, rel_usecs/1000, - rel_usecs % 1000); - } else { - lat_print_generic(s, entry, cpu); - lat_print_timestamp(s, abs_usecs, rel_usecs); + event = ftrace_find_event(entry->type); + + if (trace_flags & TRACE_ITER_CONTEXT_INFO) { + ret = trace_print_lat_context(iter); + if (ret) + return ret; } - event = ftrace_find_event(entry->type); if (event && event->latency_trace) { ret = event->latency_trace(s, entry, sym_flags); if (ret) @@ -1476,33 +1404,20 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); struct trace_entry *entry; struct trace_event *event; - unsigned long usec_rem; - unsigned long long t; - unsigned long secs; - char *comm; int ret; entry = iter->ent; test_cpu_buff_start(iter); - comm = trace_find_cmdline(iter->ent->pid); - - t = ns2usecs(iter->ts); - usec_rem = do_div(t, 1000000ULL); - secs = (unsigned long)t; + event = ftrace_find_event(entry->type); - ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - ret = trace_seq_printf(s, "[%03d] ", iter->cpu); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; + if (trace_flags & TRACE_ITER_CONTEXT_INFO) { + ret = trace_print_context(iter); + if (ret) + return ret; + } - event = ftrace_find_event(entry->type); if (event && event->trace) { ret = event->trace(s, entry, sym_flags); if (ret) @@ -1525,10 +1440,12 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter) entry = iter->ent; - ret = trace_seq_printf(s, "%d %d %llu ", - entry->pid, iter->cpu, iter->ts); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; + if (trace_flags & TRACE_ITER_CONTEXT_INFO) { + ret = trace_seq_printf(s, "%d %d %llu ", + entry->pid, iter->cpu, iter->ts); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + } event = ftrace_find_event(entry->type); if (event && event->raw) { @@ -1553,9 +1470,11 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter) entry = iter->ent; - SEQ_PUT_HEX_FIELD_RET(s, entry->pid); - SEQ_PUT_HEX_FIELD_RET(s, iter->cpu); - SEQ_PUT_HEX_FIELD_RET(s, iter->ts); + if (trace_flags & TRACE_ITER_CONTEXT_INFO) { + SEQ_PUT_HEX_FIELD_RET(s, entry->pid); + SEQ_PUT_HEX_FIELD_RET(s, iter->cpu); + SEQ_PUT_HEX_FIELD_RET(s, iter->ts); + } event = ftrace_find_event(entry->type); if (event && event->hex) @@ -1575,7 +1494,7 @@ static enum print_line_t print_printk_msg_only(struct trace_iterator *iter) trace_assign_type(field, entry); - ret = trace_seq_printf(s, field->buf); + ret = trace_seq_printf(s, "%s", field->buf); if (!ret) return TRACE_TYPE_PARTIAL_LINE; @@ -1590,9 +1509,11 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter) entry = iter->ent; - SEQ_PUT_FIELD_RET(s, entry->pid); - SEQ_PUT_FIELD_RET(s, entry->cpu); - SEQ_PUT_FIELD_RET(s, iter->ts); + if (trace_flags & TRACE_ITER_CONTEXT_INFO) { + SEQ_PUT_FIELD_RET(s, entry->pid); + SEQ_PUT_FIELD_RET(s, entry->cpu); + SEQ_PUT_FIELD_RET(s, iter->ts); + } event = ftrace_find_event(entry->type); if (event && event->binary) @@ -1643,7 +1564,7 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter) return print_raw_fmt(iter); if (iter->iter_flags & TRACE_FILE_LAT_FMT) - return print_lat_fmt(iter, iter->idx, iter->cpu); + return print_lat_fmt(iter); return print_trace_fmt(iter); } -- cgit v1.2.3-18-g5258 From 2c9b238eb325895d3312dad64e2685783575e474 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 2 Feb 2009 20:30:12 -0200 Subject: trace: Change struct trace_event callbacks parameter list Impact: API change The trace_seq and trace_entry are in trace_iterator, where there are more fields that may be needed by tracers, so just pass the tracer_iterator as is already the case for struct tracer->print_line. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 5ec49c3c159..152d0969adf 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1388,7 +1388,7 @@ static enum print_line_t print_lat_fmt(struct trace_iterator *iter) } if (event && event->latency_trace) { - ret = event->latency_trace(s, entry, sym_flags); + ret = event->latency_trace(iter, sym_flags); if (ret) return ret; return TRACE_TYPE_HANDLED; @@ -1419,7 +1419,7 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) } if (event && event->trace) { - ret = event->trace(s, entry, sym_flags); + ret = event->trace(iter, sym_flags); if (ret) return ret; return TRACE_TYPE_HANDLED; @@ -1449,7 +1449,7 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter) event = ftrace_find_event(entry->type); if (event && event->raw) { - ret = event->raw(s, entry, 0); + ret = event->raw(iter, 0); if (ret) return ret; return TRACE_TYPE_HANDLED; @@ -1478,7 +1478,7 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter) event = ftrace_find_event(entry->type); if (event && event->hex) - event->hex(s, entry, 0); + event->hex(iter, 0); SEQ_PUT_FIELD_RET(s, newline); @@ -1517,7 +1517,7 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter) event = ftrace_find_event(entry->type); if (event && event->binary) - event->binary(s, entry, 0); + event->binary(iter, 0); return TRACE_TYPE_HANDLED; } -- cgit v1.2.3-18-g5258 From d9793bd8018f835c64b10f44e278c86cecb8e932 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 3 Feb 2009 20:20:41 -0200 Subject: trace: judicious error checking of trace_seq results Impact: bugfix and cleanup Some callsites were returning either TRACE_ITER_PARTIAL_LINE if the trace_seq routines (trace_seq_printf, etc) returned 0 meaning its buffer was full, or zero otherwise. But... /* Return values for print_line callback */ enum print_line_t { TRACE_TYPE_PARTIAL_LINE = 0, /* Retry after flushing the seq */ TRACE_TYPE_HANDLED = 1, TRACE_TYPE_UNHANDLED = 2 /* Relay to other output functions */ }; In other cases the return value was not being relayed at all. Most of the time it didn't hurt because the page wasn't get filled, but for correctness sake, handle the return values everywhere. Signed-off-by: Arnaldo Carvalho de Melo Acked-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 75 ++++++++++++++++++++++++---------------------------- 1 file changed, 34 insertions(+), 41 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index bbdfaa2cbdb..5822ff4e5a3 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1402,27 +1402,25 @@ static enum print_line_t print_lat_fmt(struct trace_iterator *iter) unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); struct trace_event *event; struct trace_entry *entry = iter->ent; - int ret; test_cpu_buff_start(iter); event = ftrace_find_event(entry->type); if (trace_flags & TRACE_ITER_CONTEXT_INFO) { - ret = trace_print_lat_context(iter); - if (ret) - return ret; + if (!trace_print_lat_context(iter)) + goto partial; } - if (event && event->latency_trace) { - ret = event->latency_trace(iter, sym_flags); - if (ret) - return ret; - return TRACE_TYPE_HANDLED; - } + if (event && event->latency_trace) + return event->latency_trace(iter, sym_flags); + + if (!trace_seq_printf(s, "Unknown type %d\n", entry->type)) + goto partial; - trace_seq_printf(s, "Unknown type %d\n", entry->type); return TRACE_TYPE_HANDLED; +partial: + return TRACE_TYPE_PARTIAL_LINE; } static enum print_line_t print_trace_fmt(struct trace_iterator *iter) @@ -1431,7 +1429,6 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); struct trace_entry *entry; struct trace_event *event; - int ret; entry = iter->ent; @@ -1440,22 +1437,19 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) event = ftrace_find_event(entry->type); if (trace_flags & TRACE_ITER_CONTEXT_INFO) { - ret = trace_print_context(iter); - if (ret) - return ret; + if (!trace_print_context(iter)) + goto partial; } - if (event && event->trace) { - ret = event->trace(iter, sym_flags); - if (ret) - return ret; - return TRACE_TYPE_HANDLED; - } - ret = trace_seq_printf(s, "Unknown type %d\n", entry->type); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; + if (event && event->trace) + return event->trace(iter, sym_flags); + + if (!trace_seq_printf(s, "Unknown type %d\n", entry->type)) + goto partial; return TRACE_TYPE_HANDLED; +partial: + return TRACE_TYPE_PARTIAL_LINE; } static enum print_line_t print_raw_fmt(struct trace_iterator *iter) @@ -1463,29 +1457,25 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter) struct trace_seq *s = &iter->seq; struct trace_entry *entry; struct trace_event *event; - int ret; entry = iter->ent; if (trace_flags & TRACE_ITER_CONTEXT_INFO) { - ret = trace_seq_printf(s, "%d %d %llu ", - entry->pid, iter->cpu, iter->ts); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; + if (!trace_seq_printf(s, "%d %d %llu ", + entry->pid, iter->cpu, iter->ts)) + goto partial; } event = ftrace_find_event(entry->type); - if (event && event->raw) { - ret = event->raw(iter, 0); - if (ret) - return ret; - return TRACE_TYPE_HANDLED; - } - ret = trace_seq_printf(s, "%d ?\n", entry->type); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; + if (event && event->raw) + return event->raw(iter, 0); + + if (!trace_seq_printf(s, "%d ?\n", entry->type)) + goto partial; return TRACE_TYPE_HANDLED; +partial: + return TRACE_TYPE_PARTIAL_LINE; } static enum print_line_t print_hex_fmt(struct trace_iterator *iter) @@ -1504,8 +1494,11 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter) } event = ftrace_find_event(entry->type); - if (event && event->hex) - event->hex(iter, 0); + if (event && event->hex) { + int ret = event->hex(iter, 0); + if (ret != TRACE_TYPE_HANDLED) + return ret; + } SEQ_PUT_FIELD_RET(s, newline); @@ -1544,7 +1537,7 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter) event = ftrace_find_event(entry->type); if (event && event->binary) - event->binary(iter, 0); + return event->binary(iter, 0); return TRACE_TYPE_HANDLED; } -- cgit v1.2.3-18-g5258 From ae7462b4f1fe1f36b5d562dbd5202a2eba01f072 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 3 Feb 2009 22:05:50 -0200 Subject: trace: make the trace_event callbacks return enum print_line_t As they actually all return these enumerators. Reported-by: Frederic Weisbecker Signed-off-by: Arnaldo Carvalho de Melo Acked-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 5822ff4e5a3..fd51cf0b94c 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1495,7 +1495,7 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter) event = ftrace_find_event(entry->type); if (event && event->hex) { - int ret = event->hex(iter, 0); + enum print_line_t ret = event->hex(iter, 0); if (ret != TRACE_TYPE_HANDLED) return ret; } -- cgit v1.2.3-18-g5258 From 268ccda0cb4d1292029d07ee3dbd07117baf6ecb Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 4 Feb 2009 20:16:39 -0200 Subject: trace: assign defaults at register_ftrace_event Impact: simplification of tracers As all tracers are doing this we might as well do it in register_ftrace_event and save one branch each time we call these callbacks. Signed-off-by: Arnaldo Carvalho de Melo Acked-by: Frederic Weisbecker Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index fd51cf0b94c..a5e4c0af9bb 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1412,7 +1412,7 @@ static enum print_line_t print_lat_fmt(struct trace_iterator *iter) goto partial; } - if (event && event->latency_trace) + if (event) return event->latency_trace(iter, sym_flags); if (!trace_seq_printf(s, "Unknown type %d\n", entry->type)) @@ -1441,7 +1441,7 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) goto partial; } - if (event && event->trace) + if (event) return event->trace(iter, sym_flags); if (!trace_seq_printf(s, "Unknown type %d\n", entry->type)) @@ -1467,7 +1467,7 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter) } event = ftrace_find_event(entry->type); - if (event && event->raw) + if (event) return event->raw(iter, 0); if (!trace_seq_printf(s, "%d ?\n", entry->type)) @@ -1494,7 +1494,7 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter) } event = ftrace_find_event(entry->type); - if (event && event->hex) { + if (event) { enum print_line_t ret = event->hex(iter, 0); if (ret != TRACE_TYPE_HANDLED) return ret; @@ -1536,10 +1536,7 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter) } event = ftrace_find_event(entry->type); - if (event && event->binary) - return event->binary(iter, 0); - - return TRACE_TYPE_HANDLED; + return event ? event->binary(iter, 0) : TRACE_TYPE_HANDLED; } static int trace_empty(struct trace_iterator *iter) -- cgit v1.2.3-18-g5258 From 7be421510b91491d5aa5a29fa1005712039b95af Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 5 Feb 2009 01:13:37 -0500 Subject: trace: Remove unused trace_array_cpu parameter Impact: cleanup Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 47 ++++++++++++++++++----------------------------- 1 file changed, 18 insertions(+), 29 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a5e4c0af9bb..1d4ff568cc4 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -776,7 +776,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, } void -trace_function(struct trace_array *tr, struct trace_array_cpu *data, +trace_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, unsigned long flags, int pc) { @@ -802,7 +802,6 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data, #ifdef CONFIG_FUNCTION_GRAPH_TRACER static void __trace_graph_entry(struct trace_array *tr, - struct trace_array_cpu *data, struct ftrace_graph_ent *trace, unsigned long flags, int pc) @@ -826,7 +825,6 @@ static void __trace_graph_entry(struct trace_array *tr, } static void __trace_graph_return(struct trace_array *tr, - struct trace_array_cpu *data, struct ftrace_graph_ret *trace, unsigned long flags, int pc) @@ -856,11 +854,10 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data, int pc) { if (likely(!atomic_read(&data->disabled))) - trace_function(tr, data, ip, parent_ip, flags, pc); + trace_function(tr, ip, parent_ip, flags, pc); } static void __ftrace_trace_stack(struct trace_array *tr, - struct trace_array_cpu *data, unsigned long flags, int skip, int pc) { @@ -891,27 +888,24 @@ static void __ftrace_trace_stack(struct trace_array *tr, } static void ftrace_trace_stack(struct trace_array *tr, - struct trace_array_cpu *data, unsigned long flags, int skip, int pc) { if (!(trace_flags & TRACE_ITER_STACKTRACE)) return; - __ftrace_trace_stack(tr, data, flags, skip, pc); + __ftrace_trace_stack(tr, flags, skip, pc); } void __trace_stack(struct trace_array *tr, - struct trace_array_cpu *data, unsigned long flags, int skip, int pc) { - __ftrace_trace_stack(tr, data, flags, skip, pc); + __ftrace_trace_stack(tr, flags, skip, pc); } static void ftrace_trace_userstack(struct trace_array *tr, - struct trace_array_cpu *data, - unsigned long flags, int pc) + unsigned long flags, int pc) { #ifdef CONFIG_STACKTRACE struct ring_buffer_event *event; @@ -942,20 +936,17 @@ static void ftrace_trace_userstack(struct trace_array *tr, #endif } -void __trace_userstack(struct trace_array *tr, - struct trace_array_cpu *data, - unsigned long flags) +void __trace_userstack(struct trace_array *tr, unsigned long flags) { - ftrace_trace_userstack(tr, data, flags, preempt_count()); + ftrace_trace_userstack(tr, flags, preempt_count()); } static void -ftrace_trace_special(void *__tr, void *__data, +ftrace_trace_special(void *__tr, unsigned long arg1, unsigned long arg2, unsigned long arg3, int pc) { struct ring_buffer_event *event; - struct trace_array_cpu *data = __data; struct trace_array *tr = __tr; struct special_entry *entry; unsigned long irq_flags; @@ -971,8 +962,8 @@ ftrace_trace_special(void *__tr, void *__data, entry->arg2 = arg2; entry->arg3 = arg3; ring_buffer_unlock_commit(tr->buffer, event, irq_flags); - ftrace_trace_stack(tr, data, irq_flags, 4, pc); - ftrace_trace_userstack(tr, data, irq_flags, pc); + ftrace_trace_stack(tr, irq_flags, 4, pc); + ftrace_trace_userstack(tr, irq_flags, pc); trace_wake_up(); } @@ -981,12 +972,11 @@ void __trace_special(void *__tr, void *__data, unsigned long arg1, unsigned long arg2, unsigned long arg3) { - ftrace_trace_special(__tr, __data, arg1, arg2, arg3, preempt_count()); + ftrace_trace_special(__tr, arg1, arg2, arg3, preempt_count()); } void tracing_sched_switch_trace(struct trace_array *tr, - struct trace_array_cpu *data, struct task_struct *prev, struct task_struct *next, unsigned long flags, int pc) @@ -1010,13 +1000,12 @@ tracing_sched_switch_trace(struct trace_array *tr, entry->next_state = next->state; entry->next_cpu = task_cpu(next); ring_buffer_unlock_commit(tr->buffer, event, irq_flags); - ftrace_trace_stack(tr, data, flags, 5, pc); - ftrace_trace_userstack(tr, data, flags, pc); + ftrace_trace_stack(tr, flags, 5, pc); + ftrace_trace_userstack(tr, flags, pc); } void tracing_sched_wakeup_trace(struct trace_array *tr, - struct trace_array_cpu *data, struct task_struct *wakee, struct task_struct *curr, unsigned long flags, int pc) @@ -1040,8 +1029,8 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry->next_state = wakee->state; entry->next_cpu = task_cpu(wakee); ring_buffer_unlock_commit(tr->buffer, event, irq_flags); - ftrace_trace_stack(tr, data, flags, 6, pc); - ftrace_trace_userstack(tr, data, flags, pc); + ftrace_trace_stack(tr, flags, 6, pc); + ftrace_trace_userstack(tr, flags, pc); trace_wake_up(); } @@ -1064,7 +1053,7 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) data = tr->data[cpu]; if (likely(atomic_inc_return(&data->disabled) == 1)) - ftrace_trace_special(tr, data, arg1, arg2, arg3, pc); + ftrace_trace_special(tr, arg1, arg2, arg3, pc); atomic_dec(&data->disabled); local_irq_restore(flags); @@ -1092,7 +1081,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) { pc = preempt_count(); - __trace_graph_entry(tr, data, trace, flags, pc); + __trace_graph_entry(tr, trace, flags, pc); } /* Only do the atomic if it is not already set */ if (!test_tsk_trace_graph(current)) @@ -1118,7 +1107,7 @@ void trace_graph_return(struct ftrace_graph_ret *trace) disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) { pc = preempt_count(); - __trace_graph_return(tr, data, trace, flags, pc); + __trace_graph_return(tr, trace, flags, pc); } if (!trace->depth) clear_tsk_trace_graph(current); -- cgit v1.2.3-18-g5258 From dac74940289f350c2590bec92737833bad608541 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 5 Feb 2009 01:13:38 -0500 Subject: trace: code style clean up Ingo Molnar suggested using goto logic to keep the indentation down and to be able to remove the nasty line breaks. This actually makes the code a bit more readable. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 1d4ff568cc4..3536ef41575 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -522,23 +522,24 @@ int register_tracer(struct tracer *type) tracing_selftest_running = false; mutex_unlock(&trace_types_lock); - if (!ret && default_bootup_tracer) { - if (!strncmp(default_bootup_tracer, type->name, - BOOTUP_TRACER_SIZE)) { - printk(KERN_INFO "Starting tracer '%s'\n", - type->name); - /* Do we want this tracer to start on bootup? */ - tracing_set_tracer(type->name); - default_bootup_tracer = NULL; - /* disable other selftests, since this will break it. */ - tracing_selftest_disabled = 1; + if (ret || !default_bootup_tracer) + goto out_unlock; + + if (strncmp(default_bootup_tracer, type->name, BOOTUP_TRACER_SIZE)) + goto out_unlock; + + printk(KERN_INFO "Starting tracer '%s'\n", type->name); + /* Do we want this tracer to start on bootup? */ + tracing_set_tracer(type->name); + default_bootup_tracer = NULL; + /* disable other selftests, since this will break it. */ + tracing_selftest_disabled = 1; #ifdef CONFIG_FTRACE_STARTUP_TEST - printk(KERN_INFO "Disabling FTRACE selftests due" - " to running tracer '%s'\n", type->name); + printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n", + type->name); #endif - } - } + out_unlock: lock_kernel(); return ret; } -- cgit v1.2.3-18-g5258 From 0a9877514c4fed10a70720293b37213dd172ee3e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 5 Feb 2009 16:12:56 -0200 Subject: ring_buffer: remove unused flags parameter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Impact: API change, cleanup >From ring_buffer_{lock_reserve,unlock_commit}. $ codiff /tmp/vmlinux.before /tmp/vmlinux.after linux-2.6-tip/kernel/trace/trace.c: trace_vprintk | -14 trace_graph_return | -14 trace_graph_entry | -10 trace_function | -8 __ftrace_trace_stack | -8 ftrace_trace_userstack | -8 tracing_sched_switch_trace | -8 ftrace_trace_special | -12 tracing_sched_wakeup_trace | -8 9 functions changed, 90 bytes removed, diff: -90 linux-2.6-tip/block/blktrace.c: __blk_add_trace | -1 1 function changed, 1 bytes removed, diff: -1 /tmp/vmlinux.after: 10 functions changed, 91 bytes removed, diff: -91 Signed-off-by: Arnaldo Carvalho de Melo Acked-by: Frédéric Weisbecker Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 56 +++++++++++++++++++--------------------------------- 1 file changed, 20 insertions(+), 36 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 3536ef41575..eb453a238a6 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -783,14 +783,12 @@ trace_function(struct trace_array *tr, { struct ring_buffer_event *event; struct ftrace_entry *entry; - unsigned long irq_flags; /* If we are reading the ring buffer, don't trace */ if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) return; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), - &irq_flags); + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); if (!event) return; entry = ring_buffer_event_data(event); @@ -798,7 +796,7 @@ trace_function(struct trace_array *tr, entry->ent.type = TRACE_FN; entry->ip = ip; entry->parent_ip = parent_ip; - ring_buffer_unlock_commit(tr->buffer, event, irq_flags); + ring_buffer_unlock_commit(tr->buffer, event); } #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -809,20 +807,18 @@ static void __trace_graph_entry(struct trace_array *tr, { struct ring_buffer_event *event; struct ftrace_graph_ent_entry *entry; - unsigned long irq_flags; if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) return; - event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry), - &irq_flags); + event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry)); if (!event) return; entry = ring_buffer_event_data(event); tracing_generic_entry_update(&entry->ent, flags, pc); entry->ent.type = TRACE_GRAPH_ENT; entry->graph_ent = *trace; - ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags); + ring_buffer_unlock_commit(global_trace.buffer, event); } static void __trace_graph_return(struct trace_array *tr, @@ -832,20 +828,18 @@ static void __trace_graph_return(struct trace_array *tr, { struct ring_buffer_event *event; struct ftrace_graph_ret_entry *entry; - unsigned long irq_flags; if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) return; - event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry), - &irq_flags); + event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry)); if (!event) return; entry = ring_buffer_event_data(event); tracing_generic_entry_update(&entry->ent, flags, pc); entry->ent.type = TRACE_GRAPH_RET; entry->ret = *trace; - ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags); + ring_buffer_unlock_commit(global_trace.buffer, event); } #endif @@ -866,10 +860,8 @@ static void __ftrace_trace_stack(struct trace_array *tr, struct ring_buffer_event *event; struct stack_entry *entry; struct stack_trace trace; - unsigned long irq_flags; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), - &irq_flags); + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); if (!event) return; entry = ring_buffer_event_data(event); @@ -884,7 +876,7 @@ static void __ftrace_trace_stack(struct trace_array *tr, trace.entries = entry->caller; save_stack_trace(&trace); - ring_buffer_unlock_commit(tr->buffer, event, irq_flags); + ring_buffer_unlock_commit(tr->buffer, event); #endif } @@ -912,13 +904,11 @@ static void ftrace_trace_userstack(struct trace_array *tr, struct ring_buffer_event *event; struct userstack_entry *entry; struct stack_trace trace; - unsigned long irq_flags; if (!(trace_flags & TRACE_ITER_USERSTACKTRACE)) return; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), - &irq_flags); + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); if (!event) return; entry = ring_buffer_event_data(event); @@ -933,7 +923,7 @@ static void ftrace_trace_userstack(struct trace_array *tr, trace.entries = entry->caller; save_stack_trace_user(&trace); - ring_buffer_unlock_commit(tr->buffer, event, irq_flags); + ring_buffer_unlock_commit(tr->buffer, event); #endif } @@ -950,10 +940,8 @@ ftrace_trace_special(void *__tr, struct ring_buffer_event *event; struct trace_array *tr = __tr; struct special_entry *entry; - unsigned long irq_flags; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), - &irq_flags); + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); if (!event) return; entry = ring_buffer_event_data(event); @@ -962,9 +950,9 @@ ftrace_trace_special(void *__tr, entry->arg1 = arg1; entry->arg2 = arg2; entry->arg3 = arg3; - ring_buffer_unlock_commit(tr->buffer, event, irq_flags); - ftrace_trace_stack(tr, irq_flags, 4, pc); - ftrace_trace_userstack(tr, irq_flags, pc); + ring_buffer_unlock_commit(tr->buffer, event); + ftrace_trace_stack(tr, 0, 4, pc); + ftrace_trace_userstack(tr, 0, pc); trace_wake_up(); } @@ -984,10 +972,8 @@ tracing_sched_switch_trace(struct trace_array *tr, { struct ring_buffer_event *event; struct ctx_switch_entry *entry; - unsigned long irq_flags; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), - &irq_flags); + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); if (!event) return; entry = ring_buffer_event_data(event); @@ -1000,7 +986,7 @@ tracing_sched_switch_trace(struct trace_array *tr, entry->next_prio = next->prio; entry->next_state = next->state; entry->next_cpu = task_cpu(next); - ring_buffer_unlock_commit(tr->buffer, event, irq_flags); + ring_buffer_unlock_commit(tr->buffer, event); ftrace_trace_stack(tr, flags, 5, pc); ftrace_trace_userstack(tr, flags, pc); } @@ -1013,10 +999,8 @@ tracing_sched_wakeup_trace(struct trace_array *tr, { struct ring_buffer_event *event; struct ctx_switch_entry *entry; - unsigned long irq_flags; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), - &irq_flags); + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); if (!event) return; entry = ring_buffer_event_data(event); @@ -1029,7 +1013,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry->next_prio = wakee->prio; entry->next_state = wakee->state; entry->next_cpu = task_cpu(wakee); - ring_buffer_unlock_commit(tr->buffer, event, irq_flags); + ring_buffer_unlock_commit(tr->buffer, event); ftrace_trace_stack(tr, flags, 6, pc); ftrace_trace_userstack(tr, flags, pc); @@ -2841,7 +2825,7 @@ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) trace_buf[len] = 0; size = sizeof(*entry) + len + 1; - event = ring_buffer_lock_reserve(tr->buffer, size, &irq_flags); + event = ring_buffer_lock_reserve(tr->buffer, size); if (!event) goto out_unlock; entry = ring_buffer_event_data(event); @@ -2852,7 +2836,7 @@ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) memcpy(&entry->buf, trace_buf, len); entry->buf[len] = 0; - ring_buffer_unlock_commit(tr->buffer, event, irq_flags); + ring_buffer_unlock_commit(tr->buffer, event); out_unlock: spin_unlock_irqrestore(&trace_buf_lock, irq_flags); -- cgit v1.2.3-18-g5258 From 51a763dd84253bab1d0a1e68e11a7753d1b702ca Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 5 Feb 2009 16:14:13 -0200 Subject: tracing: Introduce trace_buffer_{lock_reserve,unlock_commit} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Impact: new API These new functions do what previously was being open coded, reducing the number of details ftrace plugin writers have to worry about. It also standardizes the handling of stacktrace, userstacktrace and other trace options we may introduce in the future. With this patch, for instance, the blk tracer (and some others already in the tree) can use the "userstacktrace" /d/tracing/trace_options facility. $ codiff /tmp/vmlinux.before /tmp/vmlinux.after linux-2.6-tip/kernel/trace/trace.c: trace_vprintk | -5 trace_graph_return | -22 trace_graph_entry | -26 trace_function | -45 __ftrace_trace_stack | -27 ftrace_trace_userstack | -29 tracing_sched_switch_trace | -66 tracing_stop | +1 trace_seq_to_user | -1 ftrace_trace_special | -63 ftrace_special | +1 tracing_sched_wakeup_trace | -70 tracing_reset_online_cpus | -1 13 functions changed, 2 bytes added, 355 bytes removed, diff: -353 linux-2.6-tip/block/blktrace.c: __blk_add_trace | -58 1 function changed, 58 bytes removed, diff: -58 linux-2.6-tip/kernel/trace/trace.c: trace_buffer_lock_reserve | +88 trace_buffer_unlock_commit | +86 2 functions changed, 174 bytes added, diff: +174 /tmp/vmlinux.after: 16 functions changed, 176 bytes added, 413 bytes removed, diff: -237 Signed-off-by: Arnaldo Carvalho de Melo Acked-by: Frédéric Weisbecker Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 94 +++++++++++++++++++++++++++++----------------------- 1 file changed, 53 insertions(+), 41 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index eb453a238a6..8fad3776e84 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -776,6 +776,39 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0); } +struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr, + unsigned char type, + unsigned long len, + unsigned long flags, int pc) +{ + struct ring_buffer_event *event; + + event = ring_buffer_lock_reserve(tr->buffer, len); + if (event != NULL) { + struct trace_entry *ent = ring_buffer_event_data(event); + + tracing_generic_entry_update(ent, flags, pc); + ent->type = type; + } + + return event; +} +static void ftrace_trace_stack(struct trace_array *tr, + unsigned long flags, int skip, int pc); +static void ftrace_trace_userstack(struct trace_array *tr, + unsigned long flags, int pc); + +void trace_buffer_unlock_commit(struct trace_array *tr, + struct ring_buffer_event *event, + unsigned long flags, int pc) +{ + ring_buffer_unlock_commit(tr->buffer, event); + + ftrace_trace_stack(tr, flags, 6, pc); + ftrace_trace_userstack(tr, flags, pc); + trace_wake_up(); +} + void trace_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, unsigned long flags, @@ -788,12 +821,11 @@ trace_function(struct trace_array *tr, if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) return; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); + event = trace_buffer_lock_reserve(tr, TRACE_FN, sizeof(*entry), + flags, pc); if (!event) return; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, flags, pc); - entry->ent.type = TRACE_FN; entry->ip = ip; entry->parent_ip = parent_ip; ring_buffer_unlock_commit(tr->buffer, event); @@ -811,12 +843,11 @@ static void __trace_graph_entry(struct trace_array *tr, if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) return; - event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry)); + event = trace_buffer_lock_reserve(&global_trace, TRACE_GRAPH_ENT, + sizeof(*entry), flags, pc); if (!event) return; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, flags, pc); - entry->ent.type = TRACE_GRAPH_ENT; entry->graph_ent = *trace; ring_buffer_unlock_commit(global_trace.buffer, event); } @@ -832,12 +863,11 @@ static void __trace_graph_return(struct trace_array *tr, if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) return; - event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry)); + event = trace_buffer_lock_reserve(&global_trace, TRACE_GRAPH_RET, + sizeof(*entry), flags, pc); if (!event) return; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, flags, pc); - entry->ent.type = TRACE_GRAPH_RET; entry->ret = *trace; ring_buffer_unlock_commit(global_trace.buffer, event); } @@ -861,13 +891,11 @@ static void __ftrace_trace_stack(struct trace_array *tr, struct stack_entry *entry; struct stack_trace trace; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); + event = trace_buffer_lock_reserve(tr, TRACE_STACK, + sizeof(*entry), flags, pc); if (!event) return; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, flags, pc); - entry->ent.type = TRACE_STACK; - memset(&entry->caller, 0, sizeof(entry->caller)); trace.nr_entries = 0; @@ -908,12 +936,11 @@ static void ftrace_trace_userstack(struct trace_array *tr, if (!(trace_flags & TRACE_ITER_USERSTACKTRACE)) return; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); + event = trace_buffer_lock_reserve(tr, TRACE_USER_STACK, + sizeof(*entry), flags, pc); if (!event) return; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, flags, pc); - entry->ent.type = TRACE_USER_STACK; memset(&entry->caller, 0, sizeof(entry->caller)); @@ -941,20 +968,15 @@ ftrace_trace_special(void *__tr, struct trace_array *tr = __tr; struct special_entry *entry; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); + event = trace_buffer_lock_reserve(tr, TRACE_SPECIAL, + sizeof(*entry), 0, pc); if (!event) return; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, 0, pc); - entry->ent.type = TRACE_SPECIAL; entry->arg1 = arg1; entry->arg2 = arg2; entry->arg3 = arg3; - ring_buffer_unlock_commit(tr->buffer, event); - ftrace_trace_stack(tr, 0, 4, pc); - ftrace_trace_userstack(tr, 0, pc); - - trace_wake_up(); + trace_buffer_unlock_commit(tr, event, 0, pc); } void @@ -973,12 +995,11 @@ tracing_sched_switch_trace(struct trace_array *tr, struct ring_buffer_event *event; struct ctx_switch_entry *entry; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); + event = trace_buffer_lock_reserve(tr, TRACE_CTX, + sizeof(*entry), flags, pc); if (!event) return; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, flags, pc); - entry->ent.type = TRACE_CTX; entry->prev_pid = prev->pid; entry->prev_prio = prev->prio; entry->prev_state = prev->state; @@ -986,9 +1007,7 @@ tracing_sched_switch_trace(struct trace_array *tr, entry->next_prio = next->prio; entry->next_state = next->state; entry->next_cpu = task_cpu(next); - ring_buffer_unlock_commit(tr->buffer, event); - ftrace_trace_stack(tr, flags, 5, pc); - ftrace_trace_userstack(tr, flags, pc); + trace_buffer_unlock_commit(tr, event, flags, pc); } void @@ -1000,12 +1019,11 @@ tracing_sched_wakeup_trace(struct trace_array *tr, struct ring_buffer_event *event; struct ctx_switch_entry *entry; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); + event = trace_buffer_lock_reserve(tr, TRACE_WAKE, + sizeof(*entry), flags, pc); if (!event) return; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, flags, pc); - entry->ent.type = TRACE_WAKE; entry->prev_pid = curr->pid; entry->prev_prio = curr->prio; entry->prev_state = curr->state; @@ -1013,11 +1031,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry->next_prio = wakee->prio; entry->next_state = wakee->state; entry->next_cpu = task_cpu(wakee); - ring_buffer_unlock_commit(tr->buffer, event); - ftrace_trace_stack(tr, flags, 6, pc); - ftrace_trace_userstack(tr, flags, pc); - - trace_wake_up(); + trace_buffer_unlock_commit(tr, event, flags, pc); } void @@ -2825,12 +2839,10 @@ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) trace_buf[len] = 0; size = sizeof(*entry) + len + 1; - event = ring_buffer_lock_reserve(tr->buffer, size); + event = trace_buffer_lock_reserve(tr, TRACE_PRINT, size, irq_flags, pc); if (!event) goto out_unlock; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, irq_flags, pc); - entry->ent.type = TRACE_PRINT; entry->ip = ip; entry->depth = depth; -- cgit v1.2.3-18-g5258 From b6f11df26fdc28324cf9c9e3b77f2dc985c1bb13 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 5 Feb 2009 18:02:00 -0200 Subject: trace: Call tracing_reset_online_cpus before tracer->init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Impact: cleanup To make it easy for ftrace plugin writers, as this was open coded in the existing plugins Signed-off-by: Arnaldo Carvalho de Melo Acked-by: Frédéric Weisbecker Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8fad3776e84..ef4dbac9556 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2171,6 +2171,12 @@ tracing_set_trace_read(struct file *filp, char __user *ubuf, return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } +int tracer_init(struct tracer *t, struct trace_array *tr) +{ + tracing_reset_online_cpus(tr); + return t->init(tr); +} + static int tracing_set_tracer(const char *buf) { struct trace_array *tr = &global_trace; @@ -2195,7 +2201,7 @@ static int tracing_set_tracer(const char *buf) current_trace = t; if (t->init) { - ret = t->init(tr); + ret = tracer_init(t, tr); if (ret) goto out; } -- cgit v1.2.3-18-g5258 From 1830b52d0de8c60c4f5dfbac134aa8f69d815801 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 7 Feb 2009 19:38:43 -0500 Subject: trace: remove deprecated entry->cpu Impact: fix to prevent developers from using entry->cpu With the new ring buffer infrastructure, the cpu for the entry is implicit with which CPU buffer it is on. The original code use to record the current cpu into the generic entry header, which can be retrieved by entry->cpu. When the ring buffer was introduced, the users were convert to use the the cpu number of which cpu ring buffer was in use (this was passed to the tracers by the iterator: iter->cpu). Unfortunately, the cpu item in the entry structure was never removed. This allowed for developers to use it instead of the proper iter->cpu, unknowingly, using an uninitialized variable. This was not the fault of the developers, since it would seem like the logical place to retrieve the cpu identifier. This patch removes the cpu item from the entry structure and fixes all the users that should have been using iter->cpu. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index fd51cf0b94c..bd4d9f8818f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1531,7 +1531,7 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter) if (trace_flags & TRACE_ITER_CONTEXT_INFO) { SEQ_PUT_FIELD_RET(s, entry->pid); - SEQ_PUT_FIELD_RET(s, entry->cpu); + SEQ_PUT_FIELD_RET(s, iter->cpu); SEQ_PUT_FIELD_RET(s, iter->ts); } -- cgit v1.2.3-18-g5258 From b5db03c4355e568f1567758287b30a6a262d5057 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 7 Feb 2009 18:52:59 -0200 Subject: tracing: handle unregistering the current tracer Impact: simplification Instead of requiring that plugins have the sequence: my_tracer_stop(my_trace_array); unregister_tracer(my_tracer); it should be possible just do a: unregister_tracer(my_tracer); Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 03fbd4c20bc..93040f1bef1 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -559,6 +559,15 @@ void unregister_tracer(struct tracer *type) found: *t = (*t)->next; + + if (type == current_trace && tracer_enabled) { + tracer_enabled = 0; + tracing_stop(); + if (current_trace->stop) + current_trace->stop(&global_trace); + current_trace = &nop_trace; + } + if (strlen(type->name) != max_tracer_type_len) goto out; -- cgit v1.2.3-18-g5258 From b91facc367366b3f71375f337eb5997ec9ab4e69 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 6 Feb 2009 18:30:44 +0100 Subject: tracing/function-graph-tracer: handle the leaf functions from trace_pipe When one cats the trace file, the leaf functions are printed without brackets: function(); whereas in the trace_pipe file we'll see the following: function() { } This is because the ring_buffer handling is not the same between those two files. On the trace file, when an entry is printed, the iterator advanced and then we can check the next entry. There is no iterator with trace_pipe, the current entry to print has been peeked and not consumed. So checking the next entry will still return the current one while we don't consume it. This patch introduces a new value for the output callbacks to ask the tracing core to not consume the current entry after printing it. We need it because we will have to consume the current entry ourself to check the next one. Now the trace_pipe is able to handle well the leaf functions. Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 93040f1bef1..5b1e9a9e990 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2468,8 +2468,8 @@ waitagain: iter->seq.len = len; break; } - - trace_consume(iter); + if (ret != TRACE_TYPE_NO_CONSUME) + trace_consume(iter); if (iter->seq.len >= cnt) break; -- cgit v1.2.3-18-g5258 From 3c56819b14b00dd449bd776303e61f8532fad09f Mon Sep 17 00:00:00 2001 From: Eduard - Gabriel Munteanu Date: Mon, 9 Feb 2009 08:15:56 +0200 Subject: tracing: splice support for tracing_pipe Added and implemented tracing_pipe_fops->splice_read(). This allows userspace programs to get tracing data more efficiently. Signed-off-by: Eduard - Gabriel Munteanu Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 136 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 136 insertions(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 5b1e9a9e990..9e29fdb0dfe 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -364,6 +365,25 @@ ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt) return cnt; } +ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt) +{ + int len; + void *ret; + + if (s->len <= s->readpos) + return -EBUSY; + + len = s->len - s->readpos; + if (cnt > len) + cnt = len; + ret = memcpy(buf, s->buffer + s->readpos, cnt); + if (!ret) + return -EFAULT; + + s->readpos += len; + return cnt; +} + static void trace_print_seq(struct seq_file *m, struct trace_seq *s) { @@ -2493,6 +2513,121 @@ out: return sret; } +static void tracing_pipe_buf_release(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) +{ + __free_page(buf->page); +} + +static void tracing_spd_release_pipe(struct splice_pipe_desc *spd, + unsigned int idx) +{ + __free_page(spd->pages[idx]); +} + +static struct pipe_buf_operations tracing_pipe_buf_ops = { + .can_merge = 0, + .map = generic_pipe_buf_map, + .unmap = generic_pipe_buf_unmap, + .confirm = generic_pipe_buf_confirm, + .release = tracing_pipe_buf_release, + .steal = generic_pipe_buf_steal, + .get = generic_pipe_buf_get, +}; + +static ssize_t tracing_splice_read_pipe(struct file *filp, + loff_t *ppos, + struct pipe_inode_info *pipe, + size_t len, + unsigned int flags) +{ + struct page *pages[PIPE_BUFFERS]; + struct partial_page partial[PIPE_BUFFERS]; + struct trace_iterator *iter = filp->private_data; + struct splice_pipe_desc spd = { + .pages = pages, + .partial = partial, + .nr_pages = 0, /* This gets updated below. */ + .flags = flags, + .ops = &tracing_pipe_buf_ops, + .spd_release = tracing_spd_release_pipe, + }; + ssize_t ret; + size_t count, rem; + unsigned int i; + + mutex_lock(&trace_types_lock); + + if (iter->trace->splice_read) { + ret = iter->trace->splice_read(iter, filp, + ppos, pipe, len, flags); + if (ret) + goto out; + } + + ret = tracing_wait_pipe(filp); + if (ret <= 0) + goto out; + + if (!iter->ent && !find_next_entry_inc(iter)) { + ret = -EFAULT; + goto out; + } + + /* Fill as many pages as possible. */ + for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) { + pages[i] = alloc_page(GFP_KERNEL); + + /* Seq buffer is page-sized, exactly what we need. */ + for (;;) { + count = iter->seq.len; + ret = print_trace_line(iter); + count = iter->seq.len - count; + if (rem < count) { + rem = 0; + iter->seq.len -= count; + break; + } + if (ret == TRACE_TYPE_PARTIAL_LINE) { + iter->seq.len -= count; + break; + } + + trace_consume(iter); + rem -= count; + if (!find_next_entry_inc(iter)) { + rem = 0; + iter->ent = NULL; + break; + } + } + + /* Copy the data into the page, so we can start over. */ + ret = trace_seq_to_buffer(&iter->seq, + page_address(pages[i]), + iter->seq.len); + if (ret < 0) { + __free_page(pages[i]); + break; + } + partial[i].offset = 0; + partial[i].len = iter->seq.len; + + trace_seq_reset(&iter->seq); + } + + mutex_unlock(&trace_types_lock); + + spd.nr_pages = i; + + return splice_to_pipe(pipe, &spd); + +out: + mutex_unlock(&trace_types_lock); + + return ret; +} + static ssize_t tracing_entries_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) @@ -2656,6 +2791,7 @@ static struct file_operations tracing_pipe_fops = { .open = tracing_open_pipe, .poll = tracing_poll_pipe, .read = tracing_read_pipe, + .splice_read = tracing_splice_read_pipe, .release = tracing_release_pipe, }; -- cgit v1.2.3-18-g5258 From ff98781bab2735e6c89793034173e0cb5007a7e5 Mon Sep 17 00:00:00 2001 From: Eduard - Gabriel Munteanu Date: Mon, 9 Feb 2009 08:15:55 +0200 Subject: tracing: Move pipe waiting code out of tracing_read_pipe(). This moves the pipe waiting code from tracing_read_pipe() into tracing_wait_pipe(), which is useful to implement other fops, like splice_read. Signed-off-by: Eduard - Gabriel Munteanu Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 69 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 40 insertions(+), 29 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9e29fdb0dfe..11fde0ad9cb 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2388,37 +2388,15 @@ tracing_poll_pipe(struct file *filp, poll_table *poll_table) } } -/* - * Consumer reader. - */ -static ssize_t -tracing_read_pipe(struct file *filp, char __user *ubuf, - size_t cnt, loff_t *ppos) +/* Must be called with trace_types_lock mutex held. */ +static int tracing_wait_pipe(struct file *filp) { struct trace_iterator *iter = filp->private_data; - ssize_t sret; - - /* return any leftover data */ - sret = trace_seq_to_user(&iter->seq, ubuf, cnt); - if (sret != -EBUSY) - return sret; - - trace_seq_reset(&iter->seq); - mutex_lock(&trace_types_lock); - if (iter->trace->read) { - sret = iter->trace->read(iter, filp, ubuf, cnt, ppos); - if (sret) - goto out; - } - -waitagain: - sret = 0; while (trace_empty(iter)) { if ((filp->f_flags & O_NONBLOCK)) { - sret = -EAGAIN; - goto out; + return -EAGAIN; } /* @@ -2443,12 +2421,11 @@ waitagain: iter->tr->waiter = NULL; if (signal_pending(current)) { - sret = -EINTR; - goto out; + return -EINTR; } if (iter->trace != current_trace) - goto out; + return 0; /* * We block until we read something and tracing is disabled. @@ -2465,9 +2442,43 @@ waitagain: continue; } + return 1; +} + +/* + * Consumer reader. + */ +static ssize_t +tracing_read_pipe(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct trace_iterator *iter = filp->private_data; + ssize_t sret; + + /* return any leftover data */ + sret = trace_seq_to_user(&iter->seq, ubuf, cnt); + if (sret != -EBUSY) + return sret; + + trace_seq_reset(&iter->seq); + + mutex_lock(&trace_types_lock); + if (iter->trace->read) { + sret = iter->trace->read(iter, filp, ubuf, cnt, ppos); + if (sret) + goto out; + } + +waitagain: + sret = tracing_wait_pipe(filp); + if (sret <= 0) + goto out; + /* stop when tracing is finished */ - if (trace_empty(iter)) + if (trace_empty(iter)) { + sret = 0; goto out; + } if (cnt >= PAGE_SIZE) cnt = PAGE_SIZE - 1; -- cgit v1.2.3-18-g5258 From 34cd4998d38f9bd04f34b78a7cb0c7f1bee00bd9 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 9 Feb 2009 12:06:29 -0500 Subject: tracing: clean up splice code Ingo Molnar suggested a series of clean ups for the splice code. This patch implements those suggestions. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 96 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 55 insertions(+), 41 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 11fde0ad9cb..d89821283b4 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2537,15 +2537,49 @@ static void tracing_spd_release_pipe(struct splice_pipe_desc *spd, } static struct pipe_buf_operations tracing_pipe_buf_ops = { - .can_merge = 0, - .map = generic_pipe_buf_map, - .unmap = generic_pipe_buf_unmap, - .confirm = generic_pipe_buf_confirm, - .release = tracing_pipe_buf_release, - .steal = generic_pipe_buf_steal, - .get = generic_pipe_buf_get, + .can_merge = 0, + .map = generic_pipe_buf_map, + .unmap = generic_pipe_buf_unmap, + .confirm = generic_pipe_buf_confirm, + .release = tracing_pipe_buf_release, + .steal = generic_pipe_buf_steal, + .get = generic_pipe_buf_get, }; +static size_t +tracing_fill_pipe_page(struct page *pages, size_t rem, + struct trace_iterator *iter) +{ + size_t count; + int ret; + + /* Seq buffer is page-sized, exactly what we need. */ + for (;;) { + count = iter->seq.len; + ret = print_trace_line(iter); + count = iter->seq.len - count; + if (rem < count) { + rem = 0; + iter->seq.len -= count; + break; + } + if (ret == TRACE_TYPE_PARTIAL_LINE) { + iter->seq.len -= count; + break; + } + + trace_consume(iter); + rem -= count; + if (!find_next_entry_inc(iter)) { + rem = 0; + iter->ent = NULL; + break; + } + } + + return rem; +} + static ssize_t tracing_splice_read_pipe(struct file *filp, loff_t *ppos, struct pipe_inode_info *pipe, @@ -2556,15 +2590,15 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, struct partial_page partial[PIPE_BUFFERS]; struct trace_iterator *iter = filp->private_data; struct splice_pipe_desc spd = { - .pages = pages, - .partial = partial, - .nr_pages = 0, /* This gets updated below. */ - .flags = flags, - .ops = &tracing_pipe_buf_ops, - .spd_release = tracing_spd_release_pipe, + .pages = pages, + .partial = partial, + .nr_pages = 0, /* This gets updated below. */ + .flags = flags, + .ops = &tracing_pipe_buf_ops, + .spd_release = tracing_spd_release_pipe, }; ssize_t ret; - size_t count, rem; + size_t rem; unsigned int i; mutex_lock(&trace_types_lock); @@ -2573,45 +2607,25 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, ret = iter->trace->splice_read(iter, filp, ppos, pipe, len, flags); if (ret) - goto out; + goto out_err; } ret = tracing_wait_pipe(filp); if (ret <= 0) - goto out; + goto out_err; if (!iter->ent && !find_next_entry_inc(iter)) { ret = -EFAULT; - goto out; + goto out_err; } /* Fill as many pages as possible. */ for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) { pages[i] = alloc_page(GFP_KERNEL); + if (!pages[i]) + break; - /* Seq buffer is page-sized, exactly what we need. */ - for (;;) { - count = iter->seq.len; - ret = print_trace_line(iter); - count = iter->seq.len - count; - if (rem < count) { - rem = 0; - iter->seq.len -= count; - break; - } - if (ret == TRACE_TYPE_PARTIAL_LINE) { - iter->seq.len -= count; - break; - } - - trace_consume(iter); - rem -= count; - if (!find_next_entry_inc(iter)) { - rem = 0; - iter->ent = NULL; - break; - } - } + rem = tracing_fill_pipe_page(pages[i], rem, iter); /* Copy the data into the page, so we can start over. */ ret = trace_seq_to_buffer(&iter->seq, @@ -2633,7 +2647,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, return splice_to_pipe(pipe, &spd); -out: +out_err: mutex_unlock(&trace_types_lock); return ret; -- cgit v1.2.3-18-g5258 From c3706f005c3aaf570e71f0f083fdbb59a5a9fa2e Mon Sep 17 00:00:00 2001 From: Wenji Huang Date: Tue, 10 Feb 2009 01:03:18 -0500 Subject: tracing: fix typos in comments Impact: clean up. Fix typos in the comments. Signed-off-by: Wenji Huang Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d89821283b4..d7c175a442d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1963,7 +1963,7 @@ tracing_trace_options_read(struct file *filp, char __user *ubuf, struct tracer_opt *trace_opts = current_trace->flags->opts; - /* calulate max size */ + /* calculate max size */ for (i = 0; trace_options[i]; i++) { len += strlen(trace_options[i]); len += 3; /* "no" and space */ -- cgit v1.2.3-18-g5258 From 4fd2735881bf4d8bf5e30979f31fc2f1b1d505fa Mon Sep 17 00:00:00 2001 From: Hannes Eder Date: Tue, 10 Feb 2009 19:44:12 +0100 Subject: tracing: fix sparse warnings: make symbols static Impact: make global variables and a global function static The function '__trace_userstack' does not seem to have a caller, so it is commented out. Fix this sparse warnings: kernel/trace/trace.c:82:5: warning: symbol 'tracing_disabled' was not declared. Should it be static? kernel/trace/trace.c:600:10: warning: symbol 'trace_record_cmdline_disabled' was not declared. Should it be static? kernel/trace/trace.c:957:6: warning: symbol '__trace_userstack' was not declared. Should it be static? kernel/trace/trace.c:1694:5: warning: symbol 'tracing_release' was not declared. Should it be static? Signed-off-by: Hannes Eder Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d7c175a442d..c157ba70f30 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -80,7 +80,7 @@ static int dummy_set_flag(u32 old_flags, u32 bit, int set) * of the tracer is successful. But that is the only place that sets * this back to zero. */ -int tracing_disabled = 1; +static int tracing_disabled = 1; static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled); @@ -626,7 +626,7 @@ static int cmdline_idx; static DEFINE_SPINLOCK(trace_cmdline_lock); /* temporary disable recording */ -atomic_t trace_record_cmdline_disabled __read_mostly; +static atomic_t trace_record_cmdline_disabled __read_mostly; static void trace_init_cmdlines(void) { @@ -983,10 +983,12 @@ static void ftrace_trace_userstack(struct trace_array *tr, #endif } -void __trace_userstack(struct trace_array *tr, unsigned long flags) +#ifdef UNUSED +static void __trace_userstack(struct trace_array *tr, unsigned long flags) { ftrace_trace_userstack(tr, flags, preempt_count()); } +#endif /* UNUSED */ static void ftrace_trace_special(void *__tr, @@ -1720,7 +1722,7 @@ int tracing_open_generic(struct inode *inode, struct file *filp) return 0; } -int tracing_release(struct inode *inode, struct file *file) +static int tracing_release(struct inode *inode, struct file *file) { struct seq_file *m = (struct seq_file *)file->private_data; struct trace_iterator *iter = m->private; -- cgit v1.2.3-18-g5258 From 5e39841c45cf5e6ea930ede1b0303309e03037a2 Mon Sep 17 00:00:00 2001 From: Hannes Eder Date: Tue, 10 Feb 2009 19:44:34 +0100 Subject: tracing: fix sparse warnings: fix (un-)signedness Fix these sparse warnings: kernel/trace/ring_buffer.c:70:37: warning: incorrect type in argument 2 (different signedness) kernel/trace/ring_buffer.c:84:39: warning: incorrect type in argument 2 (different signedness) kernel/trace/ring_buffer.c:96:43: warning: incorrect type in argument 2 (different signedness) kernel/trace/ring_buffer.c:2475:13: warning: incorrect type in argument 2 (different signedness) kernel/trace/ring_buffer.c:2475:13: warning: incorrect type in argument 2 (different signedness) kernel/trace/ring_buffer.c:2478:42: warning: incorrect type in argument 2 (different signedness) kernel/trace/ring_buffer.c:2478:42: warning: incorrect type in argument 2 (different signedness) kernel/trace/ring_buffer.c:2500:40: warning: incorrect type in argument 3 (different signedness) kernel/trace/ring_buffer.c:2505:44: warning: incorrect type in argument 2 (different signedness) kernel/trace/ring_buffer.c:2507:46: warning: incorrect type in argument 2 (different signedness) kernel/trace/trace.c:2130:40: warning: incorrect type in argument 3 (different signedness) kernel/trace/trace.c:2280:40: warning: incorrect type in argument 3 (different signedness) Signed-off-by: Hannes Eder Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c157ba70f30..e2434990277 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2147,7 +2147,7 @@ tracing_ctrl_write(struct file *filp, const char __user *ubuf, { struct trace_array *tr = filp->private_data; char buf[64]; - long val; + unsigned long val; int ret; if (cnt >= sizeof(buf)) @@ -2295,9 +2295,9 @@ static ssize_t tracing_max_lat_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { - long *ptr = filp->private_data; + unsigned long *ptr = filp->private_data; char buf[64]; - long val; + unsigned long val; int ret; if (cnt >= sizeof(buf)) -- cgit v1.2.3-18-g5258 From e7669b8e329255bbcb40af65b38e342825d97a46 Mon Sep 17 00:00:00 2001 From: Hannes Eder Date: Tue, 10 Feb 2009 19:44:45 +0100 Subject: tracing: fix sparse warning: attribute function with __acquires/__releases Fix this sparse warning: kernel/trace/trace.c:458:9: warning: context imbalance in 'register_tracer' - unexpected unlock Signed-off-by: Hannes Eder Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index e2434990277..95f99a7abf2 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -459,6 +459,8 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) * Register a new plugin tracer. */ int register_tracer(struct tracer *type) +__releases(kernel_lock) +__acquires(kernel_lock) { struct tracer *t; int len; -- cgit v1.2.3-18-g5258 From af513098452b8887d7c0e15a39d7cb74479501bd Mon Sep 17 00:00:00 2001 From: Wenji Huang Date: Tue, 17 Feb 2009 01:07:28 -0500 Subject: tracing: use the more proper parameter Pass tsk to tracing_record_cmdline instead of current. Signed-off-by: Wenji Huang Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 95f99a7abf2..dc61e82faad 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -336,7 +336,7 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) data->rt_priority = tsk->rt_priority; /* record this tasks comm */ - tracing_record_cmdline(current); + tracing_record_cmdline(tsk); } static void -- cgit v1.2.3-18-g5258 From 6eaaa5d57e76c454479833fc8594cd7c3b75c789 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 11 Feb 2009 02:25:00 +0100 Subject: tracing/core: use appropriate waiting on trace_pipe Impact: api and pipe waiting change Currently, the waiting used in tracing_read_pipe() is done through a 100 msecs schedule_timeout() loop which periodically check if there are traces on the buffer. This can cause small latencies for programs which are reading the incoming events. This patch makes the reader waiting for the trace_wait waitqueue except for few tracers such as the sched and functions tracers which might be already hold the runqueue lock while waking up the reader. This is performed through a new callback wait_pipe() on struct tracer. If none is implemented on a specific tracer, the default waiting for trace_wait queue is attached. Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 62 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 41 insertions(+), 21 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index dc61e82faad..881a94474d7 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -499,6 +499,9 @@ __acquires(kernel_lock) else if (!type->flags->opts) type->flags->opts = dummy_tracer_opt; + if (!type->wait_pipe) + type->wait_pipe = default_wait_pipe; + #ifdef CONFIG_FTRACE_STARTUP_TEST if (type->selftest && !tracing_selftest_disabled) { @@ -1064,7 +1067,10 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry->next_prio = wakee->prio; entry->next_state = wakee->state; entry->next_cpu = task_cpu(wakee); - trace_buffer_unlock_commit(tr, event, flags, pc); + + ring_buffer_unlock_commit(tr->buffer, event); + ftrace_trace_stack(tr, flags, 6, pc); + ftrace_trace_userstack(tr, flags, pc); } void @@ -2392,6 +2398,38 @@ tracing_poll_pipe(struct file *filp, poll_table *poll_table) } } + +void default_wait_pipe(struct trace_iterator *iter) +{ + DEFINE_WAIT(wait); + + prepare_to_wait(&trace_wait, &wait, TASK_INTERRUPTIBLE); + + if (trace_empty(iter)) + schedule(); + + finish_wait(&trace_wait, &wait); +} + +/* + * This is a make-shift waitqueue. + * A tracer might use this callback on some rare cases: + * + * 1) the current tracer might hold the runqueue lock when it wakes up + * a reader, hence a deadlock (sched, function, and function graph tracers) + * 2) the function tracers, trace all functions, we don't want + * the overhead of calling wake_up and friends + * (and tracing them too) + * + * Anyway, this is really very primitive wakeup. + */ +void poll_wait_pipe(struct trace_iterator *iter) +{ + set_current_state(TASK_INTERRUPTIBLE); + /* sleep for 100 msecs, and try again. */ + schedule_timeout(HZ / 10); +} + /* Must be called with trace_types_lock mutex held. */ static int tracing_wait_pipe(struct file *filp) { @@ -2403,30 +2441,14 @@ static int tracing_wait_pipe(struct file *filp) return -EAGAIN; } - /* - * This is a make-shift waitqueue. The reason we don't use - * an actual wait queue is because: - * 1) we only ever have one waiter - * 2) the tracing, traces all functions, we don't want - * the overhead of calling wake_up and friends - * (and tracing them too) - * Anyway, this is really very primitive wakeup. - */ - set_current_state(TASK_INTERRUPTIBLE); - iter->tr->waiter = current; - mutex_unlock(&trace_types_lock); - /* sleep for 100 msecs, and try again. */ - schedule_timeout(HZ/10); + iter->trace->wait_pipe(iter); mutex_lock(&trace_types_lock); - iter->tr->waiter = NULL; - - if (signal_pending(current)) { + if (signal_pending(current)) return -EINTR; - } if (iter->trace != current_trace) return 0; @@ -2442,8 +2464,6 @@ static int tracing_wait_pipe(struct file *filp) */ if (!tracer_enabled && iter->pos) break; - - continue; } return 1; -- cgit v1.2.3-18-g5258 From fa7c7f6e11f70d62505074a8b30a776236850dec Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 11 Feb 2009 02:51:30 +0100 Subject: tracing/core: remove unused parameter in tracing_fill_pipe_page() Impact: cleanup The struct page *pages parameter is unused. Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 881a94474d7..e1f3b99a2e5 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2571,8 +2571,7 @@ static struct pipe_buf_operations tracing_pipe_buf_ops = { }; static size_t -tracing_fill_pipe_page(struct page *pages, size_t rem, - struct trace_iterator *iter) +tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter) { size_t count; int ret; @@ -2649,7 +2648,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, if (!pages[i]) break; - rem = tracing_fill_pipe_page(pages[i], rem, iter); + rem = tracing_fill_pipe_page(rem, iter); /* Copy the data into the page, so we can start over. */ ret = trace_seq_to_buffer(&iter->seq, -- cgit v1.2.3-18-g5258 From 886b5b73d71e4027d7dc6c14f5f7ab102201ea6b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 25 Feb 2009 11:03:44 +0100 Subject: tracing: remove /debug/tracing/latency_trace Impact: remove old debug/tracing API /debug/tracing/latency_trace is an old legacy format we kept from the old latency tracer. Remove the file for now. If there's any useful bit missing then we'll propagate any useful output bits into the /debug/tracing/trace output. Reported-by: Andrew Morton Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index e1f3b99a2e5..11ba100f9a9 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2938,11 +2938,6 @@ static __init int tracer_init_debugfs(void) if (!entry) pr_warning("Could not create debugfs 'tracing_cpumask' entry\n"); - entry = debugfs_create_file("latency_trace", 0444, d_tracer, - &global_trace, &tracing_lt_fops); - if (!entry) - pr_warning("Could not create debugfs 'latency_trace' entry\n"); - entry = debugfs_create_file("trace", 0444, d_tracer, &global_trace, &tracing_fops); if (!entry) -- cgit v1.2.3-18-g5258 From b04cc6b1f6398b0e0b60d37e27ce51b4899672ec Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 25 Feb 2009 03:22:28 +0100 Subject: tracing/core: introduce per cpu tracing files Impact: split up tracing output per cpu Currently, on the tracing debugfs directory, three files are available to the user to let him extracting the trace output: - trace is an iterator through the ring-buffer. It's a reader but not a consumer It doesn't block when no more traces are available. - trace pretty similar to the former, except that it adds more informations such as prempt count, irq flag, ... - trace_pipe is a reader and a consumer, it will also block waiting for traces if necessary (heh, yes it's a pipe). The traces coming from different cpus are curretly mixed up inside these files. Sometimes it messes up the informations, sometimes it's useful, depending on what does the tracer capture. The tracing_cpumask file is useful to filter the output and select only the traces captured a custom defined set of cpus. But still it is not enough powerful to extract at the same time one trace buffer per cpu. So this patch creates a new directory: /debug/tracing/per_cpu/. Inside this directory, you will now find one trace_pipe file and one trace file per cpu. Which means if you have two cpus, you will have: trace0 trace1 trace_pipe0 trace_pipe1 And of course, reading these files will have the same effect than with the usual tracing files, except that you will only see the traces from the given cpu. The original all-in-one cpu trace file are still available on their original place. Until now, only one consumer was allowed on trace_pipe to avoid racy consuming on the ring-buffer. Now the approach changed a bit, you can have only one consumer per cpu. Which means you are allowed to read concurrently trace_pipe0 and trace_pipe1 But you can't have two readers on trace_pipe0 or trace_pipe1. Following the same logic, if there is one reader on the common trace_pipe, you can not have at the same time another reader on trace_pipe0 or in trace_pipe1. Because in trace_pipe is already a consumer in all cpu buffers in essence. Signed-off-by: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 168 +++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 144 insertions(+), 24 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 11ba100f9a9..aa58b7bc847 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -98,6 +98,9 @@ static inline void ftrace_enable_cpu(void) static cpumask_var_t __read_mostly tracing_buffer_mask; +/* Define which cpu buffers are currently read in trace_pipe */ +static cpumask_var_t tracing_reader_cpumask; + #define for_each_tracing_cpu(cpu) \ for_each_cpu(cpu, tracing_buffer_mask) @@ -1195,10 +1198,25 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts) { struct ring_buffer *buffer = iter->tr->buffer; struct trace_entry *ent, *next = NULL; + int cpu_file = iter->cpu_file; u64 next_ts = 0, ts; int next_cpu = -1; int cpu; + /* + * If we are in a per_cpu trace file, don't bother by iterating over + * all cpu and peek directly. + */ + if (cpu_file > TRACE_PIPE_ALL_CPU) { + if (ring_buffer_empty_cpu(buffer, cpu_file)) + return NULL; + ent = peek_next_entry(iter, cpu_file, ent_ts); + if (ent_cpu) + *ent_cpu = cpu_file; + + return ent; + } + for_each_tracing_cpu(cpu) { if (ring_buffer_empty_cpu(buffer, cpu)) @@ -1279,6 +1297,7 @@ static void *s_next(struct seq_file *m, void *v, loff_t *pos) static void *s_start(struct seq_file *m, loff_t *pos) { struct trace_iterator *iter = m->private; + int cpu_file = iter->cpu_file; void *p = NULL; loff_t l = 0; int cpu; @@ -1299,9 +1318,12 @@ static void *s_start(struct seq_file *m, loff_t *pos) ftrace_disable_cpu(); - for_each_tracing_cpu(cpu) { - ring_buffer_iter_reset(iter->buffer_iter[cpu]); - } + if (cpu_file == TRACE_PIPE_ALL_CPU) { + for_each_tracing_cpu(cpu) + ring_buffer_iter_reset(iter->buffer_iter[cpu]); + } else + ring_buffer_iter_reset(iter->buffer_iter[cpu_file]); + ftrace_enable_cpu(); @@ -1653,6 +1675,7 @@ static struct seq_operations tracer_seq_ops = { static struct trace_iterator * __tracing_open(struct inode *inode, struct file *file, int *ret) { + long cpu_file = (long) inode->i_private; struct trace_iterator *iter; struct seq_file *m; int cpu; @@ -1672,9 +1695,10 @@ __tracing_open(struct inode *inode, struct file *file, int *ret) if (current_trace && current_trace->print_max) iter->tr = &max_tr; else - iter->tr = inode->i_private; + iter->tr = &global_trace; iter->trace = current_trace; iter->pos = -1; + iter->cpu_file = cpu_file; /* Notify the tracer early; before we stop tracing. */ if (iter->trace && iter->trace->open) @@ -1684,14 +1708,22 @@ __tracing_open(struct inode *inode, struct file *file, int *ret) if (ring_buffer_overruns(iter->tr->buffer)) iter->iter_flags |= TRACE_FILE_ANNOTATE; + if (iter->cpu_file == TRACE_PIPE_ALL_CPU) { + for_each_tracing_cpu(cpu) { - for_each_tracing_cpu(cpu) { + iter->buffer_iter[cpu] = + ring_buffer_read_start(iter->tr->buffer, cpu); + if (!iter->buffer_iter[cpu]) + goto fail_buffer; + } + } else { + cpu = iter->cpu_file; iter->buffer_iter[cpu] = - ring_buffer_read_start(iter->tr->buffer, cpu); + ring_buffer_read_start(iter->tr->buffer, cpu); if (!iter->buffer_iter[cpu]) - goto fail_buffer; + goto fail; } /* TODO stop tracer */ @@ -1715,6 +1747,7 @@ __tracing_open(struct inode *inode, struct file *file, int *ret) if (iter->buffer_iter[cpu]) ring_buffer_read_finish(iter->buffer_iter[cpu]); } +fail: mutex_unlock(&trace_types_lock); kfree(iter); @@ -2325,54 +2358,77 @@ tracing_max_lat_write(struct file *filp, const char __user *ubuf, return cnt; } -static atomic_t tracing_reader; - static int tracing_open_pipe(struct inode *inode, struct file *filp) { + long cpu_file = (long) inode->i_private; struct trace_iterator *iter; + int ret = 0; if (tracing_disabled) return -ENODEV; - /* We only allow for reader of the pipe */ - if (atomic_inc_return(&tracing_reader) != 1) { - atomic_dec(&tracing_reader); - return -EBUSY; + mutex_lock(&trace_types_lock); + + /* We only allow one reader per cpu */ + if (cpu_file == TRACE_PIPE_ALL_CPU) { + if (!cpumask_empty(tracing_reader_cpumask)) { + ret = -EBUSY; + goto out; + } + cpumask_setall(tracing_reader_cpumask); + } else { + if (!cpumask_test_cpu(cpu_file, tracing_reader_cpumask)) + cpumask_set_cpu(cpu_file, tracing_reader_cpumask); + else { + ret = -EBUSY; + goto out; + } } /* create a buffer to store the information to pass to userspace */ iter = kzalloc(sizeof(*iter), GFP_KERNEL); - if (!iter) - return -ENOMEM; + if (!iter) { + ret = -ENOMEM; + goto out; + } if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) { kfree(iter); - return -ENOMEM; + ret = -ENOMEM; + goto out; } - mutex_lock(&trace_types_lock); - /* trace pipe does not show start of buffer */ cpumask_setall(iter->started); + iter->cpu_file = cpu_file; iter->tr = &global_trace; iter->trace = current_trace; filp->private_data = iter; if (iter->trace->pipe_open) iter->trace->pipe_open(iter); - mutex_unlock(&trace_types_lock); - return 0; +out: + mutex_unlock(&trace_types_lock); + return ret; } static int tracing_release_pipe(struct inode *inode, struct file *file) { struct trace_iterator *iter = file->private_data; + mutex_lock(&trace_types_lock); + + if (iter->cpu_file == TRACE_PIPE_ALL_CPU) + cpumask_clear(tracing_reader_cpumask); + else + cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask); + + mutex_unlock(&trace_types_lock); + free_cpumask_var(iter->started); kfree(iter); - atomic_dec(&tracing_reader); return 0; } @@ -2911,6 +2967,59 @@ struct dentry *tracing_init_dentry(void) return d_tracer; } +static struct dentry *d_percpu; + +struct dentry *tracing_dentry_percpu(void) +{ + static int once; + struct dentry *d_tracer; + + if (d_percpu) + return d_percpu; + + d_tracer = tracing_init_dentry(); + + if (!d_tracer) + return NULL; + + d_percpu = debugfs_create_dir("per_cpu", d_tracer); + + if (!d_percpu && !once) { + once = 1; + pr_warning("Could not create debugfs directory 'per_cpu'\n"); + return NULL; + } + + return d_percpu; +} + +static void tracing_init_debugfs_percpu(long cpu) +{ + struct dentry *d_percpu = tracing_dentry_percpu(); + struct dentry *entry; + /* strlen(trace_pipe) + MAX(log10(cpu)) + '\0' */ + char filename[17]; + + if (cpu > 999 || cpu < 0) + return; + + /* per cpu trace_pipe */ + sprintf(filename, "trace_pipe%ld", cpu); + + entry = debugfs_create_file(filename, 0444, d_percpu, + (void *) cpu, &tracing_pipe_fops); + if (!entry) + pr_warning("Could not create debugfs '%s' entry\n", filename); + + /* per cpu trace */ + sprintf(filename, "trace%ld", cpu); + + entry = debugfs_create_file(filename, 0444, d_percpu, + (void *) cpu, &tracing_fops); + if (!entry) + pr_warning("Could not create debugfs '%s' entry\n", filename); +} + #ifdef CONFIG_FTRACE_SELFTEST /* Let selftest have access to static functions in this file */ #include "trace_selftest.c" @@ -2920,6 +3029,7 @@ static __init int tracer_init_debugfs(void) { struct dentry *d_tracer; struct dentry *entry; + int cpu; d_tracer = tracing_init_dentry(); @@ -2939,7 +3049,7 @@ static __init int tracer_init_debugfs(void) pr_warning("Could not create debugfs 'tracing_cpumask' entry\n"); entry = debugfs_create_file("trace", 0444, d_tracer, - &global_trace, &tracing_fops); + (void *) TRACE_PIPE_ALL_CPU, &tracing_fops); if (!entry) pr_warning("Could not create debugfs 'trace' entry\n"); @@ -2970,8 +3080,8 @@ static __init int tracer_init_debugfs(void) if (!entry) pr_warning("Could not create debugfs 'README' entry\n"); - entry = debugfs_create_file("trace_pipe", 0644, d_tracer, - NULL, &tracing_pipe_fops); + entry = debugfs_create_file("trace_pipe", 0444, d_tracer, + (void *) TRACE_PIPE_ALL_CPU, &tracing_pipe_fops); if (!entry) pr_warning("Could not create debugfs " "'trace_pipe' entry\n"); @@ -2999,6 +3109,10 @@ static __init int tracer_init_debugfs(void) #ifdef CONFIG_SYSPROF_TRACER init_tracer_sysprof_debugfs(d_tracer); #endif + + for_each_tracing_cpu(cpu) + tracing_init_debugfs_percpu(cpu); + return 0; } @@ -3222,8 +3336,12 @@ __init static int tracer_alloc_buffers(void) if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL)) goto out_free_buffer_mask; + if (!alloc_cpumask_var(&tracing_reader_cpumask, GFP_KERNEL)) + goto out_free_tracing_cpumask; + cpumask_copy(tracing_buffer_mask, cpu_possible_mask); cpumask_copy(tracing_cpumask, cpu_all_mask); + cpumask_clear(tracing_reader_cpumask); /* TODO: make the number of buffers hot pluggable with CPUS */ global_trace.buffer = ring_buffer_alloc(trace_buf_size, @@ -3272,6 +3390,8 @@ __init static int tracer_alloc_buffers(void) ret = 0; out_free_cpumask: + free_cpumask_var(tracing_reader_cpumask); +out_free_tracing_cpumask: free_cpumask_var(tracing_cpumask); out_free_buffer_mask: free_cpumask_var(tracing_buffer_mask); -- cgit v1.2.3-18-g5258 From d7350c3f45694104e820041969c8185c5f99e57c Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 25 Feb 2009 06:13:16 +0100 Subject: tracing/core: make the read callbacks reentrants Now that several per-cpu files can be read or spliced at the same, we want the read/splice callbacks for tracing files to be reentrants. Until now, a single global mutex (trace_types_lock) serialized the access to tracing_read_pipe(), tracing_splice_read_pipe(), and the seq helpers. Ie: it means that if a user tries to read trace_pipe0 and trace_pipe1 at the same time, the access to the function tracing_read_pipe() is contended and one reader must wait for the other to finish its read call. The trace_type_lock mutex is mostly here to serialize the access to the global current tracer (current_trace), which can be changed concurrently. Although the iter struct keeps a private pointer to this tracer, its callbacks can be changed by another function. The method used here is to not keep anymore private reference to the tracer inside the iterator but to make a copy of it inside the iterator. Then it checks on subsequents read calls if the tracer has changed. This is not costly because the current tracer is not expected to be changed often, so we use a branch prediction for that. Moreover, we add a private mutex to the iterator (there is one iterator per file descriptor) to serialize the accesses in case of multiple consumers per file descriptor (which would be a silly idea from the user). Note that this is not to protect the ring buffer, since the ring buffer already serializes the readers accesses. This is to prevent from traces weirdness in case of concurrent consumers. But these mutexes can be dropped anyway, that would not result in any crash. Just tell me what you think about it. Signed-off-by: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 101 ++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 83 insertions(+), 18 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index aa58b7bc847..d8d899f3bd6 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1294,20 +1294,32 @@ static void *s_next(struct seq_file *m, void *v, loff_t *pos) return ent; } +/* + * No necessary locking here. The worst thing which can + * happen is loosing events consumed at the same time + * by a trace_pipe reader. + * Other than that, we don't risk to crash the ring buffer + * because it serializes the readers. + * + * The current tracer is copied to avoid a global locking + * all around. + */ static void *s_start(struct seq_file *m, loff_t *pos) { struct trace_iterator *iter = m->private; + static struct tracer *old_tracer; int cpu_file = iter->cpu_file; void *p = NULL; loff_t l = 0; int cpu; + /* copy the tracer to avoid using a global lock all around */ mutex_lock(&trace_types_lock); - - if (!current_trace || current_trace != iter->trace) { - mutex_unlock(&trace_types_lock); - return NULL; + if (unlikely(old_tracer != current_trace && current_trace)) { + old_tracer = current_trace; + *iter->trace = *current_trace; } + mutex_unlock(&trace_types_lock); atomic_inc(&trace_record_cmdline_disabled); @@ -1341,7 +1353,6 @@ static void *s_start(struct seq_file *m, loff_t *pos) static void s_stop(struct seq_file *m, void *p) { atomic_dec(&trace_record_cmdline_disabled); - mutex_unlock(&trace_types_lock); } static void print_lat_help_header(struct seq_file *m) @@ -1691,13 +1702,25 @@ __tracing_open(struct inode *inode, struct file *file, int *ret) goto out; } + /* + * We make a copy of the current tracer to avoid concurrent + * changes on it while we are reading. + */ mutex_lock(&trace_types_lock); + iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL); + if (!iter->trace) { + *ret = -ENOMEM; + goto fail; + } + if (current_trace) + *iter->trace = *current_trace; + if (current_trace && current_trace->print_max) iter->tr = &max_tr; else iter->tr = &global_trace; - iter->trace = current_trace; iter->pos = -1; + mutex_init(&iter->mutex); iter->cpu_file = cpu_file; /* Notify the tracer early; before we stop tracing. */ @@ -1747,8 +1770,9 @@ __tracing_open(struct inode *inode, struct file *file, int *ret) if (iter->buffer_iter[cpu]) ring_buffer_read_finish(iter->buffer_iter[cpu]); } -fail: + fail: mutex_unlock(&trace_types_lock); + kfree(iter->trace); kfree(iter); return ERR_PTR(-ENOMEM); @@ -1783,6 +1807,8 @@ static int tracing_release(struct inode *inode, struct file *file) mutex_unlock(&trace_types_lock); seq_release(inode, file); + mutex_destroy(&iter->mutex); + kfree(iter->trace); kfree(iter); return 0; } @@ -2392,10 +2418,21 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) goto out; } + /* + * We make a copy of the current tracer to avoid concurrent + * changes on it while we are reading. + */ + iter->trace = kmalloc(sizeof(*iter->trace), GFP_KERNEL); + if (!iter->trace) { + ret = -ENOMEM; + goto fail; + } + if (current_trace) + *iter->trace = *current_trace; + if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) { - kfree(iter); ret = -ENOMEM; - goto out; + goto fail; } /* trace pipe does not show start of buffer */ @@ -2403,7 +2440,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) iter->cpu_file = cpu_file; iter->tr = &global_trace; - iter->trace = current_trace; + mutex_init(&iter->mutex); filp->private_data = iter; if (iter->trace->pipe_open) @@ -2412,6 +2449,12 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) out: mutex_unlock(&trace_types_lock); return ret; + +fail: + kfree(iter->trace); + kfree(iter); + mutex_unlock(&trace_types_lock); + return ret; } static int tracing_release_pipe(struct inode *inode, struct file *file) @@ -2428,6 +2471,8 @@ static int tracing_release_pipe(struct inode *inode, struct file *file) mutex_unlock(&trace_types_lock); free_cpumask_var(iter->started); + mutex_destroy(&iter->mutex); + kfree(iter->trace); kfree(iter); return 0; @@ -2497,18 +2542,15 @@ static int tracing_wait_pipe(struct file *filp) return -EAGAIN; } - mutex_unlock(&trace_types_lock); + mutex_unlock(&iter->mutex); iter->trace->wait_pipe(iter); - mutex_lock(&trace_types_lock); + mutex_lock(&iter->mutex); if (signal_pending(current)) return -EINTR; - if (iter->trace != current_trace) - return 0; - /* * We block until we read something and tracing is disabled. * We still block if tracing is disabled, but we have never @@ -2533,6 +2575,7 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_iterator *iter = filp->private_data; + static struct tracer *old_tracer; ssize_t sret; /* return any leftover data */ @@ -2542,7 +2585,20 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, trace_seq_reset(&iter->seq); + /* copy the tracer to avoid using a global lock all around */ mutex_lock(&trace_types_lock); + if (unlikely(old_tracer != current_trace && current_trace)) { + old_tracer = current_trace; + *iter->trace = *current_trace; + } + mutex_unlock(&trace_types_lock); + + /* + * Avoid more than one consumer on a single file descriptor + * This is just a matter of traces coherency, the ring buffer itself + * is protected. + */ + mutex_lock(&iter->mutex); if (iter->trace->read) { sret = iter->trace->read(iter, filp, ubuf, cnt, ppos); if (sret) @@ -2599,7 +2655,7 @@ waitagain: goto waitagain; out: - mutex_unlock(&trace_types_lock); + mutex_unlock(&iter->mutex); return sret; } @@ -2676,11 +2732,20 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, .ops = &tracing_pipe_buf_ops, .spd_release = tracing_spd_release_pipe, }; + static struct tracer *old_tracer; ssize_t ret; size_t rem; unsigned int i; + /* copy the tracer to avoid using a global lock all around */ mutex_lock(&trace_types_lock); + if (unlikely(old_tracer != current_trace && current_trace)) { + old_tracer = current_trace; + *iter->trace = *current_trace; + } + mutex_unlock(&trace_types_lock); + + mutex_lock(&iter->mutex); if (iter->trace->splice_read) { ret = iter->trace->splice_read(iter, filp, @@ -2720,14 +2785,14 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, trace_seq_reset(&iter->seq); } - mutex_unlock(&trace_types_lock); + mutex_unlock(&iter->mutex); spd.nr_pages = i; return splice_to_pipe(pipe, &spd); out_err: - mutex_unlock(&trace_types_lock); + mutex_unlock(&iter->mutex); return ret; } -- cgit v1.2.3-18-g5258 From 8656e7a2fa6afcd8682990f804a2a9674568738f Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 26 Feb 2009 00:41:38 +0100 Subject: tracing/core: make the per cpu trace files in per cpu directories Impact: restructure the VFS layout of per CPU trace buffers The per cpu trace files are all in a single directory: /debug/tracing/per_cpu. In case of a large number of cpu, the content of this directory becomes messy so we create now one directory per cpu inside /debug/tracing/per_cpu which contain each their own trace_pipe and trace files. Ie: /debug/tracing$ ls -R per_cpu per_cpu: cpu0 cpu1 per_cpu/cpu0: trace trace_pipe per_cpu/cpu1: trace trace_pipe Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Lai Jiangshan Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d8d899f3bd6..bdaf60d3d33 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3061,28 +3061,31 @@ struct dentry *tracing_dentry_percpu(void) static void tracing_init_debugfs_percpu(long cpu) { struct dentry *d_percpu = tracing_dentry_percpu(); - struct dentry *entry; - /* strlen(trace_pipe) + MAX(log10(cpu)) + '\0' */ - char filename[17]; + struct dentry *entry, *d_cpu; + /* strlen(cpu) + MAX(log10(cpu)) + '\0' */ + char cpu_dir[7]; if (cpu > 999 || cpu < 0) return; - /* per cpu trace_pipe */ - sprintf(filename, "trace_pipe%ld", cpu); + sprintf(cpu_dir, "cpu%ld", cpu); + d_cpu = debugfs_create_dir(cpu_dir, d_percpu); + if (!d_cpu) { + pr_warning("Could not create debugfs '%s' entry\n", cpu_dir); + return; + } - entry = debugfs_create_file(filename, 0444, d_percpu, + /* per cpu trace_pipe */ + entry = debugfs_create_file("trace_pipe", 0444, d_cpu, (void *) cpu, &tracing_pipe_fops); if (!entry) - pr_warning("Could not create debugfs '%s' entry\n", filename); + pr_warning("Could not create debugfs 'trace_pipe' entry\n"); /* per cpu trace */ - sprintf(filename, "trace%ld", cpu); - - entry = debugfs_create_file(filename, 0444, d_percpu, + entry = debugfs_create_file("trace", 0444, d_cpu, (void *) cpu, &tracing_fops); if (!entry) - pr_warning("Could not create debugfs '%s' entry\n", filename); + pr_warning("Could not create debugfs 'trace' entry\n"); } #ifdef CONFIG_FTRACE_SELFTEST -- cgit v1.2.3-18-g5258 From a8259075074fb09c230b4cd2c8d3ee3c49d6ecd1 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 26 Feb 2009 22:19:12 -0500 Subject: tracing: add options directory and core option files This patch creates an options directory in the debugfs, that contains the available tracing options. These files contain 1 or 0, where 1 is the option is enabled and 0 it is disabled. Simply echoing in 1 will enable the option and 0 will disable it. This patch only contains the core options, not the tracer options. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 117 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index bdaf60d3d33..40e983ed994 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3093,6 +3093,121 @@ static void tracing_init_debugfs_percpu(long cpu) #include "trace_selftest.c" #endif +static ssize_t +trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt, + loff_t *ppos) +{ + long index = (long)filp->private_data; + char *buf; + + if (trace_flags & (1 << index)) + buf = "1\n"; + else + buf = "0\n"; + + return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2); +} + +static ssize_t +trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt, + loff_t *ppos) +{ + long index = (long)filp->private_data; + char buf[64]; + unsigned long val; + int ret; + + if (cnt >= sizeof(buf)) + return -EINVAL; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + ret = strict_strtoul(buf, 10, &val); + if (ret < 0) + return ret; + + switch (val) { + case 0: + trace_flags &= ~(1 << index); + break; + case 1: + trace_flags |= 1 << index; + break; + + default: + return -EINVAL; + } + + *ppos += cnt; + + return cnt; +} + + +static const struct file_operations trace_options_core_fops = { + .open = tracing_open_generic, + .read = trace_options_core_read, + .write = trace_options_core_write, +}; + +static struct dentry *trace_options_init_dentry(void) +{ + struct dentry *d_tracer; + static struct dentry *t_options; + + if (t_options) + return t_options; + + d_tracer = tracing_init_dentry(); + if (!d_tracer) + return NULL; + + t_options = debugfs_create_dir("options", d_tracer); + if (!t_options) { + pr_warning("Could not create debugfs directory 'options'\n"); + return NULL; + } + + return t_options; +} + +static struct dentry * +create_trace_option_core_file(const char *option, long index) +{ + struct dentry *t_options; + struct dentry *entry; + + t_options = trace_options_init_dentry(); + if (!t_options) + return NULL; + + entry = debugfs_create_file(option, 0644, t_options, (void *)index, + &trace_options_core_fops); + + return entry; +} + +static __init void create_trace_options_dir(void) +{ + struct dentry *t_options; + struct dentry *entry; + int i; + + t_options = trace_options_init_dentry(); + if (!t_options) + return; + + for (i = 0; trace_options[i]; i++) { + entry = create_trace_option_core_file(trace_options[i], i); + if (!entry) + pr_warning("Could not create debugfs %s entry\n", + trace_options[i]); + } +} + static __init int tracer_init_debugfs(void) { struct dentry *d_tracer; @@ -3111,6 +3226,8 @@ static __init int tracer_init_debugfs(void) if (!entry) pr_warning("Could not create debugfs 'trace_options' entry\n"); + create_trace_options_dir(); + entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer, NULL, &tracing_cpumask_fops); if (!entry) -- cgit v1.2.3-18-g5258 From 577b785f55168d5acb3d123ba41bfe8d7981e044 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 26 Feb 2009 23:43:05 -0500 Subject: tracing: add tracer dependent options to options directory This patch adds the tracer dependent options dynamically to the options directory when the tracer is activated. These options are removed when the tracer is deactivated. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 174 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 173 insertions(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 40e983ed994..485c6e7f446 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2275,8 +2275,17 @@ int tracer_init(struct tracer *t, struct trace_array *tr) return t->init(tr); } +struct trace_option_dentry; + +static struct trace_option_dentry * +create_trace_option_files(struct tracer *tracer); + +static void +destroy_trace_option_files(struct trace_option_dentry *topts); + static int tracing_set_tracer(const char *buf) { + static struct trace_option_dentry *topts; struct trace_array *tr = &global_trace; struct tracer *t; int ret = 0; @@ -2297,7 +2306,12 @@ static int tracing_set_tracer(const char *buf) if (current_trace && current_trace->reset) current_trace->reset(tr); + destroy_trace_option_files(topts); + current_trace = t; + + topts = create_trace_option_files(current_trace); + if (t->init) { ret = tracer_init(t, tr); if (ret) @@ -3093,6 +3107,95 @@ static void tracing_init_debugfs_percpu(long cpu) #include "trace_selftest.c" #endif +struct trace_option_dentry { + struct tracer_opt *opt; + struct tracer_flags *flags; + struct dentry *entry; +}; + +static ssize_t +trace_options_read(struct file *filp, char __user *ubuf, size_t cnt, + loff_t *ppos) +{ + struct trace_option_dentry *topt = filp->private_data; + char *buf; + + if (topt->flags->val & topt->opt->bit) + buf = "1\n"; + else + buf = "0\n"; + + return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2); +} + +static ssize_t +trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt, + loff_t *ppos) +{ + struct trace_option_dentry *topt = filp->private_data; + unsigned long val; + char buf[64]; + int ret; + + if (cnt >= sizeof(buf)) + return -EINVAL; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + ret = strict_strtoul(buf, 10, &val); + if (ret < 0) + return ret; + + ret = 0; + switch (val) { + case 0: + /* do nothing if already cleared */ + if (!(topt->flags->val & topt->opt->bit)) + break; + + mutex_lock(&trace_types_lock); + if (current_trace->set_flag) + ret = current_trace->set_flag(topt->flags->val, + topt->opt->bit, 0); + mutex_unlock(&trace_types_lock); + if (ret) + return ret; + topt->flags->val &= ~topt->opt->bit; + break; + case 1: + /* do nothing if already set */ + if (topt->flags->val & topt->opt->bit) + break; + + mutex_lock(&trace_types_lock); + if (current_trace->set_flag) + ret = current_trace->set_flag(topt->flags->val, + topt->opt->bit, 1); + mutex_unlock(&trace_types_lock); + if (ret) + return ret; + topt->flags->val |= topt->opt->bit; + break; + + default: + return -EINVAL; + } + + *ppos += cnt; + + return cnt; +} + + +static const struct file_operations trace_options_fops = { + .open = tracing_open_generic, + .read = trace_options_read, + .write = trace_options_write, +}; + static ssize_t trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) @@ -3146,7 +3249,6 @@ trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt, return cnt; } - static const struct file_operations trace_options_core_fops = { .open = tracing_open_generic, .read = trace_options_core_read, @@ -3174,6 +3276,76 @@ static struct dentry *trace_options_init_dentry(void) return t_options; } +static void +create_trace_option_file(struct trace_option_dentry *topt, + struct tracer_flags *flags, + struct tracer_opt *opt) +{ + struct dentry *t_options; + struct dentry *entry; + + t_options = trace_options_init_dentry(); + if (!t_options) + return; + + topt->flags = flags; + topt->opt = opt; + + entry = debugfs_create_file(opt->name, 0644, t_options, topt, + &trace_options_fops); + + topt->entry = entry; + +} + +static struct trace_option_dentry * +create_trace_option_files(struct tracer *tracer) +{ + struct trace_option_dentry *topts; + struct tracer_flags *flags; + struct tracer_opt *opts; + int cnt; + + if (!tracer) + return NULL; + + flags = tracer->flags; + + if (!flags || !flags->opts) + return NULL; + + opts = flags->opts; + + for (cnt = 0; opts[cnt].name; cnt++) + ; + + topts = kzalloc(sizeof(*topts) * (cnt + 1), GFP_KERNEL); + if (!topts) + return NULL; + + for (cnt = 0; opts[cnt].name; cnt++) + create_trace_option_file(&topts[cnt], flags, + &opts[cnt]); + + return topts; +} + +static void +destroy_trace_option_files(struct trace_option_dentry *topts) +{ + int cnt; + + if (!topts) + return; + + for (cnt = 0; topts[cnt].opt; cnt++) { + if (topts[cnt].entry) + debugfs_remove(topts[cnt].entry); + } + + kfree(topts); +} + static struct dentry * create_trace_option_core_file(const char *option, long index) { -- cgit v1.2.3-18-g5258 From d8e83d26b5ab3b31ee0ff6d093a2627707a1e221 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 26 Feb 2009 23:55:58 -0500 Subject: tracing: add protection around open use of current_tracer Impact: fix to possible race conditions There's some uses of current_tracer that is not protected by the trace_types_lock. There is a small chance that a sysadmin changes the tracer while the current_tracer is being referenced. If the race is hit, it is unlikely to cause any harm since the tracers are constant and are not freed. But some strang side effects may occur. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 485c6e7f446..6c89ec6cbd4 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2024,12 +2024,12 @@ static ssize_t tracing_trace_options_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { - int i; + struct tracer_opt *trace_opts; + u32 tracer_flags; + int len = 0; char *buf; int r = 0; - int len = 0; - u32 tracer_flags = current_trace->flags->val; - struct tracer_opt *trace_opts = current_trace->flags->opts; + int i; /* calculate max size */ @@ -2038,6 +2038,10 @@ tracing_trace_options_read(struct file *filp, char __user *ubuf, len += 3; /* "no" and space */ } + mutex_lock(&trace_types_lock); + tracer_flags = current_trace->flags->val; + trace_opts = current_trace->flags->opts; + /* * Increase the size with names of options specific * of the current tracer. @@ -2049,8 +2053,10 @@ tracing_trace_options_read(struct file *filp, char __user *ubuf, /* +2 for \n and \0 */ buf = kmalloc(len + 2, GFP_KERNEL); - if (!buf) + if (!buf) { + mutex_unlock(&trace_types_lock); return -ENOMEM; + } for (i = 0; trace_options[i]; i++) { if (trace_flags & (1 << i)) @@ -2067,6 +2073,7 @@ tracing_trace_options_read(struct file *filp, char __user *ubuf, r += sprintf(buf + r, "no%s ", trace_opts[i].name); } + mutex_unlock(&trace_types_lock); r += sprintf(buf + r, "\n"); WARN_ON(r >= len + 2); @@ -2074,7 +2081,6 @@ tracing_trace_options_read(struct file *filp, char __user *ubuf, r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); kfree(buf); - return r; } @@ -2149,7 +2155,9 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf, /* If no option could be set, test the specific tracer options */ if (!trace_options[i]) { + mutex_lock(&trace_types_lock); ret = set_tracer_option(current_trace, cmp, neg); + mutex_unlock(&trace_types_lock); if (ret) return ret; } -- cgit v1.2.3-18-g5258 From 85a2f9b46f8cd8aaa11c64c715e1ea3ec27ec486 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 27 Feb 2009 00:12:38 -0500 Subject: tracing: use pointer error returns for __tracing_open Impact: fix compile warning and clean up When I first wrote __tracing_open, instead of passing the error code via the ERR_PTR macros, I lazily used a separate parameter to hold the return for errors. When Frederic Weisbecker updated that function, he used the Linux kernel ERR_PTR for the returns. This caused the parameter return to possibly not be initialized on error. gcc correctly pointed this out with a warning. This patch converts the entire function to use the Linux kernel ERR_PTR macro methods. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 46 ++++++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 22 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 6c89ec6cbd4..304e02ce39c 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1684,23 +1684,20 @@ static struct seq_operations tracer_seq_ops = { }; static struct trace_iterator * -__tracing_open(struct inode *inode, struct file *file, int *ret) +__tracing_open(struct inode *inode, struct file *file) { long cpu_file = (long) inode->i_private; + void *fail_ret = ERR_PTR(-ENOMEM); struct trace_iterator *iter; struct seq_file *m; - int cpu; + int cpu, ret; - if (tracing_disabled) { - *ret = -ENODEV; - return NULL; - } + if (tracing_disabled) + return ERR_PTR(-ENODEV); iter = kzalloc(sizeof(*iter), GFP_KERNEL); - if (!iter) { - *ret = -ENOMEM; - goto out; - } + if (!iter) + return ERR_PTR(-ENOMEM); /* * We make a copy of the current tracer to avoid concurrent @@ -1708,10 +1705,9 @@ __tracing_open(struct inode *inode, struct file *file, int *ret) */ mutex_lock(&trace_types_lock); iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL); - if (!iter->trace) { - *ret = -ENOMEM; + if (!iter->trace) goto fail; - } + if (current_trace) *iter->trace = *current_trace; @@ -1750,9 +1746,11 @@ __tracing_open(struct inode *inode, struct file *file, int *ret) } /* TODO stop tracer */ - *ret = seq_open(file, &tracer_seq_ops); - if (*ret) + ret = seq_open(file, &tracer_seq_ops); + if (ret < 0) { + fail_ret = ERR_PTR(ret); goto fail_buffer; + } m = file->private_data; m->private = iter; @@ -1762,7 +1760,6 @@ __tracing_open(struct inode *inode, struct file *file, int *ret) mutex_unlock(&trace_types_lock); - out: return iter; fail_buffer: @@ -1775,7 +1772,7 @@ __tracing_open(struct inode *inode, struct file *file, int *ret) kfree(iter->trace); kfree(iter); - return ERR_PTR(-ENOMEM); + return fail_ret; } int tracing_open_generic(struct inode *inode, struct file *filp) @@ -1815,9 +1812,12 @@ static int tracing_release(struct inode *inode, struct file *file) static int tracing_open(struct inode *inode, struct file *file) { - int ret; + struct trace_iterator *iter; + int ret = 0; - __tracing_open(inode, file, &ret); + iter = __tracing_open(inode, file); + if (IS_ERR(iter)) + ret = PTR_ERR(iter); return ret; } @@ -1825,11 +1825,13 @@ static int tracing_open(struct inode *inode, struct file *file) static int tracing_lt_open(struct inode *inode, struct file *file) { struct trace_iterator *iter; - int ret; + int ret = 0; - iter = __tracing_open(inode, file, &ret); + iter = __tracing_open(inode, file); - if (!ret) + if (IS_ERR(iter)) + ret = PTR_ERR(iter); + else iter->iter_flags |= TRACE_FILE_LAT_FMT; return ret; -- cgit v1.2.3-18-g5258 From 5c6a3ae1b4beebb56e2916b84f1208d96a9e32ff Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 27 Feb 2009 00:22:21 -0500 Subject: tracing: use newline separator for trace options list Impact: clean up Instead of listing the trace options like: # cat /debug/tracing/trace_options print-parent nosym-offset nosym-addr noverbose noraw nohex nobin noblock nostacktrace nosched-tree ftrace_printk noftrace_preempt nobranch annotate nouserstacktrace nosym-userobj We now list them like: # cat /debug/tracing/trace_options print-parent nosym-offset nosym-addr noverbose noraw nohex nobin noblock nostacktrace nosched-tree ftrace_printk noftrace_preempt nobranch annotate nouserstacktrace nosym-userobj Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 304e02ce39c..5db7485158d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2037,7 +2037,7 @@ tracing_trace_options_read(struct file *filp, char __user *ubuf, /* calculate max size */ for (i = 0; trace_options[i]; i++) { len += strlen(trace_options[i]); - len += 3; /* "no" and space */ + len += 3; /* "no" and newline */ } mutex_lock(&trace_types_lock); @@ -2050,7 +2050,7 @@ tracing_trace_options_read(struct file *filp, char __user *ubuf, */ for (i = 0; trace_opts[i].name; i++) { len += strlen(trace_opts[i].name); - len += 3; /* "no" and space */ + len += 3; /* "no" and newline */ } /* +2 for \n and \0 */ @@ -2062,22 +2062,21 @@ tracing_trace_options_read(struct file *filp, char __user *ubuf, for (i = 0; trace_options[i]; i++) { if (trace_flags & (1 << i)) - r += sprintf(buf + r, "%s ", trace_options[i]); + r += sprintf(buf + r, "%s\n", trace_options[i]); else - r += sprintf(buf + r, "no%s ", trace_options[i]); + r += sprintf(buf + r, "no%s\n", trace_options[i]); } for (i = 0; trace_opts[i].name; i++) { if (tracer_flags & trace_opts[i].bit) - r += sprintf(buf + r, "%s ", + r += sprintf(buf + r, "%s\n", trace_opts[i].name); else - r += sprintf(buf + r, "no%s ", + r += sprintf(buf + r, "no%s\n", trace_opts[i].name); } mutex_unlock(&trace_types_lock); - r += sprintf(buf + r, "\n"); WARN_ON(r >= len + 2); r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); -- cgit v1.2.3-18-g5258 From 0cfe82451dfa3ebf4e69158f2eb450f2fbb6b715 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 27 Feb 2009 10:51:10 -0500 Subject: tracing: replace kzalloc with kcalloc Impact: clean up kcalloc is a better approach to allocate a NULL array. Reported-by: Andrew Morton Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 5db7485158d..9c5987aca74 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3328,7 +3328,7 @@ create_trace_option_files(struct tracer *tracer) for (cnt = 0; opts[cnt].name; cnt++) ; - topts = kzalloc(sizeof(*topts) * (cnt + 1), GFP_KERNEL); + topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL); if (!topts) return NULL; -- cgit v1.2.3-18-g5258 From ef5580d0fffce6e0a01043bac0625128b5d409a7 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 27 Feb 2009 19:38:04 -0500 Subject: tracing: add interface to write into current tracer buffer Right now all tracers must manage their own trace buffers. This was to enforce tracers to be independent in case we finally decide to allow each tracer to have their own trace buffer. But now we are adding event tracing that writes to the current tracer's buffer. This adds an interface to allow events to write to the current tracer buffer without having to manage its own. Since event tracing has no "tracer", and is just a way to hook into any other tracer. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9c5987aca74..c5e39cd7310 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -846,6 +846,20 @@ void trace_buffer_unlock_commit(struct trace_array *tr, trace_wake_up(); } +struct ring_buffer_event * +trace_current_buffer_lock_reserve(unsigned char type, unsigned long len, + unsigned long flags, int pc) +{ + return trace_buffer_lock_reserve(&global_trace, + type, len, flags, pc); +} + +void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, + unsigned long flags, int pc) +{ + return trace_buffer_unlock_commit(&global_trace, event, flags, pc); +} + void trace_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, unsigned long flags, -- cgit v1.2.3-18-g5258 From f9520750c4c9924c14325cd951efae5fae58104c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 2 Mar 2009 14:04:40 -0500 Subject: tracing: make trace_seq_reset global and rename to trace_seq_init Impact: clean up The trace_seq functions may be used separately outside of the ftrace iterator. The trace_seq_reset is needed for these operations. This patch also renames trace_seq_reset to the more appropriate trace_seq_init. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c5e39cd7310..ea055aa21cd 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -342,13 +342,6 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) tracing_record_cmdline(tsk); } -static void -trace_seq_reset(struct trace_seq *s) -{ - s->len = 0; - s->readpos = 0; -} - ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt) { int len; @@ -395,7 +388,7 @@ trace_print_seq(struct seq_file *m, struct trace_seq *s) s->buffer[len] = 0; seq_puts(m, s->buffer); - trace_seq_reset(s); + trace_seq_init(s); } /** @@ -2620,7 +2613,7 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, if (sret != -EBUSY) return sret; - trace_seq_reset(&iter->seq); + trace_seq_init(&iter->seq); /* copy the tracer to avoid using a global lock all around */ mutex_lock(&trace_types_lock); @@ -2682,7 +2675,7 @@ waitagain: /* Now copy what we have to the user */ sret = trace_seq_to_user(&iter->seq, ubuf, cnt); if (iter->seq.readpos >= iter->seq.len) - trace_seq_reset(&iter->seq); + trace_seq_init(&iter->seq); /* * If there was nothing to send to user, inspite of consuming trace @@ -2819,7 +2812,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, partial[i].offset = 0; partial[i].len = iter->seq.len; - trace_seq_reset(&iter->seq); + trace_seq_init(&iter->seq); } mutex_unlock(&iter->mutex); @@ -3631,7 +3624,7 @@ trace_printk_seq(struct trace_seq *s) printk(KERN_TRACE "%s", s->buffer); - trace_seq_reset(s); + trace_seq_init(s); } void ftrace_dump(void) -- cgit v1.2.3-18-g5258 From 2cadf9135eb3b6d84b6427314be827ddd443c308 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 1 Dec 2008 22:20:19 -0500 Subject: tracing: add binary buffer files for use with splice Impact: new feature This patch creates a directory of files that correspond to the per CPU ring buffers. These are binary files and are made to be used with splice. This is the fastest way to extract data from the ftrace ring buffers. Thanks to Jiaying Zhang for pushing me to get this code fixed, and to Eduard - Gabriel Munteanu for his splice code that helped me debug my code. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 274 +++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 267 insertions(+), 7 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ea055aa21cd..12539f72f4a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -11,31 +11,30 @@ * Copyright (C) 2004-2006 Ingo Molnar * Copyright (C) 2004 William Lee Irwin III */ +#include #include +#include +#include #include #include #include +#include #include #include #include #include #include +#include #include #include #include +#include #include #include #include #include #include #include -#include -#include -#include - -#include -#include -#include #include "trace.h" #include "trace_output.h" @@ -3005,6 +3004,246 @@ static struct file_operations tracing_mark_fops = { .write = tracing_mark_write, }; +struct ftrace_buffer_info { + struct trace_array *tr; + void *spare; + int cpu; + unsigned int read; +}; + +static int tracing_buffers_open(struct inode *inode, struct file *filp) +{ + int cpu = (int)(long)inode->i_private; + struct ftrace_buffer_info *info; + + if (tracing_disabled) + return -ENODEV; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + return -ENOMEM; + + info->tr = &global_trace; + info->cpu = cpu; + info->spare = ring_buffer_alloc_read_page(info->tr->buffer); + /* Force reading ring buffer for first read */ + info->read = (unsigned int)-1; + if (!info->spare) + goto out; + + filp->private_data = info; + + return 0; + + out: + kfree(info); + return -ENOMEM; +} + +static ssize_t +tracing_buffers_read(struct file *filp, char __user *ubuf, + size_t count, loff_t *ppos) +{ + struct ftrace_buffer_info *info = filp->private_data; + unsigned int pos; + ssize_t ret; + size_t size; + + /* Do we have previous read data to read? */ + if (info->read < PAGE_SIZE) + goto read; + + info->read = 0; + + ret = ring_buffer_read_page(info->tr->buffer, + &info->spare, + count, + info->cpu, 0); + if (ret < 0) + return 0; + + pos = ring_buffer_page_len(info->spare); + + if (pos < PAGE_SIZE) + memset(info->spare + pos, 0, PAGE_SIZE - pos); + +read: + size = PAGE_SIZE - info->read; + if (size > count) + size = count; + + ret = copy_to_user(ubuf, info->spare + info->read, size); + if (ret) + return -EFAULT; + *ppos += size; + info->read += size; + + return size; +} + +static int tracing_buffers_release(struct inode *inode, struct file *file) +{ + struct ftrace_buffer_info *info = file->private_data; + + ring_buffer_free_read_page(info->tr->buffer, info->spare); + kfree(info); + + return 0; +} + +struct buffer_ref { + struct ring_buffer *buffer; + void *page; + int ref; +}; + +static void buffer_pipe_buf_release(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) +{ + struct buffer_ref *ref = (struct buffer_ref *)buf->private; + + if (--ref->ref) + return; + + ring_buffer_free_read_page(ref->buffer, ref->page); + kfree(ref); + buf->private = 0; +} + +static int buffer_pipe_buf_steal(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) +{ + return 1; +} + +static void buffer_pipe_buf_get(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) +{ + struct buffer_ref *ref = (struct buffer_ref *)buf->private; + + ref->ref++; +} + +/* Pipe buffer operations for a buffer. */ +static struct pipe_buf_operations buffer_pipe_buf_ops = { + .can_merge = 0, + .map = generic_pipe_buf_map, + .unmap = generic_pipe_buf_unmap, + .confirm = generic_pipe_buf_confirm, + .release = buffer_pipe_buf_release, + .steal = buffer_pipe_buf_steal, + .get = buffer_pipe_buf_get, +}; + +/* + * Callback from splice_to_pipe(), if we need to release some pages + * at the end of the spd in case we error'ed out in filling the pipe. + */ +static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i) +{ + struct buffer_ref *ref = + (struct buffer_ref *)spd->partial[i].private; + + if (--ref->ref) + return; + + ring_buffer_free_read_page(ref->buffer, ref->page); + kfree(ref); + spd->partial[i].private = 0; +} + +static ssize_t +tracing_buffers_splice_read(struct file *file, loff_t *ppos, + struct pipe_inode_info *pipe, size_t len, + unsigned int flags) +{ + struct ftrace_buffer_info *info = file->private_data; + struct partial_page partial[PIPE_BUFFERS]; + struct page *pages[PIPE_BUFFERS]; + struct splice_pipe_desc spd = { + .pages = pages, + .partial = partial, + .flags = flags, + .ops = &buffer_pipe_buf_ops, + .spd_release = buffer_spd_release, + }; + struct buffer_ref *ref; + int size, i; + size_t ret; + + /* + * We can't seek on a buffer input + */ + if (unlikely(*ppos)) + return -ESPIPE; + + + for (i = 0; i < PIPE_BUFFERS && len; i++, len -= size) { + struct page *page; + int r; + + ref = kzalloc(sizeof(*ref), GFP_KERNEL); + if (!ref) + break; + + ref->buffer = info->tr->buffer; + ref->page = ring_buffer_alloc_read_page(ref->buffer); + if (!ref->page) { + kfree(ref); + break; + } + + r = ring_buffer_read_page(ref->buffer, &ref->page, + len, info->cpu, 0); + if (r < 0) { + ring_buffer_free_read_page(ref->buffer, + ref->page); + kfree(ref); + break; + } + + /* + * zero out any left over data, this is going to + * user land. + */ + size = ring_buffer_page_len(ref->page); + if (size < PAGE_SIZE) + memset(ref->page + size, 0, PAGE_SIZE - size); + + page = virt_to_page(ref->page); + + spd.pages[i] = page; + spd.partial[i].len = PAGE_SIZE; + spd.partial[i].offset = 0; + spd.partial[i].private = (unsigned long)ref; + spd.nr_pages++; + } + + spd.nr_pages = i; + + /* did we read anything? */ + if (!spd.nr_pages) { + if (flags & SPLICE_F_NONBLOCK) + ret = -EAGAIN; + else + ret = 0; + /* TODO: block */ + return ret; + } + + ret = splice_to_pipe(pipe, &spd); + + return ret; +} + +static const struct file_operations tracing_buffers_fops = { + .open = tracing_buffers_open, + .read = tracing_buffers_read, + .release = tracing_buffers_release, + .splice_read = tracing_buffers_splice_read, + .llseek = no_llseek, +}; + #ifdef CONFIG_DYNAMIC_FTRACE int __weak ftrace_arch_read_dyn_info(char *buf, int size) @@ -3399,6 +3638,7 @@ static __init void create_trace_options_dir(void) static __init int tracer_init_debugfs(void) { struct dentry *d_tracer; + struct dentry *buffers; struct dentry *entry; int cpu; @@ -3471,6 +3711,26 @@ static __init int tracer_init_debugfs(void) pr_warning("Could not create debugfs " "'trace_marker' entry\n"); + buffers = debugfs_create_dir("binary_buffers", d_tracer); + + if (!buffers) + pr_warning("Could not create buffers directory\n"); + else { + int cpu; + char buf[64]; + + for_each_tracing_cpu(cpu) { + sprintf(buf, "%d", cpu); + + entry = debugfs_create_file(buf, 0444, buffers, + (void *)(long)cpu, + &tracing_buffers_fops); + if (!entry) + pr_warning("Could not create debugfs buffers " + "'%s' entry\n", buf); + } + } + #ifdef CONFIG_DYNAMIC_FTRACE entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer, &ftrace_update_tot_cnt, -- cgit v1.2.3-18-g5258 From efed792d6738964f399a508ef9e831cd60fa4657 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 4 Mar 2009 12:32:55 +0100 Subject: tracing: add lockdep tracepoints for lock acquire/release Augment the traces with lock names when lockdep is available: 1) | down_read_trylock() { 1) | _spin_lock_irqsave() { 1) | /* lock_acquire: &sem->wait_lock */ 1) 4.201 us | } 1) | _spin_unlock_irqrestore() { 1) | /* lock_release: &sem->wait_lock */ 1) 3.523 us | } 1) | /* lock_acquire: try read &mm->mmap_sem */ 1) + 13.386 us | } 1) 1.635 us | find_vma(); 1) | handle_mm_fault() { 1) | __do_fault() { 1) | filemap_fault() { 1) | find_lock_page() { 1) | find_get_page() { 1) | /* lock_acquire: read rcu_read_lock */ 1) | /* lock_release: rcu_read_lock */ 1) 5.697 us | } 1) 8.158 us | } 1) + 11.079 us | } 1) | _spin_lock() { 1) | /* lock_acquire: __pte_lockptr(page) */ 1) 3.949 us | } 1) 1.460 us | page_add_file_rmap(); 1) | _spin_unlock() { 1) | /* lock_release: __pte_lockptr(page) */ 1) 3.115 us | } 1) | unlock_page() { 1) 1.421 us | page_waitqueue(); 1) 1.220 us | __wake_up_bit(); 1) 6.519 us | } 1) + 34.328 us | } 1) + 37.452 us | } 1) | up_read() { 1) | /* lock_release: &mm->mmap_sem */ 1) | _spin_lock_irqsave() { 1) | /* lock_acquire: &sem->wait_lock */ 1) 3.865 us | } 1) | _spin_unlock_irqrestore() { 1) | /* lock_release: &sem->wait_lock */ 1) 8.562 us | } 1) + 17.370 us | } Signed-off-by: Peter Zijlstra Cc: Steven Rostedt Cc: =?ISO-8859-1?Q?T=F6r=F6k?= Edwin Cc: Jason Baron Cc: Frederic Weisbecker LKML-Reference: <1236166375.5330.7209.camel@laptop> Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 12539f72f4a..c8abbb0c839 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -623,7 +623,7 @@ static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1]; static unsigned map_cmdline_to_pid[SAVED_CMDLINES]; static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN]; static int cmdline_idx; -static DEFINE_SPINLOCK(trace_cmdline_lock); +static raw_spinlock_t trace_cmdline_lock = __RAW_SPIN_LOCK_UNLOCKED; /* temporary disable recording */ static atomic_t trace_record_cmdline_disabled __read_mostly; @@ -735,7 +735,7 @@ static void trace_save_cmdline(struct task_struct *tsk) * nor do we want to disable interrupts, * so if we miss here, then better luck next time. */ - if (!spin_trylock(&trace_cmdline_lock)) + if (!__raw_spin_trylock(&trace_cmdline_lock)) return; idx = map_pid_to_cmdline[tsk->pid]; @@ -753,7 +753,7 @@ static void trace_save_cmdline(struct task_struct *tsk) memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN); - spin_unlock(&trace_cmdline_lock); + __raw_spin_unlock(&trace_cmdline_lock); } char *trace_find_cmdline(int pid) @@ -3751,7 +3751,7 @@ static __init int tracer_init_debugfs(void) int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) { - static DEFINE_SPINLOCK(trace_buf_lock); + static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED; static char trace_buf[TRACE_BUF_SIZE]; struct ring_buffer_event *event; @@ -3773,7 +3773,8 @@ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) goto out; pause_graph_tracing(); - spin_lock_irqsave(&trace_buf_lock, irq_flags); + raw_local_irq_save(irq_flags); + __raw_spin_lock(&trace_buf_lock); len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args); len = min(len, TRACE_BUF_SIZE-1); @@ -3792,7 +3793,8 @@ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) ring_buffer_unlock_commit(tr->buffer, event); out_unlock: - spin_unlock_irqrestore(&trace_buf_lock, irq_flags); + __raw_spin_unlock(&trace_buf_lock); + raw_local_irq_restore(irq_flags); unpause_graph_tracing(); out: preempt_enable_notrace(); -- cgit v1.2.3-18-g5258 From e543ad76914abec1acf6631604a4154cd7a2ca6b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 4 Mar 2009 18:20:36 -0500 Subject: tracing: add cpu_file intialization for ftrace_dump Impact: fix to ftrace_dump output corruption The commit: b04cc6b1f6398b0e0b60d37e27ce51b4899672ec tracing/core: introduce per cpu tracing files added a new field to the iterator called cpu_file. This was a handle to differentiate between the per cpu trace output files and the all cpu "trace" file. The all cpu "trace" file required setting this to TRACE_PIPE_ALL_CPU. The problem is that the ftrace_dump sets up its own iterator but was not updated to handle this change. The result was only CPU 0 printing out on crash and a lot of "<0>"'s also being printed. Reported-by: Thomas Gleixner Tested-by: Darren Hart Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c8abbb0c839..ab5cbcae43a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3918,8 +3918,10 @@ void ftrace_dump(void) printk(KERN_TRACE "Dumping ftrace buffer:\n"); + /* Simulate the iterator */ iter.tr = &global_trace; iter.trace = current_trace; + iter.cpu_file = TRACE_PIPE_ALL_CPU; /* * We need to stop all tracing on all CPUS to read the -- cgit v1.2.3-18-g5258 From 2dc5d12b1f43134e9bc5037f69f4739cfdfab93e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 4 Mar 2009 19:10:05 -0500 Subject: tracing: do not return EFAULT if read copied anything Impact: fix trace read to conform to standards Andrew Morton, Theodore Tso and H. Peter Anvin brought to my attention that a userspace read should not return -EFAULT if it succeeded in copying anything. It should only return -EFAULT if it failed to copy at all. This patch modifies the check of copy_from_user and updates the return code appropriately. I also used H. Peter Anvin's short cut rule to just test ret == count. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ab5cbcae43a..57155dc5353 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -346,6 +346,9 @@ ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt) int len; int ret; + if (!cnt) + return 0; + if (s->len <= s->readpos) return -EBUSY; @@ -353,9 +356,11 @@ ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt) if (cnt > len) cnt = len; ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt); - if (ret) + if (ret == cnt) return -EFAULT; + cnt -= ret; + s->readpos += len; return cnt; } @@ -3049,6 +3054,9 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, ssize_t ret; size_t size; + if (!count) + return 0; + /* Do we have previous read data to read? */ if (info->read < PAGE_SIZE) goto read; @@ -3073,8 +3081,10 @@ read: size = count; ret = copy_to_user(ubuf, info->spare + info->read, size); - if (ret) + if (ret == size) return -EFAULT; + size -= ret; + *ppos += size; info->read += size; -- cgit v1.2.3-18-g5258 From e74da5235cec6cb71eb338c987f876ecc793138b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 4 Mar 2009 20:31:11 -0500 Subject: tracing: fix seq read from trace files The buffer used by trace_seq was updated incorrectly. Instead of consuming what was actually read, it consumed the rest of the buffer on reads. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 57155dc5353..2e53e6f0944 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -361,7 +361,7 @@ ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt) cnt -= ret; - s->readpos += len; + s->readpos += cnt; return cnt; } @@ -380,7 +380,7 @@ ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt) if (!ret) return -EFAULT; - s->readpos += len; + s->readpos += cnt; return cnt; } -- cgit v1.2.3-18-g5258 From c032ef64d680717e4e8ce3da65da6419a35f8a2c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 4 Mar 2009 20:34:24 -0500 Subject: tracing: add latency output format option With the removal of the latency_trace file, we lost the ability to see some of the finer details in a trace. Like the state of interrupts enabled, the preempt count, need resched, and if we are in an interrupt handler, softirq handler or not. This patch simply creates an option to bring back the old format. This also removes the warning about an unused variable that held the latency_trace file operations. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2e53e6f0944..55fcbb56795 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -299,6 +299,7 @@ static const char *trace_options[] = { "sym-userobj", "printk-msg-only", "context-info", + "latency-format", NULL }; @@ -1829,26 +1830,12 @@ static int tracing_open(struct inode *inode, struct file *file) iter = __tracing_open(inode, file); if (IS_ERR(iter)) ret = PTR_ERR(iter); - - return ret; -} - -static int tracing_lt_open(struct inode *inode, struct file *file) -{ - struct trace_iterator *iter; - int ret = 0; - - iter = __tracing_open(inode, file); - - if (IS_ERR(iter)) - ret = PTR_ERR(iter); - else + else if (trace_flags & TRACE_ITER_LATENCY_FMT) iter->iter_flags |= TRACE_FILE_LAT_FMT; return ret; } - static void * t_next(struct seq_file *m, void *v, loff_t *pos) { @@ -1927,13 +1914,6 @@ static struct file_operations tracing_fops = { .release = tracing_release, }; -static struct file_operations tracing_lt_fops = { - .open = tracing_lt_open, - .read = seq_read, - .llseek = seq_lseek, - .release = tracing_release, -}; - static struct file_operations show_traces_fops = { .open = show_traces_open, .read = seq_read, -- cgit v1.2.3-18-g5258 From 5fd73f862468280d4cbb5ba4321502f911f9f89a Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 4 Mar 2009 21:42:04 -0500 Subject: tracing: remove extra latency_trace method from trace structure Impact: clean up The trace and latency_trace function pointers are identical for every tracer but the function tracer. The differences in the function tracer are trivial (latency output puts paranthesis around parent). This patch removes the latency_trace pointer and all prints will now just use the trace output function pointer. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 55fcbb56795..21b89ecb848 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1485,7 +1485,7 @@ static enum print_line_t print_lat_fmt(struct trace_iterator *iter) } if (event) - return event->latency_trace(iter, sym_flags); + return event->trace(iter, sym_flags); if (!trace_seq_printf(s, "Unknown type %d\n", entry->type)) goto partial; -- cgit v1.2.3-18-g5258 From 27d48be84477d2f0a2e2ac3738a3971dece631d5 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 4 Mar 2009 21:57:29 -0500 Subject: tracing: consolidate print_lat_fmt and print_trace_fmt Impact: clean up Both print_lat_fmt and print_trace_fmt do pretty much the same thing except for one different function call. This patch consolidates the two functions and adds an if statement to perform the difference. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 39 +++++++-------------------------------- 1 file changed, 7 insertions(+), 32 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 21b89ecb848..d1ef43999d9 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1468,33 +1468,6 @@ static void test_cpu_buff_start(struct trace_iterator *iter) trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu); } -static enum print_line_t print_lat_fmt(struct trace_iterator *iter) -{ - struct trace_seq *s = &iter->seq; - unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); - struct trace_event *event; - struct trace_entry *entry = iter->ent; - - test_cpu_buff_start(iter); - - event = ftrace_find_event(entry->type); - - if (trace_flags & TRACE_ITER_CONTEXT_INFO) { - if (!trace_print_lat_context(iter)) - goto partial; - } - - if (event) - return event->trace(iter, sym_flags); - - if (!trace_seq_printf(s, "Unknown type %d\n", entry->type)) - goto partial; - - return TRACE_TYPE_HANDLED; -partial: - return TRACE_TYPE_PARTIAL_LINE; -} - static enum print_line_t print_trace_fmt(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; @@ -1509,8 +1482,13 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) event = ftrace_find_event(entry->type); if (trace_flags & TRACE_ITER_CONTEXT_INFO) { - if (!trace_print_context(iter)) - goto partial; + if (iter->iter_flags & TRACE_FILE_LAT_FMT) { + if (!trace_print_lat_context(iter)) + goto partial; + } else { + if (!trace_print_context(iter)) + goto partial; + } } if (event) @@ -1652,9 +1630,6 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter) if (trace_flags & TRACE_ITER_RAW) return print_raw_fmt(iter); - if (iter->iter_flags & TRACE_FILE_LAT_FMT) - return print_lat_fmt(iter); - return print_trace_fmt(iter); } -- cgit v1.2.3-18-g5258 From 5e1607a00bd082972629d3d68c95c8bcf902b55a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 5 Mar 2009 10:24:48 +0100 Subject: tracing: rename ftrace_printk() => trace_printk() Impact: cleanup Use a more generic name - this also allows the prototype to move to kernel.h and be generally available to kernel developers who want to do some quick tracing. Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d1ef43999d9..c0e9c126339 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -48,7 +48,7 @@ unsigned long __read_mostly tracing_thresh; * We need to change this state when a selftest is running. * A selftest will lurk into the ring-buffer to count the * entries inserted during the selftest although some concurrent - * insertions into the ring-buffer such as ftrace_printk could occurred + * insertions into the ring-buffer such as trace_printk could occurred * at the same time, giving false positive or negative results. */ static bool __read_mostly tracing_selftest_running; @@ -291,7 +291,7 @@ static const char *trace_options[] = { "block", "stacktrace", "sched-tree", - "ftrace_printk", + "trace_printk", "ftrace_preempt", "branch", "annotate", @@ -3768,7 +3768,7 @@ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) } EXPORT_SYMBOL_GPL(trace_vprintk); -int __ftrace_printk(unsigned long ip, const char *fmt, ...) +int __trace_printk(unsigned long ip, const char *fmt, ...) { int ret; va_list ap; @@ -3781,7 +3781,7 @@ int __ftrace_printk(unsigned long ip, const char *fmt, ...) va_end(ap); return ret; } -EXPORT_SYMBOL_GPL(__ftrace_printk); +EXPORT_SYMBOL_GPL(__trace_printk); int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap) { -- cgit v1.2.3-18-g5258 From 5e2336a0d47c9661a40cc5ef85135ce1406af6e8 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 5 Mar 2009 21:44:55 -0500 Subject: tracing: make all file_operations const Impact: cleanup All file_operations structures should be constant. No one is going to change them. Reported-by: Andrew Morton Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c0e9c126339..e6144acf2b7 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1882,14 +1882,14 @@ static int show_traces_open(struct inode *inode, struct file *file) return ret; } -static struct file_operations tracing_fops = { +static const struct file_operations tracing_fops = { .open = tracing_open, .read = seq_read, .llseek = seq_lseek, .release = tracing_release, }; -static struct file_operations show_traces_fops = { +static const struct file_operations show_traces_fops = { .open = show_traces_open, .read = seq_read, .release = seq_release, @@ -1982,7 +1982,7 @@ err_unlock: return err; } -static struct file_operations tracing_cpumask_fops = { +static const struct file_operations tracing_cpumask_fops = { .open = tracing_open_generic, .read = tracing_cpumask_read, .write = tracing_cpumask_write, @@ -2134,7 +2134,7 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf, return cnt; } -static struct file_operations tracing_iter_fops = { +static const struct file_operations tracing_iter_fops = { .open = tracing_open_generic, .read = tracing_trace_options_read, .write = tracing_trace_options_write, @@ -2167,7 +2167,7 @@ tracing_readme_read(struct file *filp, char __user *ubuf, readme_msg, strlen(readme_msg)); } -static struct file_operations tracing_readme_fops = { +static const struct file_operations tracing_readme_fops = { .open = tracing_open_generic, .read = tracing_readme_read, }; @@ -2927,25 +2927,25 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, return cnt; } -static struct file_operations tracing_max_lat_fops = { +static const struct file_operations tracing_max_lat_fops = { .open = tracing_open_generic, .read = tracing_max_lat_read, .write = tracing_max_lat_write, }; -static struct file_operations tracing_ctrl_fops = { +static const struct file_operations tracing_ctrl_fops = { .open = tracing_open_generic, .read = tracing_ctrl_read, .write = tracing_ctrl_write, }; -static struct file_operations set_tracer_fops = { +static const struct file_operations set_tracer_fops = { .open = tracing_open_generic, .read = tracing_set_trace_read, .write = tracing_set_trace_write, }; -static struct file_operations tracing_pipe_fops = { +static const struct file_operations tracing_pipe_fops = { .open = tracing_open_pipe, .poll = tracing_poll_pipe, .read = tracing_read_pipe, @@ -2953,13 +2953,13 @@ static struct file_operations tracing_pipe_fops = { .release = tracing_release_pipe, }; -static struct file_operations tracing_entries_fops = { +static const struct file_operations tracing_entries_fops = { .open = tracing_open_generic, .read = tracing_entries_read, .write = tracing_entries_write, }; -static struct file_operations tracing_mark_fops = { +static const struct file_operations tracing_mark_fops = { .open = tracing_open_generic, .write = tracing_mark_write, }; @@ -3240,7 +3240,7 @@ tracing_read_dyn_info(struct file *filp, char __user *ubuf, return r; } -static struct file_operations tracing_dyn_info_fops = { +static const struct file_operations tracing_dyn_info_fops = { .open = tracing_open_generic, .read = tracing_read_dyn_info, }; -- cgit v1.2.3-18-g5258 From 1427cdf0592368bdec57276edaf714040ee8744f Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Fri, 6 Mar 2009 17:21:47 +0100 Subject: tracing: infrastructure for supporting binary record Impact: save on memory for tracing Current tracers are typically using a struct(like struct ftrace_entry, struct ctx_switch_entry, struct special_entr etc...)to record a binary event. These structs can only record a their own kind of events. A new kind of tracer need a new struct and a lot of code too handle it. So we need a generic binary record for events. This infrastructure is for this purpose. [fweisbec@gmail.com: rebase against latest -tip, make it safe while sched tracing as reported by Steven Rostedt] Signed-off-by: Lai Jiangshan Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt LKML-Reference: <1236356510-8381-3-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index e6144acf2b7..ff53509e19f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3792,6 +3792,62 @@ int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap) } EXPORT_SYMBOL_GPL(__ftrace_vprintk); +/** + * trace_vbprintk - write binary msg to tracing buffer + * + * Caller must insure @fmt are valid when msg is in tracing buffer. + */ +int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) +{ + static DEFINE_SPINLOCK(trace_buf_lock); + static u32 trace_buf[TRACE_BUF_SIZE]; + + struct ring_buffer_event *event; + struct trace_array *tr = &global_trace; + struct trace_array_cpu *data; + struct bprintk_entry *entry; + unsigned long flags; + int resched; + int cpu, len = 0, size, pc; + + if (tracing_disabled || !trace_bprintk_enable) + return 0; + + pc = preempt_count(); + resched = ftrace_preempt_disable(); + cpu = raw_smp_processor_id(); + data = tr->data[cpu]; + + if (unlikely(atomic_read(&data->disabled))) + goto out; + + spin_lock_irqsave(&trace_buf_lock, flags); + len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args); + + if (len > TRACE_BUF_SIZE || len < 0) + goto out_unlock; + + size = sizeof(*entry) + sizeof(u32) * len; + event = trace_buffer_lock_reserve(tr, TRACE_BPRINTK, size, flags, pc); + if (!event) + goto out_unlock; + entry = ring_buffer_event_data(event); + entry->ip = ip; + entry->fmt = fmt; + + memcpy(entry->buf, trace_buf, sizeof(u32) * len); + ring_buffer_unlock_commit(tr->buffer, event); + +out_unlock: + spin_unlock_irqrestore(&trace_buf_lock, flags); + +out: + ftrace_preempt_enable(resched); + + return len; +} +EXPORT_SYMBOL_GPL(trace_vbprintk); + static int trace_panic_handler(struct notifier_block *this, unsigned long event, void *unused) { -- cgit v1.2.3-18-g5258 From 1ba28e02a18cbdbea123836f6c98efb09cbf59ec Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Fri, 6 Mar 2009 17:21:48 +0100 Subject: tracing: add trace_bprintk() Impact: add a generic printk() for tracing, like trace_printk() trace_bprintk() uses the infrastructure to record events on ring_buffer. [ fweisbec@gmail.com: ported to latest -tip, made it work if !CONFIG_MODULES, never free the format strings from modules because we can't keep track of them and conditionnaly create the ftrace format strings section (reported by Steven Rostedt) ] Signed-off-by: Lai Jiangshan Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt LKML-Reference: <1236356510-8381-4-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ff53509e19f..46b3cd7a575 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3848,6 +3848,21 @@ out: } EXPORT_SYMBOL_GPL(trace_vbprintk); +int __trace_bprintk(unsigned long ip, const char *fmt, ...) +{ + int ret; + va_list ap; + + if (!fmt) + return 0; + + va_start(ap, fmt); + ret = trace_vbprintk(ip, fmt, ap); + va_end(ap); + return ret; +} +EXPORT_SYMBOL_GPL(__trace_bprintk); + static int trace_panic_handler(struct notifier_block *this, unsigned long event, void *unused) { -- cgit v1.2.3-18-g5258 From 769b0441f438c4bb4872cb8560eb6fe51bcc09ee Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 6 Mar 2009 17:21:49 +0100 Subject: tracing/core: drop the old trace_printk() implementation in favour of trace_bprintk() Impact: faster and lighter tracing Now that we have trace_bprintk() which is faster and consume lesser memory than trace_printk() and has the same purpose, we can now drop the old implementation in favour of the binary one from trace_bprintk(), which means we move all the implementation of trace_bprintk() to trace_printk(), so the Api doesn't change except that we must now use trace_seq_bprintk() to print the TRACE_PRINT entries. Some changes result of this: - Previously, trace_bprintk depended of a single tracer and couldn't work without. This tracer has been dropped and the whole implementation of trace_printk() (like the module formats management) is now integrated in the tracing core (comes with CONFIG_TRACING), though we keep the file trace_printk (previously trace_bprintk.c) where we can find the module management. Thus we don't overflow trace.c - changes some parts to use trace_seq_bprintk() to print TRACE_PRINT entries. - change a bit trace_printk/trace_vprintk macros to support non-builtin formats constants, and fix 'const' qualifiers warnings. But this is all transparent for developers. - etc... V2: - Rebase against last changes - Fix mispell on the changelog V3: - Rebase against last changes (moving trace_printk() to kernel.h) Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt LKML-Reference: <1236356510-8381-5-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 212 +++++++++++++++------------------------------------ 1 file changed, 62 insertions(+), 150 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 46b3cd7a575..cc94f864248 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1169,6 +1169,67 @@ void trace_graph_return(struct ftrace_graph_ret *trace) } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + +/** + * trace_vprintk - write binary msg to tracing buffer + * + */ +int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) +{ + static DEFINE_SPINLOCK(trace_buf_lock); + static u32 trace_buf[TRACE_BUF_SIZE]; + + struct ring_buffer_event *event; + struct trace_array *tr = &global_trace; + struct trace_array_cpu *data; + struct print_entry *entry; + unsigned long flags; + int resched; + int cpu, len = 0, size, pc; + + if (unlikely(tracing_selftest_running || tracing_disabled)) + return 0; + + /* Don't pollute graph traces with trace_vprintk internals */ + pause_graph_tracing(); + + pc = preempt_count(); + resched = ftrace_preempt_disable(); + cpu = raw_smp_processor_id(); + data = tr->data[cpu]; + + if (unlikely(atomic_read(&data->disabled))) + goto out; + + spin_lock_irqsave(&trace_buf_lock, flags); + len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args); + + if (len > TRACE_BUF_SIZE || len < 0) + goto out_unlock; + + size = sizeof(*entry) + sizeof(u32) * len; + event = trace_buffer_lock_reserve(tr, TRACE_PRINT, size, flags, pc); + if (!event) + goto out_unlock; + entry = ring_buffer_event_data(event); + entry->ip = ip; + entry->depth = depth; + entry->fmt = fmt; + + memcpy(entry->buf, trace_buf, sizeof(u32) * len); + ring_buffer_unlock_commit(tr->buffer, event); + +out_unlock: + spin_unlock_irqrestore(&trace_buf_lock, flags); + +out: + ftrace_preempt_enable(resched); + unpause_graph_tracing(); + + return len; +} +EXPORT_SYMBOL_GPL(trace_vprintk); + enum trace_file_type { TRACE_FILE_LAT_FMT = 1, TRACE_FILE_ANNOTATE = 2, @@ -1564,7 +1625,7 @@ static enum print_line_t print_printk_msg_only(struct trace_iterator *iter) trace_assign_type(field, entry); - ret = trace_seq_printf(s, "%s", field->buf); + ret = trace_seq_bprintf(s, field->fmt, field->buf); if (!ret) return TRACE_TYPE_PARTIAL_LINE; @@ -3714,155 +3775,6 @@ static __init int tracer_init_debugfs(void) return 0; } -int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) -{ - static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED; - static char trace_buf[TRACE_BUF_SIZE]; - - struct ring_buffer_event *event; - struct trace_array *tr = &global_trace; - struct trace_array_cpu *data; - int cpu, len = 0, size, pc; - struct print_entry *entry; - unsigned long irq_flags; - - if (tracing_disabled || tracing_selftest_running) - return 0; - - pc = preempt_count(); - preempt_disable_notrace(); - cpu = raw_smp_processor_id(); - data = tr->data[cpu]; - - if (unlikely(atomic_read(&data->disabled))) - goto out; - - pause_graph_tracing(); - raw_local_irq_save(irq_flags); - __raw_spin_lock(&trace_buf_lock); - len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args); - - len = min(len, TRACE_BUF_SIZE-1); - trace_buf[len] = 0; - - size = sizeof(*entry) + len + 1; - event = trace_buffer_lock_reserve(tr, TRACE_PRINT, size, irq_flags, pc); - if (!event) - goto out_unlock; - entry = ring_buffer_event_data(event); - entry->ip = ip; - entry->depth = depth; - - memcpy(&entry->buf, trace_buf, len); - entry->buf[len] = 0; - ring_buffer_unlock_commit(tr->buffer, event); - - out_unlock: - __raw_spin_unlock(&trace_buf_lock); - raw_local_irq_restore(irq_flags); - unpause_graph_tracing(); - out: - preempt_enable_notrace(); - - return len; -} -EXPORT_SYMBOL_GPL(trace_vprintk); - -int __trace_printk(unsigned long ip, const char *fmt, ...) -{ - int ret; - va_list ap; - - if (!(trace_flags & TRACE_ITER_PRINTK)) - return 0; - - va_start(ap, fmt); - ret = trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap); - va_end(ap); - return ret; -} -EXPORT_SYMBOL_GPL(__trace_printk); - -int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap) -{ - if (!(trace_flags & TRACE_ITER_PRINTK)) - return 0; - - return trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap); -} -EXPORT_SYMBOL_GPL(__ftrace_vprintk); - -/** - * trace_vbprintk - write binary msg to tracing buffer - * - * Caller must insure @fmt are valid when msg is in tracing buffer. - */ -int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) -{ - static DEFINE_SPINLOCK(trace_buf_lock); - static u32 trace_buf[TRACE_BUF_SIZE]; - - struct ring_buffer_event *event; - struct trace_array *tr = &global_trace; - struct trace_array_cpu *data; - struct bprintk_entry *entry; - unsigned long flags; - int resched; - int cpu, len = 0, size, pc; - - if (tracing_disabled || !trace_bprintk_enable) - return 0; - - pc = preempt_count(); - resched = ftrace_preempt_disable(); - cpu = raw_smp_processor_id(); - data = tr->data[cpu]; - - if (unlikely(atomic_read(&data->disabled))) - goto out; - - spin_lock_irqsave(&trace_buf_lock, flags); - len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args); - - if (len > TRACE_BUF_SIZE || len < 0) - goto out_unlock; - - size = sizeof(*entry) + sizeof(u32) * len; - event = trace_buffer_lock_reserve(tr, TRACE_BPRINTK, size, flags, pc); - if (!event) - goto out_unlock; - entry = ring_buffer_event_data(event); - entry->ip = ip; - entry->fmt = fmt; - - memcpy(entry->buf, trace_buf, sizeof(u32) * len); - ring_buffer_unlock_commit(tr->buffer, event); - -out_unlock: - spin_unlock_irqrestore(&trace_buf_lock, flags); - -out: - ftrace_preempt_enable(resched); - - return len; -} -EXPORT_SYMBOL_GPL(trace_vbprintk); - -int __trace_bprintk(unsigned long ip, const char *fmt, ...) -{ - int ret; - va_list ap; - - if (!fmt) - return 0; - - va_start(ap, fmt); - ret = trace_vbprintk(ip, fmt, ap); - va_end(ap); - return ret; -} -EXPORT_SYMBOL_GPL(__trace_bprintk); - static int trace_panic_handler(struct notifier_block *this, unsigned long event, void *unused) { -- cgit v1.2.3-18-g5258 From 888b55dc314d26239d84c3b187dae555a81c1605 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Sun, 8 Mar 2009 13:12:43 +0900 Subject: ftrace: tracing header should put '#' at the beginning of a line In a recent discussion, Andrew Morton pointed out that tracing header should put '#' at the beginning of a line. Then, we can easily filtered the header by following grep usage: cat trace | grep -v '^#' Wakeup trace also has the same header problem. Comparison of headers displayed: before this patch: # tracer: wakeup # wakeup latency trace v1.1.5 on 2.6.29-rc7-tip-tip -------------------------------------------------------------------- latency: 19059 us, #21277/21277, CPU#1 | (M:desktop VP:0, KP:0, SP:0 HP:0 #P:4) ----------------- | task: kondemand/1-1644 (uid:0 nice:-5 policy:0 rt_prio:0) ----------------- # _------=> CPU# # / _-----=> irqs-off # | / _----=> need-resched # || / _---=> hardirq/softirq # ||| / _--=> preempt-depth # |||| / # ||||| delay # cmd pid ||||| time | caller # \ / ||||| \ | / irqbalan-1887 1d.s. 0us : 1887:120:R + [001] 1644:115:S kondemand/1 irqbalan-1887 1d.s. 1us : default_wake_function <-autoremove_wake_function irqbalan-1887 1d.s. 2us : check_preempt_wakeup <-try_to_wake_up after this patch: # tracer: wakeup # # wakeup latency trace v1.1.5 on 2.6.29-rc7-tip-tip # -------------------------------------------------------------------- # latency: 529 us, #530/530, CPU#0 | (M:desktop VP:0, KP:0, SP:0 HP:0 #P:4) # ----------------- # | task: kondemand/0-1641 (uid:0 nice:-5 policy:0 rt_prio:0) # ----------------- # # _------=> CPU# # / _-----=> irqs-off # | / _----=> need-resched # || / _---=> hardirq/softirq # ||| / _--=> preempt-depth # |||| / # ||||| delay # cmd pid ||||| time | caller # \ / ||||| \ | / sshd-2496 0d.s. 0us : 2496:120:R + [000] 1641:115:S kondemand/0 sshd-2496 0d.s. 1us : default_wake_function <-autoremove_wake_function sshd-2496 0d.s. 1us : check_preempt_wakeup <-try_to_wake_up Signed-off-by: KOSAKI Motohiro Cc: Andrew Morton LKML-Reference: <20090308124421.23C3.A69D9226@jp.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index cc94f864248..e5b56199e5e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1466,11 +1466,11 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter) total = entries + ring_buffer_overruns(iter->tr->buffer); - seq_printf(m, "%s latency trace v1.1.5 on %s\n", + seq_printf(m, "# %s latency trace v1.1.5 on %s\n", name, UTS_RELEASE); - seq_puts(m, "-----------------------------------" + seq_puts(m, "# -----------------------------------" "---------------------------------\n"); - seq_printf(m, " latency: %lu us, #%lu/%lu, CPU#%d |" + seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |" " (M:%s VP:%d, KP:%d, SP:%d HP:%d", nsecs_to_usecs(data->saved_latency), entries, @@ -1492,24 +1492,24 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter) #else seq_puts(m, ")\n"); #endif - seq_puts(m, " -----------------\n"); - seq_printf(m, " | task: %.16s-%d " + seq_puts(m, "# -----------------\n"); + seq_printf(m, "# | task: %.16s-%d " "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n", data->comm, data->pid, data->uid, data->nice, data->policy, data->rt_priority); - seq_puts(m, " -----------------\n"); + seq_puts(m, "# -----------------\n"); if (data->critical_start) { - seq_puts(m, " => started at: "); + seq_puts(m, "# => started at: "); seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags); trace_print_seq(m, &iter->seq); - seq_puts(m, "\n => ended at: "); + seq_puts(m, "\n# => ended at: "); seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags); trace_print_seq(m, &iter->seq); - seq_puts(m, "\n"); + seq_puts(m, "#\n"); } - seq_puts(m, "\n"); + seq_puts(m, "#\n"); } static void test_cpu_buff_start(struct trace_iterator *iter) -- cgit v1.2.3-18-g5258 From ef18012b248b47ec9a12c3a83ca5e99782d39c5d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 10 Mar 2009 14:10:56 -0400 Subject: tracing: remove funky whitespace in the trace code Impact: clean up There existed a lot of 's in the tracing code. This patch removes them. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index cc94f864248..8c6a902db40 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -799,7 +799,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, entry->preempt_count = pc & 0xff; entry->pid = (tsk) ? tsk->pid : 0; - entry->tgid = (tsk) ? tsk->tgid : 0; + entry->tgid = (tsk) ? tsk->tgid : 0; entry->flags = #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | -- cgit v1.2.3-18-g5258 From 80370cb758e7ca2692cd9fb5e413d970b1f4b2b2 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 10 Mar 2009 17:16:35 -0400 Subject: tracing: use raw spinlocks for trace_vprintk Impact: prevent locking up by lockdep tracer The lockdep tracer uses trace_vprintk and thus trace_vprintk can not call back into lockdep without locking up. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8c6a902db40..4c97947650a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1176,7 +1176,8 @@ void trace_graph_return(struct ftrace_graph_ret *trace) */ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) { - static DEFINE_SPINLOCK(trace_buf_lock); + static raw_spinlock_t trace_buf_lock = + (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; static u32 trace_buf[TRACE_BUF_SIZE]; struct ring_buffer_event *event; @@ -1201,7 +1202,9 @@ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) if (unlikely(atomic_read(&data->disabled))) goto out; - spin_lock_irqsave(&trace_buf_lock, flags); + /* Lockdep uses trace_printk for lock tracing */ + local_irq_save(flags); + __raw_spin_lock(&trace_buf_lock); len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args); if (len > TRACE_BUF_SIZE || len < 0) @@ -1220,7 +1223,8 @@ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) ring_buffer_unlock_commit(tr->buffer, event); out_unlock: - spin_unlock_irqrestore(&trace_buf_lock, flags); + __raw_spin_unlock(&trace_buf_lock); + local_irq_restore(flags); out: ftrace_preempt_enable(resched); -- cgit v1.2.3-18-g5258 From 73c5162aa362a543793f4a957c6c536dcbaa89ce Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 11 Mar 2009 13:42:01 -0400 Subject: tracing: keep ring buffer to minimum size till used Impact: less memory impact on systems not using tracer When the kernel boots up that has tracing configured, it allocates the default size of the ring buffer. This currently happens to be 1.4Megs per possible CPU. This is quite a bit of wasted memory if the system is never using the tracer. The current solution is to keep the ring buffers to a minimum size until the user uses them. Once a tracer is piped into the current_tracer the ring buffer will be expanded to the default size. If the user changes the size of the ring buffer, it will take the size given by the user immediately. If the user adds a "ftrace=" to the kernel command line, then the ring buffers will be set to the default size on initialization. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 79 +++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 59 insertions(+), 20 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4c97947650a..0c1dc185085 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -44,6 +44,12 @@ unsigned long __read_mostly tracing_max_latency; unsigned long __read_mostly tracing_thresh; +/* + * On boot up, the ring buffer is set to the minimum size, so that + * we do not waste memory on systems that are not using tracing. + */ +static int ring_buffer_expanded; + /* * We need to change this state when a selftest is running. * A selftest will lurk into the ring-buffer to count the @@ -128,6 +134,8 @@ static int __init set_ftrace(char *str) { strncpy(bootup_tracer_buf, str, BOOTUP_TRACER_SIZE); default_bootup_tracer = bootup_tracer_buf; + /* We are using ftrace early, expand it */ + ring_buffer_expanded = 1; return 1; } __setup("ftrace=", set_ftrace); @@ -2315,6 +2323,40 @@ int tracer_init(struct tracer *t, struct trace_array *tr) return t->init(tr); } +static int tracing_resize_ring_buffer(unsigned long size) +{ + int ret; + + /* + * If kernel or user changes the size of the ring buffer + * it get completed. + */ + ring_buffer_expanded = 1; + + ret = ring_buffer_resize(global_trace.buffer, size); + if (ret < 0) + return ret; + + ret = ring_buffer_resize(max_tr.buffer, size); + if (ret < 0) { + int r; + + r = ring_buffer_resize(global_trace.buffer, + global_trace.entries); + if (r < 0) { + /* AARGH! We are left with different + * size max buffer!!!! */ + WARN_ON(1); + tracing_disabled = 1; + } + return ret; + } + + global_trace.entries = size; + + return ret; +} + struct trace_option_dentry; static struct trace_option_dentry * @@ -2330,6 +2372,13 @@ static int tracing_set_tracer(const char *buf) struct tracer *t; int ret = 0; + if (!ring_buffer_expanded) { + ret = tracing_resize_ring_buffer(trace_buf_size); + if (ret < 0) + return ret; + ret = 0; + } + mutex_lock(&trace_types_lock); for (t = trace_types; t; t = t->next) { if (strcmp(t->name, buf) == 0) @@ -2903,28 +2952,11 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, val <<= 10; if (val != global_trace.entries) { - ret = ring_buffer_resize(global_trace.buffer, val); + ret = tracing_resize_ring_buffer(val); if (ret < 0) { cnt = ret; goto out; } - - ret = ring_buffer_resize(max_tr.buffer, val); - if (ret < 0) { - int r; - cnt = ret; - r = ring_buffer_resize(global_trace.buffer, - global_trace.entries); - if (r < 0) { - /* AARGH! We are left with different - * size max buffer!!!! */ - WARN_ON(1); - tracing_disabled = 1; - } - goto out; - } - - global_trace.entries = val; } filp->f_pos += cnt; @@ -3916,6 +3948,7 @@ void ftrace_dump(void) __init static int tracer_alloc_buffers(void) { struct trace_array_cpu *data; + int ring_buf_size; int i; int ret = -ENOMEM; @@ -3928,12 +3961,18 @@ __init static int tracer_alloc_buffers(void) if (!alloc_cpumask_var(&tracing_reader_cpumask, GFP_KERNEL)) goto out_free_tracing_cpumask; + /* To save memory, keep the ring buffer size to its minimum */ + if (ring_buffer_expanded) + ring_buf_size = trace_buf_size; + else + ring_buf_size = 1; + cpumask_copy(tracing_buffer_mask, cpu_possible_mask); cpumask_copy(tracing_cpumask, cpu_all_mask); cpumask_clear(tracing_reader_cpumask); /* TODO: make the number of buffers hot pluggable with CPUS */ - global_trace.buffer = ring_buffer_alloc(trace_buf_size, + global_trace.buffer = ring_buffer_alloc(ring_buf_size, TRACE_BUFFER_FLAGS); if (!global_trace.buffer) { printk(KERN_ERR "tracer: failed to allocate ring buffer!\n"); @@ -3944,7 +3983,7 @@ __init static int tracer_alloc_buffers(void) #ifdef CONFIG_TRACER_MAX_TRACE - max_tr.buffer = ring_buffer_alloc(trace_buf_size, + max_tr.buffer = ring_buffer_alloc(ring_buf_size, TRACE_BUFFER_FLAGS); if (!max_tr.buffer) { printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n"); -- cgit v1.2.3-18-g5258 From 1852fcce181faa237c010a3dbedb473cf9d4555f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 11 Mar 2009 14:33:00 -0400 Subject: tracing: expand the ring buffers when an event is activated To save memory, the tracer ring buffers are set to a minimum. The activating of a trace expands the ring buffer size. This patch adds this expanding, when an event is activated. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0c1dc185085..35ee63ae412 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2357,6 +2357,26 @@ static int tracing_resize_ring_buffer(unsigned long size) return ret; } +/** + * tracing_update_buffers - used by tracing facility to expand ring buffers + * + * To save on memory when the tracing is never used on a system with it + * configured in. The ring buffers are set to a minimum size. But once + * a user starts to use the tracing facility, then they need to grow + * to their default size. + * + * This function is to be called when a tracer is about to be used. + */ +int tracing_update_buffers(void) +{ + int ret = 0; + + if (!ring_buffer_expanded) + ret = tracing_resize_ring_buffer(trace_buf_size); + + return ret; +} + struct trace_option_dentry; static struct trace_option_dentry * -- cgit v1.2.3-18-g5258 From 9aba60fe6eb20453de53a572143bef22fa929fba Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 11 Mar 2009 19:52:30 -0400 Subject: tracing: fix trace_wait to know to wait on all cpus or just one Impact: fix to task live locking on reading trace_pipe on one CPU The same code is used for both trace_pipe (all CPUS) and the per_cpu trace_pipe file. When there is no data to read, it will check for signals and wait on the trace wait queue. The problem happens with the per_cpu wait. The trace_wait code checks all CPUs. Thus, if there's data in another CPU buffer, then it will exit the wait, without checking for signals or waiting on the wait queue. It would then try to read the empty buffer, and since that will just return nothing, then it will try to wait again. Unfortunately, that will again fail due to there still being data in the other buffers. This ends up with a live lock for the task. This patch fixes the trace_wait to be aware that the iterator may only be waiting on a single buffer. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 35ee63ae412..e60f4be10d6 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1666,6 +1666,19 @@ static int trace_empty(struct trace_iterator *iter) { int cpu; + /* If we are looking at one CPU buffer, only check that one */ + if (iter->cpu_file != TRACE_PIPE_ALL_CPU) { + cpu = iter->cpu_file; + if (iter->buffer_iter[cpu]) { + if (!ring_buffer_iter_empty(iter->buffer_iter[cpu])) + return 0; + } else { + if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu)) + return 0; + } + return 1; + } + for_each_tracing_cpu(cpu) { if (iter->buffer_iter[cpu]) { if (!ring_buffer_iter_empty(iter->buffer_iter[cpu])) -- cgit v1.2.3-18-g5258 From 554f786e284a6ce859d51f62240d615603944c8e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 11 Mar 2009 22:00:13 -0400 Subject: ring-buffer: only allocate buffers for online cpus Impact: save on memory Currently, a ring buffer was allocated for each "possible_cpus". On some systems, this is the same as NR_CPUS. Thus, if a system defined NR_CPUS = 64 but it only had 1 CPU, we could have possibly 63 useless ring buffers taking up space. With a default buffer of 3 megs, this could be quite drastic. This patch changes the ring buffer code to only allocate ring buffers for online CPUs. If a CPU goes off line, we do not free the buffer. This is because the user may still have trace data in that buffer that they would like to look at. Perhaps in the future we could add code to delete a ring buffer if the CPU is offline and the ring buffer becomes empty. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index e60f4be10d6..14c98f6a47b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1805,17 +1805,11 @@ __tracing_open(struct inode *inode, struct file *file) iter->buffer_iter[cpu] = ring_buffer_read_start(iter->tr->buffer, cpu); - - if (!iter->buffer_iter[cpu]) - goto fail_buffer; } } else { cpu = iter->cpu_file; iter->buffer_iter[cpu] = ring_buffer_read_start(iter->tr->buffer, cpu); - - if (!iter->buffer_iter[cpu]) - goto fail; } /* TODO stop tracer */ -- cgit v1.2.3-18-g5258 From a123c52b46a1f84bcec3dc963351896c6d6afaf7 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 12 Mar 2009 11:21:08 -0400 Subject: tracing: fix comments about trace buffer resizing Impact: cleanup Some of the comments about the trace buffer resizing is gobbledygook. And I wonder why people question if I'm a native English speaker. This patch makes the comments make a bit more sense. Reported-by: Andrew Morton Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c3946a6df34..c61ee85c50b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2336,7 +2336,8 @@ static int tracing_resize_ring_buffer(unsigned long size) /* * If kernel or user changes the size of the ring buffer - * it get completed. + * we use the size that was given, and we can forget about + * expanding it later. */ ring_buffer_expanded = 1; @@ -2351,8 +2352,20 @@ static int tracing_resize_ring_buffer(unsigned long size) r = ring_buffer_resize(global_trace.buffer, global_trace.entries); if (r < 0) { - /* AARGH! We are left with different - * size max buffer!!!! */ + /* + * AARGH! We are left with different + * size max buffer!!!! + * The max buffer is our "snapshot" buffer. + * When a tracer needs a snapshot (one of the + * latency tracers), it swaps the max buffer + * with the saved snap shot. We succeeded to + * update the size of the main buffer, but failed to + * update the size of the max buffer. But when we tried + * to reset the main buffer to the original size, we + * failed there too. This is very unlikely to + * happen, but if it does, warn and kill all + * tracing. + */ WARN_ON(1); tracing_disabled = 1; } -- cgit v1.2.3-18-g5258 From 1027fcb206a0fb8348e63aff078c74bdee1c2698 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 12 Mar 2009 11:33:20 -0400 Subject: tracing: protect ring_buffer_expanded with trace_types_lock Impact: prevent races with ring_buffer_expanded This patch places the expanding of the tracing buffer under the protection of the trace_types_lock mutex. It is highly unlikely that there would be any contention, but better safe than sorry. Reported-by: Andrew Morton Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c61ee85c50b..04ab8243a13 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2391,8 +2391,10 @@ int tracing_update_buffers(void) { int ret = 0; + mutex_lock(&trace_types_lock); if (!ring_buffer_expanded) ret = tracing_resize_ring_buffer(trace_buf_size); + mutex_unlock(&trace_types_lock); return ret; } @@ -2412,6 +2414,8 @@ static int tracing_set_tracer(const char *buf) struct tracer *t; int ret = 0; + mutex_lock(&trace_types_lock); + if (!ring_buffer_expanded) { ret = tracing_resize_ring_buffer(trace_buf_size); if (ret < 0) @@ -2419,7 +2423,6 @@ static int tracing_set_tracer(const char *buf) ret = 0; } - mutex_lock(&trace_types_lock); for (t = trace_types; t; t = t->next) { if (strcmp(t->name, buf) == 0) break; -- cgit v1.2.3-18-g5258 From db526ca329f855510e8ce672332eba3304aed590 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 12 Mar 2009 13:53:25 -0400 Subject: tracing: show that buffer size is not expanded Impact: do not confuse user on small trace buffer sizes When the system boots up, the trace buffer is small to conserve memory. It is only two pages per online CPU. When the tracer is used, it expands to the default value. This can confuse the user if they look at the buffer size and see only 7, but then later they see 1408. # cat /debug/tracing/buffer_size_kb 7 # echo sched_switch > /debug/tracing/current_tracer # cat /debug/tracing/buffer_size_kb 1408 This patch tries to help remove this confustion by showing that the buffer has not been expanded. # cat /debug/tracing/buffer_size_kb 7 (expanded: 1408) Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 04ab8243a13..62a63b2b33d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2948,10 +2948,18 @@ tracing_entries_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_array *tr = filp->private_data; - char buf[64]; + char buf[96]; int r; - r = sprintf(buf, "%lu\n", tr->entries >> 10); + mutex_lock(&trace_types_lock); + if (!ring_buffer_expanded) + r = sprintf(buf, "%lu (expanded: %lu)\n", + tr->entries >> 10, + trace_buf_size >> 10); + else + r = sprintf(buf, "%lu\n", tr->entries >> 10); + mutex_unlock(&trace_types_lock); + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } -- cgit v1.2.3-18-g5258 From 48ead02030f849d011259244bb4ea9b985479006 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 12 Mar 2009 18:24:49 +0100 Subject: tracing/core: bring back raw trace_printk for dynamic formats strings Impact: fix callsites with dynamic format strings Since its new binary implementation, trace_printk() internally uses static containers for the format strings on each callsites. But the value is assigned once at build time, which means that it can't take dynamic formats. So this patch unearthes the raw trace_printk implementation for the callers that will need trace_printk to be able to carry these dynamic format strings. The trace_printk() macro will use the appropriate implementation for each callsite. Most of the time however, the binary implementation will still be used. The other impact of this patch is that mmiotrace_printk() will use the old implementation because it calls the low level trace_vprintk and we can't guess here whether the format passed in it is dynamic or not. Some parts of this patch have been written by Steven Rostedt (most notably the part that chooses the appropriate implementation for each callsites). Signed-off-by: Frederic Weisbecker Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 80 insertions(+), 5 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 62a63b2b33d..dbb077d8a17 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1179,10 +1179,10 @@ void trace_graph_return(struct ftrace_graph_ret *trace) /** - * trace_vprintk - write binary msg to tracing buffer + * trace_vbprintk - write binary msg to tracing buffer * */ -int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) +int trace_vbprintk(unsigned long ip, int depth, const char *fmt, va_list args) { static raw_spinlock_t trace_buf_lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; @@ -1191,7 +1191,7 @@ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) struct ring_buffer_event *event; struct trace_array *tr = &global_trace; struct trace_array_cpu *data; - struct print_entry *entry; + struct bprint_entry *entry; unsigned long flags; int resched; int cpu, len = 0, size, pc; @@ -1219,7 +1219,7 @@ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) goto out_unlock; size = sizeof(*entry) + sizeof(u32) * len; - event = trace_buffer_lock_reserve(tr, TRACE_PRINT, size, flags, pc); + event = trace_buffer_lock_reserve(tr, TRACE_BPRINT, size, flags, pc); if (!event) goto out_unlock; entry = ring_buffer_event_data(event); @@ -1240,6 +1240,60 @@ out: return len; } +EXPORT_SYMBOL_GPL(trace_vbprintk); + +int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) +{ + static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED; + static char trace_buf[TRACE_BUF_SIZE]; + + struct ring_buffer_event *event; + struct trace_array *tr = &global_trace; + struct trace_array_cpu *data; + int cpu, len = 0, size, pc; + struct print_entry *entry; + unsigned long irq_flags; + + if (tracing_disabled || tracing_selftest_running) + return 0; + + pc = preempt_count(); + preempt_disable_notrace(); + cpu = raw_smp_processor_id(); + data = tr->data[cpu]; + + if (unlikely(atomic_read(&data->disabled))) + goto out; + + pause_graph_tracing(); + raw_local_irq_save(irq_flags); + __raw_spin_lock(&trace_buf_lock); + len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args); + + len = min(len, TRACE_BUF_SIZE-1); + trace_buf[len] = 0; + + size = sizeof(*entry) + len + 1; + event = trace_buffer_lock_reserve(tr, TRACE_PRINT, size, irq_flags, pc); + if (!event) + goto out_unlock; + entry = ring_buffer_event_data(event); + entry->ip = ip; + entry->depth = depth; + + memcpy(&entry->buf, trace_buf, len); + entry->buf[len] = 0; + ring_buffer_unlock_commit(tr->buffer, event); + + out_unlock: + __raw_spin_unlock(&trace_buf_lock); + raw_local_irq_restore(irq_flags); + unpause_graph_tracing(); + out: + preempt_enable_notrace(); + + return len; +} EXPORT_SYMBOL_GPL(trace_vprintk); enum trace_file_type { @@ -1628,6 +1682,22 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter) return TRACE_TYPE_HANDLED; } +static enum print_line_t print_bprintk_msg_only(struct trace_iterator *iter) +{ + struct trace_seq *s = &iter->seq; + struct trace_entry *entry = iter->ent; + struct bprint_entry *field; + int ret; + + trace_assign_type(field, entry); + + ret = trace_seq_bprintf(s, field->fmt, field->buf); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + return TRACE_TYPE_HANDLED; +} + static enum print_line_t print_printk_msg_only(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; @@ -1637,7 +1707,7 @@ static enum print_line_t print_printk_msg_only(struct trace_iterator *iter) trace_assign_type(field, entry); - ret = trace_seq_bprintf(s, field->fmt, field->buf); + ret = trace_seq_printf(s, "%s", field->buf); if (!ret) return TRACE_TYPE_PARTIAL_LINE; @@ -1702,6 +1772,11 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter) return ret; } + if (iter->ent->type == TRACE_BPRINT && + trace_flags & TRACE_ITER_PRINTK && + trace_flags & TRACE_ITER_PRINTK_MSGONLY) + return print_bprintk_msg_only(iter); + if (iter->ent->type == TRACE_PRINT && trace_flags & TRACE_ITER_PRINTK && trace_flags & TRACE_ITER_PRINTK_MSGONLY) -- cgit v1.2.3-18-g5258 From 7f96f93f02b7637491a1637dee12dcdcd40b9802 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 13 Mar 2009 00:37:42 -0400 Subject: tracing: move binary buffers into per cpu directory The binary_buffers directory in /debugfs/tracing held the files to read the trace buffers in a binary format. This held one file per CPU buffer. But we also have a per_cpu directory that holds a way to read the pretty-print formats. This patch moves the binary buffers into the per_cpu_directory: # ls /debug/tracing/per_cpu/cpu1/ trace trace_pipe trace_pipe_raw The new name is called "trace_pipe_raw". The binary buffers always acted similar to trace_pipe, except that they produce raw data. Requested-by: Ingo Molnar Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 26 +++++--------------------- 1 file changed, 5 insertions(+), 21 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index dbb077d8a17..efe3202c020 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3543,6 +3543,11 @@ static void tracing_init_debugfs_percpu(long cpu) (void *) cpu, &tracing_fops); if (!entry) pr_warning("Could not create debugfs 'trace' entry\n"); + + entry = debugfs_create_file("trace_pipe_raw", 0444, d_cpu, + (void *) cpu, &tracing_buffers_fops); + if (!entry) + pr_warning("Could not create debugfs 'trace_pipe_raw' entry\n"); } #ifdef CONFIG_FTRACE_SELFTEST @@ -3826,7 +3831,6 @@ static __init void create_trace_options_dir(void) static __init int tracer_init_debugfs(void) { struct dentry *d_tracer; - struct dentry *buffers; struct dentry *entry; int cpu; @@ -3899,26 +3903,6 @@ static __init int tracer_init_debugfs(void) pr_warning("Could not create debugfs " "'trace_marker' entry\n"); - buffers = debugfs_create_dir("binary_buffers", d_tracer); - - if (!buffers) - pr_warning("Could not create buffers directory\n"); - else { - int cpu; - char buf[64]; - - for_each_tracing_cpu(cpu) { - sprintf(buf, "%d", cpu); - - entry = debugfs_create_file(buf, 0444, buffers, - (void *)(long)cpu, - &tracing_buffers_fops); - if (!entry) - pr_warning("Could not create debugfs buffers " - "'%s' entry\n", buf); - } - } - #ifdef CONFIG_DYNAMIC_FTRACE entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer, &ftrace_update_tot_cnt, -- cgit v1.2.3-18-g5258 From 59f586db98919d7d9c43527b26c8de1cdf9ed912 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 15 Mar 2009 22:10:39 +0100 Subject: tracing/core: fix missing mutex unlock on tracing_set_tracer() Impact: fix possible locking imbalance In case of ring buffer resize failure, tracing_set_tracer forgot to release trace_types_lock. Fix it. Signed-off-by: Frederic Weisbecker LKML-Reference: <1237151439-6755-5-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index efe3202c020..c0cf946d42f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2494,7 +2494,7 @@ static int tracing_set_tracer(const char *buf) if (!ring_buffer_expanded) { ret = tracing_resize_ring_buffer(trace_buf_size); if (ret < 0) - return ret; + goto out; ret = 0; } -- cgit v1.2.3-18-g5258 From 2fc1dfbe17e7705c55b7a99da995fa565e26f151 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 16 Mar 2009 01:45:03 +0100 Subject: tracing/core: fix early free of cpumasks Impact: fix crashes when tracing cpumasks While ring-buffer allocation, the cpumasks are allocated too, including the tracing cpumask and the per-cpu file mask handler. But these cpumasks are freed accidentally just after. Fix it. Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker LKML-Reference: <1237164303-11476-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c0cf946d42f..ae32d3b99b4 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4125,7 +4125,8 @@ __init static int tracer_alloc_buffers(void) &trace_panic_notifier); register_die_notifier(&trace_die_notifier); - ret = 0; + + return 0; out_free_cpumask: free_cpumask_var(tracing_reader_cpumask); -- cgit v1.2.3-18-g5258 From 4ca530852346be239b7c19e7bec5d2b78855bebe Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 16 Mar 2009 19:20:15 -0400 Subject: tracing: protect reader of cmdline output Impact: fix to one cause of incorrect comm outputs in trace The spinlock only protected the creation of a comm <=> pid pair. But it was possible that a reader could look up a pid, and get the wrong comm because it had no locking. This also required changing trace_find_cmdline to copy the comm cache and not just send back a pointer to it. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index efe3202c020..2796bd2b17e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -770,25 +770,29 @@ static void trace_save_cmdline(struct task_struct *tsk) __raw_spin_unlock(&trace_cmdline_lock); } -char *trace_find_cmdline(int pid) +void trace_find_cmdline(int pid, char comm[]) { - char *cmdline = "<...>"; unsigned map; - if (!pid) - return ""; + if (!pid) { + strcpy(comm, ""); + return; + } - if (pid > PID_MAX_DEFAULT) - goto out; + if (pid > PID_MAX_DEFAULT) { + strcpy(comm, "<...>"); + return; + } + __raw_spin_lock(&trace_cmdline_lock); map = map_pid_to_cmdline[pid]; if (map >= SAVED_CMDLINES) goto out; - cmdline = saved_cmdlines[map]; + strcpy(comm, saved_cmdlines[map]); out: - return cmdline; + __raw_spin_unlock(&trace_cmdline_lock); } void tracing_record_cmdline(struct task_struct *tsk) -- cgit v1.2.3-18-g5258 From 6adaad14d7d4d3ef31b4e2dc992b18b5da7c4eb3 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 16 Mar 2009 21:57:17 -0400 Subject: tracing: stop comm recording on tracing off Impact: fix for losing comms in trace The command lines of tasks are cached at sched switch to not need to record them at every trace point. Disabling the tracing on stops the recording of traces, but does not stop the caching of command lines. When the tracing is off the cache may overflow and cause the tracing to show incorrect tasks matching the PIDs. This patch disables prevents updates to the comm cache when the ring buffer is off. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2796bd2b17e..8f89690230e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -797,7 +797,7 @@ void trace_find_cmdline(int pid, char comm[]) void tracing_record_cmdline(struct task_struct *tsk) { - if (atomic_read(&trace_record_cmdline_disabled)) + if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on()) return; trace_save_cmdline(tsk); -- cgit v1.2.3-18-g5258 From 37886f6a9f62d22530ffee8d3f9215c8345b6969 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 17 Mar 2009 17:22:06 -0400 Subject: ring-buffer: add api to allow a tracer to change clock source This patch adds a new function called ring_buffer_set_clock that allows a tracer to assign its own clock source to the buffer. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8f89690230e..3be2f788e10 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -155,13 +155,6 @@ ns2usecs(cycle_t nsec) return nsec; } -cycle_t ftrace_now(int cpu) -{ - u64 ts = ring_buffer_time_stamp(cpu); - ring_buffer_normalize_time_stamp(cpu, &ts); - return ts; -} - /* * The global_trace is the descriptor that holds the tracing * buffers for the live tracing. For each CPU, it contains @@ -178,6 +171,20 @@ static struct trace_array global_trace; static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu); +cycle_t ftrace_now(int cpu) +{ + u64 ts; + + /* Early boot up does not have a buffer yet */ + if (!global_trace.buffer) + return trace_clock_local(); + + ts = ring_buffer_time_stamp(global_trace.buffer, cpu); + ring_buffer_normalize_time_stamp(global_trace.buffer, cpu, &ts); + + return ts; +} + /* * The max_tr is used to snapshot the global_trace when a maximum * latency is reached. Some tracers will use this to store a maximum -- cgit v1.2.3-18-g5258 From af4617bdba34aa556272b34c3986b0a4d588f568 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 17 Mar 2009 18:09:55 -0400 Subject: tracing: add global-clock option to provide cross CPU clock to traces Impact: feature to allow better serialized clock This patch adds an option called "global-clock" that will allow the tracer to switch to a slower but more accurate (across CPUs) clock. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 3be2f788e10..2f994caab0b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -315,6 +315,7 @@ static const char *trace_options[] = { "printk-msg-only", "context-info", "latency-format", + "global-clock", NULL }; @@ -2251,6 +2252,34 @@ static int set_tracer_option(struct tracer *trace, char *cmp, int neg) return 0; } +static void set_tracer_flags(unsigned int mask, int enabled) +{ + /* do nothing if flag is already set */ + if (!!(trace_flags & mask) == !!enabled) + return; + + if (enabled) + trace_flags |= mask; + else + trace_flags &= ~mask; + + if (mask == TRACE_ITER_GLOBAL_CLK) { + u64 (*func)(void); + + if (enabled) + func = trace_clock_global; + else + func = trace_clock_local; + + mutex_lock(&trace_types_lock); + ring_buffer_set_clock(global_trace.buffer, func); + + if (max_tr.buffer) + ring_buffer_set_clock(max_tr.buffer, func); + mutex_unlock(&trace_types_lock); + } +} + static ssize_t tracing_trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) @@ -2278,10 +2307,7 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf, int len = strlen(trace_options[i]); if (strncmp(cmp, trace_options[i], len) == 0) { - if (neg) - trace_flags &= ~(1 << i); - else - trace_flags |= (1 << i); + set_tracer_flags(1 << i, !neg); break; } } -- cgit v1.2.3-18-g5258 From 18aecd362a1c991fbf5f7919ae051a77532ba2f8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 18 Mar 2009 08:56:58 +0100 Subject: tracing: stop command line recording when tracing is disabled Impact: prevent overwrite of command line entries When the tracer is stopped the command line recording continues to record. The check for tracing_is_on() is not sufficient here as the ringbuffer status is not affected by setting debug/tracing/tracing_enabled to 0. On a non idle system this can result in the loss of the command line information for the stopped trace, which makes the trace harder to read and analyse. Check tracer_enabled to allow further recording. Signed-off-by: Thomas Gleixner Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 1ce6208fd72..7b6043ea256 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -797,7 +797,8 @@ void trace_find_cmdline(int pid, char comm[]) void tracing_record_cmdline(struct task_struct *tsk) { - if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on()) + if (atomic_read(&trace_record_cmdline_disabled) || !tracer_enabled || + !tracing_is_on()) return; trace_save_cmdline(tsk); -- cgit v1.2.3-18-g5258 From 2c7eea4c62ba090b7f4583c3d7337ea0019be900 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 18 Mar 2009 09:03:19 +0100 Subject: tracing: replace the crude (unsigned) -1 hackery Impact: cleanup The command line recorder uses (unsigned) -1 to mark non mapped entries in the pid to command line maps. The validity check is completely unintuitive: idx >= SAVED_CMDLINES There is no need for such casting games. Use a constant to mark unmapped entries and check for that constant to make the code readable and understandable. Signed-off-by: Thomas Gleixner Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 7b6043ea256..ca673c47568 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -633,6 +633,7 @@ void tracing_reset_online_cpus(struct trace_array *tr) } #define SAVED_CMDLINES 128 +#define NO_CMDLINE_MAP UINT_MAX static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1]; static unsigned map_cmdline_to_pid[SAVED_CMDLINES]; static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN]; @@ -644,8 +645,8 @@ static atomic_t trace_record_cmdline_disabled __read_mostly; static void trace_init_cmdlines(void) { - memset(&map_pid_to_cmdline, -1, sizeof(map_pid_to_cmdline)); - memset(&map_cmdline_to_pid, -1, sizeof(map_cmdline_to_pid)); + memset(&map_pid_to_cmdline, NO_CMDLINE_MAP, sizeof(map_pid_to_cmdline)); + memset(&map_cmdline_to_pid, NO_CMDLINE_MAP, sizeof(map_cmdline_to_pid)); cmdline_idx = 0; } @@ -753,12 +754,12 @@ static void trace_save_cmdline(struct task_struct *tsk) return; idx = map_pid_to_cmdline[tsk->pid]; - if (idx >= SAVED_CMDLINES) { + if (idx == NO_CMDLINE_MAP) { idx = (cmdline_idx + 1) % SAVED_CMDLINES; map = map_cmdline_to_pid[idx]; - if (map <= PID_MAX_DEFAULT) - map_pid_to_cmdline[map] = (unsigned)-1; + if (map != NO_CMDLINE_MAP) + map_pid_to_cmdline[map] = NO_CMDLINE_MAP; map_pid_to_cmdline[tsk->pid] = idx; @@ -786,7 +787,7 @@ void trace_find_cmdline(int pid, char comm[]) __raw_spin_lock(&trace_cmdline_lock); map = map_pid_to_cmdline[pid]; - if (map >= SAVED_CMDLINES) + if (map == NO_CMDLINE_MAP) goto out; strcpy(comm, saved_cmdlines[map]); -- cgit v1.2.3-18-g5258 From 50d88758a3f9787cbdbdbc030560b815721eab4b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 18 Mar 2009 08:58:44 +0100 Subject: tracing: fix trace_find_cmdline() Impact: prevent stale command line output In case there is no valid command line mapping for a pid trace_find_cmdline() returns without updating the comm buffer. The trace dump keeps the previous entry which results in confusing trace output: -0 [000] 280.702056 .... -23456 [000] 280.702080 .... Update the comm buffer with "<...>" when no mapping is found. Signed-off-by: Thomas Gleixner Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ca673c47568..06c69a26032 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -787,12 +787,11 @@ void trace_find_cmdline(int pid, char comm[]) __raw_spin_lock(&trace_cmdline_lock); map = map_pid_to_cmdline[pid]; - if (map == NO_CMDLINE_MAP) - goto out; - - strcpy(comm, saved_cmdlines[map]); + if (map != NO_CMDLINE_MAP) + strcpy(comm, saved_cmdlines[map]); + else + strcpy(comm, "<...>"); - out: __raw_spin_unlock(&trace_cmdline_lock); } -- cgit v1.2.3-18-g5258 From a635cf0497342978d417cae19d4a4823932977ff Mon Sep 17 00:00:00 2001 From: Carsten Emde Date: Wed, 18 Mar 2009 09:00:41 +0100 Subject: tracing: fix command line to pid reverse map Impact: fix command line to pid mapping map_cmdline_to_pid[] is checked in trace_save_cmdline(), but never updated. This results in stale pid to command line mappings and the tracer output will associate the wrong comm string. Signed-off-by: Carsten Emde Signed-off-by: Thomas Gleixner Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 06c69a26032..305c562dae2 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -738,8 +738,7 @@ void trace_stop_cmdline_recording(void); static void trace_save_cmdline(struct task_struct *tsk) { - unsigned map; - unsigned idx; + unsigned pid, idx; if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT)) return; @@ -757,10 +756,17 @@ static void trace_save_cmdline(struct task_struct *tsk) if (idx == NO_CMDLINE_MAP) { idx = (cmdline_idx + 1) % SAVED_CMDLINES; - map = map_cmdline_to_pid[idx]; - if (map != NO_CMDLINE_MAP) - map_pid_to_cmdline[map] = NO_CMDLINE_MAP; + /* + * Check whether the cmdline buffer at idx has a pid + * mapped. We are going to overwrite that entry so we + * need to clear the map_pid_to_cmdline. Otherwise we + * would read the new comm for the old pid. + */ + pid = map_cmdline_to_pid[idx]; + if (pid != NO_CMDLINE_MAP) + map_pid_to_cmdline[pid] = NO_CMDLINE_MAP; + map_cmdline_to_pid[idx] = tsk->pid; map_pid_to_cmdline[tsk->pid] = idx; cmdline_idx = idx; -- cgit v1.2.3-18-g5258 From 4acd4d00f716873e27e7b60ae292cbdbfae674dd Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 18 Mar 2009 10:40:24 -0400 Subject: tracing: give easy way to clear trace buffer There is currently no easy way to clear the trace buffer. Currently the only way is to change the current tracer. This patch lets the user clear the trace buffer by simply writing into the trace files. echo > /debug/tracing/trace or to clear a single cpu (i.e. for CPU 1): echo > /debug/tracing/per_cpu/cpu1/trace Requested-by: Ingo Molnar Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 41 +++++++++++++++++++++++++++++++++-------- 1 file changed, 33 insertions(+), 8 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a2d13e8c8fd..8d981ababc4 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1941,9 +1941,14 @@ int tracing_open_generic(struct inode *inode, struct file *filp) static int tracing_release(struct inode *inode, struct file *file) { struct seq_file *m = (struct seq_file *)file->private_data; - struct trace_iterator *iter = m->private; + struct trace_iterator *iter; int cpu; + if (!(file->f_mode & FMODE_READ)) + return 0; + + iter = m->private; + mutex_lock(&trace_types_lock); for_each_tracing_cpu(cpu) { if (iter->buffer_iter[cpu]) @@ -1969,12 +1974,24 @@ static int tracing_open(struct inode *inode, struct file *file) struct trace_iterator *iter; int ret = 0; - iter = __tracing_open(inode, file); - if (IS_ERR(iter)) - ret = PTR_ERR(iter); - else if (trace_flags & TRACE_ITER_LATENCY_FMT) - iter->iter_flags |= TRACE_FILE_LAT_FMT; + /* If this file was open for write, then erase contents */ + if ((file->f_mode & FMODE_WRITE) && + !(file->f_flags & O_APPEND)) { + long cpu = (long) inode->i_private; + + if (cpu == TRACE_PIPE_ALL_CPU) + tracing_reset_online_cpus(&global_trace); + else + tracing_reset(&global_trace, cpu); + } + if (file->f_mode & FMODE_READ) { + iter = __tracing_open(inode, file); + if (IS_ERR(iter)) + ret = PTR_ERR(iter); + else if (trace_flags & TRACE_ITER_LATENCY_FMT) + iter->iter_flags |= TRACE_FILE_LAT_FMT; + } return ret; } @@ -2049,9 +2066,17 @@ static int show_traces_open(struct inode *inode, struct file *file) return ret; } +static ssize_t +tracing_write_stub(struct file *filp, const char __user *ubuf, + size_t count, loff_t *ppos) +{ + return count; +} + static const struct file_operations tracing_fops = { .open = tracing_open, .read = seq_read, + .write = tracing_write_stub, .llseek = seq_lseek, .release = tracing_release, }; @@ -3576,7 +3601,7 @@ static void tracing_init_debugfs_percpu(long cpu) pr_warning("Could not create debugfs 'trace_pipe' entry\n"); /* per cpu trace */ - entry = debugfs_create_file("trace", 0444, d_cpu, + entry = debugfs_create_file("trace", 0644, d_cpu, (void *) cpu, &tracing_fops); if (!entry) pr_warning("Could not create debugfs 'trace' entry\n"); @@ -3890,7 +3915,7 @@ static __init int tracer_init_debugfs(void) if (!entry) pr_warning("Could not create debugfs 'tracing_cpumask' entry\n"); - entry = debugfs_create_file("trace", 0444, d_tracer, + entry = debugfs_create_file("trace", 0644, d_tracer, (void *) TRACE_PIPE_ALL_CPU, &tracing_fops); if (!entry) pr_warning("Could not create debugfs 'trace' entry\n"); -- cgit v1.2.3-18-g5258 From 5ef841f6f32dce0b752a4fa0622781ee67a0e874 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 19 Mar 2009 12:20:38 -0400 Subject: tracing: make print_(b)printk_msg_only global This patch makes print_printk_msg_only and print_bprintk_msg_only global for other functions to use. It also renames them by adding a "trace_" to the beginning to avoid namespace collisions. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 36 ++---------------------------------- 1 file changed, 2 insertions(+), 34 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8d981ababc4..c637cb687cf 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1694,38 +1694,6 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter) return TRACE_TYPE_HANDLED; } -static enum print_line_t print_bprintk_msg_only(struct trace_iterator *iter) -{ - struct trace_seq *s = &iter->seq; - struct trace_entry *entry = iter->ent; - struct bprint_entry *field; - int ret; - - trace_assign_type(field, entry); - - ret = trace_seq_bprintf(s, field->fmt, field->buf); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - return TRACE_TYPE_HANDLED; -} - -static enum print_line_t print_printk_msg_only(struct trace_iterator *iter) -{ - struct trace_seq *s = &iter->seq; - struct trace_entry *entry = iter->ent; - struct print_entry *field; - int ret; - - trace_assign_type(field, entry); - - ret = trace_seq_printf(s, "%s", field->buf); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - return TRACE_TYPE_HANDLED; -} - static enum print_line_t print_bin_fmt(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; @@ -1787,12 +1755,12 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter) if (iter->ent->type == TRACE_BPRINT && trace_flags & TRACE_ITER_PRINTK && trace_flags & TRACE_ITER_PRINTK_MSGONLY) - return print_bprintk_msg_only(iter); + return trace_print_bprintk_msg_only(iter); if (iter->ent->type == TRACE_PRINT && trace_flags & TRACE_ITER_PRINTK && trace_flags & TRACE_ITER_PRINTK_MSGONLY) - return print_printk_msg_only(iter); + return trace_print_printk_msg_only(iter); if (trace_flags & TRACE_ITER_BIN) return print_bin_fmt(iter); -- cgit v1.2.3-18-g5258 From 40ce74f19c28077550646c76d96a075bf312e461 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 19 Mar 2009 14:03:53 -0400 Subject: tracing: remove recording function depth from trace_printk The function depth in trace_printk was to facilitate the function graph output. Now that the function graph calculates the depth within the trace output, we no longer need to record the depth when the trace_printk is called. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c637cb687cf..f7f359d4582 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1194,7 +1194,7 @@ void trace_graph_return(struct ftrace_graph_ret *trace) * trace_vbprintk - write binary msg to tracing buffer * */ -int trace_vbprintk(unsigned long ip, int depth, const char *fmt, va_list args) +int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) { static raw_spinlock_t trace_buf_lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; @@ -1236,7 +1236,6 @@ int trace_vbprintk(unsigned long ip, int depth, const char *fmt, va_list args) goto out_unlock; entry = ring_buffer_event_data(event); entry->ip = ip; - entry->depth = depth; entry->fmt = fmt; memcpy(entry->buf, trace_buf, sizeof(u32) * len); @@ -1254,7 +1253,7 @@ out: } EXPORT_SYMBOL_GPL(trace_vbprintk); -int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) +int trace_vprintk(unsigned long ip, const char *fmt, va_list args) { static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED; static char trace_buf[TRACE_BUF_SIZE]; @@ -1291,7 +1290,6 @@ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) goto out_unlock; entry = ring_buffer_event_data(event); entry->ip = ip; - entry->depth = depth; memcpy(&entry->buf, trace_buf, len); entry->buf[len] = 0; @@ -3140,7 +3138,7 @@ static int mark_printk(const char *fmt, ...) int ret; va_list args; va_start(args, fmt); - ret = trace_vprintk(0, -1, fmt, args); + ret = trace_vprintk(0, fmt, args); va_end(args); return ret; } -- cgit v1.2.3-18-g5258 From cf586b61f80229491127d3c57c06ed93c9f530d3 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 22 Mar 2009 05:04:35 +0100 Subject: tracing/function-graph-tracer: prevent hangs during self-tests Impact: detect tracing related hangs Sometimes, with some configs, the function graph tracer can make the timer interrupt too much slow, hanging the kernel in an endless loop of timer interrupts servicing. As suggested by Ingo, this patch brings a watchdog which stops the selftest after a defined number of functions traced, definitely disabling this tracer. For those who want to debug the cause of the function graph trace hang, you can pass the ftrace_dump_on_oops kernel parameter to dump the traces after this hang detection. Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt LKML-Reference: <1237694675-23509-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index e3dfefe6934..e6fac0ffe6f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4018,11 +4018,12 @@ trace_printk_seq(struct trace_seq *s) trace_seq_init(s); } -void ftrace_dump(void) +static void __ftrace_dump(bool disable_tracing) { static DEFINE_SPINLOCK(ftrace_dump_lock); /* use static because iter can be a bit big for the stack */ static struct trace_iterator iter; + unsigned int old_userobj; static int dump_ran; unsigned long flags; int cnt = 0, cpu; @@ -4034,14 +4035,17 @@ void ftrace_dump(void) dump_ran = 1; - /* No turning back! */ tracing_off(); - ftrace_kill(); + + if (disable_tracing) + ftrace_kill(); for_each_tracing_cpu(cpu) { atomic_inc(&global_trace.data[cpu]->disabled); } + old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ; + /* don't look at user memory in panic mode */ trace_flags &= ~TRACE_ITER_SYM_USEROBJ; @@ -4086,10 +4090,26 @@ void ftrace_dump(void) else printk(KERN_TRACE "---------------------------------\n"); + /* Re-enable tracing if requested */ + if (!disable_tracing) { + trace_flags |= old_userobj; + + for_each_tracing_cpu(cpu) { + atomic_dec(&global_trace.data[cpu]->disabled); + } + tracing_on(); + } + out: spin_unlock_irqrestore(&ftrace_dump_lock, flags); } +/* By default: disable tracing after the dump */ +void ftrace_dump(void) +{ + __ftrace_dump(true); +} + __init static int tracer_alloc_buffers(void) { struct trace_array_cpu *data; -- cgit v1.2.3-18-g5258 From b8b94265337f83b7db9c5f429b1769d463d7da8c Mon Sep 17 00:00:00 2001 From: Dmitri Vorobiev Date: Sun, 22 Mar 2009 19:11:11 +0200 Subject: tracing: fix four sparse warnings Impact: cleanup. This patch fixes the following sparse warnings: kernel/trace/trace.c:385:9: warning: symbol 'trace_seq_to_buffer' was not declared. Should it be static? kernel/trace/trace_clock.c:29:13: warning: symbol 'trace_clock_local' was not declared. Should it be static? kernel/trace/trace_clock.c:54:13: warning: symbol 'trace_clock' was not declared. Should it be static? kernel/trace/trace_clock.c:74:13: warning: symbol 'trace_clock_global' was not declared. Should it be static? Signed-off-by: Dmitri Vorobiev LKML-Reference: <1237741871-5827-4-git-send-email-dmitri.vorobiev@movial.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index e6fac0ffe6f..ace685c7018 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -382,7 +382,7 @@ ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt) return cnt; } -ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt) +static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt) { int len; void *ret; -- cgit v1.2.3-18-g5258 From 07edf7121374609709ef1b0889f6e7b8d6a62ec1 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 22 Mar 2009 23:10:46 +0100 Subject: tracing/events: don't use wake up for events Impact: fix hard-lockup with sched switch events Some ftrace events, such as sched wakeup, can be traced while the runqueue lock is hold. Since they are using trace_current_buffer_unlock_commit(), they call wake_up() which can try to grab the runqueue lock too, resulting in a deadlock. Now for all event, we call a new helper: trace_nowake_buffer_unlock_commit() which do pretty the same than trace_current_buffer_unlock_commit() except than it doesn't call trace_wake_up(). Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt LKML-Reference: <1237759847-21025-4-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index e6fac0ffe6f..6bad12819eb 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -860,15 +860,25 @@ static void ftrace_trace_stack(struct trace_array *tr, static void ftrace_trace_userstack(struct trace_array *tr, unsigned long flags, int pc); -void trace_buffer_unlock_commit(struct trace_array *tr, - struct ring_buffer_event *event, - unsigned long flags, int pc) +static inline void __trace_buffer_unlock_commit(struct trace_array *tr, + struct ring_buffer_event *event, + unsigned long flags, int pc, + int wake) { ring_buffer_unlock_commit(tr->buffer, event); ftrace_trace_stack(tr, flags, 6, pc); ftrace_trace_userstack(tr, flags, pc); - trace_wake_up(); + + if (wake) + trace_wake_up(); +} + +void trace_buffer_unlock_commit(struct trace_array *tr, + struct ring_buffer_event *event, + unsigned long flags, int pc) +{ + __trace_buffer_unlock_commit(tr, event, flags, pc, 1); } struct ring_buffer_event * @@ -882,7 +892,13 @@ trace_current_buffer_lock_reserve(unsigned char type, unsigned long len, void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, unsigned long flags, int pc) { - return trace_buffer_unlock_commit(&global_trace, event, flags, pc); + return __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 1); +} + +void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event, + unsigned long flags, int pc) +{ + return __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 0); } void -- cgit v1.2.3-18-g5258 From 3e1f60b80cafcb5d7e8d3665b35962fbb8fb9efa Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 22 Mar 2009 23:10:45 +0100 Subject: tracing/ftrace: check if debugfs is registered before creating files Impact: fix a crash with ftrace={nop,boot} parameter If the nop or initcall tracers are launched as boot tracers, they will attempt to create their option directory and files. But these tracers are registered very early and then assigned as "boot tracers" very early if asked to. Since they do this before debugfs has been registered (core initcall), a crash is triggered. Another early tracers could also come later. So we fix it by checking if debugfs is initialized before creating the root tracing directory. Signed-off-by: Frederic Weisbecker Cc: Greg Kroah-Hartman Cc: Steven Rostedt LKML-Reference: <1237759847-21025-3-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ace685c7018..f0e1337b1eb 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3513,6 +3513,9 @@ struct dentry *tracing_init_dentry(void) if (d_tracer) return d_tracer; + if (!debugfs_initialized()) + return NULL; + d_tracer = debugfs_create_dir("tracing", NULL); if (!d_tracer && !once) { -- cgit v1.2.3-18-g5258 From 1618536961d31f9b3f55767b22d4a897f4204c26 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 23 Mar 2009 22:17:01 +0100 Subject: tracing/function-graph-tracer: fix functions call traces imbalance Impact: fix traces output Sometimes one can observe an imbalance in the traces between function calls and function return traces: func1() { } } The curly brace inside func1() is the return of another function nested inside func1. The return trace have been inserted in the buffer but not the entry. We are storing a return address on the function traces stack while we haven't inserted its entry on the buffer, hence the imbalance on the traces. This is because the tracers doesn't check all failures that can happen on buffer insertion. This patch reports the tracing recursion failures and the ring buffer failures. In such cases, we now restore the original return address for the function, giving up its return trace. Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt LKML-Reference: <1237843021-11695-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 6bad12819eb..89f0c2544ad 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -924,7 +924,7 @@ trace_function(struct trace_array *tr, } #ifdef CONFIG_FUNCTION_GRAPH_TRACER -static void __trace_graph_entry(struct trace_array *tr, +static int __trace_graph_entry(struct trace_array *tr, struct ftrace_graph_ent *trace, unsigned long flags, int pc) @@ -933,15 +933,17 @@ static void __trace_graph_entry(struct trace_array *tr, struct ftrace_graph_ent_entry *entry; if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) - return; + return 0; event = trace_buffer_lock_reserve(&global_trace, TRACE_GRAPH_ENT, sizeof(*entry), flags, pc); if (!event) - return; + return 0; entry = ring_buffer_event_data(event); entry->graph_ent = *trace; ring_buffer_unlock_commit(global_trace.buffer, event); + + return 1; } static void __trace_graph_return(struct trace_array *tr, @@ -1162,6 +1164,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) struct trace_array_cpu *data; unsigned long flags; long disabled; + int ret; int cpu; int pc; @@ -1177,15 +1180,18 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) { pc = preempt_count(); - __trace_graph_entry(tr, trace, flags, pc); + ret = __trace_graph_entry(tr, trace, flags, pc); + } else { + ret = 0; } /* Only do the atomic if it is not already set */ if (!test_tsk_trace_graph(current)) set_tsk_trace_graph(current); + atomic_dec(&data->disabled); local_irq_restore(flags); - return 1; + return ret; } void trace_graph_return(struct ftrace_graph_ret *trace) -- cgit v1.2.3-18-g5258 From be6f164a02f394675e2ac2077dd354cebef5b4c0 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 24 Mar 2009 11:06:24 -0400 Subject: function-graph: add option for include sleep times Impact: give user a choice to show times spent while sleeping The user may want to see the time a function spent sleeping. This patch adds the trace option "sleep-time" to allow that. The "sleep-time" option is default on. echo sleep-time > /debug/tracing/trace_options produces: ------------------------------------------ 2) avahi-d-3428 => -0 ------------------------------------------ 2) | finish_task_switch() { 2) 0.621 us | _spin_unlock_irq(); 2) 2.202 us | } 2) ! 1002.197 us | } 2) ! 1003.521 us | } where as, echo nosleep-time > /debug/tracing/trace_options produces: 0) -0 => yum-upd-3416 ------------------------------------------ 0) | finish_task_switch() { 0) 0.643 us | _spin_unlock_irq(); 0) 2.342 us | } 0) + 41.302 us | } 0) + 42.453 us | } Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f0e1337b1eb..67c6a21dd42 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -255,7 +255,7 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait); /* trace_flags holds trace_options default values */ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | - TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO; + TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME; /** * trace_wake_up - wake up tasks waiting for trace input @@ -316,6 +316,7 @@ static const char *trace_options[] = { "context-info", "latency-format", "global-clock", + "sleep-time", NULL }; -- cgit v1.2.3-18-g5258