From 0588fa30db44fd2d4032b36a061c87478a43fbee Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 21 Mar 2011 22:59:21 -0400 Subject: tracing: Convert trace_printk() formats for module to const char * The trace_printk() formats for modules do not show up in the debugfs/tracing/printk_formats file. Only the formats that are for trace_printk()s that are in the kernel core. To facilitate the change to add trace_printk() formats from modules into that file as well, we need to convert the structure that holds the formats from char fmt[], into const char *fmt, and allocate them separately. Signed-off-by: Steven Rostedt --- kernel/trace/trace_printk.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c index 2547d8813cf..b8b268158af 100644 --- a/kernel/trace/trace_printk.c +++ b/kernel/trace/trace_printk.c @@ -32,7 +32,7 @@ static DEFINE_MUTEX(btrace_mutex); struct trace_bprintk_fmt { struct list_head list; - char fmt[0]; + const char *fmt; }; static inline struct trace_bprintk_fmt *lookup_format(const char *fmt) @@ -49,6 +49,7 @@ static void hold_module_trace_bprintk_format(const char **start, const char **end) { const char **iter; + char *fmt; mutex_lock(&btrace_mutex); for (iter = start; iter < end; iter++) { @@ -58,14 +59,18 @@ void hold_module_trace_bprintk_format(const char **start, const char **end) continue; } - tb_fmt = kmalloc(offsetof(struct trace_bprintk_fmt, fmt) - + strlen(*iter) + 1, GFP_KERNEL); - if (tb_fmt) { + tb_fmt = kmalloc(sizeof(*tb_fmt), GFP_KERNEL); + if (tb_fmt) + fmt = kmalloc(strlen(*iter) + 1, GFP_KERNEL); + if (tb_fmt && fmt) { list_add_tail(&tb_fmt->list, &trace_bprintk_fmt_list); - strcpy(tb_fmt->fmt, *iter); + strcpy(fmt, *iter); + tb_fmt->fmt = fmt; *iter = tb_fmt->fmt; - } else + } else { + kfree(tb_fmt); *iter = NULL; + } } mutex_unlock(&btrace_mutex); } -- cgit v1.2.3-18-g5258 From 1813dc3776c22ad4b0294a6df8434b9a02c98109 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 21 Mar 2011 23:36:31 -0400 Subject: tracing: Print trace_bprintk() formats for modules too The file debugfs/tracing/printk_formats maps the addresses to the formats that are used by trace_bprintk() so that userspace tools can read the buffer and be able to decode trace_bprintk events to get the format saved when reading the ring buffer directly. This is because trace_bprintk() does not store the format into the buffer, but just the address of the format, which is hidden in the kernel memory. But currently it only exports trace_bprintk()s from the kernel core and not for modules. The modules need their formats exported as well. Signed-off-by: Steven Rostedt --- kernel/trace/trace_printk.c | 103 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 97 insertions(+), 6 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c index b8b268158af..dff763b7baf 100644 --- a/kernel/trace/trace_printk.c +++ b/kernel/trace/trace_printk.c @@ -89,6 +89,76 @@ static int module_trace_bprintk_format_notify(struct notifier_block *self, return 0; } +/* + * The debugfs/tracing/printk_formats file maps the addresses with + * the ASCII formats that are used in the bprintk events in the + * buffer. For userspace tools to be able to decode the events from + * the buffer, they need to be able to map the address with the format. + * + * The addresses of the bprintk formats are in their own section + * __trace_printk_fmt. But for modules we copy them into a link list. + * The code to print the formats and their addresses passes around the + * address of the fmt string. If the fmt address passed into the seq + * functions is within the kernel core __trace_printk_fmt section, then + * it simply uses the next pointer in the list. + * + * When the fmt pointer is outside the kernel core __trace_printk_fmt + * section, then we need to read the link list pointers. The trick is + * we pass the address of the string to the seq function just like + * we do for the kernel core formats. To get back the structure that + * holds the format, we simply use containerof() and then go to the + * next format in the list. + */ +static const char ** +find_next_mod_format(int start_index, void *v, const char **fmt, loff_t *pos) +{ + struct trace_bprintk_fmt *mod_fmt; + + if (list_empty(&trace_bprintk_fmt_list)) + return NULL; + + /* + * v will point to the address of the fmt record from t_next + * v will be NULL from t_start. + * If this is the first pointer or called from start + * then we need to walk the list. + */ + if (!v || start_index == *pos) { + struct trace_bprintk_fmt *p; + + /* search the module list */ + list_for_each_entry(p, &trace_bprintk_fmt_list, list) { + if (start_index == *pos) + return &p->fmt; + start_index++; + } + /* pos > index */ + return NULL; + } + + /* + * v points to the address of the fmt field in the mod list + * structure that holds the module print format. + */ + mod_fmt = container_of(v, typeof(*mod_fmt), fmt); + if (mod_fmt->list.next == &trace_bprintk_fmt_list) + return NULL; + + mod_fmt = container_of(mod_fmt->list.next, typeof(*mod_fmt), list); + + return &mod_fmt->fmt; +} + +static void format_mod_start(void) +{ + mutex_lock(&btrace_mutex); +} + +static void format_mod_stop(void) +{ + mutex_unlock(&btrace_mutex); +} + #else /* !CONFIG_MODULES */ __init static int module_trace_bprintk_format_notify(struct notifier_block *self, @@ -96,6 +166,13 @@ module_trace_bprintk_format_notify(struct notifier_block *self, { return 0; } +static inline const char ** +find_next_mod_format(int start_index, void *v, const char **fmt, loff_t *pos) +{ + return NULL; +} +static inline void format_mod_start(void) { } +static inline void format_mod_stop(void) { } #endif /* CONFIG_MODULES */ @@ -158,20 +235,33 @@ int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap) } EXPORT_SYMBOL_GPL(__ftrace_vprintk); +static const char **find_next(void *v, loff_t *pos) +{ + const char **fmt = v; + int start_index; + + if (!fmt) + fmt = __start___trace_bprintk_fmt + *pos; + + start_index = __stop___trace_bprintk_fmt - __start___trace_bprintk_fmt; + + if (*pos < start_index) + return fmt; + + return find_next_mod_format(start_index, v, fmt, pos); +} + static void * t_start(struct seq_file *m, loff_t *pos) { - const char **fmt = __start___trace_bprintk_fmt + *pos; - - if ((unsigned long)fmt >= (unsigned long)__stop___trace_bprintk_fmt) - return NULL; - return fmt; + format_mod_start(); + return find_next(NULL, pos); } static void *t_next(struct seq_file *m, void * v, loff_t *pos) { (*pos)++; - return t_start(m, pos); + return find_next(v, pos); } static int t_show(struct seq_file *m, void *v) @@ -210,6 +300,7 @@ static int t_show(struct seq_file *m, void *v) static void t_stop(struct seq_file *m, void *p) { + format_mod_stop(); } static const struct seq_operations show_format_seq_ops = { -- cgit v1.2.3-18-g5258 From ee5e51f51be755830f57445e268ba50e88ccbdbb Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 25 Mar 2011 12:05:18 +0100 Subject: tracing: Avoid soft lockup in trace_pipe running following commands: # enable the binary option echo 1 > ./options/bin # disable context info option echo 0 > ./options/context-info # tracing only events echo 1 > ./events/enable cat trace_pipe plus forcing system to generate many tracing events, is causing lockup (in NON preemptive kernels) inside tracing_read_pipe function. The issue is also easily reproduced by running ltp stress test. (ftrace_stress_test.sh) The reasons are: - bin/hex/raw output functions for events are set to trace_nop_print function, which prints nothing and returns TRACE_TYPE_HANDLED value - LOST EVENT trace do not handle trace_seq overflow These reasons force the while loop in tracing_read_pipe function never to break. The attached patch fixies handling of lost event trace, and changes trace_nop_print to print minimal info, which is needed for the correct tracing_read_pipe processing. v2 changes: - omit the cond_resched changes by trace_nop_print changes - WARN changed to WARN_ONCE and added info to be able to find out the culprit v3 changes: - make more accurate patch comment Signed-off-by: Jiri Olsa LKML-Reference: <20110325110518.GC1922@jolsa.brq.redhat.com> Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 15 ++++++++++++--- kernel/trace/trace_output.c | 3 +++ 2 files changed, 15 insertions(+), 3 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9541c27c1cf..5af42f478c0 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2013,9 +2013,10 @@ enum print_line_t print_trace_line(struct trace_iterator *iter) { enum print_line_t ret; - if (iter->lost_events) - trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n", - iter->cpu, iter->lost_events); + if (iter->lost_events && + !trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n", + iter->cpu, iter->lost_events)) + return TRACE_TYPE_PARTIAL_LINE; if (iter->trace && iter->trace->print_line) { ret = iter->trace->print_line(iter); @@ -3229,6 +3230,14 @@ waitagain: if (iter->seq.len >= cnt) break; + + /* + * Setting the full flag means we reached the trace_seq buffer + * size and we should leave by partial output condition above. + * One of the trace_seq_* functions is not used properly. + */ + WARN_ONCE(iter->seq.full, "full flag set for trace type %d", + iter->ent->type); } trace_access_unlock(iter->cpu_file); trace_event_read_unlock(); diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 456be9063c2..cf535ccedc8 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -830,6 +830,9 @@ EXPORT_SYMBOL_GPL(unregister_ftrace_event); enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags, struct trace_event *event) { + if (!trace_seq_printf(&iter->seq, "type: %d\n", iter->ent->type)) + return TRACE_TYPE_PARTIAL_LINE; + return TRACE_TYPE_HANDLED; } -- cgit v1.2.3-18-g5258