From 082605de5f82eb692cc90f7fda071cc01bb5ac34 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 19 Jan 2009 14:32:51 -0500 Subject: ring-buffer: fix alignment problem Impact: fix to allow some archs to use the ring buffer Commits in the ring buffer are checked by pointer arithmetic. If the calculation is incorrect, then the commits will never take place and the buffer will simply fill up and report an error. Each page in the ring buffer has a small header: struct buffer_data_page { u64 time_stamp; local_t commit; unsigned char data[]; }; Unfortuntely, some of the calculations used sizeof(struct buffer_data_page) to know the size of the header. But this is incorrect on some archs, where sizeof(struct buffer_data_page) does not equal offsetof(struct buffer_data_page, data), and on those archs, the commits are never processed. This patch replaces the sizeof with offsetof. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/ring_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 8b0daf0662e..1d6526361d0 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -246,7 +246,7 @@ static inline int test_time_stamp(u64 delta) return 0; } -#define BUF_PAGE_SIZE (PAGE_SIZE - sizeof(struct buffer_data_page)) +#define BUF_PAGE_SIZE (PAGE_SIZE - offsetof(struct buffer_data_page, data)) /* * head_page == tail_page && head == tail then buffer is empty. -- cgit v1.2.3-18-g5258 From 00f57f545afa422db3003b0d0b30a30f8de7ecb2 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 14 Jan 2009 13:33:27 -0800 Subject: tracing/function-graph-tracer: fix a regression while suspend to disk Impact: fix a crash while kernel image restore When the function graph tracer is running and while suspend to disk, some racy and dangerous things happen against this tracer. The current task will save its registers including the stack pointer which contains the return address hooked by the tracer. But the current task will continue to enter other functions after that to save the memory, and then it will store other return addresses, and finally loose the old depth which matches the return address saved in the old stack (during the registers saving). So on image restore, the code will return to wrong addresses. And there are other things: on restore, the task will have it's "current" pointer overwritten during registers restoring....switching from one task to another... That would be insane to try to trace function graphs at these stages. This patch makes the function graph tracer listening on power events, making it's tracing disabled for the current task (the one that performs the hibernation work) while suspend/resume to disk, making the tracing safe during hibernation. Signed-off-by: Frederic Weisbecker Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/ftrace.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 2f32969c09d..7dcf6e9f2b0 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -1965,6 +1966,7 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, #ifdef CONFIG_FUNCTION_GRAPH_TRACER static atomic_t ftrace_graph_active; +static struct notifier_block ftrace_suspend_notifier; int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace) { @@ -2043,6 +2045,27 @@ static int start_graph_tracing(void) return ret; } +/* + * Hibernation protection. + * The state of the current task is too much unstable during + * suspend/restore to disk. We want to protect against that. + */ +static int +ftrace_suspend_notifier_call(struct notifier_block *bl, unsigned long state, + void *unused) +{ + switch (state) { + case PM_HIBERNATION_PREPARE: + pause_graph_tracing(); + break; + + case PM_POST_HIBERNATION: + unpause_graph_tracing(); + break; + } + return NOTIFY_DONE; +} + int register_ftrace_graph(trace_func_graph_ret_t retfunc, trace_func_graph_ent_t entryfunc) { @@ -2050,6 +2073,9 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc, mutex_lock(&ftrace_sysctl_lock); + ftrace_suspend_notifier.notifier_call = ftrace_suspend_notifier_call; + register_pm_notifier(&ftrace_suspend_notifier); + atomic_inc(&ftrace_graph_active); ret = start_graph_tracing(); if (ret) { @@ -2075,6 +2101,7 @@ void unregister_ftrace_graph(void) ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; ftrace_graph_entry = ftrace_graph_entry_stub; ftrace_shutdown(FTRACE_STOP_FUNC_RET); + unregister_pm_notifier(&ftrace_suspend_notifier); mutex_unlock(&ftrace_sysctl_lock); } -- cgit v1.2.3-18-g5258 From 551b4048b3d4acf15aff9fe4aed89b892c135b02 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Mon, 12 Jan 2009 11:06:18 +0800 Subject: ring_buffer: reset write when reserve buffer fail Impact: reset struct buffer_page.write when interrupt storm if struct buffer_page.write is not reset, any succedent committing will corrupted ring_buffer: static inline void rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) { ...... cpu_buffer->commit_page->commit = cpu_buffer->commit_page->write; ...... } when "if (RB_WARN_ON(cpu_buffer, next_page == reader_page))", ring_buffer is disabled, but some reserved buffers may haven't been committed. we need reset struct buffer_page.write. when "if (unlikely(next_page == cpu_buffer->commit_page))", ring_buffer is still available, we should not corrupt it. Signed-off-by: Lai Jiangshan Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/ring_buffer.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 1d6526361d0..9c1e73da4e3 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1025,12 +1025,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, } if (next_page == head_page) { - if (!(buffer->flags & RB_FL_OVERWRITE)) { - /* reset write */ - if (tail <= BUF_PAGE_SIZE) - local_set(&tail_page->write, tail); + if (!(buffer->flags & RB_FL_OVERWRITE)) goto out_unlock; - } /* tail_page has not moved yet? */ if (tail_page == cpu_buffer->tail_page) { @@ -1105,6 +1101,10 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, return event; out_unlock: + /* reset write */ + if (tail <= BUF_PAGE_SIZE) + local_set(&tail_page->write, tail); + __raw_spin_unlock(&cpu_buffer->lock); local_irq_restore(flags); return NULL; -- cgit v1.2.3-18-g5258 From faf6861ebd776871e77b761c43ec045cd20b5716 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 14 Jan 2009 12:24:42 -0500 Subject: trace: print ftrace_dump at KERN_EMERG log level Impact: fix to print out ftrace_dump when expected I was debugging a hard race condition to only find out that after I hit the race, my log level was not at level to show KERN_INFO. The time it took to trigger the race was wasted because I did not capture the trace. Since ftrace_dump is only called from kernel oops (and only when it is set in the kernel command line to do so), or when a developer adds it to their own local tree, the log level of the print should be at KERN_EMERG to make sure the print appears. ftrace_dump is not called by a normal user setup, and will not add extra unwanted print out to the console. There is no reason it should be at KERN_INFO. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c580233add9..1a1c5a6ab24 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3736,7 +3736,7 @@ static struct notifier_block trace_die_notifier = { * it if we decide to change what log level the ftrace dump * should be at. */ -#define KERN_TRACE KERN_INFO +#define KERN_TRACE KERN_EMERG static void trace_printk_seq(struct trace_seq *s) -- cgit v1.2.3-18-g5258 From a442e5e0a2011af5b2d1f118fee0a8f9079f1d88 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 14 Jan 2009 14:50:19 -0500 Subject: trace: stop all recording to ring buffer on ftrace_dump Impact: limit ftrace dump output Currently ftrace_dump only calls ftrace_kill that is a fast way to prevent the function tracer functions from being called (just sets a flag and clears the function to call, nothing else). It is better to also turn off any recording to the ring buffers as well. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 1a1c5a6ab24..4d89e84f0f4 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3770,6 +3770,7 @@ void ftrace_dump(void) dump_ran = 1; /* No turning back! */ + tracing_off(); ftrace_kill(); for_each_tracing_cpu(cpu) { -- cgit v1.2.3-18-g5258 From 1092307d582a7566d23779c304cf86f3075ac5f0 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 15 Jan 2009 23:40:11 -0500 Subject: trace: set max latency variable to zero on default Impact: trace max latencies on start of latency tracing This patch sets the max latency to zero whenever one of the irq variant tracers or the wakeup tracer is set to current tracer. Most developers expect to see output when starting up a latency tracer. But since the max_latency is already set to max, and it takes a latency greater than max_latency to be recorded, there is no trace. This is not the expected behavior and has even confused myself. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 2 +- kernel/trace/trace_irqsoff.c | 1 + kernel/trace/trace_sched_wakeup.c | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4d89e84f0f4..17bb88d86ac 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -40,7 +40,7 @@ #define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE) -unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX; +unsigned long __read_mostly tracing_max_latency; unsigned long __read_mostly tracing_thresh; /* diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 7c2e326bbc8..62a78d94353 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -380,6 +380,7 @@ static void stop_irqsoff_tracer(struct trace_array *tr) static void __irqsoff_tracer_init(struct trace_array *tr) { + tracing_max_latency = 0; irqsoff_trace = tr; /* make sure that the tracer is visible */ smp_wmb(); diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 43586b689e3..42ae1e77b6b 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -333,6 +333,7 @@ static void stop_wakeup_tracer(struct trace_array *tr) static int wakeup_tracer_init(struct trace_array *tr) { + tracing_max_latency = 0; wakeup_trace = tr; start_wakeup_tracer(tr); return 0; -- cgit v1.2.3-18-g5258 From 91a8d07d82cac3aae3ef2ea1aaba5c9c4a934e91 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 21 Jan 2009 18:45:57 -0500 Subject: ring-buffer: reset timestamps when ring buffer is reset Impact: fix bad times of recent resets The ring buffer needs to reset its timestamps when reseting of the buffer, otherwise the timestamps are stale and might be used to calculate times in the buffer causing funny timestamps to appear. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/ring_buffer.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 9c1e73da4e3..bd38c5cfd8a 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2174,6 +2174,9 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) cpu_buffer->overrun = 0; cpu_buffer->entries = 0; + + cpu_buffer->write_stamp = 0; + cpu_buffer->read_stamp = 0; } /** -- cgit v1.2.3-18-g5258 From ba2607fe9c1f2d4ad5a3d4c4ae9117c5bfdca826 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Mon, 19 Jan 2009 10:38:35 +0100 Subject: x86, ds, bts: cleanup/fix DS configuration Cleanup the cpuid check for DS configuration. This also fixes a Corei7 CPUID enumeration bug. Signed-off-by: Markus Metzger Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/kernel/ds.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index da91701a234..169a120587b 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -15,8 +15,8 @@ * - buffer allocation (memory accounting) * * - * Copyright (C) 2007-2008 Intel Corporation. - * Markus Metzger , 2007-2008 + * Copyright (C) 2007-2009 Intel Corporation. + * Markus Metzger , 2007-2009 */ @@ -890,7 +890,7 @@ int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value) } static const struct ds_configuration ds_cfg_netburst = { - .name = "netburst", + .name = "Netburst", .ctl[dsf_bts] = (1 << 2) | (1 << 3), .ctl[dsf_bts_kernel] = (1 << 5), .ctl[dsf_bts_user] = (1 << 6), @@ -904,7 +904,7 @@ static const struct ds_configuration ds_cfg_netburst = { #endif }; static const struct ds_configuration ds_cfg_pentium_m = { - .name = "pentium m", + .name = "Pentium M", .ctl[dsf_bts] = (1 << 6) | (1 << 7), .sizeof_field = sizeof(long), @@ -915,8 +915,8 @@ static const struct ds_configuration ds_cfg_pentium_m = { .sizeof_rec[ds_pebs] = sizeof(long) * 18, #endif }; -static const struct ds_configuration ds_cfg_core2 = { - .name = "core 2", +static const struct ds_configuration ds_cfg_core2_atom = { + .name = "Core 2/Atom", .ctl[dsf_bts] = (1 << 6) | (1 << 7), .ctl[dsf_bts_kernel] = (1 << 9), .ctl[dsf_bts_user] = (1 << 10), @@ -949,19 +949,22 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) switch (c->x86) { case 0x6: switch (c->x86_model) { - case 0 ... 0xC: - /* sorry, don't know about them */ - break; - case 0xD: - case 0xE: /* Pentium M */ + case 0x9: + case 0xd: /* Pentium M */ ds_configure(&ds_cfg_pentium_m); break; - default: /* Core2, Atom, ... */ - ds_configure(&ds_cfg_core2); + case 0xf: + case 0x17: /* Core2 */ + case 0x1c: /* Atom */ + ds_configure(&ds_cfg_core2_atom); + break; + case 0x1a: /* i7 */ + default: + /* sorry, don't know about them */ break; } break; - case 0xF: + case 0xf: switch (c->x86_model) { case 0x0: case 0x1: -- cgit v1.2.3-18-g5258