From a949ae560a511fe4e3adf48fa44fefded93e5c2b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 24 Apr 2014 10:40:12 -0400 Subject: ftrace/module: Hardcode ftrace_module_init() call into load_module() A race exists between module loading and enabling of function tracer. CPU 1 CPU 2 ----- ----- load_module() module->state = MODULE_STATE_COMING register_ftrace_function() mutex_lock(&ftrace_lock); ftrace_startup() update_ftrace_function(); ftrace_arch_code_modify_prepare() set_all_module_text_rw(); ftrace_arch_code_modify_post_process() set_all_module_text_ro(); [ here all module text is set to RO, including the module that is loading!! ] blocking_notifier_call_chain(MODULE_STATE_COMING); ftrace_init_module() [ tries to modify code, but it's RO, and fails! ftrace_bug() is called] When this race happens, ftrace_bug() will produces a nasty warning and all of the function tracing features will be disabled until reboot. The simple solution is to treate module load the same way the core kernel is treated at boot. To hardcode the ftrace function modification of converting calls to mcount into nops. This is done in init/main.c there's no reason it could not be done in load_module(). This gives a better control of the changes and doesn't tie the state of the module to its notifiers as much. Ftrace is special, it needs to be treated as such. The reason this would work, is that the ftrace_module_init() would be called while the module is in MODULE_STATE_UNFORMED, which is ignored by the set_all_module_text_ro() call. Link: http://lkml.kernel.org/r/1395637826-3312-1-git-send-email-indou.takao@jp.fujitsu.com Reported-by: Takao Indoh Acked-by: Rusty Russell Cc: stable@vger.kernel.org # 2.6.38+ Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 27 ++++----------------------- 1 file changed, 4 insertions(+), 23 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 1fd4b947921..4a54a25afa2 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -4330,16 +4330,11 @@ static void ftrace_init_module(struct module *mod, ftrace_process_locs(mod, start, end); } -static int ftrace_module_notify_enter(struct notifier_block *self, - unsigned long val, void *data) +void ftrace_module_init(struct module *mod) { - struct module *mod = data; - - if (val == MODULE_STATE_COMING) - ftrace_init_module(mod, mod->ftrace_callsites, - mod->ftrace_callsites + - mod->num_ftrace_callsites); - return 0; + ftrace_init_module(mod, mod->ftrace_callsites, + mod->ftrace_callsites + + mod->num_ftrace_callsites); } static int ftrace_module_notify_exit(struct notifier_block *self, @@ -4353,11 +4348,6 @@ static int ftrace_module_notify_exit(struct notifier_block *self, return 0; } #else -static int ftrace_module_notify_enter(struct notifier_block *self, - unsigned long val, void *data) -{ - return 0; -} static int ftrace_module_notify_exit(struct notifier_block *self, unsigned long val, void *data) { @@ -4365,11 +4355,6 @@ static int ftrace_module_notify_exit(struct notifier_block *self, } #endif /* CONFIG_MODULES */ -struct notifier_block ftrace_module_enter_nb = { - .notifier_call = ftrace_module_notify_enter, - .priority = INT_MAX, /* Run before anything that can use kprobes */ -}; - struct notifier_block ftrace_module_exit_nb = { .notifier_call = ftrace_module_notify_exit, .priority = INT_MIN, /* Run after anything that can remove kprobes */ @@ -4403,10 +4388,6 @@ void __init ftrace_init(void) __start_mcount_loc, __stop_mcount_loc); - ret = register_module_notifier(&ftrace_module_enter_nb); - if (ret) - pr_warning("Failed to register trace ftrace module enter notifier\n"); - ret = register_module_notifier(&ftrace_module_exit_nb); if (ret) pr_warning("Failed to register trace ftrace module exit notifier\n"); -- cgit v1.2.3-70-g09d2 From 561a4fe851ccab9dd0d14989ab566f9392d9f8b5 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 2 May 2014 13:30:04 -0400 Subject: tracing: Use rcu_dereference_sched() for trace event triggers As trace event triggers are now part of the mainline kernel, I added my trace event trigger tests to my test suite I run on all my kernels. Now these tests get run under different config options, and one of those options is CONFIG_PROVE_RCU, which checks under lockdep that the rcu locking primitives are being used correctly. This triggered the following splat: =============================== [ INFO: suspicious RCU usage. ] 3.15.0-rc2-test+ #11 Not tainted ------------------------------- kernel/trace/trace_events_trigger.c:80 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 1, debug_locks = 0 4 locks held by swapper/1/0: #0: ((&(&j_cdbs->work)->timer)){..-...}, at: [] call_timer_fn+0x5/0x1be #1: (&(&pool->lock)->rlock){-.-...}, at: [] __queue_work+0x140/0x283 #2: (&p->pi_lock){-.-.-.}, at: [] try_to_wake_up+0x2e/0x1e8 #3: (&rq->lock){-.-.-.}, at: [] try_to_wake_up+0x1a0/0x1e8 stack backtrace: CPU: 1 PID: 0 Comm: swapper/1 Not tainted 3.15.0-rc2-test+ #11 Hardware name: /DG965MQ, BIOS MQ96510J.86A.0372.2006.0605.1717 06/05/2006 0000000000000001 ffff88007e083b98 ffffffff819f53a5 0000000000000006 ffff88007b0942c0 ffff88007e083bc8 ffffffff81081307 ffff88007ad96d20 0000000000000000 ffff88007af2d840 ffff88007b2e701c ffff88007e083c18 Call Trace: [] dump_stack+0x4f/0x7c [] lockdep_rcu_suspicious+0x107/0x110 [] event_triggers_call+0x99/0x108 [] ftrace_event_buffer_commit+0x42/0xa4 [] ftrace_raw_event_sched_wakeup_template+0x71/0x7c [] ttwu_do_wakeup+0x7f/0xff [] ttwu_do_activate.constprop.126+0x5c/0x61 [] try_to_wake_up+0x1ac/0x1e8 [] wake_up_process+0x36/0x3b [] wake_up_worker+0x24/0x26 [] insert_work+0x5c/0x65 [] __queue_work+0x26c/0x283 [] ? __queue_work+0x283/0x283 [] delayed_work_timer_fn+0x1e/0x20 [] call_timer_fn+0xdf/0x1be^M [] ? call_timer_fn+0x5/0x1be [] ? __queue_work+0x283/0x283 [] run_timer_softirq+0x1a4/0x22f^M [] __do_softirq+0x17b/0x31b^M [] irq_exit+0x42/0x97 [] smp_apic_timer_interrupt+0x37/0x44 [] apic_timer_interrupt+0x6f/0x80 [] ? default_idle+0x21/0x32 [] ? default_idle+0x1f/0x32 [] arch_cpu_idle+0xf/0x11 [] cpu_startup_entry+0x1a3/0x213 [] start_secondary+0x212/0x219 The cause is that the triggers are protected by rcu_read_lock_sched() but the data is dereferenced with rcu_dereference() which expects it to be protected with rcu_read_lock(). The proper reference should be rcu_dereference_sched(). Cc: Tom Zanussi Cc: stable@vger.kernel.org # 3.14+ Signed-off-by: Steven Rostedt --- kernel/trace/trace_events_trigger.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 925f537f07d..4747b476a03 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -77,7 +77,7 @@ event_triggers_call(struct ftrace_event_file *file, void *rec) data->ops->func(data); continue; } - filter = rcu_dereference(data->filter); + filter = rcu_dereference_sched(data->filter); if (filter && !filter_match_preds(filter, rec)) continue; if (data->cmd_ops->post_trigger) { -- cgit v1.2.3-70-g09d2