diff options
-rw-r--r-- | Documentation/trace/events.txt | 207 | ||||
-rw-r--r-- | Documentation/trace/uprobetracer.txt | 36 | ||||
-rw-r--r-- | include/linux/ftrace.h | 2 | ||||
-rw-r--r-- | include/linux/ftrace_event.h | 139 | ||||
-rw-r--r-- | include/trace/ftrace.h | 29 | ||||
-rw-r--r-- | kernel/events/uprobes.c | 4 | ||||
-rw-r--r-- | kernel/trace/Makefile | 1 | ||||
-rw-r--r-- | kernel/trace/ftrace.c | 215 | ||||
-rw-r--r-- | kernel/trace/trace.c | 57 | ||||
-rw-r--r-- | kernel/trace/trace.h | 193 | ||||
-rw-r--r-- | kernel/trace/trace_events.c | 49 | ||||
-rw-r--r-- | kernel/trace/trace_events_filter.c | 12 | ||||
-rw-r--r-- | kernel/trace/trace_events_trigger.c | 1437 | ||||
-rw-r--r-- | kernel/trace/trace_kprobe.c | 838 | ||||
-rw-r--r-- | kernel/trace/trace_probe.c | 440 | ||||
-rw-r--r-- | kernel/trace/trace_probe.h | 224 | ||||
-rw-r--r-- | kernel/trace/trace_stack.c | 2 | ||||
-rw-r--r-- | kernel/trace/trace_syscalls.c | 14 | ||||
-rw-r--r-- | kernel/trace/trace_uprobe.c | 487 |
19 files changed, 3463 insertions, 923 deletions
diff --git a/Documentation/trace/events.txt b/Documentation/trace/events.txt index 37732a220d3..c94435df203 100644 --- a/Documentation/trace/events.txt +++ b/Documentation/trace/events.txt @@ -287,3 +287,210 @@ their old filters): prev_pid == 0 # cat sched_wakeup/filter common_pid == 0 + +6. Event triggers +================= + +Trace events can be made to conditionally invoke trigger 'commands' +which can take various forms and are described in detail below; +examples would be enabling or disabling other trace events or invoking +a stack trace whenever the trace event is hit. Whenever a trace event +with attached triggers is invoked, the set of trigger commands +associated with that event is invoked. Any given trigger can +additionally have an event filter of the same form as described in +section 5 (Event filtering) associated with it - the command will only +be invoked if the event being invoked passes the associated filter. +If no filter is associated with the trigger, it always passes. + +Triggers are added to and removed from a particular event by writing +trigger expressions to the 'trigger' file for the given event. + +A given event can have any number of triggers associated with it, +subject to any restrictions that individual commands may have in that +regard. + +Event triggers are implemented on top of "soft" mode, which means that +whenever a trace event has one or more triggers associated with it, +the event is activated even if it isn't actually enabled, but is +disabled in a "soft" mode. That is, the tracepoint will be called, +but just will not be traced, unless of course it's actually enabled. +This scheme allows triggers to be invoked even for events that aren't +enabled, and also allows the current event filter implementation to be +used for conditionally invoking triggers. + +The syntax for event triggers is roughly based on the syntax for +set_ftrace_filter 'ftrace filter commands' (see the 'Filter commands' +section of Documentation/trace/ftrace.txt), but there are major +differences and the implementation isn't currently tied to it in any +way, so beware about making generalizations between the two. + +6.1 Expression syntax +--------------------- + +Triggers are added by echoing the command to the 'trigger' file: + + # echo 'command[:count] [if filter]' > trigger + +Triggers are removed by echoing the same command but starting with '!' +to the 'trigger' file: + + # echo '!command[:count] [if filter]' > trigger + +The [if filter] part isn't used in matching commands when removing, so +leaving that off in a '!' command will accomplish the same thing as +having it in. + +The filter syntax is the same as that described in the 'Event +filtering' section above. + +For ease of use, writing to the trigger file using '>' currently just +adds or removes a single trigger and there's no explicit '>>' support +('>' actually behaves like '>>') or truncation support to remove all +triggers (you have to use '!' for each one added.) + +6.2 Supported trigger commands +------------------------------ + +The following commands are supported: + +- enable_event/disable_event + + These commands can enable or disable another trace event whenever + the triggering event is hit. When these commands are registered, + the other trace event is activated, but disabled in a "soft" mode. + That is, the tracepoint will be called, but just will not be traced. + The event tracepoint stays in this mode as long as there's a trigger + in effect that can trigger it. + + For example, the following trigger causes kmalloc events to be + traced when a read system call is entered, and the :1 at the end + specifies that this enablement happens only once: + + # echo 'enable_event:kmem:kmalloc:1' > \ + /sys/kernel/debug/tracing/events/syscalls/sys_enter_read/trigger + + The following trigger causes kmalloc events to stop being traced + when a read system call exits. This disablement happens on every + read system call exit: + + # echo 'disable_event:kmem:kmalloc' > \ + /sys/kernel/debug/tracing/events/syscalls/sys_exit_read/trigger + + The format is: + + enable_event:<system>:<event>[:count] + disable_event:<system>:<event>[:count] + + To remove the above commands: + + # echo '!enable_event:kmem:kmalloc:1' > \ + /sys/kernel/debug/tracing/events/syscalls/sys_enter_read/trigger + + # echo '!disable_event:kmem:kmalloc' > \ + /sys/kernel/debug/tracing/events/syscalls/sys_exit_read/trigger + + Note that there can be any number of enable/disable_event triggers + per triggering event, but there can only be one trigger per + triggered event. e.g. sys_enter_read can have triggers enabling both + kmem:kmalloc and sched:sched_switch, but can't have two kmem:kmalloc + versions such as kmem:kmalloc and kmem:kmalloc:1 or 'kmem:kmalloc if + bytes_req == 256' and 'kmem:kmalloc if bytes_alloc == 256' (they + could be combined into a single filter on kmem:kmalloc though). + +- stacktrace + + This command dumps a stacktrace in the trace buffer whenever the + triggering event occurs. + + For example, the following trigger dumps a stacktrace every time the + kmalloc tracepoint is hit: + + # echo 'stacktrace' > \ + /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger + + The following trigger dumps a stacktrace the first 5 times a kmalloc + request happens with a size >= 64K + + # echo 'stacktrace:5 if bytes_req >= 65536' > \ + /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger + + The format is: + + stacktrace[:count] + + To remove the above commands: + + # echo '!stacktrace' > \ + /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger + + # echo '!stacktrace:5 if bytes_req >= 65536' > \ + /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger + + The latter can also be removed more simply by the following (without + the filter): + + # echo '!stacktrace:5' > \ + /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger + + Note that there can be only one stacktrace trigger per triggering + event. + +- snapshot + + This command causes a snapshot to be triggered whenever the + triggering event occurs. + + The following command creates a snapshot every time a block request + queue is unplugged with a depth > 1. If you were tracing a set of + events or functions at the time, the snapshot trace buffer would + capture those events when the trigger event occured: + + # echo 'snapshot if nr_rq > 1' > \ + /sys/kernel/debug/tracing/events/block/block_unplug/trigger + + To only snapshot once: + + # echo 'snapshot:1 if nr_rq > 1' > \ + /sys/kernel/debug/tracing/events/block/block_unplug/trigger + + To remove the above commands: + + # echo '!snapshot if nr_rq > 1' > \ + /sys/kernel/debug/tracing/events/block/block_unplug/trigger + + # echo '!snapshot:1 if nr_rq > 1' > \ + /sys/kernel/debug/tracing/events/block/block_unplug/trigger + + Note that there can be only one snapshot trigger per triggering + event. + +- traceon/traceoff + + These commands turn tracing on and off when the specified events are + hit. The parameter determines how many times the tracing system is + turned on and off. If unspecified, there is no limit. + + The following command turns tracing off the first time a block + request queue is unplugged with a depth > 1. If you were tracing a + set of events or functions at the time, you could then examine the + trace buffer to see the sequence of events that led up to the + trigger event: + + # echo 'traceoff:1 if nr_rq > 1' > \ + /sys/kernel/debug/tracing/events/block/block_unplug/trigger + + To always disable tracing when nr_rq > 1 : + + # echo 'traceoff if nr_rq > 1' > \ + /sys/kernel/debug/tracing/events/block/block_unplug/trigger + + To remove the above commands: + + # echo '!traceoff:1 if nr_rq > 1' > \ + /sys/kernel/debug/tracing/events/block/block_unplug/trigger + + # echo '!traceoff if nr_rq > 1' > \ + /sys/kernel/debug/tracing/events/block/block_unplug/trigger + + Note that there can be only one traceon or traceoff trigger per + triggering event. diff --git a/Documentation/trace/uprobetracer.txt b/Documentation/trace/uprobetracer.txt index d9c3e682312..f1cf9a34ad9 100644 --- a/Documentation/trace/uprobetracer.txt +++ b/Documentation/trace/uprobetracer.txt @@ -19,18 +19,44 @@ user to calculate the offset of the probepoint in the object. Synopsis of uprobe_tracer ------------------------- - p[:[GRP/]EVENT] PATH:SYMBOL[+offs] [FETCHARGS] : Set a uprobe - r[:[GRP/]EVENT] PATH:SYMBOL[+offs] [FETCHARGS] : Set a return uprobe (uretprobe) - -:[GRP/]EVENT : Clear uprobe or uretprobe event + p[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : Set a uprobe + r[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : Set a return uprobe (uretprobe) + -:[GRP/]EVENT : Clear uprobe or uretprobe event GRP : Group name. If omitted, "uprobes" is the default value. EVENT : Event name. If omitted, the event name is generated based - on SYMBOL+offs. + on PATH+OFFSET. PATH : Path to an executable or a library. - SYMBOL[+offs] : Symbol+offset where the probe is inserted. + OFFSET : Offset where the probe is inserted. FETCHARGS : Arguments. Each probe can have up to 128 args. %REG : Fetch register REG + @ADDR : Fetch memory at ADDR (ADDR should be in userspace) + @+OFFSET : Fetch memory at OFFSET (OFFSET from same file as PATH) + $stackN : Fetch Nth entry of stack (N >= 0) + $stack : Fetch stack address. + $retval : Fetch return value.(*) + +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**) + NAME=FETCHARG : Set NAME as the argument name of FETCHARG. + FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types + (u8/u16/u32/u64/s8/s16/s32/s64), "string" and bitfield + are supported. + + (*) only for return probe. + (**) this is useful for fetching a field of data structures. + +Types +----- +Several types are supported for fetch-args. Uprobe tracer will access memory +by given type. Prefix 's' and 'u' means those types are signed and unsigned +respectively. Traced arguments are shown in decimal (signed) or hex (unsigned). +String type is a special type, which fetches a "null-terminated" string from +user space. +Bitfield is another special type, which takes 3 parameters, bit-width, bit- +offset, and container-size (usually 32). The syntax is; + + b<bit-width>@<bit-offset>/<container-size> + Event Profiling --------------- diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 31ea4b42836..f4233b195da 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -570,8 +570,6 @@ static inline int ftrace_regex_release(struct inode *inode, struct file *file) { return -ENODEV; } #endif /* CONFIG_DYNAMIC_FTRACE */ -loff_t ftrace_filter_lseek(struct file *file, loff_t offset, int whence); - /* totally disable ftrace - can not re-enable after this */ void ftrace_kill(void); diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 8c9b7a1c413..4e4cc28623a 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -1,3 +1,4 @@ + #ifndef _LINUX_FTRACE_EVENT_H #define _LINUX_FTRACE_EVENT_H @@ -264,6 +265,8 @@ enum { FTRACE_EVENT_FL_NO_SET_FILTER_BIT, FTRACE_EVENT_FL_SOFT_MODE_BIT, FTRACE_EVENT_FL_SOFT_DISABLED_BIT, + FTRACE_EVENT_FL_TRIGGER_MODE_BIT, + FTRACE_EVENT_FL_TRIGGER_COND_BIT, }; /* @@ -275,6 +278,8 @@ enum { * SOFT_MODE - The event is enabled/disabled by SOFT_DISABLED * SOFT_DISABLED - When set, do not trace the event (even though its * tracepoint may be enabled) + * TRIGGER_MODE - When set, invoke the triggers associated with the event + * TRIGGER_COND - When set, one or more triggers has an associated filter */ enum { FTRACE_EVENT_FL_ENABLED = (1 << FTRACE_EVENT_FL_ENABLED_BIT), @@ -283,6 +288,8 @@ enum { FTRACE_EVENT_FL_NO_SET_FILTER = (1 << FTRACE_EVENT_FL_NO_SET_FILTER_BIT), FTRACE_EVENT_FL_SOFT_MODE = (1 << FTRACE_EVENT_FL_SOFT_MODE_BIT), FTRACE_EVENT_FL_SOFT_DISABLED = (1 << FTRACE_EVENT_FL_SOFT_DISABLED_BIT), + FTRACE_EVENT_FL_TRIGGER_MODE = (1 << FTRACE_EVENT_FL_TRIGGER_MODE_BIT), + FTRACE_EVENT_FL_TRIGGER_COND = (1 << FTRACE_EVENT_FL_TRIGGER_COND_BIT), }; struct ftrace_event_file { @@ -292,6 +299,7 @@ struct ftrace_event_file { struct dentry *dir; struct trace_array *tr; struct ftrace_subsystem_dir *system; + struct list_head triggers; /* * 32 bit flags: @@ -299,6 +307,7 @@ struct ftrace_event_file { * bit 1: enabled cmd record * bit 2: enable/disable with the soft disable bit * bit 3: soft disabled + * bit 4: trigger enabled * * Note: The bits must be set atomically to prevent races * from other writers. Reads of flags do not need to be in @@ -310,6 +319,7 @@ struct ftrace_event_file { */ unsigned long flags; atomic_t sm_ref; /* soft-mode reference counter */ + atomic_t tm_ref; /* trigger-mode reference counter */ }; #define __TRACE_EVENT_FLAGS(name, value) \ @@ -337,6 +347,14 @@ struct ftrace_event_file { #define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */ +enum event_trigger_type { + ETT_NONE = (0), + ETT_TRACE_ONOFF = (1 << 0), + ETT_SNAPSHOT = (1 << 1), + ETT_STACKTRACE = (1 << 2), + ETT_EVENT_ENABLE = (1 << 3), +}; + extern void destroy_preds(struct ftrace_event_file *file); extern void destroy_call_preds(struct ftrace_event_call *call); extern int filter_match_preds(struct event_filter *filter, void *rec); @@ -347,6 +365,127 @@ extern int filter_check_discard(struct ftrace_event_file *file, void *rec, extern int call_filter_check_discard(struct ftrace_event_call *call, void *rec, struct ring_buffer *buffer, struct ring_buffer_event *event); +extern enum event_trigger_type event_triggers_call(struct ftrace_event_file *file, + void *rec); +extern void event_triggers_post_call(struct ftrace_event_file *file, + enum event_trigger_type tt); + +/** + * ftrace_trigger_soft_disabled - do triggers and test if soft disabled + * @file: The file pointer of the event to test + * + * If any triggers without filters are attached to this event, they + * will be called here. If the event is soft disabled and has no + * triggers that require testing the fields, it will return true, + * otherwise false. + */ +static inline bool +ftrace_trigger_soft_disabled(struct ftrace_event_file *file) +{ + unsigned long eflags = file->flags; + + if (!(eflags & FTRACE_EVENT_FL_TRIGGER_COND)) { + if (eflags & FTRACE_EVENT_FL_TRIGGER_MODE) + event_triggers_call(file, NULL); + if (eflags & FTRACE_EVENT_FL_SOFT_DISABLED) + return true; + } + return false; +} + +/* + * Helper function for event_trigger_unlock_commit{_regs}(). + * If there are event triggers attached to this event that requires + * filtering against its fields, then they wil be called as the + * entry already holds the field information of the current event. + * + * It also checks if the event should be discarded or not. + * It is to be discarded if the event is soft disabled and the + * event was only recorded to process triggers, or if the event + * filter is active and this event did not match the filters. + * + * Returns true if the event is discarded, false otherwise. + */ +static inline bool +__event_trigger_test_discard(struct ftrace_event_file *file, + struct ring_buffer *buffer, + struct ring_buffer_event *event, + void *entry, + enum event_trigger_type *tt) +{ + unsigned long eflags = file->flags; + + if (eflags & FTRACE_EVENT_FL_TRIGGER_COND) + *tt = event_triggers_call(file, entry); + + if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags)) + ring_buffer_discard_commit(buffer, event); + else if (!filter_check_discard(file, entry, buffer, event)) + return false; + + return true; +} + +/** + * event_trigger_unlock_commit - handle triggers and finish event commit + * @file: The file pointer assoctiated to the event + * @buffer: The ring buffer that the event is being written to + * @event: The event meta data in the ring buffer + * @entry: The event itself + * @irq_flags: The state of the interrupts at the start of the event + * @pc: The state of the preempt count at the start of the event. + * + * This is a helper function to handle triggers that require data + * from the event itself. It also tests the event against filters and + * if the event is soft disabled and should be discarded. + */ +static inline void +event_trigger_unlock_commit(struct ftrace_event_file *file, + struct ring_buffer *buffer, + struct ring_buffer_event *event, + void *entry, unsigned long irq_flags, int pc) +{ + enum event_trigger_type tt = ETT_NONE; + + if (!__event_trigger_test_discard(file, buffer, event, entry, &tt)) + trace_buffer_unlock_commit(buffer, event, irq_flags, pc); + + if (tt) + event_triggers_post_call(file, tt); +} + +/** + * event_trigger_unlock_commit_regs - handle triggers and finish event commit + * @file: The file pointer assoctiated to the event + * @buffer: The ring buffer that the event is being written to + * @event: The event meta data in the ring buffer + * @entry: The event itself + * @irq_flags: The state of the interrupts at the start of the event + * @pc: The state of the preempt count at the start of the event. + * + * This is a helper function to handle triggers that require data + * from the event itself. It also tests the event against filters and + * if the event is soft disabled and should be discarded. + * + * Same as event_trigger_unlock_commit() but calls + * trace_buffer_unlock_commit_regs() instead of trace_buffer_unlock_commit(). + */ +static inline void +event_trigger_unlock_commit_regs(struct ftrace_event_file *file, + struct ring_buffer *buffer, + struct ring_buffer_event *event, + void *entry, unsigned long irq_flags, int pc, + struct pt_regs *regs) +{ + enum event_trigger_type tt = ETT_NONE; + + if (!__event_trigger_test_discard(file, buffer, event, entry, &tt)) + trace_buffer_unlock_commit_regs(buffer, event, + irq_flags, pc, regs); + + if (tt) + event_triggers_post_call(file, tt); +} enum { FILTER_OTHER = 0, diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 5c38606613d..1a8b28db377 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -418,6 +418,8 @@ static inline notrace int ftrace_get_offsets_##call( \ * struct ftrace_event_file *ftrace_file = __data; * struct ftrace_event_call *event_call = ftrace_file->event_call; * struct ftrace_data_offsets_<call> __maybe_unused __data_offsets; + * unsigned long eflags = ftrace_file->flags; + * enum event_trigger_type __tt = ETT_NONE; * struct ring_buffer_event *event; * struct ftrace_raw_<call> *entry; <-- defined in stage 1 * struct ring_buffer *buffer; @@ -425,9 +427,12 @@ static inline notrace int ftrace_get_offsets_##call( \ * int __data_size; * int pc; * - * if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, - * &ftrace_file->flags)) - * return; + * if (!(eflags & FTRACE_EVENT_FL_TRIGGER_COND)) { + * if (eflags & FTRACE_EVENT_FL_TRIGGER_MODE) + * event_triggers_call(ftrace_file, NULL); + * if (eflags & FTRACE_EVENT_FL_SOFT_DISABLED) + * return; + * } * * local_save_flags(irq_flags); * pc = preempt_count(); @@ -445,8 +450,17 @@ static inline notrace int ftrace_get_offsets_##call( \ * { <assign>; } <-- Here we assign the entries by the __field and * __array macros. * - * if (!filter_check_discard(ftrace_file, entry, buffer, event)) + * if (eflags & FTRACE_EVENT_FL_TRIGGER_COND) + * __tt = event_triggers_call(ftrace_file, entry); + * + * if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, + * &ftrace_file->flags)) + * ring_buffer_discard_commit(buffer, event); + * else if (!filter_check_discard(ftrace_file, entry, buffer, event)) * trace_buffer_unlock_commit(buffer, event, irq_flags, pc); + * + * if (__tt) + * event_triggers_post_call(ftrace_file, __tt); * } * * static struct trace_event ftrace_event_type_<call> = { @@ -539,8 +553,7 @@ ftrace_raw_event_##call(void *__data, proto) \ int __data_size; \ int pc; \ \ - if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, \ - &ftrace_file->flags)) \ + if (ftrace_trigger_soft_disabled(ftrace_file)) \ return; \ \ local_save_flags(irq_flags); \ @@ -560,8 +573,8 @@ ftrace_raw_event_##call(void *__data, proto) \ \ { assign; } \ \ - if (!filter_check_discard(ftrace_file, entry, buffer, event)) \ - trace_buffer_unlock_commit(buffer, event, irq_flags, pc); \ + event_trigger_unlock_commit(ftrace_file, buffer, event, entry, \ + irq_flags, pc); \ } /* * The ftrace_test_probe is compiled out, it is only here as a build time check diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index b886a5e7d4f..307d87c0991 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1854,6 +1854,10 @@ static void handle_swbp(struct pt_regs *regs) if (unlikely(!test_bit(UPROBE_COPY_INSN, &uprobe->flags))) goto out; + /* Tracing handlers use ->utask to communicate with fetch methods */ + if (!get_utask()) + goto out; + handler_chain(uprobe, regs); if (can_skip_sstep(uprobe, regs)) goto out; diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index d7e2068e4b7..1378e84fbe3 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -50,6 +50,7 @@ ifeq ($(CONFIG_PERF_EVENTS),y) obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o endif obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o +obj-$(CONFIG_EVENT_TRACING) += trace_events_trigger.o obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o obj-$(CONFIG_TRACEPOINTS) += power-traces.o ifeq ($(CONFIG_PM_RUNTIME),y) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 72a0f81dc5a..cd7f76d1eb8 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -85,6 +85,8 @@ int function_trace_stop __read_mostly; /* Current function tracing op */ struct ftrace_ops *function_trace_op __read_mostly = &ftrace_list_end; +/* What to set function_trace_op to */ +static struct ftrace_ops *set_function_trace_op; /* List for set_ftrace_pid's pids. */ LIST_HEAD(ftrace_pids); @@ -278,6 +280,29 @@ static void update_global_ops(void) global_ops.func = func; } +static void ftrace_sync(struct work_struct *work) +{ + /* + * This function is just a stub to implement a hard force + * of synchronize_sched(). This requires synchronizing + * tasks even in userspace and idle. + * + * Yes, function tracing is rude. + */ +} + +static void ftrace_sync_ipi(void *data) +{ + /* Probably not needed, but do it anyway */ + smp_rmb(); +} + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +static void update_function_graph_func(void); +#else +static inline void update_function_graph_func(void) { } +#endif + static void update_ftrace_function(void) { ftrace_func_t func; @@ -296,16 +321,61 @@ static void update_ftrace_function(void) !FTRACE_FORCE_LIST_FUNC)) { /* Set the ftrace_ops that the arch callback uses */ if (ftrace_ops_list == &global_ops) - function_trace_op = ftrace_global_list; + set_function_trace_op = ftrace_global_list; else - function_trace_op = ftrace_ops_list; + set_function_trace_op = ftrace_ops_list; func = ftrace_ops_list->func; } else { /* Just use the default ftrace_ops */ - function_trace_op = &ftrace_list_end; + set_function_trace_op = &ftrace_list_end; func = ftrace_ops_list_func; } + /* If there's no change, then do nothing more here */ + if (ftrace_trace_function == func) + return; + + update_function_graph_func(); + + /* + * If we are using the list function, it doesn't care + * about the function_trace_ops. + */ + if (func == ftrace_ops_list_func) { + ftrace_trace_function = func; + /* + * Don't even bother setting function_trace_ops, + * it would be racy to do so anyway. + */ + return; + } + +#ifndef CONFIG_DYNAMIC_FTRACE + /* + * For static tracing, we need to be a bit more careful. + * The function change takes affect immediately. Thus, + * we need to coorditate the setting of the function_trace_ops + * with the setting of the ftrace_trace_function. + * + * Set the function to the list ops, which will call the + * function we want, albeit indirectly, but it handles the + * ftrace_ops and doesn't depend on function_trace_op. + */ + ftrace_trace_function = ftrace_ops_list_func; + /* + * Make sure all CPUs see this. Yes this is slow, but static + * tracing is slow and nasty to have enabled. + */ + schedule_on_each_cpu(ftrace_sync); + /* Now all cpus are using the list ops. */ + function_trace_op = set_function_trace_op; + /* Make sure the function_trace_op is visible on all CPUs */ + smp_wmb(); + /* Nasty way to force a rmb on all cpus */ + smp_call_function(ftrace_sync_ipi, NULL, 1); + /* OK, we are all set to update the ftrace_trace_function now! */ +#endif /* !CONFIG_DYNAMIC_FTRACE */ + ftrace_trace_function = func; } @@ -410,17 +480,6 @@ static int __register_ftrace_function(struct ftrace_ops *ops) return 0; } -static void ftrace_sync(struct work_struct *work) -{ - /* - * This function is just a stub to implement a hard force - * of synchronize_sched(). This requires synchronizing - * tasks even in userspace and idle. - * - * Yes, function tracing is rude. - */ -} - static int __unregister_ftrace_function(struct ftrace_ops *ops) { int ret; @@ -439,20 +498,6 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops) } else if (ops->flags & FTRACE_OPS_FL_CONTROL) { ret = remove_ftrace_list_ops(&ftrace_control_list, &control_ops, ops); - if (!ret) { - /* - * The ftrace_ops is now removed from the list, - * so there'll be no new users. We must ensure - * all current users are done before we free - * the control data. - * Note synchronize_sched() is not enough, as we - * use preempt_disable() to do RCU, but the function - * tracer can be called where RCU is not active - * (before user_exit()). - */ - schedule_on_each_cpu(ftrace_sync); - control_ops_free(ops); - } } else ret = remove_ftrace_ops(&ftrace_ops_list, ops); @@ -462,17 +507,6 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops) if (ftrace_enabled) update_ftrace_function(); - /* - * Dynamic ops may be freed, we must make sure that all - * callers are done before leaving this function. - * - * Again, normal synchronize_sched() is not good enough. - * We need to do a hard force of sched synchronization. - */ - if (ops->flags & FTRACE_OPS_FL_DYNAMIC) - schedule_on_each_cpu(ftrace_sync); - - return 0; } @@ -1082,19 +1116,6 @@ static __init void ftrace_profile_debugfs(struct dentry *d_tracer) static struct pid * const ftrace_swapper_pid = &init_struct_pid; -loff_t -ftrace_filter_lseek(struct file *file, loff_t offset, int whence) -{ - loff_t ret; - - if (file->f_mode & FMODE_READ) - ret = seq_lseek(file, offset, whence); - else - file->f_pos = ret = 1; - - return ret; -} - #ifdef CONFIG_DYNAMIC_FTRACE #ifndef CONFIG_FTRACE_MCOUNT_RECORD @@ -1992,8 +2013,14 @@ void ftrace_modify_all_code(int command) else if (command & FTRACE_DISABLE_CALLS) ftrace_replace_code(0); - if (update && ftrace_trace_function != ftrace_ops_list_func) + if (update && ftrace_trace_function != ftrace_ops_list_func) { + function_trace_op = set_function_trace_op; + smp_wmb(); + /* If irqs are disabled, we are in stop machine */ + if (!irqs_disabled()) + smp_call_function(ftrace_sync_ipi, NULL, 1); ftrace_update_ftrace_func(ftrace_trace_function); + } if (command & FTRACE_START_FUNC_RET) ftrace_enable_ftrace_graph_caller(); @@ -2156,10 +2183,41 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command) command |= FTRACE_UPDATE_TRACE_FUNC; } - if (!command || !ftrace_enabled) + if (!command || !ftrace_enabled) { + /* + * If these are control ops, they still need their + * per_cpu field freed. Since, function tracing is + * not currently active, we can just free them + * without synchronizing all CPUs. + */ + if (ops->flags & FTRACE_OPS_FL_CONTROL) + control_ops_free(ops); return 0; + } ftrace_run_update_code(command); + + /* + * Dynamic ops may be freed, we must make sure that all + * callers are done before leaving this function. + * The same goes for freeing the per_cpu data of the control + * ops. + * + * Again, normal synchronize_sched() is not good enough. + * We need to do a hard force of sched synchronization. + * This is because we use preempt_disable() to do RCU, but + * the function tracers can be called where RCU is not watching + * (like before user_exit()). We can not rely on the RCU + * infrastructure to do the synchronization, thus we must do it + * ourselves. + */ + if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_CONTROL)) { + schedule_on_each_cpu(ftrace_sync); + + if (ops->flags & FTRACE_OPS_FL_CONTROL) + control_ops_free(ops); + } + return 0; } @@ -2739,7 +2797,7 @@ static void ftrace_filter_reset(struct ftrace_hash *hash) * routine, you can use ftrace_filter_write() for the write * routine if @flag has FTRACE_ITER_FILTER set, or * ftrace_notrace_write() if @flag has FTRACE_ITER_NOTRACE set. - * ftrace_filter_lseek() should be used as the lseek routine, and + * tracing_lseek() should be used as the lseek routine, and * release must call ftrace_regex_release(). */ int @@ -3767,7 +3825,7 @@ static const struct file_operations ftrace_filter_fops = { .open = ftrace_filter_open, .read = seq_read, .write = ftrace_filter_write, - .llseek = ftrace_filter_lseek, + .llseek = tracing_lseek, .release = ftrace_regex_release, }; @@ -3775,7 +3833,7 @@ static const struct file_operations ftrace_notrace_fops = { .open = ftrace_notrace_open, .read = seq_read, .write = ftrace_notrace_write, - .llseek = ftrace_filter_lseek, + .llseek = tracing_lseek, .release = ftrace_regex_release, }; @@ -4038,7 +4096,7 @@ static const struct file_operations ftrace_graph_fops = { .open = ftrace_graph_open, .read = seq_read, .write = ftrace_graph_write, - .llseek = ftrace_filter_lseek, + .llseek = tracing_lseek, .release = ftrace_graph_release, }; @@ -4046,7 +4104,7 @@ static const struct file_operations ftrace_graph_notrace_fops = { .open = ftrace_graph_notrace_open, .read = seq_read, .write = ftrace_graph_write, - .llseek = ftrace_filter_lseek, + .llseek = tracing_lseek, .release = ftrace_graph_release, }; #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ @@ -4719,7 +4777,7 @@ static const struct file_operations ftrace_pid_fops = { .open = ftrace_pid_open, .write = ftrace_pid_write, .read = seq_read, - .llseek = ftrace_filter_lseek, + .llseek = tracing_lseek, .release = ftrace_pid_release, }; @@ -4862,6 +4920,7 @@ int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace) trace_func_graph_ret_t ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; trace_func_graph_ent_t ftrace_graph_entry = ftrace_graph_entry_stub; +static trace_func_graph_ent_t __ftrace_graph_entry = ftrace_graph_entry_stub; /* Try to assign a return stack array on FTRACE_RETSTACK_ALLOC_SIZE tasks. */ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list) @@ -5003,6 +5062,30 @@ static struct ftrace_ops fgraph_ops __read_mostly = { FTRACE_OPS_FL_RECURSION_SAFE, }; +static int |