aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/trace/events.txt207
-rw-r--r--Documentation/trace/uprobetracer.txt36
-rw-r--r--include/linux/ftrace.h2
-rw-r--r--include/linux/ftrace_event.h139
-rw-r--r--include/trace/ftrace.h29
-rw-r--r--kernel/events/uprobes.c4
-rw-r--r--kernel/trace/Makefile1
-rw-r--r--kernel/trace/ftrace.c215
-rw-r--r--kernel/trace/trace.c57
-rw-r--r--kernel/trace/trace.h193
-rw-r--r--kernel/trace/trace_events.c49
-rw-r--r--kernel/trace/trace_events_filter.c12
-rw-r--r--kernel/trace/trace_events_trigger.c1437
-rw-r--r--kernel/trace/trace_kprobe.c838
-rw-r--r--kernel/trace/trace_probe.c440
-rw-r--r--kernel/trace/trace_probe.h224
-rw-r--r--kernel/trace/trace_stack.c2
-rw-r--r--kernel/trace/trace_syscalls.c14
-rw-r--r--kernel/trace/trace_uprobe.c487
19 files changed, 3463 insertions, 923 deletions
diff --git a/Documentation/trace/events.txt b/Documentation/trace/events.txt
index 37732a220d3..c94435df203 100644
--- a/Documentation/trace/events.txt
+++ b/Documentation/trace/events.txt
@@ -287,3 +287,210 @@ their old filters):
prev_pid == 0
# cat sched_wakeup/filter
common_pid == 0
+
+6. Event triggers
+=================
+
+Trace events can be made to conditionally invoke trigger 'commands'
+which can take various forms and are described in detail below;
+examples would be enabling or disabling other trace events or invoking
+a stack trace whenever the trace event is hit. Whenever a trace event
+with attached triggers is invoked, the set of trigger commands
+associated with that event is invoked. Any given trigger can
+additionally have an event filter of the same form as described in
+section 5 (Event filtering) associated with it - the command will only
+be invoked if the event being invoked passes the associated filter.
+If no filter is associated with the trigger, it always passes.
+
+Triggers are added to and removed from a particular event by writing
+trigger expressions to the 'trigger' file for the given event.
+
+A given event can have any number of triggers associated with it,
+subject to any restrictions that individual commands may have in that
+regard.
+
+Event triggers are implemented on top of "soft" mode, which means that
+whenever a trace event has one or more triggers associated with it,
+the event is activated even if it isn't actually enabled, but is
+disabled in a "soft" mode. That is, the tracepoint will be called,
+but just will not be traced, unless of course it's actually enabled.
+This scheme allows triggers to be invoked even for events that aren't
+enabled, and also allows the current event filter implementation to be
+used for conditionally invoking triggers.
+
+The syntax for event triggers is roughly based on the syntax for
+set_ftrace_filter 'ftrace filter commands' (see the 'Filter commands'
+section of Documentation/trace/ftrace.txt), but there are major
+differences and the implementation isn't currently tied to it in any
+way, so beware about making generalizations between the two.
+
+6.1 Expression syntax
+---------------------
+
+Triggers are added by echoing the command to the 'trigger' file:
+
+ # echo 'command[:count] [if filter]' > trigger
+
+Triggers are removed by echoing the same command but starting with '!'
+to the 'trigger' file:
+
+ # echo '!command[:count] [if filter]' > trigger
+
+The [if filter] part isn't used in matching commands when removing, so
+leaving that off in a '!' command will accomplish the same thing as
+having it in.
+
+The filter syntax is the same as that described in the 'Event
+filtering' section above.
+
+For ease of use, writing to the trigger file using '>' currently just
+adds or removes a single trigger and there's no explicit '>>' support
+('>' actually behaves like '>>') or truncation support to remove all
+triggers (you have to use '!' for each one added.)
+
+6.2 Supported trigger commands
+------------------------------
+
+The following commands are supported:
+
+- enable_event/disable_event
+
+ These commands can enable or disable another trace event whenever
+ the triggering event is hit. When these commands are registered,
+ the other trace event is activated, but disabled in a "soft" mode.
+ That is, the tracepoint will be called, but just will not be traced.
+ The event tracepoint stays in this mode as long as there's a trigger
+ in effect that can trigger it.
+
+ For example, the following trigger causes kmalloc events to be
+ traced when a read system call is entered, and the :1 at the end
+ specifies that this enablement happens only once:
+
+ # echo 'enable_event:kmem:kmalloc:1' > \
+ /sys/kernel/debug/tracing/events/syscalls/sys_enter_read/trigger
+
+ The following trigger causes kmalloc events to stop being traced
+ when a read system call exits. This disablement happens on every
+ read system call exit:
+
+ # echo 'disable_event:kmem:kmalloc' > \
+ /sys/kernel/debug/tracing/events/syscalls/sys_exit_read/trigger
+
+ The format is:
+
+ enable_event:<system>:<event>[:count]
+ disable_event:<system>:<event>[:count]
+
+ To remove the above commands:
+
+ # echo '!enable_event:kmem:kmalloc:1' > \
+ /sys/kernel/debug/tracing/events/syscalls/sys_enter_read/trigger
+
+ # echo '!disable_event:kmem:kmalloc' > \
+ /sys/kernel/debug/tracing/events/syscalls/sys_exit_read/trigger
+
+ Note that there can be any number of enable/disable_event triggers
+ per triggering event, but there can only be one trigger per
+ triggered event. e.g. sys_enter_read can have triggers enabling both
+ kmem:kmalloc and sched:sched_switch, but can't have two kmem:kmalloc
+ versions such as kmem:kmalloc and kmem:kmalloc:1 or 'kmem:kmalloc if
+ bytes_req == 256' and 'kmem:kmalloc if bytes_alloc == 256' (they
+ could be combined into a single filter on kmem:kmalloc though).
+
+- stacktrace
+
+ This command dumps a stacktrace in the trace buffer whenever the
+ triggering event occurs.
+
+ For example, the following trigger dumps a stacktrace every time the
+ kmalloc tracepoint is hit:
+
+ # echo 'stacktrace' > \
+ /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
+
+ The following trigger dumps a stacktrace the first 5 times a kmalloc
+ request happens with a size >= 64K:
+
+ # echo 'stacktrace:5 if bytes_req >= 65536' > \
+ /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
+
+ The format is:
+
+ stacktrace[:count]
+
+ To remove the above commands:
+
+ # echo '!stacktrace' > \
+ /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
+
+ # echo '!stacktrace:5 if bytes_req >= 65536' > \
+ /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
+
+ The latter can also be removed more simply by the following (without
+ the filter):
+
+ # echo '!stacktrace:5' > \
+ /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
+
+ Note that there can be only one stacktrace trigger per triggering
+ event.
+
+- snapshot
+
+ This command causes a snapshot to be triggered whenever the
+ triggering event occurs.
+
+ The following command creates a snapshot every time a block request
+ queue is unplugged with a depth > 1. If you were tracing a set of
+ events or functions at the time, the snapshot trace buffer would
+ capture those events when the trigger event occurred:
+
+ # echo 'snapshot if nr_rq > 1' > \
+ /sys/kernel/debug/tracing/events/block/block_unplug/trigger
+
+ To only snapshot once:
+
+ # echo 'snapshot:1 if nr_rq > 1' > \
+ /sys/kernel/debug/tracing/events/block/block_unplug/trigger
+
+ To remove the above commands:
+
+ # echo '!snapshot if nr_rq > 1' > \
+ /sys/kernel/debug/tracing/events/block/block_unplug/trigger
+
+ # echo '!snapshot:1 if nr_rq > 1' > \
+ /sys/kernel/debug/tracing/events/block/block_unplug/trigger
+
+ Note that there can be only one snapshot trigger per triggering
+ event.
+
+- traceon/traceoff
+
+ These commands turn tracing on and off when the specified events are
+ hit. The parameter determines how many times the tracing system is
+ turned on and off. If unspecified, there is no limit.
+
+ The following command turns tracing off the first time a block
+ request queue is unplugged with a depth > 1. If you were tracing a
+ set of events or functions at the time, you could then examine the
+ trace buffer to see the sequence of events that led up to the
+ trigger event:
+
+ # echo 'traceoff:1 if nr_rq > 1' > \
+ /sys/kernel/debug/tracing/events/block/block_unplug/trigger
+
+ To always disable tracing when nr_rq > 1 :
+
+ # echo 'traceoff if nr_rq > 1' > \
+ /sys/kernel/debug/tracing/events/block/block_unplug/trigger
+
+ To remove the above commands:
+
+ # echo '!traceoff:1 if nr_rq > 1' > \
+ /sys/kernel/debug/tracing/events/block/block_unplug/trigger
+
+ # echo '!traceoff if nr_rq > 1' > \
+ /sys/kernel/debug/tracing/events/block/block_unplug/trigger
+
+ Note that there can be only one traceon or traceoff trigger per
+ triggering event.
diff --git a/Documentation/trace/uprobetracer.txt b/Documentation/trace/uprobetracer.txt
index d9c3e682312..f1cf9a34ad9 100644
--- a/Documentation/trace/uprobetracer.txt
+++ b/Documentation/trace/uprobetracer.txt
@@ -19,18 +19,44 @@ user to calculate the offset of the probepoint in the object.
Synopsis of uprobe_tracer
-------------------------
- p[:[GRP/]EVENT] PATH:SYMBOL[+offs] [FETCHARGS] : Set a uprobe
- r[:[GRP/]EVENT] PATH:SYMBOL[+offs] [FETCHARGS] : Set a return uprobe (uretprobe)
- -:[GRP/]EVENT : Clear uprobe or uretprobe event
+ p[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : Set a uprobe
+ r[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : Set a return uprobe (uretprobe)
+ -:[GRP/]EVENT : Clear uprobe or uretprobe event
GRP : Group name. If omitted, "uprobes" is the default value.
EVENT : Event name. If omitted, the event name is generated based
- on SYMBOL+offs.
+ on PATH+OFFSET.
PATH : Path to an executable or a library.
- SYMBOL[+offs] : Symbol+offset where the probe is inserted.
+ OFFSET : Offset where the probe is inserted.
FETCHARGS : Arguments. Each probe can have up to 128 args.
%REG : Fetch register REG
+ @ADDR : Fetch memory at ADDR (ADDR should be in userspace)
+ @+OFFSET : Fetch memory at OFFSET (OFFSET from same file as PATH)
+ $stackN : Fetch Nth entry of stack (N >= 0)
+ $stack : Fetch stack address.
+ $retval : Fetch return value.(*)
+ +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**)
+ NAME=FETCHARG : Set NAME as the argument name of FETCHARG.
+ FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types
+ (u8/u16/u32/u64/s8/s16/s32/s64), "string" and bitfield
+ are supported.
+
+ (*) only for return probe.
+ (**) this is useful for fetching a field of data structures.
+
+Types
+-----
+Several types are supported for fetch-args. Uprobe tracer will access memory
+by given type. Prefixes 's' and 'u' mean those types are signed and unsigned
+respectively. Traced arguments are shown in decimal (signed) or hex (unsigned).
+String type is a special type, which fetches a "null-terminated" string from
+user space.
+Bitfield is another special type, which takes 3 parameters, bit-width, bit-
+offset, and container-size (usually 32). The syntax is:
+
+ b<bit-width>@<bit-offset>/<container-size>
+
Event Profiling
---------------
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 31ea4b42836..f4233b195da 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -570,8 +570,6 @@ static inline int
ftrace_regex_release(struct inode *inode, struct file *file) { return -ENODEV; }
#endif /* CONFIG_DYNAMIC_FTRACE */
-loff_t ftrace_filter_lseek(struct file *file, loff_t offset, int whence);
-
/* totally disable ftrace - can not re-enable after this */
void ftrace_kill(void);
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 8c9b7a1c413..4e4cc28623a 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -1,3 +1,4 @@
+
#ifndef _LINUX_FTRACE_EVENT_H
#define _LINUX_FTRACE_EVENT_H
@@ -264,6 +265,8 @@ enum {
FTRACE_EVENT_FL_NO_SET_FILTER_BIT,
FTRACE_EVENT_FL_SOFT_MODE_BIT,
FTRACE_EVENT_FL_SOFT_DISABLED_BIT,
+ FTRACE_EVENT_FL_TRIGGER_MODE_BIT,
+ FTRACE_EVENT_FL_TRIGGER_COND_BIT,
};
/*
@@ -275,6 +278,8 @@ enum {
* SOFT_MODE - The event is enabled/disabled by SOFT_DISABLED
* SOFT_DISABLED - When set, do not trace the event (even though its
* tracepoint may be enabled)
+ * TRIGGER_MODE - When set, invoke the triggers associated with the event
+ * TRIGGER_COND - When set, one or more triggers have an associated filter
*/
enum {
FTRACE_EVENT_FL_ENABLED = (1 << FTRACE_EVENT_FL_ENABLED_BIT),
@@ -283,6 +288,8 @@ enum {
FTRACE_EVENT_FL_NO_SET_FILTER = (1 << FTRACE_EVENT_FL_NO_SET_FILTER_BIT),
FTRACE_EVENT_FL_SOFT_MODE = (1 << FTRACE_EVENT_FL_SOFT_MODE_BIT),
FTRACE_EVENT_FL_SOFT_DISABLED = (1 << FTRACE_EVENT_FL_SOFT_DISABLED_BIT),
+ FTRACE_EVENT_FL_TRIGGER_MODE = (1 << FTRACE_EVENT_FL_TRIGGER_MODE_BIT),
+ FTRACE_EVENT_FL_TRIGGER_COND = (1 << FTRACE_EVENT_FL_TRIGGER_COND_BIT),
};
struct ftrace_event_file {
@@ -292,6 +299,7 @@ struct ftrace_event_file {
struct dentry *dir;
struct trace_array *tr;
struct ftrace_subsystem_dir *system;
+ struct list_head triggers;
/*
* 32 bit flags:
@@ -299,6 +307,7 @@ struct ftrace_event_file {
* bit 1: enabled cmd record
* bit 2: enable/disable with the soft disable bit
* bit 3: soft disabled
+ * bit 4: trigger enabled
*
* Note: The bits must be set atomically to prevent races
* from other writers. Reads of flags do not need to be in
@@ -310,6 +319,7 @@ struct ftrace_event_file {
*/
unsigned long flags;
atomic_t sm_ref; /* soft-mode reference counter */
+ atomic_t tm_ref; /* trigger-mode reference counter */
};
#define __TRACE_EVENT_FLAGS(name, value) \
@@ -337,6 +347,14 @@ struct ftrace_event_file {
#define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */
+enum event_trigger_type {
+ ETT_NONE = (0),
+ ETT_TRACE_ONOFF = (1 << 0),
+ ETT_SNAPSHOT = (1 << 1),
+ ETT_STACKTRACE = (1 << 2),
+ ETT_EVENT_ENABLE = (1 << 3),
+};
+
extern void destroy_preds(struct ftrace_event_file *file);
extern void destroy_call_preds(struct ftrace_event_call *call);
extern int filter_match_preds(struct event_filter *filter, void *rec);
@@ -347,6 +365,127 @@ extern int filter_check_discard(struct ftrace_event_file *file, void *rec,
extern int call_filter_check_discard(struct ftrace_event_call *call, void *rec,
struct ring_buffer *buffer,
struct ring_buffer_event *event);
+extern enum event_trigger_type event_triggers_call(struct ftrace_event_file *file,
+ void *rec);
+extern void event_triggers_post_call(struct ftrace_event_file *file,
+ enum event_trigger_type tt);
+
+/**
+ * ftrace_trigger_soft_disabled - do triggers and test if soft disabled
+ * @file: The file pointer of the event to test
+ *
+ * If any triggers without filters are attached to this event, they
+ * will be called here. If the event is soft disabled and has no
+ * triggers that require testing the fields, it will return true,
+ * otherwise false.
+ */
+static inline bool
+ftrace_trigger_soft_disabled(struct ftrace_event_file *file)
+{
+ unsigned long eflags = file->flags;
+
+ if (!(eflags & FTRACE_EVENT_FL_TRIGGER_COND)) {
+ if (eflags & FTRACE_EVENT_FL_TRIGGER_MODE)
+ event_triggers_call(file, NULL);
+ if (eflags & FTRACE_EVENT_FL_SOFT_DISABLED)
+ return true;
+ }
+ return false;
+}
+
+/*
+ * Helper function for event_trigger_unlock_commit{_regs}().
+ * If there are event triggers attached to this event that require
+ * filtering against its fields, then they will be called as the
+ * entry already holds the field information of the current event.
+ *
+ * It also checks if the event should be discarded or not.
+ * It is to be discarded if the event is soft disabled and the
+ * event was only recorded to process triggers, or if the event
+ * filter is active and this event did not match the filters.
+ *
+ * Returns true if the event is discarded, false otherwise.
+ */
+static inline bool
+__event_trigger_test_discard(struct ftrace_event_file *file,
+ struct ring_buffer *buffer,
+ struct ring_buffer_event *event,
+ void *entry,
+ enum event_trigger_type *tt)
+{
+ unsigned long eflags = file->flags;
+
+ if (eflags & FTRACE_EVENT_FL_TRIGGER_COND)
+ *tt = event_triggers_call(file, entry);
+
+ if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags))
+ ring_buffer_discard_commit(buffer, event);
+ else if (!filter_check_discard(file, entry, buffer, event))
+ return false;
+
+ return true;
+}
+
+/**
+ * event_trigger_unlock_commit - handle triggers and finish event commit
+ * @file: The file pointer associated with the event
+ * @buffer: The ring buffer that the event is being written to
+ * @event: The event meta data in the ring buffer
+ * @entry: The event itself
+ * @irq_flags: The state of the interrupts at the start of the event
+ * @pc: The state of the preempt count at the start of the event.
+ *
+ * This is a helper function to handle triggers that require data
+ * from the event itself. It also tests the event against filters and
+ * if the event is soft disabled and should be discarded.
+ */
+static inline void
+event_trigger_unlock_commit(struct ftrace_event_file *file,
+ struct ring_buffer *buffer,
+ struct ring_buffer_event *event,
+ void *entry, unsigned long irq_flags, int pc)
+{
+ enum event_trigger_type tt = ETT_NONE;
+
+ if (!__event_trigger_test_discard(file, buffer, event, entry, &tt))
+ trace_buffer_unlock_commit(buffer, event, irq_flags, pc);
+
+ if (tt)
+ event_triggers_post_call(file, tt);
+}
+
+/**
+ * event_trigger_unlock_commit_regs - handle triggers and finish event commit
+ * @file: The file pointer associated with the event
+ * @buffer: The ring buffer that the event is being written to
+ * @event: The event meta data in the ring buffer
+ * @entry: The event itself
+ * @irq_flags: The state of the interrupts at the start of the event
+ * @pc: The state of the preempt count at the start of the event.
+ *
+ * This is a helper function to handle triggers that require data
+ * from the event itself. It also tests the event against filters and
+ * if the event is soft disabled and should be discarded.
+ *
+ * Same as event_trigger_unlock_commit() but calls
+ * trace_buffer_unlock_commit_regs() instead of trace_buffer_unlock_commit().
+ */
+static inline void
+event_trigger_unlock_commit_regs(struct ftrace_event_file *file,
+ struct ring_buffer *buffer,
+ struct ring_buffer_event *event,
+ void *entry, unsigned long irq_flags, int pc,
+ struct pt_regs *regs)
+{
+ enum event_trigger_type tt = ETT_NONE;
+
+ if (!__event_trigger_test_discard(file, buffer, event, entry, &tt))
+ trace_buffer_unlock_commit_regs(buffer, event,
+ irq_flags, pc, regs);
+
+ if (tt)
+ event_triggers_post_call(file, tt);
+}
enum {
FILTER_OTHER = 0,
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 5c38606613d..1a8b28db377 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -418,6 +418,8 @@ static inline notrace int ftrace_get_offsets_##call( \
* struct ftrace_event_file *ftrace_file = __data;
* struct ftrace_event_call *event_call = ftrace_file->event_call;
* struct ftrace_data_offsets_<call> __maybe_unused __data_offsets;
+ * unsigned long eflags = ftrace_file->flags;
+ * enum event_trigger_type __tt = ETT_NONE;
* struct ring_buffer_event *event;
* struct ftrace_raw_<call> *entry; <-- defined in stage 1
* struct ring_buffer *buffer;
@@ -425,9 +427,12 @@ static inline notrace int ftrace_get_offsets_##call( \
* int __data_size;
* int pc;
*
- * if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT,
- * &ftrace_file->flags))
- * return;
+ * if (!(eflags & FTRACE_EVENT_FL_TRIGGER_COND)) {
+ * if (eflags & FTRACE_EVENT_FL_TRIGGER_MODE)
+ * event_triggers_call(ftrace_file, NULL);
+ * if (eflags & FTRACE_EVENT_FL_SOFT_DISABLED)
+ * return;
+ * }
*
* local_save_flags(irq_flags);
* pc = preempt_count();
@@ -445,8 +450,17 @@ static inline notrace int ftrace_get_offsets_##call( \
* { <assign>; } <-- Here we assign the entries by the __field and
* __array macros.
*
- * if (!filter_check_discard(ftrace_file, entry, buffer, event))
+ * if (eflags & FTRACE_EVENT_FL_TRIGGER_COND)
+ * __tt = event_triggers_call(ftrace_file, entry);
+ *
+ * if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT,
+ * &ftrace_file->flags))
+ * ring_buffer_discard_commit(buffer, event);
+ * else if (!filter_check_discard(ftrace_file, entry, buffer, event))
* trace_buffer_unlock_commit(buffer, event, irq_flags, pc);
+ *
+ * if (__tt)
+ * event_triggers_post_call(ftrace_file, __tt);
* }
*
* static struct trace_event ftrace_event_type_<call> = {
@@ -539,8 +553,7 @@ ftrace_raw_event_##call(void *__data, proto) \
int __data_size; \
int pc; \
\
- if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, \
- &ftrace_file->flags)) \
+ if (ftrace_trigger_soft_disabled(ftrace_file)) \
return; \
\
local_save_flags(irq_flags); \
@@ -560,8 +573,8 @@ ftrace_raw_event_##call(void *__data, proto) \
\
{ assign; } \
\
- if (!filter_check_discard(ftrace_file, entry, buffer, event)) \
- trace_buffer_unlock_commit(buffer, event, irq_flags, pc); \
+ event_trigger_unlock_commit(ftrace_file, buffer, event, entry, \
+ irq_flags, pc); \
}
/*
* The ftrace_test_probe is compiled out, it is only here as a build time check
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index b886a5e7d4f..307d87c0991 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1854,6 +1854,10 @@ static void handle_swbp(struct pt_regs *regs)
if (unlikely(!test_bit(UPROBE_COPY_INSN, &uprobe->flags)))
goto out;
+ /* Tracing handlers use ->utask to communicate with fetch methods */
+ if (!get_utask())
+ goto out;
+
handler_chain(uprobe, regs);
if (can_skip_sstep(uprobe, regs))
goto out;
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index d7e2068e4b7..1378e84fbe3 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -50,6 +50,7 @@ ifeq ($(CONFIG_PERF_EVENTS),y)
obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o
endif
obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
+obj-$(CONFIG_EVENT_TRACING) += trace_events_trigger.o
obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
obj-$(CONFIG_TRACEPOINTS) += power-traces.o
ifeq ($(CONFIG_PM_RUNTIME),y)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 72a0f81dc5a..cd7f76d1eb8 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -85,6 +85,8 @@ int function_trace_stop __read_mostly;
/* Current function tracing op */
struct ftrace_ops *function_trace_op __read_mostly = &ftrace_list_end;
+/* What to set function_trace_op to */
+static struct ftrace_ops *set_function_trace_op;
/* List for set_ftrace_pid's pids. */
LIST_HEAD(ftrace_pids);
@@ -278,6 +280,29 @@ static void update_global_ops(void)
global_ops.func = func;
}
+static void ftrace_sync(struct work_struct *work)
+{
+ /*
+ * This function is just a stub to implement a hard force
+ * of synchronize_sched(). This requires synchronizing
+ * tasks even in userspace and idle.
+ *
+ * Yes, function tracing is rude.
+ */
+}
+
+static void ftrace_sync_ipi(void *data)
+{
+ /* Probably not needed, but do it anyway */
+ smp_rmb();
+}
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+static void update_function_graph_func(void);
+#else
+static inline void update_function_graph_func(void) { }
+#endif
+
static void update_ftrace_function(void)
{
ftrace_func_t func;
@@ -296,16 +321,61 @@ static void update_ftrace_function(void)
!FTRACE_FORCE_LIST_FUNC)) {
/* Set the ftrace_ops that the arch callback uses */
if (ftrace_ops_list == &global_ops)
- function_trace_op = ftrace_global_list;
+ set_function_trace_op = ftrace_global_list;
else
- function_trace_op = ftrace_ops_list;
+ set_function_trace_op = ftrace_ops_list;
func = ftrace_ops_list->func;
} else {
/* Just use the default ftrace_ops */
- function_trace_op = &ftrace_list_end;
+ set_function_trace_op = &ftrace_list_end;
func = ftrace_ops_list_func;
}
+ /* If there's no change, then do nothing more here */
+ if (ftrace_trace_function == func)
+ return;
+
+ update_function_graph_func();
+
+ /*
+ * If we are using the list function, it doesn't care
+ * about the function_trace_op.
+ */
+ if (func == ftrace_ops_list_func) {
+ ftrace_trace_function = func;
+ /*
+ * Don't even bother setting function_trace_op,
+ * it would be racy to do so anyway.
+ */
+ return;
+ }
+
+#ifndef CONFIG_DYNAMIC_FTRACE
+ /*
+ * For static tracing, we need to be a bit more careful.
+ * The function change takes effect immediately. Thus,
+ * we need to coordinate the setting of the function_trace_op
+ * with the setting of the ftrace_trace_function.
+ *
+ * Set the function to the list ops, which will call the
+ * function we want, albeit indirectly, but it handles the
+ * ftrace_ops and doesn't depend on function_trace_op.
+ */
+ ftrace_trace_function = ftrace_ops_list_func;
+ /*
+ * Make sure all CPUs see this. Yes this is slow, but static
+ * tracing is slow and nasty to have enabled.
+ */
+ schedule_on_each_cpu(ftrace_sync);
+ /* Now all cpus are using the list ops. */
+ function_trace_op = set_function_trace_op;
+ /* Make sure the function_trace_op is visible on all CPUs */
+ smp_wmb();
+ /* Nasty way to force a rmb on all cpus */
+ smp_call_function(ftrace_sync_ipi, NULL, 1);
+ /* OK, we are all set to update the ftrace_trace_function now! */
+#endif /* !CONFIG_DYNAMIC_FTRACE */
+
ftrace_trace_function = func;
}
@@ -410,17 +480,6 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
return 0;
}
-static void ftrace_sync(struct work_struct *work)
-{
- /*
- * This function is just a stub to implement a hard force
- * of synchronize_sched(). This requires synchronizing
- * tasks even in userspace and idle.
- *
- * Yes, function tracing is rude.
- */
-}
-
static int __unregister_ftrace_function(struct ftrace_ops *ops)
{
int ret;
@@ -439,20 +498,6 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
} else if (ops->flags & FTRACE_OPS_FL_CONTROL) {
ret = remove_ftrace_list_ops(&ftrace_control_list,
&control_ops, ops);
- if (!ret) {
- /*
- * The ftrace_ops is now removed from the list,
- * so there'll be no new users. We must ensure
- * all current users are done before we free
- * the control data.
- * Note synchronize_sched() is not enough, as we
- * use preempt_disable() to do RCU, but the function
- * tracer can be called where RCU is not active
- * (before user_exit()).
- */
- schedule_on_each_cpu(ftrace_sync);
- control_ops_free(ops);
- }
} else
ret = remove_ftrace_ops(&ftrace_ops_list, ops);
@@ -462,17 +507,6 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
if (ftrace_enabled)
update_ftrace_function();
- /*
- * Dynamic ops may be freed, we must make sure that all
- * callers are done before leaving this function.
- *
- * Again, normal synchronize_sched() is not good enough.
- * We need to do a hard force of sched synchronization.
- */
- if (ops->flags & FTRACE_OPS_FL_DYNAMIC)
- schedule_on_each_cpu(ftrace_sync);
-
-
return 0;
}
@@ -1082,19 +1116,6 @@ static __init void ftrace_profile_debugfs(struct dentry *d_tracer)
static struct pid * const ftrace_swapper_pid = &init_struct_pid;
-loff_t
-ftrace_filter_lseek(struct file *file, loff_t offset, int whence)
-{
- loff_t ret;
-
- if (file->f_mode & FMODE_READ)
- ret = seq_lseek(file, offset, whence);
- else
- file->f_pos = ret = 1;
-
- return ret;
-}
-
#ifdef CONFIG_DYNAMIC_FTRACE
#ifndef CONFIG_FTRACE_MCOUNT_RECORD
@@ -1992,8 +2013,14 @@ void ftrace_modify_all_code(int command)
else if (command & FTRACE_DISABLE_CALLS)
ftrace_replace_code(0);
- if (update && ftrace_trace_function != ftrace_ops_list_func)
+ if (update && ftrace_trace_function != ftrace_ops_list_func) {
+ function_trace_op = set_function_trace_op;
+ smp_wmb();
+ /* If irqs are disabled, we are in stop machine */
+ if (!irqs_disabled())
+ smp_call_function(ftrace_sync_ipi, NULL, 1);
ftrace_update_ftrace_func(ftrace_trace_function);
+ }
if (command & FTRACE_START_FUNC_RET)
ftrace_enable_ftrace_graph_caller();
@@ -2156,10 +2183,41 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command)
command |= FTRACE_UPDATE_TRACE_FUNC;
}
- if (!command || !ftrace_enabled)
+ if (!command || !ftrace_enabled) {
+ /*
+ * If these are control ops, they still need their
+ * per_cpu field freed. Since function tracing is
+ * not currently active, we can just free them
+ * without synchronizing all CPUs.
+ */
+ if (ops->flags & FTRACE_OPS_FL_CONTROL)
+ control_ops_free(ops);
return 0;
+ }
ftrace_run_update_code(command);
+
+ /*
+ * Dynamic ops may be freed, we must make sure that all
+ * callers are done before leaving this function.
+ * The same goes for freeing the per_cpu data of the control
+ * ops.
+ *
+ * Again, normal synchronize_sched() is not good enough.
+ * We need to do a hard force of sched synchronization.
+ * This is because we use preempt_disable() to do RCU, but
+ * the function tracers can be called where RCU is not watching
+ * (like before user_exit()). We can not rely on the RCU
+ * infrastructure to do the synchronization, thus we must do it
+ * ourselves.
+ */
+ if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_CONTROL)) {
+ schedule_on_each_cpu(ftrace_sync);
+
+ if (ops->flags & FTRACE_OPS_FL_CONTROL)
+ control_ops_free(ops);
+ }
+
return 0;
}
@@ -2739,7 +2797,7 @@ static void ftrace_filter_reset(struct ftrace_hash *hash)
* routine, you can use ftrace_filter_write() for the write
* routine if @flag has FTRACE_ITER_FILTER set, or
* ftrace_notrace_write() if @flag has FTRACE_ITER_NOTRACE set.
- * ftrace_filter_lseek() should be used as the lseek routine, and
+ * tracing_lseek() should be used as the lseek routine, and
* release must call ftrace_regex_release().
*/
int
@@ -3767,7 +3825,7 @@ static const struct file_operations ftrace_filter_fops = {
.open = ftrace_filter_open,
.read = seq_read,
.write = ftrace_filter_write,
- .llseek = ftrace_filter_lseek,
+ .llseek = tracing_lseek,
.release = ftrace_regex_release,
};
@@ -3775,7 +3833,7 @@ static const struct file_operations ftrace_notrace_fops = {
.open = ftrace_notrace_open,
.read = seq_read,
.write = ftrace_notrace_write,
- .llseek = ftrace_filter_lseek,
+ .llseek = tracing_lseek,
.release = ftrace_regex_release,
};
@@ -4038,7 +4096,7 @@ static const struct file_operations ftrace_graph_fops = {
.open = ftrace_graph_open,
.read = seq_read,
.write = ftrace_graph_write,
- .llseek = ftrace_filter_lseek,
+ .llseek = tracing_lseek,
.release = ftrace_graph_release,
};
@@ -4046,7 +4104,7 @@ static const struct file_operations ftrace_graph_notrace_fops = {
.open = ftrace_graph_notrace_open,
.read = seq_read,
.write = ftrace_graph_write,
- .llseek = ftrace_filter_lseek,
+ .llseek = tracing_lseek,
.release = ftrace_graph_release,
};
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
@@ -4719,7 +4777,7 @@ static const struct file_operations ftrace_pid_fops = {
.open = ftrace_pid_open,
.write = ftrace_pid_write,
.read = seq_read,
- .llseek = ftrace_filter_lseek,
+ .llseek = tracing_lseek,
.release = ftrace_pid_release,
};
@@ -4862,6 +4920,7 @@ int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)
trace_func_graph_ret_t ftrace_graph_return =
(trace_func_graph_ret_t)ftrace_stub;
trace_func_graph_ent_t ftrace_graph_entry = ftrace_graph_entry_stub;
+static trace_func_graph_ent_t __ftrace_graph_entry = ftrace_graph_entry_stub;
/* Try to assign a return stack array on FTRACE_RETSTACK_ALLOC_SIZE tasks. */
static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
@@ -5003,6 +5062,30 @@ static struct ftrace_ops fgraph_ops __read_mostly = {
FTRACE_OPS_FL_RECURSION_SAFE,
};
+static int