Diffstat (limited to 'kernel/trace')
-rw-r--r--  kernel/trace/Kconfig                     31
-rw-r--r--  kernel/trace/Makefile                     4
-rw-r--r--  kernel/trace/blktrace.c                  74
-rw-r--r--  kernel/trace/ftrace.c                   836
-rw-r--r--  kernel/trace/ring_buffer.c               37
-rw-r--r--  kernel/trace/ring_buffer_benchmark.c      6
-rw-r--r--  kernel/trace/trace.c                    953
-rw-r--r--  kernel/trace/trace.h                    331
-rw-r--r--  kernel/trace/trace_benchmark.c          198
-rw-r--r--  kernel/trace/trace_benchmark.h           41
-rw-r--r--  kernel/trace/trace_branch.c               2
-rw-r--r--  kernel/trace/trace_clock.c                9
-rw-r--r--  kernel/trace/trace_event_perf.c          35
-rw-r--r--  kernel/trace/trace_events.c             199
-rw-r--r--  kernel/trace/trace_events_filter.c      230
-rw-r--r--  kernel/trace/trace_events_trigger.c    1437
-rw-r--r--  kernel/trace/trace_export.c              15
-rw-r--r--  kernel/trace/trace_functions.c          155
-rw-r--r--  kernel/trace/trace_functions_graph.c    104
-rw-r--r--  kernel/trace/trace_irqsoff.c             85
-rw-r--r--  kernel/trace/trace_kprobe.c             906
-rw-r--r--  kernel/trace/trace_mmiotrace.c            4
-rw-r--r--  kernel/trace/trace_nop.c                  6
-rw-r--r--  kernel/trace/trace_output.c              93
-rw-r--r--  kernel/trace/trace_probe.c              479
-rw-r--r--  kernel/trace/trace_probe.h              242
-rw-r--r--  kernel/trace/trace_sched_switch.c         4
-rw-r--r--  kernel/trace/trace_sched_wakeup.c       143
-rw-r--r--  kernel/trace/trace_selftest.c           102
-rw-r--r--  kernel/trace/trace_stack.c               43
-rw-r--r--  kernel/trace/trace_stat.c                41
-rw-r--r--  kernel/trace/trace_syscalls.c            38
-rw-r--r--  kernel/trace/trace_uprobe.c             689
33 files changed, 5578 insertions, 1994 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 015f85aaca0..d4409356f40 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -424,6 +424,7 @@ config UPROBE_EVENT
bool "Enable uprobes-based dynamic events"
depends on ARCH_SUPPORTS_UPROBES
depends on MMU
+ depends on PERF_EVENTS
select UPROBES
select PROBE_EVENTS
select TRACING
@@ -534,6 +535,36 @@ config MMIOTRACE_TEST
Say N, unless you absolutely know what you are doing.
+config TRACEPOINT_BENCHMARK
+ bool "Add tracepoint that benchmarks tracepoints"
+ help
+ This option creates the tracepoint "benchmark:benchmark_event".
+ When the tracepoint is enabled, it kicks off a kernel thread that
+ goes into an infinite loop (calling cond_resched() to let other tasks
+ run), and calls the tracepoint. Each iteration will record the time
+ it took to write to the tracepoint, and on the next iteration that
+ data will be passed to the tracepoint itself. That is, the tracepoint
+ will report the time it took to do the previous tracepoint.
+ The string written to the tracepoint is a static string of 128 bytes
+ to keep the time the same. The initial string is simply a write of
+ "START". The second string records the cold cache time of the first
+ write which is not added to the rest of the calculations.
+
+ As it is a tight loop, it benchmarks the hot-cache case. That's fine because
+ we care most about hot paths that are probably in cache already.
+
+ An example of the output:
+
+ START
+ first=3672 [COLD CACHED]
+ last=632 first=3672 max=632 min=632 avg=316 std=446 std^2=199712
+ last=278 first=3672 max=632 min=278 avg=303 std=316 std^2=100337
+ last=277 first=3672 max=632 min=277 avg=296 std=258 std^2=67064
+ last=273 first=3672 max=632 min=273 avg=292 std=224 std^2=50411
+ last=273 first=3672 max=632 min=273 avg=288 std=200 std^2=40389
+ last=281 first=3672 max=632 min=273 avg=287 std=183 std^2=33666
+
+
config RING_BUFFER_BENCHMARK
tristate "Ring buffer benchmark stress tester"
depends on RING_BUFFER
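
The TRACEPOINT_BENCHMARK help text above describes a loop that times every tracepoint write and reports running statistics (last, max, min, avg, std^2), with the cold-cache first write reported separately. As a rough illustration of that bookkeeping -- a hedged sketch with made-up names, not the actual kernel/trace/trace_benchmark.c code -- the per-iteration update could look like this:

/* Illustrative sketch only; not the real trace_benchmark.c implementation. */
#include <stdint.h>

struct bm_stats {
	uint64_t count;     /* iterations measured after the cold first write */
	uint64_t first;     /* cold-cache time of the very first write */
	uint64_t last;      /* time taken by the previous iteration's write */
	uint64_t max, min;
	uint64_t total;     /* running sum, for the average */
	uint64_t total_sq;  /* running sum of squares, for std^2 */
};

/* Called once per loop iteration with the measured delta of the last write. */
static void bm_update(struct bm_stats *s, uint64_t delta)
{
	if (!s->first) {
		s->first = delta;   /* cold cache; reported as [COLD CACHED] only */
		return;
	}
	s->last = delta;
	if (delta > s->max)
		s->max = delta;
	if (!s->min || delta < s->min)
		s->min = delta;
	s->total += delta;
	s->total_sq += delta * delta;
	s->count++;
	/* avg = total / count; std^2 = total_sq / count - avg * avg */
}

The string passed to the tracepoint stays at a fixed 128 bytes so every iteration pays the same copy cost, which keeps the reported numbers comparable across iterations.
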
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index d7e2068e4b7..2611613f14f 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -17,6 +17,7 @@ ifdef CONFIG_TRACING_BRANCHES
KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
endif
+CFLAGS_trace_benchmark.o := -I$(src)
CFLAGS_trace_events_filter.o := -I$(src)
obj-$(CONFIG_TRACE_CLOCK) += trace_clock.o
@@ -50,6 +51,7 @@ ifeq ($(CONFIG_PERF_EVENTS),y)
obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o
endif
obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
+obj-$(CONFIG_EVENT_TRACING) += trace_events_trigger.o
obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
obj-$(CONFIG_TRACEPOINTS) += power-traces.o
ifeq ($(CONFIG_PM_RUNTIME),y)
@@ -61,4 +63,6 @@ endif
obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o
obj-$(CONFIG_UPROBE_EVENT) += trace_uprobe.o
+obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o
+
libftrace-y := ftrace.o
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index b8b8560bfb9..c1bd4ada2a0 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -26,6 +26,7 @@
#include <linux/export.h>
#include <linux/time.h>
#include <linux/uaccess.h>
+#include <linux/list.h>
#include <trace/events/block.h>
@@ -38,6 +39,9 @@ static unsigned int blktrace_seq __read_mostly = 1;
static struct trace_array *blk_tr;
static bool blk_tracer_enabled __read_mostly;
+static LIST_HEAD(running_trace_list);
+static __cacheline_aligned_in_smp DEFINE_SPINLOCK(running_trace_lock);
+
/* Select an alternative, minimalistic output than the original one */
#define TRACE_BLK_OPT_CLASSIC 0x1
@@ -107,10 +111,18 @@ record_it:
* Send out a notify for this process, if we haven't done so since a trace
* started
*/
-static void trace_note_tsk(struct blk_trace *bt, struct task_struct *tsk)
+static void trace_note_tsk(struct task_struct *tsk)
{
+ unsigned long flags;
+ struct blk_trace *bt;
+
tsk->btrace_seq = blktrace_seq;
- trace_note(bt, tsk->pid, BLK_TN_PROCESS, tsk->comm, sizeof(tsk->comm));
+ spin_lock_irqsave(&running_trace_lock, flags);
+ list_for_each_entry(bt, &running_trace_list, running_list) {
+ trace_note(bt, tsk->pid, BLK_TN_PROCESS, tsk->comm,
+ sizeof(tsk->comm));
+ }
+ spin_unlock_irqrestore(&running_trace_lock, flags);
}
static void trace_note_time(struct blk_trace *bt)
@@ -229,16 +241,15 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
goto record_it;
}
+ if (unlikely(tsk->btrace_seq != blktrace_seq))
+ trace_note_tsk(tsk);
+
/*
* A word about the locking here - we disable interrupts to reserve
* some space in the relay per-cpu buffer, to prevent an irq
* from coming in and stepping on our toes.
*/
local_irq_save(flags);
-
- if (unlikely(tsk->btrace_seq != blktrace_seq))
- trace_note_tsk(bt, tsk);
-
t = relay_reserve(bt->rchan, sizeof(*t) + pdu_len);
if (t) {
sequence = per_cpu_ptr(bt->sequence, cpu);
@@ -477,6 +488,7 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
bt->dir = dir;
bt->dev = dev;
atomic_set(&bt->dropped, 0);
+ INIT_LIST_HEAD(&bt->running_list);
ret = -EIO;
bt->dropped_file = debugfs_create_file("dropped", 0444, dir, bt,
@@ -567,13 +579,12 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name,
.end_lba = cbuts.end_lba,
.pid = cbuts.pid,
};
- memcpy(&buts.name, &cbuts.name, 32);
ret = do_blk_trace_setup(q, name, dev, bdev, &buts);
if (ret)
return ret;
- if (copy_to_user(arg, &buts.name, 32)) {
+ if (copy_to_user(arg, &buts.name, ARRAY_SIZE(buts.name))) {
blk_trace_remove(q);
return -EFAULT;
}
@@ -601,6 +612,9 @@ int blk_trace_startstop(struct request_queue *q, int start)
blktrace_seq++;
smp_mb();
bt->trace_state = Blktrace_running;
+ spin_lock_irq(&running_trace_lock);
+ list_add(&bt->running_list, &running_trace_list);
+ spin_unlock_irq(&running_trace_lock);
trace_note_time(bt);
ret = 0;
@@ -608,6 +622,9 @@ int blk_trace_startstop(struct request_queue *q, int start)
} else {
if (bt->trace_state == Blktrace_running) {
bt->trace_state = Blktrace_stopped;
+ spin_lock_irq(&running_trace_lock);
+ list_del_init(&bt->running_list);
+ spin_unlock_irq(&running_trace_lock);
relay_flush(bt->rchan);
ret = 0;
}
@@ -685,6 +702,7 @@ void blk_trace_shutdown(struct request_queue *q)
* blk_add_trace_rq - Add a trace for a request oriented action
* @q: queue the io is for
* @rq: the source request
+ * @nr_bytes: number of completed bytes
* @what: the action
*
* Description:
@@ -692,7 +710,7 @@ void blk_trace_shutdown(struct request_queue *q)
*
**/
static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
- u32 what)
+ unsigned int nr_bytes, u32 what)
{
struct blk_trace *bt = q->blk_trace;
@@ -701,11 +719,11 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
what |= BLK_TC_ACT(BLK_TC_PC);
- __blk_add_trace(bt, 0, blk_rq_bytes(rq), rq->cmd_flags,
+ __blk_add_trace(bt, 0, nr_bytes, rq->cmd_flags,
what, rq->errors, rq->cmd_len, rq->cmd);
} else {
what |= BLK_TC_ACT(BLK_TC_FS);
- __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq),
+ __blk_add_trace(bt, blk_rq_pos(rq), nr_bytes,
rq->cmd_flags, what, rq->errors, 0, NULL);
}
}
@@ -713,33 +731,34 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
static void blk_add_trace_rq_abort(void *ignore,
struct request_queue *q, struct request *rq)
{
- blk_add_trace_rq(q, rq, BLK_TA_ABORT);
+ blk_add_trace_rq(q, rq, blk_rq_bytes(rq), BLK_TA_ABORT);
}
static void blk_add_trace_rq_insert(void *ignore,
struct request_queue *q, struct request *rq)
{
- blk_add_trace_rq(q, rq, BLK_TA_INSERT);
+ blk_add_trace_rq(q, rq, blk_rq_bytes(rq), BLK_TA_INSERT);
}
static void blk_add_trace_rq_issue(void *ignore,
struct request_queue *q, struct request *rq)
{
- blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
+ blk_add_trace_rq(q, rq, blk_rq_bytes(rq), BLK_TA_ISSUE);
}
static void blk_add_trace_rq_requeue(void *ignore,
struct request_queue *q,
struct request *rq)
{
- blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);
+ blk_add_trace_rq(q, rq, blk_rq_bytes(rq), BLK_TA_REQUEUE);
}
static void blk_add_trace_rq_complete(void *ignore,
struct request_queue *q,
- struct request *rq)
+ struct request *rq,
+ unsigned int nr_bytes)
{
- blk_add_trace_rq(q, rq, BLK_TA_COMPLETE);
+ blk_add_trace_rq(q, rq, nr_bytes, BLK_TA_COMPLETE);
}
/**
@@ -764,8 +783,8 @@ static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
if (!error && !bio_flagged(bio, BIO_UPTODATE))
error = EIO;
- __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what,
- error, 0, NULL);
+ __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
+ bio->bi_rw, what, error, 0, NULL);
}
static void blk_add_trace_bio_bounce(void *ignore,
@@ -868,8 +887,9 @@ static void blk_add_trace_split(void *ignore,
if (bt) {
__be64 rpdu = cpu_to_be64(pdu);
- __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw,
- BLK_TA_SPLIT, !bio_flagged(bio, BIO_UPTODATE),
+ __blk_add_trace(bt, bio->bi_iter.bi_sector,
+ bio->bi_iter.bi_size, bio->bi_rw, BLK_TA_SPLIT,
+ !bio_flagged(bio, BIO_UPTODATE),
sizeof(rpdu), &rpdu);
}
}
@@ -901,9 +921,9 @@ static void blk_add_trace_bio_remap(void *ignore,
r.device_to = cpu_to_be32(bio->bi_bdev->bd_dev);
r.sector_from = cpu_to_be64(from);
- __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw,
- BLK_TA_REMAP, !bio_flagged(bio, BIO_UPTODATE),
- sizeof(r), &r);
+ __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
+ bio->bi_rw, BLK_TA_REMAP,
+ !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r);
}
/**
@@ -1409,7 +1429,8 @@ static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter)
return print_one_line(iter, true);
}
-static int blk_tracer_set_flag(u32 old_flags, u32 bit, int set)
+static int
+blk_tracer_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
{
/* don't output context-info for blk_classic output */
if (bit == TRACE_BLK_OPT_CLASSIC) {
@@ -1472,6 +1493,9 @@ static int blk_trace_remove_queue(struct request_queue *q)
if (atomic_dec_and_test(&blk_probes_ref))
blk_unregister_tracepoints();
+ spin_lock_irq(&running_trace_lock);
+ list_del(&bt->running_list);
+ spin_unlock_irq(&running_trace_lock);
blk_trace_free(bt);
return 0;
}
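
The blktrace changes above move the per-task process note out of the IRQ-disabled region and key it off a global sequence number: a task is announced at most once per blktrace_seq generation, and when it is, every blk_trace on the new running_trace_list receives the note under running_trace_lock. A hedged, user-space flavored sketch of that "note once per generation" check (names are illustrative, not the kernel's):

#include <stdbool.h>
#include <stdint.h>

static uint32_t trace_generation = 1;   /* bumped each time tracing (re)starts */

struct task {
	uint32_t noted_generation;      /* generation this task was last announced in */
	const char *comm;
};

/* Returns true when the caller should emit a process-name note for @t. */
static bool should_note_task(struct task *t)
{
	if (t->noted_generation == trace_generation)
		return false;           /* already announced in this generation */
	t->noted_generation = trace_generation;
	return true;                    /* caller then walks all running tracers */
}
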
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 03cf44ac54d..ac9d1dad630 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -62,7 +62,7 @@
#define FTRACE_HASH_DEFAULT_BITS 10
#define FTRACE_HASH_MAX_BITS 12
-#define FL_GLOBAL_CONTROL_MASK (FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_CONTROL)
+#define FL_GLOBAL_CONTROL_MASK (FTRACE_OPS_FL_CONTROL)
#ifdef CONFIG_DYNAMIC_FTRACE
#define INIT_REGEX_LOCK(opsname) \
@@ -85,6 +85,8 @@ int function_trace_stop __read_mostly;
/* Current function tracing op */
struct ftrace_ops *function_trace_op __read_mostly = &ftrace_list_end;
+/* What to set function_trace_op to */
+static struct ftrace_ops *set_function_trace_op;
/* List for set_ftrace_pid's pids. */
LIST_HEAD(ftrace_pids);
@@ -101,7 +103,6 @@ static int ftrace_disabled __read_mostly;
static DEFINE_MUTEX(ftrace_lock);
-static struct ftrace_ops *ftrace_global_list __read_mostly = &ftrace_list_end;
static struct ftrace_ops *ftrace_control_list __read_mostly = &ftrace_list_end;
static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end;
ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
@@ -169,23 +170,6 @@ int ftrace_nr_registered_ops(void)
return cnt;
}
-static void
-ftrace_global_list_func(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *op, struct pt_regs *regs)
-{
- int bit;
-
- bit = trace_test_and_set_recursion(TRACE_GLOBAL_START, TRACE_GLOBAL_MAX);
- if (bit < 0)
- return;
-
- do_for_each_ftrace_op(op, ftrace_global_list) {
- op->func(ip, parent_ip, op, regs);
- } while_for_each_ftrace_op(op);
-
- trace_clear_recursion(bit);
-}
-
static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct pt_regs *regs)
{
@@ -235,55 +219,33 @@ static int control_ops_alloc(struct ftrace_ops *ops)
return 0;
}
-static void control_ops_free(struct ftrace_ops *ops)
-{
- free_percpu(ops->disabled);
-}
-
-static void update_global_ops(void)
+static void ftrace_sync(struct work_struct *work)
{
- ftrace_func_t func;
-
/*
- * If there's only one function registered, then call that
- * function directly. Otherwise, we need to iterate over the
- * registered callers.
+ * This function is just a stub to implement a hard force
+ * of synchronize_sched(). This requires synchronizing
+ * tasks even in userspace and idle.
+ *
+ * Yes, function tracing is rude.
*/
- if (ftrace_global_list == &ftrace_list_end ||
- ftrace_global_list->next == &ftrace_list_end) {
- func = ftrace_global_list->func;
- /*
- * As we are calling the function directly.
- * If it does not have recursion protection,
- * the function_trace_op needs to be updated
- * accordingly.
- */
- if (ftrace_global_list->flags & FTRACE_OPS_FL_RECURSION_SAFE)
- global_ops.flags |= FTRACE_OPS_FL_RECURSION_SAFE;
- else
- global_ops.flags &= ~FTRACE_OPS_FL_RECURSION_SAFE;
- } else {
- func = ftrace_global_list_func;
- /* The list has its own recursion protection. */
- global_ops.flags |= FTRACE_OPS_FL_RECURSION_SAFE;
- }
-
-
- /* If we filter on pids, update to use the pid function */
- if (!list_empty(&ftrace_pids)) {
- set_ftrace_pid_function(func);
- func = ftrace_pid_func;
- }
+}
- global_ops.func = func;
+static void ftrace_sync_ipi(void *data)
+{
+ /* Probably not needed, but do it anyway */
+ smp_rmb();
}
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+static void update_function_graph_func(void);
+#else
+static inline void update_function_graph_func(void) { }
+#endif
+
static void update_ftrace_function(void)
{
ftrace_func_t func;
- update_global_ops();
-
/*
* If we are at the end of the list and this ops is
* recursion safe and not dynamic and the arch supports passing ops,
@@ -295,20 +257,67 @@ static void update_ftrace_function(void)
(ftrace_ops_list->flags & FTRACE_OPS_FL_RECURSION_SAFE) &&
!FTRACE_FORCE_LIST_FUNC)) {
/* Set the ftrace_ops that the arch callback uses */
- if (ftrace_ops_list == &global_ops)
- function_trace_op = ftrace_global_list;
- else
- function_trace_op = ftrace_ops_list;
+ set_function_trace_op = ftrace_ops_list;
func = ftrace_ops_list->func;
} else {
/* Just use the default ftrace_ops */
- function_trace_op = &ftrace_list_end;
+ set_function_trace_op = &ftrace_list_end;
func = ftrace_ops_list_func;
}
+ update_function_graph_func();
+
+ /* If there's no change, then do nothing more here */
+ if (ftrace_trace_function == func)
+ return;
+
+ /*
+ * If we are using the list function, it doesn't care
+ * about the function_trace_ops.
+ */
+ if (func == ftrace_ops_list_func) {
+ ftrace_trace_function = func;
+ /*
+ * Don't even bother setting function_trace_ops,
+ * it would be racy to do so anyway.
+ */
+ return;
+ }
+
+#ifndef CONFIG_DYNAMIC_FTRACE
+ /*
+ * For static tracing, we need to be a bit more careful.
+ * The function change takes effect immediately. Thus,
+ * we need to coordinate the setting of the function_trace_ops
+ * with the setting of the ftrace_trace_function.
+ *
+ * Set the function to the list ops, which will call the
+ * function we want, albeit indirectly, but it handles the
+ * ftrace_ops and doesn't depend on function_trace_op.
+ */
+ ftrace_trace_function = ftrace_ops_list_func;
+ /*
+ * Make sure all CPUs see this. Yes this is slow, but static
+ * tracing is slow and nasty to have enabled.
+ */
+ schedule_on_each_cpu(ftrace_sync);
+ /* Now all cpus are using the list ops. */
+ function_trace_op = set_function_trace_op;
+ /* Make sure the function_trace_op is visible on all CPUs */
+ smp_wmb();
+ /* Nasty way to force a rmb on all cpus */
+ smp_call_function(ftrace_sync_ipi, NULL, 1);
+ /* OK, we are all set to update the ftrace_trace_function now! */
+#endif /* !CONFIG_DYNAMIC_FTRACE */
+
ftrace_trace_function = func;
}
+int using_ftrace_ops_list_func(void)
+{
+ return ftrace_trace_function == ftrace_ops_list_func;
+}
+
static void add_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops)
{
ops->next = *list;
@@ -367,19 +376,12 @@ static int remove_ftrace_list_ops(struct ftrace_ops **list,
static int __register_ftrace_function(struct ftrace_ops *ops)
{
- if (unlikely(ftrace_disabled))
- return -ENODEV;
-
- if (FTRACE_WARN_ON(ops == &global_ops))
+ if (ops->flags & FTRACE_OPS_FL_DELETED)
return -EINVAL;
if (WARN_ON(ops->flags & FTRACE_OPS_FL_ENABLED))
return -EBUSY;
- /* We don't support both control and global flags set. */
- if ((ops->flags & FL_GLOBAL_CONTROL_MASK) == FL_GLOBAL_CONTROL_MASK)
- return -EINVAL;
-
#ifndef CONFIG_DYNAMIC_FTRACE_WITH_REGS
/*
* If the ftrace_ops specifies SAVE_REGS, then it only can be used
@@ -397,10 +399,7 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
if (!core_kernel_data((unsigned long)ops))
ops->flags |= FTRACE_OPS_FL_DYNAMIC;
- if (ops->flags & FTRACE_OPS_FL_GLOBAL) {
- add_ftrace_list_ops(&ftrace_global_list, &global_ops, ops);
- ops->flags |= FTRACE_OPS_FL_ENABLED;
- } else if (ops->flags & FTRACE_OPS_FL_CONTROL) {
+ if (ops->flags & FTRACE_OPS_FL_CONTROL) {
if (control_ops_alloc(ops))
return -ENOMEM;
add_ftrace_list_ops(&ftrace_control_list, &control_ops, ops);
@@ -413,52 +412,16 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
return 0;
}
-static void ftrace_sync(struct work_struct *work)
-{
- /*
- * This function is just a stub to implement a hard force
- * of synchronize_sched(). This requires synchronizing
- * tasks even in userspace and idle.
- *
- * Yes, function tracing is rude.
- */
-}
-
static int __unregister_ftrace_function(struct ftrace_ops *ops)
{
int ret;
- if (ftrace_disabled)
- return -ENODEV;
-
if (WARN_ON(!(ops->flags & FTRACE_OPS_FL_ENABLED)))
return -EBUSY;
- if (FTRACE_WARN_ON(ops == &global_ops))
- return -EINVAL;
-
- if (ops->flags & FTRACE_OPS_FL_GLOBAL) {
- ret = remove_ftrace_list_ops(&ftrace_global_list,
- &global_ops, ops);
- if (!ret)
- ops->flags &= ~FTRACE_OPS_FL_ENABLED;
- } else if (ops->flags & FTRACE_OPS_FL_CONTROL) {
+ if (ops->flags & FTRACE_OPS_FL_CONTROL) {
ret = remove_ftrace_list_ops(&ftrace_control_list,
&control_ops, ops);
- if (!ret) {
- /*
- * The ftrace_ops is now removed from the list,
- * so there'll be no new users. We must ensure
- * all current users are done before we free
- * the control data.
- * Note synchronize_sched() is not enough, as we
- * use preempt_disable() to do RCU, but the function
- * tracer can be called where RCU is not active
- * (before user_exit()).
- */
- schedule_on_each_cpu(ftrace_sync);
- control_ops_free(ops);
- }
} else
ret = remove_ftrace_ops(&ftrace_ops_list, ops);
@@ -468,17 +431,6 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
if (ftrace_enabled)
update_ftrace_function();
- /*
- * Dynamic ops may be freed, we must make sure that all
- * callers are done before leaving this function.
- *
- * Again, normal synchronize_sched() is not good enough.
- * We need to do a hard force of sched synchronization.
- */
- if (ops->flags & FTRACE_OPS_FL_DYNAMIC)
- schedule_on_each_cpu(ftrace_sync);
-
-
return 0;
}
@@ -781,7 +733,7 @@ static int ftrace_profile_init(void)
int cpu;
int ret = 0;
- for_each_online_cpu(cpu) {
+ for_each_possible_cpu(cpu) {
ret = ftrace_profile_init_cpu(cpu);
if (ret)
break;
@@ -870,7 +822,7 @@ function_profile_call(unsigned long ip, unsigned long parent_ip,
local_irq_save(flags);
- stat = &__get_cpu_var(ftrace_profile_stats);
+ stat = this_cpu_ptr(&ftrace_profile_stats);
if (!stat->hash || !ftrace_profile_enabled)
goto out;
@@ -901,7 +853,7 @@ static void profile_graph_return(struct ftrace_graph_ret *trace)
unsigned long flags;
local_irq_save(flags);
- stat = &__get_cpu_var(ftrace_profile_stats);
+ stat = this_cpu_ptr(&ftrace_profile_stats);
if (!stat->hash || !ftrace_profile_enabled)
goto out;
@@ -1088,19 +1040,6 @@ static __init void ftrace_profile_debugfs(struct dentry *d_tracer)
static struct pid * const ftrace_swapper_pid = &init_struct_pid;
-loff_t
-ftrace_filter_lseek(struct file *file, loff_t offset, int whence)
-{
- loff_t ret;
-
- if (file->f_mode & FMODE_READ)
- ret = seq_lseek(file, offset, whence);
- else
- file->f_pos = ret = 1;
-
- return ret;
-}
-
#ifdef CONFIG_DYNAMIC_FTRACE
#ifndef CONFIG_FTRACE_MCOUNT_RECORD
@@ -1157,8 +1096,6 @@ struct ftrace_page {
int size;
};
-static struct ftrace_page *ftrace_new_pgs;
-
#define ENTRY_SIZE sizeof(struct dyn_ftrace)
#define ENTRIES_PER_PAGE (PAGE_SIZE / ENTRY_SIZE)
@@ -1168,7 +1105,7 @@ static struct ftrace_page *ftrace_new_pgs;
static struct ftrace_page *ftrace_pages_start;
static struct ftrace_page *ftrace_pages;
-static bool ftrace_hash_empty(struct ftrace_hash *hash)
+static bool __always_inline ftrace_hash_empty(struct ftrace_hash *hash)
{
return !hash || !hash->count;
}
@@ -1545,7 +1482,7 @@ unsigned long ftrace_location(unsigned long ip)
* the function tracer. It checks the ftrace internal tables to
* determine if the address belongs or not.
*/
-int ftrace_text_reserved(void *start, void *end)
+int ftrace_text_reserved(const void *start, const void *end)
{
unsigned long ret;
@@ -1615,7 +1552,14 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
in_other_hash = !!ftrace_lookup_ip(other_hash, rec->ip);
/*
+ * If filter_hash is set, we want to match all functions
+ * that are in the hash but not in the other hash.
*
+ * If filter_hash is not set, then we are decrementing.
+ * That means we match anything that is in the hash
+ * and also in the other_hash. That is, we need to turn
+ * off functions in the other hash because they are disabled
+ * by this hash.
*/
if (filter_hash && in_hash && !in_other_hash)
match = 1;
@@ -1757,19 +1701,15 @@ static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update)
/*
* If this record is being updated from a nop, then
* return UPDATE_MAKE_CALL.
- * Otherwise, if the EN flag is set, then return
- * UPDATE_MODIFY_CALL_REGS to tell the caller to convert
- * from the non-save regs, to a save regs function.
* Otherwise,
* return UPDATE_MODIFY_CALL to tell the caller to convert
- * from the save regs, to a non-save regs function.
+ * from the save regs, to a non-save regs function or
+ * vice versa.
*/
if (flag & FTRACE_FL_ENABLED)
return FTRACE_UPDATE_MAKE_CALL;
- else if (rec->flags & FTRACE_FL_REGS_EN)
- return FTRACE_UPDATE_MODIFY_CALL_REGS;
- else
- return FTRACE_UPDATE_MODIFY_CALL;
+
+ return FTRACE_UPDATE_MODIFY_CALL;
}
if (update) {
@@ -1811,6 +1751,42 @@ int ftrace_test_record(struct dyn_ftrace *rec, int enable)
return ftrace_check_record(rec, enable, 0);
}
+/**
+ * ftrace_get_addr_new - Get the call address to set to
+ * @rec: The ftrace record descriptor
+ *
+ * If the record has the FTRACE_FL_REGS set, that means that it
+ * wants to convert to a callback that saves all regs. If FTRACE_FL_REGS
+ * is not not set, then it wants to convert to the normal callback.
+ *
+ * Returns the address of the trampoline to set to
+ */
+unsigned long ftrace_get_addr_new(struct dyn_ftrace *rec)
+{
+ if (rec->flags & FTRACE_FL_REGS)
+ return (unsigned long)FTRACE_REGS_ADDR;
+ else
+ return (unsigned long)FTRACE_ADDR;
+}
+
+/**
+ * ftrace_get_addr_curr - Get the call address that is already there
+ * @rec: The ftrace record descriptor
+ *
+ * The FTRACE_FL_REGS_EN is set when the record already points to
+ * a function that saves all the regs. Basically the '_EN' version
+ * represents the current state of the function.
+ *
+ * Returns the address of the trampoline that is currently being called
+ */
+unsigned long ftrace_get_addr_curr(struct dyn_ftrace *rec)
+{
+ if (rec->flags & FTRACE_FL_REGS_EN)
+ return (unsigned long)FTRACE_REGS_ADDR;
+ else
+ return (unsigned long)FTRACE_ADDR;
+}
+
static int
__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
{
@@ -1818,12 +1794,12 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
unsigned long ftrace_addr;
int ret;
- ret = ftrace_update_record(rec, enable);
+ ftrace_addr = ftrace_get_addr_new(rec);
- if (rec->flags & FTRACE_FL_REGS)
- ftrace_addr = (unsigned long)FTRACE_REGS_ADDR;
- else
- ftrace_addr = (unsigned long)FTRACE_ADDR;
+ /* This needs to be done before we call ftrace_update_record */
+ ftrace_old_addr = ftrace_get_addr_curr(rec);
+
+ ret = ftrace_update_record(rec, enable);
switch (ret) {
case FTRACE_UPDATE_IGNORE:
@@ -1835,13 +1811,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
case FTRACE_UPDATE_MAKE_NOP:
return ftrace_make_nop(NULL, rec, ftrace_addr);
- case FTRACE_UPDATE_MODIFY_CALL_REGS:
case FTRACE_UPDATE_MODIFY_CALL:
- if (rec->flags & FTRACE_FL_REGS)
- ftrace_old_addr = (unsigned long)FTRACE_ADDR;
- else
- ftrace_old_addr = (unsigned long)FTRACE_REGS_ADDR;
-
return ftrace_modify_call(rec, ftrace_old_addr, ftrace_addr);
}
@@ -1979,6 +1949,7 @@ int __weak ftrace_arch_code_modify_post_process(void)
void ftrace_modify_all_code(int command)
{
int update = command & FTRACE_UPDATE_TRACE_FUNC;
+ int err = 0;
/*
* If the ftrace_caller calls a ftrace_ops func directly,
@@ -1990,21 +1961,33 @@ void ftrace_modify_all_code(int command)
* to make sure the ops are having the right functions
* traced.
*/
- if (update)
- ftrace_update_ftrace_func(ftrace_ops_list_func);
+ if (update) {
+ err = ftrace_update_ftrace_func(ftrace_ops_list_func);
+ if (FTRACE_WARN_ON(err))
+ return;
+ }
if (command & FTRACE_UPDATE_CALLS)
ftrace_replace_code(1);
else if (command & FTRACE_DISABLE_CALLS)
ftrace_replace_code(0);
- if (update && ftrace_trace_function != ftrace_ops_list_func)
- ftrace_update_ftrace_func(ftrace_trace_function);
+ if (update && ftrace_trace_function != ftrace_ops_list_func) {
+ function_trace_op = set_function_trace_op;
+ smp_wmb();
+ /* If irqs are disabled, we are in stop machine */
+ if (!irqs_disabled())
+ smp_call_function(ftrace_sync_ipi, NULL, 1);
+ err = ftrace_update_ftrace_func(ftrace_trace_function);
+ if (FTRACE_WARN_ON(err))
+ return;
+ }
if (command & FTRACE_START_FUNC_RET)
- ftrace_enable_ftrace_graph_caller();
+ err = ftrace_enable_ftrace_graph_caller();
else if (command & FTRACE_STOP_FUNC_RET)
- ftrace_disable_ftrace_graph_caller();
+ err = ftrace_disable_ftrace_graph_caller();
+ FTRACE_WARN_ON(err);
}
static int __ftrace_modify_code(void *data)
@@ -2072,6 +2055,11 @@ static ftrace_func_t saved_ftrace_func;
static int ftrace_start_up;
static int global_start_up;
+static void control_ops_free(struct ftrace_ops *ops)
+{
+ free_percpu(ops->disabled);
+}
+
static void ftrace_startup_enable(int command)
{
if (saved_ftrace_func != ftrace_trace_function) {
@@ -2087,38 +2075,37 @@ static void ftrace_startup_enable(int command)
static int ftrace_startup(struct ftrace_ops *ops, int command)
{
- bool hash_enable = true;
+ int ret;
if (unlikely(ftrace_disabled))
return -ENODEV;
+ ret = __register_ftrace_function(ops);
+ if (ret)
+ return ret;
+
ftrace_start_up++;
command |= FTRACE_UPDATE_CALLS;
- /* ops marked global share the filter hashes */
- if (ops->flags & FTRACE_OPS_FL_GLOBAL) {
- ops = &global_ops;
- /* Don't update hash if global is already set */
- if (global_start_up)
- hash_enable = false;
- global_start_up++;
- }
-
ops->flags |= FTRACE_OPS_FL_ENABLED;
- if (hash_enable)
- ftrace_hash_rec_enable(ops, 1);
+
+ ftrace_hash_rec_enable(ops, 1);
ftrace_startup_enable(command);
return 0;
}
-static void ftrace_shutdown(struct ftrace_ops *ops, int command)
+static int ftrace_shutdown(struct ftrace_ops *ops, int command)
{
- bool hash_disable = true;
+ int ret;
if (unlikely(ftrace_disabled))
- return;
+ return -ENODEV;
+
+ ret = __unregister_ftrace_function(ops);
+ if (ret)
+ return ret;
ftrace_start_up--;
/*
@@ -2128,21 +2115,9 @@ static void ftrace_shutdown(struct ftrace_ops *ops, int command)
*/
WARN_ON_ONCE(ftrace_start_up < 0);
- if (ops->flags & FTRACE_OPS_FL_GLOBAL) {
- ops = &global_ops;
- global_start_up--;
- WARN_ON_ONCE(global_start_up < 0);
- /* Don't update hash if global still has users */
- if (global_start_up) {
- WARN_ON_ONCE(!ftrace_start_up);
- hash_disable = false;
- }
- }
-
- if (hash_disable)
- ftrace_hash_rec_disable(ops, 1);
+ ftrace_hash_rec_disable(ops, 1);
- if (ops != &global_ops || !global_start_up)
+ if (!global_start_up)
ops->flags &= ~FTRACE_OPS_FL_ENABLED;
command |= FTRACE_UPDATE_CALLS;
@@ -2152,10 +2127,42 @@ static void ftrace_shutdown(struct ftrace_ops *ops, int command)
command |= FTRACE_UPDATE_TRACE_FUNC;
}
- if (!command || !ftrace_enabled)
- return;
+ if (!command || !ftrace_enabled) {
+ /*
+ * If these are control ops, they still need their
+ * per_cpu field freed. Since function tracing is
+ * not currently active, we can just free them
+ * without synchronizing all CPUs.
+ */
+ if (ops->flags & FTRACE_OPS_FL_CONTROL)
+ control_ops_free(ops);
+ return 0;
+ }
ftrace_run_update_code(command);
+
+ /*
+ * Dynamic ops may be freed, we must make sure that all
+ * callers are done before leaving this function.
+ * The same goes for freeing the per_cpu data of the control
+ * ops.
+ *
+ * Again, normal synchronize_sched() is not good enough.
+ * We need to do a hard force of sched synchronization.
+ * This is because we use preempt_disable() to do RCU, but
+ * the function tracers can be called where RCU is not watching
+ * (like before user_exit()). We can not rely on the RCU
+ * infrastructure to do the synchronization, thus we must do it
+ * ourselves.
+ */
+ if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_CONTROL)) {
+ schedule_on_each_cpu(ftrace_sync);
+
+ if (ops->flags & FTRACE_OPS_FL_CONTROL)
+ control_ops_free(ops);
+ }
+
+ return 0;
}
static void ftrace_startup_sysctl(void)
@@ -2181,7 +2188,6 @@ static void ftrace_shutdown_sysctl(void)
}
static cycle_t ftrace_update_time;
-static unsigned long ftrace_update_cnt;
unsigned long ftrace_update_tot_cnt;
static inline int ops_traces_mod(struct ftrace_ops *ops)
@@ -2237,11 +2243,12 @@ static int referenced_filters(struct dyn_ftrace *rec)
return cnt;
}
-static int ftrace_update_code(struct module *mod)
+static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs)
{
struct ftrace_page *pg;
struct dyn_ftrace *p;
cycle_t start, stop;
+ unsigned long update_cnt = 0;
unsigned long ref = 0;
bool test = false;
int i;
@@ -2267,9 +2274,8 @@ static int ftrace_update_code(struct module *mod)
}
start = ftrace_now(raw_smp_processor_id());
- ftrace_update_cnt = 0;
- for (pg = ftrace_new_pgs; pg; pg = pg->next) {
+ for (pg = new_pgs; pg; pg = pg->next) {
for (i = 0; i < pg->index; i++) {
int cnt = ref;
@@ -2290,7 +2296,7 @@ static int ftrace_update_code(struct module *mod)
if (!ftrace_code_disable(mod, p))
break;
- ftrace_update_cnt++;
+ update_cnt++;
/*
* If the tracing is enabled, go ahead and enable the record.
@@ -2309,11 +2315,9 @@ static int ftrace_update_code(struct module *mod)
}
}
- ftrace_new_pgs = NULL;
-
stop = ftrace_now(raw_smp_processor_id());
ftrace_update_time = stop - start;
- ftrace_update_tot_cnt += ftrace_update_cnt;
+ ftrace_update_tot_cnt += update_cnt;
return 0;
}
@@ -2405,22 +2409,6 @@ ftrace_allocate_pages(unsigned long num_to_init)
return NULL;
}
-static int __init ftrace_dyn_table_alloc(unsigned long num_to_init)
-{
- int cnt;
-
- if (!num_to_init) {
- pr_info("ftrace: No functions to be traced?\n");
- return -1;
- }
-
- cnt = num_to_init / ENTRIES_PER_PAGE;
- pr_info("ftrace: allocating %ld entries in %d pages\n",
- num_to_init, cnt + 1);
-
- return 0;
-}
-
#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */
struct ftrace_iterator {
@@ -2734,7 +2722,7 @@ static void ftrace_filter_reset(struct ftrace_hash *hash)
* routine, you can use ftrace_filter_write() for the write
* routine if @flag has FTRACE_ITER_FILTER set, or
* ftrace_notrace_write() if @flag has FTRACE_ITER_NOTRACE set.
- * ftrace_filter_lseek() should be used as the lseek routine, and
+ * tracing_lseek() should be used as the lseek routine, and
* release must call ftrace_regex_release().
*/
int
@@ -2808,7 +2796,9 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag,
static int
ftrace_filter_open(struct inode *inode, struct file *file)
{
- return ftrace_regex_open(&global_ops,
+ struct ftrace_ops *ops = inode->i_private;
+
+ return ftrace_regex_open(ops,
FTRACE_ITER_FILTER | FTRACE_ITER_DO_HASH,
inode, file);
}
@@ -2816,7 +2806,9 @@ ftrace_filter_open(struct inode *inode, struct file *file)
static int
ftrace_notrace_open(struct inode *inode, struct file *file)
{
- return ftrace_regex_open(&global_ops, FTRACE_ITER_NOTRACE,
+ struct ftrace_ops *ops = inode->i_private;
+
+ return ftrace_regex_open(ops, FTRACE_ITER_NOTRACE,
inode, file);
}
@@ -3060,16 +3052,13 @@ static void __enable_ftrace_function_probe(void)
if (i == FTRACE_FUNC_HASHSIZE)
return;
- ret = __register_ftrace_function(&trace_probe_ops);
- if (!ret)
- ret = ftrace_startup(&trace_probe_ops, 0);
+ ret = ftrace_startup(&trace_probe_ops, 0);
ftrace_probe_registered = 1;
}
static void __disable_ftrace_function_probe(void)
{
- int ret;
int i;
if (!ftrace_probe_registered)
@@ -3082,9 +3071,7 @@ static void __disable_ftrace_function_probe(void)
}
/* no more funcs left */
- ret = __unregister_ftrace_function(&trace_probe_ops);
- if (!ret)
- ftrace_shutdown(&trace_probe_ops, 0);
+ ftrace_shutdown(&trace_probe_ops, 0);
ftrace_probe_registered = 0;
}
@@ -3307,7 +3294,11 @@ void unregister_ftrace_function_probe_all(char *glob)
static LIST_HEAD(ftrace_commands);
static DEFINE_MUTEX(ftrace_cmd_mutex);
-int register_ftrace_command(struct ftrace_func_command *cmd)
+/*
+ * Currently we only register ftrace commands from __init, so mark this
+ * __init too.
+ */
+__init int register_ftrace_command(struct ftrace_func_command *cmd)
{
struct ftrace_func_command *p;
int ret = 0;
@@ -3326,7 +3317,11 @@ int register_ftrace_command(struct ftrace_func_command *cmd)
return ret;
}
-int unregister_ftrace_command(struct ftrace_func_command *cmd)
+/*
+ * Currently we only unregister ftrace commands from __init, so mark
+ * this __init too.
+ */
+__init int unregister_ftrace_command(struct ftrace_func_command *cmd)
{
struct ftrace_func_command *p, *n;
int ret = -ENODEV;
@@ -3466,10 +3461,6 @@ ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len,
struct ftrace_hash *hash;
int ret;
- /* All global ops uses the global ops filters */
- if (ops->flags & FTRACE_OPS_FL_GLOBAL)
- ops = &global_ops;
-
if (unlikely(ftrace_disabled))
return -ENODEV;
@@ -3581,8 +3572,7 @@ int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf,
}
EXPORT_SYMBOL_GPL(ftrace_set_notrace);
/**
- * ftrace_set_filter - set a function to filter on in ftrace
- * @ops - the ops to set the filter with
+ * ftrace_set_global_filter - set a function to filter on with global tracers
* @buf - the string that holds the function filter text.
* @len - the length of the string.
* @reset - non zero to reset all filters before applying this filter.
@@ -3597,8 +3587,7 @@ void ftrace_set_global_filter(unsigned char *buf, int len, int reset)
EXPORT_SYMBOL_GPL(ftrace_set_global_filter);
/**
- * ftrace_set_notrace - set a function to not trace in ftrace
- * @ops - the ops to set the notrace filter with
+ * ftrace_set_global_notrace - set a function to not trace with global tracers
* @buf - the string that holds the function notrace text.
* @len - the length of the string.
* @reset - non zero to reset all filters before applying this filter.
@@ -3641,7 +3630,7 @@ __setup("ftrace_filter=", set_ftrace_filter);
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
static char ftrace_graph_buf[FTRACE_FILTER_SIZE] __initdata;
-static int ftrace_set_func(unsigned long *array, int *idx, char *buffer);
+static int ftrace_set_func(unsigned long *array, int *idx, int size, char *buffer);
static int __init set_graph_function(char *str)
{
@@ -3659,7 +3648,7 @@ static void __init set_ftrace_early_graph(char *buf)
func = strsep(&buf, ",");
/* we allow only one expression at a time */
ret = ftrace_set_func(ftrace_graph_funcs, &ftrace_graph_count,
- func);
+ FTRACE_GRAPH_MAX_FUNCS, func);
if (ret)
printk(KERN_DEBUG "ftrace: function %s not "
"traceable\n", func);
@@ -3759,7 +3748,7 @@ static const struct file_operations ftrace_filter_fops = {
.open = ftrace_filter_open,
.read = seq_read,
.write = ftrace_filter_write,
- .llseek = ftrace_filter_lseek,
+ .llseek = tracing_lseek,
.release = ftrace_regex_release,
};
@@ -3767,7 +3756,7 @@ static const struct file_operations ftrace_notrace_fops = {
.open = ftrace_notrace_open,
.read = seq_read,
.write = ftrace_notrace_write,
- .llseek = ftrace_filter_lseek,
+ .llseek = tracing_lseek,
.release = ftrace_regex_release,
};
@@ -3776,15 +3765,25 @@ static const struct file_operations ftrace_notrace_fops = {
static DEFINE_MUTEX(graph_lock);
int ftrace_graph_count;
-int ftrace_graph_filter_enabled;
+int ftrace_graph_notrace_count;
unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly;
+unsigned long ftrace_graph_notrace_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly;
+
+struct ftrace_graph_data {
+ unsigned long *table;
+ size_t size;
+ int *count;
+ const struct seq_operations *seq_ops;
+};
static void *
__g_next(struct seq_file *m, loff_t *pos)
{
- if (*pos >= ftrace_graph_count)
+ struct ftrace_graph_data *fgd = m->private;
+
+ if (*pos >= *fgd->count)
return NULL;
- return &ftrace_graph_funcs[*pos];
+ return &fgd->table[*pos];
}
static void *
@@ -3796,10 +3795,12 @@ g_next(struct seq_file *m, void *v, loff_t *pos)
static void *g_start(struct seq_file *m, loff_t *pos)
{
+ struct ftrace_graph_data *fgd = m->private;
+
mutex_lock(&graph_lock);
/* Nothing, tell g_show to print all functions are enabled */
- if (!ftrace_graph_filter_enabled && !*pos)
+ if (!*fgd->count && !*pos)
return (void *)1;
return __g_next(m, pos);
@@ -3835,38 +3836,88 @@ static const struct seq_operations ftrace_graph_seq_ops = {
};
static int
-ftrace_graph_open(struct inode *inode, struct file *file)
+__ftrace_graph_open(struct inode *inode, struct file *file,
+ struct ftrace_graph_data *fgd)
{
int ret = 0;
- if (unlikely(ftrace_disabled))
- return -ENODEV;
-
mutex_lock(&graph_lock);
if ((file->f_mode & FMODE_WRITE) &&
(file->f_flags & O_TRUNC)) {
- ftrace_graph_filter_enabled = 0;
- ftrace_graph_count = 0;
- memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs));
+ *fgd->count = 0;
+ memset(fgd->table, 0, fgd->size * sizeof(*fgd->table));
}
mutex_unlock(&graph_lock);
- if (file->f_mode & FMODE_READ)
- ret = seq_open(file, &ftrace_graph_seq_ops);
+ if (file->f_mode & FMODE_READ) {
+ ret = seq_open(file, fgd->seq_ops);
+ if (!ret) {
+ struct seq_file *m = file->private_data;
+ m->private = fgd;
+ }
+ } else
+ file->private_data = fgd;
return ret;
}
static int
+ftrace_graph_open(struct inode *inode, struct file *file)
+{
+ struct ftrace_graph_data *fgd;
+
+ if (unlikely(ftrace_disabled))
+ return -ENODEV;
+
+ fgd = kmalloc(sizeof(*fgd), GFP_KERNEL);
+ if (fgd == NULL)
+ return -ENOMEM;
+
+ fgd->table = ftrace_graph_funcs;
+ fgd->size = FTRACE_GRAPH_MAX_FUNCS;
+ fgd->count = &ftrace_graph_count;
+ fgd->seq_ops = &ftrace_graph_seq_ops;
+
+ return __ftrace_graph_open(inode, file, fgd);
+}
+
+static int
+ftrace_graph_notrace_open(struct inode *inode, struct file *file)
+{
+ struct ftrace_graph_data *fgd;
+
+ if (unlikely(ftrace_disabled))
+ return -ENODEV;
+
+ fgd = kmalloc(sizeof(*fgd), GFP_KERNEL);
+ if (fgd == NULL)
+ return -ENOMEM;
+
+ fgd->table = ftrace_graph_notrace_funcs;
+ fgd->size = FTRACE_GRAPH_MAX_FUNCS;
+ fgd->count = &ftrace_graph_notrace_count;
+ fgd->seq_ops = &ftrace_graph_seq_ops;
+
+ return __ftrace_graph_open(inode, file, fgd);
+}
+
+static int
ftrace_graph_release(struct inode *inode, struct file *file)
{
- if (file->f_mode & FMODE_READ)
+ if (file->f_mode & FMODE_READ) {
+ struct seq_file *m = file->private_data;
+
+ kfree(m->private);
seq_release(inode, file);
+ } else {
+ kfree(file->private_data);
+ }
+
return 0;
}
static int
-ftrace_set_func(unsigned long *array, int *idx, char *buffer)
+ftrace_set_func(unsigned long *array, int *idx, int size, char *buffer)
{
struct dyn_ftrace *rec;
struct ftrace_page *pg;
@@ -3879,7 +3930,7 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
/* decode regex */
type = filter_parse_regex(buffer, strlen(buffer), &search, &not);
- if (!not && *idx >= FTRACE_GRAPH_MAX_FUNCS)
+ if (!not && *idx >= size)
return -EBUSY;
search_len = strlen(search);
@@ -3907,7 +3958,7 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
fail = 0;
if (!exists) {
array[(*idx)++] = rec->ip;
- if (*idx >= FTRACE_GRAPH_MAX_FUNCS)
+ if (*idx >= size)
goto out;
}
} else {
@@ -3925,8 +3976,6 @@ out:
if (fail)
return -EINVAL;
- ftrace_graph_filter_enabled = !!(*idx);
-
return 0;
}
@@ -3935,36 +3984,33 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
struct trace_parser parser;
- ssize_t read, ret;
+ ssize_t read, ret = 0;
+ struct ftrace_graph_data *fgd = file->private_data;
if (!cnt)
return 0;
- mutex_lock(&graph_lock);
-
- if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX)) {
- ret = -ENOMEM;
- goto out_unlock;
- }
+ if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX))
+ return -ENOMEM;
read = trace_get_user(&parser, ubuf, cnt, ppos);
if (read >= 0 && trace_parser_loaded((&parser))) {
parser.buffer[parser.idx] = 0;
+ mutex_lock(&graph_lock);
+
/* we allow only one expression at a time */
- ret = ftrace_set_func(ftrace_graph_funcs, &ftrace_graph_count,
- parser.buffer);
- if (ret)
- goto out_free;
+ ret = ftrace_set_func(fgd->table, fgd->count, fgd->size,
+ parser.buffer);
+
+ mutex_unlock(&graph_lock);
}
- ret = read;
+ if (!ret)
+ ret = read;
-out_free:
trace_parser_put(&parser);
-out_unlock:
- mutex_unlock(&graph_lock);
return ret;
}
@@ -3973,11 +4019,49 @@ static const struct file_operations ftrace_graph_fops = {
.open = ftrace_graph_open,
.read = seq_read,
.write = ftrace_graph_write,
- .llseek = ftrace_filter_lseek,
+ .llseek = tracing_lseek,
+ .release = ftrace_graph_release,
+};
+
+static const struct file_operations ftrace_graph_notrace_fops = {
+ .open = ftrace_graph_notrace_open,
+ .read = seq_read,
+ .write = ftrace_graph_write,
+ .llseek = tracing_lseek,
.release = ftrace_graph_release,
};
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+void ftrace_create_filter_files(struct ftrace_ops *ops,
+ struct dentry *parent)
+{
+
+ trace_create_file("set_ftrace_filter", 0644, parent,
+ ops, &ftrace_filter_fops);
+
+ trace_create_file("set_ftrace_notrace", 0644, parent,
+ ops, &ftrace_notrace_fops);
+}
+
+/*
+ * The name "destroy_filter_files" is really a misnomer. Although
+ * it may actually delete the files in the future, for now this is
+ * really intended to make sure the ops passed in are disabled
+ * and that when this function returns, the caller is free to
+ * free the ops.
+ *
+ * The "destroy" name is only to match the "create" name that this
+ * should be paired with.
+ */
+void ftrace_destroy_filter_files(struct ftrace_ops *ops)
+{
+ mutex_lock(&ftrace_lock);
+ if (ops->flags & FTRACE_OPS_FL_ENABLED)
+ ftrace_shutdown(ops, 0);
+ ops->flags |= FTRACE_OPS_FL_DELETED;
+ mutex_unlock(&ftrace_lock);
+}
+
static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
{
@@ -3987,16 +4071,15 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
trace_create_file("enabled_functions", 0444,
d_tracer, NULL, &ftrace_enabled_fops);
- trace_create_file("set_ftrace_filter", 0644, d_tracer,
- NULL, &ftrace_filter_fops);
-
- trace_create_file("set_ftrace_notrace", 0644, d_tracer,
- NULL, &ftrace_notrace_fops);
+ ftrace_create_filter_files(&global_ops, d_tracer);
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
trace_create_file("set_graph_function", 0444, d_tracer,
NULL,
&ftrace_graph_fops);
+ trace_create_file("set_graph_notrace", 0444, d_tracer,
+ NULL,
+ &ftrace_graph_notrace_fops);
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
return 0;
@@ -4104,9 +4187,6 @@ static int ftrace_process_locs(struct module *mod,
/* Assign the last page to ftrace_pages */
ftrace_pages = pg;
- /* These new locations need to be initialized */
- ftrace_new_pgs = start_pg;
-
/*
* We only need to disable interrupts on start up
* because we are modifying code that an interrupt
@@ -4117,7 +4197,7 @@ static int ftrace_process_locs(struct module *mod,
*/
if (!mod)
local_irq_save(flags);
- ftrace_update_code(mod);
+ ftrace_update_code(mod, start_pg);
if (!mod)
local_irq_restore(flags);
ret = 0;
@@ -4181,16 +4261,11 @@ static void ftrace_init_module(struct module *mod,
ftrace_process_locs(mod, start, end);
}
-static int ftrace_module_notify_enter(struct notifier_block *self,
- unsigned long val, void *data)
+void ftrace_module_init(struct module *mod)
{
- struct module *mod = data;
-
- if (val == MODULE_STATE_COMING)
- ftrace_init_module(mod, mod->ftrace_callsites,
- mod->ftrace_callsites +
- mod->num_ftrace_callsites);
- return 0;
+ ftrace_init_module(mod, mod->ftrace_callsites,
+ mod->ftrace_callsites +
+ mod->num_ftrace_callsites);
}
static int ftrace_module_notify_exit(struct notifier_block *self,
@@ -4204,11 +4279,6 @@ static int ftrace_module_notify_exit(struct notifier_block *self,
return 0;
}
#else
-static int ftrace_module_notify_enter(struct notifier_block *self,
- unsigned long val, void *data)
-{
- return 0;
-}
static int ftrace_module_notify_exit(struct notifier_block *self,
unsigned long val, void *data)
{
@@ -4216,40 +4286,32 @@ static int ftrace_module_notify_exit(struct notifier_block *self,
}
#endif /* CONFIG_MODULES */
-struct notifier_block ftrace_module_enter_nb = {
- .notifier_call = ftrace_module_notify_enter,
- .priority = INT_MAX, /* Run before anything that can use kprobes */
-};
-
struct notifier_block ftrace_module_exit_nb = {
.notifier_call = ftrace_module_notify_exit,
.priority = INT_MIN, /* Run after anything that can remove kprobes */
};
-extern unsigned long __start_mcount_loc[];
-extern unsigned long __stop_mcount_loc[];
-
void __init ftrace_init(void)
{
- unsigned long count, addr, flags;
+ extern unsigned long __start_mcount_loc[];
+ extern unsigned long __stop_mcount_loc[];
+ unsigned long count, flags;
int ret;
- /* Keep the ftrace pointer to the stub */
- addr = (unsigned long)ftrace_stub;
-
local_irq_save(flags);
- ftrace_dyn_arch_init(&addr);
+ ret = ftrace_dyn_arch_init();
local_irq_restore(flags);
-
- /* ftrace_dyn_arch_init places the return code in addr */
- if (addr)
+ if (ret)
goto failed;
count = __stop_mcount_loc - __start_mcount_loc;
-
- ret = ftrace_dyn_table_alloc(count);
- if (ret)
+ if (!count) {
+ pr_info("ftrace: No functions to be traced?\n");
goto failed;
+ }
+
+ pr_info("ftrace: allocating %ld entries in %ld pages\n",
+ count, count / ENTRIES_PER_PAGE + 1);
last_ftrace_enabled = ftrace_enabled = 1;
@@ -4257,10 +4319,6 @@ void __init ftrace_init(void)
__start_mcount_loc,
__stop_mcount_loc);
- ret = register_module_notifier(&ftrace_module_enter_nb);
- if (ret)
- pr_warning("Failed to register trace ftrace module enter notifier\n");
-
ret = register_module_notifier(&ftrace_module_exit_nb);
if (ret)
pr_warning("Failed to register trace ftrace module exit notifier\n");
@@ -4290,12 +4348,21 @@ core_initcall(ftrace_nodyn_init);
static inline int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { return 0; }
static inline void ftrace_startup_enable(int command) { }
/* Keep as macros so we do not need to define the commands */
-# define ftrace_startup(ops, command) \
- ({ \
- (ops)->flags |= FTRACE_OPS_FL_ENABLED; \
- 0; \
+# define ftrace_startup(ops, command) \
+ ({ \
+ int ___ret = __register_ftrace_function(ops); \
+ if (!___ret) \
+ (ops)->flags |= FTRACE_OPS_FL_ENABLED; \
+ ___ret; \
})
-# define ftrace_shutdown(ops, command) do { } while (0)
+# define ftrace_shutdown(ops, command) \
+ ({ \
+ int ___ret = __unregister_ftrace_function(ops); \
+ if (!___ret) \
+ (ops)->flags &= ~FTRACE_OPS_FL_ENABLED; \
+ ___ret; \
+ })
+
# define ftrace_startup_sysctl() do { } while (0)
# define ftrace_shutdown_sysctl() do { } while (0)
@@ -4307,6 +4374,34 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs)
#endif /* CONFIG_DYNAMIC_FTRACE */
+__init void ftrace_init_global_array_ops(struct trace_array *tr)
+{
+ tr->ops = &global_ops;
+ tr->ops->private = tr;
+}
+
+void ftrace_init_array_ops(struct trace_array *tr, ftrace_func_t func)
+{
+ /* If we filter on pids, update to use the pid function */
+ if (tr->flags & TRACE_ARRAY_FL_GLOBAL) {
+ if (WARN_ON(tr->ops->func != ftrace_stub))
+ printk("ftrace ops had %pS for function\n",
+ tr->ops->func);
+ /* Only the top level instance does pid tracing */
+ if (!list_empty(&ftrace_pids)) {
+ set_ftrace_pid_function(func);
+ func = ftrace_pid_func;
+ }
+ }
+ tr->ops->func = func;
+ tr->ops->private = tr;
+}
+
+void ftrace_reset_array_ops(struct trace_array *tr)
+{
+ tr->ops->func = ftrace_stub;
+}
+
static void
ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct pt_regs *regs)
@@ -4320,12 +4415,21 @@ ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip,
*/
preempt_disable_notrace();
trace_recursion_set(TRACE_CONTROL_BIT);
+
+ /*
+ * Control funcs (perf) use RCU. Only trace if
+ * RCU is currently active.
+ */
+ if (!rcu_is_watching())
+ goto out;
+
do_for_each_ftrace_op(op, ftrace_control_list) {
if (!(op->flags & FTRACE_OPS_FL_STUB) &&
!ftrace_function_local_disabled(op) &&
ftrace_ops_test(op, ip, regs))
op->func(ip, parent_ip, op, regs);
} while_for_each_ftrace_op(op);
+ out:
trace_recursion_clear(TRACE_CONTROL_BIT);
preempt_enable_notrace();
}
@@ -4356,9 +4460,16 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
*/
preempt_disable_notrace();
do_for_each_ftrace_op(op, ftrace_ops_list) {
- if (ftrace_ops_test(op, ip, regs))
+ if (ftrace_ops_test(op, ip, regs)) {
+ if (WARN_ON(!op->func)) {
+ function_trace_stop = 1;
+ printk("op=%p %pS\n", op, op);
+ goto out;
+ }
op->func(ip, parent_ip, op, regs);
+ }
} while_for_each_ftrace_op(op);
+out:
preempt_enable_notrace();
trace_clear_recursion(bit);
}
@@ -4631,7 +4742,7 @@ static const struct file_operations ftrace_pid_fops = {
.open = ftrace_pid_open,
.write = ftrace_pid_write,
.read = seq_read,
- .llseek = ftrace_filter_lseek,
+ .llseek = tracing_lseek,
.release = ftrace_pid_release,
};
@@ -4695,9 +4806,7 @@ int register_ftrace_function(struct ftrace_ops *ops)
mutex_lock(&ftrace_lock);
- ret = __register_ftrace_function(ops);
- if (!ret)
- ret = ftrace_startup(ops, 0);
+ ret = ftrace_startup(ops, 0);
mutex_unlock(&ftrace_lock);
@@ -4716,9 +4825,7 @@ int unregister_ftrace_function(struct ftrace_ops *ops)
int ret;
mutex_lock(&ftrace_lock);
- ret = __unregister_ftrace_function(ops);
- if (!ret)
- ftrace_shutdown(ops, 0);
+ ret = ftrace_shutdown(ops, 0);
mutex_unlock(&ftrace_lock);
return ret;
@@ -4767,7 +4874,6 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
static int ftrace_graph_active;
-static struct notifier_block ftrace_suspend_notifier;
int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)
{
@@ -4778,6 +4884,7 @@ int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)
trace_func_graph_ret_t ftrace_graph_return =
(trace_func_graph_ret_t)ftrace_stub;
trace_func_graph_ent_t ftrace_graph_entry = ftrace_graph_entry_stub;
+static trace_func_graph_ent_t __ftrace_graph_entry = ftrace_graph_entry_stub;
/* Try to assign a return stack array on FTRACE_RETSTACK_ALLOC_SIZE tasks. */
static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
@@ -4912,6 +5019,34 @@ ftrace_suspend_notifier_call(struct notifier_block *bl, unsigned long state,
return NOTIFY_DONE;
}
+static int ftrace_graph_entry_test(struct ftrace_graph_ent *trace)
+{
+ if (!ftrace_ops_test(&global_ops, trace->func, NULL))
+ return 0;
+ return __ftrace_graph_entry(trace);
+}
+
+/*
+ * The function graph tracer should only trace the functions defined
+ * by set_ftrace_filter and set_ftrace_notrace. If another function
+ * tracer ops is registered, the graph tracer must test the
+ * function against the global ops rather than tracing any function
+ * that any other ftrace_ops has registered.
+ */
+static void update_function_graph_func(void)
+{
+ if (ftrace_ops_list == &ftrace_list_end ||
+ (ftrace_ops_list == &global_ops &&
+ global_ops.next == &ftrace_list_end))
+ ftrace_graph_entry = __ftrace_graph_entry;
+ else
+ ftrace_graph_entry = ftrace_graph_entry_test;
+}
+
+static struct notifier_block ftrace_suspend_notifier = {
+ .notifier_call = ftrace_suspend_notifier_call,
+};
+
int register_ftrace_graph(trace_func_graph_ret_t retfunc,
trace_func_graph_ent_t entryfunc)
{
@@ -4925,7 +5060,6 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,
goto out;
}
- ftrace_suspend_notifier.notifier_call = ftrace_suspend_notifier_call;
register_pm_notifier(&ftrace_suspend_notifier);
ftrace_graph_active++;
@@ -4936,7 +5070,19 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,
}
ftrace_graph_return = retfunc;
- ftrace_graph_entry = entryfunc;
+
+ /*
+ * Update the indirect function to the entryfunc, and the
+ * function that gets called to the entry_test first. Then
+ * call the update fgraph entry function to determine if
+ * the entryfunc should be called directly or not.
+ */
+ __ftrace_graph_entry = entryfunc;
+ ftrace_graph_entry = ftrace_graph_entry_test;
+ update_function_graph_func();
+
+ /* Function graph doesn't use the .func field of global_ops */
+ global_ops.flags |= FTRACE_OPS_FL_STUB;
ret = ftrace_startup(&global_ops, FTRACE_START_FUNC_RET);
@@ -4955,7 +5101,9 @@ void unregister_ftrace_graph(void)
ftrace_graph_active--;
ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
ftrace_graph_entry = ftrace_graph_entry_stub;
+ __ftrace_graph_entry = ftrace_graph_entry_stub;
ftrace_shutdown(&global_ops, FTRACE_STOP_FUNC_RET);
+ global_ops.flags &= ~FTRACE_OPS_FL_STUB;
unregister_pm_notifier(&ftrace_suspend_notifier);
unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL);
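
The trickiest part of the ftrace.c changes is the ordering described in update_ftrace_function() and ftrace_modify_all_code(): the new ops pointer is published first, a write barrier is issued, every CPU is then forced through a read barrier (via an IPI, or schedule_on_each_cpu() in the static-tracing case), and only after that is the function pointer used by the hot path switched over. A hedged sketch of that publish-then-switch pattern (illustrative names, not the kernel's own helpers):

/* Hedged sketch of the two-step handoff; not the kernel's actual code. */
#include <linux/smp.h>
#include <asm/barrier.h>

static void *published_ops;            /* data the new callback dereferences */
static void (*active_callback)(void);

static void sync_rmb_ipi(void *data)
{
	smp_rmb();                      /* pairs with the publisher's smp_wmb() */
}

static void switch_callback(void *new_ops, void (*new_cb)(void))
{
	/* 1. Publish the data the new callback will need. */
	published_ops = new_ops;
	smp_wmb();

	/* 2. Force every CPU to order its reads after the publish. */
	smp_call_function(sync_rmb_ipi, NULL, 1);

	/* 3. Only now switch the function pointer the hot path calls. */
	active_callback = new_cb;
}

Until step 3 completes, CPUs may still call the old callback, but by then they are guaranteed to see the newly published ops if they do call the new one.
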
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index cc2f66f68dc..ff7027199a9 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -543,7 +543,7 @@ static void rb_wake_up_waiters(struct irq_work *work)
* as data is added to any of the @buffer's cpu buffers. Otherwise
* it will wait for data to be added to a specific cpu buffer.
*/
-void ring_buffer_wait(struct ring_buffer *buffer, int cpu)
+int ring_buffer_wait(struct ring_buffer *buffer, int cpu)
{
struct ring_buffer_per_cpu *cpu_buffer;
DEFINE_WAIT(wait);
@@ -557,6 +557,8 @@ void ring_buffer_wait(struct ring_buffer *buffer, int cpu)
if (cpu == RING_BUFFER_ALL_CPUS)
work = &buffer->irq_work;
else {
+ if (!cpumask_test_cpu(cpu, buffer->cpumask))
+ return -ENODEV;
cpu_buffer = buffer->buffers[cpu];
work = &cpu_buffer->irq_work;
}
@@ -591,6 +593,7 @@ void ring_buffer_wait(struct ring_buffer *buffer, int cpu)
schedule();
finish_wait(&work->waiters, &wait);
+ return 0;
}
/**
@@ -613,10 +616,6 @@ int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
struct ring_buffer_per_cpu *cpu_buffer;
struct rb_irq_work *work;
- if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) ||
- (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu)))
- return POLLIN | POLLRDNORM;
-
if (cpu == RING_BUFFER_ALL_CPUS)
work = &buffer->irq_work;
else {
@@ -1301,7 +1300,7 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
* In that off case, we need to allocate for all possible cpus.
*/
#ifdef CONFIG_HOTPLUG_CPU
- get_online_cpus();
+ cpu_notifier_register_begin();
cpumask_copy(buffer->cpumask, cpu_online_mask);
#else
cpumask_copy(buffer->cpumask, cpu_possible_mask);
@@ -1324,10 +1323,10 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
#ifdef CONFIG_HOTPLUG_CPU
buffer->cpu_notify.notifier_call = rb_cpu_notify;
buffer->cpu_notify.priority = 0;
- register_cpu_notifier(&buffer->cpu_notify);
+ __register_cpu_notifier(&buffer->cpu_notify);
+ cpu_notifier_register_done();
#endif
- put_online_cpus();
mutex_init(&buffer->mutex);
return buffer;
@@ -1341,7 +1340,9 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
fail_free_cpumask:
free_cpumask_var(buffer->cpumask);
- put_online_cpus();
+#ifdef CONFIG_HOTPLUG_CPU
+ cpu_notifier_register_done();
+#endif
fail_free_buffer:
kfree(buffer);
@@ -1358,16 +1359,17 @@ ring_buffer_free(struct ring_buffer *buffer)
{
int cpu;
- get_online_cpus();
-
#ifdef CONFIG_HOTPLUG_CPU
- unregister_cpu_notifier(&buffer->cpu_notify);
+ cpu_notifier_register_begin();
+ __unregister_cpu_notifier(&buffer->cpu_notify);
#endif
for_each_buffer_cpu(buffer, cpu)
rb_free_cpu_buffer(buffer->buffers[cpu]);
- put_online_cpus();
+#ifdef CONFIG_HOTPLUG_CPU
+ cpu_notifier_register_done();
+#endif
kfree(buffer->buffers);
free_cpumask_var(buffer->cpumask);
@@ -2397,6 +2399,13 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
write &= RB_WRITE_MASK;
tail = write - length;
+ /*
+ * If this is the first commit on the page, then it has the same
+ * timestamp as the page itself.
+ */
+ if (!tail)
+ delta = 0;
+
/* See if we shot pass the end of this buffer page */
if (unlikely(write > BUF_PAGE_SIZE))
return rb_move_tail(cpu_buffer, length, tail,
@@ -2558,7 +2567,7 @@ rb_reserve_next_event(struct ring_buffer *buffer,
if (unlikely(test_time_stamp(delta))) {
int local_clock_stable = 1;
#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
- local_clock_stable = sched_clock_stable;
+ local_clock_stable = sched_clock_stable();
#endif
WARN_ONCE(delta > (1ULL << 59),
KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s",
diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
index a5457d577b9..0434ff1b808 100644
--- a/kernel/trace/ring_buffer_benchmark.c
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -40,8 +40,8 @@ static int write_iteration = 50;
module_param(write_iteration, uint, 0644);
MODULE_PARM_DESC(write_iteration, "# of writes between timestamp readings");
-static int producer_nice = 19;
-static int consumer_nice = 19;
+static int producer_nice = MAX_NICE;
+static int consumer_nice = MAX_NICE;
static int producer_fifo = -1;
static int consumer_fifo = -1;
@@ -308,7 +308,7 @@ static void ring_buffer_producer(void)
/* Let the user know that the test is running at low priority */
if (producer_fifo < 0 && consumer_fifo < 0 &&
- producer_nice == 19 && consumer_nice == 19)
+ producer_nice == MAX_NICE && consumer_nice == MAX_NICE)
trace_printk("WARNING!!! This test is running at lowest priority.\n");
trace_printk("Time: %lld (usecs)\n", time);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 7974ba20557..291397e6666 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -73,7 +73,8 @@ static struct tracer_flags dummy_tracer_flags = {
.opts = dummy_tracer_opt
};
-static int dummy_set_flag(u32 old_flags, u32 bit, int set)
+static int
+dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
{
return 0;
}
@@ -118,7 +119,7 @@ enum ftrace_dump_mode ftrace_dump_on_oops;
/* When set, tracing will stop when a WARN*() is hit */
int __disable_trace_on_warning;
-static int tracing_set_tracer(const char *buf);
+static int tracing_set_tracer(struct trace_array *tr, const char *buf);
#define MAX_TRACER_SIZE 100
static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
@@ -180,6 +181,17 @@ static int __init set_trace_boot_options(char *str)
}
__setup("trace_options=", set_trace_boot_options);
+static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
+static char *trace_boot_clock __initdata;
+
+static int __init set_trace_boot_clock(char *str)
+{
+ strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
+ trace_boot_clock = trace_boot_clock_buf;
+ return 0;
+}
+__setup("trace_clock=", set_trace_boot_clock);
+
unsigned long long ns2usecs(cycle_t nsec)
{
@@ -235,15 +247,35 @@ void trace_array_put(struct trace_array *this_tr)
mutex_unlock(&trace_types_lock);
}
-int filter_current_check_discard(struct ring_buffer *buffer,
- struct ftrace_event_call *call, void *rec,
- struct ring_buffer_event *event)
+int filter_check_discard(struct ftrace_event_file *file, void *rec,
+ struct ring_buffer *buffer,
+ struct ring_buffer_event *event)
{
- return filter_check_discard(call, rec, buffer, event);
+ if (unlikely(file->flags & FTRACE_EVENT_FL_FILTERED) &&
+ !filter_match_preds(file->filter, rec)) {
+ ring_buffer_discard_commit(buffer, event);
+ return 1;
+ }
+
+ return 0;
}
-EXPORT_SYMBOL_GPL(filter_current_check_discard);
+EXPORT_SYMBOL_GPL(filter_check_discard);
-cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
+int call_filter_check_discard(struct ftrace_event_call *call, void *rec,
+ struct ring_buffer *buffer,
+ struct ring_buffer_event *event)
+{
+ if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
+ !filter_match_preds(call->filter, rec)) {
+ ring_buffer_discard_commit(buffer, event);
+ return 1;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(call_filter_check_discard);
+
+static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
{
u64 ts;
@@ -434,13 +466,22 @@ int __trace_puts(unsigned long ip, const char *str, int size)
struct print_entry *entry;
unsigned long irq_flags;
int alloc;
+ int pc;
+
+ if (!(trace_flags & TRACE_ITER_PRINTK))
+ return 0;
+
+ pc = preempt_count();
+
+ if (unlikely(tracing_selftest_running || tracing_disabled))
+ return 0;
alloc = sizeof(*entry) + size + 2; /* possible \n added */
local_save_flags(irq_flags);
buffer = global_trace.trace_buffer.buffer;
event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
- irq_flags, preempt_count());
+ irq_flags, pc);
if (!event)
return 0;
@@ -457,6 +498,7 @@ int __trace_puts(unsigned long ip, const char *str, int size)
entry->buf[size] = '\0';
__buffer_unlock_commit(buffer, event);
+ ftrace_trace_stack(buffer, irq_flags, 4, pc);
return size;
}
@@ -474,11 +516,20 @@ int __trace_bputs(unsigned long ip, const char *str)
struct bputs_entry *entry;
unsigned long irq_flags;
int size = sizeof(struct bputs_entry);
+ int pc;
+
+ if (!(trace_flags & TRACE_ITER_PRINTK))
+ return 0;
+
+ pc = preempt_count();
+
+ if (unlikely(tracing_selftest_running || tracing_disabled))
+ return 0;
local_save_flags(irq_flags);
buffer = global_trace.trace_buffer.buffer;
event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
- irq_flags, preempt_count());
+ irq_flags, pc);
if (!event)
return 0;
@@ -487,6 +538,7 @@ int __trace_bputs(unsigned long ip, const char *str)
entry->str = str;
__buffer_unlock_commit(buffer, event);
+ ftrace_trace_stack(buffer, irq_flags, 4, pc);
return 1;
}
@@ -561,7 +613,7 @@ static int alloc_snapshot(struct trace_array *tr)
return 0;
}
-void free_snapshot(struct trace_array *tr)
+static void free_snapshot(struct trace_array *tr)
{
/*
* We don't free the ring buffer. instead, resize it because
@@ -575,6 +627,28 @@ void free_snapshot(struct trace_array *tr)
}
/**
+ * tracing_alloc_snapshot - allocate snapshot buffer.
+ *
+ * This only allocates the snapshot buffer if it isn't already
+ * allocated - it doesn't also take a snapshot.
+ *
+ * This is meant to be used in cases where the snapshot buffer needs
+ * to be set up for events that can't sleep but need to be able to
+ * trigger a snapshot.
+ */
+int tracing_alloc_snapshot(void)
+{
+ struct trace_array *tr = &global_trace;
+ int ret;
+
+ ret = alloc_snapshot(tr);
+ WARN_ON(ret < 0);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
+
+/**
* trace_snapshot_alloc - allocate and take a snapshot of the current buffer.
*
* This is similar to trace_snapshot(), but it will allocate the
@@ -587,11 +661,10 @@ void free_snapshot(struct trace_array *tr)
*/
void tracing_snapshot_alloc(void)
{
- struct trace_array *tr = &global_trace;
int ret;
- ret = alloc_snapshot(tr);
- if (WARN_ON(ret < 0))
+ ret = tracing_alloc_snapshot();
+ if (ret < 0)
return;
tracing_snapshot();
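
tracing_alloc_snapshot() exists so that code which cannot sleep can set the snapshot buffer up ahead of time and later call tracing_snapshot() from atomic context. A hedged usage sketch: the watchdog names are made up, and the declarations are assumed to live alongside tracing_snapshot() in linux/kernel.h.

    #include <linux/kernel.h>
    #include <linux/init.h>

    /* Hypothetical init path: allocation is allowed to sleep here. */
    static int __init my_watchdog_init(void)
    {
            int ret;

            ret = tracing_alloc_snapshot();         /* allocate only, don't snapshot yet */
            if (ret < 0)
                    return ret;
            return 0;
    }

    /* Hypothetical lockup handler: no allocation needed, just swap buffers. */
    static void my_watchdog_fired(void)
    {
            tracing_snapshot();
    }
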
@@ -603,6 +676,12 @@ void tracing_snapshot(void)
WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
}
EXPORT_SYMBOL_GPL(tracing_snapshot);
+int tracing_alloc_snapshot(void)
+{
+ WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
+ return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
void tracing_snapshot_alloc(void)
{
/* Give warning */
@@ -744,7 +823,7 @@ static struct {
{ trace_clock_local, "local", 1 },
{ trace_clock_global, "global", 1 },
{ trace_clock_counter, "counter", 0 },
- { trace_clock_jiffies, "uptime", 1 },
+ { trace_clock_jiffies, "uptime", 0 },
{ trace_clock, "perf", 1 },
ARCH_TRACE_CLOCKS
};
@@ -843,9 +922,12 @@ int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
if (isspace(ch)) {
parser->buffer[parser->idx] = 0;
parser->cont = false;
- } else {
+ } else if (parser->idx < parser->size - 1) {
parser->cont = true;
parser->buffer[parser->idx++] = ch;
+ } else {
+ ret = -EINVAL;
+ goto out;
}
*ppos += read;
@@ -895,27 +977,9 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
return cnt;
}
-/*
- * ftrace_max_lock is used to protect the swapping of buffers
- * when taking a max snapshot. The buffers themselves are
- * protected by per_cpu spinlocks. But the action of the swap
- * needs its own lock.
- *
- * This is defined as a arch_spinlock_t in order to help
- * with performance when lockdep debugging is enabled.
- *
- * It is also used in other places outside the update_max_tr
- * so it needs to be defined outside of the
- * CONFIG_TRACER_MAX_TRACE.
- */
-static arch_spinlock_t ftrace_max_lock =
- (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
-
unsigned long __read_mostly tracing_thresh;
#ifdef CONFIG_TRACER_MAX_TRACE
-unsigned long __read_mostly tracing_max_latency;
-
/*
* Copy the new maximum trace into the separate maximum-trace
* structure. (this way the maximum trace is permanently saved,
@@ -932,7 +996,7 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
max_buf->cpu = cpu;
max_buf->time_start = data->preempt_timestamp;
- max_data->saved_latency = tracing_max_latency;
+ max_data->saved_latency = tr->max_latency;
max_data->critical_start = data->critical_start;
max_data->critical_end = data->critical_end;
@@ -980,14 +1044,14 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
return;
}
- arch_spin_lock(&ftrace_max_lock);
+ arch_spin_lock(&tr->max_lock);
buf = tr->trace_buffer.buffer;
tr->trace_buffer.buffer = tr->max_buffer.buffer;
tr->max_buffer.buffer = buf;
__update_max_tr(tr, tsk, cpu);
- arch_spin_unlock(&ftrace_max_lock);
+ arch_spin_unlock(&tr->max_lock);
}
/**
@@ -1013,7 +1077,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
return;
}
- arch_spin_lock(&ftrace_max_lock);
+ arch_spin_lock(&tr->max_lock);
ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
@@ -1031,17 +1095,17 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
__update_max_tr(tr, tsk, cpu);
- arch_spin_unlock(&ftrace_max_lock);
+ arch_spin_unlock(&tr->max_lock);
}
#endif /* CONFIG_TRACER_MAX_TRACE */
-static void default_wait_pipe(struct trace_iterator *iter)
+static int wait_on_pipe(struct trace_iterator *iter)
{
/* Iterators are static, they should be filled or empty */
if (trace_buffer_iter(iter, iter->cpu_file))
- return;
+ return 0;
- ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file);
+ return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file);
}
#ifdef CONFIG_FTRACE_STARTUP_TEST
@@ -1152,8 +1216,6 @@ int register_tracer(struct tracer *type)
else
if (!type->flags->opts)
type->flags->opts = dummy_tracer_opt;
- if (!type->wait_pipe)
- type->wait_pipe = default_wait_pipe;
ret = run_tracer_selftest(type);
if (ret < 0)
@@ -1174,7 +1236,7 @@ int register_tracer(struct tracer *type)
printk(KERN_INFO "Starting tracer '%s'\n", type->name);
/* Do we want this tracer to start on bootup? */
- tracing_set_tracer(type->name);
+ tracing_set_tracer(&global_trace, type->name);
default_bootup_tracer = NULL;
/* disable other selftests, since this will break it. */
tracing_selftest_disabled = true;
@@ -1237,42 +1299,76 @@ void tracing_reset_all_online_cpus(void)
}
}
-#define SAVED_CMDLINES 128
+#define SAVED_CMDLINES_DEFAULT 128
#define NO_CMDLINE_MAP UINT_MAX
-static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
-static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
-static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
-static int cmdline_idx;
static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
+struct saved_cmdlines_buffer {
+ unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
+ unsigned *map_cmdline_to_pid;
+ unsigned cmdline_num;
+ int cmdline_idx;
+ char *saved_cmdlines;
+};
+static struct saved_cmdlines_buffer *savedcmd;
/* temporary disable recording */
static atomic_t trace_record_cmdline_disabled __read_mostly;
-static void trace_init_cmdlines(void)
+static inline char *get_saved_cmdlines(int idx)
{
- memset(&map_pid_to_cmdline, NO_CMDLINE_MAP, sizeof(map_pid_to_cmdline));
- memset(&map_cmdline_to_pid, NO_CMDLINE_MAP, sizeof(map_cmdline_to_pid));
- cmdline_idx = 0;
+ return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
}
-int is_tracing_stopped(void)
+static inline void set_cmdline(int idx, const char *cmdline)
{
- return global_trace.stop_count;
+ memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
}
-/**
- * ftrace_off_permanent - disable all ftrace code permanently
- *
- * This should only be called when a serious anomally has
- * been detected. This will turn off the function tracing,
- * ring buffers, and other tracing utilites. It takes no
- * locks and can be called from any context.
- */
-void ftrace_off_permanent(void)
+static int allocate_cmdlines_buffer(unsigned int val,
+ struct saved_cmdlines_buffer *s)
{
- tracing_disabled = 1;
- ftrace_stop();
- tracing_off_permanent();
+ s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
+ GFP_KERNEL);
+ if (!s->map_cmdline_to_pid)
+ return -ENOMEM;
+
+ s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
+ if (!s->saved_cmdlines) {
+ kfree(s->map_cmdline_to_pid);
+ return -ENOMEM;
+ }
+
+ s->cmdline_idx = 0;
+ s->cmdline_num = val;
+ memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
+ sizeof(s->map_pid_to_cmdline));
+ memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
+ val * sizeof(*s->map_cmdline_to_pid));
+
+ return 0;
+}
+
+static int trace_create_savedcmd(void)
+{
+ int ret;
+
+ savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
+ if (!savedcmd)
+ return -ENOMEM;
+
+ ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
+ if (ret < 0) {
+ kfree(savedcmd);
+ savedcmd = NULL;
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+int is_tracing_stopped(void)
+{
+ return global_trace.stop_count;
}
/**
@@ -1300,7 +1396,7 @@ void tracing_start(void)
}
/* Prevent the buffers from switching */
- arch_spin_lock(&ftrace_max_lock);
+ arch_spin_lock(&global_trace.max_lock);
buffer = global_trace.trace_buffer.buffer;
if (buffer)
@@ -1312,9 +1408,8 @@ void tracing_start(void)
ring_buffer_record_enable(buffer);
#endif
- arch_spin_unlock(&ftrace_max_lock);
+ arch_spin_unlock(&global_trace.max_lock);
- ftrace_start();
out:
raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
}
@@ -1361,13 +1456,12 @@ void tracing_stop(void)
struct ring_buffer *buffer;
unsigned long flags;
- ftrace_stop();
raw_spin_lock_irqsave(&global_trace.start_lock, flags);
if (global_trace.stop_count++)
goto out;
/* Prevent the buffers from switching */
- arch_spin_lock(&ftrace_max_lock);
+ arch_spin_lock(&global_trace.max_lock);
buffer = global_trace.trace_buffer.buffer;
if (buffer)
@@ -1379,7 +1473,7 @@ void tracing_stop(void)
ring_buffer_record_disable(buffer);
#endif
- arch_spin_unlock(&ftrace_max_lock);
+ arch_spin_unlock(&global_trace.max_lock);
out:
raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
@@ -1408,12 +1502,12 @@ static void tracing_stop_tr(struct trace_array *tr)
void trace_stop_cmdline_recording(void);
-static void trace_save_cmdline(struct task_struct *tsk)
+static int trace_save_cmdline(struct task_struct *tsk)
{
unsigned pid, idx;
if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
- return;
+ return 0;
/*
* It's not the end of the world if we don't get
@@ -1422,11 +1516,11 @@ static void trace_save_cmdline(struct task_struct *tsk)
* so if we miss here, then better luck next time.
*/
if (!arch_spin_trylock(&trace_cmdline_lock))
- return;
+ return 0;
- idx = map_pid_to_cmdline[tsk->pid];
+ idx = savedcmd->map_pid_to_cmdline[tsk->pid];
if (idx == NO_CMDLINE_MAP) {
- idx = (cmdline_idx + 1) % SAVED_CMDLINES;
+ idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
/*
* Check whether the cmdline buffer at idx has a pid
@@ -1434,22 +1528,24 @@ static void trace_save_cmdline(struct task_struct *tsk)
* need to clear the map_pid_to_cmdline. Otherwise we
* would read the new comm for the old pid.
*/
- pid = map_cmdline_to_pid[idx];
+ pid = savedcmd->map_cmdline_to_pid[idx];
if (pid != NO_CMDLINE_MAP)
- map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
+ savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
- map_cmdline_to_pid[idx] = tsk->pid;
- map_pid_to_cmdline[tsk->pid] = idx;
+ savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
+ savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
- cmdline_idx = idx;
+ savedcmd->cmdline_idx = idx;
}
- memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
+ set_cmdline(idx, tsk->comm);
arch_spin_unlock(&trace_cmdline_lock);
+
+ return 1;
}
-void trace_find_cmdline(int pid, char comm[])
+static void __trace_find_cmdline(int pid, char comm[])
{
unsigned map;
@@ -1468,13 +1564,19 @@ void trace_find_cmdline(int pid, char comm[])
return;
}
- preempt_disable();
- arch_spin_lock(&trace_cmdline_lock);
- map = map_pid_to_cmdline[pid];
+ map = savedcmd->map_pid_to_cmdline[pid];
if (map != NO_CMDLINE_MAP)
- strcpy(comm, saved_cmdlines[map]);
+ strcpy(comm, get_saved_cmdlines(map));
else
strcpy(comm, "<...>");
+}
+
+void trace_find_cmdline(int pid, char comm[])
+{
+ preempt_disable();
+ arch_spin_lock(&trace_cmdline_lock);
+
+ __trace_find_cmdline(pid, comm);
arch_spin_unlock(&trace_cmdline_lock);
preempt_enable();
@@ -1488,9 +1590,8 @@ void tracing_record_cmdline(struct task_struct *tsk)
if (!__this_cpu_read(trace_cmdline_save))
return;
- __this_cpu_write(trace_cmdline_save, false);
-
- trace_save_cmdline(tsk);
+ if (trace_save_cmdline(tsk))
+ __this_cpu_write(trace_cmdline_save, false);
}
void
@@ -1509,7 +1610,8 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
#endif
((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
- (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
+ (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
+ (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
}
EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
@@ -1558,15 +1660,31 @@ void trace_buffer_unlock_commit(struct ring_buffer *buffer,
}
EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);
+static struct ring_buffer *temp_buffer;
+
struct ring_buffer_event *
trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
struct ftrace_event_file *ftrace_file,
int type, unsigned long len,
unsigned long flags, int pc)
{
+ struct ring_buffer_event *entry;
+
*current_rb = ftrace_file->tr->trace_buffer.buffer;
- return trace_buffer_lock_reserve(*current_rb,
+ entry = trace_buffer_lock_reserve(*current_rb,
type, len, flags, pc);
+ /*
+ * If tracing is off, but we have triggers enabled
+ * we still need to look at the event data. Use the temp_buffer
+ * to store the trace event for the trigger to use. It's recursion
+ * safe and will not be recorded anywhere.
+ */
+ if (!entry && ftrace_file->flags & FTRACE_EVENT_FL_TRIGGER_COND) {
+ *current_rb = temp_buffer;
+ entry = trace_buffer_lock_reserve(*current_rb,
+ type, len, flags, pc);
+ }
+ return entry;
}
EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
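
The comment above is the whole idea behind temp_buffer: a conditional trigger still needs a filled-in record even when the real buffer rejects the reservation, so the record is staged in a scratch buffer whose entries are always discarded. Restated as a stand-alone sketch purely for illustration; reserve_for_trigger() and the scratch argument are made-up names, and trace_buffer_lock_reserve() is the trace.c-internal reserve helper.

    static struct ring_buffer_event *
    reserve_for_trigger(struct ring_buffer **rb, struct ring_buffer *scratch,
                        int type, unsigned long len, unsigned long flags, int pc)
    {
            struct ring_buffer_event *event;

            event = trace_buffer_lock_reserve(*rb, type, len, flags, pc);
            if (!event) {
                    /* tracing is off: keep the data only for the trigger's filter */
                    *rb = scratch;
                    event = trace_buffer_lock_reserve(*rb, type, len, flags, pc);
            }
            return event;   /* scratch entries are later discarded, never committed */
    }
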
@@ -1630,7 +1748,7 @@ trace_function(struct trace_array *tr,
entry->ip = ip;
entry->parent_ip = parent_ip;
- if (!filter_check_discard(call, entry, buffer, event))
+ if (!call_filter_check_discard(call, entry, buffer, event))
__buffer_unlock_commit(buffer, event);
}
@@ -1676,7 +1794,7 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer,
*/
barrier();
if (use_stack == 1) {
- trace.entries = &__get_cpu_var(ftrace_stack).calls[0];
+ trace.entries = this_cpu_ptr(ftrace_stack.calls);
trace.max_entries = FTRACE_STACK_MAX_ENTRIES;
if (regs)
@@ -1714,7 +1832,7 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer,
entry->size = trace.nr_entries;
- if (!filter_check_discard(call, entry, buffer, event))
+ if (!call_filter_check_discard(call, entry, buffer, event))
__buffer_unlock_commit(buffer, event);
out:
@@ -1816,7 +1934,7 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
trace.entries = entry->caller;
save_stack_trace_user(&trace);
- if (!filter_check_discard(call, entry, buffer, event))
+ if (!call_filter_check_discard(call, entry, buffer, event))
__buffer_unlock_commit(buffer, event);
out_drop_count:
@@ -1925,7 +2043,21 @@ void trace_printk_init_buffers(void)
if (alloc_percpu_trace_buffer())
return;
- pr_info("ftrace: Allocated trace_printk buffers\n");
+ /* trace_printk() is for debug use only. Don't use it in production. */
+
+ pr_warning("\n**********************************************************\n");
+ pr_warning("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
+ pr_warning("** **\n");
+ pr_warning("** trace_printk() being used. Allocating extra memory. **\n");
+ pr_warning("** **\n");
+ pr_warning("** This means that this is a DEBUG kernel and it is **\n");
+ pr_warning("** unsafe for produciton use. **\n");
+ pr_warning("** **\n");
+ pr_warning("** If you see this message and you are not debugging **\n");
+ pr_warning("** the kernel, report this immediately to your vendor! **\n");
+ pr_warning("** **\n");
+ pr_warning("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
+ pr_warning("**********************************************************\n");
/* Expand the buffers to set size */
tracing_update_buffers();
@@ -2008,7 +2140,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
entry->fmt = fmt;
memcpy(entry->buf, tbuffer, sizeof(u32) * len);
- if (!filter_check_discard(call, entry, buffer, event)) {
+ if (!call_filter_check_discard(call, entry, buffer, event)) {
__buffer_unlock_commit(buffer, event);
ftrace_trace_stack(buffer, flags, 6, pc);
}
@@ -2063,7 +2195,7 @@ __trace_array_vprintk(struct ring_buffer *buffer,
memcpy(&entry->buf, tbuffer, len);
entry->buf[len] = '\0';
- if (!filter_check_discard(call, entry, buffer, event)) {
+ if (!call_filter_check_discard(call, entry, buffer, event)) {
__buffer_unlock_commit(buffer, event);
ftrace_trace_stack(buffer, flags, 6, pc);
}
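
Every direct writer in trace.c now follows the same commit-or-discard shape with the renamed call_filter_check_discard(): reserve an event, fill it in, then either commit it or let the filter throw the reservation away. Condensed into one helper purely for illustration; emit_entry() is not a real function, and __buffer_unlock_commit() is trace.c-internal.

    static void emit_entry(struct ftrace_event_call *call,
                           struct ring_buffer *buffer,
                           struct ring_buffer_event *event, void *entry)
    {
            /* discards the reservation when the call's filter rejects @entry */
            if (!call_filter_check_discard(call, entry, buffer, event))
                    __buffer_unlock_commit(buffer, event);
    }
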
@@ -2760,7 +2892,7 @@ static void show_snapshot_main_help(struct seq_file *m)
seq_printf(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n");
seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
seq_printf(m, "# Takes a snapshot of the main buffer.\n");
- seq_printf(m, "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate)\n");
+ seq_printf(m, "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n");
seq_printf(m, "# (Doesn't have to be '2' works with any number that\n");
seq_printf(m, "# is not a '0' or '1')\n");
}
@@ -2964,6 +3096,11 @@ int tracing_open_generic(struct inode *inode, struct file *filp)
return 0;
}
+bool tracing_is_disabled(void)
+{
+ return (tracing_disabled) ? true : false;
+}
+
/*
* Open and update trace_array ref count.
* Must have the current trace_array passed to it.
@@ -3074,27 +3211,52 @@ static int tracing_open(struct inode *inode, struct file *file)
return ret;
}
+/*
+ * Some tracers are not suitable for instance buffers.
+ * A tracer is always available for the global array (toplevel)
+ * or if it explicitly states that it is.
+ */
+static bool
+trace_ok_for_array(struct tracer *t, struct trace_array *tr)
+{
+ return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
+}
+
+/* Find the next tracer that this trace array may use */
+static struct tracer *
+get_tracer_for_array(struct trace_array *tr, struct tracer *t)
+{
+ while (t && !trace_ok_for_array(t, tr))
+ t = t->next;
+
+ return t;
+}
+
static void *
t_next(struct seq_file *m, void *v, loff_t *pos)
{
+ struct trace_array *tr = m->private;
struct tracer *t = v;
(*pos)++;
if (t)
- t = t->next;
+ t = get_tracer_for_array(tr, t->next);
return t;
}
static void *t_start(struct seq_file *m, loff_t *pos)
{
+ struct trace_array *tr = m->private;
struct tracer *t;
loff_t l = 0;
mutex_lock(&trace_types_lock);
- for (t = trace_types; t && l < *pos; t = t_next(m, t, &l))
- ;
+
+ t = get_tracer_for_array(tr, trace_types);
+ for (; t && l < *pos; t = t_next(m, t, &l))
+ ;
return t;
}
@@ -3129,10 +3291,21 @@ static const struct seq_operations show_traces_seq_ops = {
static int show_traces_open(struct inode *inode, struct file *file)
{
+ struct trace_array *tr = inode->i_private;
+ struct seq_file *m;
+ int ret;
+
if (tracing_disabled)
return -ENODEV;
- return seq_open(file, &show_traces_seq_ops);
+ ret = seq_open(file, &show_traces_seq_ops);
+ if (ret)
+ return ret;
+
+ m = file->private_data;
+ m->private = tr;
+
+ return 0;
}
static ssize_t
@@ -3142,19 +3315,23 @@ tracing_write_stub(struct file *filp, const char __user *ubuf,
return count;
}
-static loff_t tracing_seek(struct file *file, loff_t offset, int origin)
+loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
{
+ int ret;
+
if (file->f_mode & FMODE_READ)
- return seq_lseek(file, offset, origin);
+ ret = seq_lseek(file, offset, whence);
else
- return 0;
+ file->f_pos = ret = 0;
+
+ return ret;
}
static const struct file_operations tracing_fops = {
.open = tracing_open,
.read = seq_read,
.write = tracing_write_stub,
- .llseek = tracing_seek,
+ .llseek = tracing_lseek,
.release = tracing_release,
};
@@ -3218,7 +3395,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
mutex_lock(&tracing_cpumask_update_lock);
local_irq_disable();
- arch_spin_lock(&ftrace_max_lock);
+ arch_spin_lock(&tr->max_lock);
for_each_tracing_cpu(cpu) {
/*
* Increase/decrease the disabled counter if we are
@@ -3235,7 +3412,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
}
}
- arch_spin_unlock(&ftrace_max_lock);
+ arch_spin_unlock(&tr->max_lock);
local_irq_enable();
cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
@@ -3288,13 +3465,14 @@ static int tracing_trace_options_show(struct seq_file *m, void *v)
return 0;
}
-static int __set_tracer_option(struct tracer *trace,
+static int __set_tracer_option(struct trace_array *tr,
struct tracer_flags *tracer_flags,
struct tracer_opt *opts, int neg)
{
+ struct tracer *trace = tr->current_trace;
int ret;
- ret = trace->set_flag(tracer_flags->val, opts->bit, !neg);
+ ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
if (ret)
return ret;
@@ -3306,8 +3484,9 @@ static int __set_tracer_option(struct tracer *trace,
}
/* Try to assign a tracer specific option */
-static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
+static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
{
+ struct tracer *trace = tr->current_trace;
struct tracer_flags *tracer_flags = trace->flags;
struct tracer_opt *opts = NULL;
int i;
@@ -3316,8 +3495,7 @@ static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
opts = &tracer_flags->opts[i];
if (strcmp(cmp, opts->name) == 0)
- return __set_tracer_option(trace, trace->flags,
- opts, neg);
+ return __set_tracer_option(tr, trace->flags, opts, neg);
}
return -EINVAL;
@@ -3340,7 +3518,7 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
/* Give the tracer a chance to approve the change */
if (tr->current_trace->flag_changed)
- if (tr->current_trace->flag_changed(tr->current_trace, mask, !!enabled))
+ if (tr->current_trace->flag_changed(tr, mask, !!enabled))
return -EINVAL;
if (enabled)
@@ -3389,7 +3567,7 @@ static int trace_set_options(struct trace_array *tr, char *option)
/* If no option could be set, test the specific tracer options */
if (!trace_options[i])
- ret = set_tracer_option(tr->current_trace, cmp, neg);
+ ret = set_tracer_option(tr, cmp, neg);
mutex_unlock(&trace_types_lock);
@@ -3474,60 +3652,106 @@ static const char readme_msg[] =
" instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
"\t\t\t Remove sub-buffer with rmdir\n"
" trace_options\t\t- Set format or modify how tracing happens\n"
- "\t\t\t Disable an option by adding a suffix 'no' to the option name\n"
+ "\t\t\t Disable an option by adding a suffix 'no' to the\n"
+ "\t\t\t option name\n"
+ " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
#ifdef CONFIG_DYNAMIC_FTRACE
"\n available_filter_functions - list of functions that can be filtered on\n"
- " set_ftrace_filter\t- echo function name in here to only trace these functions\n"
- " accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
- " modules: Can select a group via module\n"
- " Format: :mod:<module-name>\n"
- " example: echo :mod:ext3 > set_ftrace_filter\n"
- " triggers: a command to perform when function is hit\n"
- " Format: <function>:<trigger>[:count]\n"
- " trigger: traceon, traceoff\n"
- " enable_event:<system>:<event>\n"
- " disable_event:<system>:<event>\n"
+ " set_ftrace_filter\t- echo function name in here to only trace these\n"
+ "\t\t\t functions\n"
+ "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
+ "\t modules: Can select a group via module\n"
+ "\t Format: :mod:<module-name>\n"
+ "\t example: echo :mod:ext3 > set_ftrace_filter\n"
+ "\t triggers: a command to perform when function is hit\n"
+ "\t Format: <function>:<trigger>[:count]\n"
+ "\t trigger: traceon, traceoff\n"
+ "\t\t enable_event:<system>:<event>\n"
+ "\t\t disable_event:<system>:<event>\n"
#ifdef CONFIG_STACKTRACE
- " stacktrace\n"
+ "\t\t stacktrace\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
- " snapshot\n"
+ "\t\t snapshot\n"
#endif
- " example: echo do_fault:traceoff > set_ftrace_filter\n"
- " echo do_trap:traceoff:3 > set_ftrace_filter\n"
- " The first one will disable tracing every time do_fault is hit\n"
- " The second will disable tracing at most 3 times when do_trap is hit\n"
- " The first time do trap is hit and it disables tracing, the counter\n"
- " will decrement to 2. If tracing is already disabled, the counter\n"
- " will not decrement. It only decrements when the trigger did work\n"
- " To remove trigger without count:\n"
- " echo '!<function>:<trigger> > set_ftrace_filter\n"
- " To remove trigger with a count:\n"
- " echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
+ "\t\t dump\n"
+ "\t\t cpudump\n"
+ "\t example: echo do_fault:traceoff > set_ftrace_filter\n"
+ "\t echo do_trap:traceoff:3 > set_ftrace_filter\n"
+ "\t The first one will disable tracing every time do_fault is hit\n"
+ "\t The second will disable tracing at most 3 times when do_trap is hit\n"
+ "\t The first time do trap is hit and it disables tracing, the\n"
+ "\t counter will decrement to 2. If tracing is already disabled,\n"
+ "\t the counter will not decrement. It only decrements when the\n"
+ "\t trigger did work\n"
+ "\t To remove trigger without count:\n"
+ "\t echo '!<function>:<trigger> > set_ftrace_filter\n"
+ "\t To remove trigger with a count:\n"
+ "\t echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
" set_ftrace_notrace\t- echo function name in here to never trace.\n"
- " accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
- " modules: Can select a group via module command :mod:\n"
- " Does not accept triggers\n"
+ "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
+ "\t modules: Can select a group via module command :mod:\n"
+ "\t Does not accept triggers\n"
#endif /* CONFIG_DYNAMIC_FTRACE */
#ifdef CONFIG_FUNCTION_TRACER
- " set_ftrace_pid\t- Write pid(s) to only function trace those pids (function)\n"
+ " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
+ "\t\t (function)\n"
#endif
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
" set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
" max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
- "\n snapshot\t\t- Like 'trace' but shows the content of the static snapshot buffer\n"
- "\t\t\t Read the contents for more information\n"
+ "\n snapshot\t\t- Like 'trace' but shows the content of the static\n"
+ "\t\t\t snapshot buffer. Read the contents for more\n"
+ "\t\t\t information\n"
#endif
#ifdef CONFIG_STACK_TRACER
" stack_trace\t\t- Shows the max stack trace when active\n"
" stack_max_size\t- Shows current max stack size that was traced\n"
- "\t\t\t Write into this file to reset the max size (trigger a new trace)\n"
+ "\t\t\t Write into this file to reset the max size (trigger a\n"
+ "\t\t\t new trace)\n"
#ifdef CONFIG_DYNAMIC_FTRACE
- " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace traces\n"
+ " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
+ "\t\t\t traces\n"
#endif
#endif /* CONFIG_STACK_TRACER */
+ " events/\t\t- Directory containing all trace event subsystems:\n"
+ " enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
+ " events/<system>/\t- Directory containing all trace events for <system>:\n"
+ " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
+ "\t\t\t events\n"
+ " filter\t\t- If set, only events passing filter are traced\n"
+ " events/<system>/<event>/\t- Directory containing control files for\n"
+ "\t\t\t <event>:\n"
+ " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
+ " filter\t\t- If set, only events passing filter are traced\n"
+ " trigger\t\t- If set, a command to perform when event is hit\n"
+ "\t Format: <trigger>[:count][if <filter>]\n"
+ "\t trigger: traceon, traceoff\n"
+ "\t enable_event:<system>:<event>\n"
+ "\t disable_event:<system>:<event>\n"
+#ifdef CONFIG_STACKTRACE
+ "\t\t stacktrace\n"
+#endif
+#ifdef CONFIG_TRACER_SNAPSHOT
+ "\t\t snapshot\n"
+#endif
+ "\t example: echo traceoff > events/block/block_unplug/trigger\n"
+ "\t echo traceoff:3 > events/block/block_unplug/trigger\n"
+ "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
+ "\t events/block/block_unplug/trigger\n"
+ "\t The first disables tracing every time block_unplug is hit.\n"
+ "\t The second disables tracing the first 3 times block_unplug is hit.\n"
+ "\t The third enables the kmalloc event the first 3 times block_unplug\n"
+ "\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
+ "\t Like function triggers, the counter is only decremented if it\n"
+ "\t enabled or disabled tracing.\n"
+ "\t To remove a trigger without a count:\n"
+ "\t echo '!<trigger> > <system>/<event>/trigger\n"
+ "\t To remove a trigger with a count:\n"
+ "\t echo '!<trigger>:0 > <system>/<event>/trigger\n"
+ "\t Filters can be ignored when removing a trigger.\n"
;
static ssize_t
@@ -3544,55 +3768,153 @@ static const struct file_operations tracing_readme_fops = {
.llseek = generic_file_llseek,
};
+static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ unsigned int *ptr = v;
+
+ if (*pos || m->count)
+ ptr++;
+
+ (*pos)++;
+
+ for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
+ ptr++) {
+ if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
+ continue;
+
+ return ptr;
+ }
+
+ return NULL;
+}
+
+static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
+{
+ void *v;
+ loff_t l = 0;
+
+ preempt_disable();
+ arch_spin_lock(&trace_cmdline_lock);
+
+ v = &savedcmd->map_cmdline_to_pid[0];
+ while (l <= *pos) {
+ v = saved_cmdlines_next(m, v, &l);
+ if (!v)
+ return NULL;
+ }
+
+ return v;
+}
+
+static void saved_cmdlines_stop(struct seq_file *m, void *v)
+{
+ arch_spin_unlock(&trace_cmdline_lock);
+ preempt_enable();
+}
+
+static int saved_cmdlines_show(struct seq_file *m, void *v)
+{
+ char buf[TASK_COMM_LEN];
+ unsigned int *pid = v;
+
+ __trace_find_cmdline(*pid, buf);
+ seq_printf(m, "%d %s\n", *pid, buf);
+ return 0;
+}
+
+static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
+ .start = saved_cmdlines_start,
+ .next = saved_cmdlines_next,
+ .stop = saved_cmdlines_stop,
+ .show = saved_cmdlines_show,
+};
+
+static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
+{
+ if (tracing_disabled)
+ return -ENODEV;
+
+ return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
+}
+
+static const struct file_operations tracing_saved_cmdlines_fops = {
+ .open = tracing_saved_cmdlines_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
static ssize_t
-tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,
- size_t cnt, loff_t *ppos)
+tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
+ size_t cnt, loff_t *ppos)
{
- char *buf_comm;
- char *file_buf;
- char *buf;
- int len = 0;
- int pid;
- int i;
+ char buf[64];
+ int r;
+
+ arch_spin_lock(&trace_cmdline_lock);
+ r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
+ arch_spin_unlock(&trace_cmdline_lock);
- file_buf = kmalloc(SAVED_CMDLINES*(16+TASK_COMM_LEN), GFP_KERNEL);
- if (!file_buf)
+ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
+{
+ kfree(s->saved_cmdlines);
+ kfree(s->map_cmdline_to_pid);
+ kfree(s);
+}
+
+static int tracing_resize_saved_cmdlines(unsigned int val)
+{
+ struct saved_cmdlines_buffer *s, *savedcmd_temp;
+
+ s = kmalloc(sizeof(*s), GFP_KERNEL);
+ if (!s)
return -ENOMEM;
- buf_comm = kmalloc(TASK_COMM_LEN, GFP_KERNEL);
- if (!buf_comm) {
- kfree(file_buf);
+ if (allocate_cmdlines_buffer(val, s) < 0) {
+ kfree(s);
return -ENOMEM;
}
- buf = file_buf;
+ arch_spin_lock(&trace_cmdline_lock);
+ savedcmd_temp = savedcmd;
+ savedcmd = s;
+ arch_spin_unlock(&trace_cmdline_lock);
+ free_saved_cmdlines_buffer(savedcmd_temp);
+
+ return 0;
+}
- for (i = 0; i < SAVED_CMDLINES; i++) {
- int r;
+static ssize_t
+tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ unsigned long val;
+ int ret;
- pid = map_cmdline_to_pid[i];
- if (pid == -1 || pid == NO_CMDLINE_MAP)
- continue;
+ ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
+ if (ret)
+ return ret;
- trace_find_cmdline(pid, buf_comm);
- r = sprintf(buf, "%d %s\n", pid, buf_comm);
- buf += r;
- len += r;
- }
+ /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
+ if (!val || val > PID_MAX_DEFAULT)
+ return -EINVAL;
- len = simple_read_from_buffer(ubuf, cnt, ppos,
- file_buf, len);
+ ret = tracing_resize_saved_cmdlines((unsigned int)val);
+ if (ret < 0)
+ return ret;
- kfree(file_buf);
- kfree(buf_comm);
+ *ppos += cnt;
- return len;
+ return cnt;
}
-static const struct file_operations tracing_saved_cmdlines_fops = {
- .open = tracing_open_generic,
- .read = tracing_saved_cmdlines_read,
- .llseek = generic_file_llseek,
+static const struct file_operations tracing_saved_cmdlines_size_fops = {
+ .open = tracing_open_generic,
+ .read = tracing_saved_cmdlines_size_read,
+ .write = tracing_saved_cmdlines_size_write,
};
static ssize_t
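
The resize path above is a classic pointer swap: build the replacement buffer while unlocked, publish it with a single store under trace_cmdline_lock, and free the old copy only after the lock is dropped, when no reader or writer can still be dereferencing it. The core of that pattern with the allocation elided; resize_swap() is an illustrative name, the other identifiers mirror the new struct saved_cmdlines_buffer code.

    static int resize_swap(struct saved_cmdlines_buffer *new)
    {
            struct saved_cmdlines_buffer *old;

            arch_spin_lock(&trace_cmdline_lock);    /* all users take this lock */
            old = savedcmd;
            savedcmd = new;                         /* readers see old or new, never a mix */
            arch_spin_unlock(&trace_cmdline_lock);

            free_saved_cmdlines_buffer(old);        /* safe: the lock is no longer held */
            return 0;
    }
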
@@ -3775,10 +4097,26 @@ create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
static void
destroy_trace_option_files(struct trace_option_dentry *topts);
-static int tracing_set_tracer(const char *buf)
+/*
+ * Used to clear out the tracer before deletion of an instance.
+ * Must have trace_types_lock held.
+ */
+static void tracing_set_nop(struct trace_array *tr)
+{
+ if (tr->current_trace == &nop_trace)
+ return;
+
+ tr->current_trace->enabled--;
+
+ if (tr->current_trace->reset)
+ tr->current_trace->reset(tr);
+
+ tr->current_trace = &nop_trace;
+}
+
+static int tracing_set_tracer(struct trace_array *tr, const char *buf)
{
static struct trace_option_dentry *topts;
- struct trace_array *tr = &global_trace;
struct tracer *t;
#ifdef CONFIG_TRACER_MAX_TRACE
bool had_max_tr;
@@ -3806,9 +4144,15 @@ static int tracing_set_tracer(const char *buf)
if (t == tr->current_trace)
goto out;
+ /* Some tracers are only allowed for the top level buffer */
+ if (!trace_ok_for_array(t, tr)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
trace_branch_disable();
- tr->current_trace->enabled = false;
+ tr->current_trace->enabled--;
if (tr->current_trace->reset)
tr->current_trace->reset(tr);
@@ -3831,9 +4175,11 @@ static int tracing_set_tracer(const char *buf)
free_snapshot(tr);
}
#endif
- destroy_trace_option_files(topts);
-
- topts = create_trace_option_files(tr, t);
+ /* Currently, only the top instance has options */
+ if (tr->flags & TRACE_ARRAY_FL_GLOBAL) {
+ destroy_trace_option_files(topts);
+ topts = create_trace_option_files(tr, t);
+ }
#ifdef CONFIG_TRACER_MAX_TRACE
if (t->use_max_tr && !had_max_tr) {
@@ -3850,7 +4196,7 @@ static int tracing_set_tracer(const char *buf)
}
tr->current_trace = t;
- tr->current_trace->enabled = true;
+ tr->current_trace->enabled++;
trace_branch_enable(tr);
out:
mutex_unlock(&trace_types_lock);
@@ -3862,6 +4208,7 @@ static ssize_t
tracing_set_trace_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
+ struct trace_array *tr = filp->private_data;
char buf[MAX_TRACER_SIZE+1];
int i;
size_t ret;
@@ -3881,7 +4228,7 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
buf[i] = 0;
- err = tracing_set_tracer(buf);
+ err = tracing_set_tracer(tr, buf);
if (err)
return err;
@@ -4039,29 +4386,11 @@ tracing_poll_pipe(struct file *filp, poll_table *poll_table)
return trace_poll(iter, filp, poll_table);
}
-/*
- * This is a make-shift waitqueue.
- * A tracer might use this callback on some rare cases:
- *
- * 1) the current tracer might hold the runqueue lock when it wakes up
- * a reader, hence a deadlock (sched, function, and function graph tracers)
- * 2) the function tracers, trace all functions, we don't want
- * the overhead of calling wake_up and friends
- * (and tracing them too)
- *
- * Anyway, this is really very primitive wakeup.
- */
-void poll_wait_pipe(struct trace_iterator *iter)
-{
- set_current_state(TASK_INTERRUPTIBLE);
- /* sleep for 100 msecs, and try again. */
- schedule_timeout(HZ / 10);
-}
-
/* Must be called with trace_types_lock mutex held. */
static int tracing_wait_pipe(struct file *filp)
{
struct trace_iterator *iter = filp->private_data;
+ int ret;
while (trace_empty(iter)) {
@@ -4069,15 +4398,6 @@ static int tracing_wait_pipe(struct file *filp)
return -EAGAIN;
}
- mutex_unlock(&iter->mutex);
-
- iter->trace->wait_pipe(iter);
-
- mutex_lock(&iter->mutex);
-
- if (signal_pending(current))
- return -EINTR;
-
/*
* We block until we read something and tracing is disabled.
* We still block if tracing is disabled, but we have never
@@ -4089,6 +4409,18 @@ static int tracing_wait_pipe(struct file *filp)
*/
if (!tracing_is_on() && iter->pos)
break;
+
+ mutex_unlock(&iter->mutex);
+
+ ret = wait_on_pipe(iter);
+
+ mutex_lock(&iter->mutex);
+
+ if (ret)
+ return ret;
+
+ if (signal_pending(current))
+ return -EINTR;
}
return 1;
@@ -4198,12 +4530,6 @@ out:
return sret;
}
-static void tracing_pipe_buf_release(struct pipe_inode_info *pipe,
- struct pipe_buffer *buf)
-{
- __free_page(buf->page);
-}
-
static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
unsigned int idx)
{
@@ -4212,10 +4538,8 @@ static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
static const struct pipe_buf_operations tracing_pipe_buf_ops = {
.can_merge = 0,
- .map = generic_pipe_buf_map,
- .unmap = generic_pipe_buf_unmap,
.confirm = generic_pipe_buf_confirm,
- .release = tracing_pipe_buf_release,
+ .release = generic_pipe_buf_release,
.steal = generic_pipe_buf_steal,
.get = generic_pipe_buf_get,
};
@@ -4308,7 +4632,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
trace_access_lock(iter->cpu_file);
/* Fill as many pages as possible. */
- for (i = 0, rem = len; i < pipe->buffers && rem; i++) {
+ for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
spd.pages[i] = alloc_page(GFP_KERNEL);
if (!spd.pages[i])
break;
@@ -4595,25 +4919,10 @@ static int tracing_clock_show(struct seq_file *m, void *v)
return 0;
}
-static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
- size_t cnt, loff_t *fpos)
+static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
{
- struct seq_file *m = filp->private_data;
- struct trace_array *tr = m->private;
- char buf[64];
- const char *clockstr;
int i;
- if (cnt >= sizeof(buf))
- return -EINVAL;
-
- if (copy_from_user(&buf, ubuf, cnt))
- return -EFAULT;
-
- buf[cnt] = 0;
-
- clockstr = strstrip(buf);
-
for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
if (strcmp(trace_clocks[i].name, clockstr) == 0)
break;
@@ -4641,6 +4950,32 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
mutex_unlock(&trace_types_lock);
+ return 0;
+}
+
+static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *fpos)
+{
+ struct seq_file *m = filp->private_data;
+ struct trace_array *tr = m->private;
+ char buf[64];
+ const char *clockstr;
+ int ret;
+
+ if (cnt >= sizeof(buf))
+ return -EINVAL;
+
+ if (copy_from_user(&buf, ubuf, cnt))
+ return -EFAULT;
+
+ buf[cnt] = 0;
+
+ clockstr = strstrip(buf);
+
+ ret = tracing_set_clock(tr, clockstr);
+ if (ret)
+ return ret;
+
*fpos += cnt;
return cnt;
@@ -4899,7 +5234,7 @@ static const struct file_operations snapshot_fops = {
.open = tracing_snapshot_open,
.read = seq_read,
.write = tracing_snapshot_write,
- .llseek = tracing_seek,
+ .llseek = tracing_lseek,
.release = tracing_snapshot_release,
};
@@ -5008,8 +5343,12 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
goto out_unlock;
}
mutex_unlock(&trace_types_lock);
- iter->trace->wait_pipe(iter);
+ ret = wait_on_pipe(iter);
mutex_lock(&trace_types_lock);
+ if (ret) {
+ size = ret;
+ goto out_unlock;
+ }
if (signal_pending(current)) {
size = -EINTR;
goto out_unlock;
@@ -5090,8 +5429,6 @@ static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
/* Pipe buffer operations for a buffer. */
static const struct pipe_buf_operations buffer_pipe_buf_ops = {
.can_merge = 0,
- .map = generic_pipe_buf_map,
- .unmap = generic_pipe_buf_unmap,
.confirm = generic_pipe_buf_confirm,
.release = buffer_pipe_buf_release,
.steal = generic_pipe_buf_steal,
@@ -5167,7 +5504,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
trace_access_lock(iter->cpu_file);
entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
- for (i = 0; i < pipe->buffers && len && entries; i++, len -= PAGE_SIZE) {
+ for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
struct page *page;
int r;
@@ -5221,8 +5558,10 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
goto out;
}
mutex_unlock(&trace_types_lock);
- iter->trace->wait_pipe(iter);
+ ret = wait_on_pipe(iter);
mutex_lock(&trace_types_lock);
+ if (ret)
+ goto out;
if (signal_pending(current)) {
ret = -EINTR;
goto out;
@@ -5454,12 +5793,12 @@ static struct ftrace_func_command ftrace_snapshot_cmd = {
.func = ftrace_trace_snapshot_callback,
};
-static int register_snapshot_cmd(void)
+static __init int register_snapshot_cmd(void)
{
return register_ftrace_command(&ftrace_snapshot_cmd);
}
#else
-static inline int register_snapshot_cmd(void) { return 0; }
+static inline __init int register_snapshot_cmd(void) { return 0; }
#endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
struct dentry *tracing_init_dentry_tr(struct trace_array *tr)
@@ -5601,7 +5940,7 @@ trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
if (!!(topt->flags->val & topt->opt->bit) != val) {
mutex_lock(&trace_types_lock);
- ret = __set_tracer_option(topt->tr->current_trace, topt->flags,
+ ret = __set_tracer_option(topt->tr, topt->flags,
topt->opt, !val);
mutex_unlock(&trace_types_lock);
if (ret)
@@ -5869,6 +6208,8 @@ allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size
rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
+ buf->tr = tr;
+
buf->buffer = ring_buffer_alloc(size, rb_flags);
if (!buf->buffer)
return -ENOMEM;
@@ -5913,6 +6254,28 @@ static int allocate_trace_buffers(struct trace_array *tr, int size)
return 0;
}
+static void free_trace_buffer(struct trace_buffer *buf)
+{
+ if (buf->buffer) {
+ ring_buffer_free(buf->buffer);
+ buf->buffer = NULL;
+ free_percpu(buf->data);
+ buf->data = NULL;
+ }
+}
+
+static void free_trace_buffers(struct trace_array *tr)
+{
+ if (!tr)
+ return;
+
+ free_trace_buffer(&tr->trace_buffer);
+
+#ifdef CONFIG_TRACER_MAX_TRACE
+ free_trace_buffer(&tr->max_buffer);
+#endif
+}
+
static int new_instance_create(const char *name)
{
struct trace_array *tr;
@@ -5942,6 +6305,8 @@ static int new_instance_create(const char *name)
raw_spin_lock_init(&tr->start_lock);
+ tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
+
tr->current_trace = &nop_trace;
INIT_LIST_HEAD(&tr->systems);
@@ -5969,8 +6334,7 @@ static int new_instance_create(const char *name)
return 0;
out_free_tr:
- if (tr->trace_buffer.buffer)
- ring_buffer_free(tr->trace_buffer.buffer);
+ free_trace_buffers(tr);
free_cpumask_var(tr->tracing_cpumask);
kfree(tr->name);
kfree(tr);
@@ -6006,10 +6370,11 @@ static int instance_delete(const char *name)
list_del(&tr->list);
+ tracing_set_nop(tr);
event_trace_del_tracer(tr);
+ ftrace_destroy_function_files(tr);
debugfs_remove_recursive(tr->dir);
- free_percpu(tr->trace_buffer.data);
- ring_buffer_free(tr->trace_buffer.buffer);
+ free_trace_buffers(tr);
kfree(tr->name);
kfree(tr);
@@ -6101,6 +6466,12 @@ init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
{
int cpu;
+ trace_create_file("available_tracers", 0444, d_tracer,
+ tr, &show_traces_fops);
+
+ trace_create_file("current_tracer", 0644, d_tracer,
+ tr, &set_tracer_fops);
+
trace_create_file("tracing_cpumask", 0644, d_tracer,
tr, &tracing_cpumask_fops);
@@ -6131,6 +6502,14 @@ init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
trace_create_file("tracing_on", 0644, d_tracer,
tr, &rb_simple_fops);
+#ifdef CONFIG_TRACER_MAX_TRACE
+ trace_create_file("tracing_max_latency", 0644, d_tracer,
+ &tr->max_latency, &tracing_max_lat_fops);
+#endif
+
+ if (ftrace_create_function_files(tr, d_tracer))
+ WARN(1, "Could not allocate function filter files");
+
#ifdef CONFIG_TRACER_SNAPSHOT
trace_create_file("snapshot", 0644, d_tracer,
tr, &snapshot_fops);
@@ -6153,17 +6532,6 @@ static __init int tracer_init_debugfs(void)
init_tracer_debugfs(&global_trace, d_tracer);
- trace_create_file("available_tracers", 0444, d_tracer,
- &global_trace, &show_traces_fops);
-
- trace_create_file("current_tracer", 0644, d_tracer,
- &global_trace, &set_tracer_fops);
-
-#ifdef CONFIG_TRACER_MAX_TRACE
- trace_create_file("tracing_max_latency", 0644, d_tracer,
- &tracing_max_latency, &tracing_max_lat_fops);
-#endif
-
trace_create_file("tracing_thresh", 0644, d_tracer,
&tracing_thresh, &tracing_max_lat_fops);
@@ -6173,6 +6541,9 @@ static __init int tracer_init_debugfs(void)
trace_create_file("saved_cmdlines", 0444, d_tracer,
NULL, &tracing_saved_cmdlines_fops);
+ trace_create_file("saved_cmdlines_size", 0644, d_tracer,
+ NULL, &tracing_saved_cmdlines_size_fops);
+
#ifdef CONFIG_DYNAMIC_FTRACE
trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
@@ -6253,6 +6624,17 @@ void trace_init_global_iter(struct trace_iterator *iter)
iter->trace = iter->tr->current_trace;
iter->cpu_file = RING_BUFFER_ALL_CPUS;
iter->trace_buffer = &global_trace.trace_buffer;
+
+ if (iter->trace && iter->trace->open)
+ iter->trace->open(iter);
+
+ /* Annotate start of buffers if we had overruns */
+ if (ring_buffer_overruns(iter->trace_buffer->buffer))
+ iter->iter_flags |= TRACE_FILE_ANNOTATE;
+
+ /* Output in nanoseconds only if we are using a clock in nanoseconds. */
+ if (trace_clocks[iter->tr->clock_id].in_ns)
+ iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
}
void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
@@ -6393,17 +6775,30 @@ __init static int tracer_alloc_buffers(void)
raw_spin_lock_init(&global_trace.start_lock);
+ /* Used for event triggers */
+ temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
+ if (!temp_buffer)
+ goto out_free_cpumask;
+
+ if (trace_create_savedcmd() < 0)
+ goto out_free_temp_buffer;
+
/* TODO: make the number of buffers hot pluggable with CPUS */
if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
WARN_ON(1);
- goto out_free_cpumask;
+ goto out_free_savedcmd;
}
if (global_trace.buffer_disabled)
tracing_off();
- trace_init_cmdlines();
+ if (trace_boot_clock) {
+ ret = tracing_set_clock(&global_trace, trace_boot_clock);
+ if (ret < 0)
+ pr_warning("Trace clock %s not defined, going back to default\n",
+ trace_boot_clock);
+ }
/*
* register_tracer() might reference current_trace, so it
@@ -6412,6 +6807,10 @@ __init static int tracer_alloc_buffers(void)
*/
global_trace.current_trace = &nop_trace;
+ global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
+
+ ftrace_init_global_array_ops(&global_trace);
+
register_tracer(&nop_trace);
/* All seems OK, enable tracing */
@@ -6439,11 +6838,11 @@ __init static int tracer_alloc_buffers(void)
return 0;
+out_free_savedcmd:
+ free_saved_cmdlines_buffer(savedcmd);
+out_free_temp_buffer:
+ ring_buffer_free(temp_buffer);
out_free_cpumask:
- free_percpu(global_trace.trace_buffer.data);
-#ifdef CONFIG_TRACER_MAX_TRACE
- free_percpu(global_trace.max_buffer.data);
-#endif
free_cpumask_var(global_trace.tracing_cpumask);
out_free_buffer_mask:
free_cpumask_var(tracing_buffer_mask);
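
The two new labels slot into tracer_alloc_buffers()' unwind chain in reverse allocation order, so a failure at any step frees exactly what was set up before it. The shape of that idiom, reduced to stand-in kmalloc() calls; setup(), a, b and c are made up, with b and c loosely playing the roles of temp_buffer and savedcmd.

    #include <linux/slab.h>
    #include <linux/errno.h>

    static void *a, *b, *c;

    static int setup(void)
    {
            if (!(a = kmalloc(16, GFP_KERNEL)))
                    return -ENOMEM;
            if (!(b = kmalloc(16, GFP_KERNEL)))     /* e.g. temp_buffer */
                    goto out_free_a;
            if (!(c = kmalloc(16, GFP_KERNEL)))     /* e.g. savedcmd */
                    goto out_free_b;
            return 0;

    out_free_b:
            kfree(b);
    out_free_a:
            kfree(a);
            return -ENOMEM;
    }
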
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 10c86fb7a2b..9258f5a815d 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -1,3 +1,4 @@
+
#ifndef _LINUX_KERNEL_TRACE_H
#define _LINUX_KERNEL_TRACE_H
@@ -12,6 +13,7 @@
#include <linux/hw_breakpoint.h>
#include <linux/trace_seq.h>
#include <linux/ftrace_event.h>
+#include <linux/compiler.h>
#ifdef CONFIG_FTRACE_SYSCALLS
#include <asm/unistd.h> /* For NR_SYSCALLS */
@@ -124,6 +126,7 @@ enum trace_flag_type {
TRACE_FLAG_NEED_RESCHED = 0x04,
TRACE_FLAG_HARDIRQ = 0x08,
TRACE_FLAG_SOFTIRQ = 0x10,
+ TRACE_FLAG_PREEMPT_RESCHED = 0x20,
};
#define TRACE_BUF_SIZE 1024
@@ -187,13 +190,28 @@ struct trace_array {
*/
struct trace_buffer max_buffer;
bool allocated_snapshot;
+ unsigned long max_latency;
#endif
+ /*
+ * max_lock is used to protect the swapping of buffers
+ * when taking a max snapshot. The buffers themselves are
+ * protected by per_cpu spinlocks. But the action of the swap
+ * needs its own lock.
+ *
+ * This is defined as an arch_spinlock_t in order to help
+ * with performance when lockdep debugging is enabled.
+ *
+ * It is also used in other places outside of update_max_tr,
+ * so it needs to be defined outside of the
+ * CONFIG_TRACER_MAX_TRACE.
+ */
+ arch_spinlock_t max_lock;
int buffer_disabled;
#ifdef CONFIG_FTRACE_SYSCALLS
int sys_refcount_enter;
int sys_refcount_exit;
- DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
- DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);
+ struct ftrace_event_file __rcu *enter_syscall_files[NR_syscalls];
+ struct ftrace_event_file __rcu *exit_syscall_files[NR_syscalls];
#endif
int stop_count;
int clock_id;
@@ -208,6 +226,11 @@ struct trace_array {
struct list_head events;
cpumask_var_t tracing_cpumask; /* only trace on set CPUs */
int ref;
+#ifdef CONFIG_FUNCTION_TRACER
+ struct ftrace_ops *ops;
+ /* function tracing enabled */
+ int function_enabled;
+#endif
};
enum {
@@ -229,6 +252,9 @@ static inline struct trace_array *top_trace_array(void)
{
struct trace_array *tr;
+ if (list_empty(&ftrace_trace_arrays))
+ return NULL;
+
tr = list_entry(ftrace_trace_arrays.prev,
typeof(*tr), list);
WARN_ON(!(tr->flags & TRACE_ARRAY_FL_GLOBAL));
@@ -315,7 +341,6 @@ struct tracer_flags {
* @stop: called when tracing is paused (echo 0 > tracing_enabled)
* @open: called when the trace file is opened
* @pipe_open: called when the trace_pipe file is opened
- * @wait_pipe: override how the user waits for traces on trace_pipe
* @close: called when the trace file is released
* @pipe_close: called when the trace_pipe file is released
* @read: override the default read callback on trace_pipe
@@ -334,7 +359,6 @@ struct tracer {
void (*stop)(struct trace_array *tr);
void (*open)(struct trace_iterator *iter);
void (*pipe_open)(struct trace_iterator *iter);
- void (*wait_pipe)(struct trace_iterator *iter);
void (*close)(struct trace_iterator *iter);
void (*pipe_close)(struct trace_iterator *iter);
ssize_t (*read)(struct trace_iterator *iter,
@@ -353,14 +377,16 @@ struct tracer {
void (*print_header)(struct seq_file *m);
enum print_line_t (*print_line)(struct trace_iterator *iter);
/* If you handled the flag setting, return 0 */
- int (*set_flag)(u32 old_flags, u32 bit, int set);
+ int (*set_flag)(struct trace_array *tr,
+ u32 old_flags, u32 bit, int set);
/* Return 0 if OK with change, else return non-zero */
- int (*flag_changed)(struct tracer *tracer,
+ int (*flag_changed)(struct trace_array *tr,
u32 mask, int set);
struct tracer *next;
struct tracer_flags *flags;
+ int enabled;
bool print_max;
- bool enabled;
+ bool allow_instances;
#ifdef CONFIG_TRACER_MAX_TRACE
bool use_max_tr;
#endif
@@ -406,13 +432,7 @@ enum {
TRACE_FTRACE_IRQ_BIT,
TRACE_FTRACE_SIRQ_BIT,
- /* GLOBAL_BITs must be greater than FTRACE_BITs */
- TRACE_GLOBAL_BIT,
- TRACE_GLOBAL_NMI_BIT,
- TRACE_GLOBAL_IRQ_BIT,
- TRACE_GLOBAL_SIRQ_BIT,
-
- /* INTERNAL_BITs must be greater than GLOBAL_BITs */
+ /* INTERNAL_BITs must be greater than FTRACE_BITs */
TRACE_INTERNAL_BIT,
TRACE_INTERNAL_NMI_BIT,
TRACE_INTERNAL_IRQ_BIT,
@@ -439,9 +459,6 @@ enum {
#define TRACE_FTRACE_START TRACE_FTRACE_BIT
#define TRACE_FTRACE_MAX ((1 << (TRACE_FTRACE_START + TRACE_CONTEXT_BITS)) - 1)
-#define TRACE_GLOBAL_START TRACE_GLOBAL_BIT
-#define TRACE_GLOBAL_MAX ((1 << (TRACE_GLOBAL_START + TRACE_CONTEXT_BITS)) - 1)
-
#define TRACE_LIST_START TRACE_INTERNAL_BIT
#define TRACE_LIST_MAX ((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1)
@@ -514,6 +531,7 @@ void tracing_reset_online_cpus(struct trace_buffer *buf);
void tracing_reset_current(int cpu);
void tracing_reset_all_online_cpus(void);
int tracing_open_generic(struct inode *inode, struct file *filp);
+bool tracing_is_disabled(void);
struct dentry *trace_create_file(const char *name,
umode_t mode,
struct dentry *parent,
@@ -549,8 +567,6 @@ void trace_init_global_iter(struct trace_iterator *iter);
void tracing_iter_reset(struct trace_iterator *iter, int cpu);
-void poll_wait_pipe(struct trace_iterator *iter);
-
void tracing_sched_switch_trace(struct trace_array *tr,
struct task_struct *prev,
struct task_struct *next,
@@ -585,6 +601,8 @@ void tracing_start_sched_switch_record(void);
int register_tracer(struct tracer *type);
int is_tracing_stopped(void);
+loff_t tracing_lseek(struct file *file, loff_t offset, int whence);
+
extern cpumask_var_t __read_mostly tracing_buffer_mask;
#define for_each_tracing_cpu(cpu) \
@@ -595,8 +613,6 @@ extern unsigned long nsecs_to_usecs(unsigned long nsecs);
extern unsigned long tracing_thresh;
#ifdef CONFIG_TRACER_MAX_TRACE
-extern unsigned long tracing_max_latency;
-
void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);
void update_max_tr_single(struct trace_array *tr,
struct task_struct *tsk, int cpu);
@@ -711,6 +727,10 @@ extern unsigned long trace_flags;
#define TRACE_GRAPH_PRINT_PROC 0x8
#define TRACE_GRAPH_PRINT_DURATION 0x10
#define TRACE_GRAPH_PRINT_ABS_TIME 0x20
+#define TRACE_GRAPH_PRINT_IRQS 0x40
+#define TRACE_GRAPH_PRINT_TAIL 0x80
+#define TRACE_GRAPH_PRINT_FILL_SHIFT 28
+#define TRACE_GRAPH_PRINT_FILL_MASK (0x3 << TRACE_GRAPH_PRINT_FILL_SHIFT)
extern enum print_line_t
print_graph_function_flags(struct trace_iterator *iter, u32 flags);
@@ -730,15 +750,16 @@ extern void __trace_graph_return(struct trace_array *tr,
#ifdef CONFIG_DYNAMIC_FTRACE
/* TODO: make this variable */
#define FTRACE_GRAPH_MAX_FUNCS 32
-extern int ftrace_graph_filter_enabled;
extern int ftrace_graph_count;
extern unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS];
+extern int ftrace_graph_notrace_count;
+extern unsigned long ftrace_graph_notrace_funcs[FTRACE_GRAPH_MAX_FUNCS];
static inline int ftrace_graph_addr(unsigned long addr)
{
int i;
- if (!ftrace_graph_filter_enabled)
+ if (!ftrace_graph_count)
return 1;
for (i = 0; i < ftrace_graph_count; i++) {
@@ -758,11 +779,31 @@ static inline int ftrace_graph_addr(unsigned long addr)
return 0;
}
+
+static inline int ftrace_graph_notrace_addr(unsigned long addr)
+{
+ int i;
+
+ if (!ftrace_graph_notrace_count)
+ return 0;
+
+ for (i = 0; i < ftrace_graph_notrace_count; i++) {
+ if (addr == ftrace_graph_notrace_funcs[i])
+ return 1;
+ }
+
+ return 0;
+}
#else
static inline int ftrace_graph_addr(unsigned long addr)
{
return 1;
}
+
+static inline int ftrace_graph_notrace_addr(unsigned long addr)
+{
+ return 0;
+}
#endif /* CONFIG_DYNAMIC_FTRACE */
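Note that the two inline helpers above have opposite defaults: ftrace_graph_addr() returns 1 (trace everything) when no filter is set, while ftrace_graph_notrace_addr() returns 0 (exclude nothing) when the notrace list is empty. A simplified sketch of how a graph-entry hook might combine them (illustrative, not the actual trace_graph_entry() logic):

/* Sketch only: trace a function if it passes the filter and is not in notrace. */
static int example_graph_entry_allowed(unsigned long func)
{
	if (!ftrace_graph_addr(func))		/* not selected by set_graph_function */
		return 0;
	if (ftrace_graph_notrace_addr(func))	/* excluded by set_graph_notrace */
		return 0;
	return 1;
}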
#else /* CONFIG_FUNCTION_GRAPH_TRACER */
static inline enum print_line_t
@@ -784,13 +825,45 @@ static inline int ftrace_trace_task(struct task_struct *task)
return test_tsk_trace_trace(task);
}
extern int ftrace_is_dead(void);
+int ftrace_create_function_files(struct trace_array *tr,
+ struct dentry *parent);
+void ftrace_destroy_function_files(struct trace_array *tr);
+void ftrace_init_global_array_ops(struct trace_array *tr);
+void ftrace_init_array_ops(struct trace_array *tr, ftrace_func_t func);
+void ftrace_reset_array_ops(struct trace_array *tr);
+int using_ftrace_ops_list_func(void);
#else
static inline int ftrace_trace_task(struct task_struct *task)
{
return 1;
}
static inline int ftrace_is_dead(void) { return 0; }
-#endif
+static inline int
+ftrace_create_function_files(struct trace_array *tr,
+ struct dentry *parent)
+{
+ return 0;
+}
+static inline void ftrace_destroy_function_files(struct trace_array *tr) { }
+static inline __init void
+ftrace_init_global_array_ops(struct trace_array *tr) { }
+static inline void ftrace_reset_array_ops(struct trace_array *tr) { }
+/* ftrace_func_t type is not defined here, use a macro instead of a static inline */
+#define ftrace_init_array_ops(tr, func) do { } while (0)
+#endif /* CONFIG_FUNCTION_TRACER */
+
+#if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_DYNAMIC_FTRACE)
+void ftrace_create_filter_files(struct ftrace_ops *ops,
+ struct dentry *parent);
+void ftrace_destroy_filter_files(struct ftrace_ops *ops);
+#else
+/*
+ * The ops parameter passed in is usually undefined.
+ * This must be a macro.
+ */
+#define ftrace_create_filter_files(ops, parent) do { } while (0)
+#define ftrace_destroy_filter_files(ops) do { } while (0)
+#endif /* CONFIG_FUNCTION_TRACER && CONFIG_DYNAMIC_FTRACE */
int ftrace_event_is_function(struct ftrace_event_call *call);
@@ -986,40 +1059,216 @@ struct filter_pred {
extern enum regex_type
filter_parse_regex(char *buff, int len, char **search, int *not);
-extern void print_event_filter(struct ftrace_event_call *call,
+extern void print_event_filter(struct ftrace_event_file *file,
struct trace_seq *s);
-extern int apply_event_filter(struct ftrace_event_call *call,
+extern int apply_event_filter(struct ftrace_event_file *file,
char *filter_string);
extern int apply_subsystem_event_filter(struct ftrace_subsystem_dir *dir,
char *filter_string);
extern void print_subsystem_event_filter(struct event_subsystem *system,
struct trace_seq *s);
extern int filter_assign_type(const char *type);
+extern int create_event_filter(struct ftrace_event_call *call,
+ char *filter_str, bool set_str,
+ struct event_filter **filterp);
+extern void free_event_filter(struct event_filter *filter);
struct ftrace_event_field *
trace_find_event_field(struct ftrace_event_call *call, char *name);
-static inline int
-filter_check_discard(struct ftrace_event_call *call, void *rec,
- struct ring_buffer *buffer,
- struct ring_buffer_event *event)
-{
- if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
- !filter_match_preds(call->filter, rec)) {
- ring_buffer_discard_commit(buffer, event);
- return 1;
- }
-
- return 0;
-}
-
extern void trace_event_enable_cmd_record(bool enable);
extern int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr);
extern int event_trace_del_tracer(struct trace_array *tr);
+extern struct ftrace_event_file *find_event_file(struct trace_array *tr,
+ const char *system,
+ const char *event);
+
+static inline void *event_file_data(struct file *filp)
+{
+ return ACCESS_ONCE(file_inode(filp)->i_private);
+}
+
extern struct mutex event_mutex;
extern struct list_head ftrace_events;
+extern const struct file_operations event_trigger_fops;
+
+extern int register_trigger_cmds(void);
+extern void clear_event_triggers(struct trace_array *tr);
+
+struct event_trigger_data {
+ unsigned long count;
+ int ref;
+ struct event_trigger_ops *ops;
+ struct event_command *cmd_ops;
+ struct event_filter __rcu *filter;
+ char *filter_str;
+ void *private_data;
+ struct list_head list;
+};
+
+/**
+ * struct event_trigger_ops - callbacks for trace event triggers
+ *
+ * The methods in this structure provide per-event trigger hooks for
+ * various trigger operations.
+ *
+ * All the methods below, except for @init() and @free(), must be
+ * implemented.
+ *
+ * @func: The trigger 'probe' function called when the triggering
+ * event occurs. The data passed into this callback is the data
+ * that was supplied to the event_command @reg() function that
+ * registered the trigger (see struct event_command).
+ *
+ * @init: An optional initialization function called for the trigger
+ * when the trigger is registered (via the event_command reg()
+ * function). This can be used to perform per-trigger
+ * initialization such as incrementing a per-trigger reference
+ * count, for instance. This is usually implemented by the
+ * generic utility function @event_trigger_init() (see
+ * trace_events_trigger.c).
+ *
+ * @free: An optional de-initialization function called for the
+ * trigger when the trigger is unregistered (via the
+ * event_command @reg() function). This can be used to perform
+ * per-trigger de-initialization such as decrementing a
+ * per-trigger reference count and freeing corresponding trigger
+ * data, for instance. This is usually implemented by the
+ * generic utility function @event_trigger_free() (see
+ * trace_event_triggers.c).
+ *
+ * @print: The callback function invoked to have the trigger print
+ * itself. This is usually implemented by a wrapper function
+ * that calls the generic utility function @event_trigger_print()
+ * (see trace_event_triggers.c).
+ */
+struct event_trigger_ops {
+ void (*func)(struct event_trigger_data *data);
+ int (*init)(struct event_trigger_ops *ops,
+ struct event_trigger_data *data);
+ void (*free)(struct event_trigger_ops *ops,
+ struct event_trigger_data *data);
+ int (*print)(struct seq_file *m,
+ struct event_trigger_ops *ops,
+ struct event_trigger_data *data);
+};
+
+/**
+ * struct event_command - callbacks and data members for event commands
+ *
+ * Event commands are invoked by users by writing the command name
+ * into the 'trigger' file associated with a trace event. The
+ * parameters associated with a specific invocation of an event
+ * command are used to create an event trigger instance, which is
+ * added to the list of trigger instances associated with that trace
+ * event. When the event is hit, the set of triggers associated with
+ * that event is invoked.
+ *
+ * The data members in this structure provide per-event command data
+ * for various event commands.
+ *
+ * All the data members below, except for @post_trigger, must be set
+ * for each event command.
+ *
+ * @name: The unique name that identifies the event command. This is
+ * the name used when setting triggers via trigger files.
+ *
+ * @trigger_type: A unique id that identifies the event command
+ * 'type'. This value has two purposes, the first to ensure that
+ * only one trigger of the same type can be set at a given time
+ * for a particular event; e.g. it doesn't make sense to have both
+ * a traceon and traceoff trigger attached to a single event at
+ * the same time, so traceon and traceoff have the same type
+ * though they have different names. The @trigger_type value is
+ * also used as a bit value for deferring the actual trigger
+ * action until after the current event is finished. Some
+ * commands need to do this if they themselves log to the trace
+ * buffer (see the @post_trigger() member below). @trigger_type
+ * values are defined by adding new values to the trigger_type
+ * enum in include/linux/ftrace_event.h.
+ *
+ * @post_trigger: A flag that says whether or not this command needs
+ * to have its action delayed until after the current event has
+ * been closed. Some triggers need to avoid being invoked while
+ * an event is currently in the process of being logged, since
+ * the trigger may itself log data into the trace buffer. Thus
+ * we make sure the current event is committed before invoking
+ * those triggers. To do that, the trigger invocation is split
+ * in two - the first part checks the filter using the current
+ * trace record; if a command has the @post_trigger flag set, it
+ * sets a bit for itself in the return value, otherwise it
+ * directly invokes the trigger. Once all commands have been
+ * either invoked or set their return flag, the current record is
+ * either committed or discarded. At that point, if any commands
+ * have deferred their triggers, those commands are finally
+ * invoked following the close of the current event. In other
+ * words, if the event_trigger_ops @func() probe implementation
+ * itself logs to the trace buffer, this flag should be set,
+ * otherwise it can be left unspecified.
+ *
+ * All the methods below, except for @set_filter(), must be
+ * implemented.
+ *
+ * @func: The callback function responsible for parsing and
+ * registering the trigger written to the 'trigger' file by the
+ * user. It allocates the trigger instance and registers it with
+ * the appropriate trace event. It makes use of the other
+ * event_command callback functions to orchestrate this, and is
+ * usually implemented by the generic utility function
+ * @event_trigger_callback() (see trace_events_trigger.c).
+ *
+ * @reg: Adds the trigger to the list of triggers associated with the
+ * event, and enables the event trigger itself, after
+ * initializing it (via the event_trigger_ops @init() function).
+ * This is also where commands can use the @trigger_type value to
+ * make the decision as to whether or not multiple instances of
+ * the trigger should be allowed. This is usually implemented by
+ * the generic utility function @register_trigger() (see
+ * trace_events_trigger.c).
+ *
+ * @unreg: Removes the trigger from the list of triggers associated
+ * with the event, and disables the event trigger itself, after
+ * de-initializing it (via the event_trigger_ops @free() function).
+ * This is usually implemented by the generic utility function
+ * @unregister_trigger() (see trace_events_trigger.c).
+ *
+ * @set_filter: An optional function called to parse and set a filter
+ * for the trigger. If no @set_filter() method is set for the
+ * event command, filters set by the user for the command will be
+ * ignored. This is usually implemented by the generic utility
+ * function @set_trigger_filter() (see trace_events_trigger.c).
+ *
+ * @get_trigger_ops: The callback function invoked to retrieve the
+ * event_trigger_ops implementation associated with the command.
+ */
+struct event_command {
+ struct list_head list;
+ char *name;
+ enum event_trigger_type trigger_type;
+ bool post_trigger;
+ int (*func)(struct event_command *cmd_ops,
+ struct ftrace_event_file *file,
+ char *glob, char *cmd, char *params);
+ int (*reg)(char *glob,
+ struct event_trigger_ops *ops,
+ struct event_trigger_data *data,
+ struct ftrace_event_file *file);
+ void (*unreg)(char *glob,
+ struct event_trigger_ops *ops,
+ struct event_trigger_data *data,
+ struct ftrace_event_file *file);
+ int (*set_filter)(char *filter_str,
+ struct event_trigger_data *data,
+ struct ftrace_event_file *file);
+ struct event_trigger_ops *(*get_trigger_ops)(char *cmd, char *param);
+};
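To make the two structures above concrete, here is a minimal, hypothetical command wired up the way the kerneldoc describes. All example_* names are illustrative; event_trigger_callback(), register_trigger() and unregister_trigger() are the generic helpers the comments refer to, used here with assumed (matching) signatures:

/* Sketch only: a hypothetical trigger and the command that creates it. */
static void example_trigger(struct event_trigger_data *data)
{
	/* the 'probe' run each time the triggering event fires */
}

static int example_trigger_print(struct seq_file *m,
				 struct event_trigger_ops *ops,
				 struct event_trigger_data *data)
{
	seq_puts(m, "example\n");
	return 0;
}

static struct event_trigger_ops example_trigger_ops = {
	.func	= example_trigger,
	.print	= example_trigger_print,
	/* .init and .free are optional */
};

static struct event_trigger_ops *
example_get_trigger_ops(char *cmd, char *param)
{
	return &example_trigger_ops;
}

static struct event_command example_cmd = {
	.name			= "example",
	/* .trigger_type would be a new enum event_trigger_type bit */
	.func			= event_trigger_callback,	/* parse + create */
	.reg			= register_trigger,		/* add to file->triggers */
	.unreg			= unregister_trigger,		/* remove + free */
	.get_trigger_ops	= example_get_trigger_ops,
};

Writing "example" (optionally with a count and a filter) to an event's trigger file would then land in ->func(), which uses ->get_trigger_ops(), ->reg() and ->set_filter() to build and attach the trigger.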
+
+extern int trace_event_enable_disable(struct ftrace_event_file *file,
+ int enable, int soft_disable);
+extern int tracing_alloc_snapshot(void);
+
extern const char *__start___trace_bprintk_fmt[];
extern const char *__stop___trace_bprintk_fmt[];
@@ -1045,7 +1294,7 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled);
#undef FTRACE_ENTRY
#define FTRACE_ENTRY(call, struct_name, id, tstruct, print, filter) \
extern struct ftrace_event_call \
- __attribute__((__aligned__(4))) event_##call;
+ __aligned(4) event_##call;
#undef FTRACE_ENTRY_DUP
#define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print, filter) \
FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print), \
diff --git a/kernel/trace/trace_benchmark.c b/kernel/trace/trace_benchmark.c
new file mode 100644
index 00000000000..40a14cbcf8e
--- /dev/null
+++ b/kernel/trace/trace_benchmark.c
@@ -0,0 +1,198 @@
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/kthread.h>
+#include <linux/trace_clock.h>
+
+#define CREATE_TRACE_POINTS
+#include "trace_benchmark.h"
+
+static struct task_struct *bm_event_thread;
+
+static char bm_str[BENCHMARK_EVENT_STRLEN] = "START";
+
+static u64 bm_total;
+static u64 bm_totalsq;
+static u64 bm_last;
+static u64 bm_max;
+static u64 bm_min;
+static u64 bm_first;
+static u64 bm_cnt;
+static u64 bm_stddev;
+static unsigned int bm_avg;
+static unsigned int bm_std;
+
+/*
+ * This gets called in a loop recording the time it took to write
+ * the tracepoint. What it writes is the time statistics of the last
+ * tracepoint write. As there is nothing to write the first time
+ * it simply writes "START". As the first write is cold cache and
+ * the rest is hot, we save off that time in bm_first and it is
+ * reported as "first", which is shown in the second write to the
+ * tracepoint. The "first" field is writen within the statics from
+ * then on but never changes.
+ */
+static void trace_do_benchmark(void)
+{
+ u64 start;
+ u64 stop;
+ u64 delta;
+ u64 stddev;
+ u64 seed;
+ u64 last_seed;
+ unsigned int avg;
+ unsigned int std = 0;
+
+ /* Only run if the tracepoint is actually active */
+ if (!trace_benchmark_event_enabled())
+ return;
+
+ local_irq_disable();
+ start = trace_clock_local();
+ trace_benchmark_event(bm_str);
+ stop = trace_clock_local();
+ local_irq_enable();
+
+ bm_cnt++;
+
+ delta = stop - start;
+
+ /*
+ * The first write is cold cache; keep it separate from the
+ * other calculations.
+ */
+ if (bm_cnt == 1) {
+ bm_first = delta;
+ scnprintf(bm_str, BENCHMARK_EVENT_STRLEN,
+ "first=%llu [COLD CACHED]", bm_first);
+ return;
+ }
+
+ bm_last = delta;
+
+ if (delta > bm_max)
+ bm_max = delta;
+ if (!bm_min || delta < bm_min)
+ bm_min = delta;
+
+ /*
+ * When bm_cnt is greater than UINT_MAX, it breaks the statistics
+ * accounting. Freeze the statistics when that happens.
+ * We should have enough data for the avg and stddev anyway.
+ */
+ if (bm_cnt > UINT_MAX) {
+ scnprintf(bm_str, BENCHMARK_EVENT_STRLEN,
+ "last=%llu first=%llu max=%llu min=%llu ** avg=%u std=%d std^2=%lld",
+ bm_last, bm_first, bm_max, bm_min, bm_avg, bm_std, bm_stddev);
+ return;
+ }
+
+ bm_total += delta;
+ bm_totalsq += delta * delta;
+
+ if (bm_cnt > 1) {
+ /*
+ * Compute the sample variance (the square of the standard
+ * deviation) with the standard sum-of-squares formula:
+ * s^2 = 1 / (n * (n-1)) * (n * \Sum (x_i)^2 - (\Sum x_i)^2)
+ */
+ stddev = (u64)bm_cnt * bm_totalsq - bm_total * bm_total;
+ do_div(stddev, (u32)bm_cnt);
+ do_div(stddev, (u32)bm_cnt - 1);
+ } else
+ stddev = 0;
+
+ delta = bm_total;
+ do_div(delta, bm_cnt);
+ avg = delta;
+
+ if (stddev > 0) {
+ int i = 0;
+ /*
+ * stddev holds the square of the standard deviation, but
+ * we want the actual number. Use the average as the seed
+ * for a Newton-Raphson integer square root.
+ *
+ * The next try is:
+ * x = (x + N/x) / 2
+ *
+ * Where N is the squared number to find the square
+ * root of.
+ */
+ seed = avg;
+ do {
+ last_seed = seed;
+ seed = stddev;
+ if (!last_seed)
+ break;
+ do_div(seed, last_seed);
+ seed += last_seed;
+ do_div(seed, 2);
+ } while (i++ < 10 && last_seed != seed);
+
+ std = seed;
+ }
+
+ scnprintf(bm_str, BENCHMARK_EVENT_STRLEN,
+ "last=%llu first=%llu max=%llu min=%llu avg=%u std=%d std^2=%lld",
+ bm_last, bm_first, bm_max, bm_min, avg, std, stddev);
+
+ bm_std = std;
+ bm_avg = avg;
+ bm_stddev = stddev;
+}
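The square-root loop above is the classic Newton-Raphson (Babylonian) iteration, just written with do_div() so it stays 32-bit safe. The same idea as a standalone sketch (hypothetical helper, plain 64-bit division, not part of the patch):

/* Sketch only: integer square root of n, seeded like the code above. */
static u64 example_isqrt(u64 n, u64 seed)
{
	u64 last;
	int i = 0;

	if (!n || !seed)
		return 0;

	do {
		last = seed;
		seed = (n / last + last) / 2;	/* x' = (N/x + x) / 2 */
	} while (i++ < 10 && last != seed);

	return seed;
}

Newton-Raphson converges quadratically, so the ten-iteration cap is more than enough here.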
+
+static int benchmark_event_kthread(void *arg)
+{
+ /* sleep a bit to make sure the tracepoint gets activated */
+ msleep(100);
+
+ while (!kthread_should_stop()) {
+
+ trace_do_benchmark();
+
+ /*
+ * We don't go to sleep, but let others
+ * run as well.
+ */
+ cond_resched();
+ }
+
+ return 0;
+}
+
+/*
+ * When the benchmark tracepoint is enabled, this function is
+ * called to create the kthread that writes to the tracepoint.
+ */
+void trace_benchmark_reg(void)
+{
+ bm_event_thread = kthread_run(benchmark_event_kthread,
+ NULL, "event_benchmark");
+ WARN_ON(!bm_event_thread);
+}
+
+/*
+ * When the benchmark tracepoint is disabled, this function is
+ * called to stop the kthread that writes to the tracepoint,
+ * and all the statistics are reset.
+ */
+void trace_benchmark_unreg(void)
+{
+ if (!bm_event_thread)
+ return;
+
+ kthread_stop(bm_event_thread);
+
+ strcpy(bm_str, "START");
+ bm_total = 0;
+ bm_totalsq = 0;
+ bm_last = 0;
+ bm_max = 0;
+ bm_min = 0;
+ bm_cnt = 0;
+ /* These don't need to be reset but reset them anyway */
+ bm_first = 0;
+ bm_std = 0;
+ bm_avg = 0;
+ bm_stddev = 0;
+}
diff --git a/kernel/trace/trace_benchmark.h b/kernel/trace/trace_benchmark.h
new file mode 100644
index 00000000000..3c1df1df4e2
--- /dev/null
+++ b/kernel/trace/trace_benchmark.h
@@ -0,0 +1,41 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM benchmark
+
+#if !defined(_TRACE_BENCHMARK_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_BENCHMARK_H
+
+#include <linux/tracepoint.h>
+
+extern void trace_benchmark_reg(void);
+extern void trace_benchmark_unreg(void);
+
+#define BENCHMARK_EVENT_STRLEN 128
+
+TRACE_EVENT_FN(benchmark_event,
+
+ TP_PROTO(const char *str),
+
+ TP_ARGS(str),
+
+ TP_STRUCT__entry(
+ __array( char, str, BENCHMARK_EVENT_STRLEN )
+ ),
+
+ TP_fast_assign(
+ memcpy(__entry->str, str, BENCHMARK_EVENT_STRLEN);
+ ),
+
+ TP_printk("%s", __entry->str),
+
+ trace_benchmark_reg, trace_benchmark_unreg
+);
+
+#endif /* _TRACE_BENCHMARK_H */
+
+#undef TRACE_INCLUDE_FILE
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_benchmark
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index d594da0dc03..697fb9bac8f 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -78,7 +78,7 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
entry->line = f->line;
entry->correct = val == expect;
- if (!filter_check_discard(call, entry, buffer, event))
+ if (!call_filter_check_discard(call, entry, buffer, event))
__buffer_unlock_commit(buffer, event);
out:
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 26dc348332b..57b67b1f24d 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -59,13 +59,14 @@ u64 notrace trace_clock(void)
/*
* trace_clock_jiffies(): Simply use jiffies as a clock counter.
+ * Note that this use of jiffies_64 is not completely safe on
+ * 32-bit systems. But the window is tiny, and if we do hit it,
+ * the worst that happens is an obviously bogus timestamp on a
+ * trace event - i.e. not life threatening.
*/
u64 notrace trace_clock_jiffies(void)
{
- u64 jiffy = jiffies - INITIAL_JIFFIES;
-
- /* Return nsecs */
- return (u64)jiffies_to_usecs(jiffy) * 1000ULL;
+ return jiffies_64_to_clock_t(jiffies_64 - INITIAL_JIFFIES);
}
/*
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 80c36bcf66e..5d12bb407b4 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -24,10 +24,32 @@ static int total_ref_count;
static int perf_trace_event_perm(struct ftrace_event_call *tp_event,
struct perf_event *p_event)
{
+ if (tp_event->perf_perm) {
+ int ret = tp_event->perf_perm(tp_event, p_event);
+ if (ret)
+ return ret;
+ }
+
/* The ftrace function trace is allowed only for root. */
- if (ftrace_event_is_function(tp_event) &&
- perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
- return -EPERM;
+ if (ftrace_event_is_function(tp_event)) {
+ if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ /*
+ * We don't allow user space callchains for the function trace
+ * event, due to issues with page faults while tracing the page
+ * fault handler, and because of its overall trickiness.
+ */
+ if (!p_event->attr.exclude_callchain_user)
+ return -EINVAL;
+
+ /*
+ * Same reason to disable user stack dump as for user space
+ * callchains above.
+ */
+ if (p_event->attr.sample_type & PERF_SAMPLE_STACK_USER)
+ return -EINVAL;
+ }
/* No tracing, just counting, so no obvious leak */
if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW))
@@ -173,7 +195,7 @@ static int perf_trace_event_init(struct ftrace_event_call *tp_event,
int perf_trace_init(struct perf_event *p_event)
{
struct ftrace_event_call *tp_event;
- int event_id = p_event->attr.config;
+ u64 event_id = p_event->attr.config;
int ret = -EINVAL;
mutex_lock(&event_mutex);
@@ -226,8 +248,8 @@ void perf_trace_del(struct perf_event *p_event, int flags)
tp_event->class->reg(tp_event, TRACE_REG_PERF_DEL, p_event);
}
-__kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
- struct pt_regs *regs, int *rctxp)
+void *perf_trace_buf_prepare(int size, unsigned short type,
+ struct pt_regs *regs, int *rctxp)
{
struct trace_entry *entry;
unsigned long flags;
@@ -259,6 +281,7 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
return raw_data;
}
EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);
+NOKPROBE_SYMBOL(perf_trace_buf_prepare);
#ifdef CONFIG_FUNCTION_TRACER
static void
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 368a4d50cc3..2de53628689 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -27,12 +27,6 @@
DEFINE_MUTEX(event_mutex);
-DEFINE_MUTEX(event_storage_mutex);
-EXPORT_SYMBOL_GPL(event_storage_mutex);
-
-char event_storage[EVENT_STORAGE_SIZE];
-EXPORT_SYMBOL_GPL(event_storage);
-
LIST_HEAD(ftrace_events);
static LIST_HEAD(ftrace_common_fields);
@@ -194,29 +188,60 @@ int trace_event_raw_init(struct ftrace_event_call *call)
}
EXPORT_SYMBOL_GPL(trace_event_raw_init);
+void *ftrace_event_buffer_reserve(struct ftrace_event_buffer *fbuffer,
+ struct ftrace_event_file *ftrace_file,
+ unsigned long len)
+{
+ struct ftrace_event_call *event_call = ftrace_file->event_call;
+
+ local_save_flags(fbuffer->flags);
+ fbuffer->pc = preempt_count();
+ fbuffer->ftrace_file = ftrace_file;
+
+ fbuffer->event =
+ trace_event_buffer_lock_reserve(&fbuffer->buffer, ftrace_file,
+ event_call->event.type, len,
+ fbuffer->flags, fbuffer->pc);
+ if (!fbuffer->event)
+ return NULL;
+
+ fbuffer->entry = ring_buffer_event_data(fbuffer->event);
+ return fbuffer->entry;
+}
+EXPORT_SYMBOL_GPL(ftrace_event_buffer_reserve);
+
+void ftrace_event_buffer_commit(struct ftrace_event_buffer *fbuffer)
+{
+ event_trigger_unlock_commit(fbuffer->ftrace_file, fbuffer->buffer,
+ fbuffer->event, fbuffer->entry,
+ fbuffer->flags, fbuffer->pc);
+}
+EXPORT_SYMBOL_GPL(ftrace_event_buffer_commit);
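The two exported helpers above are meant to be used as a pair from an event probe: reserve, fill in the entry, commit. A minimal sketch of such a caller (the entry layout and probe are hypothetical, not part of this patch):

/* Sketch only: a probe using the reserve/commit pair. */
struct example_entry {
	struct trace_entry	ent;		/* common fields come first */
	int			value;
};

static void example_event_probe(void *data, int value)
{
	struct ftrace_event_file *ftrace_file = data;
	struct ftrace_event_buffer fbuffer;
	struct example_entry *entry;

	entry = ftrace_event_buffer_reserve(&fbuffer, ftrace_file,
					    sizeof(*entry));
	if (!entry)
		return;			/* nothing reserved, drop the event */

	entry->value = value;		/* the TP_fast_assign() step */

	ftrace_event_buffer_commit(&fbuffer);
}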
+
int ftrace_event_reg(struct ftrace_event_call *call,
enum trace_reg type, void *data)
{
struct ftrace_event_file *file = data;
+ WARN_ON(!(call->flags & TRACE_EVENT_FL_TRACEPOINT));
switch (type) {
case TRACE_REG_REGISTER:
- return tracepoint_probe_register(call->name,
+ return tracepoint_probe_register(call->tp,
call->class->probe,
file);
case TRACE_REG_UNREGISTER:
- tracepoint_probe_unregister(call->name,
+ tracepoint_probe_unregister(call->tp,
call->class->probe,
file);
return 0;
#ifdef CONFIG_PERF_EVENTS
case TRACE_REG_PERF_REGISTER:
- return tracepoint_probe_register(call->name,
+ return tracepoint_probe_register(call->tp,
call->class->perf_probe,
call);
case TRACE_REG_PERF_UNREGISTER:
- tracepoint_probe_unregister(call->name,
+ tracepoint_probe_unregister(call->tp,
call->class->perf_probe,
call);
return 0;
@@ -328,7 +353,7 @@ static int __ftrace_event_enable_disable(struct ftrace_event_file *file,
if (ret) {
tracing_stop_cmdline_record();
pr_info("event trace: Could not enable event "
- "%s\n", call->name);
+ "%s\n", ftrace_event_name(call));
break;
}
set_bit(FTRACE_EVENT_FL_ENABLED_BIT, &file->flags);
@@ -342,6 +367,12 @@ static int __ftrace_event_enable_disable(struct ftrace_event_file *file,
return ret;
}
+int trace_event_enable_disable(struct ftrace_event_file *file,
+ int enable, int soft_disable)
+{
+ return __ftrace_event_enable_disable(file, enable, soft_disable);
+}
+
static int ftrace_event_enable_disable(struct ftrace_event_file *file,
int enable)
{
@@ -421,11 +452,6 @@ static void remove_subsystem(struct ftrace_subsystem_dir *dir)
}
}
-static void *event_file_data(struct file *filp)
-{
- return ACCESS_ONCE(file_inode(filp)->i_private);
-}
-
static void remove_event_file_dir(struct ftrace_event_file *file)
{
struct dentry *dir = file->dir;
@@ -444,6 +470,7 @@ static void remove_event_file_dir(struct ftrace_event_file *file)
list_del(&file->list);
remove_subsystem(file->system);
+ free_event_filter(file->filter);
kmem_cache_free(file_cachep, file);
}
@@ -456,27 +483,29 @@ __ftrace_set_clr_event_nolock(struct trace_array *tr, const char *match,
{
struct ftrace_event_file *file;
struct ftrace_event_call *call;
+ const char *name;
int ret = -EINVAL;
list_for_each_entry(file, &tr->events, list) {
call = file->event_call;
+ name = ftrace_event_name(call);
- if (!call->name || !call->class || !call->class->reg)
+ if (!name || !call->class || !call->class->reg)
continue;
if (call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)
continue;
if (match &&
- strcmp(match, call->name) != 0 &&
+ strcmp(match, name) != 0 &&
strcmp(match, call->class->system) != 0)
continue;
if (sub && strcmp(sub, call->class->system) != 0)
continue;
- if (event && strcmp(event, call->name) != 0)
+ if (event && strcmp(event, name) != 0)
continue;
ftrace_event_enable_disable(file, set);
@@ -546,6 +575,9 @@ int trace_set_clr_event(const char *system, const char *event, int set)
{
struct trace_array *tr = top_trace_array();
+ if (!tr)
+ return -ENODEV;
+
return __ftrace_set_clr_event(tr, NULL, system, event, set);
}
EXPORT_SYMBOL_GPL(trace_set_clr_event);
@@ -674,7 +706,7 @@ static int t_show(struct seq_file *m, void *v)
if (strcmp(call->class->system, TRACE_SYSTEM) != 0)
seq_printf(m, "%s:", call->class->system);
- seq_printf(m, "%s\n", call->name);
+ seq_printf(m, "%s\n", ftrace_event_name(call));
return 0;
}
@@ -767,7 +799,7 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
mutex_lock(&event_mutex);
list_for_each_entry(file, &tr->events, list) {
call = file->event_call;
- if (!call->name || !call->class || !call->class->reg)
+ if (!ftrace_event_name(call) || !call->class || !call->class->reg)
continue;
if (system && strcmp(call->class->system, system->name) != 0)
@@ -882,7 +914,7 @@ static int f_show(struct seq_file *m, void *v)
switch ((unsigned long)v) {
case FORMAT_HEADER:
- seq_printf(m, "name: %s\n", call->name);
+ seq_printf(m, "name: %s\n", ftrace_event_name(call));
seq_printf(m, "ID: %d\n", call->event.type);
seq_printf(m, "format:\n");
return 0;
@@ -989,7 +1021,7 @@ static ssize_t
event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
loff_t *ppos)
{
- struct ftrace_event_call *call;
+ struct ftrace_event_file *file;
struct trace_seq *s;
int r = -ENODEV;
@@ -1004,12 +1036,12 @@ event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
trace_seq_init(s);
mutex_lock(&event_mutex);
- call = event_file_data(filp);
- if (call)
- print_event_filter(call, s);
+ file = event_file_data(filp);
+ if (file)
+ print_event_filter(file, s);
mutex_unlock(&event_mutex);
- if (call)
+ if (file)
r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
kfree(s);
@@ -1021,7 +1053,7 @@ static ssize_t
event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
loff_t *ppos)
{
- struct ftrace_event_call *call;
+ struct ftrace_event_file *file;
char *buf;
int err = -ENODEV;
@@ -1039,9 +1071,9 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
buf[cnt] = '\0';
mutex_lock(&event_mutex);
- call = event_file_data(filp);
- if (call)
- err = apply_event_filter(call, buf);
+ file = event_file_data(filp);
+ if (file)
+ err = apply_event_filter(file, buf);
mutex_unlock(&event_mutex);
free_page((unsigned long) buf);
@@ -1062,6 +1094,9 @@ static int subsystem_open(struct inode *inode, struct file *filp)
struct trace_array *tr;
int ret;
+ if (tracing_is_disabled())
+ return -ENODEV;
+
/* Make sure the system still exists */
mutex_lock(&trace_types_lock);
mutex_lock(&event_mutex);
@@ -1108,6 +1143,9 @@ static int system_tr_open(struct inode *inode, struct file *filp)
struct trace_array *tr = inode->i_private;
int ret;
+ if (tracing_is_disabled())
+ return -ENODEV;
+
if (trace_array_get(tr) < 0)
return -ENODEV;
@@ -1124,11 +1162,12 @@ static int system_tr_open(struct inode *inode, struct file *filp)
if (ret < 0) {
trace_array_put(tr);
kfree(dir);
+ return ret;
}
filp->private_data = dir;
- return ret;
+ return 0;
}
static int subsystem_release(struct inode *inode, struct file *file)
@@ -1495,6 +1534,7 @@ event_create_dir(struct dentry *parent, struct ftrace_event_file *file)
struct trace_array *tr = file->tr;
struct list_head *head;
struct dentry *d_events;
+ const char *name;
int ret;
/*
@@ -1508,10 +1548,11 @@ event_create_dir(struct dentry *parent, struct ftrace_event_file *file)
} else
d_events = parent;
- file->dir = debugfs_create_dir(call->name, d_events);
+ name = ftrace_event_name(call);
+ file->dir = debugfs_create_dir(name, d_events);
if (!file->dir) {
pr_warning("Could not create debugfs '%s' directory\n",
- call->name);
+ name);
return -1;
}
@@ -1535,13 +1576,16 @@ event_create_dir(struct dentry *parent, struct ftrace_event_file *file)
ret = call->class->define_fields(call);
if (ret < 0) {
pr_warning("Could not initialize trace point"
- " events/%s\n", call->name);
+ " events/%s\n", name);
return -1;
}
}
- trace_create_file("filter", 0644, file->dir, call,
+ trace_create_file("filter", 0644, file->dir, file,
&ftrace_event_filter_fops);
+ trace_create_file("trigger", 0644, file->dir, file,
+ &event_trigger_fops);
+
trace_create_file("format", 0444, file->dir, call,
&ftrace_event_format_fops);
@@ -1577,6 +1621,7 @@ static void event_remove(struct ftrace_event_call *call)
if (file->event_call != call)
continue;
ftrace_event_enable_disable(file, 0);
+ destroy_preds(file);
/*
* The do_for_each_event_file() is
* a double loop. After finding the call for this
@@ -1595,15 +1640,17 @@ static void event_remove(struct ftrace_event_call *call)
static int event_init(struct ftrace_event_call *call)
{
int ret = 0;
+ const char *name;
- if (WARN_ON(!call->name))
+ name = ftrace_event_name(call);
+ if (WARN_ON(!name))
return -EINVAL;
if (call->class->raw_init) {
ret = call->class->raw_init(call);
if (ret < 0 && ret != -ENOSYS)
pr_warn("Could not initialize trace events/%s\n",
- call->name);
+ name);
}
return ret;
@@ -1637,6 +1684,8 @@ trace_create_new_event(struct ftrace_event_call *call,
file->event_call = call;
file->tr = tr;
atomic_set(&file->sm_ref, 0);
+ atomic_set(&file->tm_ref, 0);
+ INIT_LIST_HEAD(&file->triggers);
list_add(&file->list, &tr->events);
return file;
@@ -1700,7 +1749,7 @@ static void __trace_remove_event_call(struct ftrace_event_call *call)
{
event_remove(call);
trace_destroy_fields(call);
- destroy_preds(call);
+ destroy_call_preds(call);
}
static int probe_remove_event_call(struct ftrace_event_call *call)
@@ -1763,6 +1812,16 @@ static void trace_module_add_events(struct module *mod)
{
struct ftrace_event_call **call, **start, **end;
+ if (!mod->num_trace_events)
+ return;
+
+ /* Don't add infrastructure for mods without tracepoints */
+ if (trace_module_has_bad_taint(mod)) {
+ pr_err("%s: module has bad taint, not creating trace events\n",
+ mod->name);
+ return;
+ }
+
start = mod->trace_events;
end = mod->trace_events + mod->num_trace_events;
@@ -1837,46 +1896,48 @@ __trace_add_event_dirs(struct trace_array *tr)
ret = __trace_add_new_event(call, tr);
if (ret < 0)
pr_warning("Could not create directory for event %s\n",
- call->name);
+ ftrace_event_name(call));
}
}
-#ifdef CONFIG_DYNAMIC_FTRACE
-
-/* Avoid typos */
-#define ENABLE_EVENT_STR "enable_event"
-#define DISABLE_EVENT_STR "disable_event"
-
-struct event_probe_data {
- struct ftrace_event_file *file;
- unsigned long count;
- int ref;
- bool enable;
-};
-
-static struct ftrace_event_file *
+struct ftrace_event_file *
find_event_file(struct trace_array *tr, const char *system, const char *event)
{
struct ftrace_event_file *file;
struct ftrace_event_call *call;
+ const char *name;
list_for_each_entry(file, &tr->events, list) {
call = file->event_call;
+ name = ftrace_event_name(call);
- if (!call->name || !call->class || !call->class->reg)
+ if (!name || !call->class || !call->class->reg)
continue;
if (call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)
continue;
- if (strcmp(event, call->name) == 0 &&
+ if (strcmp(event, name) == 0 &&
strcmp(system, call->class->system) == 0)
return file;
}
return NULL;
}
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+/* Avoid typos */
+#define ENABLE_EVENT_STR "enable_event"
+#define DISABLE_EVENT_STR "disable_event"
+
+struct event_probe_data {
+ struct ftrace_event_file *file;
+ unsigned long count;
+ int ref;
+ bool enable;
+};
+
static void
event_enable_probe(unsigned long ip, unsigned long parent_ip, void **_data)
{
@@ -1925,7 +1986,7 @@ event_enable_print(struct seq_file *m, unsigned long ip,
seq_printf(m, "%s:%s:%s",
data->enable ? ENABLE_EVENT_STR : DISABLE_EVENT_STR,
data->file->event_call->class->system,
- data->file->event_call->name);
+ ftrace_event_name(data->file->event_call));
if (data->count == -1)
seq_printf(m, ":unlimited\n");
@@ -2008,6 +2069,9 @@ event_enable_func(struct ftrace_hash *hash,
bool enable;
int ret;
+ if (!tr)
+ return -ENODEV;
+
/* hash funcs only work with set_ftrace_filter */
if (!enabled || !param)
return -EINVAL;
@@ -2145,7 +2209,7 @@ __trace_early_add_event_dirs(struct trace_array *tr)
ret = event_create_dir(tr->event_dir, file);
if (ret < 0)
pr_warning("Could not create directory for event %s\n",
- file->event_call->name);
+ ftrace_event_name(file->event_call));
}
}
@@ -2169,7 +2233,7 @@ __trace_early_add_events(struct trace_array *tr)
ret = __trace_early_add_new_event(call, tr);
if (ret < 0)
pr_warning("Could not create early event %s\n",
- call->name);
+ ftrace_event_name(call));
}
}
@@ -2303,9 +2367,15 @@ int event_trace_del_tracer(struct trace_array *tr)
{
mutex_lock(&event_mutex);
+ /* Disable any event triggers and associated soft-disabled events */
+ clear_event_triggers(tr);
+
/* Disable any running events */
__ftrace_set_clr_event_nolock(tr, NULL, NULL, NULL, 0);
+	/* Accesses to events are done under rcu_read_lock_sched() */
+ synchronize_sched();
+
down_write(&trace_event_sem);
__trace_remove_event_dirs(tr);
debugfs_remove_recursive(tr->event_dir);
@@ -2333,6 +2403,9 @@ static __init int event_trace_enable(void)
char *token;
int ret;
+ if (!tr)
+ return -ENODEV;
+
for_each_event(iter, __start_ftrace_events, __stop_ftrace_events) {
call = *iter;
@@ -2366,6 +2439,8 @@ static __init int event_trace_enable(void)
register_event_cmds();
+ register_trigger_cmds();
+
return 0;
}
@@ -2377,6 +2452,8 @@ static __init int event_trace_init(void)
int ret;
tr = top_trace_array();
+ if (!tr)
+ return -ENODEV;
d_tracer = tracing_init_dentry();
if (!d_tracer)
@@ -2470,6 +2547,8 @@ static __init void event_trace_self_tests(void)
int ret;
tr = top_trace_array();
+ if (!tr)
+ return;
pr_info("Running tests on trace events:\n");
@@ -2493,7 +2572,7 @@ static __init void event_trace_self_tests(void)
continue;
#endif
- pr_info("Testing event %s: ", call->name);
+ pr_info("Testing event %s: ", ftrace_event_name(call));
/*
* If an event is already enabled, someone is using
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 97daa8cf958..8a8631926a0 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -637,10 +637,18 @@ static void append_filter_err(struct filter_parse_state *ps,
free_page((unsigned long) buf);
}
+static inline struct event_filter *event_filter(struct ftrace_event_file *file)
+{
+ if (file->event_call->flags & TRACE_EVENT_FL_USE_CALL_FILTER)
+ return file->event_call->filter;
+ else
+ return file->filter;
+}
+
/* caller must hold event_mutex */
-void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s)
+void print_event_filter(struct ftrace_event_file *file, struct trace_seq *s)
{
- struct event_filter *filter = call->filter;
+ struct event_filter *filter = event_filter(file);
if (filter && filter->filter_string)
trace_seq_printf(s, "%s\n", filter->filter_string);
@@ -766,11 +774,21 @@ static void __free_preds(struct event_filter *filter)
filter->n_preds = 0;
}
-static void filter_disable(struct ftrace_event_call *call)
+static void call_filter_disable(struct ftrace_event_call *call)
{
call->flags &= ~TRACE_EVENT_FL_FILTERED;
}
+static void filter_disable(struct ftrace_event_file *file)
+{
+ struct ftrace_event_call *call = file->event_call;
+
+ if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER)
+ call_filter_disable(call);
+ else
+ file->flags &= ~FTRACE_EVENT_FL_FILTERED;
+}
+
static void __free_filter(struct event_filter *filter)
{
if (!filter)
@@ -781,16 +799,35 @@ static void __free_filter(struct event_filter *filter)
kfree(filter);
}
+void free_event_filter(struct event_filter *filter)
+{
+ __free_filter(filter);
+}
+
+void destroy_call_preds(struct ftrace_event_call *call)
+{
+ __free_filter(call->filter);
+ call->filter = NULL;
+}
+
+static void destroy_file_preds(struct ftrace_event_file *file)
+{
+ __free_filter(file->filter);
+ file->filter = NULL;
+}
+
/*
- * Called when destroying the ftrace_event_call.
- * The call is being freed, so we do not need to worry about
- * the call being currently used. This is for module code removing
+ * Called when destroying the ftrace_event_file.
+ * The file is being freed, so we do not need to worry about
+ * the file being currently used. This is for module code removing
* the tracepoints from within it.
*/
-void destroy_preds(struct ftrace_event_call *call)
+void destroy_preds(struct ftrace_event_file *file)
{
- __free_filter(call->filter);
- call->filter = NULL;
+ if (file->event_call->flags & TRACE_EVENT_FL_USE_CALL_FILTER)
+ destroy_call_preds(file->event_call);
+ else
+ destroy_file_preds(file);
}
static struct event_filter *__alloc_filter(void)
@@ -825,28 +862,56 @@ static int __alloc_preds(struct event_filter *filter, int n_preds)
return 0;
}
-static void filter_free_subsystem_preds(struct event_subsystem *system)
+static inline void __remove_filter(struct ftrace_event_file *file)
{
+ struct ftrace_event_call *call = file->event_call;
+
+ filter_disable(file);
+ if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER)
+ remove_filter_string(call->filter);
+ else
+ remove_filter_string(file->filter);
+}
+
+static void filter_free_subsystem_preds(struct event_subsystem *system,
+ struct trace_array *tr)
+{
+ struct ftrace_event_file *file;
struct ftrace_event_call *call;
- list_for_each_entry(call, &ftrace_events, list) {
+ list_for_each_entry(file, &tr->events, list) {
+ call = file->event_call;
if (strcmp(call->class->system, system->name) != 0)
continue;
- filter_disable(call);
- remove_filter_string(call->filter);
+ __remove_filter(file);
}
}
-static void filter_free_subsystem_filters(struct event_subsystem *system)
+static inline void __free_subsystem_filter(struct ftrace_event_file *file)
{
+ struct ftrace_event_call *call = file->event_call;
+
+ if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) {
+ __free_filter(call->filter);
+ call->filter = NULL;
+ } else {
+ __free_filter(file->filter);
+ file->filter = NULL;
+ }
+}
+
+static void filter_free_subsystem_filters(struct event_subsystem *system,
+ struct trace_array *tr)
+{
+ struct ftrace_event_file *file;
struct ftrace_event_call *call;
- list_for_each_entry(call, &ftrace_events, list) {
+ list_for_each_entry(file, &tr->events, list) {
+ call = file->event_call;
if (strcmp(call->class->system, system->name) != 0)
continue;
- __free_filter(call->filter);
- call->filter = NULL;
+ __free_subsystem_filter(file);
}
}
@@ -1617,15 +1682,85 @@ fail:
return err;
}
+static inline void event_set_filtered_flag(struct ftrace_event_file *file)
+{
+ struct ftrace_event_call *call = file->event_call;
+
+ if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER)
+ call->flags |= TRACE_EVENT_FL_FILTERED;
+ else
+ file->flags |= FTRACE_EVENT_FL_FILTERED;
+}
+
+static inline void event_set_filter(struct ftrace_event_file *file,
+ struct event_filter *filter)
+{
+ struct ftrace_event_call *call = file->event_call;
+
+ if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER)
+ rcu_assign_pointer(call->filter, filter);
+ else
+ rcu_assign_pointer(file->filter, filter);
+}
+
+static inline void event_clear_filter(struct ftrace_event_file *file)
+{
+ struct ftrace_event_call *call = file->event_call;
+
+ if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER)
+ RCU_INIT_POINTER(call->filter, NULL);
+ else
+ RCU_INIT_POINTER(file->filter, NULL);
+}
+
+static inline void
+event_set_no_set_filter_flag(struct ftrace_event_file *file)
+{
+ struct ftrace_event_call *call = file->event_call;
+
+ if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER)
+ call->flags |= TRACE_EVENT_FL_NO_SET_FILTER;
+ else
+ file->flags |= FTRACE_EVENT_FL_NO_SET_FILTER;
+}
+
+static inline void
+event_clear_no_set_filter_flag(struct ftrace_event_file *file)
+{
+ struct ftrace_event_call *call = file->event_call;
+
+ if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER)
+ call->flags &= ~TRACE_EVENT_FL_NO_SET_FILTER;
+ else
+ file->flags &= ~FTRACE_EVENT_FL_NO_SET_FILTER;
+}
+
+static inline bool
+event_no_set_filter_flag(struct ftrace_event_file *file)
+{
+ struct ftrace_event_call *call = file->event_call;
+
+ if (file->flags & FTRACE_EVENT_FL_NO_SET_FILTER)
+ return true;
+
+ if ((call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) &&
+ (call->flags & TRACE_EVENT_FL_NO_SET_FILTER))
+ return true;
+
+ return false;
+}
+
struct filter_list {
struct list_head list;
struct event_filter *filter;
};
static int replace_system_preds(struct event_subsystem *system,
+ struct trace_array *tr,
struct filter_parse_state *ps,
char *filter_string)
{
+ struct ftrace_event_file *file;
struct ftrace_event_call *call;
struct filter_list *filter_item;
struct filter_list *tmp;
@@ -1633,8 +1768,8 @@ static int replace_system_preds(struct event_subsystem *system,
bool fail = true;
int err;
- list_for_each_entry(call, &ftrace_events, list) {
-
+ list_for_each_entry(file, &tr->events, list) {
+ call = file->event_call;
if (strcmp(call->class->system, system->name) != 0)
continue;
@@ -1644,18 +1779,20 @@ static int replace_system_preds(struct event_subsystem *system,
*/
err = replace_preds(call, NULL, ps, filter_string, true);
if (err)
- call->flags |= TRACE_EVENT_FL_NO_SET_FILTER;
+ event_set_no_set_filter_flag(file);
else
- call->flags &= ~TRACE_EVENT_FL_NO_SET_FILTER;
+ event_clear_no_set_filter_flag(file);
}
- list_for_each_entry(call, &ftrace_events, list) {
+ list_for_each_entry(file, &tr->events, list) {
struct event_filter *filter;
+ call = file->event_call;
+
if (strcmp(call->class->system, system->name) != 0)
continue;
- if (call->flags & TRACE_EVENT_FL_NO_SET_FILTER)
+ if (event_no_set_filter_flag(file))
continue;
filter_item = kzalloc(sizeof(*filter_item), GFP_KERNEL);
@@ -1676,17 +1813,17 @@ static int replace_system_preds(struct event_subsystem *system,
err = replace_preds(call, filter, ps, filter_string, false);
if (err) {
- filter_disable(call);
+ filter_disable(file);
parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
append_filter_err(ps, filter);
} else
- call->flags |= TRACE_EVENT_FL_FILTERED;
+ event_set_filtered_flag(file);
/*
* Regardless of if this returned an error, we still
* replace the filter for the call.
*/
- filter = call->filter;
- rcu_assign_pointer(call->filter, filter_item->filter);
+ filter = event_filter(file);
+ event_set_filter(file, filter_item->filter);
filter_item->filter = filter;
fail = false;
@@ -1806,6 +1943,13 @@ static int create_filter(struct ftrace_event_call *call,
return err;
}
+int create_event_filter(struct ftrace_event_call *call,
+ char *filter_str, bool set_str,
+ struct event_filter **filterp)
+{
+ return create_filter(call, filter_str, set_str, filterp);
+}
+
/**
* create_system_filter - create a filter for an event_subsystem
* @system: event_subsystem to create a filter for
@@ -1816,6 +1960,7 @@ static int create_filter(struct ftrace_event_call *call,
* and always remembers @filter_str.
*/
static int create_system_filter(struct event_subsystem *system,
+ struct trace_array *tr,
char *filter_str, struct event_filter **filterp)
{
struct event_filter *filter = NULL;
@@ -1824,7 +1969,7 @@ static int create_system_filter(struct event_subsystem *system,
err = create_filter_start(filter_str, true, &ps, &filter);
if (!err) {
- err = replace_system_preds(system, ps, filter_str);
+ err = replace_system_preds(system, tr, ps, filter_str);
if (!err) {
/* System filters just show a default message */
kfree(filter->filter_string);
@@ -1840,20 +1985,25 @@ static int create_system_filter(struct event_subsystem *system,
}
/* caller must hold event_mutex */
-int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
+int apply_event_filter(struct ftrace_event_file *file, char *filter_string)
{
+ struct ftrace_event_call *call = file->event_call;
struct event_filter *filter;
int err;
if (!strcmp(strstrip(filter_string), "0")) {
- filter_disable(call);
- filter = call->filter;
+ filter_disable(file);
+ filter = event_filter(file);
+
if (!filter)
return 0;
- RCU_INIT_POINTER(call->filter, NULL);
+
+ event_clear_filter(file);
+
/* Make sure the filter is not being used */
synchronize_sched();
__free_filter(filter);
+
return 0;
}
@@ -1866,14 +2016,15 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
* string
*/
if (filter) {
- struct event_filter *tmp = call->filter;
+ struct event_filter *tmp;
+ tmp = event_filter(file);
if (!err)
- call->flags |= TRACE_EVENT_FL_FILTERED;
+ event_set_filtered_flag(file);
else
- filter_disable(call);
+ filter_disable(file);
- rcu_assign_pointer(call->filter, filter);
+ event_set_filter(file, filter);
if (tmp) {
/* Make sure the call is done with the filter */
@@ -1889,6 +2040,7 @@ int apply_subsystem_event_filter(struct ftrace_subsystem_dir *dir,
char *filter_string)
{
struct event_subsystem *system = dir->subsystem;
+ struct trace_array *tr = dir->tr;
struct event_filter *filter;
int err = 0;
@@ -1901,18 +2053,18 @@ int apply_subsystem_event_filter(struct ftrace_subsystem_dir *dir,
}
if (!strcmp(strstrip(filter_string), "0")) {
- filter_free_subsystem_preds(system);
+ filter_free_subsystem_preds(system, tr);
remove_filter_string(system->filter);
filter = system->filter;
system->filter = NULL;
/* Ensure all filters are no longer used */
synchronize_sched();
- filter_free_subsystem_filters(system);
+ filter_free_subsystem_filters(system, tr);
__free_filter(filter);
goto out_unlock;
}
- err = create_system_filter(system, filter_string, &filter);
+ err = create_system_filter(system, tr, filter_string, &filter);
if (filter) {
/*
* No event actually uses the system filter
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
new file mode 100644
index 00000000000..4747b476a03
--- /dev/null
+++ b/kernel/trace/trace_events_trigger.c
@@ -0,0 +1,1437 @@
+/*
+ * trace_events_trigger - trace event triggers
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) 2013 Tom Zanussi <tom.zanussi@linux.intel.com>
+ */
+
+#include <linux/module.h>
+#include <linux/ctype.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+
+#include "trace.h"
+
+static LIST_HEAD(trigger_commands);
+static DEFINE_MUTEX(trigger_cmd_mutex);
+
+static void
+trigger_data_free(struct event_trigger_data *data)
+{
+ if (data->cmd_ops->set_filter)
+ data->cmd_ops->set_filter(NULL, data, NULL);
+
+ synchronize_sched(); /* make sure current triggers exit before free */
+ kfree(data);
+}
+
+/**
+ * event_triggers_call - Call triggers associated with a trace event
+ * @file: The ftrace_event_file associated with the event
+ * @rec: The trace entry for the event, NULL for unconditional invocation
+ *
+ * For each trigger associated with an event, invoke the trigger
+ * function registered with the associated trigger command. If rec is
+ * non-NULL, it means that the trigger requires further processing and
+ * shouldn't be unconditionally invoked. If rec is non-NULL and the
+ * trigger has a filter associated with it, rec will checked against
+ * the filter and if the record matches the trigger will be invoked.
+ * If the trigger is a 'post_trigger', meaning it shouldn't be invoked
+ * in any case until the current event is written, the trigger
+ * function isn't invoked but the bit associated with the deferred
+ * trigger is set in the return value.
+ *
+ *
+ * Called from tracepoint handlers (with rcu_read_lock_sched() held).
+ *
+ * Return: an enum event_trigger_type value containing a set bit for
+ * any trigger that should be deferred, ETT_NONE if nothing to defer.
+ */
+enum event_trigger_type
+event_triggers_call(struct ftrace_event_file *file, void *rec)
+{
+ struct event_trigger_data *data;
+ enum event_trigger_type tt = ETT_NONE;
+ struct event_filter *filter;
+
+ if (list_empty(&file->triggers))
+ return tt;
+
+ list_for_each_entry_rcu(data, &file->triggers, list) {
+ if (!rec) {
+ data->ops->func(data);
+ continue;
+ }
+ filter = rcu_dereference_sched(data->filter);
+ if (filter && !filter_match_preds(filter, rec))
+ continue;
+ if (data->cmd_ops->post_trigger) {
+ tt |= data->cmd_ops->trigger_type;
+ continue;
+ }
+ data->ops->func(data);
+ }
+ return tt;
+}
+EXPORT_SYMBOL_GPL(event_triggers_call);
+
+/**
+ * event_triggers_post_call - Call 'post_triggers' for a trace event
+ * @file: The ftrace_event_file associated with the event
+ * @tt: enum event_trigger_type containing a set bit for each trigger to invoke
+ *
+ * For each trigger associated with an event, invoke the trigger
+ * function registered with the associated trigger command, if the
+ * corresponding bit is set in the tt enum passed into this function.
+ * See @event_triggers_call for details on how those bits are set.
+ *
+ * Called from tracepoint handlers (with rcu_read_lock_sched() held).
+ */
+void
+event_triggers_post_call(struct ftrace_event_file *file,
+ enum event_trigger_type tt)
+{
+ struct event_trigger_data *data;
+
+ list_for_each_entry_rcu(data, &file->triggers, list) {
+ if (data->cmd_ops->trigger_type & tt)
+ data->ops->func(data);
+ }
+}
+EXPORT_SYMBOL_GPL(event_triggers_post_call);
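Taken together, the intended calling pattern is: run event_triggers_call() before the record is committed, commit (or discard) the record, then hand the returned bits to event_triggers_post_call(), so post_trigger commands never write while the current event is still open. A condensed sketch of such a caller (illustrative only; the real callers go through helpers such as event_trigger_unlock_commit()):

/* Sketch only: pairing the two calls around an event commit. */
static void example_commit_with_triggers(struct ftrace_event_file *file,
					 void *entry)
{
	enum event_trigger_type tt;

	tt = event_triggers_call(file, entry);	/* immediate triggers run now */

	/* ... commit (or discard) the ring buffer event here ... */

	if (tt != ETT_NONE)
		event_triggers_post_call(file, tt); /* deferred triggers run last */
}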
+
+#define SHOW_AVAILABLE_TRIGGERS (void *)(1UL)
+
+static void *trigger_next(struct seq_file *m, void *t, loff_t *pos)
+{
+ struct ftrace_event_file *event_file = event_file_data(m->private);
+
+ if (t == SHOW_AVAILABLE_TRIGGERS)
+ return NULL;
+
+ return seq_list_next(t, &event_file->triggers, pos);
+}
+
+static void *trigger_start(struct seq_file *m, loff_t *pos)
+{
+ struct ftrace_event_file *event_file;
+
+ /* ->stop() is called even if ->start() fails */
+ mutex_lock(&event_mutex);
+ event_file = event_file_data(m->private);
+ if (unlikely(!event_file))
+ return ERR_PTR(-ENODEV);
+
+ if (list_empty(&event_file->triggers))
+ return *pos == 0 ? SHOW_AVAILABLE_TRIGGERS : NULL;
+
+ return seq_list_start(&event_file->triggers, *pos);
+}
+
+static void trigger_stop(struct seq_file *m, void *t)
+{
+ mutex_unlock(&event_mutex);
+}
+
+static int trigger_show(struct seq_file *m, void *v)
+{
+ struct event_trigger_data *data;
+ struct event_command *p;
+
+ if (v == SHOW_AVAILABLE_TRIGGERS) {
+ seq_puts(m, "# Available triggers:\n");
+ seq_putc(m, '#');
+ mutex_lock(&trigger_cmd_mutex);
+ list_for_each_entry_reverse(p, &trigger_commands, list)
+ seq_printf(m, " %s", p->name);
+ seq_putc(m, '\n');
+ mutex_unlock(&trigger_cmd_mutex);
+ return 0;
+ }
+
+ data = list_entry(v, struct event_trigger_data, list);
+ data->ops->print(m, data->ops, data);
+
+ return 0;
+}
+
+static const struct seq_operations event_triggers_seq_ops = {
+ .start = trigger_start,
+ .next = trigger_next,
+ .stop = trigger_stop,
+ .show = trigger_show,
+};
+
+static int event_trigger_regex_open(struct inode *inode, struct file *file)
+{
+ int ret = 0;
+
+ mutex_lock(&event_mutex);
+
+ if (unlikely(!event_file_data(file))) {
+ mutex_unlock(&event_mutex);
+ return -ENODEV;
+ }
+
+ if (file->f_mode & FMODE_READ) {
+ ret = seq_open(file, &event_triggers_seq_ops);
+ if (!ret) {
+ struct seq_file *m = file->private_data;
+ m->private = file;
+ }
+ }
+
+ mutex_unlock(&event_mutex);
+
+ return ret;
+}
+
+static int trigger_process_regex(struct ftrace_event_file *file, char *buff)
+{
+ char *command, *next = buff;
+ struct event_command *p;
+ int ret = -EINVAL;
+
+ command = strsep(&next, ": \t");
+ command = (command[0] != '!') ? command : command + 1;
+
+ mutex_lock(&trigger_cmd_mutex);
+ list_for_each_entry(p, &trigger_commands, list) {
+ if (strcmp(p->name, command) == 0) {
+ ret = p->func(p, file, buff, command, next);
+ goto out_unlock;
+ }
+ }
+ out_unlock:
+ mutex_unlock(&trigger_cmd_mutex);
+
+ return ret;
+}
+
+static ssize_t event_trigger_regex_write(struct file *file,
+ const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ struct ftrace_event_file *event_file;
+ ssize_t ret;
+ char *buf;
+
+ if (!cnt)
+ return 0;
+
+ if (cnt >= PAGE_SIZE)
+ return -EINVAL;
+
+ buf = (char *)__get_free_page(GFP_TEMPORARY);
+ if (!buf)
+ return -ENOMEM;
+
+ if (copy_from_user(buf, ubuf, cnt)) {
+ free_page((unsigned long)buf);
+ return -EFAULT;
+ }
+ buf[cnt] = '\0';
+ strim(buf);
+
+ mutex_lock(&event_mutex);
+ event_file = event_file_data(file);
+ if (unlikely(!event_file)) {
+ mutex_unlock(&event_mutex);
+ free_page((unsigned long)buf);
+ return -ENODEV;
+ }
+ ret = trigger_process_regex(event_file, buf);
+ mutex_unlock(&event_mutex);
+
+ free_page((unsigned long)buf);
+ if (ret < 0)
+ goto out;
+
+ *ppos += cnt;
+ ret = cnt;
+ out:
+ return ret;
+}
+
+static int event_trigger_regex_release(struct inode *inode, struct file *file)
+{
+ mutex_lock(&event_mutex);
+
+ if (file->f_mode & FMODE_READ)
+ seq_release(inode, file);
+
+ mutex_unlock(&event_mutex);
+
+ return 0;
+}
+
+static ssize_t
+event_trigger_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ return event_trigger_regex_write(filp, ubuf, cnt, ppos);
+}
+
+static int
+event_trigger_open(struct inode *inode, struct file *filp)
+{
+ return event_trigger_regex_open(inode, filp);
+}
+
+static int
+event_trigger_release(struct inode *inode, struct file *file)
+{
+ return event_trigger_regex_release(inode, file);
+}
+
+const struct file_operations event_trigger_fops = {
+ .open = event_trigger_open,
+ .read = seq_read,
+ .write = event_trigger_write,
+ .llseek = tracing_lseek,
+ .release = event_trigger_release,
+};
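/*
 * Userspace usage sketch (not part of this diff): writing a command
 * string to an event's "trigger" file lands in event_trigger_regex_write()
 * -> trigger_process_regex() above, and a leading '!' removes a previously
 * set trigger.  The tracefs path, the sched_switch event and the filter
 * below are illustrative only; other commands ("snapshot", "stacktrace",
 * "enable_event:...") use the same syntax.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *path =
		"/sys/kernel/debug/tracing/events/sched/sched_switch/trigger";
	const char *cmd = "traceoff:5 if prev_comm == \"bash\"\n";
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror("open trigger file");
		return 1;
	}
	if (write(fd, cmd, strlen(cmd)) < 0)
		perror("write trigger");
	close(fd);
	return 0;
}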
+
+/*
+ * Currently we only register event commands from __init, so mark this
+ * __init too.
+ */
+static __init int register_event_command(struct event_command *cmd)
+{
+ struct event_command *p;
+ int ret = 0;
+
+ mutex_lock(&trigger_cmd_mutex);
+ list_for_each_entry(p, &trigger_commands, list) {
+ if (strcmp(cmd->name, p->name) == 0) {
+ ret = -EBUSY;
+ goto out_unlock;
+ }
+ }
+ list_add(&cmd->list, &trigger_commands);
+ out_unlock:
+ mutex_unlock(&trigger_cmd_mutex);
+
+ return ret;
+}
+
+/*
+ * Currently we only unregister event commands from __init, so mark
+ * this __init too.
+ */
+static __init int unregister_event_command(struct event_command *cmd)
+{
+ struct event_command *p, *n;
+ int ret = -ENODEV;
+
+ mutex_lock(&trigger_cmd_mutex);
+ list_for_each_entry_safe(p, n, &trigger_commands, list) {
+ if (strcmp(cmd->name, p->name) == 0) {
+ ret = 0;
+ list_del_init(&p->list);
+ goto out_unlock;
+ }
+ }
+ out_unlock:
+ mutex_unlock(&trigger_cmd_mutex);
+
+ return ret;
+}
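/*
 * Sketch of the pattern the rest of this file follows when hooking a new
 * command into register_event_command(); "mycmd", ETT_MYCMD and
 * mycmd_get_trigger_ops() are hypothetical, while the other callbacks are
 * the generic helpers defined below.
 */
static struct event_command trigger_mycmd_cmd = {
	.name			= "mycmd",
	.trigger_type		= ETT_MYCMD,		/* hypothetical type bit */
	.func			= event_trigger_callback,
	.reg			= register_trigger,
	.unreg			= unregister_trigger,
	.get_trigger_ops	= mycmd_get_trigger_ops,	/* hypothetical */
	.set_filter		= set_trigger_filter,
};

static __init int register_trigger_mycmd_cmd(void)
{
	int ret = register_event_command(&trigger_mycmd_cmd);

	WARN_ON(ret < 0);	/* -EBUSY if the name is already taken */
	return ret;
}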
+
+/**
+ * event_trigger_print - Generic event_trigger_ops @print implementation
+ * @name: The name of the event trigger
+ * @m: The seq_file being printed to
+ * @data: Trigger-specific data
+ * @filter_str: filter_str to print, if present
+ *
+ * Common implementation for event triggers to print themselves.
+ *
+ * Usually wrapped by a function that simply sets the @name of the
+ * trigger command and then invokes this.
+ *
+ * Return: 0 on success, errno otherwise
+ */
+static int
+event_trigger_print(const char *name, struct seq_file *m,
+ void *data, char *filter_str)
+{
+ long count = (long)data;
+
+ seq_printf(m, "%s", name);
+
+ if (count == -1)
+ seq_puts(m, ":unlimited");
+ else
+ seq_printf(m, ":count=%ld", count);
+
+ if (filter_str)
+ seq_printf(m, " if %s\n", filter_str);
+ else
+ seq_puts(m, "\n");
+
+ return 0;
+}
+
+/**
+ * event_trigger_init - Generic event_trigger_ops @init implementation
+ * @ops: The trigger ops associated with the trigger
+ * @data: Trigger-specific data
+ *
+ * Common implementation of event trigger initialization.
+ *
+ * Usually used directly as the @init method in event trigger
+ * implementations.
+ *
+ * Return: 0 on success, errno otherwise
+ */
+static int
+event_trigger_init(struct event_trigger_ops *ops,
+ struct event_trigger_data *data)
+{
+ data->ref++;
+ return 0;
+}
+
+/**
+ * event_trigger_free - Generic event_trigger_ops @free implementation
+ * @ops: The trigger ops associated with the trigger
+ * @data: Trigger-specific data
+ *
+ * Common implementation of event trigger de-initialization.
+ *
+ * Usually used directly as the @free method in event trigger
+ * implementations.
+ */
+static void
+event_trigger_free(struct event_trigger_ops *ops,
+ struct event_trigger_data *data)
+{
+ if (WARN_ON_ONCE(data->ref <= 0))
+ return;
+
+ data->ref--;
+ if (!data->ref)
+ trigger_data_free(data);
+}
+
+static int trace_event_trigger_enable_disable(struct ftrace_event_file *file,
+ int trigger_enable)
+{
+ int ret = 0;
+
+ if (trigger_enable) {
+ if (atomic_inc_return(&file->tm_ref) > 1)
+ return ret;
+ set_bit(FTRACE_EVENT_FL_TRIGGER_MODE_BIT, &file->flags);
+ ret = trace_event_enable_disable(file, 1, 1);
+ } else {
+ if (atomic_dec_return(&file->tm_ref) > 0)
+ return ret;
+ clear_bit(FTRACE_EVENT_FL_TRIGGER_MODE_BIT, &file->flags);
+ ret = trace_event_enable_disable(file, 0, 1);
+ }
+
+ return ret;
+}
+
+/**
+ * clear_event_triggers - Clear all triggers associated with a trace array
+ * @tr: The trace array to clear
+ *
+ * For each trigger, the triggering event has its tm_ref decremented
+ * via trace_event_trigger_enable_disable(), and any associated event
+ * (in the case of enable/disable_event triggers) will have its sm_ref
+ * decremented via free()->trace_event_enable_disable(). That
+ * combination effectively reverses the soft-mode/trigger state added
+ * by trigger registration.
+ *
+ * Must be called with event_mutex held.
+ */
+void
+clear_event_triggers(struct trace_array *tr)
+{
+ struct ftrace_event_file *file;
+
+ list_for_each_entry(file, &tr->events, list) {
+ struct event_trigger_data *data;
+ list_for_each_entry_rcu(data, &file->triggers, list) {
+ trace_event_trigger_enable_disable(file, 0);
+ if (data->ops->free)
+ data->ops->free(data->ops, data);
+ }
+ }
+}
+
+/**
+ * update_cond_flag - Set or reset the TRIGGER_COND bit
+ * @file: The ftrace_event_file associated with the event
+ *
+ * If an event has triggers and any of those triggers has a filter or
+ * a post_trigger, trigger invocation needs to be deferred until after
+ * the current event has logged its data, and the event should have
+ * its TRIGGER_COND bit set, otherwise the TRIGGER_COND bit should be
+ * cleared.
+ */
+static void update_cond_flag(struct ftrace_event_file *file)
+{
+ struct event_trigger_data *data;
+ bool set_cond = false;
+
+ list_for_each_entry_rcu(data, &file->triggers, list) {
+ if (data->filter || data->cmd_ops->post_trigger) {
+ set_cond = true;
+ break;
+ }
+ }
+
+ if (set_cond)
+ set_bit(FTRACE_EVENT_FL_TRIGGER_COND_BIT, &file->flags);
+ else
+ clear_bit(FTRACE_EVENT_FL_TRIGGER_COND_BIT, &file->flags);
+}
+
+/**
+ * register_trigger - Generic event_command @reg implementation
+ * @glob: The raw string used to register the trigger
+ * @ops: The trigger ops associated with the trigger
+ * @data: Trigger-specific data to associate with the trigger
+ * @file: The ftrace_event_file associated with the event
+ *
+ * Common implementation for event trigger registration.
+ *
+ * Usually used directly as the @reg method in event command
+ * implementations.
+ *
+ * Return: 0 on success, errno otherwise
+ */
+static int register_trigger(char *glob, struct event_trigger_ops *ops,
+ struct event_trigger_data *data,
+ struct ftrace_event_file *file)
+{
+ struct event_trigger_data *test;
+ int ret = 0;
+
+ list_for_each_entry_rcu(test, &file->triggers, list) {
+ if (test->cmd_ops->trigger_type == data->cmd_ops->trigger_type) {
+ ret = -EEXIST;
+ goto out;
+ }
+ }
+
+ if (data->ops->init) {
+ ret = data->ops->init(data->ops, data);
+ if (ret < 0)
+ goto out;
+ }
+
+ list_add_rcu(&data->list, &file->triggers);
+ ret++;
+
+ if (trace_event_trigger_enable_disable(file, 1) < 0) {
+ list_del_rcu(&data->list);
+ ret--;
+ }
+ update_cond_flag(file);
+out:
+ return ret;
+}
+
+/**
+ * unregister_trigger - Generic event_command @unreg implementation
+ * @glob: The raw string used to register the trigger
+ * @ops: The trigger ops associated with the trigger
+ * @test: Trigger-specific data used to find the trigger to remove
+ * @file: The ftrace_event_file associated with the event
+ *
+ * Common implementation for event trigger unregistration.
+ *
+ * Usually used directly as the @unreg method in event command
+ * implementations.
+ */
+static void unregister_trigger(char *glob, struct event_trigger_ops *ops,
+ struct event_trigger_data *test,
+ struct ftrace_event_file *file)
+{
+ struct event_trigger_data *data;
+ bool unregistered = false;
+
+ list_for_each_entry_rcu(data, &file->triggers, list) {
+ if (data->cmd_ops->trigger_type == test->cmd_ops->trigger_type) {
+ unregistered = true;
+ list_del_rcu(&data->list);
+ update_cond_flag(file);
+ trace_event_trigger_enable_disable(file, 0);
+ break;
+ }
+ }
+
+ if (unregistered && data->ops->free)
+ data->ops->free(data->ops, data);
+}
+
+/**
+ * event_trigger_callback - Generic event_command @func implementation
+ * @cmd_ops: The command ops, used for trigger registration
+ * @file: The ftrace_event_file associated with the event
+ * @glob: The raw string used to register the trigger
+ * @cmd: The cmd portion of the string used to register the trigger
+ * @param: The params portion of the string used to register the trigger
+ *
+ * Common implementation for event command parsing and trigger
+ * instantiation.
+ *
+ * Usually used directly as the @func method in event command
+ * implementations.
+ *
+ * Return: 0 on success, errno otherwise
+ */
+static int
+event_trigger_callback(struct event_command *cmd_ops,
+ struct ftrace_event_file *file,
+ char *glob, char *cmd, char *param)
+{
+ struct event_trigger_data *trigger_data;
+ struct event_trigger_ops *trigger_ops;
+ char *trigger = NULL;
+ char *number;
+ int ret;
+
+ /* separate the trigger from the filter (t:n [if filter]) */
+ if (param && isdigit(param[0]))
+ trigger = strsep(&param, " \t");
+
+ trigger_ops = cmd_ops->get_trigger_ops(cmd, trigger);
+
+ ret = -ENOMEM;
+ trigger_data = kzalloc(sizeof(*trigger_data), GFP_KERNEL);
+ if (!trigger_data)
+ goto out;
+
+ trigger_data->count = -1;
+ trigger_data->ops = trigger_ops;
+ trigger_data->cmd_ops = cmd_ops;
+ INIT_LIST_HEAD(&trigger_data->list);
+
+ if (glob[0] == '!') {
+ cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file);
+ kfree(trigger_data);
+ ret = 0;
+ goto out;
+ }
+
+ if (trigger) {
+ number = strsep(&trigger, ":");
+
+ ret = -EINVAL;
+ if (!strlen(number))
+ goto out_free;
+
+ /*
+ * We use the callback data field (which is a pointer)
+ * as our counter.
+ */
+ ret = kstrtoul(number, 0, &trigger_data->count);
+ if (ret)
+ goto out_free;
+ }
+
+ if (!param) /* if param is non-empty, it's supposed to be a filter */
+ goto out_reg;
+
+ if (!cmd_ops->set_filter)
+ goto out_reg;
+
+ ret = cmd_ops->set_filter(param, trigger_data, file);
+ if (ret < 0)
+ goto out_free;
+
+ out_reg:
+ ret = cmd_ops->reg(glob, trigger_ops, trigger_data, file);
+ /*
+ * On success, the above returns the number of triggers registered;
+ * if it didn't register any, it returns zero.  Consider registering
+ * none a failure too.
+ */
+ if (!ret) {
+ ret = -ENOENT;
+ goto out_free;
+ } else if (ret < 0)
+ goto out_free;
+ ret = 0;
+ out:
+ return ret;
+
+ out_free:
+ if (cmd_ops->set_filter)
+ cmd_ops->set_filter(NULL, trigger_data, NULL);
+ kfree(trigger_data);
+ goto out;
+}
+
+/**
+ * set_trigger_filter - Generic event_command @set_filter implementation
+ * @filter_str: The filter string for the trigger, NULL to remove filter
+ * @trigger_data: Trigger-specific data
+ * @file: The ftrace_event_file associated with the event
+ *
+ * Common implementation for event command filter parsing and filter
+ * instantiation.
+ *
+ * Usually used directly as the @set_filter method in event command
+ * implementations.
+ *
+ * Also used to remove a filter (if filter_str = NULL).
+ *
+ * Return: 0 on success, errno otherwise
+ */
+static int set_trigger_filter(char *filter_str,
+ struct event_trigger_data *trigger_data,
+ struct ftrace_event_file *file)
+{
+ struct event_trigger_data *data = trigger_data;
+ struct event_filter *filter = NULL, *tmp;
+ int ret = -EINVAL;
+ char *s;
+
+ if (!filter_str) /* clear the current filter */
+ goto assign;
+
+ s = strsep(&filter_str, " \t");
+
+ if (!strlen(s) || strcmp(s, "if") != 0)
+ goto out;
+
+ if (!filter_str)
+ goto out;
+
+ /* The filter is for the 'trigger' event, not the triggered event */
+ ret = create_event_filter(file->event_call, filter_str, false, &filter);
+ if (ret)
+ goto out;
+ assign:
+ tmp = rcu_access_pointer(data->filter);
+
+ rcu_assign_pointer(data->filter, filter);
+
+ if (tmp) {
+ /* Make sure the call is done with the filter */
+ synchronize_sched();
+ free_event_filter(tmp);
+ }
+
+ kfree(data->filter_str);
+ data->filter_str = NULL;
+
+ if (filter_str) {
+ data->filter_str = kstrdup(filter_str, GFP_KERNEL);
+ if (!data->filter_str) {
+ free_event_filter(rcu_access_pointer(data->filter));
+ data->filter = NULL;
+ ret = -ENOMEM;
+ }
+ }
+ out:
+ return ret;
+}
+
+static void
+traceon_trigger(struct event_trigger_data *data)
+{
+ if (tracing_is_on())
+ return;
+
+ tracing_on();
+}
+
+static void
+traceon_count_trigger(struct event_trigger_data *data)
+{
+ if (tracing_is_on())
+ return;
+
+ if (!data->count)
+ return;
+
+ if (data->count != -1)
+ (data->count)--;
+
+ tracing_on();
+}
+
+static void
+traceoff_trigger(struct event_trigger_data *data)
+{
+ if (!tracing_is_on())
+ return;
+
+ tracing_off();
+}
+
+static void
+traceoff_count_trigger(struct event_trigger_data *data)
+{
+ if (!tracing_is_on())
+ return;
+
+ if (!data->count)
+ return;
+
+ if (data->count != -1)
+ (data->count)--;
+
+ tracing_off();
+}
+
+static int
+traceon_trigger_print(struct seq_file *m, struct event_trigger_ops *ops,
+ struct event_trigger_data *data)
+{
+ return event_trigger_print("traceon", m, (void *)data->count,
+ data->filter_str);
+}
+
+static int
+traceoff_trigger_print(struct seq_file *m, struct event_trigger_ops *ops,
+ struct event_trigger_data *data)
+{
+ return event_trigger_print("traceoff", m, (void *)data->count,
+ data->filter_str);
+}
+
+static struct event_trigger_ops traceon_trigger_ops = {
+ .func = traceon_trigger,
+ .print = traceon_trigger_print,
+ .init = event_trigger_init,
+ .free = event_trigger_free,
+};
+
+static struct event_trigger_ops traceon_count_trigger_ops = {
+ .func = traceon_count_trigger,
+ .print = traceon_trigger_print,
+ .init = event_trigger_init,
+ .free = event_trigger_free,
+};
+
+static struct event_trigger_ops traceoff_trigger_ops = {
+ .func = traceoff_trigger,
+ .print = traceoff_trigger_print,
+ .init = event_trigger_init,
+ .free = event_trigger_free,
+};
+
+static struct event_trigger_ops traceoff_count_trigger_ops = {
+ .func = traceoff_count_trigger,
+ .print = traceoff_trigger_print,
+ .init = event_trigger_init,
+ .free = event_trigger_free,
+};
+
+static struct event_trigger_ops *
+onoff_get_trigger_ops(char *cmd, char *param)
+{
+ struct event_trigger_ops *ops;
+
+ /* we register both traceon and traceoff to this callback */
+ if (strcmp(cmd, "traceon") == 0)
+ ops = param ? &traceon_count_trigger_ops :
+ &traceon_trigger_ops;
+ else
+ ops = param ? &traceoff_count_trigger_ops :
+ &traceoff_trigger_ops;
+
+ return ops;
+}
+
+static struct event_command trigger_traceon_cmd = {
+ .name = "traceon",
+ .trigger_type = ETT_TRACE_ONOFF,
+ .func = event_trigger_callback,
+ .reg = register_trigger,
+ .unreg = unregister_trigger,
+ .get_trigger_ops = onoff_get_trigger_ops,
+ .set_filter = set_trigger_filter,
+};
+
+static struct event_command trigger_traceoff_cmd = {
+ .name = "traceoff",
+ .trigger_type = ETT_TRACE_ONOFF,
+ .func = event_trigger_callback,
+ .reg = register_trigger,
+ .unreg = unregister_trigger,
+ .get_trigger_ops = onoff_get_trigger_ops,
+ .set_filter = set_trigger_filter,
+};
+
+#ifdef CONFIG_TRACER_SNAPSHOT
+static void
+snapshot_trigger(struct event_trigger_data *data)
+{
+ tracing_snapshot();
+}
+
+static void
+snapshot_count_trigger(struct event_trigger_data *data)
+{
+ if (!data->count)
+ return;
+
+ if (data->count != -1)
+ (data->count)--;
+
+ snapshot_trigger(data);
+}
+
+static int
+register_snapshot_trigger(char *glob, struct event_trigger_ops *ops,
+ struct event_trigger_data *data,
+ struct ftrace_event_file *file)
+{
+ int ret = register_trigger(glob, ops, data, file);
+
+ if (ret > 0 && tracing_alloc_snapshot() != 0) {
+ unregister_trigger(glob, ops, data, file);
+ ret = 0;
+ }
+
+ return ret;
+}
+
+static int
+snapshot_trigger_print(struct seq_file *m, struct event_trigger_ops *ops,
+ struct event_trigger_data *data)
+{
+ return event_trigger_print("snapshot", m, (void *)data->count,
+ data->filter_str);
+}
+
+static struct event_trigger_ops snapshot_trigger_ops = {
+ .func = snapshot_trigger,
+ .print = snapshot_trigger_print,
+ .init = event_trigger_init,
+ .free = event_trigger_free,
+};
+
+static struct event_trigger_ops snapshot_count_trigger_ops = {
+ .func = snapshot_count_trigger,
+ .print = snapshot_trigger_print,
+ .init = event_trigger_init,
+ .free = event_trigger_free,
+};
+
+static struct event_trigger_ops *
+snapshot_get_trigger_ops(char *cmd, char *param)
+{
+ return param ? &snapshot_count_trigger_ops : &snapshot_trigger_ops;
+}
+
+static struct event_command trigger_snapshot_cmd = {
+ .name = "snapshot",
+ .trigger_type = ETT_SNAPSHOT,
+ .func = event_trigger_callback,
+ .reg = register_snapshot_trigger,
+ .unreg = unregister_trigger,
+ .get_trigger_ops = snapshot_get_trigger_ops,
+ .set_filter = set_trigger_filter,
+};
+
+static __init int register_trigger_snapshot_cmd(void)
+{
+ int ret;
+
+ ret = register_event_command(&trigger_snapshot_cmd);
+ WARN_ON(ret < 0);
+
+ return ret;
+}
+#else
+static __init int register_trigger_snapshot_cmd(void) { return 0; }
+#endif /* CONFIG_TRACER_SNAPSHOT */
+
+#ifdef CONFIG_STACKTRACE
+/*
+ * Skip 3:
+ * stacktrace_trigger()
+ * event_triggers_post_call()
+ * ftrace_raw_event_xxx()
+ */
+#define STACK_SKIP 3
+
+static void
+stacktrace_trigger(struct event_trigger_data *data)
+{
+ trace_dump_stack(STACK_SKIP);
+}
+
+static void
+stacktrace_count_trigger(struct event_trigger_data *data)
+{
+ if (!data->count)
+ return;
+
+ if (data->count != -1)
+ (data->count)--;
+
+ stacktrace_trigger(data);
+}
+
+static int
+stacktrace_trigger_print(struct seq_file *m, struct event_trigger_ops *ops,
+ struct event_trigger_data *data)
+{
+ return event_trigger_print("stacktrace", m, (void *)data->count,
+ data->filter_str);
+}
+
+static struct event_trigger_ops stacktrace_trigger_ops = {
+ .func = stacktrace_trigger,
+ .print = stacktrace_trigger_print,
+ .init = event_trigger_init,
+ .free = event_trigger_free,
+};
+
+static struct event_trigger_ops stacktrace_count_trigger_ops = {
+ .func = stacktrace_count_trigger,
+ .print = stacktrace_trigger_print,
+ .init = event_trigger_init,
+ .free = event_trigger_free,
+};
+
+static struct event_trigger_ops *
+stacktrace_get_trigger_ops(char *cmd, char *param)
+{
+ return param ? &stacktrace_count_trigger_ops : &stacktrace_trigger_ops;
+}
+
+static struct event_command trigger_stacktrace_cmd = {
+ .name = "stacktrace",
+ .trigger_type = ETT_STACKTRACE,
+ .post_trigger = true,
+ .func = event_trigger_callback,
+ .reg = register_trigger,
+ .unreg = unregister_trigger,
+ .get_trigger_ops = stacktrace_get_trigger_ops,
+ .set_filter = set_trigger_filter,
+};
+
+static __init int register_trigger_stacktrace_cmd(void)
+{
+ int ret;
+
+ ret = register_event_command(&trigger_stacktrace_cmd);
+ WARN_ON(ret < 0);
+
+ return ret;
+}
+#else
+static __init int register_trigger_stacktrace_cmd(void) { return 0; }
+#endif /* CONFIG_STACKTRACE */
+
+static __init void unregister_trigger_traceon_traceoff_cmds(void)
+{
+ unregister_event_command(&trigger_traceon_cmd);
+ unregister_event_command(&trigger_traceoff_cmd);
+}
+
+/* Avoid typos */
+#define ENABLE_EVENT_STR "enable_event"
+#define DISABLE_EVENT_STR "disable_event"
+
+struct enable_trigger_data {
+ struct ftrace_event_file *file;
+ bool enable;
+};
+
+static void
+event_enable_trigger(struct event_trigger_data *data)
+{
+ struct enable_trigger_data *enable_data = data->private_data;
+
+ if (enable_data->enable)
+ clear_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &enable_data->file->flags);
+ else
+ set_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &enable_data->file->flags);
+}
+
+static void
+event_enable_count_trigger(struct event_trigger_data *data)
+{
+ struct enable_trigger_data *enable_data = data->private_data;
+
+ if (!data->count)
+ return;
+
+ /* Skip if the event is already in the state we want to switch it to */
+ if (enable_data->enable == !(enable_data->file->flags & FTRACE_EVENT_FL_SOFT_DISABLED))
+ return;
+
+ if (data->count != -1)
+ (data->count)--;
+
+ event_enable_trigger(data);
+}
+
+static int
+event_enable_trigger_print(struct seq_file *m, struct event_trigger_ops *ops,
+ struct event_trigger_data *data)
+{
+ struct enable_trigger_data *enable_data = data->private_data;
+
+ seq_printf(m, "%s:%s:%s",
+ enable_data->enable ? ENABLE_EVENT_STR : DISABLE_EVENT_STR,
+ enable_data->file->event_call->class->system,
+ ftrace_event_name(enable_data->file->event_call));
+
+ if (data->count == -1)
+ seq_puts(m, ":unlimited");
+ else
+ seq_printf(m, ":count=%ld", data->count);
+
+ if (data->filter_str)
+ seq_printf(m, " if %s\n", data->filter_str);
+ else
+ seq_puts(m, "\n");
+
+ return 0;
+}
+
+static void
+event_enable_trigger_free(struct event_trigger_ops *ops,
+ struct event_trigger_data *data)
+{
+ struct enable_trigger_data *enable_data = data->private_data;
+
+ if (WARN_ON_ONCE(data->ref <= 0))
+ return;
+
+ data->ref--;
+ if (!data->ref) {
+ /* Remove the SOFT_MODE flag */
+ trace_event_enable_disable(enable_data->file, 0, 1);
+ module_put(enable_data->file->event_call->mod);
+ trigger_data_free(data);
+ kfree(enable_data);
+ }
+}
+
+static struct event_trigger_ops event_enable_trigger_ops = {
+ .func = event_enable_trigger,
+ .print = event_enable_trigger_print,
+ .init = event_trigger_init,
+ .free = event_enable_trigger_free,
+};
+
+static struct event_trigger_ops event_enable_count_trigger_ops = {
+ .func = event_enable_count_trigger,
+ .print = event_enable_trigger_print,
+ .init = event_trigger_init,
+ .free = event_enable_trigger_free,
+};
+
+static struct event_trigger_ops event_disable_trigger_ops = {
+ .func = event_enable_trigger,
+ .print = event_enable_trigger_print,
+ .init = event_trigger_init,
+ .free = event_enable_trigger_free,
+};
+
+static struct event_trigger_ops event_disable_count_trigger_ops = {
+ .func = event_enable_count_trigger,
+ .print = event_enable_trigger_print,
+ .init = event_trigger_init,
+ .free = event_enable_trigger_free,
+};
+
+static int
+event_enable_trigger_func(struct event_command *cmd_ops,
+ struct ftrace_event_file *file,
+ char *glob, char *cmd, char *param)
+{
+ struct ftrace_event_file *event_enable_file;
+ struct enable_trigger_data *enable_data;
+ struct event_trigger_data *trigger_data;
+ struct event_trigger_ops *trigger_ops;
+ struct trace_array *tr = file->tr;
+ const char *system;
+ const char *event;
+ char *trigger;
+ char *number;
+ bool enable;
+ int ret;
+
+ if (!param)
+ return -EINVAL;
+
+ /* separate the trigger from the filter (s:e:n [if filter]) */
+ trigger = strsep(&param, " \t");
+ if (!trigger)
+ return -EINVAL;
+
+ system = strsep(&trigger, ":");
+ if (!trigger)
+ return -EINVAL;
+
+ event = strsep(&trigger, ":");
+
+ ret = -EINVAL;
+ event_enable_file = find_event_file(tr, system, event);
+ if (!event_enable_file)
+ goto out;
+
+ enable = strcmp(cmd, ENABLE_EVENT_STR) == 0;
+
+ trigger_ops = cmd_ops->get_trigger_ops(cmd, trigger);
+
+ ret = -ENOMEM;
+ trigger_data = kzalloc(sizeof(*trigger_data), GFP_KERNEL);
+ if (!trigger_data)
+ goto out;
+
+ enable_data = kzalloc(sizeof(*enable_data), GFP_KERNEL);
+ if (!enable_data) {
+ kfree(trigger_data);
+ goto out;
+ }
+
+ trigger_data->count = -1;
+ trigger_data->ops = trigger_ops;
+ trigger_data->cmd_ops = cmd_ops;
+ INIT_LIST_HEAD(&trigger_data->list);
+ RCU_INIT_POINTER(trigger_data->filter, NULL);
+
+ enable_data->enable = enable;
+ enable_data->file = event_enable_file;
+ trigger_data->private_data = enable_data;
+
+ if (glob[0] == '!') {
+ cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file);
+ kfree(trigger_data);
+ kfree(enable_data);
+ ret = 0;
+ goto out;
+ }
+
+ if (trigger) {
+ number = strsep(&trigger, ":");
+
+ ret = -EINVAL;
+ if (!strlen(number))
+ goto out_free;
+
+ /*
+ * We use the callback data field (which is a pointer)
+ * as our counter.
+ */
+ ret = kstrtoul(number, 0, &trigger_data->count);
+ if (ret)
+ goto out_free;
+ }
+
+ if (!param) /* if param is non-empty, it's supposed to be a filter */
+ goto out_reg;
+
+ if (!cmd_ops->set_filter)
+ goto out_reg;
+
+ ret = cmd_ops->set_filter(param, trigger_data, file);
+ if (ret < 0)
+ goto out_free;
+
+ out_reg:
+ /* Don't let event modules unload while probe registered */
+ ret = try_module_get(event_enable_file->event_call->mod);
+ if (!ret) {
+ ret = -EBUSY;
+ goto out_free;
+ }
+
+ ret = trace_event_enable_disable(event_enable_file, 1, 1);
+ if (ret < 0)
+ goto out_put;
+ ret = cmd_ops->reg(glob, trigger_ops, trigger_data, file);
+ /*
+ * On success, the above returns the number of triggers registered;
+ * if it didn't register any, it returns zero.  Consider registering
+ * none a failure too.
+ */
+ if (!ret) {
+ ret = -ENOENT;
+ goto out_disable;
+ } else if (ret < 0)
+ goto out_disable;
+ /* Just return zero, not the number of triggers registered */
+ ret = 0;
+ out:
+ return ret;
+
+ out_disable:
+ trace_event_enable_disable(event_enable_file, 0, 1);
+ out_put:
+ module_put(event_enable_file->event_call->mod);
+ out_free:
+ if (cmd_ops->set_filter)
+ cmd_ops->set_filter(NULL, trigger_data, NULL);
+ kfree(trigger_data);
+ kfree(enable_data);
+ goto out;
+}
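/*
 * Parsing walk-through for the function above (the event and filter are
 * illustrative).  Writing
 *
 *	enable_event:sched:sched_wakeup:3 if comm == "bash"
 *
 * to some other event's trigger file arrives here with cmd = "enable_event"
 * and param = 'sched:sched_wakeup:3 if comm == "bash"'.  The first strsep()
 * yields trigger = "sched:sched_wakeup:3" and leaves param = 'if comm ==
 * "bash"' for set_trigger_filter(); the next two strsep() calls peel off
 * system = "sched" and event = "sched_wakeup", and the remaining "3"
 * becomes trigger_data->count.
 */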
+
+static int event_enable_register_trigger(char *glob,
+ struct event_trigger_ops *ops,
+ struct event_trigger_data *data,
+ struct ftrace_event_file *file)
+{
+ struct enable_trigger_data *enable_data = data->private_data;
+ struct enable_trigger_data *test_enable_data;
+ struct event_trigger_data *test;
+ int ret = 0;
+
+ list_for_each_entry_rcu(test, &file->triggers, list) {
+ test_enable_data = test->private_data;
+ if (test_enable_data &&
+ (test_enable_data->file == enable_data->file)) {
+ ret = -EEXIST;
+ goto out;
+ }
+ }
+
+ if (data->ops->init) {
+ ret = data->ops->init(data->ops, data);
+ if (ret < 0)
+ goto out;
+ }
+
+ list_add_rcu(&data->list, &file->triggers);
+ ret++;
+
+ if (trace_event_trigger_enable_disable(file, 1) < 0) {
+ list_del_rcu(&data->list);
+ ret--;
+ }
+ update_cond_flag(file);
+out:
+ return ret;
+}
+
+static void event_enable_unregister_trigger(char *glob,
+ struct event_trigger_ops *ops,
+ struct event_trigger_data *test,
+ struct ftrace_event_file *file)
+{
+ struct enable_trigger_data *test_enable_data = test->private_data;
+ struct enable_trigger_data *enable_data;
+ struct event_trigger_data *data;
+ bool unregistered = false;
+
+ list_for_each_entry_rcu(data, &file->triggers, list) {
+ enable_data = data->private_data;
+ if (enable_data &&
+ (enable_data->file == test_enable_data->file)) {
+ unregistered = true;
+ list_del_rcu(&data->list);
+ update_cond_flag(file);
+ trace_event_trigger_enable_disable(file, 0);
+ break;
+ }
+ }
+
+ if (unregistered && data->ops->free)
+ data->ops->free(data->ops, data);
+}
+
+static struct event_trigger_ops *
+event_enable_get_trigger_ops(char *cmd, char *param)
+{
+ struct event_trigger_ops *ops;
+ bool enable;
+
+ enable = strcmp(cmd, ENABLE_EVENT_STR) == 0;
+
+ if (enable)
+ ops = param ? &event_enable_count_trigger_ops :
+ &event_enable_trigger_ops;
+ else
+ ops = param ? &event_disable_count_trigger_ops :
+ &event_disable_trigger_ops;
+
+ return ops;
+}
+
+static struct event_command trigger_enable_cmd = {
+ .name = ENABLE_EVENT_STR,
+ .trigger_type = ETT_EVENT_ENABLE,
+ .func = event_enable_trigger_func,
+ .reg = event_enable_register_trigger,
+ .unreg = event_enable_unregister_trigger,
+ .get_trigger_ops = event_enable_get_trigger_ops,
+ .set_filter = set_trigger_filter,
+};
+
+static struct event_command trigger_disable_cmd = {
+ .name = DISABLE_EVENT_STR,
+ .trigger_type = ETT_EVENT_ENABLE,
+ .func = event_enable_trigger_func,
+ .reg = event_enable_register_trigger,
+ .unreg = event_enable_unregister_trigger,
+ .get_trigger_ops = event_enable_get_trigger_ops,
+ .set_filter = set_trigger_filter,
+};
+
+static __init void unregister_trigger_enable_disable_cmds(void)
+{
+ unregister_event_command(&trigger_enable_cmd);
+ unregister_event_command(&trigger_disable_cmd);
+}
+
+static __init int register_trigger_enable_disable_cmds(void)
+{
+ int ret;
+
+ ret = register_event_command(&trigger_enable_cmd);
+ if (WARN_ON(ret < 0))
+ return ret;
+ ret = register_event_command(&trigger_disable_cmd);
+ if (WARN_ON(ret < 0))
+ unregister_trigger_enable_disable_cmds();
+
+ return ret;
+}
+
+static __init int register_trigger_traceon_traceoff_cmds(void)
+{
+ int ret;
+
+ ret = register_event_command(&trigger_traceon_cmd);
+ if (WARN_ON(ret < 0))
+ return ret;
+ ret = register_event_command(&trigger_traceoff_cmd);
+ if (WARN_ON(ret < 0))
+ unregister_trigger_traceon_traceoff_cmds();
+
+ return ret;
+}
+
+__init int register_trigger_cmds(void)
+{
+ register_trigger_traceon_traceoff_cmds();
+ register_trigger_snapshot_cmd();
+ register_trigger_stacktrace_cmd();
+ register_trigger_enable_disable_cmds();
+
+ return 0;
+}
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index d21a7467008..d4ddde28a81 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -95,15 +95,12 @@ static void __always_unused ____ftrace_check_##name(void) \
#undef __array
#define __array(type, item, len) \
do { \
+ char *type_str = #type"["__stringify(len)"]"; \
BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \
- mutex_lock(&event_storage_mutex); \
- snprintf(event_storage, sizeof(event_storage), \
- "%s[%d]", #type, len); \
- ret = trace_define_field(event_call, event_storage, #item, \
+ ret = trace_define_field(event_call, type_str, #item, \
offsetof(typeof(field), item), \
sizeof(field.item), \
is_signed_type(type), filter_type); \
- mutex_unlock(&event_storage_mutex); \
if (ret) \
return ret; \
} while (0);
@@ -176,11 +173,13 @@ struct ftrace_event_class __refdata event_class_ftrace_##call = { \
}; \
\
struct ftrace_event_call __used event_##call = { \
- .name = #call, \
- .event.type = etype, \
.class = &event_class_ftrace_##call, \
+ { \
+ .name = #call, \
+ }, \
+ .event.type = etype, \
.print_fmt = print, \
- .flags = TRACE_EVENT_FL_IGNORE_ENABLE, \
+ .flags = TRACE_EVENT_FL_IGNORE_ENABLE | TRACE_EVENT_FL_USE_CALL_FILTER, \
}; \
struct ftrace_event_call __used \
__attribute__((section("_ftrace_events"))) *__event_##call = &event_##call;
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 38fe1483c50..57f0ec962d2 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -13,33 +13,106 @@
#include <linux/debugfs.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
+#include <linux/slab.h>
#include <linux/fs.h>
#include "trace.h"
-/* function tracing enabled */
-static int ftrace_function_enabled;
+static void tracing_start_function_trace(struct trace_array *tr);
+static void tracing_stop_function_trace(struct trace_array *tr);
+static void
+function_trace_call(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *op, struct pt_regs *pt_regs);
+static void
+function_stack_trace_call(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *op, struct pt_regs *pt_regs);
+static struct tracer_flags func_flags;
+
+/* Our option */
+enum {
+ TRACE_FUNC_OPT_STACK = 0x1,
+};
+
+static int allocate_ftrace_ops(struct trace_array *tr)
+{
+ struct ftrace_ops *ops;
-static struct trace_array *func_trace;
+ ops = kzalloc(sizeof(*ops), GFP_KERNEL);
+ if (!ops)
+ return -ENOMEM;
+
+ /* Currently only the non-stack version is supported */
+ ops->func = function_trace_call;
+ ops->flags = FTRACE_OPS_FL_RECURSION_SAFE;
+
+ tr->ops = ops;
+ ops->private = tr;
+ return 0;
+}
-static void tracing_start_function_trace(void);
-static void tracing_stop_function_trace(void);
+
+int ftrace_create_function_files(struct trace_array *tr,
+ struct dentry *parent)
+{
+ int ret;
+
+ /*
+ * The top level array uses the "global_ops", and the files are
+ * created on boot up.
+ */
+ if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
+ return 0;
+
+ ret = allocate_ftrace_ops(tr);
+ if (ret)
+ return ret;
+
+ ftrace_create_filter_files(tr->ops, parent);
+
+ return 0;
+}
+
+void ftrace_destroy_function_files(struct trace_array *tr)
+{
+ ftrace_destroy_filter_files(tr->ops);
+ kfree(tr->ops);
+ tr->ops = NULL;
+}
static int function_trace_init(struct trace_array *tr)
{
- func_trace = tr;
+ ftrace_func_t func;
+
+ /*
+ * Instance trace_arrays get their ops allocated
+ * at instance creation, unless that allocation failed.
+ */
+ if (!tr->ops)
+ return -ENOMEM;
+
+ /* Currently only the global instance can do stack tracing */
+ if (tr->flags & TRACE_ARRAY_FL_GLOBAL &&
+ func_flags.val & TRACE_FUNC_OPT_STACK)
+ func = function_stack_trace_call;
+ else
+ func = function_trace_call;
+
+ ftrace_init_array_ops(tr, func);
+
tr->trace_buffer.cpu = get_cpu();
put_cpu();
tracing_start_cmdline_record();
- tracing_start_function_trace();
+ tracing_start_function_trace(tr);
return 0;
}
static void function_trace_reset(struct trace_array *tr)
{
- tracing_stop_function_trace();
+ tracing_stop_function_trace(tr);
tracing_stop_cmdline_record();
+ ftrace_reset_array_ops(tr);
}
static void function_trace_start(struct trace_array *tr)
@@ -47,25 +120,18 @@ static void function_trace_start(struct trace_array *tr)
tracing_reset_online_cpus(&tr->trace_buffer);
}
-/* Our option */
-enum {
- TRACE_FUNC_OPT_STACK = 0x1,
-};
-
-static struct tracer_flags func_flags;
-
static void
function_trace_call(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct pt_regs *pt_regs)
{
- struct trace_array *tr = func_trace;
+ struct trace_array *tr = op->private;
struct trace_array_cpu *data;
unsigned long flags;
int bit;
int cpu;
int pc;
- if (unlikely(!ftrace_function_enabled))
+ if (unlikely(!tr->function_enabled))
return;
pc = preempt_count();
@@ -91,14 +157,14 @@ static void
function_stack_trace_call(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct pt_regs *pt_regs)
{
- struct trace_array *tr = func_trace;
+ struct trace_array *tr = op->private;
struct trace_array_cpu *data;
unsigned long flags;
long disabled;
int cpu;
int pc;
- if (unlikely(!ftrace_function_enabled))
+ if (unlikely(!tr->function_enabled))
return;
/*
@@ -128,19 +194,6 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip,
local_irq_restore(flags);
}
-
-static struct ftrace_ops trace_ops __read_mostly =
-{
- .func = function_trace_call,
- .flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE,
-};
-
-static struct ftrace_ops trace_stack_ops __read_mostly =
-{
- .func = function_stack_trace_call,
- .flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE,
-};
-
static struct tracer_opt func_opts[] = {
#ifdef CONFIG_STACKTRACE
{ TRACER_OPT(func_stack_trace, TRACE_FUNC_OPT_STACK) },
@@ -153,29 +206,21 @@ static struct tracer_flags func_flags = {
.opts = func_opts
};
-static void tracing_start_function_trace(void)
+static void tracing_start_function_trace(struct trace_array *tr)
{
- ftrace_function_enabled = 0;
-
- if (func_flags.val & TRACE_FUNC_OPT_STACK)
- register_ftrace_function(&trace_stack_ops);
- else
- register_ftrace_function(&trace_ops);
-
- ftrace_function_enabled = 1;
+ tr->function_enabled = 0;
+ register_ftrace_function(tr->ops);
+ tr->function_enabled = 1;
}
-static void tracing_stop_function_trace(void)
+static void tracing_stop_function_trace(struct trace_array *tr)
{
- ftrace_function_enabled = 0;
-
- if (func_flags.val & TRACE_FUNC_OPT_STACK)
- unregister_ftrace_function(&trace_stack_ops);
- else
- unregister_ftrace_function(&trace_ops);
+ tr->function_enabled = 0;
+ unregister_ftrace_function(tr->ops);
}
-static int func_set_flag(u32 old_flags, u32 bit, int set)
+static int
+func_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
{
switch (bit) {
case TRACE_FUNC_OPT_STACK:
@@ -183,12 +228,14 @@ static int func_set_flag(u32 old_flags, u32 bit, int set)
if (!!set == !!(func_flags.val & TRACE_FUNC_OPT_STACK))
break;
+ unregister_ftrace_function(tr->ops);
+
if (set) {
- unregister_ftrace_function(&trace_ops);
- register_ftrace_function(&trace_stack_ops);
+ tr->ops->func = function_stack_trace_call;
+ register_ftrace_function(tr->ops);
} else {
- unregister_ftrace_function(&trace_stack_ops);
- register_ftrace_function(&trace_ops);
+ tr->ops->func = function_trace_call;
+ register_ftrace_function(tr->ops);
}
break;
@@ -205,9 +252,9 @@ static struct tracer function_trace __tracer_data =
.init = function_trace_init,
.reset = function_trace_reset,
.start = function_trace_start,
- .wait_pipe = poll_wait_pipe,
.flags = &func_flags,
.set_flag = func_set_flag,
+ .allow_instances = true,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_function,
#endif
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index b5c09242683..4de3e57f723 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -38,15 +38,6 @@ struct fgraph_data {
#define TRACE_GRAPH_INDENT 2
-/* Flag options */
-#define TRACE_GRAPH_PRINT_OVERRUN 0x1
-#define TRACE_GRAPH_PRINT_CPU 0x2
-#define TRACE_GRAPH_PRINT_OVERHEAD 0x4
-#define TRACE_GRAPH_PRINT_PROC 0x8
-#define TRACE_GRAPH_PRINT_DURATION 0x10
-#define TRACE_GRAPH_PRINT_ABS_TIME 0x20
-#define TRACE_GRAPH_PRINT_IRQS 0x40
-
static unsigned int max_depth;
static struct tracer_opt trace_opts[] = {
@@ -64,11 +55,13 @@ static struct tracer_opt trace_opts[] = {
{ TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) },
/* Display interrupts */
{ TRACER_OPT(funcgraph-irqs, TRACE_GRAPH_PRINT_IRQS) },
+ /* Display function name after trailing } */
+ { TRACER_OPT(funcgraph-tail, TRACE_GRAPH_PRINT_TAIL) },
{ } /* Empty entry */
};
static struct tracer_flags tracer_flags = {
- /* Don't display overruns and proc by default */
+ /* Don't display overruns, proc, or tail by default */
.val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD |
TRACE_GRAPH_PRINT_DURATION | TRACE_GRAPH_PRINT_IRQS,
.opts = trace_opts
@@ -82,9 +75,9 @@ static struct trace_array *graph_array;
* to fill in space into DURATION column.
*/
enum {
- DURATION_FILL_FULL = -1,
- DURATION_FILL_START = -2,
- DURATION_FILL_END = -3,
+ FLAGS_FILL_FULL = 1 << TRACE_GRAPH_PRINT_FILL_SHIFT,
+ FLAGS_FILL_START = 2 << TRACE_GRAPH_PRINT_FILL_SHIFT,
+ FLAGS_FILL_END = 3 << TRACE_GRAPH_PRINT_FILL_SHIFT,
};
static enum print_line_t
@@ -114,16 +107,37 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
return -EBUSY;
}
+ /*
+ * The curr_ret_stack is an index into the ftrace return stack of
+ * the current task. Its value should be in [0, FTRACE_RETFUNC_DEPTH)
+ * when the function graph tracer is used. To support filtering out
+ * specific functions, it makes the index negative by subtracting a
+ * huge value (FTRACE_NOTRACE_DEPTH), so that when ftrace sees a
+ * negative index it ignores the record. The index is recovered when
+ * returning from the filtered function by adding FTRACE_NOTRACE_DEPTH
+ * back, after which functions are recorded normally again.
+ *
+ * The curr_ret_stack is initialized to -1 and gets increased in this
+ * function, so it can be less than -1 only if the function was
+ * filtered out via ftrace_graph_notrace_addr(), which the user can
+ * set from the set_graph_notrace file in debugfs.
+ */
+ if (current->curr_ret_stack < -1)
+ return -EBUSY;
+
calltime = trace_clock_local();
index = ++current->curr_ret_stack;
+ if (ftrace_graph_notrace_addr(func))
+ current->curr_ret_stack -= FTRACE_NOTRACE_DEPTH;
barrier();
current->ret_stack[index].ret = ret;
current->ret_stack[index].func = func;
current->ret_stack[index].calltime = calltime;
current->ret_stack[index].subtime = 0;
current->ret_stack[index].fp = frame_pointer;
- *depth = index;
+ *depth = current->curr_ret_stack;
return 0;
}
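/*
 * Worked example of the index offset used above, assuming
 * FTRACE_NOTRACE_DEPTH is a large positive constant (65536 at the time
 * of this change):
 *
 *	entry of a traced function:	curr_ret_stack  -1 -> 0
 *	entry of a notrace'd function:	curr_ret_stack   0 -> 1 -> 1 - 65536 = -65535
 *					(negative, so nothing is recorded)
 *	return from that function:	the pop path adds 65536 back to find
 *					ret_stack[1], and curr_ret_stack ends
 *					up at 0 again, resuming normal tracing.
 */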
@@ -137,7 +151,17 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
index = current->curr_ret_stack;
- if (unlikely(index < 0)) {
+ /*
+ * A negative index here means that it has just returned from a
+ * notrace'd function. Recover the index to get the original
+ * return address. See ftrace_push_return_trace().
+ *
+ * TODO: Need to check whether the stack gets corrupted.
+ */
+ if (index < 0)
+ index += FTRACE_NOTRACE_DEPTH;
+
+ if (unlikely(index < 0 || index >= FTRACE_RETFUNC_DEPTH)) {
ftrace_graph_stop();
WARN_ON(1);
/* Might as well panic, otherwise we have no where to go */
@@ -193,6 +217,15 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
trace.rettime = trace_clock_local();
barrier();
current->curr_ret_stack--;
+ /*
+ * The curr_ret_stack can be less than -1 only if it was
+ * filtered out and it's about to return from the function.
+ * Recover the index and continue to trace normal functions.
+ */
+ if (current->curr_ret_stack < -1) {
+ current->curr_ret_stack += FTRACE_NOTRACE_DEPTH;
+ return ret;
+ }
/*
* The trace should run after decrementing the ret counter
@@ -230,7 +263,7 @@ int __trace_graph_entry(struct trace_array *tr,
return 0;
entry = ring_buffer_event_data(event);
entry->graph_ent = *trace;
- if (!filter_current_check_discard(buffer, call, entry, event))
+ if (!call_filter_check_discard(call, entry, buffer, event))
__buffer_unlock_commit(buffer, event);
return 1;
@@ -259,10 +292,20 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
/* trace it when it is nested in an enabled function, or is itself enabled. */
if ((!(trace->depth || ftrace_graph_addr(trace->func)) ||
- ftrace_graph_ignore_irqs()) ||
+ ftrace_graph_ignore_irqs()) || (trace->depth < 0) ||
(max_depth && trace->depth >= max_depth))
return 0;
+ /*
+ * Do not trace a function if it's filtered by set_graph_notrace.
+ * Make the index of ret stack negative to indicate that it should
+ * ignore further functions. But it needs its own ret stack entry
+ * to recover the original index in order to continue tracing after
+ * returning from the function.
+ */
+ if (ftrace_graph_notrace_addr(trace->func))
+ return 1;
+
local_irq_save(flags);
cpu = raw_smp_processor_id();
data = per_cpu_ptr(tr->trace_buffer.data, cpu);
@@ -335,7 +378,7 @@ void __trace_graph_return(struct trace_array *tr,
return;
entry = ring_buffer_event_data(event);
entry->ret = *trace;
- if (!filter_current_check_discard(buffer, call, entry, event))
+ if (!call_filter_check_discard(call, entry, buffer, event))
__buffer_unlock_commit(buffer, event);
}
@@ -652,7 +695,7 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr,
}
/* No overhead */
- ret = print_graph_duration(DURATION_FILL_START, s, flags);
+ ret = print_graph_duration(0, s, flags | FLAGS_FILL_START);
if (ret != TRACE_TYPE_HANDLED)
return ret;
@@ -664,7 +707,7 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr,
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
- ret = print_graph_duration(DURATION_FILL_END, s, flags);
+ ret = print_graph_duration(0, s, flags | FLAGS_FILL_END);
if (ret != TRACE_TYPE_HANDLED)
return ret;
@@ -729,14 +772,14 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s,
return TRACE_TYPE_HANDLED;
/* No real data, just filling the column with spaces */
- switch (duration) {
- case DURATION_FILL_FULL:
+ switch (flags & TRACE_GRAPH_PRINT_FILL_MASK) {
+ case FLAGS_FILL_FULL:
ret = trace_seq_puts(s, " | ");
return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
- case DURATION_FILL_START:
+ case FLAGS_FILL_START:
ret = trace_seq_puts(s, " ");
return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
- case DURATION_FILL_END:
+ case FLAGS_FILL_END:
ret = trace_seq_puts(s, " |");
return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
}
@@ -852,7 +895,7 @@ print_graph_entry_nested(struct trace_iterator *iter,
}
/* No time */
- ret = print_graph_duration(DURATION_FILL_FULL, s, flags);
+ ret = print_graph_duration(0, s, flags | FLAGS_FILL_FULL);
if (ret != TRACE_TYPE_HANDLED)
return ret;
@@ -1126,9 +1169,10 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
* If the return function does not have a matching entry,
* then the entry was lost. Instead of just printing
* the '}' and letting the user guess what function this
- * belongs to, write out the function name.
+ * belongs to, write out the function name. Always do
+ * that if the funcgraph-tail option is enabled.
*/
- if (func_match) {
+ if (func_match && !(flags & TRACE_GRAPH_PRINT_TAIL)) {
ret = trace_seq_puts(s, "}\n");
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
@@ -1172,7 +1216,7 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
return TRACE_TYPE_PARTIAL_LINE;
/* No time */
- ret = print_graph_duration(DURATION_FILL_FULL, s, flags);
+ ret = print_graph_duration(0, s, flags | FLAGS_FILL_FULL);
if (ret != TRACE_TYPE_HANDLED)
return ret;
@@ -1426,7 +1470,8 @@ void graph_trace_close(struct trace_iterator *iter)
}
}
-static int func_graph_set_flag(u32 old_flags, u32 bit, int set)
+static int
+func_graph_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
{
if (bit == TRACE_GRAPH_PRINT_IRQS)
ftrace_graph_skip_irqs = !set;
@@ -1454,7 +1499,6 @@ static struct tracer graph_trace __tracer_data = {
.pipe_open = graph_trace_open,
.close = graph_trace_close,
.pipe_close = graph_trace_close,
- .wait_pipe = poll_wait_pipe,
.init = graph_trace_init,
.reset = graph_trace_reset,
.print_line = print_graph_function,
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 2aefbee93a6..9bb104f748d 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -151,16 +151,11 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip,
atomic_dec(&data->disabled);
}
-
-static struct ftrace_ops trace_ops __read_mostly =
-{
- .func = irqsoff_tracer_call,
- .flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE,
-};
#endif /* CONFIG_FUNCTION_TRACER */
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-static int irqsoff_set_flag(u32 old_flags, u32 bit, int set)
+static int
+irqsoff_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
{
int cpu;
@@ -175,7 +170,7 @@ static int irqsoff_set_flag(u32 old_flags, u32 bit, int set)
for_each_possible_cpu(cpu)
per_cpu(tracing_cpu, cpu) = 0;
- tracing_max_latency = 0;
+ tr->max_latency = 0;
tracing_reset_online_cpus(&irqsoff_trace->trace_buffer);
return start_irqsoff_tracer(irqsoff_trace, set);
@@ -266,7 +261,8 @@ __trace_function(struct trace_array *tr,
#else
#define __trace_function trace_function
-static int irqsoff_set_flag(u32 old_flags, u32 bit, int set)
+static int
+irqsoff_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
{
return -EINVAL;
}
@@ -301,13 +297,13 @@ static void irqsoff_print_header(struct seq_file *s)
/*
* Should this new latency be reported/recorded?
*/
-static int report_latency(cycle_t delta)
+static int report_latency(struct trace_array *tr, cycle_t delta)
{
if (tracing_thresh) {
if (delta < tracing_thresh)
return 0;
} else {
- if (delta <= tracing_max_latency)
+ if (delta <= tr->max_latency)
return 0;
}
return 1;
@@ -331,13 +327,13 @@ check_critical_timing(struct trace_array *tr,
pc = preempt_count();
- if (!report_latency(delta))
+ if (!report_latency(tr, delta))
goto out;
raw_spin_lock_irqsave(&max_trace_lock, flags);
/* check if we are still the max latency */
- if (!report_latency(delta))
+ if (!report_latency(tr, delta))
goto out_unlock;
__trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
@@ -350,7 +346,7 @@ check_critical_timing(struct trace_array *tr,
data->critical_end = parent_ip;
if (likely(!is_tracing_stopped())) {
- tracing_max_latency = delta;
+ tr->max_latency = delta;
update_max_tr_single(tr, current, cpu);
}
@@ -498,14 +494,14 @@ void trace_hardirqs_off(void)
}
EXPORT_SYMBOL(trace_hardirqs_off);
-void trace_hardirqs_on_caller(unsigned long caller_addr)
+__visible void trace_hardirqs_on_caller(unsigned long caller_addr)
{
if (!preempt_trace() && irq_trace())
stop_critical_timing(CALLER_ADDR0, caller_addr);
}
EXPORT_SYMBOL(trace_hardirqs_on_caller);
-void trace_hardirqs_off_caller(unsigned long caller_addr)
+__visible void trace_hardirqs_off_caller(unsigned long caller_addr)
{
if (!preempt_trace() && irq_trace())
start_critical_timing(CALLER_ADDR0, caller_addr);
@@ -529,7 +525,7 @@ void trace_preempt_off(unsigned long a0, unsigned long a1)
}
#endif /* CONFIG_PREEMPT_TRACER */
-static int register_irqsoff_function(int graph, int set)
+static int register_irqsoff_function(struct trace_array *tr, int graph, int set)
{
int ret;
@@ -541,7 +537,7 @@ static int register_irqsoff_function(int graph, int set)
ret = register_ftrace_graph(&irqsoff_graph_return,
&irqsoff_graph_entry);
else
- ret = register_ftrace_function(&trace_ops);
+ ret = register_ftrace_function(tr->ops);
if (!ret)
function_enabled = true;
@@ -549,7 +545,7 @@ static int register_irqsoff_function(int graph, int set)
return ret;
}
-static void unregister_irqsoff_function(int graph)
+static void unregister_irqsoff_function(struct trace_array *tr, int graph)
{
if (!function_enabled)
return;
@@ -557,23 +553,25 @@ static void unregister_irqsoff_function(int graph)
if (graph)
unregister_ftrace_graph();
else
- unregister_ftrace_function(&trace_ops);
+ unregister_ftrace_function(tr->ops);
function_enabled = false;
}
-static void irqsoff_function_set(int set)
+static void irqsoff_function_set(struct trace_array *tr, int set)
{
if (set)
- register_irqsoff_function(is_graph(), 1);
+ register_irqsoff_function(tr, is_graph(), 1);
else
- unregister_irqsoff_function(is_graph());
+ unregister_irqsoff_function(tr, is_graph());
}
-static int irqsoff_flag_changed(struct tracer *tracer, u32 mask, int set)
+static int irqsoff_flag_changed(struct trace_array *tr, u32 mask, int set)
{
+ struct tracer *tracer = tr->current_trace;
+
if (mask & TRACE_ITER_FUNCTION)
- irqsoff_function_set(set);
+ irqsoff_function_set(tr, set);
return trace_keep_overwrite(tracer, mask, set);
}
@@ -582,7 +580,7 @@ static int start_irqsoff_tracer(struct trace_array *tr, int graph)
{
int ret;
- ret = register_irqsoff_function(graph, 0);
+ ret = register_irqsoff_function(tr, graph, 0);
if (!ret && tracing_is_enabled())
tracer_enabled = 1;
@@ -596,25 +594,37 @@ static void stop_irqsoff_tracer(struct trace_array *tr, int graph)
{
tracer_enabled = 0;
- unregister_irqsoff_function(graph);
+ unregister_irqsoff_function(tr, graph);
}
-static void __irqsoff_tracer_init(struct trace_array *tr)
+static bool irqsoff_busy;
+
+static int __irqsoff_tracer_init(struct trace_array *tr)
{
+ if (irqsoff_busy)
+ return -EBUSY;
+
save_flags = trace_flags;
/* non overwrite screws up the latency tracers */
set_tracer_flag(tr, TRACE_ITER_OVERWRITE, 1);
set_tracer_flag(tr, TRACE_ITER_LATENCY_FMT, 1);
- tracing_max_latency = 0;
+ tr->max_latency = 0;
irqsoff_trace = tr;
/* make sure that the tracer is visible */
smp_wmb();
tracing_reset_online_cpus(&tr->trace_buffer);
- if (start_irqsoff_tracer(tr, is_graph()))
+ ftrace_init_array_ops(tr, irqsoff_tracer_call);
+
+ /* Only the top-level instance supports graph tracing */
+ if (start_irqsoff_tracer(tr, (tr->flags & TRACE_ARRAY_FL_GLOBAL &&
+ is_graph())))
printk(KERN_ERR "failed to start irqsoff tracer\n");
+
+ irqsoff_busy = true;
+ return 0;
}
static void irqsoff_tracer_reset(struct trace_array *tr)
@@ -626,6 +636,9 @@ static void irqsoff_tracer_reset(struct trace_array *tr)
set_tracer_flag(tr, TRACE_ITER_LATENCY_FMT, lat_flag);
set_tracer_flag(tr, TRACE_ITER_OVERWRITE, overwrite_flag);
+ ftrace_reset_array_ops(tr);
+
+ irqsoff_busy = false;
}
static void irqsoff_tracer_start(struct trace_array *tr)
@@ -643,8 +656,7 @@ static int irqsoff_tracer_init(struct trace_array *tr)
{
trace_type = TRACER_IRQS_OFF;
- __irqsoff_tracer_init(tr);
- return 0;
+ return __irqsoff_tracer_init(tr);
}
static struct tracer irqsoff_tracer __read_mostly =
{
@@ -664,6 +676,7 @@ static struct tracer irqsoff_tracer __read_mostly =
#endif
.open = irqsoff_trace_open,
.close = irqsoff_trace_close,
+ .allow_instances = true,
.use_max_tr = true,
};
# define register_irqsoff(trace) register_tracer(&trace)
@@ -676,8 +689,7 @@ static int preemptoff_tracer_init(struct trace_array *tr)
{
trace_type = TRACER_PREEMPT_OFF;
- __irqsoff_tracer_init(tr);
- return 0;
+ return __irqsoff_tracer_init(tr);
}
static struct tracer preemptoff_tracer __read_mostly =
@@ -698,6 +710,7 @@ static struct tracer preemptoff_tracer __read_mostly =
#endif
.open = irqsoff_trace_open,
.close = irqsoff_trace_close,
+ .allow_instances = true,
.use_max_tr = true,
};
# define register_preemptoff(trace) register_tracer(&trace)
@@ -712,8 +725,7 @@ static int preemptirqsoff_tracer_init(struct trace_array *tr)
{
trace_type = TRACER_IRQS_OFF | TRACER_PREEMPT_OFF;
- __irqsoff_tracer_init(tr);
- return 0;
+ return __irqsoff_tracer_init(tr);
}
static struct tracer preemptirqsoff_tracer __read_mostly =
@@ -734,6 +746,7 @@ static struct tracer preemptirqsoff_tracer __read_mostly =
#endif
.open = irqsoff_trace_open,
.close = irqsoff_trace_close,
+ .allow_instances = true,
.use_max_tr = true,
};
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 243f6834d02..282f6e4e553 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -27,75 +27,54 @@
/**
* Kprobe event core functions
*/
-struct trace_probe {
+struct trace_kprobe {
struct list_head list;
struct kretprobe rp; /* Use rp.kp for kprobe use */
unsigned long nhit;
- unsigned int flags; /* For TP_FLAG_* */
const char *symbol; /* symbol name */
- struct ftrace_event_class class;
- struct ftrace_event_call call;
- struct list_head files;
- ssize_t size; /* trace entry size */
- unsigned int nr_args;
- struct probe_arg args[];
+ struct trace_probe tp;
};
-struct event_file_link {
- struct ftrace_event_file *file;
- struct list_head list;
-};
-
-#define SIZEOF_TRACE_PROBE(n) \
- (offsetof(struct trace_probe, args) + \
+#define SIZEOF_TRACE_KPROBE(n) \
+ (offsetof(struct trace_kprobe, tp.args) + \
(sizeof(struct probe_arg) * (n)))
-static __kprobes bool trace_probe_is_return(struct trace_probe *tp)
-{
- return tp->rp.handler != NULL;
-}
-
-static __kprobes const char *trace_probe_symbol(struct trace_probe *tp)
-{
- return tp->symbol ? tp->symbol : "unknown";
-}
-
-static __kprobes unsigned long trace_probe_offset(struct trace_probe *tp)
+static nokprobe_inline bool trace_kprobe_is_return(struct trace_kprobe *tk)
{
- return tp->rp.kp.offset;
+ return tk->rp.handler != NULL;
}
-static __kprobes bool trace_probe_is_enabled(struct trace_probe *tp)
+static nokprobe_inline const char *trace_kprobe_symbol(struct trace_kprobe *tk)
{
- return !!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE));
+ return tk->symbol ? tk->symbol : "unknown";
}
-static __kprobes bool trace_probe_is_registered(struct trace_probe *tp)
+static nokprobe_inline unsigned long trace_kprobe_offset(struct trace_kprobe *tk)
{
- return !!(tp->flags & TP_FLAG_REGISTERED);
+ return tk->rp.kp.offset;
}
-static __kprobes bool trace_probe_has_gone(struct trace_probe *tp)
+static nokprobe_inline bool trace_kprobe_has_gone(struct trace_kprobe *tk)
{
- return !!(kprobe_gone(&tp->rp.kp));
+ return !!(kprobe_gone(&tk->rp.kp));
}
-static __kprobes bool trace_probe_within_module(struct trace_probe *tp,
- struct module *mod)
+static nokprobe_inline bool trace_kprobe_within_module(struct trace_kprobe *tk,
+ struct module *mod)
{
int len = strlen(mod->name);
- const char *name = trace_probe_symbol(tp);
+ const char *name = trace_kprobe_symbol(tk);
return strncmp(mod->name, name, len) == 0 && name[len] == ':';
}
-static __kprobes bool trace_probe_is_on_module(struct trace_probe *tp)
+static nokprobe_inline bool trace_kprobe_is_on_module(struct trace_kprobe *tk)
{
- return !!strchr(trace_probe_symbol(tp), ':');
+ return !!strchr(trace_kprobe_symbol(tk), ':');
}
-static int register_probe_event(struct trace_probe *tp);
-static int unregister_probe_event(struct trace_probe *tp);
+static int register_kprobe_event(struct trace_kprobe *tk);
+static int unregister_kprobe_event(struct trace_kprobe *tk);
static DEFINE_MUTEX(probe_lock);
static LIST_HEAD(probe_list);
@@ -104,45 +83,231 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
static int kretprobe_dispatcher(struct kretprobe_instance *ri,
struct pt_regs *regs);
+/* Memory fetching by symbol */
+struct symbol_cache {
+ char *symbol;
+ long offset;
+ unsigned long addr;
+};
+
+unsigned long update_symbol_cache(struct symbol_cache *sc)
+{
+ sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol);
+
+ if (sc->addr)
+ sc->addr += sc->offset;
+
+ return sc->addr;
+}
+
+void free_symbol_cache(struct symbol_cache *sc)
+{
+ kfree(sc->symbol);
+ kfree(sc);
+}
+
+struct symbol_cache *alloc_symbol_cache(const char *sym, long offset)
+{
+ struct symbol_cache *sc;
+
+ if (!sym || strlen(sym) == 0)
+ return NULL;
+
+ sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL);
+ if (!sc)
+ return NULL;
+
+ sc->symbol = kstrdup(sym, GFP_KERNEL);
+ if (!sc->symbol) {
+ kfree(sc);
+ return NULL;
+ }
+ sc->offset = offset;
+ update_symbol_cache(sc);
+
+ return sc;
+}
+
+/*
+ * Kprobes-specific fetch functions
+ */
+#define DEFINE_FETCH_stack(type) \
+static void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs, \
+ void *offset, void *dest) \
+{ \
+ *(type *)dest = (type)regs_get_kernel_stack_nth(regs, \
+ (unsigned int)((unsigned long)offset)); \
+} \
+NOKPROBE_SYMBOL(FETCH_FUNC_NAME(stack, type));
+
+DEFINE_BASIC_FETCH_FUNCS(stack)
+/* No string on the stack entry */
+#define fetch_stack_string NULL
+#define fetch_stack_string_size NULL
+
+#define DEFINE_FETCH_memory(type) \
+static void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs, \
+ void *addr, void *dest) \
+{ \
+ type retval; \
+ if (probe_kernel_address(addr, retval)) \
+ *(type *)dest = 0; \
+ else \
+ *(type *)dest = retval; \
+} \
+NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, type));
+
+DEFINE_BASIC_FETCH_FUNCS(memory)
+/*
+ * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
+ * length and relative data location.
+ */
+static void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
+ void *addr, void *dest)
+{
+ long ret;
+ int maxlen = get_rloc_len(*(u32 *)dest);
+ u8 *dst = get_rloc_data(dest);
+ u8 *src = addr;
+ mm_segment_t old_fs = get_fs();
+
+ if (!maxlen)
+ return;
+
+ /*
+ * Try to get string again, since the string can be changed while
+ * probing.
+ */
+ set_fs(KERNEL_DS);
+ pagefault_disable();
+
+ do
+ ret = __copy_from_user_inatomic(dst++, src++, 1);
+ while (dst[-1] && ret == 0 && src - (u8 *)addr < maxlen);
+
+ dst[-1] = '\0';
+ pagefault_enable();
+ set_fs(old_fs);
+
+ if (ret < 0) { /* Failed to fetch string */
+ ((u8 *)get_rloc_data(dest))[0] = '\0';
+ *(u32 *)dest = make_data_rloc(0, get_rloc_offs(*(u32 *)dest));
+ } else {
+ *(u32 *)dest = make_data_rloc(src - (u8 *)addr,
+ get_rloc_offs(*(u32 *)dest));
+ }
+}
+NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, string));
+
+/* Return the length of string -- including null terminal byte */
+static void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
+ void *addr, void *dest)
+{
+ mm_segment_t old_fs;
+ int ret, len = 0;
+ u8 c;
+
+ old_fs = get_fs();
+ set_fs(KERNEL_DS);
+ pagefault_disable();
+
+ do {
+ ret = __copy_from_user_inatomic(&c, (u8 *)addr + len, 1);
+ len++;
+ } while (c && ret == 0 && len < MAX_STRING_SIZE);
+
+ pagefault_enable();
+ set_fs(old_fs);
+
+ if (ret < 0) /* Failed to check the length */
+ *(u32 *)dest = 0;
+ else
+ *(u32 *)dest = len;
+}
+NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, string_size));
+
+#define DEFINE_FETCH_symbol(type) \
+void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs, void *data, void *dest)\
+{ \
+ struct symbol_cache *sc = data; \
+ if (sc->addr) \
+ fetch_memory_##type(regs, (void *)sc->addr, dest); \
+ else \
+ *(type *)dest = 0; \
+} \
+NOKPROBE_SYMBOL(FETCH_FUNC_NAME(symbol, type));
+
+DEFINE_BASIC_FETCH_FUNCS(symbol)
+DEFINE_FETCH_symbol(string)
+DEFINE_FETCH_symbol(string_size)
+
+/* kprobes don't support file_offset fetch methods */
+#define fetch_file_offset_u8 NULL
+#define fetch_file_offset_u16 NULL
+#define fetch_file_offset_u32 NULL
+#define fetch_file_offset_u64 NULL
+#define fetch_file_offset_string NULL
+#define fetch_file_offset_string_size NULL
+
+/* Fetch type information table */
+const struct fetch_type kprobes_fetch_type_table[] = {
+ /* Special types */
+ [FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string,
+ sizeof(u32), 1, "__data_loc char[]"),
+ [FETCH_TYPE_STRSIZE] = __ASSIGN_FETCH_TYPE("string_size", u32,
+ string_size, sizeof(u32), 0, "u32"),
+ /* Basic types */
+ ASSIGN_FETCH_TYPE(u8, u8, 0),
+ ASSIGN_FETCH_TYPE(u16, u16, 0),
+ ASSIGN_FETCH_TYPE(u32, u32, 0),
+ ASSIGN_FETCH_TYPE(u64, u64, 0),
+ ASSIGN_FETCH_TYPE(s8, u8, 1),
+ ASSIGN_FETCH_TYPE(s16, u16, 1),
+ ASSIGN_FETCH_TYPE(s32, u32, 1),
+ ASSIGN_FETCH_TYPE(s64, u64, 1),
+
+ ASSIGN_FETCH_TYPE_END
+};
+
/*
* Allocate new trace_probe and initialize it (including kprobes).
*/
-static struct trace_probe *alloc_trace_probe(const char *group,
+static struct trace_kprobe *alloc_trace_kprobe(const char *group,
const char *event,
void *addr,
const char *symbol,
unsigned long offs,
int nargs, bool is_return)
{
- struct trace_probe *tp;
+ struct trace_kprobe *tk;
int ret = -ENOMEM;
- tp = kzalloc(SIZEOF_TRACE_PROBE(nargs), GFP_KERNEL);
- if (!tp)
+ tk = kzalloc(SIZEOF_TRACE_KPROBE(nargs), GFP_KERNEL);
+ if (!tk)
return ERR_PTR(ret);
if (symbol) {
- tp->symbol = kstrdup(symbol, GFP_KERNEL);
- if (!tp->symbol)
+ tk->symbol = kstrdup(symbol, GFP_KERNEL);
+ if (!tk->symbol)
goto error;
- tp->rp.kp.symbol_name = tp->symbol;
- tp->rp.kp.offset = offs;
+ tk->rp.kp.symbol_name = tk->symbol;
+ tk->rp.kp.offset = offs;
} else
- tp->rp.kp.addr = addr;
+ tk->rp.kp.addr = addr;
if (is_return)
- tp->rp.handler = kretprobe_dispatcher;
+ tk->rp.handler = kretprobe_dispatcher;
else
- tp->rp.kp.pre_handler = kprobe_dispatcher;
+ tk->rp.kp.pre_handler = kprobe_dispatcher;
if (!event || !is_good_name(event)) {
ret = -EINVAL;
goto error;
}
- tp->call.class = &tp->class;
- tp->call.name = kstrdup(event, GFP_KERNEL);
- if (!tp->call.name)
+ tk->tp.call.class = &tk->tp.class;
+ tk->tp.call.name = kstrdup(event, GFP_KERNEL);
+ if (!tk->tp.call.name)
goto error;
if (!group || !is_good_name(group)) {
@@ -150,42 +315,42 @@ static struct trace_probe *alloc_trace_probe(const char *group,
goto error;
}
- tp->class.system = kstrdup(group, GFP_KERNEL);
- if (!tp->class.system)
+ tk->tp.class.system = kstrdup(group, GFP_KERNEL);
+ if (!tk->tp.class.system)
goto error;
- INIT_LIST_HEAD(&tp->list);
- INIT_LIST_HEAD(&tp->files);
- return tp;
+ INIT_LIST_HEAD(&tk->list);
+ INIT_LIST_HEAD(&tk->tp.files);
+ return tk;
error:
- kfree(tp->call.name);
- kfree(tp->symbol);
- kfree(tp);
+ kfree(tk->tp.call.name);
+ kfree(tk->symbol);
+ kfree(tk);
return ERR_PTR(ret);
}
-static void free_trace_probe(struct trace_probe *tp)
+static void free_trace_kprobe(struct trace_kprobe *tk)
{
int i;
- for (i = 0; i < tp->nr_args; i++)
- traceprobe_free_probe_arg(&tp->args[i]);
+ for (i = 0; i < tk->tp.nr_args; i++)
+ traceprobe_free_probe_arg(&tk->tp.args[i]);
- kfree(tp->call.class->system);
- kfree(tp->call.name);
- kfree(tp->symbol);
- kfree(tp);
+ kfree(tk->tp.call.class->system);
+ kfree(tk->tp.call.name);
+ kfree(tk->symbol);
+ kfree(tk);
}
-static struct trace_probe *find_trace_probe(const char *event,
- const char *group)
+static struct trace_kprobe *find_trace_kprobe(const char *event,
+ const char *group)
{
- struct trace_probe *tp;
+ struct trace_kprobe *tk;
- list_for_each_entry(tp, &probe_list, list)
- if (strcmp(tp->call.name, event) == 0 &&
- strcmp(tp->call.class->system, group) == 0)
- return tp;
+ list_for_each_entry(tk, &probe_list, list)
+ if (strcmp(ftrace_event_name(&tk->tp.call), event) == 0 &&
+ strcmp(tk->tp.call.class->system, group) == 0)
+ return tk;
return NULL;
}
@@ -194,7 +359,7 @@ static struct trace_probe *find_trace_probe(const char *event,
* if the file is NULL, enable "perf" handler, or enable "trace" handler.
*/
static int
-enable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
+enable_trace_kprobe(struct trace_kprobe *tk, struct ftrace_event_file *file)
{
int ret = 0;
@@ -208,47 +373,35 @@ enable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
}
link->file = file;
- list_add_tail_rcu(&link->list, &tp->files);
+ list_add_tail_rcu(&link->list, &tk->tp.files);
- tp->flags |= TP_FLAG_TRACE;
+ tk->tp.flags |= TP_FLAG_TRACE;
} else
- tp->flags |= TP_FLAG_PROFILE;
+ tk->tp.flags |= TP_FLAG_PROFILE;
- if (trace_probe_is_registered(tp) && !trace_probe_has_gone(tp)) {
- if (trace_probe_is_return(tp))
- ret = enable_kretprobe(&tp->rp);
+ if (trace_probe_is_registered(&tk->tp) && !trace_kprobe_has_gone(tk)) {
+ if (trace_kprobe_is_return(tk))
+ ret = enable_kretprobe(&tk->rp);
else
- ret = enable_kprobe(&tp->rp.kp);
+ ret = enable_kprobe(&tk->rp.kp);
}
out:
return ret;
}
-static struct event_file_link *
-find_event_file_link(struct trace_probe *tp, struct ftrace_event_file *file)
-{
- struct event_file_link *link;
-
- list_for_each_entry(link, &tp->files, list)
- if (link->file == file)
- return link;
-
- return NULL;
-}
-
/*
* Disable trace_probe
* if the file is NULL, disable "perf" handler, or disable "trace" handler.
*/
static int
-disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
+disable_trace_kprobe(struct trace_kprobe *tk, struct ftrace_event_file *file)
{
struct event_file_link *link = NULL;
int wait = 0;
int ret = 0;
if (file) {
- link = find_event_file_link(tp, file);
+ link = find_event_file_link(&tk->tp, file);
if (!link) {
ret = -EINVAL;
goto out;
@@ -256,18 +409,18 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
list_del_rcu(&link->list);
wait = 1;
- if (!list_empty(&tp->files))
+ if (!list_empty(&tk->tp.files))
goto out;
- tp->flags &= ~TP_FLAG_TRACE;
+ tk->tp.flags &= ~TP_FLAG_TRACE;
} else
- tp->flags &= ~TP_FLAG_PROFILE;
+ tk->tp.flags &= ~TP_FLAG_PROFILE;
- if (!trace_probe_is_enabled(tp) && trace_probe_is_registered(tp)) {
- if (trace_probe_is_return(tp))
- disable_kretprobe(&tp->rp);
+ if (!trace_probe_is_enabled(&tk->tp) && trace_probe_is_registered(&tk->tp)) {
+ if (trace_kprobe_is_return(tk))
+ disable_kretprobe(&tk->rp);
else
- disable_kprobe(&tp->rp.kp);
+ disable_kprobe(&tk->rp.kp);
wait = 1;
}
out:
@@ -288,40 +441,40 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
}
/* Internal register function - just handle k*probes and flags */
-static int __register_trace_probe(struct trace_probe *tp)
+static int __register_trace_kprobe(struct trace_kprobe *tk)
{
int i, ret;
- if (trace_probe_is_registered(tp))
+ if (trace_probe_is_registered(&tk->tp))
return -EINVAL;
- for (i = 0; i < tp->nr_args; i++)
- traceprobe_update_arg(&tp->args[i]);
+ for (i = 0; i < tk->tp.nr_args; i++)
+ traceprobe_update_arg(&tk->tp.args[i]);
/* Set/clear disabled flag according to tp->flag */
- if (trace_probe_is_enabled(tp))
- tp->rp.kp.flags &= ~KPROBE_FLAG_DISABLED;
+ if (trace_probe_is_enabled(&tk->tp))
+ tk->rp.kp.flags &= ~KPROBE_FLAG_DISABLED;
else
- tp->rp.kp.flags |= KPROBE_FLAG_DISABLED;
+ tk->rp.kp.flags |= KPROBE_FLAG_DISABLED;
- if (trace_probe_is_return(tp))
- ret = register_kretprobe(&tp->rp);
+ if (trace_kprobe_is_return(tk))
+ ret = register_kretprobe(&tk->rp);
else
- ret = register_kprobe(&tp->rp.kp);
+ ret = register_kprobe(&tk->rp.kp);
if (ret == 0)
- tp->flags |= TP_FLAG_REGISTERED;
+ tk->tp.flags |= TP_FLAG_REGISTERED;
else {
pr_warning("Could not insert probe at %s+%lu: %d\n",
- trace_probe_symbol(tp), trace_probe_offset(tp), ret);
- if (ret == -ENOENT && trace_probe_is_on_module(tp)) {
+ trace_kprobe_symbol(tk), trace_kprobe_offset(tk), ret);
+ if (ret == -ENOENT && trace_kprobe_is_on_module(tk)) {
pr_warning("This probe might be able to register after"
"target module is loaded. Continue.\n");
ret = 0;
} else if (ret == -EILSEQ) {
pr_warning("Probing address(0x%p) is not an "
"instruction boundary.\n",
- tp->rp.kp.addr);
+ tk->rp.kp.addr);
ret = -EINVAL;
}
}
@@ -330,67 +483,68 @@ static int __register_trace_probe(struct trace_probe *tp)
}
/* Internal unregister function - just handle k*probes and flags */
-static void __unregister_trace_probe(struct trace_probe *tp)
+static void __unregister_trace_kprobe(struct trace_kprobe *tk)
{
- if (trace_probe_is_registered(tp)) {
- if (trace_probe_is_return(tp))
- unregister_kretprobe(&tp->rp);
+ if (trace_probe_is_registered(&tk->tp)) {
+ if (trace_kprobe_is_return(tk))
+ unregister_kretprobe(&tk->rp);
else
- unregister_kprobe(&tp->rp.kp);
- tp->flags &= ~TP_FLAG_REGISTERED;
+ unregister_kprobe(&tk->rp.kp);
+ tk->tp.flags &= ~TP_FLAG_REGISTERED;
/* Cleanup kprobe for reuse */
- if (tp->rp.kp.symbol_name)
- tp->rp.kp.addr = NULL;
+ if (tk->rp.kp.symbol_name)
+ tk->rp.kp.addr = NULL;
}
}
/* Unregister a trace_probe and probe_event: call with locking probe_lock */
-static int unregister_trace_probe(struct trace_probe *tp)
+static int unregister_trace_kprobe(struct trace_kprobe *tk)
{
/* Enabled event can not be unregistered */
- if (trace_probe_is_enabled(tp))
+ if (trace_probe_is_enabled(&tk->tp))
return -EBUSY;
/* Will fail if probe is being used by ftrace or perf */
- if (unregister_probe_event(tp))
+ if (unregister_kprobe_event(tk))
return -EBUSY;
- __unregister_trace_probe(tp);
- list_del(&tp->list);
+ __unregister_trace_kprobe(tk);
+ list_del(&tk->list);
return 0;
}
/* Register a trace_probe and probe_event */
-static int register_trace_probe(struct trace_probe *tp)
+static int register_trace_kprobe(struct trace_kprobe *tk)
{
- struct trace_probe *old_tp;
+ struct trace_kprobe *old_tk;
int ret;
mutex_lock(&probe_lock);
/* Delete old (same name) event if exist */
- old_tp = find_trace_probe(tp->call.name, tp->call.class->system);
- if (old_tp) {
- ret = unregister_trace_probe(old_tp);
+ old_tk = find_trace_kprobe(ftrace_event_name(&tk->tp.call),
+ tk->tp.call.class->system);
+ if (old_tk) {
+ ret = unregister_trace_kprobe(old_tk);
if (ret < 0)
goto end;
- free_trace_probe(old_tp);
+ free_trace_kprobe(old_tk);
}
/* Register new event */
- ret = register_probe_event(tp);
+ ret = register_kprobe_event(tk);
if (ret) {
pr_warning("Failed to register probe event(%d)\n", ret);
goto end;
}
/* Register k*probe */
- ret = __register_trace_probe(tp);
+ ret = __register_trace_kprobe(tk);
if (ret < 0)
- unregister_probe_event(tp);
+ unregister_kprobe_event(tk);
else
- list_add_tail(&tp->list, &probe_list);
+ list_add_tail(&tk->list, &probe_list);
end:
mutex_unlock(&probe_lock);
@@ -398,11 +552,11 @@ end:
}
/* Module notifier call back, checking event on the module */
-static int trace_probe_module_callback(struct notifier_block *nb,
+static int trace_kprobe_module_callback(struct notifier_block *nb,
unsigned long val, void *data)
{
struct module *mod = data;
- struct trace_probe *tp;
+ struct trace_kprobe *tk;
int ret;
if (val != MODULE_STATE_COMING)
@@ -410,15 +564,16 @@ static int trace_probe_module_callback(struct notifier_block *nb,
/* Update probes on coming module */
mutex_lock(&probe_lock);
- list_for_each_entry(tp, &probe_list, list) {
- if (trace_probe_within_module(tp, mod)) {
+ list_for_each_entry(tk, &probe_list, list) {
+ if (trace_kprobe_within_module(tk, mod)) {
/* Don't need to check busy - this should have gone. */
- __unregister_trace_probe(tp);
- ret = __register_trace_probe(tp);
+ __unregister_trace_kprobe(tk);
+ ret = __register_trace_kprobe(tk);
if (ret)
pr_warning("Failed to re-register probe %s on"
"%s: %d\n",
- tp->call.name, mod->name, ret);
+ ftrace_event_name(&tk->tp.call),
+ mod->name, ret);
}
}
mutex_unlock(&probe_lock);
@@ -426,12 +581,12 @@ static int trace_probe_module_callback(struct notifier_block *nb,
return NOTIFY_DONE;
}
-static struct notifier_block trace_probe_module_nb = {
- .notifier_call = trace_probe_module_callback,
+static struct notifier_block trace_kprobe_module_nb = {
+ .notifier_call = trace_kprobe_module_callback,
.priority = 1 /* Invoked after kprobe module callback */
};
-static int create_trace_probe(int argc, char **argv)
+static int create_trace_kprobe(int argc, char **argv)
{
/*
* Argument syntax:
@@ -451,7 +606,7 @@ static int create_trace_probe(int argc, char **argv)
* Type of args:
* FETCHARG:TYPE : use TYPE instead of unsigned long.
*/
- struct trace_probe *tp;
+ struct trace_kprobe *tk;
int i, ret = 0;
bool is_return = false, is_delete = false;
char *symbol = NULL, *event = NULL, *group = NULL;
@@ -498,16 +653,16 @@ static int create_trace_probe(int argc, char **argv)
return -EINVAL;
}
mutex_lock(&probe_lock);
- tp = find_trace_probe(event, group);
- if (!tp) {
+ tk = find_trace_kprobe(event, group);
+ if (!tk) {
mutex_unlock(&probe_lock);
pr_info("Event %s/%s doesn't exist.\n", group, event);
return -ENOENT;
}
/* delete an event */
- ret = unregister_trace_probe(tp);
+ ret = unregister_trace_kprobe(tk);
if (ret == 0)
- free_trace_probe(tp);
+ free_trace_kprobe(tk);
mutex_unlock(&probe_lock);
return ret;
}
@@ -554,47 +709,49 @@ static int create_trace_probe(int argc, char **argv)
is_return ? 'r' : 'p', addr);
event = buf;
}
- tp = alloc_trace_probe(group, event, addr, symbol, offset, argc,
+ tk = alloc_trace_kprobe(group, event, addr, symbol, offset, argc,
is_return);
- if (IS_ERR(tp)) {
+ if (IS_ERR(tk)) {
pr_info("Failed to allocate trace_probe.(%d)\n",
- (int)PTR_ERR(tp));
- return PTR_ERR(tp);
+ (int)PTR_ERR(tk));
+ return PTR_ERR(tk);
}
/* parse arguments */
ret = 0;
for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
+ struct probe_arg *parg = &tk->tp.args[i];
+
/* Increment count for freeing args in error case */
- tp->nr_args++;
+ tk->tp.nr_args++;
/* Parse argument name */
arg = strchr(argv[i], '=');
if (arg) {
*arg++ = '\0';
- tp->args[i].name = kstrdup(argv[i], GFP_KERNEL);
+ parg->name = kstrdup(argv[i], GFP_KERNEL);
} else {
arg = argv[i];
/* If argument name is omitted, set "argN" */
snprintf(buf, MAX_EVENT_NAME_LEN, "arg%d", i + 1);
- tp->args[i].name = kstrdup(buf, GFP_KERNEL);
+ parg->name = kstrdup(buf, GFP_KERNEL);
}
- if (!tp->args[i].name) {
+ if (!parg->name) {
pr_info("Failed to allocate argument[%d] name.\n", i);
ret = -ENOMEM;
goto error;
}
- if (!is_good_name(tp->args[i].name)) {
+ if (!is_good_name(parg->name)) {
pr_info("Invalid argument[%d] name: %s\n",
- i, tp->args[i].name);
+ i, parg->name);
ret = -EINVAL;
goto error;
}
- if (traceprobe_conflict_field_name(tp->args[i].name,
- tp->args, i)) {
+ if (traceprobe_conflict_field_name(parg->name,
+ tk->tp.args, i)) {
pr_info("Argument[%d] name '%s' conflicts with "
"another field.\n", i, argv[i]);
ret = -EINVAL;
@@ -602,7 +759,7 @@ static int create_trace_probe(int argc, char **argv)
}
/* Parse fetch argument */
- ret = traceprobe_parse_probe_arg(arg, &tp->size, &tp->args[i],
+ ret = traceprobe_parse_probe_arg(arg, &tk->tp.size, parg,
is_return, true);
if (ret) {
pr_info("Parse error at argument[%d]. (%d)\n", i, ret);
@@ -610,35 +767,35 @@ static int create_trace_probe(int argc, char **argv)
}
}
- ret = register_trace_probe(tp);
+ ret = register_trace_kprobe(tk);
if (ret)
goto error;
return 0;
error:
- free_trace_probe(tp);
+ free_trace_kprobe(tk);
return ret;
}
-static int release_all_trace_probes(void)
+static int release_all_trace_kprobes(void)
{
- struct trace_probe *tp;
+ struct trace_kprobe *tk;
int ret = 0;
mutex_lock(&probe_lock);
/* Ensure no probe is in use. */
- list_for_each_entry(tp, &probe_list, list)
- if (trace_probe_is_enabled(tp)) {
+ list_for_each_entry(tk, &probe_list, list)
+ if (trace_probe_is_enabled(&tk->tp)) {
ret = -EBUSY;
goto end;
}
/* TODO: Use batch unregistration */
while (!list_empty(&probe_list)) {
- tp = list_entry(probe_list.next, struct trace_probe, list);
- ret = unregister_trace_probe(tp);
+ tk = list_entry(probe_list.next, struct trace_kprobe, list);
+ ret = unregister_trace_kprobe(tk);
if (ret)
goto end;
- free_trace_probe(tp);
+ free_trace_kprobe(tk);
}
end:
@@ -666,22 +823,23 @@ static void probes_seq_stop(struct seq_file *m, void *v)
static int probes_seq_show(struct seq_file *m, void *v)
{
- struct trace_probe *tp = v;
+ struct trace_kprobe *tk = v;
int i;
- seq_printf(m, "%c", trace_probe_is_return(tp) ? 'r' : 'p');
- seq_printf(m, ":%s/%s", tp->call.class->system, tp->call.name);
+ seq_printf(m, "%c", trace_kprobe_is_return(tk) ? 'r' : 'p');
+ seq_printf(m, ":%s/%s", tk->tp.call.class->system,
+ ftrace_event_name(&tk->tp.call));
- if (!tp->symbol)
- seq_printf(m, " 0x%p", tp->rp.kp.addr);
- else if (tp->rp.kp.offset)
- seq_printf(m, " %s+%u", trace_probe_symbol(tp),
- tp->rp.kp.offset);
+ if (!tk->symbol)
+ seq_printf(m, " 0x%p", tk->rp.kp.addr);
+ else if (tk->rp.kp.offset)
+ seq_printf(m, " %s+%u", trace_kprobe_symbol(tk),
+ tk->rp.kp.offset);
else
- seq_printf(m, " %s", trace_probe_symbol(tp));
+ seq_printf(m, " %s", trace_kprobe_symbol(tk));
- for (i = 0; i < tp->nr_args; i++)
- seq_printf(m, " %s=%s", tp->args[i].name, tp->args[i].comm);
+ for (i = 0; i < tk->tp.nr_args; i++)
+ seq_printf(m, " %s=%s", tk->tp.args[i].name, tk->tp.args[i].comm);
seq_printf(m, "\n");
return 0;
@@ -699,7 +857,7 @@ static int probes_open(struct inode *inode, struct file *file)
int ret;
if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
- ret = release_all_trace_probes();
+ ret = release_all_trace_kprobes();
if (ret < 0)
return ret;
}
@@ -711,7 +869,7 @@ static ssize_t probes_write(struct file *file, const char __user *buffer,
size_t count, loff_t *ppos)
{
return traceprobe_probes_write(file, buffer, count, ppos,
- create_trace_probe);
+ create_trace_kprobe);
}
static const struct file_operations kprobe_events_ops = {
@@ -726,10 +884,11 @@ static const struct file_operations kprobe_events_ops = {
/* Probes profiling interfaces */
static int probes_profile_seq_show(struct seq_file *m, void *v)
{
- struct trace_probe *tp = v;
+ struct trace_kprobe *tk = v;
- seq_printf(m, " %-44s %15lu %15lu\n", tp->call.name, tp->nhit,
- tp->rp.kp.nmissed);
+ seq_printf(m, " %-44s %15lu %15lu\n",
+ ftrace_event_name(&tk->tp.call), tk->nhit,
+ tk->rp.kp.nmissed);
return 0;
}
@@ -754,57 +913,9 @@ static const struct file_operations kprobe_profile_ops = {
.release = seq_release,
};
-/* Sum up total data length for dynamic arraies (strings) */
-static __kprobes int __get_data_size(struct trace_probe *tp,
- struct pt_regs *regs)
-{
- int i, ret = 0;
- u32 len;
-
- for (i = 0; i < tp->nr_args; i++)
- if (unlikely(tp->args[i].fetch_size.fn)) {
- call_fetch(&tp->args[i].fetch_size, regs, &len);
- ret += len;
- }
-
- return ret;
-}
-
-/* Store the value of each argument */
-static __kprobes void store_trace_args(int ent_size, struct trace_probe *tp,
- struct pt_regs *regs,
- u8 *data, int maxlen)
-{
- int i;
- u32 end = tp->size;
- u32 *dl; /* Data (relative) location */
-
- for (i = 0; i < tp->nr_args; i++) {
- if (unlikely(tp->args[i].fetch_size.fn)) {
- /*
- * First, we set the relative location and
- * maximum data length to *dl
- */
- dl = (u32 *)(data + tp->args[i].offset);
- *dl = make_data_rloc(maxlen, end - tp->args[i].offset);
- /* Then try to fetch string or dynamic array data */
- call_fetch(&tp->args[i].fetch, regs, dl);
- /* Reduce maximum length */
- end += get_rloc_len(*dl);
- maxlen -= get_rloc_len(*dl);
- /* Trick here, convert data_rloc to data_loc */
- *dl = convert_rloc_to_loc(*dl,
- ent_size + tp->args[i].offset);
- } else
- /* Just fetching data normally */
- call_fetch(&tp->args[i].fetch, regs,
- data + tp->args[i].offset);
- }
-}
-
/* Kprobe handler */
-static __kprobes void
-__kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs,
+static nokprobe_inline void
+__kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs,
struct ftrace_event_file *ftrace_file)
{
struct kprobe_trace_entry_head *entry;
@@ -812,18 +923,18 @@ __kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs,
struct ring_buffer *buffer;
int size, dsize, pc;
unsigned long irq_flags;
- struct ftrace_event_call *call = &tp->call;
+ struct ftrace_event_call *call = &tk->tp.call;
WARN_ON(call != ftrace_file->event_call);
- if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &ftrace_file->flags))
+ if (ftrace_trigger_soft_disabled(ftrace_file))
return;
local_save_flags(irq_flags);
pc = preempt_count();
- dsize = __get_data_size(tp, regs);
- size = sizeof(*entry) + tp->size + dsize;
+ dsize = __get_data_size(&tk->tp, regs);
+ size = sizeof(*entry) + tk->tp.size + dsize;
event = trace_event_buffer_lock_reserve(&buffer, ftrace_file,
call->event.type,
@@ -832,26 +943,26 @@ __kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs,
return;
entry = ring_buffer_event_data(event);
- entry->ip = (unsigned long)tp->rp.kp.addr;
- store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
+ entry->ip = (unsigned long)tk->rp.kp.addr;
+ store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
- if (!filter_current_check_discard(buffer, call, entry, event))
- trace_buffer_unlock_commit_regs(buffer, event,
- irq_flags, pc, regs);
+ event_trigger_unlock_commit_regs(ftrace_file, buffer, event,
+ entry, irq_flags, pc, regs);
}
-static __kprobes void
-kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs)
+static void
+kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs)
{
struct event_file_link *link;
- list_for_each_entry_rcu(link, &tp->files, list)
- __kprobe_trace_func(tp, regs, link->file);
+ list_for_each_entry_rcu(link, &tk->tp.files, list)
+ __kprobe_trace_func(tk, regs, link->file);
}
+NOKPROBE_SYMBOL(kprobe_trace_func);
/* Kretprobe handler */
-static __kprobes void
-__kretprobe_trace_func(struct trace_probe *tp, struct kretprobe_instance *ri,
+static nokprobe_inline void
+__kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
struct pt_regs *regs,
struct ftrace_event_file *ftrace_file)
{
@@ -860,18 +971,18 @@ __kretprobe_trace_func(struct trace_probe *tp, struct kretprobe_instance *ri,
struct ring_buffer *buffer;
int size, pc, dsize;
unsigned long irq_flags;
- struct ftrace_event_call *call = &tp->call;
+ struct ftrace_event_call *call = &tk->tp.call;
WARN_ON(call != ftrace_file->event_call);
- if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &ftrace_file->flags))
+ if (ftrace_trigger_soft_disabled(ftrace_file))
return;
local_save_flags(irq_flags);
pc = preempt_count();
- dsize = __get_data_size(tp, regs);
- size = sizeof(*entry) + tp->size + dsize;
+ dsize = __get_data_size(&tk->tp, regs);
+ size = sizeof(*entry) + tk->tp.size + dsize;
event = trace_event_buffer_lock_reserve(&buffer, ftrace_file,
call->event.type,
@@ -880,24 +991,24 @@ __kretprobe_trace_func(struct trace_probe *tp, struct kretprobe_instance *ri,
return;
entry = ring_buffer_event_data(event);
- entry->func = (unsigned long)tp->rp.kp.addr;
+ entry->func = (unsigned long)tk->rp.kp.addr;
entry->ret_ip = (unsigned long)ri->ret_addr;
- store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
+ store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
- if (!filter_current_check_discard(buffer, call, entry, event))
- trace_buffer_unlock_commit_regs(buffer, event,
- irq_flags, pc, regs);
+ event_trigger_unlock_commit_regs(ftrace_file, buffer, event,
+ entry, irq_flags, pc, regs);
}
-static __kprobes void
-kretprobe_trace_func(struct trace_probe *tp, struct kretprobe_instance *ri,
+static void
+kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
struct pt_regs *regs)
{
struct event_file_link *link;
- list_for_each_entry_rcu(link, &tp->files, list)
- __kretprobe_trace_func(tp, ri, regs, link->file);
+ list_for_each_entry_rcu(link, &tk->tp.files, list)
+ __kretprobe_trace_func(tk, ri, regs, link->file);
}
+NOKPROBE_SYMBOL(kretprobe_trace_func);
/* Event entry printers */
static enum print_line_t
@@ -913,7 +1024,7 @@ print_kprobe_event(struct trace_iterator *iter, int flags,
field = (struct kprobe_trace_entry_head *)iter->ent;
tp = container_of(event, struct trace_probe, call.event);
- if (!trace_seq_printf(s, "%s: (", tp->call.name))
+ if (!trace_seq_printf(s, "%s: (", ftrace_event_name(&tp->call)))
goto partial;
if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
@@ -949,7 +1060,7 @@ print_kretprobe_event(struct trace_iterator *iter, int flags,
field = (struct kretprobe_trace_entry_head *)iter->ent;
tp = container_of(event, struct trace_probe, call.event);
- if (!trace_seq_printf(s, "%s: (", tp->call.name))
+ if (!trace_seq_printf(s, "%s: (", ftrace_event_name(&tp->call)))
goto partial;
if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
@@ -983,16 +1094,18 @@ static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
{
int ret, i;
struct kprobe_trace_entry_head field;
- struct trace_probe *tp = (struct trace_probe *)event_call->data;
+ struct trace_kprobe *tk = (struct trace_kprobe *)event_call->data;
DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
/* Set argument names as fields */
- for (i = 0; i < tp->nr_args; i++) {
- ret = trace_define_field(event_call, tp->args[i].type->fmttype,
- tp->args[i].name,
- sizeof(field) + tp->args[i].offset,
- tp->args[i].type->size,
- tp->args[i].type->is_signed,
+ for (i = 0; i < tk->tp.nr_args; i++) {
+ struct probe_arg *parg = &tk->tp.args[i];
+
+ ret = trace_define_field(event_call, parg->type->fmttype,
+ parg->name,
+ sizeof(field) + parg->offset,
+ parg->type->size,
+ parg->type->is_signed,
FILTER_OTHER);
if (ret)
return ret;
@@ -1004,17 +1117,19 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
{
int ret, i;
struct kretprobe_trace_entry_head field;
- struct trace_probe *tp = (struct trace_probe *)event_call->data;
+ struct trace_kprobe *tk = (struct trace_kprobe *)event_call->data;
DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
/* Set argument names as fields */
- for (i = 0; i < tp->nr_args; i++) {
- ret = trace_define_field(event_call, tp->args[i].type->fmttype,
- tp->args[i].name,
- sizeof(field) + tp->args[i].offset,
- tp->args[i].type->size,
- tp->args[i].type->is_signed,
+ for (i = 0; i < tk->tp.nr_args; i++) {
+ struct probe_arg *parg = &tk->tp.args[i];
+
+ ret = trace_define_field(event_call, parg->type->fmttype,
+ parg->name,
+ sizeof(field) + parg->offset,
+ parg->type->size,
+ parg->type->is_signed,
FILTER_OTHER);
if (ret)
return ret;
@@ -1022,74 +1137,13 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
return 0;
}
-static int __set_print_fmt(struct trace_probe *tp, char *buf, int len)
-{
- int i;
- int pos = 0;
-
- const char *fmt, *arg;
-
- if (!trace_probe_is_return(tp)) {
- fmt = "(%lx)";
- arg = "REC->" FIELD_STRING_IP;
- } else {
- fmt = "(%lx <- %lx)";
- arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP;
- }
-
- /* When len=0, we just calculate the needed length */
-#define LEN_OR_ZERO (len ? len - pos : 0)
-
- pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt);
-
- for (i = 0; i < tp->nr_args; i++) {
- pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%s",
- tp->args[i].name, tp->args[i].type->fmt);
- }
-
- pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg);
-
- for (i = 0; i < tp->nr_args; i++) {
- if (strcmp(tp->args[i].type->name, "string") == 0)
- pos += snprintf(buf + pos, LEN_OR_ZERO,
- ", __get_str(%s)",
- tp->args[i].name);
- else
- pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s",
- tp->args[i].name);
- }
-
-#undef LEN_OR_ZERO
-
- /* return the length of print_fmt */
- return pos;
-}
-
-static int set_print_fmt(struct trace_probe *tp)
-{
- int len;
- char *print_fmt;
-
- /* First: called with 0 length to calculate the needed length */
- len = __set_print_fmt(tp, NULL, 0);
- print_fmt = kmalloc(len + 1, GFP_KERNEL);
- if (!print_fmt)
- return -ENOMEM;
-
- /* Second: actually write the @print_fmt */
- __set_print_fmt(tp, print_fmt, len + 1);
- tp->call.print_fmt = print_fmt;
-
- return 0;
-}
-
#ifdef CONFIG_PERF_EVENTS
/* Kprobe profile handler */
-static __kprobes void
-kprobe_perf_func(struct trace_probe *tp, struct pt_regs *regs)
+static void
+kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
{
- struct ftrace_event_call *call = &tp->call;
+ struct ftrace_event_call *call = &tk->tp.call;
struct kprobe_trace_entry_head *entry;
struct hlist_head *head;
int size, __size, dsize;
@@ -1099,8 +1153,8 @@ kprobe_perf_func(struct trace_probe *tp, struct pt_regs *regs)
if (hlist_empty(head))
return;
- dsize = __get_data_size(tp, regs);
- __size = sizeof(*entry) + tp->size + dsize;
+ dsize = __get_data_size(&tk->tp, regs);
+ __size = sizeof(*entry) + tk->tp.size + dsize;
size = ALIGN(__size + sizeof(u32), sizeof(u64));
size -= sizeof(u32);
@@ -1108,18 +1162,19 @@ kprobe_perf_func(struct trace_probe *tp, struct pt_regs *regs)
if (!entry)
return;
- entry->ip = (unsigned long)tp->rp.kp.addr;
+ entry->ip = (unsigned long)tk->rp.kp.addr;
memset(&entry[1], 0, dsize);
- store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
+ store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL);
}
+NOKPROBE_SYMBOL(kprobe_perf_func);
/* Kretprobe profile handler */
-static __kprobes void
-kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri,
+static void
+kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
struct pt_regs *regs)
{
- struct ftrace_event_call *call = &tp->call;
+ struct ftrace_event_call *call = &tk->tp.call;
struct kretprobe_trace_entry_head *entry;
struct hlist_head *head;
int size, __size, dsize;
@@ -1129,8 +1184,8 @@ kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri,
if (hlist_empty(head))
return;
- dsize = __get_data_size(tp, regs);
- __size = sizeof(*entry) + tp->size + dsize;
+ dsize = __get_data_size(&tk->tp, regs);
+ __size = sizeof(*entry) + tk->tp.size + dsize;
size = ALIGN(__size + sizeof(u32), sizeof(u64));
size -= sizeof(u32);
@@ -1138,11 +1193,12 @@ kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri,
if (!entry)
return;
- entry->func = (unsigned long)tp->rp.kp.addr;
+ entry->func = (unsigned long)tk->rp.kp.addr;
entry->ret_ip = (unsigned long)ri->ret_addr;
- store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
+ store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL);
}
+NOKPROBE_SYMBOL(kretprobe_perf_func);
#endif /* CONFIG_PERF_EVENTS */
/*
@@ -1151,24 +1207,23 @@ kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri,
* kprobe_trace_self_tests_init() does enable_trace_probe/disable_trace_probe
* lockless, but we can't race with this __init function.
*/
-static __kprobes
-int kprobe_register(struct ftrace_event_call *event,
- enum trace_reg type, void *data)
+static int kprobe_register(struct ftrace_event_call *event,
+ enum trace_reg type, void *data)
{
- struct trace_probe *tp = (struct trace_probe *)event->data;
+ struct trace_kprobe *tk = (struct trace_kprobe *)event->data;
struct ftrace_event_file *file = data;
switch (type) {
case TRACE_REG_REGISTER:
- return enable_trace_probe(tp, file);
+ return enable_trace_kprobe(tk, file);
case TRACE_REG_UNREGISTER:
- return disable_trace_probe(tp, file);
+ return disable_trace_kprobe(tk, file);
#ifdef CONFIG_PERF_EVENTS
case TRACE_REG_PERF_REGISTER:
- return enable_trace_probe(tp, NULL);
+ return enable_trace_kprobe(tk, NULL);
case TRACE_REG_PERF_UNREGISTER:
- return disable_trace_probe(tp, NULL);
+ return disable_trace_kprobe(tk, NULL);
case TRACE_REG_PERF_OPEN:
case TRACE_REG_PERF_CLOSE:
case TRACE_REG_PERF_ADD:
@@ -1179,37 +1234,38 @@ int kprobe_register(struct ftrace_event_call *event,
return 0;
}
-static __kprobes
-int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
+static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
{
- struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
+ struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp);
- tp->nhit++;
+ tk->nhit++;
- if (tp->flags & TP_FLAG_TRACE)
- kprobe_trace_func(tp, regs);
+ if (tk->tp.flags & TP_FLAG_TRACE)
+ kprobe_trace_func(tk, regs);
#ifdef CONFIG_PERF_EVENTS
- if (tp->flags & TP_FLAG_PROFILE)
- kprobe_perf_func(tp, regs);
+ if (tk->tp.flags & TP_FLAG_PROFILE)
+ kprobe_perf_func(tk, regs);
#endif
return 0; /* We don't tweek kernel, so just return 0 */
}
+NOKPROBE_SYMBOL(kprobe_dispatcher);
-static __kprobes
-int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
+static int
+kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
{
- struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
+ struct trace_kprobe *tk = container_of(ri->rp, struct trace_kprobe, rp);
- tp->nhit++;
+ tk->nhit++;
- if (tp->flags & TP_FLAG_TRACE)
- kretprobe_trace_func(tp, ri, regs);
+ if (tk->tp.flags & TP_FLAG_TRACE)
+ kretprobe_trace_func(tk, ri, regs);
#ifdef CONFIG_PERF_EVENTS
- if (tp->flags & TP_FLAG_PROFILE)
- kretprobe_perf_func(tp, ri, regs);
+ if (tk->tp.flags & TP_FLAG_PROFILE)
+ kretprobe_perf_func(tk, ri, regs);
#endif
return 0; /* We don't tweek kernel, so just return 0 */
}
+NOKPROBE_SYMBOL(kretprobe_dispatcher);
static struct trace_event_functions kretprobe_funcs = {
.trace = print_kretprobe_event
@@ -1219,21 +1275,21 @@ static struct trace_event_functions kprobe_funcs = {
.trace = print_kprobe_event
};
-static int register_probe_event(struct trace_probe *tp)
+static int register_kprobe_event(struct trace_kprobe *tk)
{
- struct ftrace_event_call *call = &tp->call;
+ struct ftrace_event_call *call = &tk->tp.call;
int ret;
/* Initialize ftrace_event_call */
INIT_LIST_HEAD(&call->class->fields);
- if (trace_probe_is_return(tp)) {
+ if (trace_kprobe_is_return(tk)) {
call->event.funcs = &kretprobe_funcs;
call->class->define_fields = kretprobe_event_define_fields;
} else {
call->event.funcs = &kprobe_funcs;
call->class->define_fields = kprobe_event_define_fields;
}
- if (set_print_fmt(tp) < 0)
+ if (set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0)
return -ENOMEM;
ret = register_ftrace_event(&call->event);
if (!ret) {
@@ -1242,24 +1298,25 @@ static int register_probe_event(struct trace_probe *tp)
}
call->flags = 0;
call->class->reg = kprobe_register;
- call->data = tp;
+ call->data = tk;
ret = trace_add_event_call(call);
if (ret) {
- pr_info("Failed to register kprobe event: %s\n", call->name);
+ pr_info("Failed to register kprobe event: %s\n",
+ ftrace_event_name(call));
kfree(call->print_fmt);
unregister_ftrace_event(&call->event);
}
return ret;
}
-static int unregister_probe_event(struct trace_probe *tp)
+static int unregister_kprobe_event(struct trace_kprobe *tk)
{
int ret;
/* tp->event is unregistered in trace_remove_event_call() */
- ret = trace_remove_event_call(&tp->call);
+ ret = trace_remove_event_call(&tk->tp.call);
if (!ret)
- kfree(tp->call.print_fmt);
+ kfree(tk->tp.call.print_fmt);
return ret;
}
@@ -1269,7 +1326,7 @@ static __init int init_kprobe_trace(void)
struct dentry *d_tracer;
struct dentry *entry;
- if (register_module_notifier(&trace_probe_module_nb))
+ if (register_module_notifier(&trace_kprobe_module_nb))
return -EINVAL;
d_tracer = tracing_init_dentry();
@@ -1309,72 +1366,75 @@ static __used int kprobe_trace_selftest_target(int a1, int a2, int a3,
}
static struct ftrace_event_file *
-find_trace_probe_file(struct trace_probe *tp, struct trace_array *tr)
+find_trace_probe_file(struct trace_kprobe *tk, struct trace_array *tr)
{
struct ftrace_event_file *file;
list_for_each_entry(file, &tr->events, list)
- if (file->event_call == &tp->call)
+ if (file->event_call == &tk->tp.call)
return file;
return NULL;
}
/*
- * Nobody but us can call enable_trace_probe/disable_trace_probe at this
+ * Nobody but us can call enable_trace_kprobe/disable_trace_kprobe at this
* stage, we can do this lockless.
*/
static __init int kprobe_trace_self_tests_init(void)
{
int ret, warn = 0;
int (*target)(int, int, int, int, int, int);
- struct trace_probe *tp;
+ struct trace_kprobe *tk;
struct ftrace_event_file *file;
+ if (tracing_is_disabled())
+ return -ENODEV;
+
target = kprobe_trace_selftest_target;
pr_info("Testing kprobe tracing: ");
ret = traceprobe_command("p:testprobe kprobe_trace_selftest_target "
"$stack $stack0 +0($stack)",
- create_trace_probe);
+ create_trace_kprobe);
if (WARN_ON_ONCE(ret)) {
pr_warn("error on probing function entry.\n");
warn++;
} else {
/* Enable trace point */
- tp = find_trace_probe("testprobe", KPROBE_EVENT_SYSTEM);
- if (WARN_ON_ONCE(tp == NULL)) {
+ tk = find_trace_kprobe("testprobe", KPROBE_EVENT_SYSTEM);
+ if (WARN_ON_ONCE(tk == NULL)) {
pr_warn("error on getting new probe.\n");
warn++;
} else {
- file = find_trace_probe_file(tp, top_trace_array());
+ file = find_trace_probe_file(tk, top_trace_array());
if (WARN_ON_ONCE(file == NULL)) {
pr_warn("error on getting probe file.\n");
warn++;
} else
- enable_trace_probe(tp, file);
+ enable_trace_kprobe(tk, file);
}
}
ret = traceprobe_command("r:testprobe2 kprobe_trace_selftest_target "
- "$retval", create_trace_probe);
+ "$retval", create_trace_kprobe);
if (WARN_ON_ONCE(ret)) {
pr_warn("error on probing function return.\n");
warn++;
} else {
/* Enable trace point */
- tp = find_trace_probe("testprobe2", KPROBE_EVENT_SYSTEM);
- if (WARN_ON_ONCE(tp == NULL)) {
+ tk = find_trace_kprobe("testprobe2", KPROBE_EVENT_SYSTEM);
+ if (WARN_ON_ONCE(tk == NULL)) {
pr_warn("error on getting 2nd new probe.\n");
warn++;
} else {
- file = find_trace_probe_file(tp, top_trace_array());
+ file = find_trace_probe_file(tk, top_trace_array());
if (WARN_ON_ONCE(file == NULL)) {
pr_warn("error on getting probe file.\n");
warn++;
} else
- enable_trace_probe(tp, file);
+ enable_trace_kprobe(tk, file);
}
}
@@ -1384,46 +1444,46 @@ static __init int kprobe_trace_self_tests_init(void)
ret = target(1, 2, 3, 4, 5, 6);
/* Disable trace points before removing it */
- tp = find_trace_probe("testprobe", KPROBE_EVENT_SYSTEM);
- if (WARN_ON_ONCE(tp == NULL)) {
+ tk = find_trace_kprobe("testprobe", KPROBE_EVENT_SYSTEM);
+ if (WARN_ON_ONCE(tk == NULL)) {
pr_warn("error on getting test probe.\n");
warn++;
} else {
- file = find_trace_probe_file(tp, top_trace_array());
+ file = find_trace_probe_file(tk, top_trace_array());
if (WARN_ON_ONCE(file == NULL)) {
pr_warn("error on getting probe file.\n");
warn++;
} else
- disable_trace_probe(tp, file);
+ disable_trace_kprobe(tk, file);
}
- tp = find_trace_probe("testprobe2", KPROBE_EVENT_SYSTEM);
- if (WARN_ON_ONCE(tp == NULL)) {
+ tk = find_trace_kprobe("testprobe2", KPROBE_EVENT_SYSTEM);
+ if (WARN_ON_ONCE(tk == NULL)) {
pr_warn("error on getting 2nd test probe.\n");
warn++;
} else {
- file = find_trace_probe_file(tp, top_trace_array());
+ file = find_trace_probe_file(tk, top_trace_array());
if (WARN_ON_ONCE(file == NULL)) {
pr_warn("error on getting probe file.\n");
warn++;
} else
- disable_trace_probe(tp, file);
+ disable_trace_kprobe(tk, file);
}
- ret = traceprobe_command("-:testprobe", create_trace_probe);
+ ret = traceprobe_command("-:testprobe", create_trace_kprobe);
if (WARN_ON_ONCE(ret)) {
pr_warn("error on deleting a probe.\n");
warn++;
}
- ret = traceprobe_command("-:testprobe2", create_trace_probe);
+ ret = traceprobe_command("-:testprobe2", create_trace_kprobe);
if (WARN_ON_ONCE(ret)) {
pr_warn("error on deleting a probe.\n");
warn++;
}
end:
- release_all_trace_probes();
+ release_all_trace_kprobes();
if (warn)
pr_cont("NG: Some tests are failed. Please check them.\n");
else
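
The bulk of the trace_kprobe.c changes above split the old monolithic struct trace_probe into a generic probe core (struct trace_probe, shared with the uprobe side) embedded inside a kprobe-specific struct trace_kprobe; the handlers then reach the core through the tp member. A minimal sketch of that embedding, with a hypothetical to_trace_kprobe() helper added purely for illustration:

/*
 * Hedged sketch of the embedding introduced above.  The field layout
 * mirrors the hunks; to_trace_kprobe() is a hypothetical helper showing
 * the usual container_of() recovery, not a function from this patch.
 */
#include <linux/kernel.h>	/* container_of() */
#include <linux/kprobes.h>	/* struct kretprobe */
#include <linux/list.h>

struct trace_probe {
	unsigned int		flags;	/* TP_FLAG_* */
	struct list_head	files;
	/* ... call, class, size, nr_args, args[] ... */
};

struct trace_kprobe {
	struct list_head	list;
	struct kretprobe	rp;	/* use rp.kp for plain kprobes */
	unsigned long		nhit;
	const char		*symbol;
	struct trace_probe	tp;	/* generic core; args[] keeps it last */
};

static struct trace_kprobe *to_trace_kprobe(struct trace_probe *tp)
{
	return container_of(tp, struct trace_kprobe, tp);
}
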
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index b3dcfb2f0fe..0abd9b86347 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -323,7 +323,7 @@ static void __trace_mmiotrace_rw(struct trace_array *tr,
entry = ring_buffer_event_data(event);
entry->rw = *rw;
- if (!filter_check_discard(call, entry, buffer, event))
+ if (!call_filter_check_discard(call, entry, buffer, event))
trace_buffer_unlock_commit(buffer, event, 0, pc);
}
@@ -353,7 +353,7 @@ static void __trace_mmiotrace_map(struct trace_array *tr,
entry = ring_buffer_event_data(event);
entry->map = *map;
- if (!filter_check_discard(call, entry, buffer, event))
+ if (!call_filter_check_discard(call, entry, buffer, event))
trace_buffer_unlock_commit(buffer, event, 0, pc);
}
diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c
index 394f94417e2..fcf0a9e4891 100644
--- a/kernel/trace/trace_nop.c
+++ b/kernel/trace/trace_nop.c
@@ -62,7 +62,7 @@ static void nop_trace_reset(struct trace_array *tr)
* If you don't implement it, then the flag setting will be
* automatically accepted.
*/
-static int nop_set_flag(u32 old_flags, u32 bit, int set)
+static int nop_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
{
/*
* Note that you don't need to update nop_flags.val yourself.
@@ -91,11 +91,11 @@ struct tracer nop_trace __read_mostly =
.name = "nop",
.init = nop_trace_init,
.reset = nop_trace_reset,
- .wait_pipe = poll_wait_pipe,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_nop,
#endif
.flags = &nop_flags,
- .set_flag = nop_set_flag
+ .set_flag = nop_set_flag,
+ .allow_instances = true,
};
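
The trace_nop.c hunk shows the other half of the instance work: set_flag() callbacks now receive the trace_array they act on, and a tracer advertises instance support with .allow_instances. A hedged sketch of a tracer adopting the new signature; all example_* names are hypothetical, and only the prototypes and the .allow_instances field come from the hunks above.

static int example_trace_init(struct trace_array *tr)
{
	return 0;
}

static void example_trace_reset(struct trace_array *tr)
{
}

static int example_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
{
	/*
	 * A real tracer would switch on 'bit' and record the option for
	 * this particular trace_array rather than in a single global.
	 */
	return 0;
}

static struct tracer example_tracer __read_mostly = {
	.name		 = "example",
	.init		 = example_trace_init,
	.reset		 = example_trace_reset,
	.set_flag	 = example_set_flag,	/* now takes the trace_array */
	.allow_instances = true,		/* usable under instances/<name>/ */
};
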
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 34e7cbac0c9..f3dad80c20b 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -126,6 +126,34 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
EXPORT_SYMBOL_GPL(trace_seq_printf);
/**
+ * trace_seq_bitmask - put a list of longs as a bitmask print output
+ * @s: trace sequence descriptor
+ * @maskp: points to an array of unsigned longs that represent a bitmask
+ * @nmaskbits: The number of bits that are valid in @maskp
+ *
+ * It returns 0 if the trace oversizes the buffer's free
+ * space, 1 otherwise.
+ *
+ * Writes an ASCII representation of a bitmask string into @s.
+ */
+int
+trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp,
+ int nmaskbits)
+{
+ int len = (PAGE_SIZE - 1) - s->len;
+ int ret;
+
+ if (s->full || !len)
+ return 0;
+
+ ret = bitmap_scnprintf(s->buffer, len, maskp, nmaskbits);
+ s->len += ret;
+
+ return 1;
+}
+EXPORT_SYMBOL_GPL(trace_seq_bitmask);
+
+/**
* trace_seq_vprintf - sequence printing of trace information
* @s: trace sequence descriptor
* @fmt: printf format string
@@ -399,6 +427,19 @@ EXPORT_SYMBOL(ftrace_print_symbols_seq_u64);
#endif
const char *
+ftrace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr,
+ unsigned int bitmask_size)
+{
+ const char *ret = p->buffer + p->len;
+
+ trace_seq_bitmask(p, bitmask_ptr, bitmask_size * 8);
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(ftrace_print_bitmask_seq);
+
+const char *
ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len)
{
int i;
@@ -431,7 +472,7 @@ int ftrace_raw_output_prep(struct trace_iterator *iter,
}
trace_seq_init(p);
- ret = trace_seq_printf(s, "%s: ", event->name);
+ ret = trace_seq_printf(s, "%s: ", ftrace_event_name(event));
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
@@ -439,6 +480,37 @@ int ftrace_raw_output_prep(struct trace_iterator *iter,
}
EXPORT_SYMBOL(ftrace_raw_output_prep);
+static int ftrace_output_raw(struct trace_iterator *iter, char *name,
+ char *fmt, va_list ap)
+{
+ struct trace_seq *s = &iter->seq;
+ int ret;
+
+ ret = trace_seq_printf(s, "%s: ", name);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ ret = trace_seq_vprintf(s, fmt, ap);
+
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ return TRACE_TYPE_HANDLED;
+}
+
+int ftrace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...)
+{
+ va_list ap;
+ int ret;
+
+ va_start(ap, fmt);
+ ret = ftrace_output_raw(iter, name, fmt, ap);
+ va_end(ap);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(ftrace_output_call);
+
#ifdef CONFIG_KRETPROBES
static inline const char *kretprobed(const char *name)
{
@@ -618,8 +690,23 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
(entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
(entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 'X' :
'.';
- need_resched =
- (entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.';
+
+ switch (entry->flags & (TRACE_FLAG_NEED_RESCHED |
+ TRACE_FLAG_PREEMPT_RESCHED)) {
+ case TRACE_FLAG_NEED_RESCHED | TRACE_FLAG_PREEMPT_RESCHED:
+ need_resched = 'N';
+ break;
+ case TRACE_FLAG_NEED_RESCHED:
+ need_resched = 'n';
+ break;
+ case TRACE_FLAG_PREEMPT_RESCHED:
+ need_resched = 'p';
+ break;
+ default:
+ need_resched = '.';
+ break;
+ }
+
hardsoft_irq =
(hardirq && softirq) ? 'H' :
hardirq ? 'h' :
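
trace_seq_bitmask() above is the low-level primitive behind bitmask printing in trace output; per the hunk it renders the mask with bitmap_scnprintf() (comma-separated hexadecimal). A hedged sketch of a caller dumping a cpumask into a trace_seq; print_cpus() is a hypothetical helper, not part of this patch.

#include <linux/cpumask.h>
#include <linux/trace_seq.h>

/* Hedged sketch: write "cpus=<hex mask>" into the sequence buffer. */
static void print_cpus(struct trace_seq *s, const struct cpumask *mask)
{
	trace_seq_puts(s, "cpus=");
	trace_seq_bitmask(s, cpumask_bits(mask), nr_cpu_ids);
}
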
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 412e959709b..d4b9fc22cd2 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -35,48 +35,28 @@ const char *reserved_field_names[] = {
FIELD_STRING_FUNC,
};
-/* Printing function type */
-#define PRINT_TYPE_FUNC_NAME(type) print_type_##type
-#define PRINT_TYPE_FMT_NAME(type) print_type_format_##type
-
/* Printing in basic type function template */
-#define DEFINE_BASIC_PRINT_TYPE_FUNC(type, fmt, cast) \
-static __kprobes int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, \
- const char *name, \
- void *data, void *ent)\
+#define DEFINE_BASIC_PRINT_TYPE_FUNC(type, fmt) \
+int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, const char *name, \
+ void *data, void *ent) \
{ \
- return trace_seq_printf(s, " %s=" fmt, name, (cast)*(type *)data);\
+ return trace_seq_printf(s, " %s=" fmt, name, *(type *)data); \
} \
-static const char PRINT_TYPE_FMT_NAME(type)[] = fmt;
-
-DEFINE_BASIC_PRINT_TYPE_FUNC(u8, "%x", unsigned int)
-DEFINE_BASIC_PRINT_TYPE_FUNC(u16, "%x", unsigned int)
-DEFINE_BASIC_PRINT_TYPE_FUNC(u32, "%lx", unsigned long)
-DEFINE_BASIC_PRINT_TYPE_FUNC(u64, "%llx", unsigned long long)
-DEFINE_BASIC_PRINT_TYPE_FUNC(s8, "%d", int)
-DEFINE_BASIC_PRINT_TYPE_FUNC(s16, "%d", int)
-DEFINE_BASIC_PRINT_TYPE_FUNC(s32, "%ld", long)
-DEFINE_BASIC_PRINT_TYPE_FUNC(s64, "%lld", long long)
-
-static inline void *get_rloc_data(u32 *dl)
-{
- return (u8 *)dl + get_rloc_offs(*dl);
-}
-
-/* For data_loc conversion */
-static inline void *get_loc_data(u32 *dl, void *ent)
-{
- return (u8 *)ent + get_rloc_offs(*dl);
-}
-
-/* For defining macros, define string/string_size types */
-typedef u32 string;
-typedef u32 string_size;
+const char PRINT_TYPE_FMT_NAME(type)[] = fmt; \
+NOKPROBE_SYMBOL(PRINT_TYPE_FUNC_NAME(type));
+
+DEFINE_BASIC_PRINT_TYPE_FUNC(u8 , "0x%x")
+DEFINE_BASIC_PRINT_TYPE_FUNC(u16, "0x%x")
+DEFINE_BASIC_PRINT_TYPE_FUNC(u32, "0x%x")
+DEFINE_BASIC_PRINT_TYPE_FUNC(u64, "0x%Lx")
+DEFINE_BASIC_PRINT_TYPE_FUNC(s8, "%d")
+DEFINE_BASIC_PRINT_TYPE_FUNC(s16, "%d")
+DEFINE_BASIC_PRINT_TYPE_FUNC(s32, "%d")
+DEFINE_BASIC_PRINT_TYPE_FUNC(s64, "%Ld")
/* Print type function for string type */
-static __kprobes int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s,
- const char *name,
- void *data, void *ent)
+int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s, const char *name,
+ void *data, void *ent)
{
int len = *(u32 *)data >> 16;
@@ -86,19 +66,9 @@ static __kprobes int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s,
return trace_seq_printf(s, " %s=\"%s\"", name,
(const char *)get_loc_data(data, ent));
}
+NOKPROBE_SYMBOL(PRINT_TYPE_FUNC_NAME(string));
-static const char PRINT_TYPE_FMT_NAME(string)[] = "\\\"%s\\\"";
-
-#define FETCH_FUNC_NAME(method, type) fetch_##method##_##type
-/*
- * Define macro for basic types - we don't need to define s* types, because
- * we have to care only about bitwidth at recording time.
- */
-#define DEFINE_BASIC_FETCH_FUNCS(method) \
-DEFINE_FETCH_##method(u8) \
-DEFINE_FETCH_##method(u16) \
-DEFINE_FETCH_##method(u32) \
-DEFINE_FETCH_##method(u64)
+const char PRINT_TYPE_FMT_NAME(string)[] = "\\\"%s\\\"";
#define CHECK_FETCH_FUNCS(method, fn) \
(((FETCH_FUNC_NAME(method, u8) == fn) || \
@@ -111,208 +81,79 @@ DEFINE_FETCH_##method(u64)
/* Data fetch function templates */
#define DEFINE_FETCH_reg(type) \
-static __kprobes void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs, \
- void *offset, void *dest) \
+void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs, void *offset, void *dest) \
{ \
*(type *)dest = (type)regs_get_register(regs, \
(unsigned int)((unsigned long)offset)); \
-}
+} \
+NOKPROBE_SYMBOL(FETCH_FUNC_NAME(reg, type));
DEFINE_BASIC_FETCH_FUNCS(reg)
/* No string on the register */
#define fetch_reg_string NULL
#define fetch_reg_string_size NULL
-#define DEFINE_FETCH_stack(type) \
-static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\
- void *offset, void *dest) \
-{ \
- *(type *)dest = (type)regs_get_kernel_stack_nth(regs, \
- (unsigned int)((unsigned long)offset)); \
-}
-DEFINE_BASIC_FETCH_FUNCS(stack)
-/* No string on the stack entry */
-#define fetch_stack_string NULL
-#define fetch_stack_string_size NULL
-
#define DEFINE_FETCH_retval(type) \
-static __kprobes void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs,\
- void *dummy, void *dest) \
+void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs, \
+ void *dummy, void *dest) \
{ \
*(type *)dest = (type)regs_return_value(regs); \
-}
+} \
+NOKPROBE_SYMBOL(FETCH_FUNC_NAME(retval, type));
DEFINE_BASIC_FETCH_FUNCS(retval)
/* No string on the retval */
#define fetch_retval_string NULL
#define fetch_retval_string_size NULL
-#define DEFINE_FETCH_memory(type) \
-static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\
- void *addr, void *dest) \
-{ \
- type retval; \
- if (probe_kernel_address(addr, retval)) \
- *(type *)dest = 0; \
- else \
- *(type *)dest = retval; \
-}
-DEFINE_BASIC_FETCH_FUNCS(memory)
-/*
- * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
- * length and relative data location.
- */
-static __kprobes void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
- void *addr, void *dest)
-{
- long ret;
- int maxlen = get_rloc_len(*(u32 *)dest);
- u8 *dst = get_rloc_data(dest);
- u8 *src = addr;
- mm_segment_t old_fs = get_fs();
-
- if (!maxlen)
- return;
-
- /*
- * Try to get string again, since the string can be changed while
- * probing.
- */
- set_fs(KERNEL_DS);
- pagefault_disable();
-
- do
- ret = __copy_from_user_inatomic(dst++, src++, 1);
- while (dst[-1] && ret == 0 && src - (u8 *)addr < maxlen);
-
- dst[-1] = '\0';
- pagefault_enable();
- set_fs(old_fs);
-
- if (ret < 0) { /* Failed to fetch string */
- ((u8 *)get_rloc_data(dest))[0] = '\0';
- *(u32 *)dest = make_data_rloc(0, get_rloc_offs(*(u32 *)dest));
- } else {
- *(u32 *)dest = make_data_rloc(src - (u8 *)addr,
- get_rloc_offs(*(u32 *)dest));
- }
-}
-
-/* Return the length of string -- including null terminal byte */
-static __kprobes void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
- void *addr, void *dest)
-{
- mm_segment_t old_fs;
- int ret, len = 0;
- u8 c;
-
- old_fs = get_fs();
- set_fs(KERNEL_DS);
- pagefault_disable();
-
- do {
- ret = __copy_from_user_inatomic(&c, (u8 *)addr + len, 1);
- len++;
- } while (c && ret == 0 && len < MAX_STRING_SIZE);
-
- pagefault_enable();
- set_fs(old_fs);
-
- if (ret < 0) /* Failed to check the length */
- *(u32 *)dest = 0;
- else
- *(u32 *)dest = len;
-}
-
-/* Memory fetching by symbol */
-struct symbol_cache {
- char *symbol;
- long offset;
- unsigned long addr;
-};
-
-static unsigned long update_symbol_cache(struct symbol_cache *sc)
-{
- sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol);
-
- if (sc->addr)
- sc->addr += sc->offset;
-
- return sc->addr;
-}
-
-static void free_symbol_cache(struct symbol_cache *sc)
-{
- kfree(sc->symbol);
- kfree(sc);
-}
-
-static struct symbol_cache *alloc_symbol_cache(const char *sym, long offset)
-{
- struct symbol_cache *sc;
-
- if (!sym || strlen(sym) == 0)
- return NULL;
-
- sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL);
- if (!sc)
- return NULL;
-
- sc->symbol = kstrdup(sym, GFP_KERNEL);
- if (!sc->symbol) {
- kfree(sc);
- return NULL;
- }
- sc->offset = offset;
- update_symbol_cache(sc);
-
- return sc;
-}
-
-#define DEFINE_FETCH_symbol(type) \
-static __kprobes void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs,\
- void *data, void *dest) \
-{ \
- struct symbol_cache *sc = data; \
- if (sc->addr) \
- fetch_memory_##type(regs, (void *)sc->addr, dest); \
- else \
- *(type *)dest = 0; \
-}
-DEFINE_BASIC_FETCH_FUNCS(symbol)
-DEFINE_FETCH_symbol(string)
-DEFINE_FETCH_symbol(string_size)
-
/* Dereference memory access function */
struct deref_fetch_param {
struct fetch_param orig;
long offset;
+ fetch_func_t fetch;
+ fetch_func_t fetch_size;
};
#define DEFINE_FETCH_deref(type) \
-static __kprobes void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs,\
- void *data, void *dest) \
+void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs, \
+ void *data, void *dest) \
{ \
struct deref_fetch_param *dprm = data; \
unsigned long addr; \
call_fetch(&dprm->orig, regs, &addr); \
if (addr) { \
addr += dprm->offset; \
- fetch_memory_##type(regs, (void *)addr, dest); \
+ dprm->fetch(regs, (void *)addr, dest); \
} else \
*(type *)dest = 0; \
-}
+} \
+NOKPROBE_SYMBOL(FETCH_FUNC_NAME(deref, type));
DEFINE_BASIC_FETCH_FUNCS(deref)
DEFINE_FETCH_deref(string)
-DEFINE_FETCH_deref(string_size)
-static __kprobes void update_deref_fetch_param(struct deref_fetch_param *data)
+void FETCH_FUNC_NAME(deref, string_size)(struct pt_regs *regs,
+ void *data, void *dest)
+{
+ struct deref_fetch_param *dprm = data;
+ unsigned long addr;
+
+ call_fetch(&dprm->orig, regs, &addr);
+ if (addr && dprm->fetch_size) {
+ addr += dprm->offset;
+ dprm->fetch_size(regs, (void *)addr, dest);
+ } else
+ *(string_size *)dest = 0;
+}
+NOKPROBE_SYMBOL(FETCH_FUNC_NAME(deref, string_size));
+
+static void update_deref_fetch_param(struct deref_fetch_param *data)
{
if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
update_deref_fetch_param(data->orig.data);
else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
update_symbol_cache(data->orig.data);
}
+NOKPROBE_SYMBOL(update_deref_fetch_param);
-static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data)
+static void free_deref_fetch_param(struct deref_fetch_param *data)
{
if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
free_deref_fetch_param(data->orig.data);
@@ -320,6 +161,7 @@ static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data)
free_symbol_cache(data->orig.data);
kfree(data);
}
+NOKPROBE_SYMBOL(free_deref_fetch_param);
/* Bitfield fetch function */
struct bitfield_fetch_param {
@@ -329,8 +171,8 @@ struct bitfield_fetch_param {
};
#define DEFINE_FETCH_bitfield(type) \
-static __kprobes void FETCH_FUNC_NAME(bitfield, type)(struct pt_regs *regs,\
- void *data, void *dest) \
+void FETCH_FUNC_NAME(bitfield, type)(struct pt_regs *regs, \
+ void *data, void *dest) \
{ \
struct bitfield_fetch_param *bprm = data; \
type buf = 0; \
@@ -340,13 +182,13 @@ static __kprobes void FETCH_FUNC_NAME(bitfield, type)(struct pt_regs *regs,\
buf >>= bprm->low_shift; \
} \
*(type *)dest = buf; \
-}
-
+} \
+NOKPROBE_SYMBOL(FETCH_FUNC_NAME(bitfield, type));
DEFINE_BASIC_FETCH_FUNCS(bitfield)
#define fetch_bitfield_string NULL
#define fetch_bitfield_string_size NULL
-static __kprobes void
+static void
update_bitfield_fetch_param(struct bitfield_fetch_param *data)
{
/*
@@ -359,7 +201,7 @@ update_bitfield_fetch_param(struct bitfield_fetch_param *data)
update_symbol_cache(data->orig.data);
}
-static __kprobes void
+static void
free_bitfield_fetch_param(struct bitfield_fetch_param *data)
{
/*
@@ -374,58 +216,8 @@ free_bitfield_fetch_param(struct bitfield_fetch_param *data)
kfree(data);
}
-/* Default (unsigned long) fetch type */
-#define __DEFAULT_FETCH_TYPE(t) u##t
-#define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t)
-#define DEFAULT_FETCH_TYPE _DEFAULT_FETCH_TYPE(BITS_PER_LONG)
-#define DEFAULT_FETCH_TYPE_STR __stringify(DEFAULT_FETCH_TYPE)
-
-#define ASSIGN_FETCH_FUNC(method, type) \
- [FETCH_MTD_##method] = FETCH_FUNC_NAME(method, type)
-
-#define __ASSIGN_FETCH_TYPE(_name, ptype, ftype, _size, sign, _fmttype) \
- {.name = _name, \
- .size = _size, \
- .is_signed = sign, \
- .print = PRINT_TYPE_FUNC_NAME(ptype), \
- .fmt = PRINT_TYPE_FMT_NAME(ptype), \
- .fmttype = _fmttype, \
- .fetch = { \
-ASSIGN_FETCH_FUNC(reg, ftype), \
-ASSIGN_FETCH_FUNC(stack, ftype), \
-ASSIGN_FETCH_FUNC(retval, ftype), \
-ASSIGN_FETCH_FUNC(memory, ftype), \
-ASSIGN_FETCH_FUNC(symbol, ftype), \
-ASSIGN_FETCH_FUNC(deref, ftype), \
-ASSIGN_FETCH_FUNC(bitfield, ftype), \
- } \
- }
-
-#define ASSIGN_FETCH_TYPE(ptype, ftype, sign) \
- __ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, #ptype)
-
-#define FETCH_TYPE_STRING 0
-#define FETCH_TYPE_STRSIZE 1
-
-/* Fetch type information table */
-static const struct fetch_type fetch_type_table[] = {
- /* Special types */
- [FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string,
- sizeof(u32), 1, "__data_loc char[]"),
- [FETCH_TYPE_STRSIZE] = __ASSIGN_FETCH_TYPE("string_size", u32,
- string_size, sizeof(u32), 0, "u32"),
- /* Basic types */
- ASSIGN_FETCH_TYPE(u8, u8, 0),
- ASSIGN_FETCH_TYPE(u16, u16, 0),
- ASSIGN_FETCH_TYPE(u32, u32, 0),
- ASSIGN_FETCH_TYPE(u64, u64, 0),
- ASSIGN_FETCH_TYPE(s8, u8, 1),
- ASSIGN_FETCH_TYPE(s16, u16, 1),
- ASSIGN_FETCH_TYPE(s32, u32, 1),
- ASSIGN_FETCH_TYPE(s64, u64, 1),
-};
-
-static const struct fetch_type *find_fetch_type(const char *type)
+static const struct fetch_type *find_fetch_type(const char *type,
+ const struct fetch_type *ftbl)
{
int i;
@@ -446,44 +238,52 @@ static const struct fetch_type *find_fetch_type(const char *type)
switch (bs) {
case 8:
- return find_fetch_type("u8");
+ return find_fetch_type("u8", ftbl);
case 16:
- return find_fetch_type("u16");
+ return find_fetch_type("u16", ftbl);
case 32:
- return find_fetch_type("u32");
+ return find_fetch_type("u32", ftbl);
case 64:
- return find_fetch_type("u64");
+ return find_fetch_type("u64", ftbl);
default:
goto fail;
}
}
- for (i = 0; i < ARRAY_SIZE(fetch_type_table); i++)
- if (strcmp(type, fetch_type_table[i].name) == 0)
- return &fetch_type_table[i];
+ for (i = 0; ftbl[i].name; i++) {
+ if (strcmp(type, ftbl[i].name) == 0)
+ return &ftbl[i];
+ }
fail:
return NULL;
}
/* Special function : only accept unsigned long */
-static __kprobes void fetch_stack_address(struct pt_regs *regs,
- void *dummy, void *dest)
+static void fetch_kernel_stack_address(struct pt_regs *regs, void *dummy, void *dest)
{
*(unsigned long *)dest = kernel_stack_pointer(regs);
}
+NOKPROBE_SYMBOL(fetch_kernel_stack_address);
+
+static void fetch_user_stack_address(struct pt_regs *regs, void *dummy, void *dest)
+{
+ *(unsigned long *)dest = user_stack_pointer(regs);
+}
+NOKPROBE_SYMBOL(fetch_user_stack_address);
static fetch_func_t get_fetch_size_function(const struct fetch_type *type,
- fetch_func_t orig_fn)
+ fetch_func_t orig_fn,
+ const struct fetch_type *ftbl)
{
int i;
- if (type != &fetch_type_table[FETCH_TYPE_STRING])
+ if (type != &ftbl[FETCH_TYPE_STRING])
return NULL; /* Only string type needs size function */
for (i = 0; i < FETCH_MTD_END; i++)
if (type->fetch[i] == orig_fn)
- return fetch_type_table[FETCH_TYPE_STRSIZE].fetch[i];
+ return ftbl[FETCH_TYPE_STRSIZE].fetch[i];
WARN_ON(1); /* This should not happen */
@@ -516,7 +316,8 @@ int traceprobe_split_symbol_offset(char *symbol, unsigned long *offset)
#define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long))
static int parse_probe_vars(char *arg, const struct fetch_type *t,
- struct fetch_param *f, bool is_return)
+ struct fetch_param *f, bool is_return,
+ bool is_kprobe)
{
int ret = 0;
unsigned long param;
@@ -528,13 +329,16 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t,
ret = -EINVAL;
} else if (strncmp(arg, "stack", 5) == 0) {
if (arg[5] == '\0') {
- if (strcmp(t->name, DEFAULT_FETCH_TYPE_STR) == 0)
- f->fn = fetch_stack_address;
+ if (strcmp(t->name, DEFAULT_FETCH_TYPE_STR))
+ return -EINVAL;
+
+ if (is_kprobe)
+ f->fn = fetch_kernel_stack_address;
else
- ret = -EINVAL;
+ f->fn = fetch_user_stack_address;
} else if (isdigit(arg[5])) {
ret = kstrtoul(arg + 5, 10, &param);
- if (ret || param > PARAM_MAX_STACK)
+ if (ret || (is_kprobe && param > PARAM_MAX_STACK))
ret = -EINVAL;
else {
f->fn = t->fetch[FETCH_MTD_stack];
@@ -552,20 +356,18 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t,
static int parse_probe_arg(char *arg, const struct fetch_type *t,
struct fetch_param *f, bool is_return, bool is_kprobe)
{
+ const struct fetch_type *ftbl;
unsigned long param;
long offset;
char *tmp;
- int ret;
+ int ret = 0;
- ret = 0;
-
- /* Until uprobe_events supports only reg arguments */
- if (!is_kprobe && arg[0] != '%')
- return -EINVAL;
+ ftbl = is_kprobe ? kprobes_fetch_type_table : uprobes_fetch_type_table;
+ BUG_ON(ftbl == NULL);
switch (arg[0]) {
case '$':
- ret = parse_probe_vars(arg + 1, t, f, is_return);
+ ret = parse_probe_vars(arg + 1, t, f, is_return, is_kprobe);
break;
case '%': /* named register */
@@ -577,7 +379,7 @@ static int parse_probe_arg(char *arg, const struct fetch_type *t,
}
break;
- case '@': /* memory or symbol */
+ case '@': /* memory, file-offset or symbol */
if (isdigit(arg[1])) {
ret = kstrtoul(arg + 1, 0, &param);
if (ret)
@@ -585,7 +387,22 @@ static int parse_probe_arg(char *arg, const struct fetch_type *t,
f->fn = t->fetch[FETCH_MTD_memory];
f->data = (void *)param;
+ } else if (arg[1] == '+') {
+ /* kprobes don't support file offsets */
+ if (is_kprobe)
+ return -EINVAL;
+
+ ret = kstrtol(arg + 2, 0, &offset);
+ if (ret)
+ break;
+
+ f->fn = t->fetch[FETCH_MTD_file_offset];
+ f->data = (void *)offset;
} else {
+ /* uprobes don't support symbols */
+ if (!is_kprobe)
+ return -EINVAL;
+
ret = traceprobe_split_symbol_offset(arg + 1, &offset);
if (ret)
break;
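
For reference, the fetch methods parsed in this hunk map onto the event-definition syntax exposed through tracefs. A minimal, hypothetical sketch of installing a kprobe event that exercises the '%reg', '+offs(...)' dereference and '@symbol' methods (the probe point, argument names and x86_64 register names below are illustrative assumptions, not taken from this patch):

/*
 * Hypothetical sketch: write a kprobe event definition whose arguments use
 * a named register, a dereference with a string type, and a symbol fetch.
 * Assumes debugfs is mounted at /sys/kernel/debug and requires root.
 */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/kernel/debug/tracing/kprobe_events", "w");

	if (!f)
		return 1;
	/* p[:GRP/EVENT] SYMBOL ARG=%reg ARG=+offs(%reg):type ARG=@symbol */
	fprintf(f, "p:myprobes/open do_sys_open dfd=%%di name=+0(%%si):string jif=@jiffies_64\n");
	fclose(f);
	return 0;
}
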
@@ -616,7 +433,7 @@ static int parse_probe_arg(char *arg, const struct fetch_type *t,
struct deref_fetch_param *dprm;
const struct fetch_type *t2;
- t2 = find_fetch_type(NULL);
+ t2 = find_fetch_type(NULL, ftbl);
*tmp = '\0';
dprm = kzalloc(sizeof(struct deref_fetch_param), GFP_KERNEL);
@@ -624,6 +441,9 @@ static int parse_probe_arg(char *arg, const struct fetch_type *t,
return -ENOMEM;
dprm->offset = offset;
+ dprm->fetch = t->fetch[FETCH_MTD_memory];
+ dprm->fetch_size = get_fetch_size_function(t,
+ dprm->fetch, ftbl);
ret = parse_probe_arg(arg, t2, &dprm->orig, is_return,
is_kprobe);
if (ret)
@@ -685,9 +505,13 @@ static int __parse_bitfield_probe_arg(const char *bf,
int traceprobe_parse_probe_arg(char *arg, ssize_t *size,
struct probe_arg *parg, bool is_return, bool is_kprobe)
{
+ const struct fetch_type *ftbl;
const char *t;
int ret;
+ ftbl = is_kprobe ? kprobes_fetch_type_table : uprobes_fetch_type_table;
+ BUG_ON(ftbl == NULL);
+
if (strlen(arg) > MAX_ARGSTR_LEN) {
pr_info("Argument is too long.: %s\n", arg);
return -ENOSPC;
@@ -702,7 +526,7 @@ int traceprobe_parse_probe_arg(char *arg, ssize_t *size,
arg[t - parg->comm] = '\0';
t++;
}
- parg->type = find_fetch_type(t);
+ parg->type = find_fetch_type(t, ftbl);
if (!parg->type) {
pr_info("Unsupported type: %s\n", t);
return -EINVAL;
@@ -716,7 +540,8 @@ int traceprobe_parse_probe_arg(char *arg, ssize_t *size,
if (ret >= 0) {
parg->fetch_size.fn = get_fetch_size_function(parg->type,
- parg->fetch.fn);
+ parg->fetch.fn,
+ ftbl);
parg->fetch_size.data = parg->fetch.data;
}
@@ -837,3 +662,65 @@ out:
return ret;
}
+
+static int __set_print_fmt(struct trace_probe *tp, char *buf, int len,
+ bool is_return)
+{
+ int i;
+ int pos = 0;
+
+ const char *fmt, *arg;
+
+ if (!is_return) {
+ fmt = "(%lx)";
+ arg = "REC->" FIELD_STRING_IP;
+ } else {
+ fmt = "(%lx <- %lx)";
+ arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP;
+ }
+
+ /* When len=0, we just calculate the needed length */
+#define LEN_OR_ZERO (len ? len - pos : 0)
+
+ pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt);
+
+ for (i = 0; i < tp->nr_args; i++) {
+ pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%s",
+ tp->args[i].name, tp->args[i].type->fmt);
+ }
+
+ pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg);
+
+ for (i = 0; i < tp->nr_args; i++) {
+ if (strcmp(tp->args[i].type->name, "string") == 0)
+ pos += snprintf(buf + pos, LEN_OR_ZERO,
+ ", __get_str(%s)",
+ tp->args[i].name);
+ else
+ pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s",
+ tp->args[i].name);
+ }
+
+#undef LEN_OR_ZERO
+
+ /* return the length of print_fmt */
+ return pos;
+}
+
+int set_print_fmt(struct trace_probe *tp, bool is_return)
+{
+ int len;
+ char *print_fmt;
+
+ /* First: called with 0 length to calculate the needed length */
+ len = __set_print_fmt(tp, NULL, 0, is_return);
+ print_fmt = kmalloc(len + 1, GFP_KERNEL);
+ if (!print_fmt)
+ return -ENOMEM;
+
+ /* Second: actually write the @print_fmt */
+ __set_print_fmt(tp, print_fmt, len + 1, is_return);
+ tp->call.print_fmt = print_fmt;
+
+ return 0;
+}
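
The two calls in set_print_fmt() rely on the standard two-pass snprintf() sizing idiom: the first pass runs with a zero length purely to measure, the buffer is then allocated, and the second pass fills it. A self-contained user-space sketch of the same idiom, with a hypothetical build_fmt() standing in for __set_print_fmt():

/*
 * Sketch of the two-pass snprintf() sizing idiom used by set_print_fmt()
 * above; build_fmt() is a hypothetical stand-in for __set_print_fmt().
 */
#include <stdio.h>
#include <stdlib.h>

static int build_fmt(char *buf, int len, const char *name, int val)
{
	int pos = 0;

/* With len == 0, snprintf() writes nothing (the buffer is never touched)
 * but still returns the number of bytes the output would need. */
#define LEN_OR_ZERO (len ? len - pos : 0)
	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s=%%d\"", name);
	pos += snprintf(buf + pos, LEN_OR_ZERO, ", %d", val);
#undef LEN_OR_ZERO

	return pos;	/* length the final string needs, excluding the NUL */
}

int main(void)
{
	int len = build_fmt(NULL, 0, "count", 42);	/* first pass: measure */
	char *fmt = malloc(len + 1);

	if (!fmt)
		return 1;
	build_fmt(fmt, len + 1, "count", 42);		/* second pass: fill */
	puts(fmt);					/* prints: "count=%d", 42 */
	free(fmt);
	return 0;
}
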
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 5c7e09d10d7..4f815fbce16 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -81,6 +81,17 @@
*/
#define convert_rloc_to_loc(dl, offs) ((u32)(dl) + (offs))
+static nokprobe_inline void *get_rloc_data(u32 *dl)
+{
+ return (u8 *)dl + get_rloc_offs(*dl);
+}
+
+/* For data_loc conversion */
+static nokprobe_inline void *get_loc_data(u32 *dl, void *ent)
+{
+ return (u8 *)ent + get_rloc_offs(*dl);
+}
+
/* Data fetch function type */
typedef void (*fetch_func_t)(struct pt_regs *, void *, void *);
/* Printing function type */
@@ -95,6 +106,7 @@ enum {
FETCH_MTD_symbol,
FETCH_MTD_deref,
FETCH_MTD_bitfield,
+ FETCH_MTD_file_offset,
FETCH_MTD_END,
};
@@ -115,6 +127,147 @@ struct fetch_param {
void *data;
};
+/* For defining macros, define string/string_size types */
+typedef u32 string;
+typedef u32 string_size;
+
+#define PRINT_TYPE_FUNC_NAME(type) print_type_##type
+#define PRINT_TYPE_FMT_NAME(type) print_type_format_##type
+
+/* Printing in basic type function template */
+#define DECLARE_BASIC_PRINT_TYPE_FUNC(type) \
+int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, const char *name, \
+ void *data, void *ent); \
+extern const char PRINT_TYPE_FMT_NAME(type)[]
+
+DECLARE_BASIC_PRINT_TYPE_FUNC(u8);
+DECLARE_BASIC_PRINT_TYPE_FUNC(u16);
+DECLARE_BASIC_PRINT_TYPE_FUNC(u32);
+DECLARE_BASIC_PRINT_TYPE_FUNC(u64);
+DECLARE_BASIC_PRINT_TYPE_FUNC(s8);
+DECLARE_BASIC_PRINT_TYPE_FUNC(s16);
+DECLARE_BASIC_PRINT_TYPE_FUNC(s32);
+DECLARE_BASIC_PRINT_TYPE_FUNC(s64);
+DECLARE_BASIC_PRINT_TYPE_FUNC(string);
+
+#define FETCH_FUNC_NAME(method, type) fetch_##method##_##type
+
+/* Declare macro for basic types */
+#define DECLARE_FETCH_FUNC(method, type) \
+extern void FETCH_FUNC_NAME(method, type)(struct pt_regs *regs, \
+ void *data, void *dest)
+
+#define DECLARE_BASIC_FETCH_FUNCS(method) \
+DECLARE_FETCH_FUNC(method, u8); \
+DECLARE_FETCH_FUNC(method, u16); \
+DECLARE_FETCH_FUNC(method, u32); \
+DECLARE_FETCH_FUNC(method, u64)
+
+DECLARE_BASIC_FETCH_FUNCS(reg);
+#define fetch_reg_string NULL
+#define fetch_reg_string_size NULL
+
+DECLARE_BASIC_FETCH_FUNCS(retval);
+#define fetch_retval_string NULL
+#define fetch_retval_string_size NULL
+
+DECLARE_BASIC_FETCH_FUNCS(symbol);
+DECLARE_FETCH_FUNC(symbol, string);
+DECLARE_FETCH_FUNC(symbol, string_size);
+
+DECLARE_BASIC_FETCH_FUNCS(deref);
+DECLARE_FETCH_FUNC(deref, string);
+DECLARE_FETCH_FUNC(deref, string_size);
+
+DECLARE_BASIC_FETCH_FUNCS(bitfield);
+#define fetch_bitfield_string NULL
+#define fetch_bitfield_string_size NULL
+
+/*
+ * Define macros for basic types - we don't need to define the s* types,
+ * because only the bit width matters at recording time.
+ */
+#define DEFINE_BASIC_FETCH_FUNCS(method) \
+DEFINE_FETCH_##method(u8) \
+DEFINE_FETCH_##method(u16) \
+DEFINE_FETCH_##method(u32) \
+DEFINE_FETCH_##method(u64)
+
+/* Default (unsigned long) fetch type */
+#define __DEFAULT_FETCH_TYPE(t) u##t
+#define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t)
+#define DEFAULT_FETCH_TYPE _DEFAULT_FETCH_TYPE(BITS_PER_LONG)
+#define DEFAULT_FETCH_TYPE_STR __stringify(DEFAULT_FETCH_TYPE)
+
+#define ASSIGN_FETCH_FUNC(method, type) \
+ [FETCH_MTD_##method] = FETCH_FUNC_NAME(method, type)
+
+#define __ASSIGN_FETCH_TYPE(_name, ptype, ftype, _size, sign, _fmttype) \
+ {.name = _name, \
+ .size = _size, \
+ .is_signed = sign, \
+ .print = PRINT_TYPE_FUNC_NAME(ptype), \
+ .fmt = PRINT_TYPE_FMT_NAME(ptype), \
+ .fmttype = _fmttype, \
+ .fetch = { \
+ASSIGN_FETCH_FUNC(reg, ftype), \
+ASSIGN_FETCH_FUNC(stack, ftype), \
+ASSIGN_FETCH_FUNC(retval, ftype), \
+ASSIGN_FETCH_FUNC(memory, ftype), \
+ASSIGN_FETCH_FUNC(symbol, ftype), \
+ASSIGN_FETCH_FUNC(deref, ftype), \
+ASSIGN_FETCH_FUNC(bitfield, ftype), \
+ASSIGN_FETCH_FUNC(file_offset, ftype), \
+ } \
+ }
+
+#define ASSIGN_FETCH_TYPE(ptype, ftype, sign) \
+ __ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, #ptype)
+
+#define ASSIGN_FETCH_TYPE_END {}
+
+#define FETCH_TYPE_STRING 0
+#define FETCH_TYPE_STRSIZE 1
+
+/*
+ * Fetch type information table.
+ * It's declared as a weak symbol due to conditional compilation.
+ */
+extern __weak const struct fetch_type kprobes_fetch_type_table[];
+extern __weak const struct fetch_type uprobes_fetch_type_table[];
+
+#ifdef CONFIG_KPROBE_EVENT
+struct symbol_cache;
+unsigned long update_symbol_cache(struct symbol_cache *sc);
+void free_symbol_cache(struct symbol_cache *sc);
+struct symbol_cache *alloc_symbol_cache(const char *sym, long offset);
+#else
+/* uprobes do not support symbol fetch methods */
+#define fetch_symbol_u8 NULL
+#define fetch_symbol_u16 NULL
+#define fetch_symbol_u32 NULL
+#define fetch_symbol_u64 NULL
+#define fetch_symbol_string NULL
+#define fetch_symbol_string_size NULL
+
+struct symbol_cache {
+};
+static inline unsigned long __used update_symbol_cache(struct symbol_cache *sc)
+{
+ return 0;
+}
+
+static inline void __used free_symbol_cache(struct symbol_cache *sc)
+{
+}
+
+static inline struct symbol_cache * __used
+alloc_symbol_cache(const char *sym, long offset)
+{
+ return NULL;
+}
+#endif /* CONFIG_KPROBE_EVENT */
+
struct probe_arg {
struct fetch_param fetch;
struct fetch_param fetch_size;
@@ -124,7 +277,32 @@ struct probe_arg {
const struct fetch_type *type; /* Type of this argument */
};
-static inline __kprobes void call_fetch(struct fetch_param *fprm,
+struct trace_probe {
+ unsigned int flags; /* For TP_FLAG_* */
+ struct ftrace_event_class class;
+ struct ftrace_event_call call;
+ struct list_head files;
+ ssize_t size; /* trace entry size */
+ unsigned int nr_args;
+ struct probe_arg args[];
+};
+
+struct event_file_link {
+ struct ftrace_event_file *file;
+ struct list_head list;
+};
+
+static inline bool trace_probe_is_enabled(struct trace_probe *tp)
+{
+ return !!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE));
+}
+
+static inline bool trace_probe_is_registered(struct trace_probe *tp)
+{
+ return !!(tp->flags & TP_FLAG_REGISTERED);
+}
+
+static nokprobe_inline void call_fetch(struct fetch_param *fprm,
struct pt_regs *regs, void *dest)
{
return fprm->fn(regs, fprm->data, dest);
@@ -142,6 +320,18 @@ static inline int is_good_name(const char *name)
return 1;
}
+static inline struct event_file_link *
+find_event_file_link(struct trace_probe *tp, struct ftrace_event_file *file)
+{
+ struct event_file_link *link;
+
+ list_for_each_entry(link, &tp->files, list)
+ if (link->file == file)
+ return link;
+
+ return NULL;
+}
+
extern int traceprobe_parse_probe_arg(char *arg, ssize_t *size,
struct probe_arg *parg, bool is_return, bool is_kprobe);
@@ -158,3 +348,53 @@ extern ssize_t traceprobe_probes_write(struct file *file,
int (*createfn)(int, char**));
extern int traceprobe_command(const char *buf, int (*createfn)(int, char**));
+
+/* Sum up total data length for dynamic arrays (strings) */
+static nokprobe_inline int
+__get_data_size(struct trace_probe *tp, struct pt_regs *regs)
+{
+ int i, ret = 0;
+ u32 len;
+
+ for (i = 0; i < tp->nr_args; i++)
+ if (unlikely(tp->args[i].fetch_size.fn)) {
+ call_fetch(&tp->args[i].fetch_size, regs, &len);
+ ret += len;
+ }
+
+ return ret;
+}
+
+/* Store the value of each argument */
+static nokprobe_inline void
+store_trace_args(int ent_size, struct trace_probe *tp, struct pt_regs *regs,
+ u8 *data, int maxlen)
+{
+ int i;
+ u32 end = tp->size;
+ u32 *dl; /* Data (relative) location */
+
+ for (i = 0; i < tp->nr_args; i++) {
+ if (unlikely(tp->args[i].fetch_size.fn)) {
+ /*
+ * First, we set the relative location and
+ * maximum data length to *dl
+ */
+ dl = (u32 *)(data + tp->args[i].offset);
+ *dl = make_data_rloc(maxlen, end - tp->args[i].offset);
+ /* Then try to fetch string or dynamic array data */
+ call_fetch(&tp->args[i].fetch, regs, dl);
+ /* Reduce maximum length */
+ end += get_rloc_len(*dl);
+ maxlen -= get_rloc_len(*dl);
+ /* Trick here, convert data_rloc to data_loc */
+ *dl = convert_rloc_to_loc(*dl,
+ ent_size + tp->args[i].offset);
+ } else
+ /* Just fetching data normally */
+ call_fetch(&tp->args[i].fetch, regs,
+ data + tp->args[i].offset);
+ }
+}
+
+extern int set_print_fmt(struct trace_probe *tp, bool is_return);
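
store_trace_args() above packs each dynamic (string) argument as a 32-bit "relative data location". The exact bit layout comes from helpers that are not part of this hunk; assuming the usual encoding (length in the upper 16 bits, offset in the lower 16, consistent with convert_rloc_to_loc() shown earlier), the arithmetic works out as in this small sketch:

/*
 * Sketch of the relative-data-location ("rloc") arithmetic used by
 * store_trace_args().  The bit layout below is an assumption matching the
 * usual trace_probe.h helpers, which are not included in this hunk.
 */
#include <stdio.h>
#include <stdint.h>

#define make_data_rloc(len, offs)	(((uint32_t)(len) << 16) | ((offs) & 0xffff))
#define get_rloc_len(dl)		((uint32_t)(dl) >> 16)
#define get_rloc_offs(dl)		((uint32_t)(dl) & 0xffff)
#define convert_rloc_to_loc(dl, offs)	((uint32_t)(dl) + (offs))

int main(void)
{
	/* A 7-byte string stored 24 bytes past the fixed-size fields. */
	uint32_t dl = make_data_rloc(7, 24);

	printf("len=%u offs=%u\n",
	       (unsigned)get_rloc_len(dl), (unsigned)get_rloc_offs(dl));
	/* Shift the offset so it becomes relative to the whole trace entry. */
	printf("data_loc=0x%x\n", (unsigned)convert_rloc_to_loc(dl, 16));
	return 0;
}
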
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index 4e98e3b257a..3f34dc9b40f 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -45,7 +45,7 @@ tracing_sched_switch_trace(struct trace_array *tr,
entry->next_state = next->state;
entry->next_cpu = task_cpu(next);
- if (!filter_check_discard(call, entry, buffer, event))
+ if (!call_filter_check_discard(call, entry, buffer, event))
trace_buffer_unlock_commit(buffer, event, flags, pc);
}
@@ -101,7 +101,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
entry->next_state = wakee->state;
entry->next_cpu = task_cpu(wakee);
- if (!filter_check_discard(call, entry, buffer, event))
+ if (!call_filter_check_discard(call, entry, buffer, event))
trace_buffer_unlock_commit(buffer, event, flags, pc);
}
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index fee77e15d81..19bd8928ce9 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -16,6 +16,7 @@
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/sched/rt.h>
+#include <linux/sched/deadline.h>
#include <trace/events/sched.h>
#include "trace.h"
@@ -27,6 +28,8 @@ static int wakeup_cpu;
static int wakeup_current_cpu;
static unsigned wakeup_prio = -1;
static int wakeup_rt;
+static int wakeup_dl;
+static int tracing_dl = 0;
static arch_spinlock_t wakeup_lock =
(arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
@@ -127,15 +130,9 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip,
atomic_dec(&data->disabled);
preempt_enable_notrace();
}
-
-static struct ftrace_ops trace_ops __read_mostly =
-{
- .func = wakeup_tracer_call,
- .flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE,
-};
#endif /* CONFIG_FUNCTION_TRACER */
-static int register_wakeup_function(int graph, int set)
+static int register_wakeup_function(struct trace_array *tr, int graph, int set)
{
int ret;
@@ -147,7 +144,7 @@ static int register_wakeup_function(int graph, int set)
ret = register_ftrace_graph(&wakeup_graph_return,
&wakeup_graph_entry);
else
- ret = register_ftrace_function(&trace_ops);
+ ret = register_ftrace_function(tr->ops);
if (!ret)
function_enabled = true;
@@ -155,7 +152,7 @@ static int register_wakeup_function(int graph, int set)
return ret;
}
-static void unregister_wakeup_function(int graph)
+static void unregister_wakeup_function(struct trace_array *tr, int graph)
{
if (!function_enabled)
return;
@@ -163,32 +160,34 @@ static void unregister_wakeup_function(int graph)
if (graph)
unregister_ftrace_graph();
else
- unregister_ftrace_function(&trace_ops);
+ unregister_ftrace_function(tr->ops);
function_enabled = false;
}
-static void wakeup_function_set(int set)
+static void wakeup_function_set(struct trace_array *tr, int set)
{
if (set)
- register_wakeup_function(is_graph(), 1);
+ register_wakeup_function(tr, is_graph(), 1);
else
- unregister_wakeup_function(is_graph());
+ unregister_wakeup_function(tr, is_graph());
}
-static int wakeup_flag_changed(struct tracer *tracer, u32 mask, int set)
+static int wakeup_flag_changed(struct trace_array *tr, u32 mask, int set)
{
+ struct tracer *tracer = tr->current_trace;
+
if (mask & TRACE_ITER_FUNCTION)
- wakeup_function_set(set);
+ wakeup_function_set(tr, set);
return trace_keep_overwrite(tracer, mask, set);
}
-static int start_func_tracer(int graph)
+static int start_func_tracer(struct trace_array *tr, int graph)
{
int ret;
- ret = register_wakeup_function(graph, 0);
+ ret = register_wakeup_function(tr, graph, 0);
if (!ret && tracing_is_enabled())
tracer_enabled = 1;
@@ -198,15 +197,16 @@ static int start_func_tracer(int graph)
return ret;
}
-static void stop_func_tracer(int graph)
+static void stop_func_tracer(struct trace_array *tr, int graph)
{
tracer_enabled = 0;
- unregister_wakeup_function(graph);
+ unregister_wakeup_function(tr, graph);
}
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-static int wakeup_set_flag(u32 old_flags, u32 bit, int set)
+static int
+wakeup_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
{
if (!(bit & TRACE_DISPLAY_GRAPH))
@@ -215,12 +215,12 @@ static int wakeup_set_flag(u32 old_flags, u32 bit, int set)
if (!(is_graph() ^ set))
return 0;
- stop_func_tracer(!set);
+ stop_func_tracer(tr, !set);
wakeup_reset(wakeup_trace);
- tracing_max_latency = 0;
+ tr->max_latency = 0;
- return start_func_tracer(set);
+ return start_func_tracer(tr, set);
}
static int wakeup_graph_entry(struct ftrace_graph_ent *trace)
@@ -308,7 +308,8 @@ __trace_function(struct trace_array *tr,
#else
#define __trace_function trace_function
-static int wakeup_set_flag(u32 old_flags, u32 bit, int set)
+static int
+wakeup_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
{
return -EINVAL;
}
@@ -343,13 +344,13 @@ static void wakeup_print_header(struct seq_file *s)
/*
* Should this new latency be reported/recorded?
*/
-static int report_latency(cycle_t delta)
+static int report_latency(struct trace_array *tr, cycle_t delta)
{
if (tracing_thresh) {
if (delta < tracing_thresh)
return 0;
} else {
- if (delta <= tracing_max_latency)
+ if (delta <= tr->max_latency)
return 0;
}
return 1;
@@ -417,11 +418,11 @@ probe_wakeup_sched_switch(void *ignore,
T1 = ftrace_now(cpu);
delta = T1-T0;
- if (!report_latency(delta))
+ if (!report_latency(wakeup_trace, delta))
goto out_unlock;
if (likely(!is_tracing_stopped())) {
- tracing_max_latency = delta;
+ wakeup_trace->max_latency = delta;
update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu);
}
@@ -437,6 +438,7 @@ static void __wakeup_reset(struct trace_array *tr)
{
wakeup_cpu = -1;
wakeup_prio = -1;
+ tracing_dl = 0;
if (wakeup_task)
put_task_struct(wakeup_task);
@@ -472,9 +474,17 @@ probe_wakeup(void *ignore, struct task_struct *p, int success)
tracing_record_cmdline(p);
tracing_record_cmdline(current);
- if ((wakeup_rt && !rt_task(p)) ||
- p->prio >= wakeup_prio ||
- p->prio >= current->prio)
+ /*
+ * The semantics are as follows:
+ * - the wakeup tracer handles all tasks in the system, regardless
+ *   of their scheduling class;
+ * - the wakeup_rt tracer handles tasks belonging to the sched_dl
+ *   and sched_rt classes;
+ * - wakeup_dl handles tasks belonging to the sched_dl class only.
+ */
+ if (tracing_dl || (wakeup_dl && !dl_task(p)) ||
+ (wakeup_rt && !dl_task(p) && !rt_task(p)) ||
+ (!dl_task(p) && (p->prio >= wakeup_prio || p->prio >= current->prio)))
return;
pc = preempt_count();
@@ -486,7 +496,8 @@ probe_wakeup(void *ignore, struct task_struct *p, int success)
arch_spin_lock(&wakeup_lock);
/* check for races. */
- if (!tracer_enabled || p->prio >= wakeup_prio)
+ if (!tracer_enabled || tracing_dl ||
+ (!dl_task(p) && p->prio >= wakeup_prio))
goto out_locked;
/* reset the trace */
@@ -496,6 +507,15 @@ probe_wakeup(void *ignore, struct task_struct *p, int success)
wakeup_current_cpu = wakeup_cpu;
wakeup_prio = p->prio;
+ /*
+ * Once you start tracing a -deadline task, don't bother tracing
+ * another task until the first one wakes up.
+ */
+ if (dl_task(p))
+ tracing_dl = 1;
+ else
+ tracing_dl = 0;
+
wakeup_task = p;
get_task_struct(wakeup_task);
@@ -561,7 +581,7 @@ static void start_wakeup_tracer(struct trace_array *tr)
*/
smp_wmb();
- if (start_func_tracer(is_graph()))
+ if (start_func_tracer(tr, is_graph()))
printk(KERN_ERR "failed to start wakeup tracer\n");
return;
@@ -574,13 +594,15 @@ fail_deprobe:
static void stop_wakeup_tracer(struct trace_array *tr)
{
tracer_enabled = 0;
- stop_func_tracer(is_graph());
+ stop_func_tracer(tr, is_graph());
unregister_trace_sched_switch(probe_wakeup_sched_switch, NULL);
unregister_trace_sched_wakeup_new(probe_wakeup, NULL);
unregister_trace_sched_wakeup(probe_wakeup, NULL);
unregister_trace_sched_migrate_task(probe_wakeup_migrate_task, NULL);
}
+static bool wakeup_busy;
+
static int __wakeup_tracer_init(struct trace_array *tr)
{
save_flags = trace_flags;
@@ -589,24 +611,45 @@ static int __wakeup_tracer_init(struct trace_array *tr)
set_tracer_flag(tr, TRACE_ITER_OVERWRITE, 1);
set_tracer_flag(tr, TRACE_ITER_LATENCY_FMT, 1);
- tracing_max_latency = 0;
+ tr->max_latency = 0;
wakeup_trace = tr;
+ ftrace_init_array_ops(tr, wakeup_tracer_call);
start_wakeup_tracer(tr);
+
+ wakeup_busy = true;
return 0;
}
static int wakeup_tracer_init(struct trace_array *tr)
{
+ if (wakeup_busy)
+ return -EBUSY;
+
+ wakeup_dl = 0;
wakeup_rt = 0;
return __wakeup_tracer_init(tr);
}
static int wakeup_rt_tracer_init(struct trace_array *tr)
{
+ if (wakeup_busy)
+ return -EBUSY;
+
+ wakeup_dl = 0;
wakeup_rt = 1;
return __wakeup_tracer_init(tr);
}
+static int wakeup_dl_tracer_init(struct trace_array *tr)
+{
+ if (wakeup_busy)
+ return -EBUSY;
+
+ wakeup_dl = 1;
+ wakeup_rt = 0;
+ return __wakeup_tracer_init(tr);
+}
+
static void wakeup_tracer_reset(struct trace_array *tr)
{
int lat_flag = save_flags & TRACE_ITER_LATENCY_FMT;
@@ -618,6 +661,8 @@ static void wakeup_tracer_reset(struct trace_array *tr)
set_tracer_flag(tr, TRACE_ITER_LATENCY_FMT, lat_flag);
set_tracer_flag(tr, TRACE_ITER_OVERWRITE, overwrite_flag);
+ ftrace_reset_array_ops(tr);
+ wakeup_busy = false;
}
static void wakeup_tracer_start(struct trace_array *tr)
@@ -649,6 +694,7 @@ static struct tracer wakeup_tracer __read_mostly =
#endif
.open = wakeup_trace_open,
.close = wakeup_trace_close,
+ .allow_instances = true,
.use_max_tr = true,
};
@@ -659,7 +705,28 @@ static struct tracer wakeup_rt_tracer __read_mostly =
.reset = wakeup_tracer_reset,
.start = wakeup_tracer_start,
.stop = wakeup_tracer_stop,
- .wait_pipe = poll_wait_pipe,
+ .print_max = true,
+ .print_header = wakeup_print_header,
+ .print_line = wakeup_print_line,
+ .flags = &tracer_flags,
+ .set_flag = wakeup_set_flag,
+ .flag_changed = wakeup_flag_changed,
+#ifdef CONFIG_FTRACE_SELFTEST
+ .selftest = trace_selftest_startup_wakeup,
+#endif
+ .open = wakeup_trace_open,
+ .close = wakeup_trace_close,
+ .allow_instances = true,
+ .use_max_tr = true,
+};
+
+static struct tracer wakeup_dl_tracer __read_mostly =
+{
+ .name = "wakeup_dl",
+ .init = wakeup_dl_tracer_init,
+ .reset = wakeup_tracer_reset,
+ .start = wakeup_tracer_start,
+ .stop = wakeup_tracer_stop,
.print_max = true,
.print_header = wakeup_print_header,
.print_line = wakeup_print_line,
@@ -686,6 +753,10 @@ __init static int init_wakeup_tracer(void)
if (ret)
return ret;
+ ret = register_tracer(&wakeup_dl_tracer);
+ if (ret)
+ return ret;
+
return 0;
}
core_initcall(init_wakeup_tracer);
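
Once registered here, wakeup_dl appears alongside wakeup and wakeup_rt in available_tracers. A hedged user-space sketch of selecting it and reading back the recorded maximum latency (the debugfs mount point is an assumption):

/* Hedged sketch: select the new wakeup_dl tracer and read the max latency. */
#include <stdio.h>

static const char *tracing = "/sys/kernel/debug/tracing";

int main(void)
{
	char path[256], line[128];
	FILE *f;

	snprintf(path, sizeof(path), "%s/current_tracer", tracing);
	f = fopen(path, "w");
	if (!f)
		return 1;
	fputs("wakeup_dl\n", f);	/* trace only SCHED_DEADLINE wakeups */
	fclose(f);

	snprintf(path, sizeof(path), "%s/tracing_max_latency", tracing);
	f = fopen(path, "r");
	if (f && fgets(line, sizeof(line), f))
		printf("max latency: %s", line);
	if (f)
		fclose(f);
	return 0;
}
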
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index a7329b7902f..5ef60499dc8 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -65,7 +65,7 @@ static int trace_test_buffer(struct trace_buffer *buf, unsigned long *count)
/* Don't allow flipping of max traces now */
local_irq_save(flags);
- arch_spin_lock(&ftrace_max_lock);
+ arch_spin_lock(&buf->tr->max_lock);
cnt = ring_buffer_entries(buf->buffer);
@@ -83,7 +83,7 @@ static int trace_test_buffer(struct trace_buffer *buf, unsigned long *count)
break;
}
tracing_on();
- arch_spin_unlock(&ftrace_max_lock);
+ arch_spin_unlock(&buf->tr->max_lock);
local_irq_restore(flags);
if (count)
@@ -161,11 +161,6 @@ static struct ftrace_ops test_probe3 = {
.flags = FTRACE_OPS_FL_RECURSION_SAFE,
};
-static struct ftrace_ops test_global = {
- .func = trace_selftest_test_global_func,
- .flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE,
-};
-
static void print_counts(void)
{
printk("(%d %d %d %d %d) ",
@@ -185,7 +180,7 @@ static void reset_counts(void)
trace_selftest_test_dyn_cnt = 0;
}
-static int trace_selftest_ops(int cnt)
+static int trace_selftest_ops(struct trace_array *tr, int cnt)
{
int save_ftrace_enabled = ftrace_enabled;
struct ftrace_ops *dyn_ops;
@@ -220,7 +215,11 @@ static int trace_selftest_ops(int cnt)
register_ftrace_function(&test_probe1);
register_ftrace_function(&test_probe2);
register_ftrace_function(&test_probe3);
- register_ftrace_function(&test_global);
+ /* First time we are running with main function */
+ if (cnt > 1) {
+ ftrace_init_array_ops(tr, trace_selftest_test_global_func);
+ register_ftrace_function(tr->ops);
+ }
DYN_FTRACE_TEST_NAME();
@@ -232,8 +231,10 @@ static int trace_selftest_ops(int cnt)
goto out;
if (trace_selftest_test_probe3_cnt != 1)
goto out;
- if (trace_selftest_test_global_cnt == 0)
- goto out;
+ if (cnt > 1) {
+ if (trace_selftest_test_global_cnt == 0)
+ goto out;
+ }
DYN_FTRACE_TEST_NAME2();
@@ -269,8 +270,10 @@ static int trace_selftest_ops(int cnt)
goto out_free;
if (trace_selftest_test_probe3_cnt != 3)
goto out_free;
- if (trace_selftest_test_global_cnt == 0)
- goto out;
+ if (cnt > 1) {
+ if (trace_selftest_test_global_cnt == 0)
+ goto out;
+ }
if (trace_selftest_test_dyn_cnt == 0)
goto out_free;
@@ -295,7 +298,9 @@ static int trace_selftest_ops(int cnt)
unregister_ftrace_function(&test_probe1);
unregister_ftrace_function(&test_probe2);
unregister_ftrace_function(&test_probe3);
- unregister_ftrace_function(&test_global);
+ if (cnt > 1)
+ unregister_ftrace_function(tr->ops);
+ ftrace_reset_array_ops(tr);
/* Make sure everything is off */
reset_counts();
@@ -315,9 +320,9 @@ static int trace_selftest_ops(int cnt)
}
/* Test dynamic code modification and ftrace filters */
-int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
- struct trace_array *tr,
- int (*func)(void))
+static int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
+ struct trace_array *tr,
+ int (*func)(void))
{
int save_ftrace_enabled = ftrace_enabled;
unsigned long count;
@@ -388,7 +393,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
}
/* Test the ops with global tracing running */
- ret = trace_selftest_ops(1);
+ ret = trace_selftest_ops(tr, 1);
trace->reset(tr);
out:
@@ -399,7 +404,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
/* Test the ops with global tracing off */
if (!ret)
- ret = trace_selftest_ops(2);
+ ret = trace_selftest_ops(tr, 2);
return ret;
}
@@ -802,7 +807,7 @@ out:
int
trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)
{
- unsigned long save_max = tracing_max_latency;
+ unsigned long save_max = tr->max_latency;
unsigned long count;
int ret;
@@ -814,7 +819,7 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)
}
/* reset the max latency */
- tracing_max_latency = 0;
+ tr->max_latency = 0;
/* disable interrupts for a bit */
local_irq_disable();
udelay(100);
@@ -841,7 +846,7 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)
ret = -1;
}
- tracing_max_latency = save_max;
+ tr->max_latency = save_max;
return ret;
}
@@ -851,7 +856,7 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)
int
trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr)
{
- unsigned long save_max = tracing_max_latency;
+ unsigned long save_max = tr->max_latency;
unsigned long count;
int ret;
@@ -876,7 +881,7 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr)
}
/* reset the max latency */
- tracing_max_latency = 0;
+ tr->max_latency = 0;
/* disable preemption for a bit */
preempt_disable();
udelay(100);
@@ -903,7 +908,7 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr)
ret = -1;
}
- tracing_max_latency = save_max;
+ tr->max_latency = save_max;
return ret;
}
@@ -913,7 +918,7 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr)
int
trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *tr)
{
- unsigned long save_max = tracing_max_latency;
+ unsigned long save_max = tr->max_latency;
unsigned long count;
int ret;
@@ -938,7 +943,7 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
}
/* reset the max latency */
- tracing_max_latency = 0;
+ tr->max_latency = 0;
/* disable preemption and interrupts for a bit */
preempt_disable();
@@ -973,7 +978,7 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
}
/* do the test by disabling interrupts first this time */
- tracing_max_latency = 0;
+ tr->max_latency = 0;
tracing_start();
trace->start(tr);
@@ -1004,7 +1009,7 @@ out:
tracing_start();
out_no_start:
trace->reset(tr);
- tracing_max_latency = save_max;
+ tr->max_latency = save_max;
return ret;
}
@@ -1022,11 +1027,16 @@ trace_selftest_startup_nop(struct tracer *trace, struct trace_array *tr)
#ifdef CONFIG_SCHED_TRACER
static int trace_wakeup_test_thread(void *data)
{
- /* Make this a RT thread, doesn't need to be too high */
- static const struct sched_param param = { .sched_priority = 5 };
+ /* Make this a -deadline thread */
+ static const struct sched_attr attr = {
+ .sched_policy = SCHED_DEADLINE,
+ .sched_runtime = 100000ULL,
+ .sched_deadline = 10000000ULL,
+ .sched_period = 10000000ULL
+ };
struct completion *x = data;
- sched_setscheduler(current, SCHED_FIFO, &param);
+ sched_setattr(current, &attr);
/* Make it know we have a new prio */
complete(x);
@@ -1040,8 +1050,8 @@ static int trace_wakeup_test_thread(void *data)
/* we are awake, now wait to disappear */
while (!kthread_should_stop()) {
/*
- * This is an RT task, do short sleeps to let
- * others run.
+ * This will likely be the system's top-priority
+ * task, so do short sleeps to let others run.
*/
msleep(100);
}
@@ -1052,23 +1062,23 @@ static int trace_wakeup_test_thread(void *data)
int
trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
{
- unsigned long save_max = tracing_max_latency;
+ unsigned long save_max = tr->max_latency;
struct task_struct *p;
- struct completion isrt;
+ struct completion is_ready;
unsigned long count;
int ret;
- init_completion(&isrt);
+ init_completion(&is_ready);
- /* create a high prio thread */
- p = kthread_run(trace_wakeup_test_thread, &isrt, "ftrace-test");
+ /* create a -deadline thread */
+ p = kthread_run(trace_wakeup_test_thread, &is_ready, "ftrace-test");
if (IS_ERR(p)) {
printk(KERN_CONT "Failed to create ftrace wakeup test thread ");
return -1;
}
- /* make sure the thread is running at an RT prio */
- wait_for_completion(&isrt);
+ /* make sure the thread is running at -deadline policy */
+ wait_for_completion(&is_ready);
/* start the tracing */
ret = tracer_init(trace, tr);
@@ -1078,23 +1088,23 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
}
/* reset the max latency */
- tracing_max_latency = 0;
+ tr->max_latency = 0;
while (p->on_rq) {
/*
- * Sleep to make sure the RT thread is asleep too.
+ * Sleep to make sure the -deadline thread is asleep too.
* On virtual machines we can't rely on timings,
* but we want to make sure this test still works.
*/
msleep(100);
}
- init_completion(&isrt);
+ init_completion(&is_ready);
wake_up_process(p);
/* Wait for the task to wake up */
- wait_for_completion(&isrt);
+ wait_for_completion(&is_ready);
/* stop the tracing. */
tracing_stop();
@@ -1108,7 +1118,7 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
trace->reset(tr);
tracing_start();
- tracing_max_latency = save_max;
+ tr->max_latency = save_max;
/* kill the thread */
kthread_stop(p);
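
The selftest now promotes its helper thread to SCHED_DEADLINE with sched_setattr(), which has no glibc wrapper. A hedged user-space equivalent, using the same runtime/deadline/period values as the selftest (the local struct definition and SYS_sched_setattr availability are assumptions about the toolchain; the call normally needs CAP_SYS_NICE):

/*
 * Hedged sketch: make the calling thread SCHED_DEADLINE via sched_setattr(2).
 * The struct layout and syscall name are spelled out because glibc provides
 * no wrapper; SCHED_DEADLINE support in the running kernel is assumed.
 */
#define _GNU_SOURCE
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef SCHED_DEADLINE
#define SCHED_DEADLINE	6
#endif

struct sched_attr {
	uint32_t size;
	uint32_t sched_policy;
	uint64_t sched_flags;
	int32_t  sched_nice;		/* SCHED_NORMAL, SCHED_BATCH */
	uint32_t sched_priority;	/* SCHED_FIFO, SCHED_RR */
	uint64_t sched_runtime;		/* SCHED_DEADLINE, in nanoseconds */
	uint64_t sched_deadline;
	uint64_t sched_period;
};

int main(void)
{
	struct sched_attr attr = {
		.size		= sizeof(attr),
		.sched_policy	= SCHED_DEADLINE,
		.sched_runtime	= 100000ULL,	/* 100 us of runtime ... */
		.sched_deadline	= 10000000ULL,	/* ... every 10 ms, as in the selftest */
		.sched_period	= 10000000ULL,
	};

	/* pid 0 means the calling thread; needs CAP_SYS_NICE. */
	if (syscall(SYS_sched_setattr, 0, &attr, 0) < 0) {
		perror("sched_setattr");
		return 1;
	}
	puts("now running as a -deadline task");
	return 0;
}
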
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index b20428c5efe..8a4e5cb66a4 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -13,6 +13,7 @@
#include <linux/sysctl.h>
#include <linux/init.h>
#include <linux/fs.h>
+#include <linux/magic.h>
#include <asm/setup.h>
@@ -50,11 +51,33 @@ static DEFINE_MUTEX(stack_sysctl_mutex);
int stack_tracer_enabled;
static int last_stack_tracer_enabled;
+static inline void print_max_stack(void)
+{
+ long i;
+ int size;
+
+ pr_emerg(" Depth Size Location (%d entries)\n"
+ " ----- ---- --------\n",
+ max_stack_trace.nr_entries - 1);
+
+ for (i = 0; i < max_stack_trace.nr_entries; i++) {
+ if (stack_dump_trace[i] == ULONG_MAX)
+ break;
+ if (i+1 == max_stack_trace.nr_entries ||
+ stack_dump_trace[i+1] == ULONG_MAX)
+ size = stack_dump_index[i];
+ else
+ size = stack_dump_index[i] - stack_dump_index[i+1];
+
+ pr_emerg("%3ld) %8d %5d %pS\n", i, stack_dump_index[i],
+ size, (void *)stack_dump_trace[i]);
+ }
+}
+
static inline void
check_stack(unsigned long ip, unsigned long *stack)
{
- unsigned long this_size, flags;
- unsigned long *p, *top, *start;
+ unsigned long this_size, flags; unsigned long *p, *top, *start;
static int tracer_frame;
int frame_size = ACCESS_ONCE(tracer_frame);
int i;
@@ -84,8 +107,12 @@ check_stack(unsigned long ip, unsigned long *stack)
max_stack_size = this_size;
- max_stack_trace.nr_entries = 0;
- max_stack_trace.skip = 3;
+ max_stack_trace.nr_entries = 0;
+
+ if (using_ftrace_ops_list_func())
+ max_stack_trace.skip = 4;
+ else
+ max_stack_trace.skip = 3;
save_stack_trace(&max_stack_trace);
@@ -144,6 +171,12 @@ check_stack(unsigned long ip, unsigned long *stack)
i++;
}
+ if ((current != &init_task &&
+ *(end_of_stack(current)) != STACK_END_MAGIC)) {
+ print_max_stack();
+ BUG();
+ }
+
out:
arch_spin_unlock(&max_stack_lock);
local_irq_restore(flags);
@@ -382,7 +415,7 @@ static const struct file_operations stack_trace_filter_fops = {
.open = stack_trace_filter_open,
.read = seq_read,
.write = ftrace_filter_write,
- .llseek = ftrace_filter_lseek,
+ .llseek = tracing_lseek,
.release = ftrace_regex_release,
};
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
index 847f88a6194..7af67360b33 100644
--- a/kernel/trace/trace_stat.c
+++ b/kernel/trace/trace_stat.c
@@ -43,46 +43,15 @@ static DEFINE_MUTEX(all_stat_sessions_mutex);
/* The root directory for all stat files */
static struct dentry *stat_dir;
-/*
- * Iterate through the rbtree using a post order traversal path
- * to release the next node.
- * It won't necessary release one at each iteration
- * but it will at least advance closer to the next one
- * to be released.
- */
-static struct rb_node *release_next(struct tracer_stat *ts,
- struct rb_node *node)
+static void __reset_stat_session(struct stat_session *session)
{
- struct stat_node *snode;
- struct rb_node *parent = rb_parent(node);
-
- if (node->rb_left)
- return node->rb_left;
- else if (node->rb_right)
- return node->rb_right;
- else {
- if (!parent)
- ;
- else if (parent->rb_left == node)
- parent->rb_left = NULL;
- else
- parent->rb_right = NULL;
+ struct stat_node *snode, *n;
- snode = container_of(node, struct stat_node, node);
- if (ts->stat_release)
- ts->stat_release(snode->stat);
+ rbtree_postorder_for_each_entry_safe(snode, n, &session->stat_root, node) {
+ if (session->ts->stat_release)
+ session->ts->stat_release(snode->stat);
kfree(snode);
-
- return parent;
}
-}
-
-static void __reset_stat_session(struct stat_session *session)
-{
- struct rb_node *node = session->stat_root.rb_node;
-
- while (node)
- node = release_next(session->ts, node);
session->stat_root = RB_ROOT;
}
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 559329d9bd2..759d5e00451 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -302,6 +302,7 @@ static int __init syscall_exit_define_fields(struct ftrace_event_call *call)
static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
{
struct trace_array *tr = data;
+ struct ftrace_event_file *ftrace_file;
struct syscall_trace_enter *entry;
struct syscall_metadata *sys_data;
struct ring_buffer_event *event;
@@ -314,7 +315,13 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
syscall_nr = trace_get_syscall_nr(current, regs);
if (syscall_nr < 0)
return;
- if (!test_bit(syscall_nr, tr->enabled_enter_syscalls))
+
+ /* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE) */
+ ftrace_file = rcu_dereference_sched(tr->enter_syscall_files[syscall_nr]);
+ if (!ftrace_file)
+ return;
+
+ if (ftrace_trigger_soft_disabled(ftrace_file))
return;
sys_data = syscall_nr_to_meta(syscall_nr);
@@ -336,15 +343,14 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
entry->nr = syscall_nr;
syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);
- if (!filter_current_check_discard(buffer, sys_data->enter_event,
- entry, event))
- trace_current_buffer_unlock_commit(buffer, event,
- irq_flags, pc);
+ event_trigger_unlock_commit(ftrace_file, buffer, event, entry,
+ irq_flags, pc);
}
static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
{
struct trace_array *tr = data;
+ struct ftrace_event_file *ftrace_file;
struct syscall_trace_exit *entry;
struct syscall_metadata *sys_data;
struct ring_buffer_event *event;
@@ -356,7 +362,13 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
syscall_nr = trace_get_syscall_nr(current, regs);
if (syscall_nr < 0)
return;
- if (!test_bit(syscall_nr, tr->enabled_exit_syscalls))
+
+ /* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE()) */
+ ftrace_file = rcu_dereference_sched(tr->exit_syscall_files[syscall_nr]);
+ if (!ftrace_file)
+ return;
+
+ if (ftrace_trigger_soft_disabled(ftrace_file))
return;
sys_data = syscall_nr_to_meta(syscall_nr);
@@ -377,10 +389,8 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
entry->nr = syscall_nr;
entry->ret = syscall_get_return_value(current, regs);
- if (!filter_current_check_discard(buffer, sys_data->exit_event,
- entry, event))
- trace_current_buffer_unlock_commit(buffer, event,
- irq_flags, pc);
+ event_trigger_unlock_commit(ftrace_file, buffer, event, entry,
+ irq_flags, pc);
}
static int reg_event_syscall_enter(struct ftrace_event_file *file,
@@ -397,7 +407,7 @@ static int reg_event_syscall_enter(struct ftrace_event_file *file,
if (!tr->sys_refcount_enter)
ret = register_trace_sys_enter(ftrace_syscall_enter, tr);
if (!ret) {
- set_bit(num, tr->enabled_enter_syscalls);
+ rcu_assign_pointer(tr->enter_syscall_files[num], file);
tr->sys_refcount_enter++;
}
mutex_unlock(&syscall_trace_lock);
@@ -415,7 +425,7 @@ static void unreg_event_syscall_enter(struct ftrace_event_file *file,
return;
mutex_lock(&syscall_trace_lock);
tr->sys_refcount_enter--;
- clear_bit(num, tr->enabled_enter_syscalls);
+ rcu_assign_pointer(tr->enter_syscall_files[num], NULL);
if (!tr->sys_refcount_enter)
unregister_trace_sys_enter(ftrace_syscall_enter, tr);
mutex_unlock(&syscall_trace_lock);
@@ -435,7 +445,7 @@ static int reg_event_syscall_exit(struct ftrace_event_file *file,
if (!tr->sys_refcount_exit)
ret = register_trace_sys_exit(ftrace_syscall_exit, tr);
if (!ret) {
- set_bit(num, tr->enabled_exit_syscalls);
+ rcu_assign_pointer(tr->exit_syscall_files[num], file);
tr->sys_refcount_exit++;
}
mutex_unlock(&syscall_trace_lock);
@@ -453,7 +463,7 @@ static void unreg_event_syscall_exit(struct ftrace_event_file *file,
return;
mutex_lock(&syscall_trace_lock);
tr->sys_refcount_exit--;
- clear_bit(num, tr->enabled_exit_syscalls);
+ rcu_assign_pointer(tr->exit_syscall_files[num], NULL);
if (!tr->sys_refcount_exit)
unregister_trace_sys_exit(ftrace_syscall_exit, tr);
mutex_unlock(&syscall_trace_lock);
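
The per-syscall bitmaps are replaced here by RCU-protected ftrace_event_file pointers: registration publishes the pointer with rcu_assign_pointer(), and the handlers, which already run under rcu_read_lock_sched() inside __DO_TRACE(), read it with rcu_dereference_sched(). A hypothetical kernel-style fragment of that discipline (names and the array size are invented for illustration, and it is a sketch rather than buildable code):

/*
 * Hypothetical fragment, not from this patch: the RCU publish/read
 * discipline used by the syscall tracepoints above.
 */
#define NR_ILLUSTRATIVE_SYSCALLS 512

static struct ftrace_event_file __rcu *enter_files[NR_ILLUSTRATIVE_SYSCALLS];

/* Registration path: publish (or clear) the pointer under the event mutex. */
static void publish_syscall_file(int nr, struct ftrace_event_file *file)
{
	rcu_assign_pointer(enter_files[nr], file);
}

/* Tracepoint handler: already inside rcu_read_lock_sched() via __DO_TRACE(). */
static void handle_syscall_enter(int nr)
{
	struct ftrace_event_file *file;

	file = rcu_dereference_sched(enter_files[nr]);
	if (!file)
		return;		/* tracing of this syscall is not enabled */

	/* ... reserve a ring buffer event and write it through @file ... */
}
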
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 272261b5f94..3c9b97e6b1f 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -51,22 +51,17 @@ struct trace_uprobe_filter {
*/
struct trace_uprobe {
struct list_head list;
- struct ftrace_event_class class;
- struct ftrace_event_call call;
struct trace_uprobe_filter filter;
struct uprobe_consumer consumer;
struct inode *inode;
char *filename;
unsigned long offset;
unsigned long nhit;
- unsigned int flags; /* For TP_FLAG_* */
- ssize_t size; /* trace entry size */
- unsigned int nr_args;
- struct probe_arg args[];
+ struct trace_probe tp;
};
-#define SIZEOF_TRACE_UPROBE(n) \
- (offsetof(struct trace_uprobe, args) + \
+#define SIZEOF_TRACE_UPROBE(n) \
+ (offsetof(struct trace_uprobe, tp.args) + \
(sizeof(struct probe_arg) * (n)))
static int register_uprobe_event(struct trace_uprobe *tu);
@@ -75,10 +70,151 @@ static int unregister_uprobe_event(struct trace_uprobe *tu);
static DEFINE_MUTEX(uprobe_lock);
static LIST_HEAD(uprobe_list);
+struct uprobe_dispatch_data {
+ struct trace_uprobe *tu;
+ unsigned long bp_addr;
+};
+
static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs);
static int uretprobe_dispatcher(struct uprobe_consumer *con,
unsigned long func, struct pt_regs *regs);
+#ifdef CONFIG_STACK_GROWSUP
+static unsigned long adjust_stack_addr(unsigned long addr, unsigned int n)
+{
+ return addr - (n * sizeof(long));
+}
+#else
+static unsigned long adjust_stack_addr(unsigned long addr, unsigned int n)
+{
+ return addr + (n * sizeof(long));
+}
+#endif
+
+static unsigned long get_user_stack_nth(struct pt_regs *regs, unsigned int n)
+{
+ unsigned long ret;
+ unsigned long addr = user_stack_pointer(regs);
+
+ addr = adjust_stack_addr(addr, n);
+
+ if (copy_from_user(&ret, (void __force __user *) addr, sizeof(ret)))
+ return 0;
+
+ return ret;
+}
+
+/*
+ * Uprobes-specific fetch functions
+ */
+#define DEFINE_FETCH_stack(type) \
+static void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs, \
+ void *offset, void *dest) \
+{ \
+ *(type *)dest = (type)get_user_stack_nth(regs, \
+ ((unsigned long)offset)); \
+}
+DEFINE_BASIC_FETCH_FUNCS(stack)
+/* No string on the stack entry */
+#define fetch_stack_string NULL
+#define fetch_stack_string_size NULL
+
+#define DEFINE_FETCH_memory(type) \
+static void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs, \
+ void *addr, void *dest) \
+{ \
+ type retval; \
+ void __user *vaddr = (void __force __user *) addr; \
+ \
+ if (copy_from_user(&retval, vaddr, sizeof(type))) \
+ *(type *)dest = 0; \
+ else \
+ *(type *) dest = retval; \
+}
+DEFINE_BASIC_FETCH_FUNCS(memory)
+/*
+ * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
+ * length and relative data location.
+ */
+static void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
+ void *addr, void *dest)
+{
+ long ret;
+ u32 rloc = *(u32 *)dest;
+ int maxlen = get_rloc_len(rloc);
+ u8 *dst = get_rloc_data(dest);
+ void __user *src = (void __force __user *) addr;
+
+ if (!maxlen)
+ return;
+
+ ret = strncpy_from_user(dst, src, maxlen);
+
+ if (ret < 0) { /* Failed to fetch string */
+ ((u8 *)get_rloc_data(dest))[0] = '\0';
+ *(u32 *)dest = make_data_rloc(0, get_rloc_offs(rloc));
+ } else {
+ *(u32 *)dest = make_data_rloc(ret, get_rloc_offs(rloc));
+ }
+}
+
+static void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
+ void *addr, void *dest)
+{
+ int len;
+ void __user *vaddr = (void __force __user *) addr;
+
+ len = strnlen_user(vaddr, MAX_STRING_SIZE);
+
+ if (len == 0 || len > MAX_STRING_SIZE) /* Failed to check length */
+ *(u32 *)dest = 0;
+ else
+ *(u32 *)dest = len;
+}
+
+static unsigned long translate_user_vaddr(void *file_offset)
+{
+ unsigned long base_addr;
+ struct uprobe_dispatch_data *udd;
+
+ udd = (void *) current->utask->vaddr;
+
+ base_addr = udd->bp_addr - udd->tu->offset;
+ return base_addr + (unsigned long)file_offset;
+}
+
+#define DEFINE_FETCH_file_offset(type) \
+static void FETCH_FUNC_NAME(file_offset, type)(struct pt_regs *regs, \
+ void *offset, void *dest)\
+{ \
+ void *vaddr = (void *)translate_user_vaddr(offset); \
+ \
+ FETCH_FUNC_NAME(memory, type)(regs, vaddr, dest); \
+}
+DEFINE_BASIC_FETCH_FUNCS(file_offset)
+DEFINE_FETCH_file_offset(string)
+DEFINE_FETCH_file_offset(string_size)
+
+/* Fetch type information table */
+const struct fetch_type uprobes_fetch_type_table[] = {
+ /* Special types */
+ [FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string,
+ sizeof(u32), 1, "__data_loc char[]"),
+ [FETCH_TYPE_STRSIZE] = __ASSIGN_FETCH_TYPE("string_size", u32,
+ string_size, sizeof(u32), 0, "u32"),
+ /* Basic types */
+ ASSIGN_FETCH_TYPE(u8, u8, 0),
+ ASSIGN_FETCH_TYPE(u16, u16, 0),
+ ASSIGN_FETCH_TYPE(u32, u32, 0),
+ ASSIGN_FETCH_TYPE(u64, u64, 0),
+ ASSIGN_FETCH_TYPE(s8, u8, 1),
+ ASSIGN_FETCH_TYPE(s16, u16, 1),
+ ASSIGN_FETCH_TYPE(s32, u32, 1),
+ ASSIGN_FETCH_TYPE(s64, u64, 1),
+
+ ASSIGN_FETCH_TYPE_END
+};
+
static inline void init_trace_uprobe_filter(struct trace_uprobe_filter *filter)
{
rwlock_init(&filter->rwlock);
@@ -114,24 +250,26 @@ alloc_trace_uprobe(const char *group, const char *event, int nargs, bool is_ret)
if (!tu)
return ERR_PTR(-ENOMEM);
- tu->call.class = &tu->class;
- tu->call.name = kstrdup(event, GFP_KERNEL);
- if (!tu->call.name)
+ tu->tp.call.class = &tu->tp.class;
+ tu->tp.call.name = kstrdup(event, GFP_KERNEL);
+ if (!tu->tp.call.name)
goto error;
- tu->class.system = kstrdup(group, GFP_KERNEL);
- if (!tu->class.system)
+ tu->tp.class.system = kstrdup(group, GFP_KERNEL);
+ if (!tu->tp.class.system)
goto error;
INIT_LIST_HEAD(&tu->list);
+ INIT_LIST_HEAD(&tu->tp.files);
tu->consumer.handler = uprobe_dispatcher;
if (is_ret)
tu->consumer.ret_handler = uretprobe_dispatcher;
init_trace_uprobe_filter(&tu->filter);
+ tu->tp.call.flags |= TRACE_EVENT_FL_USE_CALL_FILTER;
return tu;
error:
- kfree(tu->call.name);
+ kfree(tu->tp.call.name);
kfree(tu);
return ERR_PTR(-ENOMEM);
@@ -141,12 +279,12 @@ static void free_trace_uprobe(struct trace_uprobe *tu)
{
int i;
- for (i = 0; i < tu->nr_args; i++)
- traceprobe_free_probe_arg(&tu->args[i]);
+ for (i = 0; i < tu->tp.nr_args; i++)
+ traceprobe_free_probe_arg(&tu->tp.args[i]);
iput(tu->inode);
- kfree(tu->call.class->system);
- kfree(tu->call.name);
+ kfree(tu->tp.call.class->system);
+ kfree(tu->tp.call.name);
kfree(tu->filename);
kfree(tu);
}
@@ -156,8 +294,8 @@ static struct trace_uprobe *find_probe_event(const char *event, const char *grou
struct trace_uprobe *tu;
list_for_each_entry(tu, &uprobe_list, list)
- if (strcmp(tu->call.name, event) == 0 &&
- strcmp(tu->call.class->system, group) == 0)
+ if (strcmp(ftrace_event_name(&tu->tp.call), event) == 0 &&
+ strcmp(tu->tp.call.class->system, group) == 0)
return tu;
return NULL;
@@ -180,16 +318,17 @@ static int unregister_trace_uprobe(struct trace_uprobe *tu)
/* Register a trace_uprobe and probe_event */
static int register_trace_uprobe(struct trace_uprobe *tu)
{
- struct trace_uprobe *old_tp;
+ struct trace_uprobe *old_tu;
int ret;
mutex_lock(&uprobe_lock);
/* register as an event */
- old_tp = find_probe_event(tu->call.name, tu->call.class->system);
- if (old_tp) {
+ old_tu = find_probe_event(ftrace_event_name(&tu->tp.call),
+ tu->tp.call.class->system);
+ if (old_tu) {
/* delete old event */
- ret = unregister_trace_uprobe(old_tp);
+ ret = unregister_trace_uprobe(old_tu);
if (ret)
goto end;
}
@@ -210,7 +349,7 @@ end:
/*
* Argument syntax:
- * - Add uprobe: p|r[:[GRP/]EVENT] PATH:SYMBOL [FETCHARGS]
+ * - Add uprobe: p|r[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS]
*
* - Remove uprobe: -:[GRP/]EVENT
*/
@@ -359,34 +498,36 @@ static int create_trace_uprobe(int argc, char **argv)
/* parse arguments */
ret = 0;
for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
+ struct probe_arg *parg = &tu->tp.args[i];
+
/* Increment count for freeing args in error case */
- tu->nr_args++;
+ tu->tp.nr_args++;
/* Parse argument name */
arg = strchr(argv[i], '=');
if (arg) {
*arg++ = '\0';
- tu->args[i].name = kstrdup(argv[i], GFP_KERNEL);
+ parg->name = kstrdup(argv[i], GFP_KERNEL);
} else {
arg = argv[i];
/* If argument name is omitted, set "argN" */
snprintf(buf, MAX_EVENT_NAME_LEN, "arg%d", i + 1);
- tu->args[i].name = kstrdup(buf, GFP_KERNEL);
+ parg->name = kstrdup(buf, GFP_KERNEL);
}
- if (!tu->args[i].name) {
+ if (!parg->name) {
pr_info("Failed to allocate argument[%d] name.\n", i);
ret = -ENOMEM;
goto error;
}
- if (!is_good_name(tu->args[i].name)) {
- pr_info("Invalid argument[%d] name: %s\n", i, tu->args[i].name);
+ if (!is_good_name(parg->name)) {
+ pr_info("Invalid argument[%d] name: %s\n", i, parg->name);
ret = -EINVAL;
goto error;
}
- if (traceprobe_conflict_field_name(tu->args[i].name, tu->args, i)) {
+ if (traceprobe_conflict_field_name(parg->name, tu->tp.args, i)) {
pr_info("Argument[%d] name '%s' conflicts with "
"another field.\n", i, argv[i]);
ret = -EINVAL;
@@ -394,7 +535,8 @@ static int create_trace_uprobe(int argc, char **argv)
}
/* Parse fetch argument */
- ret = traceprobe_parse_probe_arg(arg, &tu->size, &tu->args[i], false, false);
+ ret = traceprobe_parse_probe_arg(arg, &tu->tp.size, parg,
+ is_return, false);
if (ret) {
pr_info("Parse error at argument[%d]. (%d)\n", i, ret);
goto error;
@@ -458,11 +600,12 @@ static int probes_seq_show(struct seq_file *m, void *v)
char c = is_ret_probe(tu) ? 'r' : 'p';
int i;
- seq_printf(m, "%c:%s/%s", c, tu->call.class->system, tu->call.name);
+ seq_printf(m, "%c:%s/%s", c, tu->tp.call.class->system,
+ ftrace_event_name(&tu->tp.call));
seq_printf(m, " %s:0x%p", tu->filename, (void *)tu->offset);
- for (i = 0; i < tu->nr_args; i++)
- seq_printf(m, " %s=%s", tu->args[i].name, tu->args[i].comm);
+ for (i = 0; i < tu->tp.nr_args; i++)
+ seq_printf(m, " %s=%s", tu->tp.args[i].name, tu->tp.args[i].comm);
seq_printf(m, "\n");
return 0;
@@ -508,7 +651,8 @@ static int probes_profile_seq_show(struct seq_file *m, void *v)
{
struct trace_uprobe *tu = v;
- seq_printf(m, " %s %-44s %15lu\n", tu->filename, tu->call.name, tu->nhit);
+ seq_printf(m, " %s %-44s %15lu\n", tu->filename,
+ ftrace_event_name(&tu->tp.call), tu->nhit);
return 0;
}
@@ -532,19 +676,122 @@ static const struct file_operations uprobe_profile_ops = {
.release = seq_release,
};
-static void uprobe_trace_print(struct trace_uprobe *tu,
- unsigned long func, struct pt_regs *regs)
+struct uprobe_cpu_buffer {
+ struct mutex mutex;
+ void *buf;
+};
+static struct uprobe_cpu_buffer __percpu *uprobe_cpu_buffer;
+static int uprobe_buffer_refcnt;
+
+static int uprobe_buffer_init(void)
+{
+ int cpu, err_cpu;
+
+ uprobe_cpu_buffer = alloc_percpu(struct uprobe_cpu_buffer);
+ if (uprobe_cpu_buffer == NULL)
+ return -ENOMEM;
+
+ for_each_possible_cpu(cpu) {
+ struct page *p = alloc_pages_node(cpu_to_node(cpu),
+ GFP_KERNEL, 0);
+ if (p == NULL) {
+ err_cpu = cpu;
+ goto err;
+ }
+ per_cpu_ptr(uprobe_cpu_buffer, cpu)->buf = page_address(p);
+ mutex_init(&per_cpu_ptr(uprobe_cpu_buffer, cpu)->mutex);
+ }
+
+ return 0;
+
+err:
+ for_each_possible_cpu(cpu) {
+ if (cpu == err_cpu)
+ break;
+ free_page((unsigned long)per_cpu_ptr(uprobe_cpu_buffer, cpu)->buf);
+ }
+
+ free_percpu(uprobe_cpu_buffer);
+ return -ENOMEM;
+}
+
+static int uprobe_buffer_enable(void)
+{
+ int ret = 0;
+
+ BUG_ON(!mutex_is_locked(&event_mutex));
+
+ if (uprobe_buffer_refcnt++ == 0) {
+ ret = uprobe_buffer_init();
+ if (ret < 0)
+ uprobe_buffer_refcnt--;
+ }
+
+ return ret;
+}
+
+static void uprobe_buffer_disable(void)
+{
+ int cpu;
+
+ BUG_ON(!mutex_is_locked(&event_mutex));
+
+ if (--uprobe_buffer_refcnt == 0) {
+ for_each_possible_cpu(cpu)
+ free_page((unsigned long)per_cpu_ptr(uprobe_cpu_buffer,
+ cpu)->buf);
+
+ free_percpu(uprobe_cpu_buffer);
+ uprobe_cpu_buffer = NULL;
+ }
+}
+
+static struct uprobe_cpu_buffer *uprobe_buffer_get(void)
+{
+ struct uprobe_cpu_buffer *ucb;
+ int cpu;
+
+ cpu = raw_smp_processor_id();
+ ucb = per_cpu_ptr(uprobe_cpu_buffer, cpu);
+
+ /*
+ * Use per-cpu buffers for fastest access, but we might migrate
+ * so the mutex makes sure we have sole access to it.
+ */
+ mutex_lock(&ucb->mutex);
+
+ return ucb;
+}
+
+static void uprobe_buffer_put(struct uprobe_cpu_buffer *ucb)
+{
+ mutex_unlock(&ucb->mutex);
+}
+
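
The helpers above hand out a page-sized per-cpu scratch buffer, with the mutex covering the case where the task migrates or sleeps while holding it. Callers are expected to follow the get/fill/put pattern used by the dispatchers later in this patch; a minimal sketch, error handling elided:

	struct uprobe_cpu_buffer *ucb;
	int dsize, esize;

	dsize = __get_data_size(&tu->tp, regs);		/* variable-size (string) data */
	esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));	/* fixed entry header */

	ucb = uprobe_buffer_get();			/* locks this CPU's buffer */
	store_trace_args(esize, &tu->tp, regs, ucb->buf, dsize);
	/* ... pass ucb and dsize to the trace and/or perf handlers ... */
	uprobe_buffer_put(ucb);				/* unlocks it again */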
+static void __uprobe_trace_func(struct trace_uprobe *tu,
+ unsigned long func, struct pt_regs *regs,
+ struct uprobe_cpu_buffer *ucb, int dsize,
+ struct ftrace_event_file *ftrace_file)
{
struct uprobe_trace_entry_head *entry;
struct ring_buffer_event *event;
struct ring_buffer *buffer;
void *data;
- int size, i;
- struct ftrace_event_call *call = &tu->call;
+ int size, esize;
+ struct ftrace_event_call *call = &tu->tp.call;
+
+ WARN_ON(call != ftrace_file->event_call);
+
+ if (WARN_ON_ONCE(tu->tp.size + dsize > PAGE_SIZE))
+ return;
- size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
- event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
- size + tu->size, 0, 0);
+ if (ftrace_trigger_soft_disabled(ftrace_file))
+ return;
+
+ esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
+ size = esize + tu->tp.size + dsize;
+ event = trace_event_buffer_lock_reserve(&buffer, ftrace_file,
+ call->event.type, size, 0, 0);
if (!event)
return;
@@ -558,25 +805,38 @@ static void uprobe_trace_print(struct trace_uprobe *tu,
data = DATAOF_TRACE_ENTRY(entry, false);
}
- for (i = 0; i < tu->nr_args; i++)
- call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset);
+ memcpy(data, ucb->buf, tu->tp.size + dsize);
- if (!filter_current_check_discard(buffer, call, entry, event))
- trace_buffer_unlock_commit(buffer, event, 0, 0);
+ event_trigger_unlock_commit(ftrace_file, buffer, event, entry, 0, 0);
}
/* uprobe handler */
-static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
+static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs,
+ struct uprobe_cpu_buffer *ucb, int dsize)
{
- if (!is_ret_probe(tu))
- uprobe_trace_print(tu, 0, regs);
+ struct event_file_link *link;
+
+ if (is_ret_probe(tu))
+ return 0;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(link, &tu->tp.files, list)
+ __uprobe_trace_func(tu, 0, regs, ucb, dsize, link->file);
+ rcu_read_unlock();
+
return 0;
}
static void uretprobe_trace_func(struct trace_uprobe *tu, unsigned long func,
- struct pt_regs *regs)
+ struct pt_regs *regs,
+ struct uprobe_cpu_buffer *ucb, int dsize)
{
- uprobe_trace_print(tu, func, regs);
+ struct event_file_link *link;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(link, &tu->tp.files, list)
+ __uprobe_trace_func(tu, func, regs, ucb, dsize, link->file);
+ rcu_read_unlock();
}
/* Event entry printers */
@@ -590,23 +850,26 @@ print_uprobe_event(struct trace_iterator *iter, int flags, struct trace_event *e
int i;
entry = (struct uprobe_trace_entry_head *)iter->ent;
- tu = container_of(event, struct trace_uprobe, call.event);
+ tu = container_of(event, struct trace_uprobe, tp.call.event);
if (is_ret_probe(tu)) {
- if (!trace_seq_printf(s, "%s: (0x%lx <- 0x%lx)", tu->call.name,
+ if (!trace_seq_printf(s, "%s: (0x%lx <- 0x%lx)",
+ ftrace_event_name(&tu->tp.call),
entry->vaddr[1], entry->vaddr[0]))
goto partial;
data = DATAOF_TRACE_ENTRY(entry, true);
} else {
- if (!trace_seq_printf(s, "%s: (0x%lx)", tu->call.name,
+ if (!trace_seq_printf(s, "%s: (0x%lx)",
+ ftrace_event_name(&tu->tp.call),
entry->vaddr[0]))
goto partial;
data = DATAOF_TRACE_ENTRY(entry, false);
}
- for (i = 0; i < tu->nr_args; i++) {
- if (!tu->args[i].type->print(s, tu->args[i].name,
- data + tu->args[i].offset, entry))
+ for (i = 0; i < tu->tp.nr_args; i++) {
+ struct probe_arg *parg = &tu->tp.args[i];
+
+ if (!parg->type->print(s, parg->name, data + parg->offset, entry))
goto partial;
}
@@ -617,43 +880,95 @@ partial:
return TRACE_TYPE_PARTIAL_LINE;
}
-static inline bool is_trace_uprobe_enabled(struct trace_uprobe *tu)
-{
- return tu->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE);
-}
-
typedef bool (*filter_func_t)(struct uprobe_consumer *self,
enum uprobe_filter_ctx ctx,
struct mm_struct *mm);
static int
-probe_event_enable(struct trace_uprobe *tu, int flag, filter_func_t filter)
+probe_event_enable(struct trace_uprobe *tu, struct ftrace_event_file *file,
+ filter_func_t filter)
{
- int ret = 0;
+ bool enabled = trace_probe_is_enabled(&tu->tp);
+ struct event_file_link *link = NULL;
+ int ret;
+
+ if (file) {
+ if (tu->tp.flags & TP_FLAG_PROFILE)
+ return -EINTR;
+
+ link = kmalloc(sizeof(*link), GFP_KERNEL);
+ if (!link)
+ return -ENOMEM;
+
+ link->file = file;
+ list_add_tail_rcu(&link->list, &tu->tp.files);
- if (is_trace_uprobe_enabled(tu))
- return -EINTR;
+ tu->tp.flags |= TP_FLAG_TRACE;
+ } else {
+ if (tu->tp.flags & TP_FLAG_TRACE)
+ return -EINTR;
+
+ tu->tp.flags |= TP_FLAG_PROFILE;
+ }
WARN_ON(!uprobe_filter_is_empty(&tu->filter));
- tu->flags |= flag;
+ if (enabled)
+ return 0;
+
+ ret = uprobe_buffer_enable();
+ if (ret)
+ goto err_flags;
+
tu->consumer.filter = filter;
ret = uprobe_register(tu->inode, tu->offset, &tu->consumer);
if (ret)
- tu->flags &= ~flag;
+ goto err_buffer;
+
+ return 0;
+ err_buffer:
+ uprobe_buffer_disable();
+
+ err_flags:
+ if (file) {
+ list_del(&link->list);
+ kfree(link);
+ tu->tp.flags &= ~TP_FLAG_TRACE;
+ } else {
+ tu->tp.flags &= ~TP_FLAG_PROFILE;
+ }
return ret;
}
-static void probe_event_disable(struct trace_uprobe *tu, int flag)
+static void
+probe_event_disable(struct trace_uprobe *tu, struct ftrace_event_file *file)
{
- if (!is_trace_uprobe_enabled(tu))
+ if (!trace_probe_is_enabled(&tu->tp))
return;
+ if (file) {
+ struct event_file_link *link;
+
+ link = find_event_file_link(&tu->tp, file);
+ if (!link)
+ return;
+
+ list_del_rcu(&link->list);
+ /* synchronize with u{,ret}probe_trace_func */
+ synchronize_sched();
+ kfree(link);
+
+ if (!list_empty(&tu->tp.files))
+ return;
+ }
+
WARN_ON(!uprobe_filter_is_empty(&tu->filter));
uprobe_unregister(tu->inode, tu->offset, &tu->consumer);
- tu->flags &= ~flag;
+ tu->tp.flags &= file ? ~TP_FLAG_TRACE : ~TP_FLAG_PROFILE;
+
+ uprobe_buffer_disable();
}
static int uprobe_event_define_fields(struct ftrace_event_call *event_call)
@@ -671,12 +986,12 @@ static int uprobe_event_define_fields(struct ftrace_event_call *event_call)
size = SIZEOF_TRACE_ENTRY(false);
}
/* Set argument names as fields */
- for (i = 0; i < tu->nr_args; i++) {
- ret = trace_define_field(event_call, tu->args[i].type->fmttype,
- tu->args[i].name,
- size + tu->args[i].offset,
- tu->args[i].type->size,
- tu->args[i].type->is_signed,
+ for (i = 0; i < tu->tp.nr_args; i++) {
+ struct probe_arg *parg = &tu->tp.args[i];
+
+ ret = trace_define_field(event_call, parg->type->fmttype,
+ parg->name, size + parg->offset,
+ parg->type->size, parg->type->is_signed,
FILTER_OTHER);
if (ret)
@@ -685,59 +1000,6 @@ static int uprobe_event_define_fields(struct ftrace_event_call *event_call)
return 0;
}
-#define LEN_OR_ZERO (len ? len - pos : 0)
-static int __set_print_fmt(struct trace_uprobe *tu, char *buf, int len)
-{
- const char *fmt, *arg;
- int i;
- int pos = 0;
-
- if (is_ret_probe(tu)) {
- fmt = "(%lx <- %lx)";
- arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP;
- } else {
- fmt = "(%lx)";
- arg = "REC->" FIELD_STRING_IP;
- }
-
- /* When len=0, we just calculate the needed length */
-
- pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt);
-
- for (i = 0; i < tu->nr_args; i++) {
- pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%s",
- tu->args[i].name, tu->args[i].type->fmt);
- }
-
- pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg);
-
- for (i = 0; i < tu->nr_args; i++) {
- pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s",
- tu->args[i].name);
- }
-
- return pos; /* return the length of print_fmt */
-}
-#undef LEN_OR_ZERO
-
-static int set_print_fmt(struct trace_uprobe *tu)
-{
- char *print_fmt;
- int len;
-
- /* First: called with 0 length to calculate the needed length */
- len = __set_print_fmt(tu, NULL, 0);
- print_fmt = kmalloc(len + 1, GFP_KERNEL);
- if (!print_fmt)
- return -ENOMEM;
-
- /* Second: actually write the @print_fmt */
- __set_print_fmt(tu, print_fmt, len + 1);
- tu->call.print_fmt = print_fmt;
-
- return 0;
-}
-
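
The print_fmt builder removed here is not lost: set_print_fmt() now lives in the shared trace_probe code and generates the same string. Assuming FIELD_STRING_IP expands to "__probe_ip", a non-return probe with a single argument named arg1 would end up with a print_fmt along the lines of

	"(%lx) arg1=FMT", REC->__probe_ip, REC->arg1

where FMT stands for the argument's fetch-type format string (tu->args[i].type->fmt in the code being removed).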
#ifdef CONFIG_PERF_EVENTS
static bool
__uprobe_perf_filter(struct trace_uprobe_filter *filter, struct mm_struct *mm)
@@ -761,56 +1023,60 @@ uprobe_filter_event(struct trace_uprobe *tu, struct perf_event *event)
return __uprobe_perf_filter(&tu->filter, event->hw.tp_target->mm);
}
-static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event)
+static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event)
{
bool done;
write_lock(&tu->filter.rwlock);
if (event->hw.tp_target) {
- /*
- * event->parent != NULL means copy_process(), we can avoid
- * uprobe_apply(). current->mm must be probed and we can rely
- * on dup_mmap() which preserves the already installed bp's.
- *
- * attr.enable_on_exec means that exec/mmap will install the
- * breakpoints we need.
- */
+ list_del(&event->hw.tp_list);
done = tu->filter.nr_systemwide ||
- event->parent || event->attr.enable_on_exec ||
+ (event->hw.tp_target->flags & PF_EXITING) ||
uprobe_filter_event(tu, event);
- list_add(&event->hw.tp_list, &tu->filter.perf_events);
} else {
+ tu->filter.nr_systemwide--;
done = tu->filter.nr_systemwide;
- tu->filter.nr_systemwide++;
}
write_unlock(&tu->filter.rwlock);
if (!done)
- uprobe_apply(tu->inode, tu->offset, &tu->consumer, true);
+ return uprobe_apply(tu->inode, tu->offset, &tu->consumer, false);
return 0;
}
-static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event)
+static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event)
{
bool done;
+ int err;
write_lock(&tu->filter.rwlock);
if (event->hw.tp_target) {
- list_del(&event->hw.tp_list);
+ /*
+ * event->parent != NULL means copy_process(), we can avoid
+ * uprobe_apply(). current->mm must be probed and we can rely
+ * on dup_mmap() which preserves the already installed bp's.
+ *
+ * attr.enable_on_exec means that exec/mmap will install the
+ * breakpoints we need.
+ */
done = tu->filter.nr_systemwide ||
- (event->hw.tp_target->flags & PF_EXITING) ||
+ event->parent || event->attr.enable_on_exec ||
uprobe_filter_event(tu, event);
+ list_add(&event->hw.tp_list, &tu->filter.perf_events);
} else {
- tu->filter.nr_systemwide--;
done = tu->filter.nr_systemwide;
+ tu->filter.nr_systemwide++;
}
write_unlock(&tu->filter.rwlock);
- if (!done)
- uprobe_apply(tu->inode, tu->offset, &tu->consumer, false);
-
- return 0;
+ err = 0;
+ if (!done) {
+ err = uprobe_apply(tu->inode, tu->offset, &tu->consumer, true);
+ if (err)
+ uprobe_perf_close(tu, event);
+ }
+ return err;
}
static bool uprobe_perf_filter(struct uprobe_consumer *uc,
@@ -827,17 +1093,23 @@ static bool uprobe_perf_filter(struct uprobe_consumer *uc,
return ret;
}
-static void uprobe_perf_print(struct trace_uprobe *tu,
- unsigned long func, struct pt_regs *regs)
+static void __uprobe_perf_func(struct trace_uprobe *tu,
+ unsigned long func, struct pt_regs *regs,
+ struct uprobe_cpu_buffer *ucb, int dsize)
{
- struct ftrace_event_call *call = &tu->call;
+ struct ftrace_event_call *call = &tu->tp.call;
struct uprobe_trace_entry_head *entry;
struct hlist_head *head;
void *data;
- int size, rctx, i;
+ int size, esize;
+ int rctx;
- size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
- size = ALIGN(size + tu->size + sizeof(u32), sizeof(u64)) - sizeof(u32);
+ esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
+
+ size = esize + tu->tp.size + dsize;
+ size = ALIGN(size + sizeof(u32), sizeof(u64)) - sizeof(u32);
+ if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough"))
+ return;
preempt_disable();
head = this_cpu_ptr(call->perf_events);
@@ -857,8 +1129,13 @@ static void uprobe_perf_print(struct trace_uprobe *tu,
data = DATAOF_TRACE_ENTRY(entry, false);
}
- for (i = 0; i < tu->nr_args; i++)
- call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset);
+ memcpy(data, ucb->buf, tu->tp.size + dsize);
+
+ if (size - esize > tu->tp.size + dsize) {
+ int len = tu->tp.size + dsize;
+
+ memset(data + len, 0, size - esize - len);
+ }
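	/*
	 * Worked example with made-up sizes (illustration only):
	 *   esize = 16, tu->tp.size + dsize = 13
	 *   size  = ALIGN(16 + 13 + 4, 8) - 4 = 40 - 4 = 36
	 *   size - esize = 20 > 13, so the memset() above zeroes the
	 *   36 - 16 - 13 = 7 bytes of alignment padding and keeps
	 *   uninitialized buffer contents out of the perf ring buffer.
	 */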
perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL);
out:
@@ -866,42 +1143,46 @@ static void uprobe_perf_print(struct trace_uprobe *tu,
}
/* uprobe profile handler */
-static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
+static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs,
+ struct uprobe_cpu_buffer *ucb, int dsize)
{
if (!uprobe_perf_filter(&tu->consumer, 0, current->mm))
return UPROBE_HANDLER_REMOVE;
if (!is_ret_probe(tu))
- uprobe_perf_print(tu, 0, regs);
+ __uprobe_perf_func(tu, 0, regs, ucb, dsize);
return 0;
}
static void uretprobe_perf_func(struct trace_uprobe *tu, unsigned long func,
- struct pt_regs *regs)
+ struct pt_regs *regs,
+ struct uprobe_cpu_buffer *ucb, int dsize)
{
- uprobe_perf_print(tu, func, regs);
+ __uprobe_perf_func(tu, func, regs, ucb, dsize);
}
#endif /* CONFIG_PERF_EVENTS */
-static
-int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type, void *data)
+static int
+trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type,
+ void *data)
{
struct trace_uprobe *tu = event->data;
+ struct ftrace_event_file *file = data;
switch (type) {
case TRACE_REG_REGISTER:
- return probe_event_enable(tu, TP_FLAG_TRACE, NULL);
+ return probe_event_enable(tu, file, NULL);
case TRACE_REG_UNREGISTER:
- probe_event_disable(tu, TP_FLAG_TRACE);
+ probe_event_disable(tu, file);
return 0;
#ifdef CONFIG_PERF_EVENTS
case TRACE_REG_PERF_REGISTER:
- return probe_event_enable(tu, TP_FLAG_PROFILE, uprobe_perf_filter);
+ return probe_event_enable(tu, NULL, uprobe_perf_filter);
case TRACE_REG_PERF_UNREGISTER:
- probe_event_disable(tu, TP_FLAG_PROFILE);
+ probe_event_disable(tu, NULL);
return 0;
case TRACE_REG_PERF_OPEN:
@@ -920,18 +1201,37 @@ int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type,
static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
{
struct trace_uprobe *tu;
+ struct uprobe_dispatch_data udd;
+ struct uprobe_cpu_buffer *ucb;
+ int dsize, esize;
int ret = 0;
+
tu = container_of(con, struct trace_uprobe, consumer);
tu->nhit++;
- if (tu->flags & TP_FLAG_TRACE)
- ret |= uprobe_trace_func(tu, regs);
+ udd.tu = tu;
+ udd.bp_addr = instruction_pointer(regs);
+
+ current->utask->vaddr = (unsigned long) &udd;
+
+ if (WARN_ON_ONCE(!uprobe_cpu_buffer))
+ return 0;
+
+ dsize = __get_data_size(&tu->tp, regs);
+ esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
+
+ ucb = uprobe_buffer_get();
+ store_trace_args(esize, &tu->tp, regs, ucb->buf, dsize);
+
+ if (tu->tp.flags & TP_FLAG_TRACE)
+ ret |= uprobe_trace_func(tu, regs, ucb, dsize);
#ifdef CONFIG_PERF_EVENTS
- if (tu->flags & TP_FLAG_PROFILE)
- ret |= uprobe_perf_func(tu, regs);
+ if (tu->tp.flags & TP_FLAG_PROFILE)
+ ret |= uprobe_perf_func(tu, regs, ucb, dsize);
#endif
+ uprobe_buffer_put(ucb);
return ret;
}
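
Note that the dispatcher stashes the address of its on-stack uprobe_dispatch_data in current->utask->vaddr before fetching the arguments. A minimal sketch of how code running under the dispatcher (a fetch routine, for instance) could read it back, assuming it runs in the same task before the dispatcher returns; the local variable name here is illustrative:

	struct uprobe_dispatch_data *udd;

	udd = (void *)current->utask->vaddr;
	/* udd->tu is the trace_uprobe being hit, udd->bp_addr the probe address */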
@@ -939,16 +1239,34 @@ static int uretprobe_dispatcher(struct uprobe_consumer *con,
unsigned long func, struct pt_regs *regs)
{
struct trace_uprobe *tu;
+ struct uprobe_dispatch_data udd;
+ struct uprobe_cpu_buffer *ucb;
+ int dsize, esize;
tu = container_of(con, struct trace_uprobe, consumer);
- if (tu->flags & TP_FLAG_TRACE)
- uretprobe_trace_func(tu, func, regs);
+ udd.tu = tu;
+ udd.bp_addr = func;
+
+ current->utask->vaddr = (unsigned long) &udd;
+
+ if (WARN_ON_ONCE(!uprobe_cpu_buffer))
+ return 0;
+
+ dsize = __get_data_size(&tu->tp, regs);
+ esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
+
+ ucb = uprobe_buffer_get();
+ store_trace_args(esize, &tu->tp, regs, ucb->buf, dsize);
+
+ if (tu->tp.flags & TP_FLAG_TRACE)
+ uretprobe_trace_func(tu, func, regs, ucb, dsize);
#ifdef CONFIG_PERF_EVENTS
- if (tu->flags & TP_FLAG_PROFILE)
- uretprobe_perf_func(tu, func, regs);
+ if (tu->tp.flags & TP_FLAG_PROFILE)
+ uretprobe_perf_func(tu, func, regs, ucb, dsize);
#endif
+ uprobe_buffer_put(ucb);
return 0;
}
@@ -958,7 +1276,7 @@ static struct trace_event_functions uprobe_funcs = {
static int register_uprobe_event(struct trace_uprobe *tu)
{
- struct ftrace_event_call *call = &tu->call;
+ struct ftrace_event_call *call = &tu->tp.call;
int ret;
/* Initialize ftrace_event_call */
@@ -966,7 +1284,7 @@ static int register_uprobe_event(struct trace_uprobe *tu)
call->event.funcs = &uprobe_funcs;
call->class->define_fields = uprobe_event_define_fields;
- if (set_print_fmt(tu) < 0)
+ if (set_print_fmt(&tu->tp, is_ret_probe(tu)) < 0)
return -ENOMEM;
ret = register_ftrace_event(&call->event);
@@ -980,7 +1298,8 @@ static int register_uprobe_event(struct trace_uprobe *tu)
ret = trace_add_event_call(call);
if (ret) {
- pr_info("Failed to register uprobe event: %s\n", call->name);
+ pr_info("Failed to register uprobe event: %s\n",
+ ftrace_event_name(call));
kfree(call->print_fmt);
unregister_ftrace_event(&call->event);
}
@@ -993,11 +1312,11 @@ static int unregister_uprobe_event(struct trace_uprobe *tu)
int ret;
/* tu->event is unregistered in trace_remove_event_call() */
- ret = trace_remove_event_call(&tu->call);
+ ret = trace_remove_event_call(&tu->tp.call);
if (ret)
return ret;
- kfree(tu->call.print_fmt);
- tu->call.print_fmt = NULL;
+ kfree(tu->tp.call.print_fmt);
+ tu->tp.call.print_fmt = NULL;
return 0;
}