Diffstat (limited to 'kernel/trace')
33 files changed, 5578 insertions, 1994 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 015f85aaca0..d4409356f40 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -424,6 +424,7 @@ config UPROBE_EVENT
 	bool "Enable uprobes-based dynamic events"
 	depends on ARCH_SUPPORTS_UPROBES
 	depends on MMU
+	depends on PERF_EVENTS
 	select UPROBES
 	select PROBE_EVENTS
 	select TRACING
@@ -534,6 +535,36 @@ config MMIOTRACE_TEST
 	  Say N, unless you absolutely know what you are doing.
 
+config TRACEPOINT_BENCHMARK
+	bool "Add tracepoint that benchmarks tracepoints"
+	help
+	 This option creates the tracepoint "benchmark:benchmark_event".
+	 When the tracepoint is enabled, it kicks off a kernel thread that
+	 goes into an infinite loop (calling cond_sched() to let other tasks
+	 run), and calls the tracepoint. Each iteration will record the time
+	 it took to write to the tracepoint and the next iteration that
+	 data will be passed to the tracepoint itself. That is, the tracepoint
+	 will report the time it took to do the previous tracepoint.
+	 The string written to the tracepoint is a static string of 128 bytes
+	 to keep the time the same. The initial string is simply a write of
+	 "START". The second string records the cold cache time of the first
+	 write which is not added to the rest of the calculations.
+
+	 As it is a tight loop, it benchmarks as hot cache. That's fine because
+	 we care most about hot paths that are probably in cache already.
+
+	 An example of the output:
+
+	     START
+	     first=3672 [COLD CACHED]
+	     last=632 first=3672 max=632 min=632 avg=316 std=446 std^2=199712
+	     last=278 first=3672 max=632 min=278 avg=303 std=316 std^2=100337
+	     last=277 first=3672 max=632 min=277 avg=296 std=258 std^2=67064
+	     last=273 first=3672 max=632 min=273 avg=292 std=224 std^2=50411
+	     last=273 first=3672 max=632 min=273 avg=288 std=200 std^2=40389
+	     last=281 first=3672 max=632 min=273 avg=287 std=183 std^2=33666
+
+
 config RING_BUFFER_BENCHMARK
 	tristate "Ring buffer benchmark stress tester"
 	depends on RING_BUFFER
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index d7e2068e4b7..2611613f14f 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -17,6 +17,7 @@ ifdef CONFIG_TRACING_BRANCHES
 KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
 endif
 
+CFLAGS_trace_benchmark.o := -I$(src)
 CFLAGS_trace_events_filter.o := -I$(src)
 
 obj-$(CONFIG_TRACE_CLOCK) += trace_clock.o
@@ -50,6 +51,7 @@ ifeq ($(CONFIG_PERF_EVENTS),y)
 obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o
 endif
 obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
+obj-$(CONFIG_EVENT_TRACING) += trace_events_trigger.o
 obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
 obj-$(CONFIG_TRACEPOINTS) += power-traces.o
 ifeq ($(CONFIG_PM_RUNTIME),y)
@@ -61,4 +63,6 @@ endif
 obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o
 obj-$(CONFIG_UPROBE_EVENT) += trace_uprobe.o
 
+obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o
+
 libftrace-y := ftrace.o
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index b8b8560bfb9..c1bd4ada2a0 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -26,6 +26,7 @@
 #include <linux/export.h>
 #include <linux/time.h>
 #include <linux/uaccess.h>
+#include <linux/list.h>
 
 #include <trace/events/block.h>
 
@@ -38,6 +39,9 @@ static unsigned int blktrace_seq __read_mostly = 1;
 static struct trace_array *blk_tr;
 static bool blk_tracer_enabled __read_mostly;
 
+static LIST_HEAD(running_trace_list);
+static __cacheline_aligned_in_smp DEFINE_SPINLOCK(running_trace_lock);
+
 /* Select an alternative, minimalistic output than the original one */
 #define
TRACE_BLK_OPT_CLASSIC 0x1 @@ -107,10 +111,18 @@ record_it: * Send out a notify for this process, if we haven't done so since a trace * started */ -static void trace_note_tsk(struct blk_trace *bt, struct task_struct *tsk) +static void trace_note_tsk(struct task_struct *tsk) { + unsigned long flags; + struct blk_trace *bt; + tsk->btrace_seq = blktrace_seq; - trace_note(bt, tsk->pid, BLK_TN_PROCESS, tsk->comm, sizeof(tsk->comm)); + spin_lock_irqsave(&running_trace_lock, flags); + list_for_each_entry(bt, &running_trace_list, running_list) { + trace_note(bt, tsk->pid, BLK_TN_PROCESS, tsk->comm, + sizeof(tsk->comm)); + } + spin_unlock_irqrestore(&running_trace_lock, flags); } static void trace_note_time(struct blk_trace *bt) @@ -229,16 +241,15 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, goto record_it; } + if (unlikely(tsk->btrace_seq != blktrace_seq)) + trace_note_tsk(tsk); + /* * A word about the locking here - we disable interrupts to reserve * some space in the relay per-cpu buffer, to prevent an irq * from coming in and stepping on our toes. */ local_irq_save(flags); - - if (unlikely(tsk->btrace_seq != blktrace_seq)) - trace_note_tsk(bt, tsk); - t = relay_reserve(bt->rchan, sizeof(*t) + pdu_len); if (t) { sequence = per_cpu_ptr(bt->sequence, cpu); @@ -477,6 +488,7 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, bt->dir = dir; bt->dev = dev; atomic_set(&bt->dropped, 0); + INIT_LIST_HEAD(&bt->running_list); ret = -EIO; bt->dropped_file = debugfs_create_file("dropped", 0444, dir, bt, @@ -567,13 +579,12 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name, .end_lba = cbuts.end_lba, .pid = cbuts.pid, }; - memcpy(&buts.name, &cbuts.name, 32); ret = do_blk_trace_setup(q, name, dev, bdev, &buts); if (ret) return ret; - if (copy_to_user(arg, &buts.name, 32)) { + if (copy_to_user(arg, &buts.name, ARRAY_SIZE(buts.name))) { blk_trace_remove(q); return -EFAULT; } @@ -601,6 +612,9 @@ int blk_trace_startstop(struct request_queue *q, int start) blktrace_seq++; smp_mb(); bt->trace_state = Blktrace_running; + spin_lock_irq(&running_trace_lock); + list_add(&bt->running_list, &running_trace_list); + spin_unlock_irq(&running_trace_lock); trace_note_time(bt); ret = 0; @@ -608,6 +622,9 @@ int blk_trace_startstop(struct request_queue *q, int start) } else { if (bt->trace_state == Blktrace_running) { bt->trace_state = Blktrace_stopped; + spin_lock_irq(&running_trace_lock); + list_del_init(&bt->running_list); + spin_unlock_irq(&running_trace_lock); relay_flush(bt->rchan); ret = 0; } @@ -685,6 +702,7 @@ void blk_trace_shutdown(struct request_queue *q) * blk_add_trace_rq - Add a trace for a request oriented action * @q: queue the io is for * @rq: the source request + * @nr_bytes: number of completed bytes * @what: the action * * Description: @@ -692,7 +710,7 @@ void blk_trace_shutdown(struct request_queue *q) * **/ static void blk_add_trace_rq(struct request_queue *q, struct request *rq, - u32 what) + unsigned int nr_bytes, u32 what) { struct blk_trace *bt = q->blk_trace; @@ -701,11 +719,11 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq, if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { what |= BLK_TC_ACT(BLK_TC_PC); - __blk_add_trace(bt, 0, blk_rq_bytes(rq), rq->cmd_flags, + __blk_add_trace(bt, 0, nr_bytes, rq->cmd_flags, what, rq->errors, rq->cmd_len, rq->cmd); } else { what |= BLK_TC_ACT(BLK_TC_FS); - __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), + __blk_add_trace(bt, blk_rq_pos(rq), nr_bytes, 
rq->cmd_flags, what, rq->errors, 0, NULL); } } @@ -713,33 +731,34 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq, static void blk_add_trace_rq_abort(void *ignore, struct request_queue *q, struct request *rq) { - blk_add_trace_rq(q, rq, BLK_TA_ABORT); + blk_add_trace_rq(q, rq, blk_rq_bytes(rq), BLK_TA_ABORT); } static void blk_add_trace_rq_insert(void *ignore, struct request_queue *q, struct request *rq) { - blk_add_trace_rq(q, rq, BLK_TA_INSERT); + blk_add_trace_rq(q, rq, blk_rq_bytes(rq), BLK_TA_INSERT); } static void blk_add_trace_rq_issue(void *ignore, struct request_queue *q, struct request *rq) { - blk_add_trace_rq(q, rq, BLK_TA_ISSUE); + blk_add_trace_rq(q, rq, blk_rq_bytes(rq), BLK_TA_ISSUE); } static void blk_add_trace_rq_requeue(void *ignore, struct request_queue *q, struct request *rq) { - blk_add_trace_rq(q, rq, BLK_TA_REQUEUE); + blk_add_trace_rq(q, rq, blk_rq_bytes(rq), BLK_TA_REQUEUE); } static void blk_add_trace_rq_complete(void *ignore, struct request_queue *q, - struct request *rq) + struct request *rq, + unsigned int nr_bytes) { - blk_add_trace_rq(q, rq, BLK_TA_COMPLETE); + blk_add_trace_rq(q, rq, nr_bytes, BLK_TA_COMPLETE); } /** @@ -764,8 +783,8 @@ static void blk_add_trace_bio(struct request_queue *q, struct bio *bio, if (!error && !bio_flagged(bio, BIO_UPTODATE)) error = EIO; - __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, - error, 0, NULL); + __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, + bio->bi_rw, what, error, 0, NULL); } static void blk_add_trace_bio_bounce(void *ignore, @@ -868,8 +887,9 @@ static void blk_add_trace_split(void *ignore, if (bt) { __be64 rpdu = cpu_to_be64(pdu); - __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, - BLK_TA_SPLIT, !bio_flagged(bio, BIO_UPTODATE), + __blk_add_trace(bt, bio->bi_iter.bi_sector, + bio->bi_iter.bi_size, bio->bi_rw, BLK_TA_SPLIT, + !bio_flagged(bio, BIO_UPTODATE), sizeof(rpdu), &rpdu); } } @@ -901,9 +921,9 @@ static void blk_add_trace_bio_remap(void *ignore, r.device_to = cpu_to_be32(bio->bi_bdev->bd_dev); r.sector_from = cpu_to_be64(from); - __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, - BLK_TA_REMAP, !bio_flagged(bio, BIO_UPTODATE), - sizeof(r), &r); + __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, + bio->bi_rw, BLK_TA_REMAP, + !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r); } /** @@ -1409,7 +1429,8 @@ static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter) return print_one_line(iter, true); } -static int blk_tracer_set_flag(u32 old_flags, u32 bit, int set) +static int +blk_tracer_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set) { /* don't output context-info for blk_classic output */ if (bit == TRACE_BLK_OPT_CLASSIC) { @@ -1472,6 +1493,9 @@ static int blk_trace_remove_queue(struct request_queue *q) if (atomic_dec_and_test(&blk_probes_ref)) blk_unregister_tracepoints(); + spin_lock_irq(&running_trace_lock); + list_del(&bt->running_list); + spin_unlock_irq(&running_trace_lock); blk_trace_free(bt); return 0; } diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 03cf44ac54d..ac9d1dad630 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -62,7 +62,7 @@ #define FTRACE_HASH_DEFAULT_BITS 10 #define FTRACE_HASH_MAX_BITS 12 -#define FL_GLOBAL_CONTROL_MASK (FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_CONTROL) +#define FL_GLOBAL_CONTROL_MASK (FTRACE_OPS_FL_CONTROL) #ifdef CONFIG_DYNAMIC_FTRACE #define INIT_REGEX_LOCK(opsname) \ @@ -85,6 +85,8 @@ int 
function_trace_stop __read_mostly; /* Current function tracing op */ struct ftrace_ops *function_trace_op __read_mostly = &ftrace_list_end; +/* What to set function_trace_op to */ +static struct ftrace_ops *set_function_trace_op; /* List for set_ftrace_pid's pids. */ LIST_HEAD(ftrace_pids); @@ -101,7 +103,6 @@ static int ftrace_disabled __read_mostly; static DEFINE_MUTEX(ftrace_lock); -static struct ftrace_ops *ftrace_global_list __read_mostly = &ftrace_list_end; static struct ftrace_ops *ftrace_control_list __read_mostly = &ftrace_list_end; static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end; ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; @@ -169,23 +170,6 @@ int ftrace_nr_registered_ops(void) return cnt; } -static void -ftrace_global_list_func(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct pt_regs *regs) -{ - int bit; - - bit = trace_test_and_set_recursion(TRACE_GLOBAL_START, TRACE_GLOBAL_MAX); - if (bit < 0) - return; - - do_for_each_ftrace_op(op, ftrace_global_list) { - op->func(ip, parent_ip, op, regs); - } while_for_each_ftrace_op(op); - - trace_clear_recursion(bit); -} - static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct pt_regs *regs) { @@ -235,55 +219,33 @@ static int control_ops_alloc(struct ftrace_ops *ops) return 0; } -static void control_ops_free(struct ftrace_ops *ops) -{ - free_percpu(ops->disabled); -} - -static void update_global_ops(void) +static void ftrace_sync(struct work_struct *work) { - ftrace_func_t func; - /* - * If there's only one function registered, then call that - * function directly. Otherwise, we need to iterate over the - * registered callers. + * This function is just a stub to implement a hard force + * of synchronize_sched(). This requires synchronizing + * tasks even in userspace and idle. + * + * Yes, function tracing is rude. */ - if (ftrace_global_list == &ftrace_list_end || - ftrace_global_list->next == &ftrace_list_end) { - func = ftrace_global_list->func; - /* - * As we are calling the function directly. - * If it does not have recursion protection, - * the function_trace_op needs to be updated - * accordingly. - */ - if (ftrace_global_list->flags & FTRACE_OPS_FL_RECURSION_SAFE) - global_ops.flags |= FTRACE_OPS_FL_RECURSION_SAFE; - else - global_ops.flags &= ~FTRACE_OPS_FL_RECURSION_SAFE; - } else { - func = ftrace_global_list_func; - /* The list has its own recursion protection. 
*/ - global_ops.flags |= FTRACE_OPS_FL_RECURSION_SAFE; - } - - - /* If we filter on pids, update to use the pid function */ - if (!list_empty(&ftrace_pids)) { - set_ftrace_pid_function(func); - func = ftrace_pid_func; - } +} - global_ops.func = func; +static void ftrace_sync_ipi(void *data) +{ + /* Probably not needed, but do it anyway */ + smp_rmb(); } +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +static void update_function_graph_func(void); +#else +static inline void update_function_graph_func(void) { } +#endif + static void update_ftrace_function(void) { ftrace_func_t func; - update_global_ops(); - /* * If we are at the end of the list and this ops is * recursion safe and not dynamic and the arch supports passing ops, @@ -295,20 +257,67 @@ static void update_ftrace_function(void) (ftrace_ops_list->flags & FTRACE_OPS_FL_RECURSION_SAFE) && !FTRACE_FORCE_LIST_FUNC)) { /* Set the ftrace_ops that the arch callback uses */ - if (ftrace_ops_list == &global_ops) - function_trace_op = ftrace_global_list; - else - function_trace_op = ftrace_ops_list; + set_function_trace_op = ftrace_ops_list; func = ftrace_ops_list->func; } else { /* Just use the default ftrace_ops */ - function_trace_op = &ftrace_list_end; + set_function_trace_op = &ftrace_list_end; func = ftrace_ops_list_func; } + update_function_graph_func(); + + /* If there's no change, then do nothing more here */ + if (ftrace_trace_function == func) + return; + + /* + * If we are using the list function, it doesn't care + * about the function_trace_ops. + */ + if (func == ftrace_ops_list_func) { + ftrace_trace_function = func; + /* + * Don't even bother setting function_trace_ops, + * it would be racy to do so anyway. + */ + return; + } + +#ifndef CONFIG_DYNAMIC_FTRACE + /* + * For static tracing, we need to be a bit more careful. + * The function change takes affect immediately. Thus, + * we need to coorditate the setting of the function_trace_ops + * with the setting of the ftrace_trace_function. + * + * Set the function to the list ops, which will call the + * function we want, albeit indirectly, but it handles the + * ftrace_ops and doesn't depend on function_trace_op. + */ + ftrace_trace_function = ftrace_ops_list_func; + /* + * Make sure all CPUs see this. Yes this is slow, but static + * tracing is slow and nasty to have enabled. + */ + schedule_on_each_cpu(ftrace_sync); + /* Now all cpus are using the list ops. */ + function_trace_op = set_function_trace_op; + /* Make sure the function_trace_op is visible on all CPUs */ + smp_wmb(); + /* Nasty way to force a rmb on all cpus */ + smp_call_function(ftrace_sync_ipi, NULL, 1); + /* OK, we are all set to update the ftrace_trace_function now! */ +#endif /* !CONFIG_DYNAMIC_FTRACE */ + ftrace_trace_function = func; } +int using_ftrace_ops_list_func(void) +{ + return ftrace_trace_function == ftrace_ops_list_func; +} + static void add_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops) { ops->next = *list; @@ -367,19 +376,12 @@ static int remove_ftrace_list_ops(struct ftrace_ops **list, static int __register_ftrace_function(struct ftrace_ops *ops) { - if (unlikely(ftrace_disabled)) - return -ENODEV; - - if (FTRACE_WARN_ON(ops == &global_ops)) + if (ops->flags & FTRACE_OPS_FL_DELETED) return -EINVAL; if (WARN_ON(ops->flags & FTRACE_OPS_FL_ENABLED)) return -EBUSY; - /* We don't support both control and global flags set. 
*/ - if ((ops->flags & FL_GLOBAL_CONTROL_MASK) == FL_GLOBAL_CONTROL_MASK) - return -EINVAL; - #ifndef CONFIG_DYNAMIC_FTRACE_WITH_REGS /* * If the ftrace_ops specifies SAVE_REGS, then it only can be used @@ -397,10 +399,7 @@ static int __register_ftrace_function(struct ftrace_ops *ops) if (!core_kernel_data((unsigned long)ops)) ops->flags |= FTRACE_OPS_FL_DYNAMIC; - if (ops->flags & FTRACE_OPS_FL_GLOBAL) { - add_ftrace_list_ops(&ftrace_global_list, &global_ops, ops); - ops->flags |= FTRACE_OPS_FL_ENABLED; - } else if (ops->flags & FTRACE_OPS_FL_CONTROL) { + if (ops->flags & FTRACE_OPS_FL_CONTROL) { if (control_ops_alloc(ops)) return -ENOMEM; add_ftrace_list_ops(&ftrace_control_list, &control_ops, ops); @@ -413,52 +412,16 @@ static int __register_ftrace_function(struct ftrace_ops *ops) return 0; } -static void ftrace_sync(struct work_struct *work) -{ - /* - * This function is just a stub to implement a hard force - * of synchronize_sched(). This requires synchronizing - * tasks even in userspace and idle. - * - * Yes, function tracing is rude. - */ -} - static int __unregister_ftrace_function(struct ftrace_ops *ops) { int ret; - if (ftrace_disabled) - return -ENODEV; - if (WARN_ON(!(ops->flags & FTRACE_OPS_FL_ENABLED))) return -EBUSY; - if (FTRACE_WARN_ON(ops == &global_ops)) - return -EINVAL; - - if (ops->flags & FTRACE_OPS_FL_GLOBAL) { - ret = remove_ftrace_list_ops(&ftrace_global_list, - &global_ops, ops); - if (!ret) - ops->flags &= ~FTRACE_OPS_FL_ENABLED; - } else if (ops->flags & FTRACE_OPS_FL_CONTROL) { + if (ops->flags & FTRACE_OPS_FL_CONTROL) { ret = remove_ftrace_list_ops(&ftrace_control_list, &control_ops, ops); - if (!ret) { - /* - * The ftrace_ops is now removed from the list, - * so there'll be no new users. We must ensure - * all current users are done before we free - * the control data. - * Note synchronize_sched() is not enough, as we - * use preempt_disable() to do RCU, but the function - * tracer can be called where RCU is not active - * (before user_exit()). - */ - schedule_on_each_cpu(ftrace_sync); - control_ops_free(ops); - } } else ret = remove_ftrace_ops(&ftrace_ops_list, ops); @@ -468,17 +431,6 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops) if (ftrace_enabled) update_ftrace_function(); - /* - * Dynamic ops may be freed, we must make sure that all - * callers are done before leaving this function. - * - * Again, normal synchronize_sched() is not good enough. - * We need to do a hard force of sched synchronization. 
- */ - if (ops->flags & FTRACE_OPS_FL_DYNAMIC) - schedule_on_each_cpu(ftrace_sync); - - return 0; } @@ -781,7 +733,7 @@ static int ftrace_profile_init(void) int cpu; int ret = 0; - for_each_online_cpu(cpu) { + for_each_possible_cpu(cpu) { ret = ftrace_profile_init_cpu(cpu); if (ret) break; @@ -870,7 +822,7 @@ function_profile_call(unsigned long ip, unsigned long parent_ip, local_irq_save(flags); - stat = &__get_cpu_var(ftrace_profile_stats); + stat = this_cpu_ptr(&ftrace_profile_stats); if (!stat->hash || !ftrace_profile_enabled) goto out; @@ -901,7 +853,7 @@ static void profile_graph_return(struct ftrace_graph_ret *trace) unsigned long flags; local_irq_save(flags); - stat = &__get_cpu_var(ftrace_profile_stats); + stat = this_cpu_ptr(&ftrace_profile_stats); if (!stat->hash || !ftrace_profile_enabled) goto out; @@ -1088,19 +1040,6 @@ static __init void ftrace_profile_debugfs(struct dentry *d_tracer) static struct pid * const ftrace_swapper_pid = &init_struct_pid; -loff_t -ftrace_filter_lseek(struct file *file, loff_t offset, int whence) -{ - loff_t ret; - - if (file->f_mode & FMODE_READ) - ret = seq_lseek(file, offset, whence); - else - file->f_pos = ret = 1; - - return ret; -} - #ifdef CONFIG_DYNAMIC_FTRACE #ifndef CONFIG_FTRACE_MCOUNT_RECORD @@ -1157,8 +1096,6 @@ struct ftrace_page { int size; }; -static struct ftrace_page *ftrace_new_pgs; - #define ENTRY_SIZE sizeof(struct dyn_ftrace) #define ENTRIES_PER_PAGE (PAGE_SIZE / ENTRY_SIZE) @@ -1168,7 +1105,7 @@ static struct ftrace_page *ftrace_new_pgs; static struct ftrace_page *ftrace_pages_start; static struct ftrace_page *ftrace_pages; -static bool ftrace_hash_empty(struct ftrace_hash *hash) +static bool __always_inline ftrace_hash_empty(struct ftrace_hash *hash) { return !hash || !hash->count; } @@ -1545,7 +1482,7 @@ unsigned long ftrace_location(unsigned long ip) * the function tracer. It checks the ftrace internal tables to * determine if the address belongs or not. */ -int ftrace_text_reserved(void *start, void *end) +int ftrace_text_reserved(const void *start, const void *end) { unsigned long ret; @@ -1615,7 +1552,14 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops, in_other_hash = !!ftrace_lookup_ip(other_hash, rec->ip); /* + * If filter_hash is set, we want to match all functions + * that are in the hash but not in the other hash. * + * If filter_hash is not set, then we are decrementing. + * That means we match anything that is in the hash + * and also in the other_hash. That is, we need to turn + * off functions in the other hash because they are disabled + * by this hash. */ if (filter_hash && in_hash && !in_other_hash) match = 1; @@ -1757,19 +1701,15 @@ static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update) /* * If this record is being updated from a nop, then * return UPDATE_MAKE_CALL. - * Otherwise, if the EN flag is set, then return - * UPDATE_MODIFY_CALL_REGS to tell the caller to convert - * from the non-save regs, to a save regs function. * Otherwise, * return UPDATE_MODIFY_CALL to tell the caller to convert - * from the save regs, to a non-save regs function. + * from the save regs, to a non-save regs function or + * vice versa. 
*/ if (flag & FTRACE_FL_ENABLED) return FTRACE_UPDATE_MAKE_CALL; - else if (rec->flags & FTRACE_FL_REGS_EN) - return FTRACE_UPDATE_MODIFY_CALL_REGS; - else - return FTRACE_UPDATE_MODIFY_CALL; + + return FTRACE_UPDATE_MODIFY_CALL; } if (update) { @@ -1811,6 +1751,42 @@ int ftrace_test_record(struct dyn_ftrace *rec, int enable) return ftrace_check_record(rec, enable, 0); } +/** + * ftrace_get_addr_new - Get the call address to set to + * @rec: The ftrace record descriptor + * + * If the record has the FTRACE_FL_REGS set, that means that it + * wants to convert to a callback that saves all regs. If FTRACE_FL_REGS + * is not not set, then it wants to convert to the normal callback. + * + * Returns the address of the trampoline to set to + */ +unsigned long ftrace_get_addr_new(struct dyn_ftrace *rec) +{ + if (rec->flags & FTRACE_FL_REGS) + return (unsigned long)FTRACE_REGS_ADDR; + else + return (unsigned long)FTRACE_ADDR; +} + +/** + * ftrace_get_addr_curr - Get the call address that is already there + * @rec: The ftrace record descriptor + * + * The FTRACE_FL_REGS_EN is set when the record already points to + * a function that saves all the regs. Basically the '_EN' version + * represents the current state of the function. + * + * Returns the address of the trampoline that is currently being called + */ +unsigned long ftrace_get_addr_curr(struct dyn_ftrace *rec) +{ + if (rec->flags & FTRACE_FL_REGS_EN) + return (unsigned long)FTRACE_REGS_ADDR; + else + return (unsigned long)FTRACE_ADDR; +} + static int __ftrace_replace_code(struct dyn_ftrace *rec, int enable) { @@ -1818,12 +1794,12 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable) unsigned long ftrace_addr; int ret; - ret = ftrace_update_record(rec, enable); + ftrace_addr = ftrace_get_addr_new(rec); - if (rec->flags & FTRACE_FL_REGS) - ftrace_addr = (unsigned long)FTRACE_REGS_ADDR; - else - ftrace_addr = (unsigned long)FTRACE_ADDR; + /* This needs to be done before we call ftrace_update_record */ + ftrace_old_addr = ftrace_get_addr_curr(rec); + + ret = ftrace_update_record(rec, enable); switch (ret) { case FTRACE_UPDATE_IGNORE: @@ -1835,13 +1811,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable) case FTRACE_UPDATE_MAKE_NOP: return ftrace_make_nop(NULL, rec, ftrace_addr); - case FTRACE_UPDATE_MODIFY_CALL_REGS: case FTRACE_UPDATE_MODIFY_CALL: - if (rec->flags & FTRACE_FL_REGS) - ftrace_old_addr = (unsigned long)FTRACE_ADDR; - else - ftrace_old_addr = (unsigned long)FTRACE_REGS_ADDR; - return ftrace_modify_call(rec, ftrace_old_addr, ftrace_addr); } @@ -1979,6 +1949,7 @@ int __weak ftrace_arch_code_modify_post_process(void) void ftrace_modify_all_code(int command) { int update = command & FTRACE_UPDATE_TRACE_FUNC; + int err = 0; /* * If the ftrace_caller calls a ftrace_ops func directly, @@ -1990,21 +1961,33 @@ void ftrace_modify_all_code(int command) * to make sure the ops are having the right functions * traced. 
*/ - if (update) - ftrace_update_ftrace_func(ftrace_ops_list_func); + if (update) { + err = ftrace_update_ftrace_func(ftrace_ops_list_func); + if (FTRACE_WARN_ON(err)) + return; + } if (command & FTRACE_UPDATE_CALLS) ftrace_replace_code(1); else if (command & FTRACE_DISABLE_CALLS) ftrace_replace_code(0); - if (update && ftrace_trace_function != ftrace_ops_list_func) - ftrace_update_ftrace_func(ftrace_trace_function); + if (update && ftrace_trace_function != ftrace_ops_list_func) { + function_trace_op = set_function_trace_op; + smp_wmb(); + /* If irqs are disabled, we are in stop machine */ + if (!irqs_disabled()) + smp_call_function(ftrace_sync_ipi, NULL, 1); + err = ftrace_update_ftrace_func(ftrace_trace_function); + if (FTRACE_WARN_ON(err)) + return; + } if (command & FTRACE_START_FUNC_RET) - ftrace_enable_ftrace_graph_caller(); + err = ftrace_enable_ftrace_graph_caller(); else if (command & FTRACE_STOP_FUNC_RET) - ftrace_disable_ftrace_graph_caller(); + err = ftrace_disable_ftrace_graph_caller(); + FTRACE_WARN_ON(err); } static int __ftrace_modify_code(void *data) @@ -2072,6 +2055,11 @@ static ftrace_func_t saved_ftrace_func; static int ftrace_start_up; static int global_start_up; +static void control_ops_free(struct ftrace_ops *ops) +{ + free_percpu(ops->disabled); +} + static void ftrace_startup_enable(int command) { if (saved_ftrace_func != ftrace_trace_function) { @@ -2087,38 +2075,37 @@ static void ftrace_startup_enable(int command) static int ftrace_startup(struct ftrace_ops *ops, int command) { - bool hash_enable = true; + int ret; if (unlikely(ftrace_disabled)) return -ENODEV; + ret = __register_ftrace_function(ops); + if (ret) + return ret; + ftrace_start_up++; command |= FTRACE_UPDATE_CALLS; - /* ops marked global share the filter hashes */ - if (ops->flags & FTRACE_OPS_FL_GLOBAL) { - ops = &global_ops; - /* Don't update hash if global is already set */ - if (global_start_up) - hash_enable = false; - global_start_up++; - } - ops->flags |= FTRACE_OPS_FL_ENABLED; - if (hash_enable) - ftrace_hash_rec_enable(ops, 1); + + ftrace_hash_rec_enable(ops, 1); ftrace_startup_enable(command); return 0; } -static void ftrace_shutdown(struct ftrace_ops *ops, int command) +static int ftrace_shutdown(struct ftrace_ops *ops, int command) { - bool hash_disable = true; + int ret; if (unlikely(ftrace_disabled)) - return; + return -ENODEV; + + ret = __unregister_ftrace_function(ops); + if (ret) + return ret; ftrace_start_up--; /* @@ -2128,21 +2115,9 @@ static void ftrace_shutdown(struct ftrace_ops *ops, int command) */ WARN_ON_ONCE(ftrace_start_up < 0); - if (ops->flags & FTRACE_OPS_FL_GLOBAL) { - ops = &global_ops; - global_start_up--; - WARN_ON_ONCE(global_start_up < 0); - /* Don't update hash if global still has users */ - if (global_start_up) { - WARN_ON_ONCE(!ftrace_start_up); - hash_disable = false; - } - } - - if (hash_disable) - ftrace_hash_rec_disable(ops, 1); + ftrace_hash_rec_disable(ops, 1); - if (ops != &global_ops || !global_start_up) + if (!global_start_up) ops->flags &= ~FTRACE_OPS_FL_ENABLED; command |= FTRACE_UPDATE_CALLS; @@ -2152,10 +2127,42 @@ static void ftrace_shutdown(struct ftrace_ops *ops, int command) command |= FTRACE_UPDATE_TRACE_FUNC; } - if (!command || !ftrace_enabled) - return; + if (!command || !ftrace_enabled) { + /* + * If these are control ops, they still need their + * per_cpu field freed. Since, function tracing is + * not currently active, we can just free them + * without synchronizing all CPUs. 
+ */ + if (ops->flags & FTRACE_OPS_FL_CONTROL) + control_ops_free(ops); + return 0; + } ftrace_run_update_code(command); + + /* + * Dynamic ops may be freed, we must make sure that all + * callers are done before leaving this function. + * The same goes for freeing the per_cpu data of the control + * ops. + * + * Again, normal synchronize_sched() is not good enough. + * We need to do a hard force of sched synchronization. + * This is because we use preempt_disable() to do RCU, but + * the function tracers can be called where RCU is not watching + * (like before user_exit()). We can not rely on the RCU + * infrastructure to do the synchronization, thus we must do it + * ourselves. + */ + if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_CONTROL)) { + schedule_on_each_cpu(ftrace_sync); + + if (ops->flags & FTRACE_OPS_FL_CONTROL) + control_ops_free(ops); + } + + return 0; } static void ftrace_startup_sysctl(void) @@ -2181,7 +2188,6 @@ static void ftrace_shutdown_sysctl(void) } static cycle_t ftrace_update_time; -static unsigned long ftrace_update_cnt; unsigned long ftrace_update_tot_cnt; static inline int ops_traces_mod(struct ftrace_ops *ops) @@ -2237,11 +2243,12 @@ static int referenced_filters(struct dyn_ftrace *rec) return cnt; } -static int ftrace_update_code(struct module *mod) +static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs) { struct ftrace_page *pg; struct dyn_ftrace *p; cycle_t start, stop; + unsigned long update_cnt = 0; unsigned long ref = 0; bool test = false; int i; @@ -2267,9 +2274,8 @@ static int ftrace_update_code(struct module *mod) } start = ftrace_now(raw_smp_processor_id()); - ftrace_update_cnt = 0; - for (pg = ftrace_new_pgs; pg; pg = pg->next) { + for (pg = new_pgs; pg; pg = pg->next) { for (i = 0; i < pg->index; i++) { int cnt = ref; @@ -2290,7 +2296,7 @@ static int ftrace_update_code(struct module *mod) if (!ftrace_code_disable(mod, p)) break; - ftrace_update_cnt++; + update_cnt++; /* * If the tracing is enabled, go ahead and enable the record. @@ -2309,11 +2315,9 @@ static int ftrace_update_code(struct module *mod) } } - ftrace_new_pgs = NULL; - stop = ftrace_now(raw_smp_processor_id()); ftrace_update_time = stop - start; - ftrace_update_tot_cnt += ftrace_update_cnt; + ftrace_update_tot_cnt += update_cnt; return 0; } @@ -2405,22 +2409,6 @@ ftrace_allocate_pages(unsigned long num_to_init) return NULL; } -static int __init ftrace_dyn_table_alloc(unsigned long num_to_init) -{ - int cnt; - - if (!num_to_init) { - pr_info("ftrace: No functions to be traced?\n"); - return -1; - } - - cnt = num_to_init / ENTRIES_PER_PAGE; - pr_info("ftrace: allocating %ld entries in %d pages\n", - num_to_init, cnt + 1); - - return 0; -} - #define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ struct ftrace_iterator { @@ -2734,7 +2722,7 @@ static void ftrace_filter_reset(struct ftrace_hash *hash) * routine, you can use ftrace_filter_write() for the write * routine if @flag has FTRACE_ITER_FILTER set, or * ftrace_notrace_write() if @flag has FTRACE_ITER_NOTRACE set. - * ftrace_filter_lseek() should be used as the lseek routine, and + * tracing_lseek() should be used as the lseek routine, and * release must call ftrace_regex_release(). 
*/ int @@ -2808,7 +2796,9 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag, static int ftrace_filter_open(struct inode *inode, struct file *file) { - return ftrace_regex_open(&global_ops, + struct ftrace_ops *ops = inode->i_private; + + return ftrace_regex_open(ops, FTRACE_ITER_FILTER | FTRACE_ITER_DO_HASH, inode, file); } @@ -2816,7 +2806,9 @@ ftrace_filter_open(struct inode *inode, struct file *file) static int ftrace_notrace_open(struct inode *inode, struct file *file) { - return ftrace_regex_open(&global_ops, FTRACE_ITER_NOTRACE, + struct ftrace_ops *ops = inode->i_private; + + return ftrace_regex_open(ops, FTRACE_ITER_NOTRACE, inode, file); } @@ -3060,16 +3052,13 @@ static void __enable_ftrace_function_probe(void) if (i == FTRACE_FUNC_HASHSIZE) return; - ret = __register_ftrace_function(&trace_probe_ops); - if (!ret) - ret = ftrace_startup(&trace_probe_ops, 0); + ret = ftrace_startup(&trace_probe_ops, 0); ftrace_probe_registered = 1; } static void __disable_ftrace_function_probe(void) { - int ret; int i; if (!ftrace_probe_registered) @@ -3082,9 +3071,7 @@ static void __disable_ftrace_function_probe(void) } /* no more funcs left */ - ret = __unregister_ftrace_function(&trace_probe_ops); - if (!ret) - ftrace_shutdown(&trace_probe_ops, 0); + ftrace_shutdown(&trace_probe_ops, 0); ftrace_probe_registered = 0; } @@ -3307,7 +3294,11 @@ void unregister_ftrace_function_probe_all(char *glob) static LIST_HEAD(ftrace_commands); static DEFINE_MUTEX(ftrace_cmd_mutex); -int register_ftrace_command(struct ftrace_func_command *cmd) +/* + * Currently we only register ftrace commands from __init, so mark this + * __init too. + */ +__init int register_ftrace_command(struct ftrace_func_command *cmd) { struct ftrace_func_command *p; int ret = 0; @@ -3326,7 +3317,11 @@ int register_ftrace_command(struct ftrace_func_command *cmd) return ret; } -int unregister_ftrace_command(struct ftrace_func_command *cmd) +/* + * Currently we only unregister ftrace commands from __init, so mark + * this __init too. + */ +__init int unregister_ftrace_command(struct ftrace_func_command *cmd) { struct ftrace_func_command *p, *n; int ret = -ENODEV; @@ -3466,10 +3461,6 @@ ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len, struct ftrace_hash *hash; int ret; - /* All global ops uses the global ops filters */ - if (ops->flags & FTRACE_OPS_FL_GLOBAL) - ops = &global_ops; - if (unlikely(ftrace_disabled)) return -ENODEV; @@ -3581,8 +3572,7 @@ int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf, } EXPORT_SYMBOL_GPL(ftrace_set_notrace); /** - * ftrace_set_filter - set a function to filter on in ftrace - * @ops - the ops to set the filter with + * ftrace_set_global_filter - set a function to filter on with global tracers * @buf - the string that holds the function filter text. * @len - the length of the string. * @reset - non zero to reset all filters before applying this filter. @@ -3597,8 +3587,7 @@ void ftrace_set_global_filter(unsigned char *buf, int len, int reset) EXPORT_SYMBOL_GPL(ftrace_set_global_filter); /** - * ftrace_set_notrace - set a function to not trace in ftrace - * @ops - the ops to set the notrace filter with + * ftrace_set_global_notrace - set a function to not trace with global tracers * @buf - the string that holds the function notrace text. * @len - the length of the string. * @reset - non zero to reset all filters before applying this filter. 
@@ -3641,7 +3630,7 @@ __setup("ftrace_filter=", set_ftrace_filter); #ifdef CONFIG_FUNCTION_GRAPH_TRACER static char ftrace_graph_buf[FTRACE_FILTER_SIZE] __initdata; -static int ftrace_set_func(unsigned long *array, int *idx, char *buffer); +static int ftrace_set_func(unsigned long *array, int *idx, int size, char *buffer); static int __init set_graph_function(char *str) { @@ -3659,7 +3648,7 @@ static void __init set_ftrace_early_graph(char *buf) func = strsep(&buf, ","); /* we allow only one expression at a time */ ret = ftrace_set_func(ftrace_graph_funcs, &ftrace_graph_count, - func); + FTRACE_GRAPH_MAX_FUNCS, func); if (ret) printk(KERN_DEBUG "ftrace: function %s not " "traceable\n", func); @@ -3759,7 +3748,7 @@ static const struct file_operations ftrace_filter_fops = { .open = ftrace_filter_open, .read = seq_read, .write = ftrace_filter_write, - .llseek = ftrace_filter_lseek, + .llseek = tracing_lseek, .release = ftrace_regex_release, }; @@ -3767,7 +3756,7 @@ static const struct file_operations ftrace_notrace_fops = { .open = ftrace_notrace_open, .read = seq_read, .write = ftrace_notrace_write, - .llseek = ftrace_filter_lseek, + .llseek = tracing_lseek, .release = ftrace_regex_release, }; @@ -3776,15 +3765,25 @@ static const struct file_operations ftrace_notrace_fops = { static DEFINE_MUTEX(graph_lock); int ftrace_graph_count; -int ftrace_graph_filter_enabled; +int ftrace_graph_notrace_count; unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly; +unsigned long ftrace_graph_notrace_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly; + +struct ftrace_graph_data { + unsigned long *table; + size_t size; + int *count; + const struct seq_operations *seq_ops; +}; static void * __g_next(struct seq_file *m, loff_t *pos) { - if (*pos >= ftrace_graph_count) + struct ftrace_graph_data *fgd = m->private; + + if (*pos >= *fgd->count) return NULL; - return &ftrace_graph_funcs[*pos]; + return &fgd->table[*pos]; } static void * @@ -3796,10 +3795,12 @@ g_next(struct seq_file *m, void *v, loff_t *pos) static void *g_start(struct seq_file *m, loff_t *pos) { + struct ftrace_graph_data *fgd = m->private; + mutex_lock(&graph_lock); /* Nothing, tell g_show to print all functions are enabled */ - if (!ftrace_graph_filter_enabled && !*pos) + if (!*fgd->count && !*pos) return (void *)1; return __g_next(m, pos); @@ -3835,38 +3836,88 @@ static const struct seq_operations ftrace_graph_seq_ops = { }; static int -ftrace_graph_open(struct inode *inode, struct file *file) +__ftrace_graph_open(struct inode *inode, struct file *file, + struct ftrace_graph_data *fgd) { int ret = 0; - if (unlikely(ftrace_disabled)) - return -ENODEV; - mutex_lock(&graph_lock); if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) { - ftrace_graph_filter_enabled = 0; - ftrace_graph_count = 0; - memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs)); + *fgd->count = 0; + memset(fgd->table, 0, fgd->size * sizeof(*fgd->table)); } mutex_unlock(&graph_lock); - if (file->f_mode & FMODE_READ) - ret = seq_open(file, &ftrace_graph_seq_ops); + if (file->f_mode & FMODE_READ) { + ret = seq_open(file, fgd->seq_ops); + if (!ret) { + struct seq_file *m = file->private_data; + m->private = fgd; + } + } else + file->private_data = fgd; return ret; } static int +ftrace_graph_open(struct inode *inode, struct file *file) +{ + struct ftrace_graph_data *fgd; + + if (unlikely(ftrace_disabled)) + return -ENODEV; + + fgd = kmalloc(sizeof(*fgd), GFP_KERNEL); + if (fgd == NULL) + return -ENOMEM; + + fgd->table = ftrace_graph_funcs; + fgd->size = 
FTRACE_GRAPH_MAX_FUNCS; + fgd->count = &ftrace_graph_count; + fgd->seq_ops = &ftrace_graph_seq_ops; + + return __ftrace_graph_open(inode, file, fgd); +} + +static int +ftrace_graph_notrace_open(struct inode *inode, struct file *file) +{ + struct ftrace_graph_data *fgd; + + if (unlikely(ftrace_disabled)) + return -ENODEV; + + fgd = kmalloc(sizeof(*fgd), GFP_KERNEL); + if (fgd == NULL) + return -ENOMEM; + + fgd->table = ftrace_graph_notrace_funcs; + fgd->size = FTRACE_GRAPH_MAX_FUNCS; + fgd->count = &ftrace_graph_notrace_count; + fgd->seq_ops = &ftrace_graph_seq_ops; + + return __ftrace_graph_open(inode, file, fgd); +} + +static int ftrace_graph_release(struct inode *inode, struct file *file) { - if (file->f_mode & FMODE_READ) + if (file->f_mode & FMODE_READ) { + struct seq_file *m = file->private_data; + + kfree(m->private); seq_release(inode, file); + } else { + kfree(file->private_data); + } + return 0; } static int -ftrace_set_func(unsigned long *array, int *idx, char *buffer) +ftrace_set_func(unsigned long *array, int *idx, int size, char *buffer) { struct dyn_ftrace *rec; struct ftrace_page *pg; @@ -3879,7 +3930,7 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer) /* decode regex */ type = filter_parse_regex(buffer, strlen(buffer), &search, ¬); - if (!not && *idx >= FTRACE_GRAPH_MAX_FUNCS) + if (!not && *idx >= size) return -EBUSY; search_len = strlen(search); @@ -3907,7 +3958,7 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer) fail = 0; if (!exists) { array[(*idx)++] = rec->ip; - if (*idx >= FTRACE_GRAPH_MAX_FUNCS) + if (*idx >= size) goto out; } } else { @@ -3925,8 +3976,6 @@ out: if (fail) return -EINVAL; - ftrace_graph_filter_enabled = !!(*idx); - return 0; } @@ -3935,36 +3984,33 @@ ftrace_graph_write(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_parser parser; - ssize_t read, ret; + ssize_t read, ret = 0; + struct ftrace_graph_data *fgd = file->private_data; if (!cnt) return 0; - mutex_lock(&graph_lock); - - if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX)) { - ret = -ENOMEM; - goto out_unlock; - } + if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX)) + return -ENOMEM; read = trace_get_user(&parser, ubuf, cnt, ppos); if (read >= 0 && trace_parser_loaded((&parser))) { parser.buffer[parser.idx] = 0; + mutex_lock(&graph_lock); + /* we allow only one expression at a time */ - ret = ftrace_set_func(ftrace_graph_funcs, &ftrace_graph_count, - parser.buffer); - if (ret) - goto out_free; + ret = ftrace_set_func(fgd->table, fgd->count, fgd->size, + parser.buffer); + + mutex_unlock(&graph_lock); } - ret = read; + if (!ret) + ret = read; -out_free: trace_parser_put(&parser); -out_unlock: - mutex_unlock(&graph_lock); return ret; } @@ -3973,11 +4019,49 @@ static const struct file_operations ftrace_graph_fops = { .open = ftrace_graph_open, .read = seq_read, .write = ftrace_graph_write, - .llseek = ftrace_filter_lseek, + .llseek = tracing_lseek, + .release = ftrace_graph_release, +}; + +static const struct file_operations ftrace_graph_notrace_fops = { + .open = ftrace_graph_notrace_open, + .read = seq_read, + .write = ftrace_graph_write, + .llseek = tracing_lseek, .release = ftrace_graph_release, }; #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ +void ftrace_create_filter_files(struct ftrace_ops *ops, + struct dentry *parent) +{ + + trace_create_file("set_ftrace_filter", 0644, parent, + ops, &ftrace_filter_fops); + + trace_create_file("set_ftrace_notrace", 0644, parent, + ops, &ftrace_notrace_fops); +} + +/* + * The name 
"destroy_filter_files" is really a misnomer. Although + * in the future, it may actualy delete the files, but this is + * really intended to make sure the ops passed in are disabled + * and that when this function returns, the caller is free to + * free the ops. + * + * The "destroy" name is only to match the "create" name that this + * should be paired with. + */ +void ftrace_destroy_filter_files(struct ftrace_ops *ops) +{ + mutex_lock(&ftrace_lock); + if (ops->flags & FTRACE_OPS_FL_ENABLED) + ftrace_shutdown(ops, 0); + ops->flags |= FTRACE_OPS_FL_DELETED; + mutex_unlock(&ftrace_lock); +} + static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { @@ -3987,16 +4071,15 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer) trace_create_file("enabled_functions", 0444, d_tracer, NULL, &ftrace_enabled_fops); - trace_create_file("set_ftrace_filter", 0644, d_tracer, - NULL, &ftrace_filter_fops); - - trace_create_file("set_ftrace_notrace", 0644, d_tracer, - NULL, &ftrace_notrace_fops); + ftrace_create_filter_files(&global_ops, d_tracer); #ifdef CONFIG_FUNCTION_GRAPH_TRACER trace_create_file("set_graph_function", 0444, d_tracer, NULL, &ftrace_graph_fops); + trace_create_file("set_graph_notrace", 0444, d_tracer, + NULL, + &ftrace_graph_notrace_fops); #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ return 0; @@ -4104,9 +4187,6 @@ static int ftrace_process_locs(struct module *mod, /* Assign the last page to ftrace_pages */ ftrace_pages = pg; - /* These new locations need to be initialized */ - ftrace_new_pgs = start_pg; - /* * We only need to disable interrupts on start up * because we are modifying code that an interrupt @@ -4117,7 +4197,7 @@ static int ftrace_process_locs(struct module *mod, */ if (!mod) local_irq_save(flags); - ftrace_update_code(mod); + ftrace_update_code(mod, start_pg); if (!mod) local_irq_restore(flags); ret = 0; @@ -4181,16 +4261,11 @@ static void ftrace_init_module(struct module *mod, ftrace_process_locs(mod, start, end); } -static int ftrace_module_notify_enter(struct notifier_block *self, - unsigned long val, void *data) +void ftrace_module_init(struct module *mod) { - struct module *mod = data; - - if (val == MODULE_STATE_COMING) - ftrace_init_module(mod, mod->ftrace_callsites, - mod->ftrace_callsites + - mod->num_ftrace_callsites); - return 0; + ftrace_init_module(mod, mod->ftrace_callsites, + mod->ftrace_callsites + + mod->num_ftrace_callsites); } static int ftrace_module_notify_exit(struct notifier_block *self, @@ -4204,11 +4279,6 @@ static int ftrace_module_notify_exit(struct notifier_block *self, return 0; } #else -static int ftrace_module_notify_enter(struct notifier_block *self, - unsigned long val, void *data) -{ - return 0; -} static int ftrace_module_notify_exit(struct notifier_block *self, unsigned long val, void *data) { @@ -4216,40 +4286,32 @@ static int ftrace_module_notify_exit(struct notifier_block *self, } #endif /* CONFIG_MODULES */ -struct notifier_block ftrace_module_enter_nb = { - .notifier_call = ftrace_module_notify_enter, - .priority = INT_MAX, /* Run before anything that can use kprobes */ -}; - struct notifier_block ftrace_module_exit_nb = { .notifier_call = ftrace_module_notify_exit, .priority = INT_MIN, /* Run after anything that can remove kprobes */ }; -extern unsigned long __start_mcount_loc[]; -extern unsigned long __stop_mcount_loc[]; - void __init ftrace_init(void) { - unsigned long count, addr, flags; + extern unsigned long __start_mcount_loc[]; + extern unsigned long __stop_mcount_loc[]; + unsigned long count, flags; 
int ret; - /* Keep the ftrace pointer to the stub */ - addr = (unsigned long)ftrace_stub; - local_irq_save(flags); - ftrace_dyn_arch_init(&addr); + ret = ftrace_dyn_arch_init(); local_irq_restore(flags); - - /* ftrace_dyn_arch_init places the return code in addr */ - if (addr) + if (ret) goto failed; count = __stop_mcount_loc - __start_mcount_loc; - - ret = ftrace_dyn_table_alloc(count); - if (ret) + if (!count) { + pr_info("ftrace: No functions to be traced?\n"); goto failed; + } + + pr_info("ftrace: allocating %ld entries in %ld pages\n", + count, count / ENTRIES_PER_PAGE + 1); last_ftrace_enabled = ftrace_enabled = 1; @@ -4257,10 +4319,6 @@ void __init ftrace_init(void) __start_mcount_loc, __stop_mcount_loc); - ret = register_module_notifier(&ftrace_module_enter_nb); - if (ret) - pr_warning("Failed to register trace ftrace module enter notifier\n"); - ret = register_module_notifier(&ftrace_module_exit_nb); if (ret) pr_warning("Failed to register trace ftrace module exit notifier\n"); @@ -4290,12 +4348,21 @@ core_initcall(ftrace_nodyn_init); static inline int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { return 0; } static inline void ftrace_startup_enable(int command) { } /* Keep as macros so we do not need to define the commands */ -# define ftrace_startup(ops, command) \ - ({ \ - (ops)->flags |= FTRACE_OPS_FL_ENABLED; \ - 0; \ +# define ftrace_startup(ops, command) \ + ({ \ + int ___ret = __register_ftrace_function(ops); \ + if (!___ret) \ + (ops)->flags |= FTRACE_OPS_FL_ENABLED; \ + ___ret; \ }) -# define ftrace_shutdown(ops, command) do { } while (0) +# define ftrace_shutdown(ops, command) \ + ({ \ + int ___ret = __unregister_ftrace_function(ops); \ + if (!___ret) \ + (ops)->flags &= ~FTRACE_OPS_FL_ENABLED; \ + ___ret; \ + }) + # define ftrace_startup_sysctl() do { } while (0) # define ftrace_shutdown_sysctl() do { } while (0) @@ -4307,6 +4374,34 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs) #endif /* CONFIG_DYNAMIC_FTRACE */ +__init void ftrace_init_global_array_ops(struct trace_array *tr) +{ + tr->ops = &global_ops; + tr->ops->private = tr; +} + +void ftrace_init_array_ops(struct trace_array *tr, ftrace_func_t func) +{ + /* If we filter on pids, update to use the pid function */ + if (tr->flags & TRACE_ARRAY_FL_GLOBAL) { + if (WARN_ON(tr->ops->func != ftrace_stub)) + printk("ftrace ops had %pS for function\n", + tr->ops->func); + /* Only the top level instance does pid tracing */ + if (!list_empty(&ftrace_pids)) { + set_ftrace_pid_function(func); + func = ftrace_pid_func; + } + } + tr->ops->func = func; + tr->ops->private = tr; +} + +void ftrace_reset_array_ops(struct trace_array *tr) +{ + tr->ops->func = ftrace_stub; +} + static void ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct pt_regs *regs) @@ -4320,12 +4415,21 @@ ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip, */ preempt_disable_notrace(); trace_recursion_set(TRACE_CONTROL_BIT); + + /* + * Control funcs (perf) uses RCU. Only trace if + * RCU is currently active. 
+ */ + if (!rcu_is_watching()) + goto out; + do_for_each_ftrace_op(op, ftrace_control_list) { if (!(op->flags & FTRACE_OPS_FL_STUB) && !ftrace_function_local_disabled(op) && ftrace_ops_test(op, ip, regs)) op->func(ip, parent_ip, op, regs); } while_for_each_ftrace_op(op); + out: trace_recursion_clear(TRACE_CONTROL_BIT); preempt_enable_notrace(); } @@ -4356,9 +4460,16 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, */ preempt_disable_notrace(); do_for_each_ftrace_op(op, ftrace_ops_list) { - if (ftrace_ops_test(op, ip, regs)) + if (ftrace_ops_test(op, ip, regs)) { + if (WARN_ON(!op->func)) { + function_trace_stop = 1; + printk("op=%p %pS\n", op, op); + goto out; + } op->func(ip, parent_ip, op, regs); + } } while_for_each_ftrace_op(op); +out: preempt_enable_notrace(); trace_clear_recursion(bit); } @@ -4631,7 +4742,7 @@ static const struct file_operations ftrace_pid_fops = { .open = ftrace_pid_open, .write = ftrace_pid_write, .read = seq_read, - .llseek = ftrace_filter_lseek, + .llseek = tracing_lseek, .release = ftrace_pid_release, }; @@ -4695,9 +4806,7 @@ int register_ftrace_function(struct ftrace_ops *ops) mutex_lock(&ftrace_lock); - ret = __register_ftrace_function(ops); - if (!ret) - ret = ftrace_startup(ops, 0); + ret = ftrace_startup(ops, 0); mutex_unlock(&ftrace_lock); @@ -4716,9 +4825,7 @@ int unregister_ftrace_function(struct ftrace_ops *ops) int ret; mutex_lock(&ftrace_lock); - ret = __unregister_ftrace_function(ops); - if (!ret) - ftrace_shutdown(ops, 0); + ret = ftrace_shutdown(ops, 0); mutex_unlock(&ftrace_lock); return ret; @@ -4767,7 +4874,6 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, #ifdef CONFIG_FUNCTION_GRAPH_TRACER static int ftrace_graph_active; -static struct notifier_block ftrace_suspend_notifier; int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace) { @@ -4778,6 +4884,7 @@ int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace) trace_func_graph_ret_t ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; trace_func_graph_ent_t ftrace_graph_entry = ftrace_graph_entry_stub; +static trace_func_graph_ent_t __ftrace_graph_entry = ftrace_graph_entry_stub; /* Try to assign a return stack array on FTRACE_RETSTACK_ALLOC_SIZE tasks. */ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list) @@ -4912,6 +5019,34 @@ ftrace_suspend_notifier_call(struct notifier_block *bl, unsigned long state, return NOTIFY_DONE; } +static int ftrace_graph_entry_test(struct ftrace_graph_ent *trace) +{ + if (!ftrace_ops_test(&global_ops, trace->func, NULL)) + return 0; + return __ftrace_graph_entry(trace); +} + +/* + * The function graph tracer should only trace the functions defined + * by set_ftrace_filter and set_ftrace_notrace. If another function + * tracer ops is registered, the graph tracer requires testing the + * function against the global ops, and not just trace any function + * that any ftrace_ops registered. 
+ */ +static void update_function_graph_func(void) +{ + if (ftrace_ops_list == &ftrace_list_end || + (ftrace_ops_list == &global_ops && + global_ops.next == &ftrace_list_end)) + ftrace_graph_entry = __ftrace_graph_entry; + else + ftrace_graph_entry = ftrace_graph_entry_test; +} + +static struct notifier_block ftrace_suspend_notifier = { + .notifier_call = ftrace_suspend_notifier_call, +}; + int register_ftrace_graph(trace_func_graph_ret_t retfunc, trace_func_graph_ent_t entryfunc) { @@ -4925,7 +5060,6 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc, goto out; } - ftrace_suspend_notifier.notifier_call = ftrace_suspend_notifier_call; register_pm_notifier(&ftrace_suspend_notifier); ftrace_graph_active++; @@ -4936,7 +5070,19 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc, } ftrace_graph_return = retfunc; - ftrace_graph_entry = entryfunc; + + /* + * Update the indirect function to the entryfunc, and the + * function that gets called to the entry_test first. Then + * call the update fgraph entry function to determine if + * the entryfunc should be called directly or not. + */ + __ftrace_graph_entry = entryfunc; + ftrace_graph_entry = ftrace_graph_entry_test; + update_function_graph_func(); + + /* Function graph doesn't use the .func field of global_ops */ + global_ops.flags |= FTRACE_OPS_FL_STUB; ret = ftrace_startup(&global_ops, FTRACE_START_FUNC_RET); @@ -4955,7 +5101,9 @@ void unregister_ftrace_graph(void) ftrace_graph_active--; ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; ftrace_graph_entry = ftrace_graph_entry_stub; + __ftrace_graph_entry = ftrace_graph_entry_stub; ftrace_shutdown(&global_ops, FTRACE_STOP_FUNC_RET); + global_ops.flags &= ~FTRACE_OPS_FL_STUB; unregister_pm_notifier(&ftrace_suspend_notifier); unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index cc2f66f68dc..ff7027199a9 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -543,7 +543,7 @@ static void rb_wake_up_waiters(struct irq_work *work) * as data is added to any of the @buffer's cpu buffers. Otherwise * it will wait for data to be added to a specific cpu buffer. */ -void ring_buffer_wait(struct ring_buffer *buffer, int cpu) +int ring_buffer_wait(struct ring_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; DEFINE_WAIT(wait); @@ -557,6 +557,8 @@ void ring_buffer_wait(struct ring_buffer *buffer, int cpu) if (cpu == RING_BUFFER_ALL_CPUS) work = &buffer->irq_work; else { + if (!cpumask_test_cpu(cpu, buffer->cpumask)) + return -ENODEV; cpu_buffer = buffer->buffers[cpu]; work = &cpu_buffer->irq_work; } @@ -591,6 +593,7 @@ void ring_buffer_wait(struct ring_buffer *buffer, int cpu) schedule(); finish_wait(&work->waiters, &wait); + return 0; } /** @@ -613,10 +616,6 @@ int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu, struct ring_buffer_per_cpu *cpu_buffer; struct rb_irq_work *work; - if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) || - (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu))) - return POLLIN | POLLRDNORM; - if (cpu == RING_BUFFER_ALL_CPUS) work = &buffer->irq_work; else { @@ -1301,7 +1300,7 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags, * In that off case, we need to allocate for all possible cpus. 
*/ #ifdef CONFIG_HOTPLUG_CPU - get_online_cpus(); + cpu_notifier_register_begin(); cpumask_copy(buffer->cpumask, cpu_online_mask); #else cpumask_copy(buffer->cpumask, cpu_possible_mask); @@ -1324,10 +1323,10 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags, #ifdef CONFIG_HOTPLUG_CPU buffer->cpu_notify.notifier_call = rb_cpu_notify; buffer->cpu_notify.priority = 0; - register_cpu_notifier(&buffer->cpu_notify); + __register_cpu_notifier(&buffer->cpu_notify); + cpu_notifier_register_done(); #endif - put_online_cpus(); mutex_init(&buffer->mutex); return buffer; @@ -1341,7 +1340,9 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags, fail_free_cpumask: free_cpumask_var(buffer->cpumask); - put_online_cpus(); +#ifdef CONFIG_HOTPLUG_CPU + cpu_notifier_register_done(); +#endif fail_free_buffer: kfree(buffer); @@ -1358,16 +1359,17 @@ ring_buffer_free(struct ring_buffer *buffer) { int cpu; - get_online_cpus(); - #ifdef CONFIG_HOTPLUG_CPU - unregister_cpu_notifier(&buffer->cpu_notify); + cpu_notifier_register_begin(); + __unregister_cpu_notifier(&buffer->cpu_notify); #endif for_each_buffer_cpu(buffer, cpu) rb_free_cpu_buffer(buffer->buffers[cpu]); - put_online_cpus(); +#ifdef CONFIG_HOTPLUG_CPU + cpu_notifier_register_done(); +#endif kfree(buffer->buffers); free_cpumask_var(buffer->cpumask); @@ -2397,6 +2399,13 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, write &= RB_WRITE_MASK; tail = write - length; + /* + * If this is the first commit on the page, then it has the same + * timestamp as the page itself. + */ + if (!tail) + delta = 0; + /* See if we shot pass the end of this buffer page */ if (unlikely(write > BUF_PAGE_SIZE)) return rb_move_tail(cpu_buffer, length, tail, @@ -2558,7 +2567,7 @@ rb_reserve_next_event(struct ring_buffer *buffer, if (unlikely(test_time_stamp(delta))) { int local_clock_stable = 1; #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK - local_clock_stable = sched_clock_stable; + local_clock_stable = sched_clock_stable(); #endif WARN_ONCE(delta > (1ULL << 59), KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s", diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c index a5457d577b9..0434ff1b808 100644 --- a/kernel/trace/ring_buffer_benchmark.c +++ b/kernel/trace/ring_buffer_benchmark.c @@ -40,8 +40,8 @@ static int write_iteration = 50; module_param(write_iteration, uint, 0644); MODULE_PARM_DESC(write_iteration, "# of writes between timestamp readings"); -static int producer_nice = 19; -static int consumer_nice = 19; +static int producer_nice = MAX_NICE; +static int consumer_nice = MAX_NICE; static int producer_fifo = -1; static int consumer_fifo = -1; @@ -308,7 +308,7 @@ static void ring_buffer_producer(void) /* Let the user know that the test is running at low priority */ if (producer_fifo < 0 && consumer_fifo < 0 && - producer_nice == 19 && consumer_nice == 19) + producer_nice == MAX_NICE && consumer_nice == MAX_NICE) trace_printk("WARNING!!! 
This test is running at lowest priority.\n"); trace_printk("Time: %lld (usecs)\n", time); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 7974ba20557..291397e6666 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -73,7 +73,8 @@ static struct tracer_flags dummy_tracer_flags = { .opts = dummy_tracer_opt }; -static int dummy_set_flag(u32 old_flags, u32 bit, int set) +static int +dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set) { return 0; } @@ -118,7 +119,7 @@ enum ftrace_dump_mode ftrace_dump_on_oops; /* When set, tracing will stop when a WARN*() is hit */ int __disable_trace_on_warning; -static int tracing_set_tracer(const char *buf); +static int tracing_set_tracer(struct trace_array *tr, const char *buf); #define MAX_TRACER_SIZE 100 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata; @@ -180,6 +181,17 @@ static int __init set_trace_boot_options(char *str) } __setup("trace_options=", set_trace_boot_options); +static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata; +static char *trace_boot_clock __initdata; + +static int __init set_trace_boot_clock(char *str) +{ + strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE); + trace_boot_clock = trace_boot_clock_buf; + return 0; +} +__setup("trace_clock=", set_trace_boot_clock); + unsigned long long ns2usecs(cycle_t nsec) { @@ -235,15 +247,35 @@ void trace_array_put(struct trace_array *this_tr) mutex_unlock(&trace_types_lock); } -int filter_current_check_discard(struct ring_buffer *buffer, - struct ftrace_event_call *call, void *rec, - struct ring_buffer_event *event) +int filter_check_discard(struct ftrace_event_file *file, void *rec, + struct ring_buffer *buffer, + struct ring_buffer_event *event) { - return filter_check_discard(call, rec, buffer, event); + if (unlikely(file->flags & FTRACE_EVENT_FL_FILTERED) && + !filter_match_preds(file->filter, rec)) { + ring_buffer_discard_commit(buffer, event); + return 1; + } + + return 0; } -EXPORT_SYMBOL_GPL(filter_current_check_discard); +EXPORT_SYMBOL_GPL(filter_check_discard); -cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu) +int call_filter_check_discard(struct ftrace_event_call *call, void *rec, + struct ring_buffer *buffer, + struct ring_buffer_event *event) +{ + if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) && + !filter_match_preds(call->filter, rec)) { + ring_buffer_discard_commit(buffer, event); + return 1; + } + + return 0; +} +EXPORT_SYMBOL_GPL(call_filter_check_discard); + +static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu) { u64 ts; @@ -434,13 +466,22 @@ int __trace_puts(unsigned long ip, const char *str, int size) struct print_entry *entry; unsigned long irq_flags; int alloc; + int pc; + + if (!(trace_flags & TRACE_ITER_PRINTK)) + return 0; + + pc = preempt_count(); + + if (unlikely(tracing_selftest_running || tracing_disabled)) + return 0; alloc = sizeof(*entry) + size + 2; /* possible \n added */ local_save_flags(irq_flags); buffer = global_trace.trace_buffer.buffer; event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, - irq_flags, preempt_count()); + irq_flags, pc); if (!event) return 0; @@ -457,6 +498,7 @@ int __trace_puts(unsigned long ip, const char *str, int size) entry->buf[size] = '\0'; __buffer_unlock_commit(buffer, event); + ftrace_trace_stack(buffer, irq_flags, 4, pc); return size; } @@ -474,11 +516,20 @@ int __trace_bputs(unsigned long ip, const char *str) struct bputs_entry *entry; unsigned long irq_flags; int size = sizeof(struct bputs_entry); + int pc; + + if 
(!(trace_flags & TRACE_ITER_PRINTK)) + return 0; + + pc = preempt_count(); + + if (unlikely(tracing_selftest_running || tracing_disabled)) + return 0; local_save_flags(irq_flags); buffer = global_trace.trace_buffer.buffer; event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size, - irq_flags, preempt_count()); + irq_flags, pc); if (!event) return 0; @@ -487,6 +538,7 @@ int __trace_bputs(unsigned long ip, const char *str) entry->str = str; __buffer_unlock_commit(buffer, event); + ftrace_trace_stack(buffer, irq_flags, 4, pc); return 1; } @@ -561,7 +613,7 @@ static int alloc_snapshot(struct trace_array *tr) return 0; } -void free_snapshot(struct trace_array *tr) +static void free_snapshot(struct trace_array *tr) { /* * We don't free the ring buffer. instead, resize it because @@ -575,6 +627,28 @@ void free_snapshot(struct trace_array *tr) } /** + * tracing_alloc_snapshot - allocate snapshot buffer. + * + * This only allocates the snapshot buffer if it isn't already + * allocated - it doesn't also take a snapshot. + * + * This is meant to be used in cases where the snapshot buffer needs + * to be set up for events that can't sleep but need to be able to + * trigger a snapshot. + */ +int tracing_alloc_snapshot(void) +{ + struct trace_array *tr = &global_trace; + int ret; + + ret = alloc_snapshot(tr); + WARN_ON(ret < 0); + + return ret; +} +EXPORT_SYMBOL_GPL(tracing_alloc_snapshot); + +/** * trace_snapshot_alloc - allocate and take a snapshot of the current buffer. * * This is similar to trace_snapshot(), but it will allocate the @@ -587,11 +661,10 @@ void free_snapshot(struct trace_array *tr) */ void tracing_snapshot_alloc(void) { - struct trace_array *tr = &global_trace; int ret; - ret = alloc_snapshot(tr); - if (WARN_ON(ret < 0)) + ret = tracing_alloc_snapshot(); + if (ret < 0) return; tracing_snapshot(); @@ -603,6 +676,12 @@ void tracing_snapshot(void) WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used"); } EXPORT_SYMBOL_GPL(tracing_snapshot); +int tracing_alloc_snapshot(void) +{ + WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used"); + return -ENODEV; +} +EXPORT_SYMBOL_GPL(tracing_alloc_snapshot); void tracing_snapshot_alloc(void) { /* Give warning */ @@ -744,7 +823,7 @@ static struct { { trace_clock_local, "local", 1 }, { trace_clock_global, "global", 1 }, { trace_clock_counter, "counter", 0 }, - { trace_clock_jiffies, "uptime", 1 }, + { trace_clock_jiffies, "uptime", 0 }, { trace_clock, "perf", 1 }, ARCH_TRACE_CLOCKS }; @@ -843,9 +922,12 @@ int trace_get_user(struct trace_parser *parser, const char __user *ubuf, if (isspace(ch)) { parser->buffer[parser->idx] = 0; parser->cont = false; - } else { + } else if (parser->idx < parser->size - 1) { parser->cont = true; parser->buffer[parser->idx++] = ch; + } else { + ret = -EINVAL; + goto out; } *ppos += read; @@ -895,27 +977,9 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt) return cnt; } -/* - * ftrace_max_lock is used to protect the swapping of buffers - * when taking a max snapshot. The buffers themselves are - * protected by per_cpu spinlocks. But the action of the swap - * needs its own lock. - * - * This is defined as a arch_spinlock_t in order to help - * with performance when lockdep debugging is enabled. - * - * It is also used in other places outside the update_max_tr - * so it needs to be defined outside of the - * CONFIG_TRACER_MAX_TRACE. 
- */ -static arch_spinlock_t ftrace_max_lock = - (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; - unsigned long __read_mostly tracing_thresh; #ifdef CONFIG_TRACER_MAX_TRACE -unsigned long __read_mostly tracing_max_latency; - /* * Copy the new maximum trace into the separate maximum-trace * structure. (this way the maximum trace is permanently saved, @@ -932,7 +996,7 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) max_buf->cpu = cpu; max_buf->time_start = data->preempt_timestamp; - max_data->saved_latency = tracing_max_latency; + max_data->saved_latency = tr->max_latency; max_data->critical_start = data->critical_start; max_data->critical_end = data->critical_end; @@ -980,14 +1044,14 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) return; } - arch_spin_lock(&ftrace_max_lock); + arch_spin_lock(&tr->max_lock); buf = tr->trace_buffer.buffer; tr->trace_buffer.buffer = tr->max_buffer.buffer; tr->max_buffer.buffer = buf; __update_max_tr(tr, tsk, cpu); - arch_spin_unlock(&ftrace_max_lock); + arch_spin_unlock(&tr->max_lock); } /** @@ -1013,7 +1077,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) return; } - arch_spin_lock(&ftrace_max_lock); + arch_spin_lock(&tr->max_lock); ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu); @@ -1031,17 +1095,17 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY); __update_max_tr(tr, tsk, cpu); - arch_spin_unlock(&ftrace_max_lock); + arch_spin_unlock(&tr->max_lock); } #endif /* CONFIG_TRACER_MAX_TRACE */ -static void default_wait_pipe(struct trace_iterator *iter) +static int wait_on_pipe(struct trace_iterator *iter) { /* Iterators are static, they should be filled or empty */ if (trace_buffer_iter(iter, iter->cpu_file)) - return; + return 0; - ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file); + return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file); } #ifdef CONFIG_FTRACE_STARTUP_TEST @@ -1152,8 +1216,6 @@ int register_tracer(struct tracer *type) else if (!type->flags->opts) type->flags->opts = dummy_tracer_opt; - if (!type->wait_pipe) - type->wait_pipe = default_wait_pipe; ret = run_tracer_selftest(type); if (ret < 0) @@ -1174,7 +1236,7 @@ int register_tracer(struct tracer *type) printk(KERN_INFO "Starting tracer '%s'\n", type->name); /* Do we want this tracer to start on bootup? */ - tracing_set_tracer(type->name); + tracing_set_tracer(&global_trace, type->name); default_bootup_tracer = NULL; /* disable other selftests, since this will break it. 
*/ tracing_selftest_disabled = true; @@ -1237,42 +1299,76 @@ void tracing_reset_all_online_cpus(void) } } -#define SAVED_CMDLINES 128 +#define SAVED_CMDLINES_DEFAULT 128 #define NO_CMDLINE_MAP UINT_MAX -static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1]; -static unsigned map_cmdline_to_pid[SAVED_CMDLINES]; -static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN]; -static int cmdline_idx; static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED; +struct saved_cmdlines_buffer { + unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1]; + unsigned *map_cmdline_to_pid; + unsigned cmdline_num; + int cmdline_idx; + char *saved_cmdlines; +}; +static struct saved_cmdlines_buffer *savedcmd; /* temporary disable recording */ static atomic_t trace_record_cmdline_disabled __read_mostly; -static void trace_init_cmdlines(void) +static inline char *get_saved_cmdlines(int idx) { - memset(&map_pid_to_cmdline, NO_CMDLINE_MAP, sizeof(map_pid_to_cmdline)); - memset(&map_cmdline_to_pid, NO_CMDLINE_MAP, sizeof(map_cmdline_to_pid)); - cmdline_idx = 0; + return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN]; } -int is_tracing_stopped(void) +static inline void set_cmdline(int idx, const char *cmdline) { - return global_trace.stop_count; + memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN); } -/** - * ftrace_off_permanent - disable all ftrace code permanently - * - * This should only be called when a serious anomally has - * been detected. This will turn off the function tracing, - * ring buffers, and other tracing utilites. It takes no - * locks and can be called from any context. - */ -void ftrace_off_permanent(void) +static int allocate_cmdlines_buffer(unsigned int val, + struct saved_cmdlines_buffer *s) { - tracing_disabled = 1; - ftrace_stop(); - tracing_off_permanent(); + s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid), + GFP_KERNEL); + if (!s->map_cmdline_to_pid) + return -ENOMEM; + + s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL); + if (!s->saved_cmdlines) { + kfree(s->map_cmdline_to_pid); + return -ENOMEM; + } + + s->cmdline_idx = 0; + s->cmdline_num = val; + memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP, + sizeof(s->map_pid_to_cmdline)); + memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP, + val * sizeof(*s->map_cmdline_to_pid)); + + return 0; +} + +static int trace_create_savedcmd(void) +{ + int ret; + + savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL); + if (!savedcmd) + return -ENOMEM; + + ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd); + if (ret < 0) { + kfree(savedcmd); + savedcmd = NULL; + return -ENOMEM; + } + + return 0; +} + +int is_tracing_stopped(void) +{ + return global_trace.stop_count; } /** @@ -1300,7 +1396,7 @@ void tracing_start(void) } /* Prevent the buffers from switching */ - arch_spin_lock(&ftrace_max_lock); + arch_spin_lock(&global_trace.max_lock); buffer = global_trace.trace_buffer.buffer; if (buffer) @@ -1312,9 +1408,8 @@ void tracing_start(void) ring_buffer_record_enable(buffer); #endif - arch_spin_unlock(&ftrace_max_lock); + arch_spin_unlock(&global_trace.max_lock); - ftrace_start(); out: raw_spin_unlock_irqrestore(&global_trace.start_lock, flags); } @@ -1361,13 +1456,12 @@ void tracing_stop(void) struct ring_buffer *buffer; unsigned long flags; - ftrace_stop(); raw_spin_lock_irqsave(&global_trace.start_lock, flags); if (global_trace.stop_count++) goto out; /* Prevent the buffers from switching */ - arch_spin_lock(&ftrace_max_lock); + arch_spin_lock(&global_trace.max_lock); buffer = global_trace.trace_buffer.buffer; 
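An aside, not part of the patch: the new struct saved_cmdlines_buffer above is essentially a fixed-size, two-way pid<->comm cache with slot reuse. The sketch below is a rough user-space model of that save/evict logic; the array names, PID_MAX value and slot count are illustrative only, and the real trace_save_cmdline() additionally guards everything with a trylock on trace_cmdline_lock so it never blocks in the scheduling path.

#include <stdio.h>
#include <string.h>

#define PID_MAX   32768			/* illustrative, like PID_MAX_DEFAULT */
#define COMM_LEN  16			/* like TASK_COMM_LEN */
#define SLOTS     128			/* like SAVED_CMDLINES_DEFAULT */
#define NO_MAP    ((unsigned)-1)	/* like NO_CMDLINE_MAP */

static unsigned pid_to_slot[PID_MAX + 1];
static unsigned slot_to_pid[SLOTS];
static char comms[SLOTS][COMM_LEN];
static unsigned next_slot;

static void cache_init(void)
{
	/* all-ones bytes == NO_MAP in every entry */
	memset(pid_to_slot, 0xff, sizeof(pid_to_slot));
	memset(slot_to_pid, 0xff, sizeof(slot_to_pid));
}

static void cache_save(unsigned pid, const char *comm)
{
	unsigned slot;

	if (!pid || pid > PID_MAX)
		return;

	slot = pid_to_slot[pid];
	if (slot == NO_MAP) {
		slot = (next_slot + 1) % SLOTS;
		/* Reusing a slot: forget the stale pid that owned it. */
		if (slot_to_pid[slot] != NO_MAP)
			pid_to_slot[slot_to_pid[slot]] = NO_MAP;
		slot_to_pid[slot] = pid;
		pid_to_slot[pid] = slot;
		next_slot = slot;
	}
	strncpy(comms[slot], comm, COMM_LEN - 1);
	comms[slot][COMM_LEN - 1] = '\0';
}

static const char *cache_find(unsigned pid)
{
	unsigned slot = pid <= PID_MAX ? pid_to_slot[pid] : NO_MAP;

	return slot == NO_MAP ? "<...>" : comms[slot];
}

int main(void)
{
	cache_init();
	cache_save(1234, "bash");
	cache_save(4321, "make");
	printf("1234 -> %s\n", cache_find(1234));
	printf("9999 -> %s\n", cache_find(9999));	/* never saved: "<...>" */
	return 0;
}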
if (buffer) @@ -1379,7 +1473,7 @@ void tracing_stop(void) ring_buffer_record_disable(buffer); #endif - arch_spin_unlock(&ftrace_max_lock); + arch_spin_unlock(&global_trace.max_lock); out: raw_spin_unlock_irqrestore(&global_trace.start_lock, flags); @@ -1408,12 +1502,12 @@ static void tracing_stop_tr(struct trace_array *tr) void trace_stop_cmdline_recording(void); -static void trace_save_cmdline(struct task_struct *tsk) +static int trace_save_cmdline(struct task_struct *tsk) { unsigned pid, idx; if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT)) - return; + return 0; /* * It's not the end of the world if we don't get @@ -1422,11 +1516,11 @@ static void trace_save_cmdline(struct task_struct *tsk) * so if we miss here, then better luck next time. */ if (!arch_spin_trylock(&trace_cmdline_lock)) - return; + return 0; - idx = map_pid_to_cmdline[tsk->pid]; + idx = savedcmd->map_pid_to_cmdline[tsk->pid]; if (idx == NO_CMDLINE_MAP) { - idx = (cmdline_idx + 1) % SAVED_CMDLINES; + idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num; /* * Check whether the cmdline buffer at idx has a pid @@ -1434,22 +1528,24 @@ static void trace_save_cmdline(struct task_struct *tsk) * need to clear the map_pid_to_cmdline. Otherwise we * would read the new comm for the old pid. */ - pid = map_cmdline_to_pid[idx]; + pid = savedcmd->map_cmdline_to_pid[idx]; if (pid != NO_CMDLINE_MAP) - map_pid_to_cmdline[pid] = NO_CMDLINE_MAP; + savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP; - map_cmdline_to_pid[idx] = tsk->pid; - map_pid_to_cmdline[tsk->pid] = idx; + savedcmd->map_cmdline_to_pid[idx] = tsk->pid; + savedcmd->map_pid_to_cmdline[tsk->pid] = idx; - cmdline_idx = idx; + savedcmd->cmdline_idx = idx; } - memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN); + set_cmdline(idx, tsk->comm); arch_spin_unlock(&trace_cmdline_lock); + + return 1; } -void trace_find_cmdline(int pid, char comm[]) +static void __trace_find_cmdline(int pid, char comm[]) { unsigned map; @@ -1468,13 +1564,19 @@ void trace_find_cmdline(int pid, char comm[]) return; } - preempt_disable(); - arch_spin_lock(&trace_cmdline_lock); - map = map_pid_to_cmdline[pid]; + map = savedcmd->map_pid_to_cmdline[pid]; if (map != NO_CMDLINE_MAP) - strcpy(comm, saved_cmdlines[map]); + strcpy(comm, get_saved_cmdlines(map)); else strcpy(comm, "<...>"); +} + +void trace_find_cmdline(int pid, char comm[]) +{ + preempt_disable(); + arch_spin_lock(&trace_cmdline_lock); + + __trace_find_cmdline(pid, comm); arch_spin_unlock(&trace_cmdline_lock); preempt_enable(); @@ -1488,9 +1590,8 @@ void tracing_record_cmdline(struct task_struct *tsk) if (!__this_cpu_read(trace_cmdline_save)) return; - __this_cpu_write(trace_cmdline_save, false); - - trace_save_cmdline(tsk); + if (trace_save_cmdline(tsk)) + __this_cpu_write(trace_cmdline_save, false); } void @@ -1509,7 +1610,8 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, #endif ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) | ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) | - (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0); + (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) | + (test_preempt_need_resched() ? 
TRACE_FLAG_PREEMPT_RESCHED : 0); } EXPORT_SYMBOL_GPL(tracing_generic_entry_update); @@ -1558,15 +1660,31 @@ void trace_buffer_unlock_commit(struct ring_buffer *buffer, } EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit); +static struct ring_buffer *temp_buffer; + struct ring_buffer_event * trace_event_buffer_lock_reserve(struct ring_buffer **current_rb, struct ftrace_event_file *ftrace_file, int type, unsigned long len, unsigned long flags, int pc) { + struct ring_buffer_event *entry; + *current_rb = ftrace_file->tr->trace_buffer.buffer; - return trace_buffer_lock_reserve(*current_rb, + entry = trace_buffer_lock_reserve(*current_rb, type, len, flags, pc); + /* + * If tracing is off, but we have triggers enabled + * we still need to look at the event data. Use the temp_buffer + * to store the trace event for the trigger to use. It's recursive + * safe and will not be recorded anywhere. + */ + if (!entry && ftrace_file->flags & FTRACE_EVENT_FL_TRIGGER_COND) { + *current_rb = temp_buffer; + entry = trace_buffer_lock_reserve(*current_rb, + type, len, flags, pc); + } + return entry; } EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve); @@ -1630,7 +1748,7 @@ trace_function(struct trace_array *tr, entry->ip = ip; entry->parent_ip = parent_ip; - if (!filter_check_discard(call, entry, buffer, event)) + if (!call_filter_check_discard(call, entry, buffer, event)) __buffer_unlock_commit(buffer, event); } @@ -1676,7 +1794,7 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer, */ barrier(); if (use_stack == 1) { - trace.entries = &__get_cpu_var(ftrace_stack).calls[0]; + trace.entries = this_cpu_ptr(ftrace_stack.calls); trace.max_entries = FTRACE_STACK_MAX_ENTRIES; if (regs) @@ -1714,7 +1832,7 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer, entry->size = trace.nr_entries; - if (!filter_check_discard(call, entry, buffer, event)) + if (!call_filter_check_discard(call, entry, buffer, event)) __buffer_unlock_commit(buffer, event); out: @@ -1816,7 +1934,7 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) trace.entries = entry->caller; save_stack_trace_user(&trace); - if (!filter_check_discard(call, entry, buffer, event)) + if (!call_filter_check_discard(call, entry, buffer, event)) __buffer_unlock_commit(buffer, event); out_drop_count: @@ -1925,7 +2043,21 @@ void trace_printk_init_buffers(void) if (alloc_percpu_trace_buffer()) return; - pr_info("ftrace: Allocated trace_printk buffers\n"); + /* trace_printk() is for debug use only. Don't use it in production. */ + + pr_warning("\n**********************************************************\n"); + pr_warning("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); + pr_warning("** **\n"); + pr_warning("** trace_printk() being used. Allocating extra memory. **\n"); + pr_warning("** **\n"); + pr_warning("** This means that this is a DEBUG kernel and it is **\n"); + pr_warning("** unsafe for production use. **\n"); + pr_warning("** **\n"); + pr_warning("** If you see this message and you are not debugging **\n"); + pr_warning("** the kernel, report this immediately to your vendor! 
**\n"); + pr_warning("** **\n"); + pr_warning("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); + pr_warning("**********************************************************\n"); /* Expand the buffers to set size */ tracing_update_buffers(); @@ -2008,7 +2140,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) entry->fmt = fmt; memcpy(entry->buf, tbuffer, sizeof(u32) * len); - if (!filter_check_discard(call, entry, buffer, event)) { + if (!call_filter_check_discard(call, entry, buffer, event)) { __buffer_unlock_commit(buffer, event); ftrace_trace_stack(buffer, flags, 6, pc); } @@ -2063,7 +2195,7 @@ __trace_array_vprintk(struct ring_buffer *buffer, memcpy(&entry->buf, tbuffer, len); entry->buf[len] = '\0'; - if (!filter_check_discard(call, entry, buffer, event)) { + if (!call_filter_check_discard(call, entry, buffer, event)) { __buffer_unlock_commit(buffer, event); ftrace_trace_stack(buffer, flags, 6, pc); } @@ -2760,7 +2892,7 @@ static void show_snapshot_main_help(struct seq_file *m) seq_printf(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"); seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"); seq_printf(m, "# Takes a snapshot of the main buffer.\n"); - seq_printf(m, "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate)\n"); + seq_printf(m, "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"); seq_printf(m, "# (Doesn't have to be '2' works with any number that\n"); seq_printf(m, "# is not a '0' or '1')\n"); } @@ -2964,6 +3096,11 @@ int tracing_open_generic(struct inode *inode, struct file *filp) return 0; } +bool tracing_is_disabled(void) +{ + return (tracing_disabled) ? true: false; +} + /* * Open and update trace_array ref count. * Must have the current trace_array passed to it. @@ -3074,27 +3211,52 @@ static int tracing_open(struct inode *inode, struct file *file) return ret; } +/* + * Some tracers are not suitable for instance buffers. + * A tracer is always available for the global array (toplevel) + * or if it explicitly states that it is. 
+ */ +static bool +trace_ok_for_array(struct tracer *t, struct trace_array *tr) +{ + return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances; +} + +/* Find the next tracer that this trace array may use */ +static struct tracer * +get_tracer_for_array(struct trace_array *tr, struct tracer *t) +{ + while (t && !trace_ok_for_array(t, tr)) + t = t->next; + + return t; +} + static void * t_next(struct seq_file *m, void *v, loff_t *pos) { + struct trace_array *tr = m->private; struct tracer *t = v; (*pos)++; if (t) - t = t->next; + t = get_tracer_for_array(tr, t->next); return t; } static void *t_start(struct seq_file *m, loff_t *pos) { + struct trace_array *tr = m->private; struct tracer *t; loff_t l = 0; mutex_lock(&trace_types_lock); - for (t = trace_types; t && l < *pos; t = t_next(m, t, &l)) - ; + + t = get_tracer_for_array(tr, trace_types); + for (; t && l < *pos; t = t_next(m, t, &l)) + ; return t; } @@ -3129,10 +3291,21 @@ static const struct seq_operations show_traces_seq_ops = { static int show_traces_open(struct inode *inode, struct file *file) { + struct trace_array *tr = inode->i_private; + struct seq_file *m; + int ret; + if (tracing_disabled) return -ENODEV; - return seq_open(file, &show_traces_seq_ops); + ret = seq_open(file, &show_traces_seq_ops); + if (ret) + return ret; + + m = file->private_data; + m->private = tr; + + return 0; } static ssize_t @@ -3142,19 +3315,23 @@ tracing_write_stub(struct file *filp, const char __user *ubuf, return count; } -static loff_t tracing_seek(struct file *file, loff_t offset, int origin) +loff_t tracing_lseek(struct file *file, loff_t offset, int whence) { + int ret; + if (file->f_mode & FMODE_READ) - return seq_lseek(file, offset, origin); + ret = seq_lseek(file, offset, whence); else - return 0; + file->f_pos = ret = 0; + + return ret; } static const struct file_operations tracing_fops = { .open = tracing_open, .read = seq_read, .write = tracing_write_stub, - .llseek = tracing_seek, + .llseek = tracing_lseek, .release = tracing_release, }; @@ -3218,7 +3395,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, mutex_lock(&tracing_cpumask_update_lock); local_irq_disable(); - arch_spin_lock(&ftrace_max_lock); + arch_spin_lock(&tr->max_lock); for_each_tracing_cpu(cpu) { /* * Increase/decrease the disabled counter if we are @@ -3235,7 +3412,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu); } } - arch_spin_unlock(&ftrace_max_lock); + arch_spin_unlock(&tr->max_lock); local_irq_enable(); cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new); @@ -3288,13 +3465,14 @@ static int tracing_trace_options_show(struct seq_file *m, void *v) return 0; } -static int __set_tracer_option(struct tracer *trace, +static int __set_tracer_option(struct trace_array *tr, struct tracer_flags *tracer_flags, struct tracer_opt *opts, int neg) { + struct tracer *trace = tr->current_trace; int ret; - ret = trace->set_flag(tracer_flags->val, opts->bit, !neg); + ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg); if (ret) return ret; @@ -3306,8 +3484,9 @@ static int __set_tracer_option(struct tracer *trace, } /* Try to assign a tracer specific option */ -static int set_tracer_option(struct tracer *trace, char *cmp, int neg) +static int set_tracer_option(struct trace_array *tr, char *cmp, int neg) { + struct tracer *trace = tr->current_trace; struct tracer_flags *tracer_flags = trace->flags; struct tracer_opt *opts = NULL; int i; @@ -3316,8 +3495,7 @@ static int 
set_tracer_option(struct tracer *trace, char *cmp, int neg) opts = &tracer_flags->opts[i]; if (strcmp(cmp, opts->name) == 0) - return __set_tracer_option(trace, trace->flags, - opts, neg); + return __set_tracer_option(tr, trace->flags, opts, neg); } return -EINVAL; @@ -3340,7 +3518,7 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled) /* Give the tracer a chance to approve the change */ if (tr->current_trace->flag_changed) - if (tr->current_trace->flag_changed(tr->current_trace, mask, !!enabled)) + if (tr->current_trace->flag_changed(tr, mask, !!enabled)) return -EINVAL; if (enabled) @@ -3389,7 +3567,7 @@ static int trace_set_options(struct trace_array *tr, char *option) /* If no option could be set, test the specific tracer options */ if (!trace_options[i]) - ret = set_tracer_option(tr->current_trace, cmp, neg); + ret = set_tracer_option(tr, cmp, neg); mutex_unlock(&trace_types_lock); @@ -3474,60 +3652,106 @@ static const char readme_msg[] = " instances\t\t- Make sub-buffers with: mkdir instances/foo\n" "\t\t\t Remove sub-buffer with rmdir\n" " trace_options\t\t- Set format or modify how tracing happens\n" - "\t\t\t Disable an option by adding a suffix 'no' to the option name\n" + "\t\t\t Disable an option by adding a suffix 'no' to the\n" + "\t\t\t option name\n" + " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n" #ifdef CONFIG_DYNAMIC_FTRACE "\n available_filter_functions - list of functions that can be filtered on\n" - " set_ftrace_filter\t- echo function name in here to only trace these functions\n" - " accepts: func_full_name, *func_end, func_begin*, *func_middle*\n" - " modules: Can select a group via module\n" - " Format: :mod:<module-name>\n" - " example: echo :mod:ext3 > set_ftrace_filter\n" - " triggers: a command to perform when function is hit\n" - " Format: <function>:<trigger>[:count]\n" - " trigger: traceon, traceoff\n" - " enable_event:<system>:<event>\n" - " disable_event:<system>:<event>\n" + " set_ftrace_filter\t- echo function name in here to only trace these\n" + "\t\t\t functions\n" + "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n" + "\t modules: Can select a group via module\n" + "\t Format: :mod:<module-name>\n" + "\t example: echo :mod:ext3 > set_ftrace_filter\n" + "\t triggers: a command to perform when function is hit\n" + "\t Format: <function>:<trigger>[:count]\n" + "\t trigger: traceon, traceoff\n" + "\t\t enable_event:<system>:<event>\n" + "\t\t disable_event:<system>:<event>\n" #ifdef CONFIG_STACKTRACE - " stacktrace\n" + "\t\t stacktrace\n" #endif #ifdef CONFIG_TRACER_SNAPSHOT - " snapshot\n" + "\t\t snapshot\n" #endif - " example: echo do_fault:traceoff > set_ftrace_filter\n" - " echo do_trap:traceoff:3 > set_ftrace_filter\n" - " The first one will disable tracing every time do_fault is hit\n" - " The second will disable tracing at most 3 times when do_trap is hit\n" - " The first time do trap is hit and it disables tracing, the counter\n" - " will decrement to 2. If tracing is already disabled, the counter\n" - " will not decrement. 
It only decrements when the trigger did work\n" - " To remove trigger without count:\n" - " echo '!<function>:<trigger> > set_ftrace_filter\n" - " To remove trigger with a count:\n" - " echo '!<function>:<trigger>:0 > set_ftrace_filter\n" + "\t\t dump\n" + "\t\t cpudump\n" + "\t example: echo do_fault:traceoff > set_ftrace_filter\n" + "\t echo do_trap:traceoff:3 > set_ftrace_filter\n" + "\t The first one will disable tracing every time do_fault is hit\n" + "\t The second will disable tracing at most 3 times when do_trap is hit\n" + "\t The first time do trap is hit and it disables tracing, the\n" + "\t counter will decrement to 2. If tracing is already disabled,\n" + "\t the counter will not decrement. It only decrements when the\n" + "\t trigger did work\n" + "\t To remove trigger without count:\n" + "\t echo '!<function>:<trigger> > set_ftrace_filter\n" + "\t To remove trigger with a count:\n" + "\t echo '!<function>:<trigger>:0 > set_ftrace_filter\n" " set_ftrace_notrace\t- echo function name in here to never trace.\n" - " accepts: func_full_name, *func_end, func_begin*, *func_middle*\n" - " modules: Can select a group via module command :mod:\n" - " Does not accept triggers\n" + "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n" + "\t modules: Can select a group via module command :mod:\n" + "\t Does not accept triggers\n" #endif /* CONFIG_DYNAMIC_FTRACE */ #ifdef CONFIG_FUNCTION_TRACER - " set_ftrace_pid\t- Write pid(s) to only function trace those pids (function)\n" + " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n" + "\t\t (function)\n" #endif #ifdef CONFIG_FUNCTION_GRAPH_TRACER " set_graph_function\t- Trace the nested calls of a function (function_graph)\n" " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n" #endif #ifdef CONFIG_TRACER_SNAPSHOT - "\n snapshot\t\t- Like 'trace' but shows the content of the static snapshot buffer\n" - "\t\t\t Read the contents for more information\n" + "\n snapshot\t\t- Like 'trace' but shows the content of the static\n" + "\t\t\t snapshot buffer. 
Read the contents for more\n" + "\t\t\t information\n" #endif #ifdef CONFIG_STACK_TRACER " stack_trace\t\t- Shows the max stack trace when active\n" " stack_max_size\t- Shows current max stack size that was traced\n" - "\t\t\t Write into this file to reset the max size (trigger a new trace)\n" + "\t\t\t Write into this file to reset the max size (trigger a\n" + "\t\t\t new trace)\n" #ifdef CONFIG_DYNAMIC_FTRACE - " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace traces\n" + " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n" + "\t\t\t traces\n" #endif #endif /* CONFIG_STACK_TRACER */ + " events/\t\t- Directory containing all trace event subsystems:\n" + " enable\t\t- Write 0/1 to enable/disable tracing of all events\n" + " events/<system>/\t- Directory containing all trace events for <system>:\n" + " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n" + "\t\t\t events\n" + " filter\t\t- If set, only events passing filter are traced\n" + " events/<system>/<event>/\t- Directory containing control files for\n" + "\t\t\t <event>:\n" + " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n" + " filter\t\t- If set, only events passing filter are traced\n" + " trigger\t\t- If set, a command to perform when event is hit\n" + "\t Format: <trigger>[:count][if <filter>]\n" + "\t trigger: traceon, traceoff\n" + "\t enable_event:<system>:<event>\n" + "\t disable_event:<system>:<event>\n" +#ifdef CONFIG_STACKTRACE + "\t\t stacktrace\n" +#endif +#ifdef CONFIG_TRACER_SNAPSHOT + "\t\t snapshot\n" +#endif + "\t example: echo traceoff > events/block/block_unplug/trigger\n" + "\t echo traceoff:3 > events/block/block_unplug/trigger\n" + "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n" + "\t events/block/block_unplug/trigger\n" + "\t The first disables tracing every time block_unplug is hit.\n" + "\t The second disables tracing the first 3 times block_unplug is hit.\n" + "\t The third enables the kmalloc event the first 3 times block_unplug\n" + "\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n" + "\t Like function triggers, the counter is only decremented if it\n" + "\t enabled or disabled tracing.\n" + "\t To remove a trigger without a count:\n" + "\t echo '!<trigger> > <system>/<event>/trigger\n" + "\t To remove a trigger with a count:\n" + "\t echo '!<trigger>:0 > <system>/<event>/trigger\n" + "\t Filters can be ignored when removing a trigger.\n" ; static ssize_t @@ -3544,55 +3768,153 @@ static const struct file_operations tracing_readme_fops = { .llseek = generic_file_llseek, }; +static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos) +{ + unsigned int *ptr = v; + + if (*pos || m->count) + ptr++; + + (*pos)++; + + for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num]; + ptr++) { + if (*ptr == -1 || *ptr == NO_CMDLINE_MAP) + continue; + + return ptr; + } + + return NULL; +} + +static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos) +{ + void *v; + loff_t l = 0; + + preempt_disable(); + arch_spin_lock(&trace_cmdline_lock); + + v = &savedcmd->map_cmdline_to_pid[0]; + while (l <= *pos) { + v = saved_cmdlines_next(m, v, &l); + if (!v) + return NULL; + } + + return v; +} + +static void saved_cmdlines_stop(struct seq_file *m, void *v) +{ + arch_spin_unlock(&trace_cmdline_lock); + preempt_enable(); +} + +static int saved_cmdlines_show(struct seq_file *m, void *v) +{ + char buf[TASK_COMM_LEN]; + unsigned int *pid = v; + + __trace_find_cmdline(*pid, buf); + 
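Another illustrative aside, not from the diff: the saved_cmdlines seq_file above boils down to "walk map_cmdline_to_pid, skip empty slots, print pid and comm". A stripped-down user-space model of that ->start()/->next()/->show() walk is sketched below; the real version also replays ->next() to cope with seq_file buffer restarts (the m->count test) and holds trace_cmdline_lock with preemption disabled, which is omitted here. All array contents are invented sample data.

#include <stdio.h>

#define SLOTS  8
#define NO_MAP ((unsigned)-1)

/* A tiny, pre-populated stand-in for savedcmd->map_cmdline_to_pid. */
static unsigned slot_to_pid[SLOTS] = {
	NO_MAP, 42, NO_MAP, 7, NO_MAP, NO_MAP, 1001, NO_MAP
};
static const char *comms[SLOTS] = {
	"", "bash", "", "kworker/0:1", "", "", "make", ""
};

/* Scan forward to the next occupied slot. */
static unsigned *first_used(unsigned *ptr)
{
	for (; ptr < &slot_to_pid[SLOTS]; ptr++)
		if (*ptr != NO_MAP)
			return ptr;
	return NULL;
}

/* ->start(): position the cursor on entry number pos (0-based). */
static unsigned *cmdlines_start(long pos)
{
	unsigned *v = first_used(&slot_to_pid[0]);

	while (v && pos--)
		v = first_used(v + 1);
	return v;
}

/* ->next(): advance the cursor and the file position together. */
static unsigned *cmdlines_next(unsigned *v, long *pos)
{
	(*pos)++;
	return first_used(v + 1);
}

/* ->show(): one "pid comm" line, like saved_cmdlines_show(). */
static void cmdlines_show(const unsigned *pid)
{
	printf("%u %s\n", *pid, comms[pid - slot_to_pid]);
}

int main(void)
{
	long pos = 0;
	unsigned *v;

	for (v = cmdlines_start(pos); v; v = cmdlines_next(v, &pos))
		cmdlines_show(v);
	return 0;
}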
seq_printf(m, "%d %s\n", *pid, buf); + return 0; +} + +static const struct seq_operations tracing_saved_cmdlines_seq_ops = { + .start = saved_cmdlines_start, + .next = saved_cmdlines_next, + .stop = saved_cmdlines_stop, + .show = saved_cmdlines_show, +}; + +static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp) +{ + if (tracing_disabled) + return -ENODEV; + + return seq_open(filp, &tracing_saved_cmdlines_seq_ops); +} + +static const struct file_operations tracing_saved_cmdlines_fops = { + .open = tracing_saved_cmdlines_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + static ssize_t -tracing_saved_cmdlines_read(struct file *file, char __user *ubuf, - size_t cnt, loff_t *ppos) +tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) { - char *buf_comm; - char *file_buf; - char *buf; - int len = 0; - int pid; - int i; + char buf[64]; + int r; + + arch_spin_lock(&trace_cmdline_lock); + r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num); + arch_spin_unlock(&trace_cmdline_lock); - file_buf = kmalloc(SAVED_CMDLINES*(16+TASK_COMM_LEN), GFP_KERNEL); - if (!file_buf) + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); +} + +static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s) +{ + kfree(s->saved_cmdlines); + kfree(s->map_cmdline_to_pid); + kfree(s); +} + +static int tracing_resize_saved_cmdlines(unsigned int val) +{ + struct saved_cmdlines_buffer *s, *savedcmd_temp; + + s = kmalloc(sizeof(*s), GFP_KERNEL); + if (!s) return -ENOMEM; - buf_comm = kmalloc(TASK_COMM_LEN, GFP_KERNEL); - if (!buf_comm) { - kfree(file_buf); + if (allocate_cmdlines_buffer(val, s) < 0) { + kfree(s); return -ENOMEM; } - buf = file_buf; + arch_spin_lock(&trace_cmdline_lock); + savedcmd_temp = savedcmd; + savedcmd = s; + arch_spin_unlock(&trace_cmdline_lock); + free_saved_cmdlines_buffer(savedcmd_temp); + + return 0; +} - for (i = 0; i < SAVED_CMDLINES; i++) { - int r; +static ssize_t +tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + unsigned long val; + int ret; - pid = map_cmdline_to_pid[i]; - if (pid == -1 || pid == NO_CMDLINE_MAP) - continue; + ret = kstrtoul_from_user(ubuf, cnt, 10, &val); + if (ret) + return ret; - trace_find_cmdline(pid, buf_comm); - r = sprintf(buf, "%d %s\n", pid, buf_comm); - buf += r; - len += r; - } + /* must have at least 1 entry or less than PID_MAX_DEFAULT */ + if (!val || val > PID_MAX_DEFAULT) + return -EINVAL; - len = simple_read_from_buffer(ubuf, cnt, ppos, - file_buf, len); + ret = tracing_resize_saved_cmdlines((unsigned int)val); + if (ret < 0) + return ret; - kfree(file_buf); - kfree(buf_comm); + *ppos += cnt; - return len; + return cnt; } -static const struct file_operations tracing_saved_cmdlines_fops = { - .open = tracing_open_generic, - .read = tracing_saved_cmdlines_read, - .llseek = generic_file_llseek, +static const struct file_operations tracing_saved_cmdlines_size_fops = { + .open = tracing_open_generic, + .read = tracing_saved_cmdlines_size_read, + .write = tracing_saved_cmdlines_size_write, }; static ssize_t @@ -3775,10 +4097,26 @@ create_trace_option_files(struct trace_array *tr, struct tracer *tracer); static void destroy_trace_option_files(struct trace_option_dentry *topts); -static int tracing_set_tracer(const char *buf) +/* + * Used to clear out the tracer before deletion of an instance. + * Must have trace_types_lock held. 
+ */ +static void tracing_set_nop(struct trace_array *tr) +{ + if (tr->current_trace == &nop_trace) + return; + + tr->current_trace->enabled--; + + if (tr->current_trace->reset) + tr->current_trace->reset(tr); + + tr->current_trace = &nop_trace; +} + +static int tracing_set_tracer(struct trace_array *tr, const char *buf) { static struct trace_option_dentry *topts; - struct trace_array *tr = &global_trace; struct tracer *t; #ifdef CONFIG_TRACER_MAX_TRACE bool had_max_tr; @@ -3806,9 +4144,15 @@ static int tracing_set_tracer(const char *buf) if (t == tr->current_trace) goto out; + /* Some tracers are only allowed for the top level buffer */ + if (!trace_ok_for_array(t, tr)) { + ret = -EINVAL; + goto out; + } + trace_branch_disable(); - tr->current_trace->enabled = false; + tr->current_trace->enabled--; if (tr->current_trace->reset) tr->current_trace->reset(tr); @@ -3831,9 +4175,11 @@ static int tracing_set_tracer(const char *buf) free_snapshot(tr); } #endif - destroy_trace_option_files(topts); - - topts = create_trace_option_files(tr, t); + /* Currently, only the top instance has options */ + if (tr->flags & TRACE_ARRAY_FL_GLOBAL) { + destroy_trace_option_files(topts); + topts = create_trace_option_files(tr, t); + } #ifdef CONFIG_TRACER_MAX_TRACE if (t->use_max_tr && !had_max_tr) { @@ -3850,7 +4196,7 @@ static int tracing_set_tracer(const char *buf) } tr->current_trace = t; - tr->current_trace->enabled = true; + tr->current_trace->enabled++; trace_branch_enable(tr); out: mutex_unlock(&trace_types_lock); @@ -3862,6 +4208,7 @@ static ssize_t tracing_set_trace_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { + struct trace_array *tr = filp->private_data; char buf[MAX_TRACER_SIZE+1]; int i; size_t ret; @@ -3881,7 +4228,7 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf, for (i = cnt - 1; i > 0 && isspace(buf[i]); i--) buf[i] = 0; - err = tracing_set_tracer(buf); + err = tracing_set_tracer(tr, buf); if (err) return err; @@ -4039,29 +4386,11 @@ tracing_poll_pipe(struct file *filp, poll_table *poll_table) return trace_poll(iter, filp, poll_table); } -/* - * This is a make-shift waitqueue. - * A tracer might use this callback on some rare cases: - * - * 1) the current tracer might hold the runqueue lock when it wakes up - * a reader, hence a deadlock (sched, function, and function graph tracers) - * 2) the function tracers, trace all functions, we don't want - * the overhead of calling wake_up and friends - * (and tracing them too) - * - * Anyway, this is really very primitive wakeup. - */ -void poll_wait_pipe(struct trace_iterator *iter) -{ - set_current_state(TASK_INTERRUPTIBLE); - /* sleep for 100 msecs, and try again. */ - schedule_timeout(HZ / 10); -} - /* Must be called with trace_types_lock mutex held. */ static int tracing_wait_pipe(struct file *filp) { struct trace_iterator *iter = filp->private_data; + int ret; while (trace_empty(iter)) { @@ -4069,15 +4398,6 @@ static int tracing_wait_pipe(struct file *filp) return -EAGAIN; } - mutex_unlock(&iter->mutex); - - iter->trace->wait_pipe(iter); - - mutex_lock(&iter->mutex); - - if (signal_pending(current)) - return -EINTR; - /* * We block until we read something and tracing is disabled. 
* We still block if tracing is disabled, but we have never @@ -4089,6 +4409,18 @@ static int tracing_wait_pipe(struct file *filp) */ if (!tracing_is_on() && iter->pos) break; + + mutex_unlock(&iter->mutex); + + ret = wait_on_pipe(iter); + + mutex_lock(&iter->mutex); + + if (ret) + return ret; + + if (signal_pending(current)) + return -EINTR; } return 1; @@ -4198,12 +4530,6 @@ out: return sret; } -static void tracing_pipe_buf_release(struct pipe_inode_info *pipe, - struct pipe_buffer *buf) -{ - __free_page(buf->page); -} - static void tracing_spd_release_pipe(struct splice_pipe_desc *spd, unsigned int idx) { @@ -4212,10 +4538,8 @@ static void tracing_spd_release_pipe(struct splice_pipe_desc *spd, static const struct pipe_buf_operations tracing_pipe_buf_ops = { .can_merge = 0, - .map = generic_pipe_buf_map, - .unmap = generic_pipe_buf_unmap, .confirm = generic_pipe_buf_confirm, - .release = tracing_pipe_buf_release, + .release = generic_pipe_buf_release, .steal = generic_pipe_buf_steal, .get = generic_pipe_buf_get, }; @@ -4308,7 +4632,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, trace_access_lock(iter->cpu_file); /* Fill as many pages as possible. */ - for (i = 0, rem = len; i < pipe->buffers && rem; i++) { + for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) { spd.pages[i] = alloc_page(GFP_KERNEL); if (!spd.pages[i]) break; @@ -4595,25 +4919,10 @@ static int tracing_clock_show(struct seq_file *m, void *v) return 0; } -static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf, - size_t cnt, loff_t *fpos) +static int tracing_set_clock(struct trace_array *tr, const char *clockstr) { - struct seq_file *m = filp->private_data; - struct trace_array *tr = m->private; - char buf[64]; - const char *clockstr; int i; - if (cnt >= sizeof(buf)) - return -EINVAL; - - if (copy_from_user(&buf, ubuf, cnt)) - return -EFAULT; - - buf[cnt] = 0; - - clockstr = strstrip(buf); - for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) { if (strcmp(trace_clocks[i].name, clockstr) == 0) break; @@ -4641,6 +4950,32 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf, mutex_unlock(&trace_types_lock); + return 0; +} + +static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *fpos) +{ + struct seq_file *m = filp->private_data; + struct trace_array *tr = m->private; + char buf[64]; + const char *clockstr; + int ret; + + if (cnt >= sizeof(buf)) + return -EINVAL; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + clockstr = strstrip(buf); + + ret = tracing_set_clock(tr, clockstr); + if (ret) + return ret; + *fpos += cnt; return cnt; @@ -4899,7 +5234,7 @@ static const struct file_operations snapshot_fops = { .open = tracing_snapshot_open, .read = seq_read, .write = tracing_snapshot_write, - .llseek = tracing_seek, + .llseek = tracing_lseek, .release = tracing_snapshot_release, }; @@ -5008,8 +5343,12 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, goto out_unlock; } mutex_unlock(&trace_types_lock); - iter->trace->wait_pipe(iter); + ret = wait_on_pipe(iter); mutex_lock(&trace_types_lock); + if (ret) { + size = ret; + goto out_unlock; + } if (signal_pending(current)) { size = -EINTR; goto out_unlock; @@ -5090,8 +5429,6 @@ static void buffer_pipe_buf_get(struct pipe_inode_info *pipe, /* Pipe buffer operations for a buffer. 
*/ static const struct pipe_buf_operations buffer_pipe_buf_ops = { .can_merge = 0, - .map = generic_pipe_buf_map, - .unmap = generic_pipe_buf_unmap, .confirm = generic_pipe_buf_confirm, .release = buffer_pipe_buf_release, .steal = generic_pipe_buf_steal, @@ -5167,7 +5504,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, trace_access_lock(iter->cpu_file); entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file); - for (i = 0; i < pipe->buffers && len && entries; i++, len -= PAGE_SIZE) { + for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) { struct page *page; int r; @@ -5221,8 +5558,10 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, goto out; } mutex_unlock(&trace_types_lock); - iter->trace->wait_pipe(iter); + ret = wait_on_pipe(iter); mutex_lock(&trace_types_lock); + if (ret) + goto out; if (signal_pending(current)) { ret = -EINTR; goto out; @@ -5454,12 +5793,12 @@ static struct ftrace_func_command ftrace_snapshot_cmd = { .func = ftrace_trace_snapshot_callback, }; -static int register_snapshot_cmd(void) +static __init int register_snapshot_cmd(void) { return register_ftrace_command(&ftrace_snapshot_cmd); } #else -static inline int register_snapshot_cmd(void) { return 0; } +static inline __init int register_snapshot_cmd(void) { return 0; } #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */ struct dentry *tracing_init_dentry_tr(struct trace_array *tr) @@ -5601,7 +5940,7 @@ trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt, if (!!(topt->flags->val & topt->opt->bit) != val) { mutex_lock(&trace_types_lock); - ret = __set_tracer_option(topt->tr->current_trace, topt->flags, + ret = __set_tracer_option(topt->tr, topt->flags, topt->opt, !val); mutex_unlock(&trace_types_lock); if (ret) @@ -5869,6 +6208,8 @@ allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? 
RB_FL_OVERWRITE : 0; + buf->tr = tr; + buf->buffer = ring_buffer_alloc(size, rb_flags); if (!buf->buffer) return -ENOMEM; @@ -5913,6 +6254,28 @@ static int allocate_trace_buffers(struct trace_array *tr, int size) return 0; } +static void free_trace_buffer(struct trace_buffer *buf) +{ + if (buf->buffer) { + ring_buffer_free(buf->buffer); + buf->buffer = NULL; + free_percpu(buf->data); + buf->data = NULL; + } +} + +static void free_trace_buffers(struct trace_array *tr) +{ + if (!tr) + return; + + free_trace_buffer(&tr->trace_buffer); + +#ifdef CONFIG_TRACER_MAX_TRACE + free_trace_buffer(&tr->max_buffer); +#endif +} + static int new_instance_create(const char *name) { struct trace_array *tr; @@ -5942,6 +6305,8 @@ static int new_instance_create(const char *name) raw_spin_lock_init(&tr->start_lock); + tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; + tr->current_trace = &nop_trace; INIT_LIST_HEAD(&tr->systems); @@ -5969,8 +6334,7 @@ static int new_instance_create(const char *name) return 0; out_free_tr: - if (tr->trace_buffer.buffer) - ring_buffer_free(tr->trace_buffer.buffer); + free_trace_buffers(tr); free_cpumask_var(tr->tracing_cpumask); kfree(tr->name); kfree(tr); @@ -6006,10 +6370,11 @@ static int instance_delete(const char *name) list_del(&tr->list); + tracing_set_nop(tr); event_trace_del_tracer(tr); + ftrace_destroy_function_files(tr); debugfs_remove_recursive(tr->dir); - free_percpu(tr->trace_buffer.data); - ring_buffer_free(tr->trace_buffer.buffer); + free_trace_buffers(tr); kfree(tr->name); kfree(tr); @@ -6101,6 +6466,12 @@ init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer) { int cpu; + trace_create_file("available_tracers", 0444, d_tracer, + tr, &show_traces_fops); + + trace_create_file("current_tracer", 0644, d_tracer, + tr, &set_tracer_fops); + trace_create_file("tracing_cpumask", 0644, d_tracer, tr, &tracing_cpumask_fops); @@ -6131,6 +6502,14 @@ init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer) trace_create_file("tracing_on", 0644, d_tracer, tr, &rb_simple_fops); +#ifdef CONFIG_TRACER_MAX_TRACE + trace_create_file("tracing_max_latency", 0644, d_tracer, + &tr->max_latency, &tracing_max_lat_fops); +#endif + + if (ftrace_create_function_files(tr, d_tracer)) + WARN(1, "Could not allocate function filter files"); + #ifdef CONFIG_TRACER_SNAPSHOT trace_create_file("snapshot", 0644, d_tracer, tr, &snapshot_fops); @@ -6153,17 +6532,6 @@ static __init int tracer_init_debugfs(void) init_tracer_debugfs(&global_trace, d_tracer); - trace_create_file("available_tracers", 0444, d_tracer, - &global_trace, &show_traces_fops); - - trace_create_file("current_tracer", 0644, d_tracer, - &global_trace, &set_tracer_fops); - -#ifdef CONFIG_TRACER_MAX_TRACE - trace_create_file("tracing_max_latency", 0644, d_tracer, - &tracing_max_latency, &tracing_max_lat_fops); -#endif - trace_create_file("tracing_thresh", 0644, d_tracer, &tracing_thresh, &tracing_max_lat_fops); @@ -6173,6 +6541,9 @@ static __init int tracer_init_debugfs(void) trace_create_file("saved_cmdlines", 0444, d_tracer, NULL, &tracing_saved_cmdlines_fops); + trace_create_file("saved_cmdlines_size", 0644, d_tracer, + NULL, &tracing_saved_cmdlines_size_fops); + #ifdef CONFIG_DYNAMIC_FTRACE trace_create_file("dyn_ftrace_total_info", 0444, d_tracer, &ftrace_update_tot_cnt, &tracing_dyn_info_fops); @@ -6253,6 +6624,17 @@ void trace_init_global_iter(struct trace_iterator *iter) iter->trace = iter->tr->current_trace; iter->cpu_file = RING_BUFFER_ALL_CPUS; iter->trace_buffer = &global_trace.trace_buffer; + 
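For context only, not part of the patch: with available_tracers and current_tracer now created per trace_array, each instances/<name>/ directory becomes a self-contained tracing context. The sketch below shows how user space might exercise that; the tracefs mount point and the instance name "foo" are assumptions (the directory may instead be /sys/kernel/tracing on other setups).

#include <errno.h>
#include <stdio.h>
#include <sys/stat.h>

/* Assumed mount point; newer kernels also expose /sys/kernel/tracing. */
#define TRACE_DIR "/sys/kernel/debug/tracing"

static int write_file(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f)
		return -errno;
	fputs(val, f);
	return fclose(f) ? -errno : 0;
}

int main(void)
{
	char buf[256];
	FILE *f;

	/* mkdir under instances/ creates a new trace_array (new_instance_create). */
	if (mkdir(TRACE_DIR "/instances/foo", 0755) && errno != EEXIST) {
		perror("mkdir instance");
		return 1;
	}

	/* The instance now has its own tracer files; list what it offers. */
	f = fopen(TRACE_DIR "/instances/foo/available_tracers", "r");
	if (f) {
		if (fgets(buf, sizeof(buf), f))
			printf("instance tracers: %s", buf);
		fclose(f);
	}

	/* Per-instance ring buffer switch, independent of the top level. */
	write_file(TRACE_DIR "/instances/foo/tracing_on", "1");

	/* rmdir of instances/foo would tear it down again (instance_delete). */
	return 0;
}

Only tracers that opt in (see trace_ok_for_array() and ->allow_instances above) can be written to an instance's current_tracer, which is why the sketch merely lists them rather than assuming a particular one is accepted.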
+ if (iter->trace && iter->trace->open) + iter->trace->open(iter); + + /* Annotate start of buffers if we had overruns */ + if (ring_buffer_overruns(iter->trace_buffer->buffer)) + iter->iter_flags |= TRACE_FILE_ANNOTATE; + + /* Output in nanoseconds only if we are using a clock in nanoseconds. */ + if (trace_clocks[iter->tr->clock_id].in_ns) + iter->iter_flags |= TRACE_FILE_TIME_IN_NS; } void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) @@ -6393,17 +6775,30 @@ __init static int tracer_alloc_buffers(void) raw_spin_lock_init(&global_trace.start_lock); + /* Used for event triggers */ + temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE); + if (!temp_buffer) + goto out_free_cpumask; + + if (trace_create_savedcmd() < 0) + goto out_free_temp_buffer; + /* TODO: make the number of buffers hot pluggable with CPUS */ if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) { printk(KERN_ERR "tracer: failed to allocate ring buffer!\n"); WARN_ON(1); - goto out_free_cpumask; + goto out_free_savedcmd; } if (global_trace.buffer_disabled) tracing_off(); - trace_init_cmdlines(); + if (trace_boot_clock) { + ret = tracing_set_clock(&global_trace, trace_boot_clock); + if (ret < 0) + pr_warning("Trace clock %s not defined, going back to default\n", + trace_boot_clock); + } /* * register_tracer() might reference current_trace, so it @@ -6412,6 +6807,10 @@ __init static int tracer_alloc_buffers(void) */ global_trace.current_trace = &nop_trace; + global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; + + ftrace_init_global_array_ops(&global_trace); + register_tracer(&nop_trace); /* All seems OK, enable tracing */ @@ -6439,11 +6838,11 @@ __init static int tracer_alloc_buffers(void) return 0; +out_free_savedcmd: + free_saved_cmdlines_buffer(savedcmd); +out_free_temp_buffer: + ring_buffer_free(temp_buffer); out_free_cpumask: - free_percpu(global_trace.trace_buffer.data); -#ifdef CONFIG_TRACER_MAX_TRACE - free_percpu(global_trace.max_buffer.data); -#endif free_cpumask_var(global_trace.tracing_cpumask); out_free_buffer_mask: free_cpumask_var(tracing_buffer_mask); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 10c86fb7a2b..9258f5a815d 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1,3 +1,4 @@ + #ifndef _LINUX_KERNEL_TRACE_H #define _LINUX_KERNEL_TRACE_H @@ -12,6 +13,7 @@ #include <linux/hw_breakpoint.h> #include <linux/trace_seq.h> #include <linux/ftrace_event.h> +#include <linux/compiler.h> #ifdef CONFIG_FTRACE_SYSCALLS #include <asm/unistd.h> /* For NR_SYSCALLS */ @@ -124,6 +126,7 @@ enum trace_flag_type { TRACE_FLAG_NEED_RESCHED = 0x04, TRACE_FLAG_HARDIRQ = 0x08, TRACE_FLAG_SOFTIRQ = 0x10, + TRACE_FLAG_PREEMPT_RESCHED = 0x20, }; #define TRACE_BUF_SIZE 1024 @@ -187,13 +190,28 @@ struct trace_array { */ struct trace_buffer max_buffer; bool allocated_snapshot; + unsigned long max_latency; #endif + /* + * max_lock is used to protect the swapping of buffers + * when taking a max snapshot. The buffers themselves are + * protected by per_cpu spinlocks. But the action of the swap + * needs its own lock. + * + * This is defined as a arch_spinlock_t in order to help + * with performance when lockdep debugging is enabled. + * + * It is also used in other places outside the update_max_tr + * so it needs to be defined outside of the + * CONFIG_TRACER_MAX_TRACE. 
+ */ + arch_spinlock_t max_lock; int buffer_disabled; #ifdef CONFIG_FTRACE_SYSCALLS int sys_refcount_enter; int sys_refcount_exit; - DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls); - DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls); + struct ftrace_event_file __rcu *enter_syscall_files[NR_syscalls]; + struct ftrace_event_file __rcu *exit_syscall_files[NR_syscalls]; #endif int stop_count; int clock_id; @@ -208,6 +226,11 @@ struct trace_array { struct list_head events; cpumask_var_t tracing_cpumask; /* only trace on set CPUs */ int ref; +#ifdef CONFIG_FUNCTION_TRACER + struct ftrace_ops *ops; + /* function tracing enabled */ + int function_enabled; +#endif }; enum { @@ -229,6 +252,9 @@ static inline struct trace_array *top_trace_array(void) { struct trace_array *tr; + if (list_empty(&ftrace_trace_arrays)) + return NULL; + tr = list_entry(ftrace_trace_arrays.prev, typeof(*tr), list); WARN_ON(!(tr->flags & TRACE_ARRAY_FL_GLOBAL)); @@ -315,7 +341,6 @@ struct tracer_flags { * @stop: called when tracing is paused (echo 0 > tracing_enabled) * @open: called when the trace file is opened * @pipe_open: called when the trace_pipe file is opened - * @wait_pipe: override how the user waits for traces on trace_pipe * @close: called when the trace file is released * @pipe_close: called when the trace_pipe file is released * @read: override the default read callback on trace_pipe @@ -334,7 +359,6 @@ struct tracer { void (*stop)(struct trace_array *tr); void (*open)(struct trace_iterator *iter); void (*pipe_open)(struct trace_iterator *iter); - void (*wait_pipe)(struct trace_iterator *iter); void (*close)(struct trace_iterator *iter); void (*pipe_close)(struct trace_iterator *iter); ssize_t (*read)(struct trace_iterator *iter, @@ -353,14 +377,16 @@ struct tracer { void (*print_header)(struct seq_file *m); enum print_line_t (*print_line)(struct trace_iterator *iter); /* If you handled the flag setting, return 0 */ - int (*set_flag)(u32 old_flags, u32 bit, int set); + int (*set_flag)(struct trace_array *tr, + u32 old_flags, u32 bit, int set); /* Return 0 if OK with change, else return non-zero */ - int (*flag_changed)(struct tracer *tracer, + int (*flag_changed)(struct trace_array *tr, u32 mask, int set); struct tracer *next; struct tracer_flags *flags; + int enabled; bool print_max; - bool enabled; + bool allow_instances; #ifdef CONFIG_TRACER_MAX_TRACE bool use_max_tr; #endif @@ -406,13 +432,7 @@ enum { TRACE_FTRACE_IRQ_BIT, TRACE_FTRACE_SIRQ_BIT, - /* GLOBAL_BITs must be greater than FTRACE_BITs */ - TRACE_GLOBAL_BIT, - TRACE_GLOBAL_NMI_BIT, - TRACE_GLOBAL_IRQ_BIT, - TRACE_GLOBAL_SIRQ_BIT, - - /* INTERNAL_BITs must be greater than GLOBAL_BITs */ + /* INTERNAL_BITs must be greater than FTRACE_BITs */ TRACE_INTERNAL_BIT, TRACE_INTERNAL_NMI_BIT, TRACE_INTERNAL_IRQ_BIT, @@ -439,9 +459,6 @@ enum { #define TRACE_FTRACE_START TRACE_FTRACE_BIT #define TRACE_FTRACE_MAX ((1 << (TRACE_FTRACE_START + TRACE_CONTEXT_BITS)) - 1) -#define TRACE_GLOBAL_START TRACE_GLOBAL_BIT -#define TRACE_GLOBAL_MAX ((1 << (TRACE_GLOBAL_START + TRACE_CONTEXT_BITS)) - 1) - #define TRACE_LIST_START TRACE_INTERNAL_BIT #define TRACE_LIST_MAX ((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1) @@ -514,6 +531,7 @@ void tracing_reset_online_cpus(struct trace_buffer *buf); void tracing_reset_current(int cpu); void tracing_reset_all_online_cpus(void); int tracing_open_generic(struct inode *inode, struct file *filp); +bool tracing_is_disabled(void); struct dentry *trace_create_file(const char *name, umode_t mode, struct dentry *parent, @@ 
-549,8 +567,6 @@ void trace_init_global_iter(struct trace_iterator *iter); void tracing_iter_reset(struct trace_iterator *iter, int cpu); -void poll_wait_pipe(struct trace_iterator *iter); - void tracing_sched_switch_trace(struct trace_array *tr, struct task_struct *prev, struct task_struct *next, @@ -585,6 +601,8 @@ void tracing_start_sched_switch_record(void); int register_tracer(struct tracer *type); int is_tracing_stopped(void); +loff_t tracing_lseek(struct file *file, loff_t offset, int whence); + extern cpumask_var_t __read_mostly tracing_buffer_mask; #define for_each_tracing_cpu(cpu) \ @@ -595,8 +613,6 @@ extern unsigned long nsecs_to_usecs(unsigned long nsecs); extern unsigned long tracing_thresh; #ifdef CONFIG_TRACER_MAX_TRACE -extern unsigned long tracing_max_latency; - void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu); void update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu); @@ -711,6 +727,10 @@ extern unsigned long trace_flags; #define TRACE_GRAPH_PRINT_PROC 0x8 #define TRACE_GRAPH_PRINT_DURATION 0x10 #define TRACE_GRAPH_PRINT_ABS_TIME 0x20 +#define TRACE_GRAPH_PRINT_IRQS 0x40 +#define TRACE_GRAPH_PRINT_TAIL 0x80 +#define TRACE_GRAPH_PRINT_FILL_SHIFT 28 +#define TRACE_GRAPH_PRINT_FILL_MASK (0x3 << TRACE_GRAPH_PRINT_FILL_SHIFT) extern enum print_line_t print_graph_function_flags(struct trace_iterator *iter, u32 flags); @@ -730,15 +750,16 @@ extern void __trace_graph_return(struct trace_array *tr, #ifdef CONFIG_DYNAMIC_FTRACE /* TODO: make this variable */ #define FTRACE_GRAPH_MAX_FUNCS 32 -extern int ftrace_graph_filter_enabled; extern int ftrace_graph_count; extern unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS]; +extern int ftrace_graph_notrace_count; +extern unsigned long ftrace_graph_notrace_funcs[FTRACE_GRAPH_MAX_FUNCS]; static inline int ftrace_graph_addr(unsigned long addr) { int i; - if (!ftrace_graph_filter_enabled) + if (!ftrace_graph_count) return 1; for (i = 0; i < ftrace_graph_count; i++) { @@ -758,11 +779,31 @@ static inline int ftrace_graph_addr(unsigned long addr) return 0; } + +static inline int ftrace_graph_notrace_addr(unsigned long addr) +{ + int i; + + if (!ftrace_graph_notrace_count) + return 0; + + for (i = 0; i < ftrace_graph_notrace_count; i++) { + if (addr == ftrace_graph_notrace_funcs[i]) + return 1; + } + + return 0; +} #else static inline int ftrace_graph_addr(unsigned long addr) { return 1; } + +static inline int ftrace_graph_notrace_addr(unsigned long addr) +{ + return 0; +} #endif /* CONFIG_DYNAMIC_FTRACE */ #else /* CONFIG_FUNCTION_GRAPH_TRACER */ static inline enum print_line_t @@ -784,13 +825,45 @@ static inline int ftrace_trace_task(struct task_struct *task) return test_tsk_trace_trace(task); } extern int ftrace_is_dead(void); +int ftrace_create_function_files(struct trace_array *tr, + struct dentry *parent); +void ftrace_destroy_function_files(struct trace_array *tr); +void ftrace_init_global_array_ops(struct trace_array *tr); +void ftrace_init_array_ops(struct trace_array *tr, ftrace_func_t func); +void ftrace_reset_array_ops(struct trace_array *tr); +int using_ftrace_ops_list_func(void); #else static inline int ftrace_trace_task(struct task_struct *task) { return 1; } static inline int ftrace_is_dead(void) { return 0; } -#endif +static inline int +ftrace_create_function_files(struct trace_array *tr, + struct dentry *parent) +{ + return 0; +} +static inline void ftrace_destroy_function_files(struct trace_array *tr) { } +static inline __init void 
+ftrace_init_global_array_ops(struct trace_array *tr) { } +static inline void ftrace_reset_array_ops(struct trace_array *tr) { } +/* ftace_func_t type is not defined, use macro instead of static inline */ +#define ftrace_init_array_ops(tr, func) do { } while (0) +#endif /* CONFIG_FUNCTION_TRACER */ + +#if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_DYNAMIC_FTRACE) +void ftrace_create_filter_files(struct ftrace_ops *ops, + struct dentry *parent); +void ftrace_destroy_filter_files(struct ftrace_ops *ops); +#else +/* + * The ops parameter passed in is usually undefined. + * This must be a macro. + */ +#define ftrace_create_filter_files(ops, parent) do { } while (0) +#define ftrace_destroy_filter_files(ops) do { } while (0) +#endif /* CONFIG_FUNCTION_TRACER && CONFIG_DYNAMIC_FTRACE */ int ftrace_event_is_function(struct ftrace_event_call *call); @@ -986,40 +1059,216 @@ struct filter_pred { extern enum regex_type filter_parse_regex(char *buff, int len, char **search, int *not); -extern void print_event_filter(struct ftrace_event_call *call, +extern void print_event_filter(struct ftrace_event_file *file, struct trace_seq *s); -extern int apply_event_filter(struct ftrace_event_call *call, +extern int apply_event_filter(struct ftrace_event_file *file, char *filter_string); extern int apply_subsystem_event_filter(struct ftrace_subsystem_dir *dir, char *filter_string); extern void print_subsystem_event_filter(struct event_subsystem *system, struct trace_seq *s); extern int filter_assign_type(const char *type); +extern int create_event_filter(struct ftrace_event_call *call, + char *filter_str, bool set_str, + struct event_filter **filterp); +extern void free_event_filter(struct event_filter *filter); struct ftrace_event_field * trace_find_event_field(struct ftrace_event_call *call, char *name); -static inline int -filter_check_discard(struct ftrace_event_call *call, void *rec, - struct ring_buffer *buffer, - struct ring_buffer_event *event) -{ - if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) && - !filter_match_preds(call->filter, rec)) { - ring_buffer_discard_commit(buffer, event); - return 1; - } - - return 0; -} - extern void trace_event_enable_cmd_record(bool enable); extern int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr); extern int event_trace_del_tracer(struct trace_array *tr); +extern struct ftrace_event_file *find_event_file(struct trace_array *tr, + const char *system, + const char *event); + +static inline void *event_file_data(struct file *filp) +{ + return ACCESS_ONCE(file_inode(filp)->i_private); +} + extern struct mutex event_mutex; extern struct list_head ftrace_events; +extern const struct file_operations event_trigger_fops; + +extern int register_trigger_cmds(void); +extern void clear_event_triggers(struct trace_array *tr); + +struct event_trigger_data { + unsigned long count; + int ref; + struct event_trigger_ops *ops; + struct event_command *cmd_ops; + struct event_filter __rcu *filter; + char *filter_str; + void *private_data; + struct list_head list; +}; + +/** + * struct event_trigger_ops - callbacks for trace event triggers + * + * The methods in this structure provide per-event trigger hooks for + * various trigger operations. + * + * All the methods below, except for @init() and @free(), must be + * implemented. + * + * @func: The trigger 'probe' function called when the triggering + * event occurs. 
The data passed into this callback is the data + * that was supplied to the event_command @reg() function that + * registered the trigger (see struct event_command). + * + * @init: An optional initialization function called for the trigger + * when the trigger is registered (via the event_command reg() + * function). This can be used to perform per-trigger + * initialization such as incrementing a per-trigger reference + * count, for instance. This is usually implemented by the + * generic utility function @event_trigger_init() (see + * trace_event_triggers.c). + * + * @free: An optional de-initialization function called for the + * trigger when the trigger is unregistered (via the + * event_command @reg() function). This can be used to perform + * per-trigger de-initialization such as decrementing a + * per-trigger reference count and freeing corresponding trigger + * data, for instance. This is usually implemented by the + * generic utility function @event_trigger_free() (see + * trace_event_triggers.c). + * + * @print: The callback function invoked to have the trigger print + * itself. This is usually implemented by a wrapper function + * that calls the generic utility function @event_trigger_print() + * (see trace_event_triggers.c). + */ +struct event_trigger_ops { + void (*func)(struct event_trigger_data *data); + int (*init)(struct event_trigger_ops *ops, + struct event_trigger_data *data); + void (*free)(struct event_trigger_ops *ops, + struct event_trigger_data *data); + int (*print)(struct seq_file *m, + struct event_trigger_ops *ops, + struct event_trigger_data *data); +}; + +/** + * struct event_command - callbacks and data members for event commands + * + * Event commands are invoked by users by writing the command name + * into the 'trigger' file associated with a trace event. The + * parameters associated with a specific invocation of an event + * command are used to create an event trigger instance, which is + * added to the list of trigger instances associated with that trace + * event. When the event is hit, the set of triggers associated with + * that event is invoked. + * + * The data members in this structure provide per-event command data + * for various event commands. + * + * All the data members below, except for @post_trigger, must be set + * for each event command. + * + * @name: The unique name that identifies the event command. This is + * the name used when setting triggers via trigger files. + * + * @trigger_type: A unique id that identifies the event command + * 'type'. This value has two purposes, the first to ensure that + * only one trigger of the same type can be set at a given time + * for a particular event e.g. it doesn't make sense to have both + * a traceon and traceoff trigger attached to a single event at + * the same time, so traceon and traceoff have the same type + * though they have different names. The @trigger_type value is + * also used as a bit value for deferring the actual trigger + * action until after the current event is finished. Some + * commands need to do this if they themselves log to the trace + * buffer (see the @post_trigger() member below). @trigger_type + * values are defined by adding new values to the trigger_type + * enum in include/linux/ftrace_event.h. + * + * @post_trigger: A flag that says whether or not this command needs + * to have its action delayed until after the current event has + * been closed. 
Some triggers need to avoid being invoked while + * an event is currently in the process of being logged, since + * the trigger may itself log data into the trace buffer. Thus + * we make sure the current event is committed before invoking + * those triggers. To do that, the trigger invocation is split + * in two - the first part checks the filter using the current + * trace record; if a command has the @post_trigger flag set, it + * sets a bit for itself in the return value, otherwise it + * directly invokes the trigger. Once all commands have been + * either invoked or set their return flag, the current record is + * either committed or discarded. At that point, if any commands + * have deferred their triggers, those commands are finally + * invoked following the close of the current event. In other + * words, if the event_trigger_ops @func() probe implementation + * itself logs to the trace buffer, this flag should be set, + * otherwise it can be left unspecified. + * + * All the methods below, except for @set_filter(), must be + * implemented. + * + * @func: The callback function responsible for parsing and + * registering the trigger written to the 'trigger' file by the + * user. It allocates the trigger instance and registers it with + * the appropriate trace event. It makes use of the other + * event_command callback functions to orchestrate this, and is + * usually implemented by the generic utility function + * @event_trigger_callback() (see trace_event_triggers.c). + * + * @reg: Adds the trigger to the list of triggers associated with the + * event, and enables the event trigger itself, after + * initializing it (via the event_trigger_ops @init() function). + * This is also where commands can use the @trigger_type value to + * make the decision as to whether or not multiple instances of + * the trigger should be allowed. This is usually implemented by + * the generic utility function @register_trigger() (see + * trace_event_triggers.c). + * + * @unreg: Removes the trigger from the list of triggers associated + * with the event, and disables the event trigger itself, after + * initializing it (via the event_trigger_ops @free() function). + * This is usually implemented by the generic utility function + * @unregister_trigger() (see trace_event_triggers.c). + * + * @set_filter: An optional function called to parse and set a filter + * for the trigger. If no @set_filter() method is set for the + * event command, filters set by the user for the command will be + * ignored. This is usually implemented by the generic utility + * function @set_trigger_filter() (see trace_event_triggers.c). + * + * @get_trigger_ops: The callback function invoked to retrieve the + * event_trigger_ops implementation associated with the command. 
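Taken together with the struct event_command definition that immediately follows, these callbacks can be wired up fairly mechanically. The sketch below is illustrative only: the "myprint" names are hypothetical and not part of this patch, and it simply reuses the generic helpers the comments above name (event_trigger_init(), event_trigger_free(), event_trigger_print(), event_trigger_callback(), register_trigger(), unregister_trigger(), set_trigger_filter()); the trigger_type value is a placeholder borrowed from the existing enum.

/* Hypothetical "myprint" trigger; a sketch, not code from this patch. */
static void myprint_trigger(struct event_trigger_data *data)
{
	pr_info("myprint trigger fired, count=%lu\n", data->count);
}

static int myprint_trigger_print(struct seq_file *m,
				 struct event_trigger_ops *ops,
				 struct event_trigger_data *data)
{
	return event_trigger_print("myprint", m, (void *)data->count,
				   data->filter_str);
}

static struct event_trigger_ops myprint_trigger_ops = {
	.func	= myprint_trigger,
	.print	= myprint_trigger_print,
	.init	= event_trigger_init,	/* generic refcount bump */
	.free	= event_trigger_free,	/* generic refcount drop */
};

static struct event_trigger_ops *
myprint_get_trigger_ops(char *cmd, char *param)
{
	return &myprint_trigger_ops;
}

static struct event_command trigger_myprint_cmd = {
	.name			= "myprint",
	.trigger_type		= ETT_TRACE_ONOFF, /* a real command adds its own ETT_* value */
	.func			= event_trigger_callback,
	.reg			= register_trigger,
	.unreg			= unregister_trigger,
	.get_trigger_ops	= myprint_get_trigger_ops,
	.set_filter		= set_trigger_filter,
};

A real command would then be handed to register_event_command() at init time, the way register_trigger_cmds() does for the built-in trigger commands.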
+ */ +struct event_command { + struct list_head list; + char *name; + enum event_trigger_type trigger_type; + bool post_trigger; + int (*func)(struct event_command *cmd_ops, + struct ftrace_event_file *file, + char *glob, char *cmd, char *params); + int (*reg)(char *glob, + struct event_trigger_ops *ops, + struct event_trigger_data *data, + struct ftrace_event_file *file); + void (*unreg)(char *glob, + struct event_trigger_ops *ops, + struct event_trigger_data *data, + struct ftrace_event_file *file); + int (*set_filter)(char *filter_str, + struct event_trigger_data *data, + struct ftrace_event_file *file); + struct event_trigger_ops *(*get_trigger_ops)(char *cmd, char *param); +}; + +extern int trace_event_enable_disable(struct ftrace_event_file *file, + int enable, int soft_disable); +extern int tracing_alloc_snapshot(void); + extern const char *__start___trace_bprintk_fmt[]; extern const char *__stop___trace_bprintk_fmt[]; @@ -1045,7 +1294,7 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled); #undef FTRACE_ENTRY #define FTRACE_ENTRY(call, struct_name, id, tstruct, print, filter) \ extern struct ftrace_event_call \ - __attribute__((__aligned__(4))) event_##call; + __aligned(4) event_##call; #undef FTRACE_ENTRY_DUP #define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print, filter) \ FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print), \ diff --git a/kernel/trace/trace_benchmark.c b/kernel/trace/trace_benchmark.c new file mode 100644 index 00000000000..40a14cbcf8e --- /dev/null +++ b/kernel/trace/trace_benchmark.c @@ -0,0 +1,198 @@ +#include <linux/delay.h> +#include <linux/module.h> +#include <linux/kthread.h> +#include <linux/trace_clock.h> + +#define CREATE_TRACE_POINTS +#include "trace_benchmark.h" + +static struct task_struct *bm_event_thread; + +static char bm_str[BENCHMARK_EVENT_STRLEN] = "START"; + +static u64 bm_total; +static u64 bm_totalsq; +static u64 bm_last; +static u64 bm_max; +static u64 bm_min; +static u64 bm_first; +static u64 bm_cnt; +static u64 bm_stddev; +static unsigned int bm_avg; +static unsigned int bm_std; + +/* + * This gets called in a loop recording the time it took to write + * the tracepoint. What it writes is the time statistics of the last + * tracepoint write. As there is nothing to write the first time + * it simply writes "START". As the first write is cold cache and + * the rest is hot, we save off that time in bm_first and it is + * reported as "first", which is shown in the second write to the + * tracepoint. The "first" field is writen within the statics from + * then on but never changes. + */ +static void trace_do_benchmark(void) +{ + u64 start; + u64 stop; + u64 delta; + u64 stddev; + u64 seed; + u64 last_seed; + unsigned int avg; + unsigned int std = 0; + + /* Only run if the tracepoint is actually active */ + if (!trace_benchmark_event_enabled()) + return; + + local_irq_disable(); + start = trace_clock_local(); + trace_benchmark_event(bm_str); + stop = trace_clock_local(); + local_irq_enable(); + + bm_cnt++; + + delta = stop - start; + + /* + * The first read is cold cached, keep it separate from the + * other calculations. + */ + if (bm_cnt == 1) { + bm_first = delta; + scnprintf(bm_str, BENCHMARK_EVENT_STRLEN, + "first=%llu [COLD CACHED]", bm_first); + return; + } + + bm_last = delta; + + if (delta > bm_max) + bm_max = delta; + if (!bm_min || delta < bm_min) + bm_min = delta; + + /* + * When bm_cnt is greater than UINT_MAX, it breaks the statistics + * accounting. 
Freeze the statistics when that happens. + * We should have enough data for the avg and stddev anyway. + */ + if (bm_cnt > UINT_MAX) { + scnprintf(bm_str, BENCHMARK_EVENT_STRLEN, + "last=%llu first=%llu max=%llu min=%llu ** avg=%u std=%d std^2=%lld", + bm_last, bm_first, bm_max, bm_min, bm_avg, bm_std, bm_stddev); + return; + } + + bm_total += delta; + bm_totalsq += delta * delta; + + + if (bm_cnt > 1) { + /* + * Apply Welford's method to calculate standard deviation: + * s^2 = 1 / (n * (n-1)) * (n * \Sum (x_i)^2 - (\Sum x_i)^2) + */ + stddev = (u64)bm_cnt * bm_totalsq - bm_total * bm_total; + do_div(stddev, (u32)bm_cnt); + do_div(stddev, (u32)bm_cnt - 1); + } else + stddev = 0; + + delta = bm_total; + do_div(delta, bm_cnt); + avg = delta; + + if (stddev > 0) { + int i = 0; + /* + * stddev is the square of standard deviation but + * we want the actualy number. Use the average + * as our seed to find the std. + * + * The next try is: + * x = (x + N/x) / 2 + * + * Where N is the squared number to find the square + * root of. + */ + seed = avg; + do { + last_seed = seed; + seed = stddev; + if (!last_seed) + break; + do_div(seed, last_seed); + seed += last_seed; + do_div(seed, 2); + } while (i++ < 10 && last_seed != seed); + + std = seed; + } + + scnprintf(bm_str, BENCHMARK_EVENT_STRLEN, + "last=%llu first=%llu max=%llu min=%llu avg=%u std=%d std^2=%lld", + bm_last, bm_first, bm_max, bm_min, avg, std, stddev); + + bm_std = std; + bm_avg = avg; + bm_stddev = stddev; +} + +static int benchmark_event_kthread(void *arg) +{ + /* sleep a bit to make sure the tracepoint gets activated */ + msleep(100); + + while (!kthread_should_stop()) { + + trace_do_benchmark(); + + /* + * We don't go to sleep, but let others + * run as well. + */ + cond_resched(); + } + + return 0; +} + +/* + * When the benchmark tracepoint is enabled, it calls this + * function and the thread that calls the tracepoint is created. + */ +void trace_benchmark_reg(void) +{ + bm_event_thread = kthread_run(benchmark_event_kthread, + NULL, "event_benchmark"); + WARN_ON(!bm_event_thread); +} + +/* + * When the benchmark tracepoint is disabled, it calls this + * function and the thread that calls the tracepoint is deleted + * and all the numbers are reset. + */ +void trace_benchmark_unreg(void) +{ + if (!bm_event_thread) + return; + + kthread_stop(bm_event_thread); + + strcpy(bm_str, "START"); + bm_total = 0; + bm_totalsq = 0; + bm_last = 0; + bm_max = 0; + bm_min = 0; + bm_cnt = 0; + /* These don't need to be reset but reset them anyway */ + bm_first = 0; + bm_std = 0; + bm_avg = 0; + bm_stddev = 0; +} diff --git a/kernel/trace/trace_benchmark.h b/kernel/trace/trace_benchmark.h new file mode 100644 index 00000000000..3c1df1df4e2 --- /dev/null +++ b/kernel/trace/trace_benchmark.h @@ -0,0 +1,41 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM benchmark + +#if !defined(_TRACE_BENCHMARK_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_BENCHMARK_H + +#include <linux/tracepoint.h> + +extern void trace_benchmark_reg(void); +extern void trace_benchmark_unreg(void); + +#define BENCHMARK_EVENT_STRLEN 128 + +TRACE_EVENT_FN(benchmark_event, + + TP_PROTO(const char *str), + + TP_ARGS(str), + + TP_STRUCT__entry( + __array( char, str, BENCHMARK_EVENT_STRLEN ) + ), + + TP_fast_assign( + memcpy(__entry->str, str, BENCHMARK_EVENT_STRLEN); + ), + + TP_printk("%s", __entry->str), + + trace_benchmark_reg, trace_benchmark_unreg +); + +#endif /* _TRACE_BENCHMARK_H */ + +#undef TRACE_INCLUDE_FILE +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . 
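The statistics kept by trace_do_benchmark() above reduce to a running sum, a running sum of squares, and an integer Newton iteration (x = (x + N/x) / 2) for the square root, seeded with the average. The user-space sketch below shows the same core arithmetic in isolation; it is illustrative only and does not reproduce the kernel code's exact bookkeeping (the cold-cache first sample, do_div(), or the freeze once the count exceeds UINT_MAX), and the sample deltas are stand-ins.

#include <inttypes.h>
#include <stdio.h>

/* Integer square root via Newton's iteration, seeded with the average. */
static uint64_t int_sqrt_seeded(uint64_t n, uint64_t seed)
{
	uint64_t x = seed ? seed : 1;
	uint64_t last;
	int i = 0;

	do {
		last = x;
		x = (x + n / x) / 2;
	} while (i++ < 10 && x != last);

	return x;
}

int main(void)
{
	/* Stand-in per-iteration deltas, in ns */
	const uint64_t deltas[] = { 632, 278, 277, 273, 273, 281 };
	uint64_t total = 0, totalsq = 0, cnt = 0;
	size_t i;

	for (i = 0; i < sizeof(deltas) / sizeof(deltas[0]); i++) {
		uint64_t d = deltas[i];
		uint64_t avg, var;

		cnt++;
		total += d;
		totalsq += d * d;

		if (cnt < 2)
			continue;

		/* var = (n * sum(x^2) - (sum x)^2) / (n * (n - 1)) */
		var = (cnt * totalsq - total * total) / (cnt * (cnt - 1));
		avg = total / cnt;

		printf("last=%" PRIu64 " avg=%" PRIu64 " std=%" PRIu64
		       " std^2=%" PRIu64 "\n",
		       d, avg, int_sqrt_seeded(var, avg), var);
	}
	return 0;
}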
+#define TRACE_INCLUDE_FILE trace_benchmark + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index d594da0dc03..697fb9bac8f 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -78,7 +78,7 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) entry->line = f->line; entry->correct = val == expect; - if (!filter_check_discard(call, entry, buffer, event)) + if (!call_filter_check_discard(call, entry, buffer, event)) __buffer_unlock_commit(buffer, event); out: diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c index 26dc348332b..57b67b1f24d 100644 --- a/kernel/trace/trace_clock.c +++ b/kernel/trace/trace_clock.c @@ -59,13 +59,14 @@ u64 notrace trace_clock(void) /* * trace_jiffy_clock(): Simply use jiffies as a clock counter. + * Note that this use of jiffies_64 is not completely safe on + * 32-bit systems. But the window is tiny, and the effect if + * we are affected is that we will have an obviously bogus + * timestamp on a trace event - i.e. not life threatening. */ u64 notrace trace_clock_jiffies(void) { - u64 jiffy = jiffies - INITIAL_JIFFIES; - - /* Return nsecs */ - return (u64)jiffies_to_usecs(jiffy) * 1000ULL; + return jiffies_64_to_clock_t(jiffies_64 - INITIAL_JIFFIES); } /* diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 80c36bcf66e..5d12bb407b4 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -24,10 +24,32 @@ static int total_ref_count; static int perf_trace_event_perm(struct ftrace_event_call *tp_event, struct perf_event *p_event) { + if (tp_event->perf_perm) { + int ret = tp_event->perf_perm(tp_event, p_event); + if (ret) + return ret; + } + /* The ftrace function trace is allowed only for root. */ - if (ftrace_event_is_function(tp_event) && - perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN)) - return -EPERM; + if (ftrace_event_is_function(tp_event)) { + if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + /* + * We don't allow user space callchains for function trace + * event, due to issues with page faults while tracing page + * fault handler and its overall trickiness nature. + */ + if (!p_event->attr.exclude_callchain_user) + return -EINVAL; + + /* + * Same reason to disable user stack dump as for user space + * callchains above. 
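From the perf side, the callchain and stack-dump checks added here mean a function trace event can only be opened when user-space callchains are excluded and PERF_SAMPLE_STACK_USER is not requested (and, as before, only with sufficient privilege). The fragment below is a hedged user-space sketch of an attr that would pass those checks; the event id is a placeholder that would normally be read from the event's "id" file in tracefs/debugfs, and the helper name is made up.

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>

/* Sketch: open the ftrace "function" trace event through perf. */
static int open_function_event(unsigned long long event_id)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.type = PERF_TYPE_TRACEPOINT;
	attr.size = sizeof(attr);
	attr.config = event_id;		/* placeholder: events/ftrace/function/id */
	attr.sample_period = 1;
	attr.sample_type = PERF_SAMPLE_RAW;	/* no PERF_SAMPLE_STACK_USER */
	attr.exclude_callchain_user = 1;	/* required by the check above */

	return syscall(__NR_perf_event_open, &attr, 0 /* this task */,
		       -1 /* any cpu */, -1 /* no group */, 0 /* flags */);
}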
+ */ + if (p_event->attr.sample_type & PERF_SAMPLE_STACK_USER) + return -EINVAL; + } /* No tracing, just counting, so no obvious leak */ if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW)) @@ -173,7 +195,7 @@ static int perf_trace_event_init(struct ftrace_event_call *tp_event, int perf_trace_init(struct perf_event *p_event) { struct ftrace_event_call *tp_event; - int event_id = p_event->attr.config; + u64 event_id = p_event->attr.config; int ret = -EINVAL; mutex_lock(&event_mutex); @@ -226,8 +248,8 @@ void perf_trace_del(struct perf_event *p_event, int flags) tp_event->class->reg(tp_event, TRACE_REG_PERF_DEL, p_event); } -__kprobes void *perf_trace_buf_prepare(int size, unsigned short type, - struct pt_regs *regs, int *rctxp) +void *perf_trace_buf_prepare(int size, unsigned short type, + struct pt_regs *regs, int *rctxp) { struct trace_entry *entry; unsigned long flags; @@ -259,6 +281,7 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type, return raw_data; } EXPORT_SYMBOL_GPL(perf_trace_buf_prepare); +NOKPROBE_SYMBOL(perf_trace_buf_prepare); #ifdef CONFIG_FUNCTION_TRACER static void diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 368a4d50cc3..2de53628689 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -27,12 +27,6 @@ DEFINE_MUTEX(event_mutex); -DEFINE_MUTEX(event_storage_mutex); -EXPORT_SYMBOL_GPL(event_storage_mutex); - -char event_storage[EVENT_STORAGE_SIZE]; -EXPORT_SYMBOL_GPL(event_storage); - LIST_HEAD(ftrace_events); static LIST_HEAD(ftrace_common_fields); @@ -194,29 +188,60 @@ int trace_event_raw_init(struct ftrace_event_call *call) } EXPORT_SYMBOL_GPL(trace_event_raw_init); +void *ftrace_event_buffer_reserve(struct ftrace_event_buffer *fbuffer, + struct ftrace_event_file *ftrace_file, + unsigned long len) +{ + struct ftrace_event_call *event_call = ftrace_file->event_call; + + local_save_flags(fbuffer->flags); + fbuffer->pc = preempt_count(); + fbuffer->ftrace_file = ftrace_file; + + fbuffer->event = + trace_event_buffer_lock_reserve(&fbuffer->buffer, ftrace_file, + event_call->event.type, len, + fbuffer->flags, fbuffer->pc); + if (!fbuffer->event) + return NULL; + + fbuffer->entry = ring_buffer_event_data(fbuffer->event); + return fbuffer->entry; +} +EXPORT_SYMBOL_GPL(ftrace_event_buffer_reserve); + +void ftrace_event_buffer_commit(struct ftrace_event_buffer *fbuffer) +{ + event_trigger_unlock_commit(fbuffer->ftrace_file, fbuffer->buffer, + fbuffer->event, fbuffer->entry, + fbuffer->flags, fbuffer->pc); +} +EXPORT_SYMBOL_GPL(ftrace_event_buffer_commit); + int ftrace_event_reg(struct ftrace_event_call *call, enum trace_reg type, void *data) { struct ftrace_event_file *file = data; + WARN_ON(!(call->flags & TRACE_EVENT_FL_TRACEPOINT)); switch (type) { case TRACE_REG_REGISTER: - return tracepoint_probe_register(call->name, + return tracepoint_probe_register(call->tp, call->class->probe, file); case TRACE_REG_UNREGISTER: - tracepoint_probe_unregister(call->name, + tracepoint_probe_unregister(call->tp, call->class->probe, file); return 0; #ifdef CONFIG_PERF_EVENTS case TRACE_REG_PERF_REGISTER: - return tracepoint_probe_register(call->name, + return tracepoint_probe_register(call->tp, call->class->perf_probe, call); case TRACE_REG_PERF_UNREGISTER: - tracepoint_probe_unregister(call->name, + tracepoint_probe_unregister(call->tp, call->class->perf_probe, call); return 0; @@ -328,7 +353,7 @@ static int __ftrace_event_enable_disable(struct ftrace_event_file *file, if (ret) { tracing_stop_cmdline_record(); 
pr_info("event trace: Could not enable event " - "%s\n", call->name); + "%s\n", ftrace_event_name(call)); break; } set_bit(FTRACE_EVENT_FL_ENABLED_BIT, &file->flags); @@ -342,6 +367,12 @@ static int __ftrace_event_enable_disable(struct ftrace_event_file *file, return ret; } +int trace_event_enable_disable(struct ftrace_event_file *file, + int enable, int soft_disable) +{ + return __ftrace_event_enable_disable(file, enable, soft_disable); +} + static int ftrace_event_enable_disable(struct ftrace_event_file *file, int enable) { @@ -421,11 +452,6 @@ static void remove_subsystem(struct ftrace_subsystem_dir *dir) } } -static void *event_file_data(struct file *filp) -{ - return ACCESS_ONCE(file_inode(filp)->i_private); -} - static void remove_event_file_dir(struct ftrace_event_file *file) { struct dentry *dir = file->dir; @@ -444,6 +470,7 @@ static void remove_event_file_dir(struct ftrace_event_file *file) list_del(&file->list); remove_subsystem(file->system); + free_event_filter(file->filter); kmem_cache_free(file_cachep, file); } @@ -456,27 +483,29 @@ __ftrace_set_clr_event_nolock(struct trace_array *tr, const char *match, { struct ftrace_event_file *file; struct ftrace_event_call *call; + const char *name; int ret = -EINVAL; list_for_each_entry(file, &tr->events, list) { call = file->event_call; + name = ftrace_event_name(call); - if (!call->name || !call->class || !call->class->reg) + if (!name || !call->class || !call->class->reg) continue; if (call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) continue; if (match && - strcmp(match, call->name) != 0 && + strcmp(match, name) != 0 && strcmp(match, call->class->system) != 0) continue; if (sub && strcmp(sub, call->class->system) != 0) continue; - if (event && strcmp(event, call->name) != 0) + if (event && strcmp(event, name) != 0) continue; ftrace_event_enable_disable(file, set); @@ -546,6 +575,9 @@ int trace_set_clr_event(const char *system, const char *event, int set) { struct trace_array *tr = top_trace_array(); + if (!tr) + return -ENODEV; + return __ftrace_set_clr_event(tr, NULL, system, event, set); } EXPORT_SYMBOL_GPL(trace_set_clr_event); @@ -674,7 +706,7 @@ static int t_show(struct seq_file *m, void *v) if (strcmp(call->class->system, TRACE_SYSTEM) != 0) seq_printf(m, "%s:", call->class->system); - seq_printf(m, "%s\n", call->name); + seq_printf(m, "%s\n", ftrace_event_name(call)); return 0; } @@ -767,7 +799,7 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt, mutex_lock(&event_mutex); list_for_each_entry(file, &tr->events, list) { call = file->event_call; - if (!call->name || !call->class || !call->class->reg) + if (!ftrace_event_name(call) || !call->class || !call->class->reg) continue; if (system && strcmp(call->class->system, system->name) != 0) @@ -882,7 +914,7 @@ static int f_show(struct seq_file *m, void *v) switch ((unsigned long)v) { case FORMAT_HEADER: - seq_printf(m, "name: %s\n", call->name); + seq_printf(m, "name: %s\n", ftrace_event_name(call)); seq_printf(m, "ID: %d\n", call->event.type); seq_printf(m, "format:\n"); return 0; @@ -989,7 +1021,7 @@ static ssize_t event_filter_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { - struct ftrace_event_call *call; + struct ftrace_event_file *file; struct trace_seq *s; int r = -ENODEV; @@ -1004,12 +1036,12 @@ event_filter_read(struct file *filp, char __user *ubuf, size_t cnt, trace_seq_init(s); mutex_lock(&event_mutex); - call = event_file_data(filp); - if (call) - print_event_filter(call, s); + file = event_file_data(filp); + if (file) + 
print_event_filter(file, s); mutex_unlock(&event_mutex); - if (call) + if (file) r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len); kfree(s); @@ -1021,7 +1053,7 @@ static ssize_t event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { - struct ftrace_event_call *call; + struct ftrace_event_file *file; char *buf; int err = -ENODEV; @@ -1039,9 +1071,9 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, buf[cnt] = '\0'; mutex_lock(&event_mutex); - call = event_file_data(filp); - if (call) - err = apply_event_filter(call, buf); + file = event_file_data(filp); + if (file) + err = apply_event_filter(file, buf); mutex_unlock(&event_mutex); free_page((unsigned long) buf); @@ -1062,6 +1094,9 @@ static int subsystem_open(struct inode *inode, struct file *filp) struct trace_array *tr; int ret; + if (tracing_is_disabled()) + return -ENODEV; + /* Make sure the system still exists */ mutex_lock(&trace_types_lock); mutex_lock(&event_mutex); @@ -1108,6 +1143,9 @@ static int system_tr_open(struct inode *inode, struct file *filp) struct trace_array *tr = inode->i_private; int ret; + if (tracing_is_disabled()) + return -ENODEV; + if (trace_array_get(tr) < 0) return -ENODEV; @@ -1124,11 +1162,12 @@ static int system_tr_open(struct inode *inode, struct file *filp) if (ret < 0) { trace_array_put(tr); kfree(dir); + return ret; } filp->private_data = dir; - return ret; + return 0; } static int subsystem_release(struct inode *inode, struct file *file) @@ -1495,6 +1534,7 @@ event_create_dir(struct dentry *parent, struct ftrace_event_file *file) struct trace_array *tr = file->tr; struct list_head *head; struct dentry *d_events; + const char *name; int ret; /* @@ -1508,10 +1548,11 @@ event_create_dir(struct dentry *parent, struct ftrace_event_file *file) } else d_events = parent; - file->dir = debugfs_create_dir(call->name, d_events); + name = ftrace_event_name(call); + file->dir = debugfs_create_dir(name, d_events); if (!file->dir) { pr_warning("Could not create debugfs '%s' directory\n", - call->name); + name); return -1; } @@ -1535,13 +1576,16 @@ event_create_dir(struct dentry *parent, struct ftrace_event_file *file) ret = call->class->define_fields(call); if (ret < 0) { pr_warning("Could not initialize trace point" - " events/%s\n", call->name); + " events/%s\n", name); return -1; } } - trace_create_file("filter", 0644, file->dir, call, + trace_create_file("filter", 0644, file->dir, file, &ftrace_event_filter_fops); + trace_create_file("trigger", 0644, file->dir, file, + &event_trigger_fops); + trace_create_file("format", 0444, file->dir, call, &ftrace_event_format_fops); @@ -1577,6 +1621,7 @@ static void event_remove(struct ftrace_event_call *call) if (file->event_call != call) continue; ftrace_event_enable_disable(file, 0); + destroy_preds(file); /* * The do_for_each_event_file() is * a double loop. 
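Stepping back to the ftrace_event_buffer_reserve()/ftrace_event_buffer_commit() helpers added to trace_events.c earlier in this patch: they bundle the irq-flags/preempt-count bookkeeping, the ring-buffer reserve, and the trigger-aware commit, so a probe body reduces to roughly the following. The probe and entry type are hypothetical; this is a sketch of the calling convention, not code from the patch (struct ftrace_event_buffer itself is declared in ftrace_event.h).

/* Hypothetical event layout, as TP_STRUCT__entry would generate it. */
struct my_event_entry {
	struct trace_entry	ent;
	int			value;
};

static void probe_my_event(void *data, int value)
{
	struct ftrace_event_file *ftrace_file = data;
	struct ftrace_event_buffer fbuffer;
	struct my_event_entry *entry;

	entry = ftrace_event_buffer_reserve(&fbuffer, ftrace_file,
					    sizeof(*entry));
	if (!entry)
		return;

	entry->value = value;

	/* Commits via event_trigger_unlock_commit(), so triggers run too. */
	ftrace_event_buffer_commit(&fbuffer);
}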
After finding the call for this @@ -1595,15 +1640,17 @@ static void event_remove(struct ftrace_event_call *call) static int event_init(struct ftrace_event_call *call) { int ret = 0; + const char *name; - if (WARN_ON(!call->name)) + name = ftrace_event_name(call); + if (WARN_ON(!name)) return -EINVAL; if (call->class->raw_init) { ret = call->class->raw_init(call); if (ret < 0 && ret != -ENOSYS) pr_warn("Could not initialize trace events/%s\n", - call->name); + name); } return ret; @@ -1637,6 +1684,8 @@ trace_create_new_event(struct ftrace_event_call *call, file->event_call = call; file->tr = tr; atomic_set(&file->sm_ref, 0); + atomic_set(&file->tm_ref, 0); + INIT_LIST_HEAD(&file->triggers); list_add(&file->list, &tr->events); return file; @@ -1700,7 +1749,7 @@ static void __trace_remove_event_call(struct ftrace_event_call *call) { event_remove(call); trace_destroy_fields(call); - destroy_preds(call); + destroy_call_preds(call); } static int probe_remove_event_call(struct ftrace_event_call *call) @@ -1763,6 +1812,16 @@ static void trace_module_add_events(struct module *mod) { struct ftrace_event_call **call, **start, **end; + if (!mod->num_trace_events) + return; + + /* Don't add infrastructure for mods without tracepoints */ + if (trace_module_has_bad_taint(mod)) { + pr_err("%s: module has bad taint, not creating trace events\n", + mod->name); + return; + } + start = mod->trace_events; end = mod->trace_events + mod->num_trace_events; @@ -1837,46 +1896,48 @@ __trace_add_event_dirs(struct trace_array *tr) ret = __trace_add_new_event(call, tr); if (ret < 0) pr_warning("Could not create directory for event %s\n", - call->name); + ftrace_event_name(call)); } } -#ifdef CONFIG_DYNAMIC_FTRACE - -/* Avoid typos */ -#define ENABLE_EVENT_STR "enable_event" -#define DISABLE_EVENT_STR "disable_event" - -struct event_probe_data { - struct ftrace_event_file *file; - unsigned long count; - int ref; - bool enable; -}; - -static struct ftrace_event_file * +struct ftrace_event_file * find_event_file(struct trace_array *tr, const char *system, const char *event) { struct ftrace_event_file *file; struct ftrace_event_call *call; + const char *name; list_for_each_entry(file, &tr->events, list) { call = file->event_call; + name = ftrace_event_name(call); - if (!call->name || !call->class || !call->class->reg) + if (!name || !call->class || !call->class->reg) continue; if (call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) continue; - if (strcmp(event, call->name) == 0 && + if (strcmp(event, name) == 0 && strcmp(system, call->class->system) == 0) return file; } return NULL; } +#ifdef CONFIG_DYNAMIC_FTRACE + +/* Avoid typos */ +#define ENABLE_EVENT_STR "enable_event" +#define DISABLE_EVENT_STR "disable_event" + +struct event_probe_data { + struct ftrace_event_file *file; + unsigned long count; + int ref; + bool enable; +}; + static void event_enable_probe(unsigned long ip, unsigned long parent_ip, void **_data) { @@ -1925,7 +1986,7 @@ event_enable_print(struct seq_file *m, unsigned long ip, seq_printf(m, "%s:%s:%s", data->enable ? 
ENABLE_EVENT_STR : DISABLE_EVENT_STR, data->file->event_call->class->system, - data->file->event_call->name); + ftrace_event_name(data->file->event_call)); if (data->count == -1) seq_printf(m, ":unlimited\n"); @@ -2008,6 +2069,9 @@ event_enable_func(struct ftrace_hash *hash, bool enable; int ret; + if (!tr) + return -ENODEV; + /* hash funcs only work with set_ftrace_filter */ if (!enabled || !param) return -EINVAL; @@ -2145,7 +2209,7 @@ __trace_early_add_event_dirs(struct trace_array *tr) ret = event_create_dir(tr->event_dir, file); if (ret < 0) pr_warning("Could not create directory for event %s\n", - file->event_call->name); + ftrace_event_name(file->event_call)); } } @@ -2169,7 +2233,7 @@ __trace_early_add_events(struct trace_array *tr) ret = __trace_early_add_new_event(call, tr); if (ret < 0) pr_warning("Could not create early event %s\n", - call->name); + ftrace_event_name(call)); } } @@ -2303,9 +2367,15 @@ int event_trace_del_tracer(struct trace_array *tr) { mutex_lock(&event_mutex); + /* Disable any event triggers and associated soft-disabled events */ + clear_event_triggers(tr); + /* Disable any running events */ __ftrace_set_clr_event_nolock(tr, NULL, NULL, NULL, 0); + /* Access to events are within rcu_read_lock_sched() */ + synchronize_sched(); + down_write(&trace_event_sem); __trace_remove_event_dirs(tr); debugfs_remove_recursive(tr->event_dir); @@ -2333,6 +2403,9 @@ static __init int event_trace_enable(void) char *token; int ret; + if (!tr) + return -ENODEV; + for_each_event(iter, __start_ftrace_events, __stop_ftrace_events) { call = *iter; @@ -2366,6 +2439,8 @@ static __init int event_trace_enable(void) register_event_cmds(); + register_trigger_cmds(); + return 0; } @@ -2377,6 +2452,8 @@ static __init int event_trace_init(void) int ret; tr = top_trace_array(); + if (!tr) + return -ENODEV; d_tracer = tracing_init_dentry(); if (!d_tracer) @@ -2470,6 +2547,8 @@ static __init void event_trace_self_tests(void) int ret; tr = top_trace_array(); + if (!tr) + return; pr_info("Running tests on trace events:\n"); @@ -2493,7 +2572,7 @@ static __init void event_trace_self_tests(void) continue; #endif - pr_info("Testing event %s: ", call->name); + pr_info("Testing event %s: ", ftrace_event_name(call)); /* * If an event is already enabled, someone is using diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 97daa8cf958..8a8631926a0 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -637,10 +637,18 @@ static void append_filter_err(struct filter_parse_state *ps, free_page((unsigned long) buf); } +static inline struct event_filter *event_filter(struct ftrace_event_file *file) +{ + if (file->event_call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) + return file->event_call->filter; + else + return file->filter; +} + /* caller must hold event_mutex */ -void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s) +void print_event_filter(struct ftrace_event_file *file, struct trace_seq *s) { - struct event_filter *filter = call->filter; + struct event_filter *filter = event_filter(file); if (filter && filter->filter_string) trace_seq_printf(s, "%s\n", filter->filter_string); @@ -766,11 +774,21 @@ static void __free_preds(struct event_filter *filter) filter->n_preds = 0; } -static void filter_disable(struct ftrace_event_call *call) +static void call_filter_disable(struct ftrace_event_call *call) { call->flags &= ~TRACE_EVENT_FL_FILTERED; } +static void filter_disable(struct ftrace_event_file *file) +{ + 
struct ftrace_event_call *call = file->event_call; + + if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) + call_filter_disable(call); + else + file->flags &= ~FTRACE_EVENT_FL_FILTERED; +} + static void __free_filter(struct event_filter *filter) { if (!filter) @@ -781,16 +799,35 @@ static void __free_filter(struct event_filter *filter) kfree(filter); } +void free_event_filter(struct event_filter *filter) +{ + __free_filter(filter); +} + +void destroy_call_preds(struct ftrace_event_call *call) +{ + __free_filter(call->filter); + call->filter = NULL; +} + +static void destroy_file_preds(struct ftrace_event_file *file) +{ + __free_filter(file->filter); + file->filter = NULL; +} + /* - * Called when destroying the ftrace_event_call. - * The call is being freed, so we do not need to worry about - * the call being currently used. This is for module code removing + * Called when destroying the ftrace_event_file. + * The file is being freed, so we do not need to worry about + * the file being currently used. This is for module code removing * the tracepoints from within it. */ -void destroy_preds(struct ftrace_event_call *call) +void destroy_preds(struct ftrace_event_file *file) { - __free_filter(call->filter); - call->filter = NULL; + if (file->event_call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) + destroy_call_preds(file->event_call); + else + destroy_file_preds(file); } static struct event_filter *__alloc_filter(void) @@ -825,28 +862,56 @@ static int __alloc_preds(struct event_filter *filter, int n_preds) return 0; } -static void filter_free_subsystem_preds(struct event_subsystem *system) +static inline void __remove_filter(struct ftrace_event_file *file) { + struct ftrace_event_call *call = file->event_call; + + filter_disable(file); + if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) + remove_filter_string(call->filter); + else + remove_filter_string(file->filter); +} + +static void filter_free_subsystem_preds(struct event_subsystem *system, + struct trace_array *tr) +{ + struct ftrace_event_file *file; struct ftrace_event_call *call; - list_for_each_entry(call, &ftrace_events, list) { + list_for_each_entry(file, &tr->events, list) { + call = file->event_call; if (strcmp(call->class->system, system->name) != 0) continue; - filter_disable(call); - remove_filter_string(call->filter); + __remove_filter(file); } } -static void filter_free_subsystem_filters(struct event_subsystem *system) +static inline void __free_subsystem_filter(struct ftrace_event_file *file) { + struct ftrace_event_call *call = file->event_call; + + if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) { + __free_filter(call->filter); + call->filter = NULL; + } else { + __free_filter(file->filter); + file->filter = NULL; + } +} + +static void filter_free_subsystem_filters(struct event_subsystem *system, + struct trace_array *tr) +{ + struct ftrace_event_file *file; struct ftrace_event_call *call; - list_for_each_entry(call, &ftrace_events, list) { + list_for_each_entry(file, &tr->events, list) { + call = file->event_call; if (strcmp(call->class->system, system->name) != 0) continue; - __free_filter(call->filter); - call->filter = NULL; + __free_subsystem_filter(file); } } @@ -1617,15 +1682,85 @@ fail: return err; } +static inline void event_set_filtered_flag(struct ftrace_event_file *file) +{ + struct ftrace_event_call *call = file->event_call; + + if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) + call->flags |= TRACE_EVENT_FL_FILTERED; + else + file->flags |= FTRACE_EVENT_FL_FILTERED; +} + +static inline void 
event_set_filter(struct ftrace_event_file *file, + struct event_filter *filter) +{ + struct ftrace_event_call *call = file->event_call; + + if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) + rcu_assign_pointer(call->filter, filter); + else + rcu_assign_pointer(file->filter, filter); +} + +static inline void event_clear_filter(struct ftrace_event_file *file) +{ + struct ftrace_event_call *call = file->event_call; + + if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) + RCU_INIT_POINTER(call->filter, NULL); + else + RCU_INIT_POINTER(file->filter, NULL); +} + +static inline void +event_set_no_set_filter_flag(struct ftrace_event_file *file) +{ + struct ftrace_event_call *call = file->event_call; + + if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) + call->flags |= TRACE_EVENT_FL_NO_SET_FILTER; + else + file->flags |= FTRACE_EVENT_FL_NO_SET_FILTER; +} + +static inline void +event_clear_no_set_filter_flag(struct ftrace_event_file *file) +{ + struct ftrace_event_call *call = file->event_call; + + if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) + call->flags &= ~TRACE_EVENT_FL_NO_SET_FILTER; + else + file->flags &= ~FTRACE_EVENT_FL_NO_SET_FILTER; +} + +static inline bool +event_no_set_filter_flag(struct ftrace_event_file *file) +{ + struct ftrace_event_call *call = file->event_call; + + if (file->flags & FTRACE_EVENT_FL_NO_SET_FILTER) + return true; + + if ((call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) && + (call->flags & TRACE_EVENT_FL_NO_SET_FILTER)) + return true; + + return false; +} + struct filter_list { struct list_head list; struct event_filter *filter; }; static int replace_system_preds(struct event_subsystem *system, + struct trace_array *tr, struct filter_parse_state *ps, char *filter_string) { + struct ftrace_event_file *file; struct ftrace_event_call *call; struct filter_list *filter_item; struct filter_list *tmp; @@ -1633,8 +1768,8 @@ static int replace_system_preds(struct event_subsystem *system, bool fail = true; int err; - list_for_each_entry(call, &ftrace_events, list) { - + list_for_each_entry(file, &tr->events, list) { + call = file->event_call; if (strcmp(call->class->system, system->name) != 0) continue; @@ -1644,18 +1779,20 @@ static int replace_system_preds(struct event_subsystem *system, */ err = replace_preds(call, NULL, ps, filter_string, true); if (err) - call->flags |= TRACE_EVENT_FL_NO_SET_FILTER; + event_set_no_set_filter_flag(file); else - call->flags &= ~TRACE_EVENT_FL_NO_SET_FILTER; + event_clear_no_set_filter_flag(file); } - list_for_each_entry(call, &ftrace_events, list) { + list_for_each_entry(file, &tr->events, list) { struct event_filter *filter; + call = file->event_call; + if (strcmp(call->class->system, system->name) != 0) continue; - if (call->flags & TRACE_EVENT_FL_NO_SET_FILTER) + if (event_no_set_filter_flag(file)) continue; filter_item = kzalloc(sizeof(*filter_item), GFP_KERNEL); @@ -1676,17 +1813,17 @@ static int replace_system_preds(struct event_subsystem *system, err = replace_preds(call, filter, ps, filter_string, false); if (err) { - filter_disable(call); + filter_disable(file); parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0); append_filter_err(ps, filter); } else - call->flags |= TRACE_EVENT_FL_FILTERED; + event_set_filtered_flag(file); /* * Regardless of if this returned an error, we still * replace the filter for the call. 
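These helpers all feed the same RCU discipline used further down in apply_event_filter(): publish the new filter pointer first, wait out current preempt-disabled readers with synchronize_sched(), and only then free the old filter. In isolation the pattern looks like the sketch below (hypothetical wrapper name; a condensed illustration rather than a literal excerpt).

/* Hypothetical wrapper; condensed sketch of the replace-then-free pattern. */
static void replace_filter(struct ftrace_event_file *file,
			   struct event_filter *new_filter)
{
	struct event_filter *old = event_filter(file);

	/* Publish the new filter (rcu_assign_pointer() underneath). */
	event_set_filter(file, new_filter);

	/* Wait for every tracer still reading the old filter ... */
	synchronize_sched();

	/* ... before it can be freed safely. */
	__free_filter(old);
}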
*/ - filter = call->filter; - rcu_assign_pointer(call->filter, filter_item->filter); + filter = event_filter(file); + event_set_filter(file, filter_item->filter); filter_item->filter = filter; fail = false; @@ -1806,6 +1943,13 @@ static int create_filter(struct ftrace_event_call *call, return err; } +int create_event_filter(struct ftrace_event_call *call, + char *filter_str, bool set_str, + struct event_filter **filterp) +{ + return create_filter(call, filter_str, set_str, filterp); +} + /** * create_system_filter - create a filter for an event_subsystem * @system: event_subsystem to create a filter for @@ -1816,6 +1960,7 @@ static int create_filter(struct ftrace_event_call *call, * and always remembers @filter_str. */ static int create_system_filter(struct event_subsystem *system, + struct trace_array *tr, char *filter_str, struct event_filter **filterp) { struct event_filter *filter = NULL; @@ -1824,7 +1969,7 @@ static int create_system_filter(struct event_subsystem *system, err = create_filter_start(filter_str, true, &ps, &filter); if (!err) { - err = replace_system_preds(system, ps, filter_str); + err = replace_system_preds(system, tr, ps, filter_str); if (!err) { /* System filters just show a default message */ kfree(filter->filter_string); @@ -1840,20 +1985,25 @@ static int create_system_filter(struct event_subsystem *system, } /* caller must hold event_mutex */ -int apply_event_filter(struct ftrace_event_call *call, char *filter_string) +int apply_event_filter(struct ftrace_event_file *file, char *filter_string) { + struct ftrace_event_call *call = file->event_call; struct event_filter *filter; int err; if (!strcmp(strstrip(filter_string), "0")) { - filter_disable(call); - filter = call->filter; + filter_disable(file); + filter = event_filter(file); + if (!filter) return 0; - RCU_INIT_POINTER(call->filter, NULL); + + event_clear_filter(file); + /* Make sure the filter is not being used */ synchronize_sched(); __free_filter(filter); + return 0; } @@ -1866,14 +2016,15 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string) * string */ if (filter) { - struct event_filter *tmp = call->filter; + struct event_filter *tmp; + tmp = event_filter(file); if (!err) - call->flags |= TRACE_EVENT_FL_FILTERED; + event_set_filtered_flag(file); else - filter_disable(call); + filter_disable(file); - rcu_assign_pointer(call->filter, filter); + event_set_filter(file, filter); if (tmp) { /* Make sure the call is done with the filter */ @@ -1889,6 +2040,7 @@ int apply_subsystem_event_filter(struct ftrace_subsystem_dir *dir, char *filter_string) { struct event_subsystem *system = dir->subsystem; + struct trace_array *tr = dir->tr; struct event_filter *filter; int err = 0; @@ -1901,18 +2053,18 @@ int apply_subsystem_event_filter(struct ftrace_subsystem_dir *dir, } if (!strcmp(strstrip(filter_string), "0")) { - filter_free_subsystem_preds(system); + filter_free_subsystem_preds(system, tr); remove_filter_string(system->filter); filter = system->filter; system->filter = NULL; /* Ensure all filters are no longer used */ synchronize_sched(); - filter_free_subsystem_filters(system); + filter_free_subsystem_filters(system, tr); __free_filter(filter); goto out_unlock; } - err = create_system_filter(system, filter_string, &filter); + err = create_system_filter(system, tr, filter_string, &filter); if (filter) { /* * No event actually uses the system filter diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c new file mode 100644 index 00000000000..4747b476a03 
--- /dev/null +++ b/kernel/trace/trace_events_trigger.c @@ -0,0 +1,1437 @@ +/* + * trace_events_trigger - trace event triggers + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) 2013 Tom Zanussi <tom.zanussi@linux.intel.com> + */ + +#include <linux/module.h> +#include <linux/ctype.h> +#include <linux/mutex.h> +#include <linux/slab.h> + +#include "trace.h" + +static LIST_HEAD(trigger_commands); +static DEFINE_MUTEX(trigger_cmd_mutex); + +static void +trigger_data_free(struct event_trigger_data *data) +{ + if (data->cmd_ops->set_filter) + data->cmd_ops->set_filter(NULL, data, NULL); + + synchronize_sched(); /* make sure current triggers exit before free */ + kfree(data); +} + +/** + * event_triggers_call - Call triggers associated with a trace event + * @file: The ftrace_event_file associated with the event + * @rec: The trace entry for the event, NULL for unconditional invocation + * + * For each trigger associated with an event, invoke the trigger + * function registered with the associated trigger command. If rec is + * non-NULL, it means that the trigger requires further processing and + * shouldn't be unconditionally invoked. If rec is non-NULL and the + * trigger has a filter associated with it, rec will checked against + * the filter and if the record matches the trigger will be invoked. + * If the trigger is a 'post_trigger', meaning it shouldn't be invoked + * in any case until the current event is written, the trigger + * function isn't invoked but the bit associated with the deferred + * trigger is set in the return value. + * + * Returns an enum event_trigger_type value containing a set bit for + * any trigger that should be deferred, ETT_NONE if nothing to defer. + * + * Called from tracepoint handlers (with rcu_read_lock_sched() held). + * + * Return: an enum event_trigger_type value containing a set bit for + * any trigger that should be deferred, ETT_NONE if nothing to defer. 
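The intended calling convention around this helper and its companion event_triggers_post_call() (defined just below) is roughly the following. The wrapper name is hypothetical and the TRIGGER_COND mask is assumed to be the non-_BIT form of the flag set by update_cond_flag() further down; the real commit-path plumbing lives in event_trigger_unlock_commit(), referenced from trace_events.c in this patch.

/* Hypothetical helper; a sketch of the commit-path usage, not patch code. */
static void commit_with_triggers(struct ftrace_event_file *file, void *entry)
{
	enum event_trigger_type tt = ETT_NONE;

	/* Only conditional triggers (filters/post_trigger) need the record. */
	if (unlikely(file->flags & FTRACE_EVENT_FL_TRIGGER_COND))
		tt = event_triggers_call(file, entry);	/* may defer some */

	/* ... filter check and the actual ring-buffer commit go here ... */

	if (tt)
		event_triggers_post_call(file, tt);	/* run deferred triggers */
}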
+ */ +enum event_trigger_type +event_triggers_call(struct ftrace_event_file *file, void *rec) +{ + struct event_trigger_data *data; + enum event_trigger_type tt = ETT_NONE; + struct event_filter *filter; + + if (list_empty(&file->triggers)) + return tt; + + list_for_each_entry_rcu(data, &file->triggers, list) { + if (!rec) { + data->ops->func(data); + continue; + } + filter = rcu_dereference_sched(data->filter); + if (filter && !filter_match_preds(filter, rec)) + continue; + if (data->cmd_ops->post_trigger) { + tt |= data->cmd_ops->trigger_type; + continue; + } + data->ops->func(data); + } + return tt; +} +EXPORT_SYMBOL_GPL(event_triggers_call); + +/** + * event_triggers_post_call - Call 'post_triggers' for a trace event + * @file: The ftrace_event_file associated with the event + * @tt: enum event_trigger_type containing a set bit for each trigger to invoke + * + * For each trigger associated with an event, invoke the trigger + * function registered with the associated trigger command, if the + * corresponding bit is set in the tt enum passed into this function. + * See @event_triggers_call for details on how those bits are set. + * + * Called from tracepoint handlers (with rcu_read_lock_sched() held). + */ +void +event_triggers_post_call(struct ftrace_event_file *file, + enum event_trigger_type tt) +{ + struct event_trigger_data *data; + + list_for_each_entry_rcu(data, &file->triggers, list) { + if (data->cmd_ops->trigger_type & tt) + data->ops->func(data); + } +} +EXPORT_SYMBOL_GPL(event_triggers_post_call); + +#define SHOW_AVAILABLE_TRIGGERS (void *)(1UL) + +static void *trigger_next(struct seq_file *m, void *t, loff_t *pos) +{ + struct ftrace_event_file *event_file = event_file_data(m->private); + + if (t == SHOW_AVAILABLE_TRIGGERS) + return NULL; + + return seq_list_next(t, &event_file->triggers, pos); +} + +static void *trigger_start(struct seq_file *m, loff_t *pos) +{ + struct ftrace_event_file *event_file; + + /* ->stop() is called even if ->start() fails */ + mutex_lock(&event_mutex); + event_file = event_file_data(m->private); + if (unlikely(!event_file)) + return ERR_PTR(-ENODEV); + + if (list_empty(&event_file->triggers)) + return *pos == 0 ? 
SHOW_AVAILABLE_TRIGGERS : NULL; + + return seq_list_start(&event_file->triggers, *pos); +} + +static void trigger_stop(struct seq_file *m, void *t) +{ + mutex_unlock(&event_mutex); +} + +static int trigger_show(struct seq_file *m, void *v) +{ + struct event_trigger_data *data; + struct event_command *p; + + if (v == SHOW_AVAILABLE_TRIGGERS) { + seq_puts(m, "# Available triggers:\n"); + seq_putc(m, '#'); + mutex_lock(&trigger_cmd_mutex); + list_for_each_entry_reverse(p, &trigger_commands, list) + seq_printf(m, " %s", p->name); + seq_putc(m, '\n'); + mutex_unlock(&trigger_cmd_mutex); + return 0; + } + + data = list_entry(v, struct event_trigger_data, list); + data->ops->print(m, data->ops, data); + + return 0; +} + +static const struct seq_operations event_triggers_seq_ops = { + .start = trigger_start, + .next = trigger_next, + .stop = trigger_stop, + .show = trigger_show, +}; + +static int event_trigger_regex_open(struct inode *inode, struct file *file) +{ + int ret = 0; + + mutex_lock(&event_mutex); + + if (unlikely(!event_file_data(file))) { + mutex_unlock(&event_mutex); + return -ENODEV; + } + + if (file->f_mode & FMODE_READ) { + ret = seq_open(file, &event_triggers_seq_ops); + if (!ret) { + struct seq_file *m = file->private_data; + m->private = file; + } + } + + mutex_unlock(&event_mutex); + + return ret; +} + +static int trigger_process_regex(struct ftrace_event_file *file, char *buff) +{ + char *command, *next = buff; + struct event_command *p; + int ret = -EINVAL; + + command = strsep(&next, ": \t"); + command = (command[0] != '!') ? command : command + 1; + + mutex_lock(&trigger_cmd_mutex); + list_for_each_entry(p, &trigger_commands, list) { + if (strcmp(p->name, command) == 0) { + ret = p->func(p, file, buff, command, next); + goto out_unlock; + } + } + out_unlock: + mutex_unlock(&trigger_cmd_mutex); + + return ret; +} + +static ssize_t event_trigger_regex_write(struct file *file, + const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct ftrace_event_file *event_file; + ssize_t ret; + char *buf; + + if (!cnt) + return 0; + + if (cnt >= PAGE_SIZE) + return -EINVAL; + + buf = (char *)__get_free_page(GFP_TEMPORARY); + if (!buf) + return -ENOMEM; + + if (copy_from_user(buf, ubuf, cnt)) { + free_page((unsigned long)buf); + return -EFAULT; + } + buf[cnt] = '\0'; + strim(buf); + + mutex_lock(&event_mutex); + event_file = event_file_data(file); + if (unlikely(!event_file)) { + mutex_unlock(&event_mutex); + free_page((unsigned long)buf); + return -ENODEV; + } + ret = trigger_process_regex(event_file, buf); + mutex_unlock(&event_mutex); + + free_page((unsigned long)buf); + if (ret < 0) + goto out; + + *ppos += cnt; + ret = cnt; + out: + return ret; +} + +static int event_trigger_regex_release(struct inode *inode, struct file *file) +{ + mutex_lock(&event_mutex); + + if (file->f_mode & FMODE_READ) + seq_release(inode, file); + + mutex_unlock(&event_mutex); + + return 0; +} + +static ssize_t +event_trigger_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + return event_trigger_regex_write(filp, ubuf, cnt, ppos); +} + +static int +event_trigger_open(struct inode *inode, struct file *filp) +{ + return event_trigger_regex_open(inode, filp); +} + +static int +event_trigger_release(struct inode *inode, struct file *file) +{ + return event_trigger_regex_release(inode, file); +} + +const struct file_operations event_trigger_fops = { + .open = event_trigger_open, + .read = seq_read, + .write = event_trigger_write, + .llseek = tracing_lseek, + .release = 
event_trigger_release, +}; + +/* + * Currently we only register event commands from __init, so mark this + * __init too. + */ +static __init int register_event_command(struct event_command *cmd) +{ + struct event_command *p; + int ret = 0; + + mutex_lock(&trigger_cmd_mutex); + list_for_each_entry(p, &trigger_commands, list) { + if (strcmp(cmd->name, p->name) == 0) { + ret = -EBUSY; + goto out_unlock; + } + } + list_add(&cmd->list, &trigger_commands); + out_unlock: + mutex_unlock(&trigger_cmd_mutex); + + return ret; +} + +/* + * Currently we only unregister event commands from __init, so mark + * this __init too. + */ +static __init int unregister_event_command(struct event_command *cmd) +{ + struct event_command *p, *n; + int ret = -ENODEV; + + mutex_lock(&trigger_cmd_mutex); + list_for_each_entry_safe(p, n, &trigger_commands, list) { + if (strcmp(cmd->name, p->name) == 0) { + ret = 0; + list_del_init(&p->list); + goto out_unlock; + } + } + out_unlock: + mutex_unlock(&trigger_cmd_mutex); + + return ret; +} + +/** + * event_trigger_print - Generic event_trigger_ops @print implementation + * @name: The name of the event trigger + * @m: The seq_file being printed to + * @data: Trigger-specific data + * @filter_str: filter_str to print, if present + * + * Common implementation for event triggers to print themselves. + * + * Usually wrapped by a function that simply sets the @name of the + * trigger command and then invokes this. + * + * Return: 0 on success, errno otherwise + */ +static int +event_trigger_print(const char *name, struct seq_file *m, + void *data, char *filter_str) +{ + long count = (long)data; + + seq_printf(m, "%s", name); + + if (count == -1) + seq_puts(m, ":unlimited"); + else + seq_printf(m, ":count=%ld", count); + + if (filter_str) + seq_printf(m, " if %s\n", filter_str); + else + seq_puts(m, "\n"); + + return 0; +} + +/** + * event_trigger_init - Generic event_trigger_ops @init implementation + * @ops: The trigger ops associated with the trigger + * @data: Trigger-specific data + * + * Common implementation of event trigger initialization. + * + * Usually used directly as the @init method in event trigger + * implementations. + * + * Return: 0 on success, errno otherwise + */ +static int +event_trigger_init(struct event_trigger_ops *ops, + struct event_trigger_data *data) +{ + data->ref++; + return 0; +} + +/** + * event_trigger_free - Generic event_trigger_ops @free implementation + * @ops: The trigger ops associated with the trigger + * @data: Trigger-specific data + * + * Common implementation of event trigger de-initialization. + * + * Usually used directly as the @free method in event trigger + * implementations. 
+ */ +static void +event_trigger_free(struct event_trigger_ops *ops, + struct event_trigger_data *data) +{ + if (WARN_ON_ONCE(data->ref <= 0)) + return; + + data->ref--; + if (!data->ref) + trigger_data_free(data); +} + +static int trace_event_trigger_enable_disable(struct ftrace_event_file *file, + int trigger_enable) +{ + int ret = 0; + + if (trigger_enable) { + if (atomic_inc_return(&file->tm_ref) > 1) + return ret; + set_bit(FTRACE_EVENT_FL_TRIGGER_MODE_BIT, &file->flags); + ret = trace_event_enable_disable(file, 1, 1); + } else { + if (atomic_dec_return(&file->tm_ref) > 0) + return ret; + clear_bit(FTRACE_EVENT_FL_TRIGGER_MODE_BIT, &file->flags); + ret = trace_event_enable_disable(file, 0, 1); + } + + return ret; +} + +/** + * clear_event_triggers - Clear all triggers associated with a trace array + * @tr: The trace array to clear + * + * For each trigger, the triggering event has its tm_ref decremented + * via trace_event_trigger_enable_disable(), and any associated event + * (in the case of enable/disable_event triggers) will have its sm_ref + * decremented via free()->trace_event_enable_disable(). That + * combination effectively reverses the soft-mode/trigger state added + * by trigger registration. + * + * Must be called with event_mutex held. + */ +void +clear_event_triggers(struct trace_array *tr) +{ + struct ftrace_event_file *file; + + list_for_each_entry(file, &tr->events, list) { + struct event_trigger_data *data; + list_for_each_entry_rcu(data, &file->triggers, list) { + trace_event_trigger_enable_disable(file, 0); + if (data->ops->free) + data->ops->free(data->ops, data); + } + } +} + +/** + * update_cond_flag - Set or reset the TRIGGER_COND bit + * @file: The ftrace_event_file associated with the event + * + * If an event has triggers and any of those triggers has a filter or + * a post_trigger, trigger invocation needs to be deferred until after + * the current event has logged its data, and the event should have + * its TRIGGER_COND bit set, otherwise the TRIGGER_COND bit should be + * cleared. + */ +static void update_cond_flag(struct ftrace_event_file *file) +{ + struct event_trigger_data *data; + bool set_cond = false; + + list_for_each_entry_rcu(data, &file->triggers, list) { + if (data->filter || data->cmd_ops->post_trigger) { + set_cond = true; + break; + } + } + + if (set_cond) + set_bit(FTRACE_EVENT_FL_TRIGGER_COND_BIT, &file->flags); + else + clear_bit(FTRACE_EVENT_FL_TRIGGER_COND_BIT, &file->flags); +} + +/** + * register_trigger - Generic event_command @reg implementation + * @glob: The raw string used to register the trigger + * @ops: The trigger ops associated with the trigger + * @data: Trigger-specific data to associate with the trigger + * @file: The ftrace_event_file associated with the event + * + * Common implementation for event trigger registration. + * + * Usually used directly as the @reg method in event command + * implementations. 
+ * + * Return: 0 on success, errno otherwise + */ +static int register_trigger(char *glob, struct event_trigger_ops *ops, + struct event_trigger_data *data, + struct ftrace_event_file *file) +{ + struct event_trigger_data *test; + int ret = 0; + + list_for_each_entry_rcu(test, &file->triggers, list) { + if (test->cmd_ops->trigger_type == data->cmd_ops->trigger_type) { + ret = -EEXIST; + goto out; + } + } + + if (data->ops->init) { + ret = data->ops->init(data->ops, data); + if (ret < 0) + goto out; + } + + list_add_rcu(&data->list, &file->triggers); + ret++; + + if (trace_event_trigger_enable_disable(file, 1) < 0) { + list_del_rcu(&data->list); + ret--; + } + update_cond_flag(file); +out: + return ret; +} + +/** + * unregister_trigger - Generic event_command @unreg implementation + * @glob: The raw string used to register the trigger + * @ops: The trigger ops associated with the trigger + * @test: Trigger-specific data used to find the trigger to remove + * @file: The ftrace_event_file associated with the event + * + * Common implementation for event trigger unregistration. + * + * Usually used directly as the @unreg method in event command + * implementations. + */ +static void unregister_trigger(char *glob, struct event_trigger_ops *ops, + struct event_trigger_data *test, + struct ftrace_event_file *file) +{ + struct event_trigger_data *data; + bool unregistered = false; + + list_for_each_entry_rcu(data, &file->triggers, list) { + if (data->cmd_ops->trigger_type == test->cmd_ops->trigger_type) { + unregistered = true; + list_del_rcu(&data->list); + update_cond_flag(file); + trace_event_trigger_enable_disable(file, 0); + break; + } + } + + if (unregistered && data->ops->free) + data->ops->free(data->ops, data); +} + +/** + * event_trigger_callback - Generic event_command @func implementation + * @cmd_ops: The command ops, used for trigger registration + * @file: The ftrace_event_file associated with the event + * @glob: The raw string used to register the trigger + * @cmd: The cmd portion of the string used to register the trigger + * @param: The params portion of the string used to register the trigger + * + * Common implementation for event command parsing and trigger + * instantiation. + * + * Usually used directly as the @func method in event command + * implementations. + * + * Return: 0 on success, errno otherwise + */ +static int +event_trigger_callback(struct event_command *cmd_ops, + struct ftrace_event_file *file, + char *glob, char *cmd, char *param) +{ + struct event_trigger_data *trigger_data; + struct event_trigger_ops *trigger_ops; + char *trigger = NULL; + char *number; + int ret; + + /* separate the trigger from the filter (t:n [if filter]) */ + if (param && isdigit(param[0])) + trigger = strsep(¶m, " \t"); + + trigger_ops = cmd_ops->get_trigger_ops(cmd, trigger); + + ret = -ENOMEM; + trigger_data = kzalloc(sizeof(*trigger_data), GFP_KERNEL); + if (!trigger_data) + goto out; + + trigger_data->count = -1; + trigger_data->ops = trigger_ops; + trigger_data->cmd_ops = cmd_ops; + INIT_LIST_HEAD(&trigger_data->list); + + if (glob[0] == '!') { + cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file); + kfree(trigger_data); + ret = 0; + goto out; + } + + if (trigger) { + number = strsep(&trigger, ":"); + + ret = -EINVAL; + if (!strlen(number)) + goto out_free; + + /* + * We use the callback data field (which is a pointer) + * as our counter. 
+ */ + ret = kstrtoul(number, 0, &trigger_data->count); + if (ret) + goto out_free; + } + + if (!param) /* if param is non-empty, it's supposed to be a filter */ + goto out_reg; + + if (!cmd_ops->set_filter) + goto out_reg; + + ret = cmd_ops->set_filter(param, trigger_data, file); + if (ret < 0) + goto out_free; + + out_reg: + ret = cmd_ops->reg(glob, trigger_ops, trigger_data, file); + /* + * The above returns on success the # of functions enabled, + * but if it didn't find any functions it returns zero. + * Consider no functions a failure too. + */ + if (!ret) { + ret = -ENOENT; + goto out_free; + } else if (ret < 0) + goto out_free; + ret = 0; + out: + return ret; + + out_free: + if (cmd_ops->set_filter) + cmd_ops->set_filter(NULL, trigger_data, NULL); + kfree(trigger_data); + goto out; +} + +/** + * set_trigger_filter - Generic event_command @set_filter implementation + * @filter_str: The filter string for the trigger, NULL to remove filter + * @trigger_data: Trigger-specific data + * @file: The ftrace_event_file associated with the event + * + * Common implementation for event command filter parsing and filter + * instantiation. + * + * Usually used directly as the @set_filter method in event command + * implementations. + * + * Also used to remove a filter (if filter_str = NULL). + * + * Return: 0 on success, errno otherwise + */ +static int set_trigger_filter(char *filter_str, + struct event_trigger_data *trigger_data, + struct ftrace_event_file *file) +{ + struct event_trigger_data *data = trigger_data; + struct event_filter *filter = NULL, *tmp; + int ret = -EINVAL; + char *s; + + if (!filter_str) /* clear the current filter */ + goto assign; + + s = strsep(&filter_str, " \t"); + + if (!strlen(s) || strcmp(s, "if") != 0) + goto out; + + if (!filter_str) + goto out; + + /* The filter is for the 'trigger' event, not the triggered event */ + ret = create_event_filter(file->event_call, filter_str, false, &filter); + if (ret) + goto out; + assign: + tmp = rcu_access_pointer(data->filter); + + rcu_assign_pointer(data->filter, filter); + + if (tmp) { + /* Make sure the call is done with the filter */ + synchronize_sched(); + free_event_filter(tmp); + } + + kfree(data->filter_str); + data->filter_str = NULL; + + if (filter_str) { + data->filter_str = kstrdup(filter_str, GFP_KERNEL); + if (!data->filter_str) { + free_event_filter(rcu_access_pointer(data->filter)); + data->filter = NULL; + ret = -ENOMEM; + } + } + out: + return ret; +} + +static void +traceon_trigger(struct event_trigger_data *data) +{ + if (tracing_is_on()) + return; + + tracing_on(); +} + +static void +traceon_count_trigger(struct event_trigger_data *data) +{ + if (tracing_is_on()) + return; + + if (!data->count) + return; + + if (data->count != -1) + (data->count)--; + + tracing_on(); +} + +static void +traceoff_trigger(struct event_trigger_data *data) +{ + if (!tracing_is_on()) + return; + + tracing_off(); +} + +static void +traceoff_count_trigger(struct event_trigger_data *data) +{ + if (!tracing_is_on()) + return; + + if (!data->count) + return; + + if (data->count != -1) + (data->count)--; + + tracing_off(); +} + +static int +traceon_trigger_print(struct seq_file *m, struct event_trigger_ops *ops, + struct event_trigger_data *data) +{ + return event_trigger_print("traceon", m, (void *)data->count, + data->filter_str); +} + +static int +traceoff_trigger_print(struct seq_file *m, struct event_trigger_ops *ops, + struct event_trigger_data *data) +{ + return event_trigger_print("traceoff", m, (void *)data->count, + 
data->filter_str); +} + +static struct event_trigger_ops traceon_trigger_ops = { + .func = traceon_trigger, + .print = traceon_trigger_print, + .init = event_trigger_init, + .free = event_trigger_free, +}; + +static struct event_trigger_ops traceon_count_trigger_ops = { + .func = traceon_count_trigger, + .print = traceon_trigger_print, + .init = event_trigger_init, + .free = event_trigger_free, +}; + +static struct event_trigger_ops traceoff_trigger_ops = { + .func = traceoff_trigger, + .print = traceoff_trigger_print, + .init = event_trigger_init, + .free = event_trigger_free, +}; + +static struct event_trigger_ops traceoff_count_trigger_ops = { + .func = traceoff_count_trigger, + .print = traceoff_trigger_print, + .init = event_trigger_init, + .free = event_trigger_free, +}; + +static struct event_trigger_ops * +onoff_get_trigger_ops(char *cmd, char *param) +{ + struct event_trigger_ops *ops; + + /* we register both traceon and traceoff to this callback */ + if (strcmp(cmd, "traceon") == 0) + ops = param ? &traceon_count_trigger_ops : + &traceon_trigger_ops; + else + ops = param ? &traceoff_count_trigger_ops : + &traceoff_trigger_ops; + + return ops; +} + +static struct event_command trigger_traceon_cmd = { + .name = "traceon", + .trigger_type = ETT_TRACE_ONOFF, + .func = event_trigger_callback, + .reg = register_trigger, + .unreg = unregister_trigger, + .get_trigger_ops = onoff_get_trigger_ops, + .set_filter = set_trigger_filter, +}; + +static struct event_command trigger_traceoff_cmd = { + .name = "traceoff", + .trigger_type = ETT_TRACE_ONOFF, + .func = event_trigger_callback, + .reg = register_trigger, + .unreg = unregister_trigger, + .get_trigger_ops = onoff_get_trigger_ops, + .set_filter = set_trigger_filter, +}; + +#ifdef CONFIG_TRACER_SNAPSHOT +static void +snapshot_trigger(struct event_trigger_data *data) +{ + tracing_snapshot(); +} + +static void +snapshot_count_trigger(struct event_trigger_data *data) +{ + if (!data->count) + return; + + if (data->count != -1) + (data->count)--; + + snapshot_trigger(data); +} + +static int +register_snapshot_trigger(char *glob, struct event_trigger_ops *ops, + struct event_trigger_data *data, + struct ftrace_event_file *file) +{ + int ret = register_trigger(glob, ops, data, file); + + if (ret > 0 && tracing_alloc_snapshot() != 0) { + unregister_trigger(glob, ops, data, file); + ret = 0; + } + + return ret; +} + +static int +snapshot_trigger_print(struct seq_file *m, struct event_trigger_ops *ops, + struct event_trigger_data *data) +{ + return event_trigger_print("snapshot", m, (void *)data->count, + data->filter_str); +} + +static struct event_trigger_ops snapshot_trigger_ops = { + .func = snapshot_trigger, + .print = snapshot_trigger_print, + .init = event_trigger_init, + .free = event_trigger_free, +}; + +static struct event_trigger_ops snapshot_count_trigger_ops = { + .func = snapshot_count_trigger, + .print = snapshot_trigger_print, + .init = event_trigger_init, + .free = event_trigger_free, +}; + +static struct event_trigger_ops * +snapshot_get_trigger_ops(char *cmd, char *param) +{ + return param ? 
&snapshot_count_trigger_ops : &snapshot_trigger_ops; +} + +static struct event_command trigger_snapshot_cmd = { + .name = "snapshot", + .trigger_type = ETT_SNAPSHOT, + .func = event_trigger_callback, + .reg = register_snapshot_trigger, + .unreg = unregister_trigger, + .get_trigger_ops = snapshot_get_trigger_ops, + .set_filter = set_trigger_filter, +}; + +static __init int register_trigger_snapshot_cmd(void) +{ + int ret; + + ret = register_event_command(&trigger_snapshot_cmd); + WARN_ON(ret < 0); + + return ret; +} +#else +static __init int register_trigger_snapshot_cmd(void) { return 0; } +#endif /* CONFIG_TRACER_SNAPSHOT */ + +#ifdef CONFIG_STACKTRACE +/* + * Skip 3: + * stacktrace_trigger() + * event_triggers_post_call() + * ftrace_raw_event_xxx() + */ +#define STACK_SKIP 3 + +static void +stacktrace_trigger(struct event_trigger_data *data) +{ + trace_dump_stack(STACK_SKIP); +} + +static void +stacktrace_count_trigger(struct event_trigger_data *data) +{ + if (!data->count) + return; + + if (data->count != -1) + (data->count)--; + + stacktrace_trigger(data); +} + +static int +stacktrace_trigger_print(struct seq_file *m, struct event_trigger_ops *ops, + struct event_trigger_data *data) +{ + return event_trigger_print("stacktrace", m, (void *)data->count, + data->filter_str); +} + +static struct event_trigger_ops stacktrace_trigger_ops = { + .func = stacktrace_trigger, + .print = stacktrace_trigger_print, + .init = event_trigger_init, + .free = event_trigger_free, +}; + +static struct event_trigger_ops stacktrace_count_trigger_ops = { + .func = stacktrace_count_trigger, + .print = stacktrace_trigger_print, + .init = event_trigger_init, + .free = event_trigger_free, +}; + +static struct event_trigger_ops * +stacktrace_get_trigger_ops(char *cmd, char *param) +{ + return param ? 
&stacktrace_count_trigger_ops : &stacktrace_trigger_ops; +} + +static struct event_command trigger_stacktrace_cmd = { + .name = "stacktrace", + .trigger_type = ETT_STACKTRACE, + .post_trigger = true, + .func = event_trigger_callback, + .reg = register_trigger, + .unreg = unregister_trigger, + .get_trigger_ops = stacktrace_get_trigger_ops, + .set_filter = set_trigger_filter, +}; + +static __init int register_trigger_stacktrace_cmd(void) +{ + int ret; + + ret = register_event_command(&trigger_stacktrace_cmd); + WARN_ON(ret < 0); + + return ret; +} +#else +static __init int register_trigger_stacktrace_cmd(void) { return 0; } +#endif /* CONFIG_STACKTRACE */ + +static __init void unregister_trigger_traceon_traceoff_cmds(void) +{ + unregister_event_command(&trigger_traceon_cmd); + unregister_event_command(&trigger_traceoff_cmd); +} + +/* Avoid typos */ +#define ENABLE_EVENT_STR "enable_event" +#define DISABLE_EVENT_STR "disable_event" + +struct enable_trigger_data { + struct ftrace_event_file *file; + bool enable; +}; + +static void +event_enable_trigger(struct event_trigger_data *data) +{ + struct enable_trigger_data *enable_data = data->private_data; + + if (enable_data->enable) + clear_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &enable_data->file->flags); + else + set_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &enable_data->file->flags); +} + +static void +event_enable_count_trigger(struct event_trigger_data *data) +{ + struct enable_trigger_data *enable_data = data->private_data; + + if (!data->count) + return; + + /* Skip if the event is in a state we want to switch to */ + if (enable_data->enable == !(enable_data->file->flags & FTRACE_EVENT_FL_SOFT_DISABLED)) + return; + + if (data->count != -1) + (data->count)--; + + event_enable_trigger(data); +} + +static int +event_enable_trigger_print(struct seq_file *m, struct event_trigger_ops *ops, + struct event_trigger_data *data) +{ + struct enable_trigger_data *enable_data = data->private_data; + + seq_printf(m, "%s:%s:%s", + enable_data->enable ? 
ENABLE_EVENT_STR : DISABLE_EVENT_STR, + enable_data->file->event_call->class->system, + ftrace_event_name(enable_data->file->event_call)); + + if (data->count == -1) + seq_puts(m, ":unlimited"); + else + seq_printf(m, ":count=%ld", data->count); + + if (data->filter_str) + seq_printf(m, " if %s\n", data->filter_str); + else + seq_puts(m, "\n"); + + return 0; +} + +static void +event_enable_trigger_free(struct event_trigger_ops *ops, + struct event_trigger_data *data) +{ + struct enable_trigger_data *enable_data = data->private_data; + + if (WARN_ON_ONCE(data->ref <= 0)) + return; + + data->ref--; + if (!data->ref) { + /* Remove the SOFT_MODE flag */ + trace_event_enable_disable(enable_data->file, 0, 1); + module_put(enable_data->file->event_call->mod); + trigger_data_free(data); + kfree(enable_data); + } +} + +static struct event_trigger_ops event_enable_trigger_ops = { + .func = event_enable_trigger, + .print = event_enable_trigger_print, + .init = event_trigger_init, + .free = event_enable_trigger_free, +}; + +static struct event_trigger_ops event_enable_count_trigger_ops = { + .func = event_enable_count_trigger, + .print = event_enable_trigger_print, + .init = event_trigger_init, + .free = event_enable_trigger_free, +}; + +static struct event_trigger_ops event_disable_trigger_ops = { + .func = event_enable_trigger, + .print = event_enable_trigger_print, + .init = event_trigger_init, + .free = event_enable_trigger_free, +}; + +static struct event_trigger_ops event_disable_count_trigger_ops = { + .func = event_enable_count_trigger, + .print = event_enable_trigger_print, + .init = event_trigger_init, + .free = event_enable_trigger_free, +}; + +static int +event_enable_trigger_func(struct event_command *cmd_ops, + struct ftrace_event_file *file, + char *glob, char *cmd, char *param) +{ + struct ftrace_event_file *event_enable_file; + struct enable_trigger_data *enable_data; + struct event_trigger_data *trigger_data; + struct event_trigger_ops *trigger_ops; + struct trace_array *tr = file->tr; + const char *system; + const char *event; + char *trigger; + char *number; + bool enable; + int ret; + + if (!param) + return -EINVAL; + + /* separate the trigger from the filter (s:e:n [if filter]) */ + trigger = strsep(¶m, " \t"); + if (!trigger) + return -EINVAL; + + system = strsep(&trigger, ":"); + if (!trigger) + return -EINVAL; + + event = strsep(&trigger, ":"); + + ret = -EINVAL; + event_enable_file = find_event_file(tr, system, event); + if (!event_enable_file) + goto out; + + enable = strcmp(cmd, ENABLE_EVENT_STR) == 0; + + trigger_ops = cmd_ops->get_trigger_ops(cmd, trigger); + + ret = -ENOMEM; + trigger_data = kzalloc(sizeof(*trigger_data), GFP_KERNEL); + if (!trigger_data) + goto out; + + enable_data = kzalloc(sizeof(*enable_data), GFP_KERNEL); + if (!enable_data) { + kfree(trigger_data); + goto out; + } + + trigger_data->count = -1; + trigger_data->ops = trigger_ops; + trigger_data->cmd_ops = cmd_ops; + INIT_LIST_HEAD(&trigger_data->list); + RCU_INIT_POINTER(trigger_data->filter, NULL); + + enable_data->enable = enable; + enable_data->file = event_enable_file; + trigger_data->private_data = enable_data; + + if (glob[0] == '!') { + cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file); + kfree(trigger_data); + kfree(enable_data); + ret = 0; + goto out; + } + + if (trigger) { + number = strsep(&trigger, ":"); + + ret = -EINVAL; + if (!strlen(number)) + goto out_free; + + /* + * We use the callback data field (which is a pointer) + * as our counter. 
+ */ + ret = kstrtoul(number, 0, &trigger_data->count); + if (ret) + goto out_free; + } + + if (!param) /* if param is non-empty, it's supposed to be a filter */ + goto out_reg; + + if (!cmd_ops->set_filter) + goto out_reg; + + ret = cmd_ops->set_filter(param, trigger_data, file); + if (ret < 0) + goto out_free; + + out_reg: + /* Don't let event modules unload while probe registered */ + ret = try_module_get(event_enable_file->event_call->mod); + if (!ret) { + ret = -EBUSY; + goto out_free; + } + + ret = trace_event_enable_disable(event_enable_file, 1, 1); + if (ret < 0) + goto out_put; + ret = cmd_ops->reg(glob, trigger_ops, trigger_data, file); + /* + * The above returns on success the # of functions enabled, + * but if it didn't find any functions it returns zero. + * Consider no functions a failure too. + */ + if (!ret) { + ret = -ENOENT; + goto out_disable; + } else if (ret < 0) + goto out_disable; + /* Just return zero, not the number of enabled functions */ + ret = 0; + out: + return ret; + + out_disable: + trace_event_enable_disable(event_enable_file, 0, 1); + out_put: + module_put(event_enable_file->event_call->mod); + out_free: + if (cmd_ops->set_filter) + cmd_ops->set_filter(NULL, trigger_data, NULL); + kfree(trigger_data); + kfree(enable_data); + goto out; +} + +static int event_enable_register_trigger(char *glob, + struct event_trigger_ops *ops, + struct event_trigger_data *data, + struct ftrace_event_file *file) +{ + struct enable_trigger_data *enable_data = data->private_data; + struct enable_trigger_data *test_enable_data; + struct event_trigger_data *test; + int ret = 0; + + list_for_each_entry_rcu(test, &file->triggers, list) { + test_enable_data = test->private_data; + if (test_enable_data && + (test_enable_data->file == enable_data->file)) { + ret = -EEXIST; + goto out; + } + } + + if (data->ops->init) { + ret = data->ops->init(data->ops, data); + if (ret < 0) + goto out; + } + + list_add_rcu(&data->list, &file->triggers); + ret++; + + if (trace_event_trigger_enable_disable(file, 1) < 0) { + list_del_rcu(&data->list); + ret--; + } + update_cond_flag(file); +out: + return ret; +} + +static void event_enable_unregister_trigger(char *glob, + struct event_trigger_ops *ops, + struct event_trigger_data *test, + struct ftrace_event_file *file) +{ + struct enable_trigger_data *test_enable_data = test->private_data; + struct enable_trigger_data *enable_data; + struct event_trigger_data *data; + bool unregistered = false; + + list_for_each_entry_rcu(data, &file->triggers, list) { + enable_data = data->private_data; + if (enable_data && + (enable_data->file == test_enable_data->file)) { + unregistered = true; + list_del_rcu(&data->list); + update_cond_flag(file); + trace_event_trigger_enable_disable(file, 0); + break; + } + } + + if (unregistered && data->ops->free) + data->ops->free(data->ops, data); +} + +static struct event_trigger_ops * +event_enable_get_trigger_ops(char *cmd, char *param) +{ + struct event_trigger_ops *ops; + bool enable; + + enable = strcmp(cmd, ENABLE_EVENT_STR) == 0; + + if (enable) + ops = param ? &event_enable_count_trigger_ops : + &event_enable_trigger_ops; + else + ops = param ? 
&event_disable_count_trigger_ops : + &event_disable_trigger_ops; + + return ops; +} + +static struct event_command trigger_enable_cmd = { + .name = ENABLE_EVENT_STR, + .trigger_type = ETT_EVENT_ENABLE, + .func = event_enable_trigger_func, + .reg = event_enable_register_trigger, + .unreg = event_enable_unregister_trigger, + .get_trigger_ops = event_enable_get_trigger_ops, + .set_filter = set_trigger_filter, +}; + +static struct event_command trigger_disable_cmd = { + .name = DISABLE_EVENT_STR, + .trigger_type = ETT_EVENT_ENABLE, + .func = event_enable_trigger_func, + .reg = event_enable_register_trigger, + .unreg = event_enable_unregister_trigger, + .get_trigger_ops = event_enable_get_trigger_ops, + .set_filter = set_trigger_filter, +}; + +static __init void unregister_trigger_enable_disable_cmds(void) +{ + unregister_event_command(&trigger_enable_cmd); + unregister_event_command(&trigger_disable_cmd); +} + +static __init int register_trigger_enable_disable_cmds(void) +{ + int ret; + + ret = register_event_command(&trigger_enable_cmd); + if (WARN_ON(ret < 0)) + return ret; + ret = register_event_command(&trigger_disable_cmd); + if (WARN_ON(ret < 0)) + unregister_trigger_enable_disable_cmds(); + + return ret; +} + +static __init int register_trigger_traceon_traceoff_cmds(void) +{ + int ret; + + ret = register_event_command(&trigger_traceon_cmd); + if (WARN_ON(ret < 0)) + return ret; + ret = register_event_command(&trigger_traceoff_cmd); + if (WARN_ON(ret < 0)) + unregister_trigger_traceon_traceoff_cmds(); + + return ret; +} + +__init int register_trigger_cmds(void) +{ + register_trigger_traceon_traceoff_cmds(); + register_trigger_snapshot_cmd(); + register_trigger_stacktrace_cmd(); + register_trigger_enable_disable_cmds(); + + return 0; +} diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index d21a7467008..d4ddde28a81 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -95,15 +95,12 @@ static void __always_unused ____ftrace_check_##name(void) \ #undef __array #define __array(type, item, len) \ do { \ + char *type_str = #type"["__stringify(len)"]"; \ BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ - mutex_lock(&event_storage_mutex); \ - snprintf(event_storage, sizeof(event_storage), \ - "%s[%d]", #type, len); \ - ret = trace_define_field(event_call, event_storage, #item, \ + ret = trace_define_field(event_call, type_str, #item, \ offsetof(typeof(field), item), \ sizeof(field.item), \ is_signed_type(type), filter_type); \ - mutex_unlock(&event_storage_mutex); \ if (ret) \ return ret; \ } while (0); @@ -176,11 +173,13 @@ struct ftrace_event_class __refdata event_class_ftrace_##call = { \ }; \ \ struct ftrace_event_call __used event_##call = { \ - .name = #call, \ - .event.type = etype, \ .class = &event_class_ftrace_##call, \ + { \ + .name = #call, \ + }, \ + .event.type = etype, \ .print_fmt = print, \ - .flags = TRACE_EVENT_FL_IGNORE_ENABLE, \ + .flags = TRACE_EVENT_FL_IGNORE_ENABLE | TRACE_EVENT_FL_USE_CALL_FILTER, \ }; \ struct ftrace_event_call __used \ __attribute__((section("_ftrace_events"))) *__event_##call = &event_##call; diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 38fe1483c50..57f0ec962d2 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -13,33 +13,106 @@ #include <linux/debugfs.h> #include <linux/uaccess.h> #include <linux/ftrace.h> +#include <linux/slab.h> #include <linux/fs.h> #include "trace.h" -/* function tracing enabled */ -static int ftrace_function_enabled; 
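As a usage aside for the trigger parsing added above in trace_events_trigger.c: event_trigger_callback() and event_enable_trigger_func() accept strings of the form <command>[:<count>] [if <filter>] written to a per-event "trigger" file, and a leading '!' removes an existing trigger. The sketch below is only illustrative; it assumes the usual debugfs mount point plus the sched:sched_switch event and its prev_pid field, none of which is part of this patch.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	/* Assumed path; adjust to wherever tracefs/debugfs is mounted. */
	const char *path =
		"/sys/kernel/debug/tracing/events/sched/sched_switch/trigger";
	/* One-shot stacktrace trigger, gated by a filter on the triggering event. */
	const char cmd[] = "stacktrace:1 if prev_pid == 0";
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror("open trigger file");
		return 1;
	}
	if (write(fd, cmd, strlen(cmd)) < 0)
		perror("write trigger");
	/* Writing "!stacktrace" to the same file would remove the trigger again. */
	close(fd);
	return 0;
}

The enable_event/disable_event commands registered above take an extra system:event prefix before the count, e.g. "enable_event:kmem:kmalloc:5", matching the (s:e:n [if filter]) comment in event_enable_trigger_func().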
+static void tracing_start_function_trace(struct trace_array *tr); +static void tracing_stop_function_trace(struct trace_array *tr); +static void +function_trace_call(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct pt_regs *pt_regs); +static void +function_stack_trace_call(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct pt_regs *pt_regs); +static struct tracer_flags func_flags; + +/* Our option */ +enum { + TRACE_FUNC_OPT_STACK = 0x1, +}; + +static int allocate_ftrace_ops(struct trace_array *tr) +{ + struct ftrace_ops *ops; -static struct trace_array *func_trace; + ops = kzalloc(sizeof(*ops), GFP_KERNEL); + if (!ops) + return -ENOMEM; + + /* Currently only the non-stack version is supported */ + ops->func = function_trace_call; + ops->flags = FTRACE_OPS_FL_RECURSION_SAFE; + + tr->ops = ops; + ops->private = tr; + return 0; +} -static void tracing_start_function_trace(void); -static void tracing_stop_function_trace(void); + +int ftrace_create_function_files(struct trace_array *tr, + struct dentry *parent) +{ + int ret; + + /* + * The top level array uses the "global_ops", and the files are + * created on boot up. + */ + if (tr->flags & TRACE_ARRAY_FL_GLOBAL) + return 0; + + ret = allocate_ftrace_ops(tr); + if (ret) + return ret; + + ftrace_create_filter_files(tr->ops, parent); + + return 0; +} + +void ftrace_destroy_function_files(struct trace_array *tr) +{ + ftrace_destroy_filter_files(tr->ops); + kfree(tr->ops); + tr->ops = NULL; +} static int function_trace_init(struct trace_array *tr) { - func_trace = tr; + ftrace_func_t func; + + /* + * Instance trace_arrays get their ops allocated + * at instance creation, unless that allocation failed. + */ + if (!tr->ops) + return -ENOMEM; + + /* Currently only the global instance can do stack tracing */ + if (tr->flags & TRACE_ARRAY_FL_GLOBAL && + func_flags.val & TRACE_FUNC_OPT_STACK) + func = function_stack_trace_call; + else + func = function_trace_call; + + ftrace_init_array_ops(tr, func); + tr->trace_buffer.cpu = get_cpu(); put_cpu(); tracing_start_cmdline_record(); - tracing_start_function_trace(); + tracing_start_function_trace(tr); return 0; } static void function_trace_reset(struct trace_array *tr) { - tracing_stop_function_trace(); + tracing_stop_function_trace(tr); tracing_stop_cmdline_record(); + ftrace_reset_array_ops(tr); } static void function_trace_start(struct trace_array *tr) @@ -47,25 +120,18 @@ static void function_trace_start(struct trace_array *tr) tracing_reset_online_cpus(&tr->trace_buffer); } -/* Our option */ -enum { - TRACE_FUNC_OPT_STACK = 0x1, -}; - -static struct tracer_flags func_flags; - static void function_trace_call(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct pt_regs *pt_regs) { - struct trace_array *tr = func_trace; + struct trace_array *tr = op->private; struct trace_array_cpu *data; unsigned long flags; int bit; int cpu; int pc; - if (unlikely(!ftrace_function_enabled)) + if (unlikely(!tr->function_enabled)) return; pc = preempt_count(); @@ -91,14 +157,14 @@ static void function_stack_trace_call(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct pt_regs *pt_regs) { - struct trace_array *tr = func_trace; + struct trace_array *tr = op->private; struct trace_array_cpu *data; unsigned long flags; long disabled; int cpu; int pc; - if (unlikely(!ftrace_function_enabled)) + if (unlikely(!tr->function_enabled)) return; /* @@ -128,19 +194,6 @@ function_stack_trace_call(unsigned long ip, unsigned long
parent_ip, local_irq_restore(flags); } - -static struct ftrace_ops trace_ops __read_mostly = -{ - .func = function_trace_call, - .flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE, -}; - -static struct ftrace_ops trace_stack_ops __read_mostly = -{ - .func = function_stack_trace_call, - .flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE, -}; - static struct tracer_opt func_opts[] = { #ifdef CONFIG_STACKTRACE { TRACER_OPT(func_stack_trace, TRACE_FUNC_OPT_STACK) }, @@ -153,29 +206,21 @@ static struct tracer_flags func_flags = { .opts = func_opts }; -static void tracing_start_function_trace(void) +static void tracing_start_function_trace(struct trace_array *tr) { - ftrace_function_enabled = 0; - - if (func_flags.val & TRACE_FUNC_OPT_STACK) - register_ftrace_function(&trace_stack_ops); - else - register_ftrace_function(&trace_ops); - - ftrace_function_enabled = 1; + tr->function_enabled = 0; + register_ftrace_function(tr->ops); + tr->function_enabled = 1; } -static void tracing_stop_function_trace(void) +static void tracing_stop_function_trace(struct trace_array *tr) { - ftrace_function_enabled = 0; - - if (func_flags.val & TRACE_FUNC_OPT_STACK) - unregister_ftrace_function(&trace_stack_ops); - else - unregister_ftrace_function(&trace_ops); + tr->function_enabled = 0; + unregister_ftrace_function(tr->ops); } -static int func_set_flag(u32 old_flags, u32 bit, int set) +static int +func_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set) { switch (bit) { case TRACE_FUNC_OPT_STACK: @@ -183,12 +228,14 @@ static int func_set_flag(u32 old_flags, u32 bit, int set) if (!!set == !!(func_flags.val & TRACE_FUNC_OPT_STACK)) break; + unregister_ftrace_function(tr->ops); + if (set) { - unregister_ftrace_function(&trace_ops); - register_ftrace_function(&trace_stack_ops); + tr->ops->func = function_stack_trace_call; + register_ftrace_function(tr->ops); } else { - unregister_ftrace_function(&trace_stack_ops); - register_ftrace_function(&trace_ops); + tr->ops->func = function_trace_call; + register_ftrace_function(tr->ops); } break; @@ -205,9 +252,9 @@ static struct tracer function_trace __tracer_data = .init = function_trace_init, .reset = function_trace_reset, .start = function_trace_start, - .wait_pipe = poll_wait_pipe, .flags = &func_flags, .set_flag = func_set_flag, + .allow_instances = true, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_function, #endif diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index b5c09242683..4de3e57f723 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -38,15 +38,6 @@ struct fgraph_data { #define TRACE_GRAPH_INDENT 2 -/* Flag options */ -#define TRACE_GRAPH_PRINT_OVERRUN 0x1 -#define TRACE_GRAPH_PRINT_CPU 0x2 -#define TRACE_GRAPH_PRINT_OVERHEAD 0x4 -#define TRACE_GRAPH_PRINT_PROC 0x8 -#define TRACE_GRAPH_PRINT_DURATION 0x10 -#define TRACE_GRAPH_PRINT_ABS_TIME 0x20 -#define TRACE_GRAPH_PRINT_IRQS 0x40 - static unsigned int max_depth; static struct tracer_opt trace_opts[] = { @@ -64,11 +55,13 @@ static struct tracer_opt trace_opts[] = { { TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) }, /* Display interrupts */ { TRACER_OPT(funcgraph-irqs, TRACE_GRAPH_PRINT_IRQS) }, + /* Display function name after trailing } */ + { TRACER_OPT(funcgraph-tail, TRACE_GRAPH_PRINT_TAIL) }, { } /* Empty entry */ }; static struct tracer_flags tracer_flags = { - /* Don't display overruns and proc by default */ + /* Don't display overruns, proc, or tail by 
default */ .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD | TRACE_GRAPH_PRINT_DURATION | TRACE_GRAPH_PRINT_IRQS, .opts = trace_opts @@ -82,9 +75,9 @@ static struct trace_array *graph_array; * to fill in space into DURATION column. */ enum { - DURATION_FILL_FULL = -1, - DURATION_FILL_START = -2, - DURATION_FILL_END = -3, + FLAGS_FILL_FULL = 1 << TRACE_GRAPH_PRINT_FILL_SHIFT, + FLAGS_FILL_START = 2 << TRACE_GRAPH_PRINT_FILL_SHIFT, + FLAGS_FILL_END = 3 << TRACE_GRAPH_PRINT_FILL_SHIFT, }; static enum print_line_t @@ -114,16 +107,37 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth, return -EBUSY; } + /* + * The curr_ret_stack is an index to ftrace return stack of + * current task. Its value should be in [0, FTRACE_RETFUNC_ + * DEPTH) when the function graph tracer is used. To support + * filtering out specific functions, it makes the index + * negative by subtracting huge value (FTRACE_NOTRACE_DEPTH) + * so when it sees a negative index the ftrace will ignore + * the record. And the index gets recovered when returning + * from the filtered function by adding the FTRACE_NOTRACE_ + * DEPTH and then it'll continue to record functions normally. + * + * The curr_ret_stack is initialized to -1 and get increased + * in this function. So it can be less than -1 only if it was + * filtered out via ftrace_graph_notrace_addr() which can be + * set from set_graph_notrace file in debugfs by user. + */ + if (current->curr_ret_stack < -1) + return -EBUSY; + calltime = trace_clock_local(); index = ++current->curr_ret_stack; + if (ftrace_graph_notrace_addr(func)) + current->curr_ret_stack -= FTRACE_NOTRACE_DEPTH; barrier(); current->ret_stack[index].ret = ret; current->ret_stack[index].func = func; current->ret_stack[index].calltime = calltime; current->ret_stack[index].subtime = 0; current->ret_stack[index].fp = frame_pointer; - *depth = index; + *depth = current->curr_ret_stack; return 0; } @@ -137,7 +151,17 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret, index = current->curr_ret_stack; - if (unlikely(index < 0)) { + /* + * A negative index here means that it's just returned from a + * notrace'd function. Recover index to get an original + * return address. See ftrace_push_return_trace(). + * + * TODO: Need to check whether the stack gets corrupted. + */ + if (index < 0) + index += FTRACE_NOTRACE_DEPTH; + + if (unlikely(index < 0 || index >= FTRACE_RETFUNC_DEPTH)) { ftrace_graph_stop(); WARN_ON(1); /* Might as well panic, otherwise we have no where to go */ @@ -193,6 +217,15 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer) trace.rettime = trace_clock_local(); barrier(); current->curr_ret_stack--; + /* + * The curr_ret_stack can be less than -1 only if it was + * filtered out and it's about to return from the function. + * Recover the index and continue to trace normal functions. + */ + if (current->curr_ret_stack < -1) { + current->curr_ret_stack += FTRACE_NOTRACE_DEPTH; + return ret; + } /* * The trace should run after decrementing the ret counter @@ -230,7 +263,7 @@ int __trace_graph_entry(struct trace_array *tr, return 0; entry = ring_buffer_event_data(event); entry->graph_ent = *trace; - if (!filter_current_check_discard(buffer, call, entry, event)) + if (!call_filter_check_discard(call, entry, buffer, event)) __buffer_unlock_commit(buffer, event); return 1; @@ -259,10 +292,20 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) /* trace it when it is-nested-in or is a function enabled. 
*/ if ((!(trace->depth || ftrace_graph_addr(trace->func)) || - ftrace_graph_ignore_irqs()) || + ftrace_graph_ignore_irqs()) || (trace->depth < 0) || (max_depth && trace->depth >= max_depth)) return 0; + /* + * Do not trace a function if it's filtered by set_graph_notrace. + * Make the index of ret stack negative to indicate that it should + * ignore further functions. But it needs its own ret stack entry + * to recover the original index in order to continue tracing after + * returning from the function. + */ + if (ftrace_graph_notrace_addr(trace->func)) + return 1; + local_irq_save(flags); cpu = raw_smp_processor_id(); data = per_cpu_ptr(tr->trace_buffer.data, cpu); @@ -335,7 +378,7 @@ void __trace_graph_return(struct trace_array *tr, return; entry = ring_buffer_event_data(event); entry->ret = *trace; - if (!filter_current_check_discard(buffer, call, entry, event)) + if (!call_filter_check_discard(call, entry, buffer, event)) __buffer_unlock_commit(buffer, event); } @@ -652,7 +695,7 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr, } /* No overhead */ - ret = print_graph_duration(DURATION_FILL_START, s, flags); + ret = print_graph_duration(0, s, flags | FLAGS_FILL_START); if (ret != TRACE_TYPE_HANDLED) return ret; @@ -664,7 +707,7 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr, if (!ret) return TRACE_TYPE_PARTIAL_LINE; - ret = print_graph_duration(DURATION_FILL_END, s, flags); + ret = print_graph_duration(0, s, flags | FLAGS_FILL_END); if (ret != TRACE_TYPE_HANDLED) return ret; @@ -729,14 +772,14 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s, return TRACE_TYPE_HANDLED; /* No real adata, just filling the column with spaces */ - switch (duration) { - case DURATION_FILL_FULL: + switch (flags & TRACE_GRAPH_PRINT_FILL_MASK) { + case FLAGS_FILL_FULL: ret = trace_seq_puts(s, " | "); return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; - case DURATION_FILL_START: + case FLAGS_FILL_START: ret = trace_seq_puts(s, " "); return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; - case DURATION_FILL_END: + case FLAGS_FILL_END: ret = trace_seq_puts(s, " |"); return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; } @@ -852,7 +895,7 @@ print_graph_entry_nested(struct trace_iterator *iter, } /* No time */ - ret = print_graph_duration(DURATION_FILL_FULL, s, flags); + ret = print_graph_duration(0, s, flags | FLAGS_FILL_FULL); if (ret != TRACE_TYPE_HANDLED) return ret; @@ -1126,9 +1169,10 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, * If the return function does not have a matching entry, * then the entry was lost. Instead of just printing * the '}' and letting the user guess what function this - * belongs to, write out the function name. + * belongs to, write out the function name. Always do + * that if the funcgraph-tail option is enabled. 
*/ - if (func_match) { + if (func_match && !(flags & TRACE_GRAPH_PRINT_TAIL)) { ret = trace_seq_puts(s, "}\n"); if (!ret) return TRACE_TYPE_PARTIAL_LINE; @@ -1172,7 +1216,7 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent, return TRACE_TYPE_PARTIAL_LINE; /* No time */ - ret = print_graph_duration(DURATION_FILL_FULL, s, flags); + ret = print_graph_duration(0, s, flags | FLAGS_FILL_FULL); if (ret != TRACE_TYPE_HANDLED) return ret; @@ -1426,7 +1470,8 @@ void graph_trace_close(struct trace_iterator *iter) } } -static int func_graph_set_flag(u32 old_flags, u32 bit, int set) +static int +func_graph_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set) { if (bit == TRACE_GRAPH_PRINT_IRQS) ftrace_graph_skip_irqs = !set; @@ -1454,7 +1499,6 @@ static struct tracer graph_trace __tracer_data = { .pipe_open = graph_trace_open, .close = graph_trace_close, .pipe_close = graph_trace_close, - .wait_pipe = poll_wait_pipe, .init = graph_trace_init, .reset = graph_trace_reset, .print_line = print_graph_function, diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 2aefbee93a6..9bb104f748d 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -151,16 +151,11 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip, atomic_dec(&data->disabled); } - -static struct ftrace_ops trace_ops __read_mostly = -{ - .func = irqsoff_tracer_call, - .flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE, -}; #endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER -static int irqsoff_set_flag(u32 old_flags, u32 bit, int set) +static int +irqsoff_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set) { int cpu; @@ -175,7 +170,7 @@ static int irqsoff_set_flag(u32 old_flags, u32 bit, int set) for_each_possible_cpu(cpu) per_cpu(tracing_cpu, cpu) = 0; - tracing_max_latency = 0; + tr->max_latency = 0; tracing_reset_online_cpus(&irqsoff_trace->trace_buffer); return start_irqsoff_tracer(irqsoff_trace, set); @@ -266,7 +261,8 @@ __trace_function(struct trace_array *tr, #else #define __trace_function trace_function -static int irqsoff_set_flag(u32 old_flags, u32 bit, int set) +static int +irqsoff_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set) { return -EINVAL; } @@ -301,13 +297,13 @@ static void irqsoff_print_header(struct seq_file *s) /* * Should this new latency be reported/recorded? 
*/ -static int report_latency(cycle_t delta) +static int report_latency(struct trace_array *tr, cycle_t delta) { if (tracing_thresh) { if (delta < tracing_thresh) return 0; } else { - if (delta <= tracing_max_latency) + if (delta <= tr->max_latency) return 0; } return 1; @@ -331,13 +327,13 @@ check_critical_timing(struct trace_array *tr, pc = preempt_count(); - if (!report_latency(delta)) + if (!report_latency(tr, delta)) goto out; raw_spin_lock_irqsave(&max_trace_lock, flags); /* check if we are still the max latency */ - if (!report_latency(delta)) + if (!report_latency(tr, delta)) goto out_unlock; __trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); @@ -350,7 +346,7 @@ check_critical_timing(struct trace_array *tr, data->critical_end = parent_ip; if (likely(!is_tracing_stopped())) { - tracing_max_latency = delta; + tr->max_latency = delta; update_max_tr_single(tr, current, cpu); } @@ -498,14 +494,14 @@ void trace_hardirqs_off(void) } EXPORT_SYMBOL(trace_hardirqs_off); -void trace_hardirqs_on_caller(unsigned long caller_addr) +__visible void trace_hardirqs_on_caller(unsigned long caller_addr) { if (!preempt_trace() && irq_trace()) stop_critical_timing(CALLER_ADDR0, caller_addr); } EXPORT_SYMBOL(trace_hardirqs_on_caller); -void trace_hardirqs_off_caller(unsigned long caller_addr) +__visible void trace_hardirqs_off_caller(unsigned long caller_addr) { if (!preempt_trace() && irq_trace()) start_critical_timing(CALLER_ADDR0, caller_addr); @@ -529,7 +525,7 @@ void trace_preempt_off(unsigned long a0, unsigned long a1) } #endif /* CONFIG_PREEMPT_TRACER */ -static int register_irqsoff_function(int graph, int set) +static int register_irqsoff_function(struct trace_array *tr, int graph, int set) { int ret; @@ -541,7 +537,7 @@ static int register_irqsoff_function(int graph, int set) ret = register_ftrace_graph(&irqsoff_graph_return, &irqsoff_graph_entry); else - ret = register_ftrace_function(&trace_ops); + ret = register_ftrace_function(tr->ops); if (!ret) function_enabled = true; @@ -549,7 +545,7 @@ static int register_irqsoff_function(int graph, int set) return ret; } -static void unregister_irqsoff_function(int graph) +static void unregister_irqsoff_function(struct trace_array *tr, int graph) { if (!function_enabled) return; @@ -557,23 +553,25 @@ static void unregister_irqsoff_function(int graph) if (graph) unregister_ftrace_graph(); else - unregister_ftrace_function(&trace_ops); + unregister_ftrace_function(tr->ops); function_enabled = false; } -static void irqsoff_function_set(int set) +static void irqsoff_function_set(struct trace_array *tr, int set) { if (set) - register_irqsoff_function(is_graph(), 1); + register_irqsoff_function(tr, is_graph(), 1); else - unregister_irqsoff_function(is_graph()); + unregister_irqsoff_function(tr, is_graph()); } -static int irqsoff_flag_changed(struct tracer *tracer, u32 mask, int set) +static int irqsoff_flag_changed(struct trace_array *tr, u32 mask, int set) { + struct tracer *tracer = tr->current_trace; + if (mask & TRACE_ITER_FUNCTION) - irqsoff_function_set(set); + irqsoff_function_set(tr, set); return trace_keep_overwrite(tracer, mask, set); } @@ -582,7 +580,7 @@ static int start_irqsoff_tracer(struct trace_array *tr, int graph) { int ret; - ret = register_irqsoff_function(graph, 0); + ret = register_irqsoff_function(tr, graph, 0); if (!ret && tracing_is_enabled()) tracer_enabled = 1; @@ -596,25 +594,37 @@ static void stop_irqsoff_tracer(struct trace_array *tr, int graph) { tracer_enabled = 0; - unregister_irqsoff_function(graph); + 
unregister_irqsoff_function(tr, graph); } -static void __irqsoff_tracer_init(struct trace_array *tr) +static bool irqsoff_busy; + +static int __irqsoff_tracer_init(struct trace_array *tr) { + if (irqsoff_busy) + return -EBUSY; + save_flags = trace_flags; /* non overwrite screws up the latency tracers */ set_tracer_flag(tr, TRACE_ITER_OVERWRITE, 1); set_tracer_flag(tr, TRACE_ITER_LATENCY_FMT, 1); - tracing_max_latency = 0; + tr->max_latency = 0; irqsoff_trace = tr; /* make sure that the tracer is visible */ smp_wmb(); tracing_reset_online_cpus(&tr->trace_buffer); - if (start_irqsoff_tracer(tr, is_graph())) + ftrace_init_array_ops(tr, irqsoff_tracer_call); + + /* Only toplevel instance supports graph tracing */ + if (start_irqsoff_tracer(tr, (tr->flags & TRACE_ARRAY_FL_GLOBAL && + is_graph()))) printk(KERN_ERR "failed to start irqsoff tracer\n"); + + irqsoff_busy = true; + return 0; } static void irqsoff_tracer_reset(struct trace_array *tr) @@ -626,6 +636,9 @@ static void irqsoff_tracer_reset(struct trace_array *tr) set_tracer_flag(tr, TRACE_ITER_LATENCY_FMT, lat_flag); set_tracer_flag(tr, TRACE_ITER_OVERWRITE, overwrite_flag); + ftrace_reset_array_ops(tr); + + irqsoff_busy = false; } static void irqsoff_tracer_start(struct trace_array *tr) @@ -643,8 +656,7 @@ static int irqsoff_tracer_init(struct trace_array *tr) { trace_type = TRACER_IRQS_OFF; - __irqsoff_tracer_init(tr); - return 0; + return __irqsoff_tracer_init(tr); } static struct tracer irqsoff_tracer __read_mostly = { @@ -664,6 +676,7 @@ static struct tracer irqsoff_tracer __read_mostly = #endif .open = irqsoff_trace_open, .close = irqsoff_trace_close, + .allow_instances = true, .use_max_tr = true, }; # define register_irqsoff(trace) register_tracer(&trace) @@ -676,8 +689,7 @@ static int preemptoff_tracer_init(struct trace_array *tr) { trace_type = TRACER_PREEMPT_OFF; - __irqsoff_tracer_init(tr); - return 0; + return __irqsoff_tracer_init(tr); } static struct tracer preemptoff_tracer __read_mostly = @@ -698,6 +710,7 @@ static struct tracer preemptoff_tracer __read_mostly = #endif .open = irqsoff_trace_open, .close = irqsoff_trace_close, + .allow_instances = true, .use_max_tr = true, }; # define register_preemptoff(trace) register_tracer(&trace) @@ -712,8 +725,7 @@ static int preemptirqsoff_tracer_init(struct trace_array *tr) { trace_type = TRACER_IRQS_OFF | TRACER_PREEMPT_OFF; - __irqsoff_tracer_init(tr); - return 0; + return __irqsoff_tracer_init(tr); } static struct tracer preemptirqsoff_tracer __read_mostly = @@ -734,6 +746,7 @@ static struct tracer preemptirqsoff_tracer __read_mostly = #endif .open = irqsoff_trace_open, .close = irqsoff_trace_close, + .allow_instances = true, .use_max_tr = true, }; diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 243f6834d02..282f6e4e553 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -27,75 +27,54 @@ /** * Kprobe event core functions */ -struct trace_probe { +struct trace_kprobe { struct list_head list; struct kretprobe rp; /* Use rp.kp for kprobe use */ unsigned long nhit; - unsigned int flags; /* For TP_FLAG_* */ const char *symbol; /* symbol name */ - struct ftrace_event_class class; - struct ftrace_event_call call; - struct list_head files; - ssize_t size; /* trace entry size */ - unsigned int nr_args; - struct probe_arg args[]; + struct trace_probe tp; }; -struct event_file_link { - struct ftrace_event_file *file; - struct list_head list; -}; - -#define SIZEOF_TRACE_PROBE(n) \ - (offsetof(struct trace_probe, args) + \ +#define 
SIZEOF_TRACE_KPROBE(n) \ + (offsetof(struct trace_kprobe, tp.args) + \ (sizeof(struct probe_arg) * (n))) -static __kprobes bool trace_probe_is_return(struct trace_probe *tp) -{ - return tp->rp.handler != NULL; -} - -static __kprobes const char *trace_probe_symbol(struct trace_probe *tp) -{ - return tp->symbol ? tp->symbol : "unknown"; -} - -static __kprobes unsigned long trace_probe_offset(struct trace_probe *tp) +static nokprobe_inline bool trace_kprobe_is_return(struct trace_kprobe *tk) { - return tp->rp.kp.offset; + return tk->rp.handler != NULL; } -static __kprobes bool trace_probe_is_enabled(struct trace_probe *tp) +static nokprobe_inline const char *trace_kprobe_symbol(struct trace_kprobe *tk) { - return !!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE)); + return tk->symbol ? tk->symbol : "unknown"; } -static __kprobes bool trace_probe_is_registered(struct trace_probe *tp) +static nokprobe_inline unsigned long trace_kprobe_offset(struct trace_kprobe *tk) { - return !!(tp->flags & TP_FLAG_REGISTERED); + return tk->rp.kp.offset; } -static __kprobes bool trace_probe_has_gone(struct trace_probe *tp) +static nokprobe_inline bool trace_kprobe_has_gone(struct trace_kprobe *tk) { - return !!(kprobe_gone(&tp->rp.kp)); + return !!(kprobe_gone(&tk->rp.kp)); } -static __kprobes bool trace_probe_within_module(struct trace_probe *tp, - struct module *mod) +static nokprobe_inline bool trace_kprobe_within_module(struct trace_kprobe *tk, + struct module *mod) { int len = strlen(mod->name); - const char *name = trace_probe_symbol(tp); + const char *name = trace_kprobe_symbol(tk); return strncmp(mod->name, name, len) == 0 && name[len] == ':'; } -static __kprobes bool trace_probe_is_on_module(struct trace_probe *tp) +static nokprobe_inline bool trace_kprobe_is_on_module(struct trace_kprobe *tk) { - return !!strchr(trace_probe_symbol(tp), ':'); + return !!strchr(trace_kprobe_symbol(tk), ':'); } -static int register_probe_event(struct trace_probe *tp); -static int unregister_probe_event(struct trace_probe *tp); +static int register_kprobe_event(struct trace_kprobe *tk); +static int unregister_kprobe_event(struct trace_kprobe *tk); static DEFINE_MUTEX(probe_lock); static LIST_HEAD(probe_list); @@ -104,45 +83,231 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs); static int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs); +/* Memory fetching by symbol */ +struct symbol_cache { + char *symbol; + long offset; + unsigned long addr; +}; + +unsigned long update_symbol_cache(struct symbol_cache *sc) +{ + sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol); + + if (sc->addr) + sc->addr += sc->offset; + + return sc->addr; +} + +void free_symbol_cache(struct symbol_cache *sc) +{ + kfree(sc->symbol); + kfree(sc); +} + +struct symbol_cache *alloc_symbol_cache(const char *sym, long offset) +{ + struct symbol_cache *sc; + + if (!sym || strlen(sym) == 0) + return NULL; + + sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL); + if (!sc) + return NULL; + + sc->symbol = kstrdup(sym, GFP_KERNEL); + if (!sc->symbol) { + kfree(sc); + return NULL; + } + sc->offset = offset; + update_symbol_cache(sc); + + return sc; +} + +/* + * Kprobes-specific fetch functions + */ +#define DEFINE_FETCH_stack(type) \ +static void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs, \ + void *offset, void *dest) \ +{ \ + *(type *)dest = (type)regs_get_kernel_stack_nth(regs, \ + (unsigned int)((unsigned long)offset)); \ +} \ +NOKPROBE_SYMBOL(FETCH_FUNC_NAME(stack, type)); + 
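For reference, the DEFINE_FETCH_stack() macro just above stamps out one helper per basic type when DEFINE_BASIC_FETCH_FUNCS(stack) below is expanded. Assuming FETCH_FUNC_NAME(method, type) pastes the two names together as fetch_<method>_<type> (as defined in trace_probe.h), the u32 instance would look roughly like the sketch below; these helpers are what back the $stackN fetch arguments of kprobe events.

/* Approximate expansion of DEFINE_FETCH_stack(u32); illustrative only. */
static void fetch_stack_u32(struct pt_regs *regs, void *offset, void *dest)
{
	/* 'offset' carries the stack slot index N from a "$stackN" argument. */
	*(u32 *)dest = (u32)regs_get_kernel_stack_nth(regs,
				(unsigned int)((unsigned long)offset));
}
NOKPROBE_SYMBOL(fetch_stack_u32);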
+DEFINE_BASIC_FETCH_FUNCS(stack) +/* No string on the stack entry */ +#define fetch_stack_string NULL +#define fetch_stack_string_size NULL + +#define DEFINE_FETCH_memory(type) \ +static void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs, \ + void *addr, void *dest) \ +{ \ + type retval; \ + if (probe_kernel_address(addr, retval)) \ + *(type *)dest = 0; \ + else \ + *(type *)dest = retval; \ +} \ +NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, type)); + +DEFINE_BASIC_FETCH_FUNCS(memory) +/* + * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max + * length and relative data location. + */ +static void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs, + void *addr, void *dest) +{ + long ret; + int maxlen = get_rloc_len(*(u32 *)dest); + u8 *dst = get_rloc_data(dest); + u8 *src = addr; + mm_segment_t old_fs = get_fs(); + + if (!maxlen) + return; + + /* + * Try to get string again, since the string can be changed while + * probing. + */ + set_fs(KERNEL_DS); + pagefault_disable(); + + do + ret = __copy_from_user_inatomic(dst++, src++, 1); + while (dst[-1] && ret == 0 && src - (u8 *)addr < maxlen); + + dst[-1] = '\0'; + pagefault_enable(); + set_fs(old_fs); + + if (ret < 0) { /* Failed to fetch string */ + ((u8 *)get_rloc_data(dest))[0] = '\0'; + *(u32 *)dest = make_data_rloc(0, get_rloc_offs(*(u32 *)dest)); + } else { + *(u32 *)dest = make_data_rloc(src - (u8 *)addr, + get_rloc_offs(*(u32 *)dest)); + } +} +NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, string)); + +/* Return the length of string -- including null terminal byte */ +static void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs, + void *addr, void *dest) +{ + mm_segment_t old_fs; + int ret, len = 0; + u8 c; + + old_fs = get_fs(); + set_fs(KERNEL_DS); + pagefault_disable(); + + do { + ret = __copy_from_user_inatomic(&c, (u8 *)addr + len, 1); + len++; + } while (c && ret == 0 && len < MAX_STRING_SIZE); + + pagefault_enable(); + set_fs(old_fs); + + if (ret < 0) /* Failed to check the length */ + *(u32 *)dest = 0; + else + *(u32 *)dest = len; +} +NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, string_size)); + +#define DEFINE_FETCH_symbol(type) \ +void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs, void *data, void *dest)\ +{ \ + struct symbol_cache *sc = data; \ + if (sc->addr) \ + fetch_memory_##type(regs, (void *)sc->addr, dest); \ + else \ + *(type *)dest = 0; \ +} \ +NOKPROBE_SYMBOL(FETCH_FUNC_NAME(symbol, type)); + +DEFINE_BASIC_FETCH_FUNCS(symbol) +DEFINE_FETCH_symbol(string) +DEFINE_FETCH_symbol(string_size) + +/* kprobes don't support file_offset fetch methods */ +#define fetch_file_offset_u8 NULL +#define fetch_file_offset_u16 NULL +#define fetch_file_offset_u32 NULL +#define fetch_file_offset_u64 NULL +#define fetch_file_offset_string NULL +#define fetch_file_offset_string_size NULL + +/* Fetch type information table */ +const struct fetch_type kprobes_fetch_type_table[] = { + /* Special types */ + [FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string, + sizeof(u32), 1, "__data_loc char[]"), + [FETCH_TYPE_STRSIZE] = __ASSIGN_FETCH_TYPE("string_size", u32, + string_size, sizeof(u32), 0, "u32"), + /* Basic types */ + ASSIGN_FETCH_TYPE(u8, u8, 0), + ASSIGN_FETCH_TYPE(u16, u16, 0), + ASSIGN_FETCH_TYPE(u32, u32, 0), + ASSIGN_FETCH_TYPE(u64, u64, 0), + ASSIGN_FETCH_TYPE(s8, u8, 1), + ASSIGN_FETCH_TYPE(s16, u16, 1), + ASSIGN_FETCH_TYPE(s32, u32, 1), + ASSIGN_FETCH_TYPE(s64, u64, 1), + + ASSIGN_FETCH_TYPE_END +}; + /* * Allocate new trace_probe and initialize it (including kprobes). 
*/ -static struct trace_probe *alloc_trace_probe(const char *group, +static struct trace_kprobe *alloc_trace_kprobe(const char *group, const char *event, void *addr, const char *symbol, unsigned long offs, int nargs, bool is_return) { - struct trace_probe *tp; + struct trace_kprobe *tk; int ret = -ENOMEM; - tp = kzalloc(SIZEOF_TRACE_PROBE(nargs), GFP_KERNEL); - if (!tp) + tk = kzalloc(SIZEOF_TRACE_KPROBE(nargs), GFP_KERNEL); + if (!tk) return ERR_PTR(ret); if (symbol) { - tp->symbol = kstrdup(symbol, GFP_KERNEL); - if (!tp->symbol) + tk->symbol = kstrdup(symbol, GFP_KERNEL); + if (!tk->symbol) goto error; - tp->rp.kp.symbol_name = tp->symbol; - tp->rp.kp.offset = offs; + tk->rp.kp.symbol_name = tk->symbol; + tk->rp.kp.offset = offs; } else - tp->rp.kp.addr = addr; + tk->rp.kp.addr = addr; if (is_return) - tp->rp.handler = kretprobe_dispatcher; + tk->rp.handler = kretprobe_dispatcher; else - tp->rp.kp.pre_handler = kprobe_dispatcher; + tk->rp.kp.pre_handler = kprobe_dispatcher; if (!event || !is_good_name(event)) { ret = -EINVAL; goto error; } - tp->call.class = &tp->class; - tp->call.name = kstrdup(event, GFP_KERNEL); - if (!tp->call.name) + tk->tp.call.class = &tk->tp.class; + tk->tp.call.name = kstrdup(event, GFP_KERNEL); + if (!tk->tp.call.name) goto error; if (!group || !is_good_name(group)) { @@ -150,42 +315,42 @@ static struct trace_probe *alloc_trace_probe(const char *group, goto error; } - tp->class.system = kstrdup(group, GFP_KERNEL); - if (!tp->class.system) + tk->tp.class.system = kstrdup(group, GFP_KERNEL); + if (!tk->tp.class.system) goto error; - INIT_LIST_HEAD(&tp->list); - INIT_LIST_HEAD(&tp->files); - return tp; + INIT_LIST_HEAD(&tk->list); + INIT_LIST_HEAD(&tk->tp.files); + return tk; error: - kfree(tp->call.name); - kfree(tp->symbol); - kfree(tp); + kfree(tk->tp.call.name); + kfree(tk->symbol); + kfree(tk); return ERR_PTR(ret); } -static void free_trace_probe(struct trace_probe *tp) +static void free_trace_kprobe(struct trace_kprobe *tk) { int i; - for (i = 0; i < tp->nr_args; i++) - traceprobe_free_probe_arg(&tp->args[i]); + for (i = 0; i < tk->tp.nr_args; i++) + traceprobe_free_probe_arg(&tk->tp.args[i]); - kfree(tp->call.class->system); - kfree(tp->call.name); - kfree(tp->symbol); - kfree(tp); + kfree(tk->tp.call.class->system); + kfree(tk->tp.call.name); + kfree(tk->symbol); + kfree(tk); } -static struct trace_probe *find_trace_probe(const char *event, - const char *group) +static struct trace_kprobe *find_trace_kprobe(const char *event, + const char *group) { - struct trace_probe *tp; + struct trace_kprobe *tk; - list_for_each_entry(tp, &probe_list, list) - if (strcmp(tp->call.name, event) == 0 && - strcmp(tp->call.class->system, group) == 0) - return tp; + list_for_each_entry(tk, &probe_list, list) + if (strcmp(ftrace_event_name(&tk->tp.call), event) == 0 && + strcmp(tk->tp.call.class->system, group) == 0) + return tk; return NULL; } @@ -194,7 +359,7 @@ static struct trace_probe *find_trace_probe(const char *event, * if the file is NULL, enable "perf" handler, or enable "trace" handler. 
*/ static int -enable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) +enable_trace_kprobe(struct trace_kprobe *tk, struct ftrace_event_file *file) { int ret = 0; @@ -208,47 +373,35 @@ enable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) } link->file = file; - list_add_tail_rcu(&link->list, &tp->files); + list_add_tail_rcu(&link->list, &tk->tp.files); - tp->flags |= TP_FLAG_TRACE; + tk->tp.flags |= TP_FLAG_TRACE; } else - tp->flags |= TP_FLAG_PROFILE; + tk->tp.flags |= TP_FLAG_PROFILE; - if (trace_probe_is_registered(tp) && !trace_probe_has_gone(tp)) { - if (trace_probe_is_return(tp)) - ret = enable_kretprobe(&tp->rp); + if (trace_probe_is_registered(&tk->tp) && !trace_kprobe_has_gone(tk)) { + if (trace_kprobe_is_return(tk)) + ret = enable_kretprobe(&tk->rp); else - ret = enable_kprobe(&tp->rp.kp); + ret = enable_kprobe(&tk->rp.kp); } out: return ret; } -static struct event_file_link * -find_event_file_link(struct trace_probe *tp, struct ftrace_event_file *file) -{ - struct event_file_link *link; - - list_for_each_entry(link, &tp->files, list) - if (link->file == file) - return link; - - return NULL; -} - /* * Disable trace_probe * if the file is NULL, disable "perf" handler, or disable "trace" handler. */ static int -disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) +disable_trace_kprobe(struct trace_kprobe *tk, struct ftrace_event_file *file) { struct event_file_link *link = NULL; int wait = 0; int ret = 0; if (file) { - link = find_event_file_link(tp, file); + link = find_event_file_link(&tk->tp, file); if (!link) { ret = -EINVAL; goto out; @@ -256,18 +409,18 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) list_del_rcu(&link->list); wait = 1; - if (!list_empty(&tp->files)) + if (!list_empty(&tk->tp.files)) goto out; - tp->flags &= ~TP_FLAG_TRACE; + tk->tp.flags &= ~TP_FLAG_TRACE; } else - tp->flags &= ~TP_FLAG_PROFILE; + tk->tp.flags &= ~TP_FLAG_PROFILE; - if (!trace_probe_is_enabled(tp) && trace_probe_is_registered(tp)) { - if (trace_probe_is_return(tp)) - disable_kretprobe(&tp->rp); + if (!trace_probe_is_enabled(&tk->tp) && trace_probe_is_registered(&tk->tp)) { + if (trace_kprobe_is_return(tk)) + disable_kretprobe(&tk->rp); else - disable_kprobe(&tp->rp.kp); + disable_kprobe(&tk->rp.kp); wait = 1; } out: @@ -288,40 +441,40 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) } /* Internal register function - just handle k*probes and flags */ -static int __register_trace_probe(struct trace_probe *tp) +static int __register_trace_kprobe(struct trace_kprobe *tk) { int i, ret; - if (trace_probe_is_registered(tp)) + if (trace_probe_is_registered(&tk->tp)) return -EINVAL; - for (i = 0; i < tp->nr_args; i++) - traceprobe_update_arg(&tp->args[i]); + for (i = 0; i < tk->tp.nr_args; i++) + traceprobe_update_arg(&tk->tp.args[i]); /* Set/clear disabled flag according to tp->flag */ - if (trace_probe_is_enabled(tp)) - tp->rp.kp.flags &= ~KPROBE_FLAG_DISABLED; + if (trace_probe_is_enabled(&tk->tp)) + tk->rp.kp.flags &= ~KPROBE_FLAG_DISABLED; else - tp->rp.kp.flags |= KPROBE_FLAG_DISABLED; + tk->rp.kp.flags |= KPROBE_FLAG_DISABLED; - if (trace_probe_is_return(tp)) - ret = register_kretprobe(&tp->rp); + if (trace_kprobe_is_return(tk)) + ret = register_kretprobe(&tk->rp); else - ret = register_kprobe(&tp->rp.kp); + ret = register_kprobe(&tk->rp.kp); if (ret == 0) - tp->flags |= TP_FLAG_REGISTERED; + tk->tp.flags |= TP_FLAG_REGISTERED; else { pr_warning("Could not insert probe at 
%s+%lu: %d\n", - trace_probe_symbol(tp), trace_probe_offset(tp), ret); - if (ret == -ENOENT && trace_probe_is_on_module(tp)) { + trace_kprobe_symbol(tk), trace_kprobe_offset(tk), ret); + if (ret == -ENOENT && trace_kprobe_is_on_module(tk)) { pr_warning("This probe might be able to register after" "target module is loaded. Continue.\n"); ret = 0; } else if (ret == -EILSEQ) { pr_warning("Probing address(0x%p) is not an " "instruction boundary.\n", - tp->rp.kp.addr); + tk->rp.kp.addr); ret = -EINVAL; } } @@ -330,67 +483,68 @@ static int __register_trace_probe(struct trace_probe *tp) } /* Internal unregister function - just handle k*probes and flags */ -static void __unregister_trace_probe(struct trace_probe *tp) +static void __unregister_trace_kprobe(struct trace_kprobe *tk) { - if (trace_probe_is_registered(tp)) { - if (trace_probe_is_return(tp)) - unregister_kretprobe(&tp->rp); + if (trace_probe_is_registered(&tk->tp)) { + if (trace_kprobe_is_return(tk)) + unregister_kretprobe(&tk->rp); else - unregister_kprobe(&tp->rp.kp); - tp->flags &= ~TP_FLAG_REGISTERED; + unregister_kprobe(&tk->rp.kp); + tk->tp.flags &= ~TP_FLAG_REGISTERED; /* Cleanup kprobe for reuse */ - if (tp->rp.kp.symbol_name) - tp->rp.kp.addr = NULL; + if (tk->rp.kp.symbol_name) + tk->rp.kp.addr = NULL; } } /* Unregister a trace_probe and probe_event: call with locking probe_lock */ -static int unregister_trace_probe(struct trace_probe *tp) +static int unregister_trace_kprobe(struct trace_kprobe *tk) { /* Enabled event can not be unregistered */ - if (trace_probe_is_enabled(tp)) + if (trace_probe_is_enabled(&tk->tp)) return -EBUSY; /* Will fail if probe is being used by ftrace or perf */ - if (unregister_probe_event(tp)) + if (unregister_kprobe_event(tk)) return -EBUSY; - __unregister_trace_probe(tp); - list_del(&tp->list); + __unregister_trace_kprobe(tk); + list_del(&tk->list); return 0; } /* Register a trace_probe and probe_event */ -static int register_trace_probe(struct trace_probe *tp) +static int register_trace_kprobe(struct trace_kprobe *tk) { - struct trace_probe *old_tp; + struct trace_kprobe *old_tk; int ret; mutex_lock(&probe_lock); /* Delete old (same name) event if exist */ - old_tp = find_trace_probe(tp->call.name, tp->call.class->system); - if (old_tp) { - ret = unregister_trace_probe(old_tp); + old_tk = find_trace_kprobe(ftrace_event_name(&tk->tp.call), + tk->tp.call.class->system); + if (old_tk) { + ret = unregister_trace_kprobe(old_tk); if (ret < 0) goto end; - free_trace_probe(old_tp); + free_trace_kprobe(old_tk); } /* Register new event */ - ret = register_probe_event(tp); + ret = register_kprobe_event(tk); if (ret) { pr_warning("Failed to register probe event(%d)\n", ret); goto end; } /* Register k*probe */ - ret = __register_trace_probe(tp); + ret = __register_trace_kprobe(tk); if (ret < 0) - unregister_probe_event(tp); + unregister_kprobe_event(tk); else - list_add_tail(&tp->list, &probe_list); + list_add_tail(&tk->list, &probe_list); end: mutex_unlock(&probe_lock); @@ -398,11 +552,11 @@ end: } /* Module notifier call back, checking event on the module */ -static int trace_probe_module_callback(struct notifier_block *nb, +static int trace_kprobe_module_callback(struct notifier_block *nb, unsigned long val, void *data) { struct module *mod = data; - struct trace_probe *tp; + struct trace_kprobe *tk; int ret; if (val != MODULE_STATE_COMING) @@ -410,15 +564,16 @@ static int trace_probe_module_callback(struct notifier_block *nb, /* Update probes on coming module */ mutex_lock(&probe_lock); - 
list_for_each_entry(tp, &probe_list, list) { - if (trace_probe_within_module(tp, mod)) { + list_for_each_entry(tk, &probe_list, list) { + if (trace_kprobe_within_module(tk, mod)) { /* Don't need to check busy - this should have gone. */ - __unregister_trace_probe(tp); - ret = __register_trace_probe(tp); + __unregister_trace_kprobe(tk); + ret = __register_trace_kprobe(tk); if (ret) pr_warning("Failed to re-register probe %s on" "%s: %d\n", - tp->call.name, mod->name, ret); + ftrace_event_name(&tk->tp.call), + mod->name, ret); } } mutex_unlock(&probe_lock); @@ -426,12 +581,12 @@ static int trace_probe_module_callback(struct notifier_block *nb, return NOTIFY_DONE; } -static struct notifier_block trace_probe_module_nb = { - .notifier_call = trace_probe_module_callback, +static struct notifier_block trace_kprobe_module_nb = { + .notifier_call = trace_kprobe_module_callback, .priority = 1 /* Invoked after kprobe module callback */ }; -static int create_trace_probe(int argc, char **argv) +static int create_trace_kprobe(int argc, char **argv) { /* * Argument syntax: @@ -451,7 +606,7 @@ static int create_trace_probe(int argc, char **argv) * Type of args: * FETCHARG:TYPE : use TYPE instead of unsigned long. */ - struct trace_probe *tp; + struct trace_kprobe *tk; int i, ret = 0; bool is_return = false, is_delete = false; char *symbol = NULL, *event = NULL, *group = NULL; @@ -498,16 +653,16 @@ static int create_trace_probe(int argc, char **argv) return -EINVAL; } mutex_lock(&probe_lock); - tp = find_trace_probe(event, group); - if (!tp) { + tk = find_trace_kprobe(event, group); + if (!tk) { mutex_unlock(&probe_lock); pr_info("Event %s/%s doesn't exist.\n", group, event); return -ENOENT; } /* delete an event */ - ret = unregister_trace_probe(tp); + ret = unregister_trace_kprobe(tk); if (ret == 0) - free_trace_probe(tp); + free_trace_kprobe(tk); mutex_unlock(&probe_lock); return ret; } @@ -554,47 +709,49 @@ static int create_trace_probe(int argc, char **argv) is_return ? 
'r' : 'p', addr); event = buf; } - tp = alloc_trace_probe(group, event, addr, symbol, offset, argc, + tk = alloc_trace_kprobe(group, event, addr, symbol, offset, argc, is_return); - if (IS_ERR(tp)) { + if (IS_ERR(tk)) { pr_info("Failed to allocate trace_probe.(%d)\n", - (int)PTR_ERR(tp)); - return PTR_ERR(tp); + (int)PTR_ERR(tk)); + return PTR_ERR(tk); } /* parse arguments */ ret = 0; for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) { + struct probe_arg *parg = &tk->tp.args[i]; + /* Increment count for freeing args in error case */ - tp->nr_args++; + tk->tp.nr_args++; /* Parse argument name */ arg = strchr(argv[i], '='); if (arg) { *arg++ = '\0'; - tp->args[i].name = kstrdup(argv[i], GFP_KERNEL); + parg->name = kstrdup(argv[i], GFP_KERNEL); } else { arg = argv[i]; /* If argument name is omitted, set "argN" */ snprintf(buf, MAX_EVENT_NAME_LEN, "arg%d", i + 1); - tp->args[i].name = kstrdup(buf, GFP_KERNEL); + parg->name = kstrdup(buf, GFP_KERNEL); } - if (!tp->args[i].name) { + if (!parg->name) { pr_info("Failed to allocate argument[%d] name.\n", i); ret = -ENOMEM; goto error; } - if (!is_good_name(tp->args[i].name)) { + if (!is_good_name(parg->name)) { pr_info("Invalid argument[%d] name: %s\n", - i, tp->args[i].name); + i, parg->name); ret = -EINVAL; goto error; } - if (traceprobe_conflict_field_name(tp->args[i].name, - tp->args, i)) { + if (traceprobe_conflict_field_name(parg->name, + tk->tp.args, i)) { pr_info("Argument[%d] name '%s' conflicts with " "another field.\n", i, argv[i]); ret = -EINVAL; @@ -602,7 +759,7 @@ static int create_trace_probe(int argc, char **argv) } /* Parse fetch argument */ - ret = traceprobe_parse_probe_arg(arg, &tp->size, &tp->args[i], + ret = traceprobe_parse_probe_arg(arg, &tk->tp.size, parg, is_return, true); if (ret) { pr_info("Parse error at argument[%d]. (%d)\n", i, ret); @@ -610,35 +767,35 @@ static int create_trace_probe(int argc, char **argv) } } - ret = register_trace_probe(tp); + ret = register_trace_kprobe(tk); if (ret) goto error; return 0; error: - free_trace_probe(tp); + free_trace_kprobe(tk); return ret; } -static int release_all_trace_probes(void) +static int release_all_trace_kprobes(void) { - struct trace_probe *tp; + struct trace_kprobe *tk; int ret = 0; mutex_lock(&probe_lock); /* Ensure no probe is in use. */ - list_for_each_entry(tp, &probe_list, list) - if (trace_probe_is_enabled(tp)) { + list_for_each_entry(tk, &probe_list, list) + if (trace_probe_is_enabled(&tk->tp)) { ret = -EBUSY; goto end; } /* TODO: Use batch unregistration */ while (!list_empty(&probe_list)) { - tp = list_entry(probe_list.next, struct trace_probe, list); - ret = unregister_trace_probe(tp); + tk = list_entry(probe_list.next, struct trace_kprobe, list); + ret = unregister_trace_kprobe(tk); if (ret) goto end; - free_trace_probe(tp); + free_trace_kprobe(tk); } end: @@ -666,22 +823,23 @@ static void probes_seq_stop(struct seq_file *m, void *v) static int probes_seq_show(struct seq_file *m, void *v) { - struct trace_probe *tp = v; + struct trace_kprobe *tk = v; int i; - seq_printf(m, "%c", trace_probe_is_return(tp) ? 'r' : 'p'); - seq_printf(m, ":%s/%s", tp->call.class->system, tp->call.name); + seq_printf(m, "%c", trace_kprobe_is_return(tk) ? 
'r' : 'p'); + seq_printf(m, ":%s/%s", tk->tp.call.class->system, + ftrace_event_name(&tk->tp.call)); - if (!tp->symbol) - seq_printf(m, " 0x%p", tp->rp.kp.addr); - else if (tp->rp.kp.offset) - seq_printf(m, " %s+%u", trace_probe_symbol(tp), - tp->rp.kp.offset); + if (!tk->symbol) + seq_printf(m, " 0x%p", tk->rp.kp.addr); + else if (tk->rp.kp.offset) + seq_printf(m, " %s+%u", trace_kprobe_symbol(tk), + tk->rp.kp.offset); else - seq_printf(m, " %s", trace_probe_symbol(tp)); + seq_printf(m, " %s", trace_kprobe_symbol(tk)); - for (i = 0; i < tp->nr_args; i++) - seq_printf(m, " %s=%s", tp->args[i].name, tp->args[i].comm); + for (i = 0; i < tk->tp.nr_args; i++) + seq_printf(m, " %s=%s", tk->tp.args[i].name, tk->tp.args[i].comm); seq_printf(m, "\n"); return 0; @@ -699,7 +857,7 @@ static int probes_open(struct inode *inode, struct file *file) int ret; if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) { - ret = release_all_trace_probes(); + ret = release_all_trace_kprobes(); if (ret < 0) return ret; } @@ -711,7 +869,7 @@ static ssize_t probes_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { return traceprobe_probes_write(file, buffer, count, ppos, - create_trace_probe); + create_trace_kprobe); } static const struct file_operations kprobe_events_ops = { @@ -726,10 +884,11 @@ static const struct file_operations kprobe_events_ops = { /* Probes profiling interfaces */ static int probes_profile_seq_show(struct seq_file *m, void *v) { - struct trace_probe *tp = v; + struct trace_kprobe *tk = v; - seq_printf(m, " %-44s %15lu %15lu\n", tp->call.name, tp->nhit, - tp->rp.kp.nmissed); + seq_printf(m, " %-44s %15lu %15lu\n", + ftrace_event_name(&tk->tp.call), tk->nhit, + tk->rp.kp.nmissed); return 0; } @@ -754,57 +913,9 @@ static const struct file_operations kprobe_profile_ops = { .release = seq_release, }; -/* Sum up total data length for dynamic arraies (strings) */ -static __kprobes int __get_data_size(struct trace_probe *tp, - struct pt_regs *regs) -{ - int i, ret = 0; - u32 len; - - for (i = 0; i < tp->nr_args; i++) - if (unlikely(tp->args[i].fetch_size.fn)) { - call_fetch(&tp->args[i].fetch_size, regs, &len); - ret += len; - } - - return ret; -} - -/* Store the value of each argument */ -static __kprobes void store_trace_args(int ent_size, struct trace_probe *tp, - struct pt_regs *regs, - u8 *data, int maxlen) -{ - int i; - u32 end = tp->size; - u32 *dl; /* Data (relative) location */ - - for (i = 0; i < tp->nr_args; i++) { - if (unlikely(tp->args[i].fetch_size.fn)) { - /* - * First, we set the relative location and - * maximum data length to *dl - */ - dl = (u32 *)(data + tp->args[i].offset); - *dl = make_data_rloc(maxlen, end - tp->args[i].offset); - /* Then try to fetch string or dynamic array data */ - call_fetch(&tp->args[i].fetch, regs, dl); - /* Reduce maximum length */ - end += get_rloc_len(*dl); - maxlen -= get_rloc_len(*dl); - /* Trick here, convert data_rloc to data_loc */ - *dl = convert_rloc_to_loc(*dl, - ent_size + tp->args[i].offset); - } else - /* Just fetching data normally */ - call_fetch(&tp->args[i].fetch, regs, - data + tp->args[i].offset); - } -} - /* Kprobe handler */ -static __kprobes void -__kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs, +static nokprobe_inline void +__kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs, struct ftrace_event_file *ftrace_file) { struct kprobe_trace_entry_head *entry; @@ -812,18 +923,18 @@ __kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs, struct ring_buffer 
*buffer; int size, dsize, pc; unsigned long irq_flags; - struct ftrace_event_call *call = &tp->call; + struct ftrace_event_call *call = &tk->tp.call; WARN_ON(call != ftrace_file->event_call); - if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &ftrace_file->flags)) + if (ftrace_trigger_soft_disabled(ftrace_file)) return; local_save_flags(irq_flags); pc = preempt_count(); - dsize = __get_data_size(tp, regs); - size = sizeof(*entry) + tp->size + dsize; + dsize = __get_data_size(&tk->tp, regs); + size = sizeof(*entry) + tk->tp.size + dsize; event = trace_event_buffer_lock_reserve(&buffer, ftrace_file, call->event.type, @@ -832,26 +943,26 @@ __kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs, return; entry = ring_buffer_event_data(event); - entry->ip = (unsigned long)tp->rp.kp.addr; - store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); + entry->ip = (unsigned long)tk->rp.kp.addr; + store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize); - if (!filter_current_check_discard(buffer, call, entry, event)) - trace_buffer_unlock_commit_regs(buffer, event, - irq_flags, pc, regs); + event_trigger_unlock_commit_regs(ftrace_file, buffer, event, + entry, irq_flags, pc, regs); } -static __kprobes void -kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs) +static void +kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs) { struct event_file_link *link; - list_for_each_entry_rcu(link, &tp->files, list) - __kprobe_trace_func(tp, regs, link->file); + list_for_each_entry_rcu(link, &tk->tp.files, list) + __kprobe_trace_func(tk, regs, link->file); } +NOKPROBE_SYMBOL(kprobe_trace_func); /* Kretprobe handler */ -static __kprobes void -__kretprobe_trace_func(struct trace_probe *tp, struct kretprobe_instance *ri, +static nokprobe_inline void +__kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, struct pt_regs *regs, struct ftrace_event_file *ftrace_file) { @@ -860,18 +971,18 @@ __kretprobe_trace_func(struct trace_probe *tp, struct kretprobe_instance *ri, struct ring_buffer *buffer; int size, pc, dsize; unsigned long irq_flags; - struct ftrace_event_call *call = &tp->call; + struct ftrace_event_call *call = &tk->tp.call; WARN_ON(call != ftrace_file->event_call); - if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &ftrace_file->flags)) + if (ftrace_trigger_soft_disabled(ftrace_file)) return; local_save_flags(irq_flags); pc = preempt_count(); - dsize = __get_data_size(tp, regs); - size = sizeof(*entry) + tp->size + dsize; + dsize = __get_data_size(&tk->tp, regs); + size = sizeof(*entry) + tk->tp.size + dsize; event = trace_event_buffer_lock_reserve(&buffer, ftrace_file, call->event.type, @@ -880,24 +991,24 @@ __kretprobe_trace_func(struct trace_probe *tp, struct kretprobe_instance *ri, return; entry = ring_buffer_event_data(event); - entry->func = (unsigned long)tp->rp.kp.addr; + entry->func = (unsigned long)tk->rp.kp.addr; entry->ret_ip = (unsigned long)ri->ret_addr; - store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); + store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize); - if (!filter_current_check_discard(buffer, call, entry, event)) - trace_buffer_unlock_commit_regs(buffer, event, - irq_flags, pc, regs); + event_trigger_unlock_commit_regs(ftrace_file, buffer, event, + entry, irq_flags, pc, regs); } -static __kprobes void -kretprobe_trace_func(struct trace_probe *tp, struct kretprobe_instance *ri, +static void +kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, 
struct pt_regs *regs) { struct event_file_link *link; - list_for_each_entry_rcu(link, &tp->files, list) - __kretprobe_trace_func(tp, ri, regs, link->file); + list_for_each_entry_rcu(link, &tk->tp.files, list) + __kretprobe_trace_func(tk, ri, regs, link->file); } +NOKPROBE_SYMBOL(kretprobe_trace_func); /* Event entry printers */ static enum print_line_t @@ -913,7 +1024,7 @@ print_kprobe_event(struct trace_iterator *iter, int flags, field = (struct kprobe_trace_entry_head *)iter->ent; tp = container_of(event, struct trace_probe, call.event); - if (!trace_seq_printf(s, "%s: (", tp->call.name)) + if (!trace_seq_printf(s, "%s: (", ftrace_event_name(&tp->call))) goto partial; if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET)) @@ -949,7 +1060,7 @@ print_kretprobe_event(struct trace_iterator *iter, int flags, field = (struct kretprobe_trace_entry_head *)iter->ent; tp = container_of(event, struct trace_probe, call.event); - if (!trace_seq_printf(s, "%s: (", tp->call.name)) + if (!trace_seq_printf(s, "%s: (", ftrace_event_name(&tp->call))) goto partial; if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET)) @@ -983,16 +1094,18 @@ static int kprobe_event_define_fields(struct ftrace_event_call *event_call) { int ret, i; struct kprobe_trace_entry_head field; - struct trace_probe *tp = (struct trace_probe *)event_call->data; + struct trace_kprobe *tk = (struct trace_kprobe *)event_call->data; DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0); /* Set argument names as fields */ - for (i = 0; i < tp->nr_args; i++) { - ret = trace_define_field(event_call, tp->args[i].type->fmttype, - tp->args[i].name, - sizeof(field) + tp->args[i].offset, - tp->args[i].type->size, - tp->args[i].type->is_signed, + for (i = 0; i < tk->tp.nr_args; i++) { + struct probe_arg *parg = &tk->tp.args[i]; + + ret = trace_define_field(event_call, parg->type->fmttype, + parg->name, + sizeof(field) + parg->offset, + parg->type->size, + parg->type->is_signed, FILTER_OTHER); if (ret) return ret; @@ -1004,17 +1117,19 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call) { int ret, i; struct kretprobe_trace_entry_head field; - struct trace_probe *tp = (struct trace_probe *)event_call->data; + struct trace_kprobe *tk = (struct trace_kprobe *)event_call->data; DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0); DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0); /* Set argument names as fields */ - for (i = 0; i < tp->nr_args; i++) { - ret = trace_define_field(event_call, tp->args[i].type->fmttype, - tp->args[i].name, - sizeof(field) + tp->args[i].offset, - tp->args[i].type->size, - tp->args[i].type->is_signed, + for (i = 0; i < tk->tp.nr_args; i++) { + struct probe_arg *parg = &tk->tp.args[i]; + + ret = trace_define_field(event_call, parg->type->fmttype, + parg->name, + sizeof(field) + parg->offset, + parg->type->size, + parg->type->is_signed, FILTER_OTHER); if (ret) return ret; @@ -1022,74 +1137,13 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call) return 0; } -static int __set_print_fmt(struct trace_probe *tp, char *buf, int len) -{ - int i; - int pos = 0; - - const char *fmt, *arg; - - if (!trace_probe_is_return(tp)) { - fmt = "(%lx)"; - arg = "REC->" FIELD_STRING_IP; - } else { - fmt = "(%lx <- %lx)"; - arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP; - } - - /* When len=0, we just calculate the needed length */ -#define LEN_OR_ZERO (len ? 
len - pos : 0) - - pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt); - - for (i = 0; i < tp->nr_args; i++) { - pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%s", - tp->args[i].name, tp->args[i].type->fmt); - } - - pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg); - - for (i = 0; i < tp->nr_args; i++) { - if (strcmp(tp->args[i].type->name, "string") == 0) - pos += snprintf(buf + pos, LEN_OR_ZERO, - ", __get_str(%s)", - tp->args[i].name); - else - pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s", - tp->args[i].name); - } - -#undef LEN_OR_ZERO - - /* return the length of print_fmt */ - return pos; -} - -static int set_print_fmt(struct trace_probe *tp) -{ - int len; - char *print_fmt; - - /* First: called with 0 length to calculate the needed length */ - len = __set_print_fmt(tp, NULL, 0); - print_fmt = kmalloc(len + 1, GFP_KERNEL); - if (!print_fmt) - return -ENOMEM; - - /* Second: actually write the @print_fmt */ - __set_print_fmt(tp, print_fmt, len + 1); - tp->call.print_fmt = print_fmt; - - return 0; -} - #ifdef CONFIG_PERF_EVENTS /* Kprobe profile handler */ -static __kprobes void -kprobe_perf_func(struct trace_probe *tp, struct pt_regs *regs) +static void +kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) { - struct ftrace_event_call *call = &tp->call; + struct ftrace_event_call *call = &tk->tp.call; struct kprobe_trace_entry_head *entry; struct hlist_head *head; int size, __size, dsize; @@ -1099,8 +1153,8 @@ kprobe_perf_func(struct trace_probe *tp, struct pt_regs *regs) if (hlist_empty(head)) return; - dsize = __get_data_size(tp, regs); - __size = sizeof(*entry) + tp->size + dsize; + dsize = __get_data_size(&tk->tp, regs); + __size = sizeof(*entry) + tk->tp.size + dsize; size = ALIGN(__size + sizeof(u32), sizeof(u64)); size -= sizeof(u32); @@ -1108,18 +1162,19 @@ kprobe_perf_func(struct trace_probe *tp, struct pt_regs *regs) if (!entry) return; - entry->ip = (unsigned long)tp->rp.kp.addr; + entry->ip = (unsigned long)tk->rp.kp.addr; memset(&entry[1], 0, dsize); - store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); + store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize); perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL); } +NOKPROBE_SYMBOL(kprobe_perf_func); /* Kretprobe profile handler */ -static __kprobes void -kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri, +static void +kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, struct pt_regs *regs) { - struct ftrace_event_call *call = &tp->call; + struct ftrace_event_call *call = &tk->tp.call; struct kretprobe_trace_entry_head *entry; struct hlist_head *head; int size, __size, dsize; @@ -1129,8 +1184,8 @@ kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri, if (hlist_empty(head)) return; - dsize = __get_data_size(tp, regs); - __size = sizeof(*entry) + tp->size + dsize; + dsize = __get_data_size(&tk->tp, regs); + __size = sizeof(*entry) + tk->tp.size + dsize; size = ALIGN(__size + sizeof(u32), sizeof(u64)); size -= sizeof(u32); @@ -1138,11 +1193,12 @@ kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri, if (!entry) return; - entry->func = (unsigned long)tp->rp.kp.addr; + entry->func = (unsigned long)tk->rp.kp.addr; entry->ret_ip = (unsigned long)ri->ret_addr; - store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); + store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize); perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL); } 
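Both perf handlers above size their scratch record with the same dance: payload size plus sizeof(u32), rounded up to a u64 boundary, then sizeof(u32) subtracted again. The sketch below just exercises that arithmetic; the ALIGN macro mirrors the kernel helper (power-of-two alignment only), and reading the extra u32 as the raw-sample size header that perf places in front of the payload is an assumption stated here, not something this patch spells out.

/*
 * size = ALIGN(__size + sizeof(u32), sizeof(u64)) - sizeof(u32);
 * pads the payload so that a u32 header plus the payload ends on a
 * u64 boundary.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	const size_t hdr = sizeof(uint32_t);

	for (size_t payload = 1; payload <= 24; payload++) {
		size_t size = ALIGN(payload + hdr, sizeof(uint64_t)) - hdr;

		/* padded size still covers the original data ...       */
		assert(size >= payload);
		/* ... and header + payload lands on a u64 boundary      */
		assert((size + hdr) % sizeof(uint64_t) == 0);
		printf("payload %2zu -> reserve %2zu bytes\n", payload, size);
	}
	return 0;
}
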
+NOKPROBE_SYMBOL(kretprobe_perf_func); #endif /* CONFIG_PERF_EVENTS */ /* @@ -1151,24 +1207,23 @@ kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri, * kprobe_trace_self_tests_init() does enable_trace_probe/disable_trace_probe * lockless, but we can't race with this __init function. */ -static __kprobes -int kprobe_register(struct ftrace_event_call *event, - enum trace_reg type, void *data) +static int kprobe_register(struct ftrace_event_call *event, + enum trace_reg type, void *data) { - struct trace_probe *tp = (struct trace_probe *)event->data; + struct trace_kprobe *tk = (struct trace_kprobe *)event->data; struct ftrace_event_file *file = data; switch (type) { case TRACE_REG_REGISTER: - return enable_trace_probe(tp, file); + return enable_trace_kprobe(tk, file); case TRACE_REG_UNREGISTER: - return disable_trace_probe(tp, file); + return disable_trace_kprobe(tk, file); #ifdef CONFIG_PERF_EVENTS case TRACE_REG_PERF_REGISTER: - return enable_trace_probe(tp, NULL); + return enable_trace_kprobe(tk, NULL); case TRACE_REG_PERF_UNREGISTER: - return disable_trace_probe(tp, NULL); + return disable_trace_kprobe(tk, NULL); case TRACE_REG_PERF_OPEN: case TRACE_REG_PERF_CLOSE: case TRACE_REG_PERF_ADD: @@ -1179,37 +1234,38 @@ int kprobe_register(struct ftrace_event_call *event, return 0; } -static __kprobes -int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) +static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) { - struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); + struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp); - tp->nhit++; + tk->nhit++; - if (tp->flags & TP_FLAG_TRACE) - kprobe_trace_func(tp, regs); + if (tk->tp.flags & TP_FLAG_TRACE) + kprobe_trace_func(tk, regs); #ifdef CONFIG_PERF_EVENTS - if (tp->flags & TP_FLAG_PROFILE) - kprobe_perf_func(tp, regs); + if (tk->tp.flags & TP_FLAG_PROFILE) + kprobe_perf_func(tk, regs); #endif return 0; /* We don't tweek kernel, so just return 0 */ } +NOKPROBE_SYMBOL(kprobe_dispatcher); -static __kprobes -int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs) +static int +kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs) { - struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); + struct trace_kprobe *tk = container_of(ri->rp, struct trace_kprobe, rp); - tp->nhit++; + tk->nhit++; - if (tp->flags & TP_FLAG_TRACE) - kretprobe_trace_func(tp, ri, regs); + if (tk->tp.flags & TP_FLAG_TRACE) + kretprobe_trace_func(tk, ri, regs); #ifdef CONFIG_PERF_EVENTS - if (tp->flags & TP_FLAG_PROFILE) - kretprobe_perf_func(tp, ri, regs); + if (tk->tp.flags & TP_FLAG_PROFILE) + kretprobe_perf_func(tk, ri, regs); #endif return 0; /* We don't tweek kernel, so just return 0 */ } +NOKPROBE_SYMBOL(kretprobe_dispatcher); static struct trace_event_functions kretprobe_funcs = { .trace = print_kretprobe_event @@ -1219,21 +1275,21 @@ static struct trace_event_functions kprobe_funcs = { .trace = print_kprobe_event }; -static int register_probe_event(struct trace_probe *tp) +static int register_kprobe_event(struct trace_kprobe *tk) { - struct ftrace_event_call *call = &tp->call; + struct ftrace_event_call *call = &tk->tp.call; int ret; /* Initialize ftrace_event_call */ INIT_LIST_HEAD(&call->class->fields); - if (trace_probe_is_return(tp)) { + if (trace_kprobe_is_return(tk)) { call->event.funcs = &kretprobe_funcs; call->class->define_fields = kretprobe_event_define_fields; } else { call->event.funcs = &kprobe_funcs; 
call->class->define_fields = kprobe_event_define_fields; } - if (set_print_fmt(tp) < 0) + if (set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0) return -ENOMEM; ret = register_ftrace_event(&call->event); if (!ret) { @@ -1242,24 +1298,25 @@ static int register_probe_event(struct trace_probe *tp) } call->flags = 0; call->class->reg = kprobe_register; - call->data = tp; + call->data = tk; ret = trace_add_event_call(call); if (ret) { - pr_info("Failed to register kprobe event: %s\n", call->name); + pr_info("Failed to register kprobe event: %s\n", + ftrace_event_name(call)); kfree(call->print_fmt); unregister_ftrace_event(&call->event); } return ret; } -static int unregister_probe_event(struct trace_probe *tp) +static int unregister_kprobe_event(struct trace_kprobe *tk) { int ret; /* tp->event is unregistered in trace_remove_event_call() */ - ret = trace_remove_event_call(&tp->call); + ret = trace_remove_event_call(&tk->tp.call); if (!ret) - kfree(tp->call.print_fmt); + kfree(tk->tp.call.print_fmt); return ret; } @@ -1269,7 +1326,7 @@ static __init int init_kprobe_trace(void) struct dentry *d_tracer; struct dentry *entry; - if (register_module_notifier(&trace_probe_module_nb)) + if (register_module_notifier(&trace_kprobe_module_nb)) return -EINVAL; d_tracer = tracing_init_dentry(); @@ -1309,72 +1366,75 @@ static __used int kprobe_trace_selftest_target(int a1, int a2, int a3, } static struct ftrace_event_file * -find_trace_probe_file(struct trace_probe *tp, struct trace_array *tr) +find_trace_probe_file(struct trace_kprobe *tk, struct trace_array *tr) { struct ftrace_event_file *file; list_for_each_entry(file, &tr->events, list) - if (file->event_call == &tp->call) + if (file->event_call == &tk->tp.call) return file; return NULL; } /* - * Nobody but us can call enable_trace_probe/disable_trace_probe at this + * Nobody but us can call enable_trace_kprobe/disable_trace_kprobe at this * stage, we can do this lockless. 
*/ static __init int kprobe_trace_self_tests_init(void) { int ret, warn = 0; int (*target)(int, int, int, int, int, int); - struct trace_probe *tp; + struct trace_kprobe *tk; struct ftrace_event_file *file; + if (tracing_is_disabled()) + return -ENODEV; + target = kprobe_trace_selftest_target; pr_info("Testing kprobe tracing: "); ret = traceprobe_command("p:testprobe kprobe_trace_selftest_target " "$stack $stack0 +0($stack)", - create_trace_probe); + create_trace_kprobe); if (WARN_ON_ONCE(ret)) { pr_warn("error on probing function entry.\n"); warn++; } else { /* Enable trace point */ - tp = find_trace_probe("testprobe", KPROBE_EVENT_SYSTEM); - if (WARN_ON_ONCE(tp == NULL)) { + tk = find_trace_kprobe("testprobe", KPROBE_EVENT_SYSTEM); + if (WARN_ON_ONCE(tk == NULL)) { pr_warn("error on getting new probe.\n"); warn++; } else { - file = find_trace_probe_file(tp, top_trace_array()); + file = find_trace_probe_file(tk, top_trace_array()); if (WARN_ON_ONCE(file == NULL)) { pr_warn("error on getting probe file.\n"); warn++; } else - enable_trace_probe(tp, file); + enable_trace_kprobe(tk, file); } } ret = traceprobe_command("r:testprobe2 kprobe_trace_selftest_target " - "$retval", create_trace_probe); + "$retval", create_trace_kprobe); if (WARN_ON_ONCE(ret)) { pr_warn("error on probing function return.\n"); warn++; } else { /* Enable trace point */ - tp = find_trace_probe("testprobe2", KPROBE_EVENT_SYSTEM); - if (WARN_ON_ONCE(tp == NULL)) { + tk = find_trace_kprobe("testprobe2", KPROBE_EVENT_SYSTEM); + if (WARN_ON_ONCE(tk == NULL)) { pr_warn("error on getting 2nd new probe.\n"); warn++; } else { - file = find_trace_probe_file(tp, top_trace_array()); + file = find_trace_probe_file(tk, top_trace_array()); if (WARN_ON_ONCE(file == NULL)) { pr_warn("error on getting probe file.\n"); warn++; } else - enable_trace_probe(tp, file); + enable_trace_kprobe(tk, file); } } @@ -1384,46 +1444,46 @@ static __init int kprobe_trace_self_tests_init(void) ret = target(1, 2, 3, 4, 5, 6); /* Disable trace points before removing it */ - tp = find_trace_probe("testprobe", KPROBE_EVENT_SYSTEM); - if (WARN_ON_ONCE(tp == NULL)) { + tk = find_trace_kprobe("testprobe", KPROBE_EVENT_SYSTEM); + if (WARN_ON_ONCE(tk == NULL)) { pr_warn("error on getting test probe.\n"); warn++; } else { - file = find_trace_probe_file(tp, top_trace_array()); + file = find_trace_probe_file(tk, top_trace_array()); if (WARN_ON_ONCE(file == NULL)) { pr_warn("error on getting probe file.\n"); warn++; } else - disable_trace_probe(tp, file); + disable_trace_kprobe(tk, file); } - tp = find_trace_probe("testprobe2", KPROBE_EVENT_SYSTEM); - if (WARN_ON_ONCE(tp == NULL)) { + tk = find_trace_kprobe("testprobe2", KPROBE_EVENT_SYSTEM); + if (WARN_ON_ONCE(tk == NULL)) { pr_warn("error on getting 2nd test probe.\n"); warn++; } else { - file = find_trace_probe_file(tp, top_trace_array()); + file = find_trace_probe_file(tk, top_trace_array()); if (WARN_ON_ONCE(file == NULL)) { pr_warn("error on getting probe file.\n"); warn++; } else - disable_trace_probe(tp, file); + disable_trace_kprobe(tk, file); } - ret = traceprobe_command("-:testprobe", create_trace_probe); + ret = traceprobe_command("-:testprobe", create_trace_kprobe); if (WARN_ON_ONCE(ret)) { pr_warn("error on deleting a probe.\n"); warn++; } - ret = traceprobe_command("-:testprobe2", create_trace_probe); + ret = traceprobe_command("-:testprobe2", create_trace_kprobe); if (WARN_ON_ONCE(ret)) { pr_warn("error on deleting a probe.\n"); warn++; } end: - release_all_trace_probes(); + 
release_all_trace_kprobes(); if (warn) pr_cont("NG: Some tests are failed. Please check them.\n"); else diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index b3dcfb2f0fe..0abd9b86347 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -323,7 +323,7 @@ static void __trace_mmiotrace_rw(struct trace_array *tr, entry = ring_buffer_event_data(event); entry->rw = *rw; - if (!filter_check_discard(call, entry, buffer, event)) + if (!call_filter_check_discard(call, entry, buffer, event)) trace_buffer_unlock_commit(buffer, event, 0, pc); } @@ -353,7 +353,7 @@ static void __trace_mmiotrace_map(struct trace_array *tr, entry = ring_buffer_event_data(event); entry->map = *map; - if (!filter_check_discard(call, entry, buffer, event)) + if (!call_filter_check_discard(call, entry, buffer, event)) trace_buffer_unlock_commit(buffer, event, 0, pc); } diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c index 394f94417e2..fcf0a9e4891 100644 --- a/kernel/trace/trace_nop.c +++ b/kernel/trace/trace_nop.c @@ -62,7 +62,7 @@ static void nop_trace_reset(struct trace_array *tr) * If you don't implement it, then the flag setting will be * automatically accepted. */ -static int nop_set_flag(u32 old_flags, u32 bit, int set) +static int nop_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set) { /* * Note that you don't need to update nop_flags.val yourself. @@ -91,11 +91,11 @@ struct tracer nop_trace __read_mostly = .name = "nop", .init = nop_trace_init, .reset = nop_trace_reset, - .wait_pipe = poll_wait_pipe, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_nop, #endif .flags = &nop_flags, - .set_flag = nop_set_flag + .set_flag = nop_set_flag, + .allow_instances = true, }; diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 34e7cbac0c9..f3dad80c20b 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -126,6 +126,34 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...) EXPORT_SYMBOL_GPL(trace_seq_printf); /** + * trace_seq_bitmask - put a list of longs as a bitmask print output + * @s: trace sequence descriptor + * @maskp: points to an array of unsigned longs that represent a bitmask + * @nmaskbits: The number of bits that are valid in @maskp + * + * It returns 0 if the trace oversizes the buffer's free + * space, 1 otherwise. + * + * Writes a ASCII representation of a bitmask string into @s. 
+ */ +int +trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp, + int nmaskbits) +{ + int len = (PAGE_SIZE - 1) - s->len; + int ret; + + if (s->full || !len) + return 0; + + ret = bitmap_scnprintf(s->buffer, len, maskp, nmaskbits); + s->len += ret; + + return 1; +} +EXPORT_SYMBOL_GPL(trace_seq_bitmask); + +/** * trace_seq_vprintf - sequence printing of trace information * @s: trace sequence descriptor * @fmt: printf format string @@ -399,6 +427,19 @@ EXPORT_SYMBOL(ftrace_print_symbols_seq_u64); #endif const char * +ftrace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr, + unsigned int bitmask_size) +{ + const char *ret = p->buffer + p->len; + + trace_seq_bitmask(p, bitmask_ptr, bitmask_size * 8); + trace_seq_putc(p, 0); + + return ret; +} +EXPORT_SYMBOL_GPL(ftrace_print_bitmask_seq); + +const char * ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len) { int i; @@ -431,7 +472,7 @@ int ftrace_raw_output_prep(struct trace_iterator *iter, } trace_seq_init(p); - ret = trace_seq_printf(s, "%s: ", event->name); + ret = trace_seq_printf(s, "%s: ", ftrace_event_name(event)); if (!ret) return TRACE_TYPE_PARTIAL_LINE; @@ -439,6 +480,37 @@ int ftrace_raw_output_prep(struct trace_iterator *iter, } EXPORT_SYMBOL(ftrace_raw_output_prep); +static int ftrace_output_raw(struct trace_iterator *iter, char *name, + char *fmt, va_list ap) +{ + struct trace_seq *s = &iter->seq; + int ret; + + ret = trace_seq_printf(s, "%s: ", name); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + ret = trace_seq_vprintf(s, fmt, ap); + + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + return TRACE_TYPE_HANDLED; +} + +int ftrace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...) +{ + va_list ap; + int ret; + + va_start(ap, fmt); + ret = ftrace_output_raw(iter, name, fmt, ap); + va_end(ap); + + return ret; +} +EXPORT_SYMBOL_GPL(ftrace_output_call); + #ifdef CONFIG_KRETPROBES static inline const char *kretprobed(const char *name) { @@ -618,8 +690,23 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 'X' : '.'; - need_resched = - (entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.'; + + switch (entry->flags & (TRACE_FLAG_NEED_RESCHED | + TRACE_FLAG_PREEMPT_RESCHED)) { + case TRACE_FLAG_NEED_RESCHED | TRACE_FLAG_PREEMPT_RESCHED: + need_resched = 'N'; + break; + case TRACE_FLAG_NEED_RESCHED: + need_resched = 'n'; + break; + case TRACE_FLAG_PREEMPT_RESCHED: + need_resched = 'p'; + break; + default: + need_resched = '.'; + break; + } + hardsoft_irq = (hardirq && softirq) ? 'H' : hardirq ? 
'h' : diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 412e959709b..d4b9fc22cd2 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -35,48 +35,28 @@ const char *reserved_field_names[] = { FIELD_STRING_FUNC, }; -/* Printing function type */ -#define PRINT_TYPE_FUNC_NAME(type) print_type_##type -#define PRINT_TYPE_FMT_NAME(type) print_type_format_##type - /* Printing in basic type function template */ -#define DEFINE_BASIC_PRINT_TYPE_FUNC(type, fmt, cast) \ -static __kprobes int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, \ - const char *name, \ - void *data, void *ent)\ +#define DEFINE_BASIC_PRINT_TYPE_FUNC(type, fmt) \ +int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, const char *name, \ + void *data, void *ent) \ { \ - return trace_seq_printf(s, " %s=" fmt, name, (cast)*(type *)data);\ + return trace_seq_printf(s, " %s=" fmt, name, *(type *)data); \ } \ -static const char PRINT_TYPE_FMT_NAME(type)[] = fmt; - -DEFINE_BASIC_PRINT_TYPE_FUNC(u8, "%x", unsigned int) -DEFINE_BASIC_PRINT_TYPE_FUNC(u16, "%x", unsigned int) -DEFINE_BASIC_PRINT_TYPE_FUNC(u32, "%lx", unsigned long) -DEFINE_BASIC_PRINT_TYPE_FUNC(u64, "%llx", unsigned long long) -DEFINE_BASIC_PRINT_TYPE_FUNC(s8, "%d", int) -DEFINE_BASIC_PRINT_TYPE_FUNC(s16, "%d", int) -DEFINE_BASIC_PRINT_TYPE_FUNC(s32, "%ld", long) -DEFINE_BASIC_PRINT_TYPE_FUNC(s64, "%lld", long long) - -static inline void *get_rloc_data(u32 *dl) -{ - return (u8 *)dl + get_rloc_offs(*dl); -} - -/* For data_loc conversion */ -static inline void *get_loc_data(u32 *dl, void *ent) -{ - return (u8 *)ent + get_rloc_offs(*dl); -} - -/* For defining macros, define string/string_size types */ -typedef u32 string; -typedef u32 string_size; +const char PRINT_TYPE_FMT_NAME(type)[] = fmt; \ +NOKPROBE_SYMBOL(PRINT_TYPE_FUNC_NAME(type)); + +DEFINE_BASIC_PRINT_TYPE_FUNC(u8 , "0x%x") +DEFINE_BASIC_PRINT_TYPE_FUNC(u16, "0x%x") +DEFINE_BASIC_PRINT_TYPE_FUNC(u32, "0x%x") +DEFINE_BASIC_PRINT_TYPE_FUNC(u64, "0x%Lx") +DEFINE_BASIC_PRINT_TYPE_FUNC(s8, "%d") +DEFINE_BASIC_PRINT_TYPE_FUNC(s16, "%d") +DEFINE_BASIC_PRINT_TYPE_FUNC(s32, "%d") +DEFINE_BASIC_PRINT_TYPE_FUNC(s64, "%Ld") /* Print type function for string type */ -static __kprobes int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s, - const char *name, - void *data, void *ent) +int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s, const char *name, + void *data, void *ent) { int len = *(u32 *)data >> 16; @@ -86,19 +66,9 @@ static __kprobes int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s, return trace_seq_printf(s, " %s=\"%s\"", name, (const char *)get_loc_data(data, ent)); } +NOKPROBE_SYMBOL(PRINT_TYPE_FUNC_NAME(string)); -static const char PRINT_TYPE_FMT_NAME(string)[] = "\\\"%s\\\""; - -#define FETCH_FUNC_NAME(method, type) fetch_##method##_##type -/* - * Define macro for basic types - we don't need to define s* types, because - * we have to care only about bitwidth at recording time. 
- */ -#define DEFINE_BASIC_FETCH_FUNCS(method) \ -DEFINE_FETCH_##method(u8) \ -DEFINE_FETCH_##method(u16) \ -DEFINE_FETCH_##method(u32) \ -DEFINE_FETCH_##method(u64) +const char PRINT_TYPE_FMT_NAME(string)[] = "\\\"%s\\\""; #define CHECK_FETCH_FUNCS(method, fn) \ (((FETCH_FUNC_NAME(method, u8) == fn) || \ @@ -111,208 +81,79 @@ DEFINE_FETCH_##method(u64) /* Data fetch function templates */ #define DEFINE_FETCH_reg(type) \ -static __kprobes void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs, \ - void *offset, void *dest) \ +void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs, void *offset, void *dest) \ { \ *(type *)dest = (type)regs_get_register(regs, \ (unsigned int)((unsigned long)offset)); \ -} +} \ +NOKPROBE_SYMBOL(FETCH_FUNC_NAME(reg, type)); DEFINE_BASIC_FETCH_FUNCS(reg) /* No string on the register */ #define fetch_reg_string NULL #define fetch_reg_string_size NULL -#define DEFINE_FETCH_stack(type) \ -static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\ - void *offset, void *dest) \ -{ \ - *(type *)dest = (type)regs_get_kernel_stack_nth(regs, \ - (unsigned int)((unsigned long)offset)); \ -} -DEFINE_BASIC_FETCH_FUNCS(stack) -/* No string on the stack entry */ -#define fetch_stack_string NULL -#define fetch_stack_string_size NULL - #define DEFINE_FETCH_retval(type) \ -static __kprobes void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs,\ - void *dummy, void *dest) \ +void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs, \ + void *dummy, void *dest) \ { \ *(type *)dest = (type)regs_return_value(regs); \ -} +} \ +NOKPROBE_SYMBOL(FETCH_FUNC_NAME(retval, type)); DEFINE_BASIC_FETCH_FUNCS(retval) /* No string on the retval */ #define fetch_retval_string NULL #define fetch_retval_string_size NULL -#define DEFINE_FETCH_memory(type) \ -static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\ - void *addr, void *dest) \ -{ \ - type retval; \ - if (probe_kernel_address(addr, retval)) \ - *(type *)dest = 0; \ - else \ - *(type *)dest = retval; \ -} -DEFINE_BASIC_FETCH_FUNCS(memory) -/* - * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max - * length and relative data location. - */ -static __kprobes void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs, - void *addr, void *dest) -{ - long ret; - int maxlen = get_rloc_len(*(u32 *)dest); - u8 *dst = get_rloc_data(dest); - u8 *src = addr; - mm_segment_t old_fs = get_fs(); - - if (!maxlen) - return; - - /* - * Try to get string again, since the string can be changed while - * probing. 
- */ - set_fs(KERNEL_DS); - pagefault_disable(); - - do - ret = __copy_from_user_inatomic(dst++, src++, 1); - while (dst[-1] && ret == 0 && src - (u8 *)addr < maxlen); - - dst[-1] = '\0'; - pagefault_enable(); - set_fs(old_fs); - - if (ret < 0) { /* Failed to fetch string */ - ((u8 *)get_rloc_data(dest))[0] = '\0'; - *(u32 *)dest = make_data_rloc(0, get_rloc_offs(*(u32 *)dest)); - } else { - *(u32 *)dest = make_data_rloc(src - (u8 *)addr, - get_rloc_offs(*(u32 *)dest)); - } -} - -/* Return the length of string -- including null terminal byte */ -static __kprobes void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs, - void *addr, void *dest) -{ - mm_segment_t old_fs; - int ret, len = 0; - u8 c; - - old_fs = get_fs(); - set_fs(KERNEL_DS); - pagefault_disable(); - - do { - ret = __copy_from_user_inatomic(&c, (u8 *)addr + len, 1); - len++; - } while (c && ret == 0 && len < MAX_STRING_SIZE); - - pagefault_enable(); - set_fs(old_fs); - - if (ret < 0) /* Failed to check the length */ - *(u32 *)dest = 0; - else - *(u32 *)dest = len; -} - -/* Memory fetching by symbol */ -struct symbol_cache { - char *symbol; - long offset; - unsigned long addr; -}; - -static unsigned long update_symbol_cache(struct symbol_cache *sc) -{ - sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol); - - if (sc->addr) - sc->addr += sc->offset; - - return sc->addr; -} - -static void free_symbol_cache(struct symbol_cache *sc) -{ - kfree(sc->symbol); - kfree(sc); -} - -static struct symbol_cache *alloc_symbol_cache(const char *sym, long offset) -{ - struct symbol_cache *sc; - - if (!sym || strlen(sym) == 0) - return NULL; - - sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL); - if (!sc) - return NULL; - - sc->symbol = kstrdup(sym, GFP_KERNEL); - if (!sc->symbol) { - kfree(sc); - return NULL; - } - sc->offset = offset; - update_symbol_cache(sc); - - return sc; -} - -#define DEFINE_FETCH_symbol(type) \ -static __kprobes void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs,\ - void *data, void *dest) \ -{ \ - struct symbol_cache *sc = data; \ - if (sc->addr) \ - fetch_memory_##type(regs, (void *)sc->addr, dest); \ - else \ - *(type *)dest = 0; \ -} -DEFINE_BASIC_FETCH_FUNCS(symbol) -DEFINE_FETCH_symbol(string) -DEFINE_FETCH_symbol(string_size) - /* Dereference memory access function */ struct deref_fetch_param { struct fetch_param orig; long offset; + fetch_func_t fetch; + fetch_func_t fetch_size; }; #define DEFINE_FETCH_deref(type) \ -static __kprobes void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs,\ - void *data, void *dest) \ +void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs, \ + void *data, void *dest) \ { \ struct deref_fetch_param *dprm = data; \ unsigned long addr; \ call_fetch(&dprm->orig, regs, &addr); \ if (addr) { \ addr += dprm->offset; \ - fetch_memory_##type(regs, (void *)addr, dest); \ + dprm->fetch(regs, (void *)addr, dest); \ } else \ *(type *)dest = 0; \ -} +} \ +NOKPROBE_SYMBOL(FETCH_FUNC_NAME(deref, type)); DEFINE_BASIC_FETCH_FUNCS(deref) DEFINE_FETCH_deref(string) -DEFINE_FETCH_deref(string_size) -static __kprobes void update_deref_fetch_param(struct deref_fetch_param *data) +void FETCH_FUNC_NAME(deref, string_size)(struct pt_regs *regs, + void *data, void *dest) +{ + struct deref_fetch_param *dprm = data; + unsigned long addr; + + call_fetch(&dprm->orig, regs, &addr); + if (addr && dprm->fetch_size) { + addr += dprm->offset; + dprm->fetch_size(regs, (void *)addr, dest); + } else + *(string_size *)dest = 0; +} +NOKPROBE_SYMBOL(FETCH_FUNC_NAME(deref, string_size)); + +static 
void update_deref_fetch_param(struct deref_fetch_param *data) { if (CHECK_FETCH_FUNCS(deref, data->orig.fn)) update_deref_fetch_param(data->orig.data); else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn)) update_symbol_cache(data->orig.data); } +NOKPROBE_SYMBOL(update_deref_fetch_param); -static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data) +static void free_deref_fetch_param(struct deref_fetch_param *data) { if (CHECK_FETCH_FUNCS(deref, data->orig.fn)) free_deref_fetch_param(data->orig.data); @@ -320,6 +161,7 @@ static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data) free_symbol_cache(data->orig.data); kfree(data); } +NOKPROBE_SYMBOL(free_deref_fetch_param); /* Bitfield fetch function */ struct bitfield_fetch_param { @@ -329,8 +171,8 @@ struct bitfield_fetch_param { }; #define DEFINE_FETCH_bitfield(type) \ -static __kprobes void FETCH_FUNC_NAME(bitfield, type)(struct pt_regs *regs,\ - void *data, void *dest) \ +void FETCH_FUNC_NAME(bitfield, type)(struct pt_regs *regs, \ + void *data, void *dest) \ { \ struct bitfield_fetch_param *bprm = data; \ type buf = 0; \ @@ -340,13 +182,13 @@ static __kprobes void FETCH_FUNC_NAME(bitfield, type)(struct pt_regs *regs,\ buf >>= bprm->low_shift; \ } \ *(type *)dest = buf; \ -} - +} \ +NOKPROBE_SYMBOL(FETCH_FUNC_NAME(bitfield, type)); DEFINE_BASIC_FETCH_FUNCS(bitfield) #define fetch_bitfield_string NULL #define fetch_bitfield_string_size NULL -static __kprobes void +static void update_bitfield_fetch_param(struct bitfield_fetch_param *data) { /* @@ -359,7 +201,7 @@ update_bitfield_fetch_param(struct bitfield_fetch_param *data) update_symbol_cache(data->orig.data); } -static __kprobes void +static void free_bitfield_fetch_param(struct bitfield_fetch_param *data) { /* @@ -374,58 +216,8 @@ free_bitfield_fetch_param(struct bitfield_fetch_param *data) kfree(data); } -/* Default (unsigned long) fetch type */ -#define __DEFAULT_FETCH_TYPE(t) u##t -#define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t) -#define DEFAULT_FETCH_TYPE _DEFAULT_FETCH_TYPE(BITS_PER_LONG) -#define DEFAULT_FETCH_TYPE_STR __stringify(DEFAULT_FETCH_TYPE) - -#define ASSIGN_FETCH_FUNC(method, type) \ - [FETCH_MTD_##method] = FETCH_FUNC_NAME(method, type) - -#define __ASSIGN_FETCH_TYPE(_name, ptype, ftype, _size, sign, _fmttype) \ - {.name = _name, \ - .size = _size, \ - .is_signed = sign, \ - .print = PRINT_TYPE_FUNC_NAME(ptype), \ - .fmt = PRINT_TYPE_FMT_NAME(ptype), \ - .fmttype = _fmttype, \ - .fetch = { \ -ASSIGN_FETCH_FUNC(reg, ftype), \ -ASSIGN_FETCH_FUNC(stack, ftype), \ -ASSIGN_FETCH_FUNC(retval, ftype), \ -ASSIGN_FETCH_FUNC(memory, ftype), \ -ASSIGN_FETCH_FUNC(symbol, ftype), \ -ASSIGN_FETCH_FUNC(deref, ftype), \ -ASSIGN_FETCH_FUNC(bitfield, ftype), \ - } \ - } - -#define ASSIGN_FETCH_TYPE(ptype, ftype, sign) \ - __ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, #ptype) - -#define FETCH_TYPE_STRING 0 -#define FETCH_TYPE_STRSIZE 1 - -/* Fetch type information table */ -static const struct fetch_type fetch_type_table[] = { - /* Special types */ - [FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string, - sizeof(u32), 1, "__data_loc char[]"), - [FETCH_TYPE_STRSIZE] = __ASSIGN_FETCH_TYPE("string_size", u32, - string_size, sizeof(u32), 0, "u32"), - /* Basic types */ - ASSIGN_FETCH_TYPE(u8, u8, 0), - ASSIGN_FETCH_TYPE(u16, u16, 0), - ASSIGN_FETCH_TYPE(u32, u32, 0), - ASSIGN_FETCH_TYPE(u64, u64, 0), - ASSIGN_FETCH_TYPE(s8, u8, 1), - ASSIGN_FETCH_TYPE(s16, u16, 1), - ASSIGN_FETCH_TYPE(s32, u32, 1), - ASSIGN_FETCH_TYPE(s64, 
u64, 1), -}; - -static const struct fetch_type *find_fetch_type(const char *type) +static const struct fetch_type *find_fetch_type(const char *type, + const struct fetch_type *ftbl) { int i; @@ -446,44 +238,52 @@ static const struct fetch_type *find_fetch_type(const char *type) switch (bs) { case 8: - return find_fetch_type("u8"); + return find_fetch_type("u8", ftbl); case 16: - return find_fetch_type("u16"); + return find_fetch_type("u16", ftbl); case 32: - return find_fetch_type("u32"); + return find_fetch_type("u32", ftbl); case 64: - return find_fetch_type("u64"); + return find_fetch_type("u64", ftbl); default: goto fail; } } - for (i = 0; i < ARRAY_SIZE(fetch_type_table); i++) - if (strcmp(type, fetch_type_table[i].name) == 0) - return &fetch_type_table[i]; + for (i = 0; ftbl[i].name; i++) { + if (strcmp(type, ftbl[i].name) == 0) + return &ftbl[i]; + } fail: return NULL; } /* Special function : only accept unsigned long */ -static __kprobes void fetch_stack_address(struct pt_regs *regs, - void *dummy, void *dest) +static void fetch_kernel_stack_address(struct pt_regs *regs, void *dummy, void *dest) { *(unsigned long *)dest = kernel_stack_pointer(regs); } +NOKPROBE_SYMBOL(fetch_kernel_stack_address); + +static void fetch_user_stack_address(struct pt_regs *regs, void *dummy, void *dest) +{ + *(unsigned long *)dest = user_stack_pointer(regs); +} +NOKPROBE_SYMBOL(fetch_user_stack_address); static fetch_func_t get_fetch_size_function(const struct fetch_type *type, - fetch_func_t orig_fn) + fetch_func_t orig_fn, + const struct fetch_type *ftbl) { int i; - if (type != &fetch_type_table[FETCH_TYPE_STRING]) + if (type != &ftbl[FETCH_TYPE_STRING]) return NULL; /* Only string type needs size function */ for (i = 0; i < FETCH_MTD_END; i++) if (type->fetch[i] == orig_fn) - return fetch_type_table[FETCH_TYPE_STRSIZE].fetch[i]; + return ftbl[FETCH_TYPE_STRSIZE].fetch[i]; WARN_ON(1); /* This should not happen */ @@ -516,7 +316,8 @@ int traceprobe_split_symbol_offset(char *symbol, unsigned long *offset) #define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long)) static int parse_probe_vars(char *arg, const struct fetch_type *t, - struct fetch_param *f, bool is_return) + struct fetch_param *f, bool is_return, + bool is_kprobe) { int ret = 0; unsigned long param; @@ -528,13 +329,16 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t, ret = -EINVAL; } else if (strncmp(arg, "stack", 5) == 0) { if (arg[5] == '\0') { - if (strcmp(t->name, DEFAULT_FETCH_TYPE_STR) == 0) - f->fn = fetch_stack_address; + if (strcmp(t->name, DEFAULT_FETCH_TYPE_STR)) + return -EINVAL; + + if (is_kprobe) + f->fn = fetch_kernel_stack_address; else - ret = -EINVAL; + f->fn = fetch_user_stack_address; } else if (isdigit(arg[5])) { ret = kstrtoul(arg + 5, 10, ¶m); - if (ret || param > PARAM_MAX_STACK) + if (ret || (is_kprobe && param > PARAM_MAX_STACK)) ret = -EINVAL; else { f->fn = t->fetch[FETCH_MTD_stack]; @@ -552,20 +356,18 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t, static int parse_probe_arg(char *arg, const struct fetch_type *t, struct fetch_param *f, bool is_return, bool is_kprobe) { + const struct fetch_type *ftbl; unsigned long param; long offset; char *tmp; - int ret; + int ret = 0; - ret = 0; - - /* Until uprobe_events supports only reg arguments */ - if (!is_kprobe && arg[0] != '%') - return -EINVAL; + ftbl = is_kprobe ? 
kprobes_fetch_type_table : uprobes_fetch_type_table; + BUG_ON(ftbl == NULL); switch (arg[0]) { case '$': - ret = parse_probe_vars(arg + 1, t, f, is_return); + ret = parse_probe_vars(arg + 1, t, f, is_return, is_kprobe); break; case '%': /* named register */ @@ -577,7 +379,7 @@ static int parse_probe_arg(char *arg, const struct fetch_type *t, } break; - case '@': /* memory or symbol */ + case '@': /* memory, file-offset or symbol */ if (isdigit(arg[1])) { ret = kstrtoul(arg + 1, 0, ¶m); if (ret) @@ -585,7 +387,22 @@ static int parse_probe_arg(char *arg, const struct fetch_type *t, f->fn = t->fetch[FETCH_MTD_memory]; f->data = (void *)param; + } else if (arg[1] == '+') { + /* kprobes don't support file offsets */ + if (is_kprobe) + return -EINVAL; + + ret = kstrtol(arg + 2, 0, &offset); + if (ret) + break; + + f->fn = t->fetch[FETCH_MTD_file_offset]; + f->data = (void *)offset; } else { + /* uprobes don't support symbols */ + if (!is_kprobe) + return -EINVAL; + ret = traceprobe_split_symbol_offset(arg + 1, &offset); if (ret) break; @@ -616,7 +433,7 @@ static int parse_probe_arg(char *arg, const struct fetch_type *t, struct deref_fetch_param *dprm; const struct fetch_type *t2; - t2 = find_fetch_type(NULL); + t2 = find_fetch_type(NULL, ftbl); *tmp = '\0'; dprm = kzalloc(sizeof(struct deref_fetch_param), GFP_KERNEL); @@ -624,6 +441,9 @@ static int parse_probe_arg(char *arg, const struct fetch_type *t, return -ENOMEM; dprm->offset = offset; + dprm->fetch = t->fetch[FETCH_MTD_memory]; + dprm->fetch_size = get_fetch_size_function(t, + dprm->fetch, ftbl); ret = parse_probe_arg(arg, t2, &dprm->orig, is_return, is_kprobe); if (ret) @@ -685,9 +505,13 @@ static int __parse_bitfield_probe_arg(const char *bf, int traceprobe_parse_probe_arg(char *arg, ssize_t *size, struct probe_arg *parg, bool is_return, bool is_kprobe) { + const struct fetch_type *ftbl; const char *t; int ret; + ftbl = is_kprobe ? kprobes_fetch_type_table : uprobes_fetch_type_table; + BUG_ON(ftbl == NULL); + if (strlen(arg) > MAX_ARGSTR_LEN) { pr_info("Argument is too long.: %s\n", arg); return -ENOSPC; @@ -702,7 +526,7 @@ int traceprobe_parse_probe_arg(char *arg, ssize_t *size, arg[t - parg->comm] = '\0'; t++; } - parg->type = find_fetch_type(t); + parg->type = find_fetch_type(t, ftbl); if (!parg->type) { pr_info("Unsupported type: %s\n", t); return -EINVAL; @@ -716,7 +540,8 @@ int traceprobe_parse_probe_arg(char *arg, ssize_t *size, if (ret >= 0) { parg->fetch_size.fn = get_fetch_size_function(parg->type, - parg->fetch.fn); + parg->fetch.fn, + ftbl); parg->fetch_size.data = parg->fetch.data; } @@ -837,3 +662,65 @@ out: return ret; } + +static int __set_print_fmt(struct trace_probe *tp, char *buf, int len, + bool is_return) +{ + int i; + int pos = 0; + + const char *fmt, *arg; + + if (!is_return) { + fmt = "(%lx)"; + arg = "REC->" FIELD_STRING_IP; + } else { + fmt = "(%lx <- %lx)"; + arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP; + } + + /* When len=0, we just calculate the needed length */ +#define LEN_OR_ZERO (len ? 
len - pos : 0) + + pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt); + + for (i = 0; i < tp->nr_args; i++) { + pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%s", + tp->args[i].name, tp->args[i].type->fmt); + } + + pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg); + + for (i = 0; i < tp->nr_args; i++) { + if (strcmp(tp->args[i].type->name, "string") == 0) + pos += snprintf(buf + pos, LEN_OR_ZERO, + ", __get_str(%s)", + tp->args[i].name); + else + pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s", + tp->args[i].name); + } + +#undef LEN_OR_ZERO + + /* return the length of print_fmt */ + return pos; +} + +int set_print_fmt(struct trace_probe *tp, bool is_return) +{ + int len; + char *print_fmt; + + /* First: called with 0 length to calculate the needed length */ + len = __set_print_fmt(tp, NULL, 0, is_return); + print_fmt = kmalloc(len + 1, GFP_KERNEL); + if (!print_fmt) + return -ENOMEM; + + /* Second: actually write the @print_fmt */ + __set_print_fmt(tp, print_fmt, len + 1, is_return); + tp->call.print_fmt = print_fmt; + + return 0; +} diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 5c7e09d10d7..4f815fbce16 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -81,6 +81,17 @@ */ #define convert_rloc_to_loc(dl, offs) ((u32)(dl) + (offs)) +static nokprobe_inline void *get_rloc_data(u32 *dl) +{ + return (u8 *)dl + get_rloc_offs(*dl); +} + +/* For data_loc conversion */ +static nokprobe_inline void *get_loc_data(u32 *dl, void *ent) +{ + return (u8 *)ent + get_rloc_offs(*dl); +} + /* Data fetch function type */ typedef void (*fetch_func_t)(struct pt_regs *, void *, void *); /* Printing function type */ @@ -95,6 +106,7 @@ enum { FETCH_MTD_symbol, FETCH_MTD_deref, FETCH_MTD_bitfield, + FETCH_MTD_file_offset, FETCH_MTD_END, }; @@ -115,6 +127,147 @@ struct fetch_param { void *data; }; +/* For defining macros, define string/string_size types */ +typedef u32 string; +typedef u32 string_size; + +#define PRINT_TYPE_FUNC_NAME(type) print_type_##type +#define PRINT_TYPE_FMT_NAME(type) print_type_format_##type + +/* Printing in basic type function template */ +#define DECLARE_BASIC_PRINT_TYPE_FUNC(type) \ +int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, const char *name, \ + void *data, void *ent); \ +extern const char PRINT_TYPE_FMT_NAME(type)[] + +DECLARE_BASIC_PRINT_TYPE_FUNC(u8); +DECLARE_BASIC_PRINT_TYPE_FUNC(u16); +DECLARE_BASIC_PRINT_TYPE_FUNC(u32); +DECLARE_BASIC_PRINT_TYPE_FUNC(u64); +DECLARE_BASIC_PRINT_TYPE_FUNC(s8); +DECLARE_BASIC_PRINT_TYPE_FUNC(s16); +DECLARE_BASIC_PRINT_TYPE_FUNC(s32); +DECLARE_BASIC_PRINT_TYPE_FUNC(s64); +DECLARE_BASIC_PRINT_TYPE_FUNC(string); + +#define FETCH_FUNC_NAME(method, type) fetch_##method##_##type + +/* Declare macro for basic types */ +#define DECLARE_FETCH_FUNC(method, type) \ +extern void FETCH_FUNC_NAME(method, type)(struct pt_regs *regs, \ + void *data, void *dest) + +#define DECLARE_BASIC_FETCH_FUNCS(method) \ +DECLARE_FETCH_FUNC(method, u8); \ +DECLARE_FETCH_FUNC(method, u16); \ +DECLARE_FETCH_FUNC(method, u32); \ +DECLARE_FETCH_FUNC(method, u64) + +DECLARE_BASIC_FETCH_FUNCS(reg); +#define fetch_reg_string NULL +#define fetch_reg_string_size NULL + +DECLARE_BASIC_FETCH_FUNCS(retval); +#define fetch_retval_string NULL +#define fetch_retval_string_size NULL + +DECLARE_BASIC_FETCH_FUNCS(symbol); +DECLARE_FETCH_FUNC(symbol, string); +DECLARE_FETCH_FUNC(symbol, string_size); + +DECLARE_BASIC_FETCH_FUNCS(deref); +DECLARE_FETCH_FUNC(deref, string); +DECLARE_FETCH_FUNC(deref, string_size); + 
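
The __set_print_fmt()/set_print_fmt() pair added to trace_probe.c above uses the usual two-pass snprintf() sizing idiom: on the first call len is 0, so LEN_OR_ZERO collapses to 0 and every snprintf() writes nothing yet still returns the length it would have produced; the accumulated total is then used to kmalloc() an exactly-sized buffer, and the same routine runs a second time to fill it. A minimal userspace sketch of that idiom follows; the field names ("fd", "buf") and the simplified format are invented for illustration and this is not the kernel code itself.

#include <stdio.h>
#include <stdlib.h>

struct arg { const char *name; const char *fmt; };

/* When len == 0 we only measure; snprintf() still reports the would-be length. */
#define LEN_OR_ZERO (len ? len - pos : 0)

static int build_fmt(char *buf, int len, const struct arg *args, int nr_args)
{
	int i, pos = 0;

	pos += snprintf(buf ? buf + pos : NULL, LEN_OR_ZERO, "\"(%%lx)");
	for (i = 0; i < nr_args; i++)
		pos += snprintf(buf ? buf + pos : NULL, LEN_OR_ZERO, " %s=%s",
				args[i].name, args[i].fmt);
	pos += snprintf(buf ? buf + pos : NULL, LEN_OR_ZERO, "\", REC->ip");
	for (i = 0; i < nr_args; i++)
		pos += snprintf(buf ? buf + pos : NULL, LEN_OR_ZERO, ", REC->%s",
				args[i].name);

	return pos;	/* needed length, excluding the trailing NUL */
}
#undef LEN_OR_ZERO

int main(void)
{
	struct arg args[] = { { "fd", "%d" }, { "buf", "0x%x" } };
	int nr_args = sizeof(args) / sizeof(args[0]);
	char *print_fmt;
	int len;

	/* First: called with 0 length to calculate the needed length */
	len = build_fmt(NULL, 0, args, nr_args);
	print_fmt = malloc(len + 1);
	if (!print_fmt)
		return 1;

	/* Second: actually write the format into the exactly-sized buffer */
	build_fmt(print_fmt, len + 1, args, nr_args);

	printf("%s\n", print_fmt);	/* "(%lx) fd=%d buf=0x%x", REC->ip, ... */
	free(print_fmt);
	return 0;
}

Because the buffer is sized from the measuring pass, the writing pass can never truncate, which is why the kernel version does not need to check snprintf()'s return value for truncation at all.
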
+DECLARE_BASIC_FETCH_FUNCS(bitfield); +#define fetch_bitfield_string NULL +#define fetch_bitfield_string_size NULL + +/* + * Define macro for basic types - we don't need to define s* types, because + * we have to care only about bitwidth at recording time. + */ +#define DEFINE_BASIC_FETCH_FUNCS(method) \ +DEFINE_FETCH_##method(u8) \ +DEFINE_FETCH_##method(u16) \ +DEFINE_FETCH_##method(u32) \ +DEFINE_FETCH_##method(u64) + +/* Default (unsigned long) fetch type */ +#define __DEFAULT_FETCH_TYPE(t) u##t +#define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t) +#define DEFAULT_FETCH_TYPE _DEFAULT_FETCH_TYPE(BITS_PER_LONG) +#define DEFAULT_FETCH_TYPE_STR __stringify(DEFAULT_FETCH_TYPE) + +#define ASSIGN_FETCH_FUNC(method, type) \ + [FETCH_MTD_##method] = FETCH_FUNC_NAME(method, type) + +#define __ASSIGN_FETCH_TYPE(_name, ptype, ftype, _size, sign, _fmttype) \ + {.name = _name, \ + .size = _size, \ + .is_signed = sign, \ + .print = PRINT_TYPE_FUNC_NAME(ptype), \ + .fmt = PRINT_TYPE_FMT_NAME(ptype), \ + .fmttype = _fmttype, \ + .fetch = { \ +ASSIGN_FETCH_FUNC(reg, ftype), \ +ASSIGN_FETCH_FUNC(stack, ftype), \ +ASSIGN_FETCH_FUNC(retval, ftype), \ +ASSIGN_FETCH_FUNC(memory, ftype), \ +ASSIGN_FETCH_FUNC(symbol, ftype), \ +ASSIGN_FETCH_FUNC(deref, ftype), \ +ASSIGN_FETCH_FUNC(bitfield, ftype), \ +ASSIGN_FETCH_FUNC(file_offset, ftype), \ + } \ + } + +#define ASSIGN_FETCH_TYPE(ptype, ftype, sign) \ + __ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, #ptype) + +#define ASSIGN_FETCH_TYPE_END {} + +#define FETCH_TYPE_STRING 0 +#define FETCH_TYPE_STRSIZE 1 + +/* + * Fetch type information table. + * It's declared as a weak symbol due to conditional compilation. + */ +extern __weak const struct fetch_type kprobes_fetch_type_table[]; +extern __weak const struct fetch_type uprobes_fetch_type_table[]; + +#ifdef CONFIG_KPROBE_EVENT +struct symbol_cache; +unsigned long update_symbol_cache(struct symbol_cache *sc); +void free_symbol_cache(struct symbol_cache *sc); +struct symbol_cache *alloc_symbol_cache(const char *sym, long offset); +#else +/* uprobes do not support symbol fetch methods */ +#define fetch_symbol_u8 NULL +#define fetch_symbol_u16 NULL +#define fetch_symbol_u32 NULL +#define fetch_symbol_u64 NULL +#define fetch_symbol_string NULL +#define fetch_symbol_string_size NULL + +struct symbol_cache { +}; +static inline unsigned long __used update_symbol_cache(struct symbol_cache *sc) +{ + return 0; +} + +static inline void __used free_symbol_cache(struct symbol_cache *sc) +{ +} + +static inline struct symbol_cache * __used +alloc_symbol_cache(const char *sym, long offset) +{ + return NULL; +} +#endif /* CONFIG_KPROBE_EVENT */ + struct probe_arg { struct fetch_param fetch; struct fetch_param fetch_size; @@ -124,7 +277,32 @@ struct probe_arg { const struct fetch_type *type; /* Type of this argument */ }; -static inline __kprobes void call_fetch(struct fetch_param *fprm, +struct trace_probe { + unsigned int flags; /* For TP_FLAG_* */ + struct ftrace_event_class class; + struct ftrace_event_call call; + struct list_head files; + ssize_t size; /* trace entry size */ + unsigned int nr_args; + struct probe_arg args[]; +}; + +struct event_file_link { + struct ftrace_event_file *file; + struct list_head list; +}; + +static inline bool trace_probe_is_enabled(struct trace_probe *tp) +{ + return !!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE)); +} + +static inline bool trace_probe_is_registered(struct trace_probe *tp) +{ + return !!(tp->flags & TP_FLAG_REGISTERED); +} + +static nokprobe_inline void 
call_fetch(struct fetch_param *fprm, struct pt_regs *regs, void *dest) { return fprm->fn(regs, fprm->data, dest); @@ -142,6 +320,18 @@ static inline int is_good_name(const char *name) return 1; } +static inline struct event_file_link * +find_event_file_link(struct trace_probe *tp, struct ftrace_event_file *file) +{ + struct event_file_link *link; + + list_for_each_entry(link, &tp->files, list) + if (link->file == file) + return link; + + return NULL; +} + extern int traceprobe_parse_probe_arg(char *arg, ssize_t *size, struct probe_arg *parg, bool is_return, bool is_kprobe); @@ -158,3 +348,53 @@ extern ssize_t traceprobe_probes_write(struct file *file, int (*createfn)(int, char**)); extern int traceprobe_command(const char *buf, int (*createfn)(int, char**)); + +/* Sum up total data length for dynamic arraies (strings) */ +static nokprobe_inline int +__get_data_size(struct trace_probe *tp, struct pt_regs *regs) +{ + int i, ret = 0; + u32 len; + + for (i = 0; i < tp->nr_args; i++) + if (unlikely(tp->args[i].fetch_size.fn)) { + call_fetch(&tp->args[i].fetch_size, regs, &len); + ret += len; + } + + return ret; +} + +/* Store the value of each argument */ +static nokprobe_inline void +store_trace_args(int ent_size, struct trace_probe *tp, struct pt_regs *regs, + u8 *data, int maxlen) +{ + int i; + u32 end = tp->size; + u32 *dl; /* Data (relative) location */ + + for (i = 0; i < tp->nr_args; i++) { + if (unlikely(tp->args[i].fetch_size.fn)) { + /* + * First, we set the relative location and + * maximum data length to *dl + */ + dl = (u32 *)(data + tp->args[i].offset); + *dl = make_data_rloc(maxlen, end - tp->args[i].offset); + /* Then try to fetch string or dynamic array data */ + call_fetch(&tp->args[i].fetch, regs, dl); + /* Reduce maximum length */ + end += get_rloc_len(*dl); + maxlen -= get_rloc_len(*dl); + /* Trick here, convert data_rloc to data_loc */ + *dl = convert_rloc_to_loc(*dl, + ent_size + tp->args[i].offset); + } else + /* Just fetching data normally */ + call_fetch(&tp->args[i].fetch, regs, + data + tp->args[i].offset); + } +} + +extern int set_print_fmt(struct trace_probe *tp, bool is_return); diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 4e98e3b257a..3f34dc9b40f 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -45,7 +45,7 @@ tracing_sched_switch_trace(struct trace_array *tr, entry->next_state = next->state; entry->next_cpu = task_cpu(next); - if (!filter_check_discard(call, entry, buffer, event)) + if (!call_filter_check_discard(call, entry, buffer, event)) trace_buffer_unlock_commit(buffer, event, flags, pc); } @@ -101,7 +101,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry->next_state = wakee->state; entry->next_cpu = task_cpu(wakee); - if (!filter_check_discard(call, entry, buffer, event)) + if (!call_filter_check_discard(call, entry, buffer, event)) trace_buffer_unlock_commit(buffer, event, flags, pc); } diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index fee77e15d81..19bd8928ce9 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -16,6 +16,7 @@ #include <linux/uaccess.h> #include <linux/ftrace.h> #include <linux/sched/rt.h> +#include <linux/sched/deadline.h> #include <trace/events/sched.h> #include "trace.h" @@ -27,6 +28,8 @@ static int wakeup_cpu; static int wakeup_current_cpu; static unsigned wakeup_prio = -1; static int wakeup_rt; +static int wakeup_dl; +static int tracing_dl = 0; static 
arch_spinlock_t wakeup_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; @@ -127,15 +130,9 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip, atomic_dec(&data->disabled); preempt_enable_notrace(); } - -static struct ftrace_ops trace_ops __read_mostly = -{ - .func = wakeup_tracer_call, - .flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE, -}; #endif /* CONFIG_FUNCTION_TRACER */ -static int register_wakeup_function(int graph, int set) +static int register_wakeup_function(struct trace_array *tr, int graph, int set) { int ret; @@ -147,7 +144,7 @@ static int register_wakeup_function(int graph, int set) ret = register_ftrace_graph(&wakeup_graph_return, &wakeup_graph_entry); else - ret = register_ftrace_function(&trace_ops); + ret = register_ftrace_function(tr->ops); if (!ret) function_enabled = true; @@ -155,7 +152,7 @@ static int register_wakeup_function(int graph, int set) return ret; } -static void unregister_wakeup_function(int graph) +static void unregister_wakeup_function(struct trace_array *tr, int graph) { if (!function_enabled) return; @@ -163,32 +160,34 @@ static void unregister_wakeup_function(int graph) if (graph) unregister_ftrace_graph(); else - unregister_ftrace_function(&trace_ops); + unregister_ftrace_function(tr->ops); function_enabled = false; } -static void wakeup_function_set(int set) +static void wakeup_function_set(struct trace_array *tr, int set) { if (set) - register_wakeup_function(is_graph(), 1); + register_wakeup_function(tr, is_graph(), 1); else - unregister_wakeup_function(is_graph()); + unregister_wakeup_function(tr, is_graph()); } -static int wakeup_flag_changed(struct tracer *tracer, u32 mask, int set) +static int wakeup_flag_changed(struct trace_array *tr, u32 mask, int set) { + struct tracer *tracer = tr->current_trace; + if (mask & TRACE_ITER_FUNCTION) - wakeup_function_set(set); + wakeup_function_set(tr, set); return trace_keep_overwrite(tracer, mask, set); } -static int start_func_tracer(int graph) +static int start_func_tracer(struct trace_array *tr, int graph) { int ret; - ret = register_wakeup_function(graph, 0); + ret = register_wakeup_function(tr, graph, 0); if (!ret && tracing_is_enabled()) tracer_enabled = 1; @@ -198,15 +197,16 @@ static int start_func_tracer(int graph) return ret; } -static void stop_func_tracer(int graph) +static void stop_func_tracer(struct trace_array *tr, int graph) { tracer_enabled = 0; - unregister_wakeup_function(graph); + unregister_wakeup_function(tr, graph); } #ifdef CONFIG_FUNCTION_GRAPH_TRACER -static int wakeup_set_flag(u32 old_flags, u32 bit, int set) +static int +wakeup_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set) { if (!(bit & TRACE_DISPLAY_GRAPH)) @@ -215,12 +215,12 @@ static int wakeup_set_flag(u32 old_flags, u32 bit, int set) if (!(is_graph() ^ set)) return 0; - stop_func_tracer(!set); + stop_func_tracer(tr, !set); wakeup_reset(wakeup_trace); - tracing_max_latency = 0; + tr->max_latency = 0; - return start_func_tracer(set); + return start_func_tracer(tr, set); } static int wakeup_graph_entry(struct ftrace_graph_ent *trace) @@ -308,7 +308,8 @@ __trace_function(struct trace_array *tr, #else #define __trace_function trace_function -static int wakeup_set_flag(u32 old_flags, u32 bit, int set) +static int +wakeup_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set) { return -EINVAL; } @@ -343,13 +344,13 @@ static void wakeup_print_header(struct seq_file *s) /* * Should this new latency be reported/recorded? 
*/ -static int report_latency(cycle_t delta) +static int report_latency(struct trace_array *tr, cycle_t delta) { if (tracing_thresh) { if (delta < tracing_thresh) return 0; } else { - if (delta <= tracing_max_latency) + if (delta <= tr->max_latency) return 0; } return 1; @@ -417,11 +418,11 @@ probe_wakeup_sched_switch(void *ignore, T1 = ftrace_now(cpu); delta = T1-T0; - if (!report_latency(delta)) + if (!report_latency(wakeup_trace, delta)) goto out_unlock; if (likely(!is_tracing_stopped())) { - tracing_max_latency = delta; + wakeup_trace->max_latency = delta; update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu); } @@ -437,6 +438,7 @@ static void __wakeup_reset(struct trace_array *tr) { wakeup_cpu = -1; wakeup_prio = -1; + tracing_dl = 0; if (wakeup_task) put_task_struct(wakeup_task); @@ -472,9 +474,17 @@ probe_wakeup(void *ignore, struct task_struct *p, int success) tracing_record_cmdline(p); tracing_record_cmdline(current); - if ((wakeup_rt && !rt_task(p)) || - p->prio >= wakeup_prio || - p->prio >= current->prio) + /* + * Semantic is like this: + * - wakeup tracer handles all tasks in the system, independently + * from their scheduling class; + * - wakeup_rt tracer handles tasks belonging to sched_dl and + * sched_rt class; + * - wakeup_dl handles tasks belonging to sched_dl class only. + */ + if (tracing_dl || (wakeup_dl && !dl_task(p)) || + (wakeup_rt && !dl_task(p) && !rt_task(p)) || + (!dl_task(p) && (p->prio >= wakeup_prio || p->prio >= current->prio))) return; pc = preempt_count(); @@ -486,7 +496,8 @@ probe_wakeup(void *ignore, struct task_struct *p, int success) arch_spin_lock(&wakeup_lock); /* check for races. */ - if (!tracer_enabled || p->prio >= wakeup_prio) + if (!tracer_enabled || tracing_dl || + (!dl_task(p) && p->prio >= wakeup_prio)) goto out_locked; /* reset the trace */ @@ -496,6 +507,15 @@ probe_wakeup(void *ignore, struct task_struct *p, int success) wakeup_current_cpu = wakeup_cpu; wakeup_prio = p->prio; + /* + * Once you start tracing a -deadline task, don't bother tracing + * another task until the first one wakes up. 
+ */ + if (dl_task(p)) + tracing_dl = 1; + else + tracing_dl = 0; + wakeup_task = p; get_task_struct(wakeup_task); @@ -561,7 +581,7 @@ static void start_wakeup_tracer(struct trace_array *tr) */ smp_wmb(); - if (start_func_tracer(is_graph())) + if (start_func_tracer(tr, is_graph())) printk(KERN_ERR "failed to start wakeup tracer\n"); return; @@ -574,13 +594,15 @@ fail_deprobe: static void stop_wakeup_tracer(struct trace_array *tr) { tracer_enabled = 0; - stop_func_tracer(is_graph()); + stop_func_tracer(tr, is_graph()); unregister_trace_sched_switch(probe_wakeup_sched_switch, NULL); unregister_trace_sched_wakeup_new(probe_wakeup, NULL); unregister_trace_sched_wakeup(probe_wakeup, NULL); unregister_trace_sched_migrate_task(probe_wakeup_migrate_task, NULL); } +static bool wakeup_busy; + static int __wakeup_tracer_init(struct trace_array *tr) { save_flags = trace_flags; @@ -589,24 +611,45 @@ static int __wakeup_tracer_init(struct trace_array *tr) set_tracer_flag(tr, TRACE_ITER_OVERWRITE, 1); set_tracer_flag(tr, TRACE_ITER_LATENCY_FMT, 1); - tracing_max_latency = 0; + tr->max_latency = 0; wakeup_trace = tr; + ftrace_init_array_ops(tr, wakeup_tracer_call); start_wakeup_tracer(tr); + + wakeup_busy = true; return 0; } static int wakeup_tracer_init(struct trace_array *tr) { + if (wakeup_busy) + return -EBUSY; + + wakeup_dl = 0; wakeup_rt = 0; return __wakeup_tracer_init(tr); } static int wakeup_rt_tracer_init(struct trace_array *tr) { + if (wakeup_busy) + return -EBUSY; + + wakeup_dl = 0; wakeup_rt = 1; return __wakeup_tracer_init(tr); } +static int wakeup_dl_tracer_init(struct trace_array *tr) +{ + if (wakeup_busy) + return -EBUSY; + + wakeup_dl = 1; + wakeup_rt = 0; + return __wakeup_tracer_init(tr); +} + static void wakeup_tracer_reset(struct trace_array *tr) { int lat_flag = save_flags & TRACE_ITER_LATENCY_FMT; @@ -618,6 +661,8 @@ static void wakeup_tracer_reset(struct trace_array *tr) set_tracer_flag(tr, TRACE_ITER_LATENCY_FMT, lat_flag); set_tracer_flag(tr, TRACE_ITER_OVERWRITE, overwrite_flag); + ftrace_reset_array_ops(tr); + wakeup_busy = false; } static void wakeup_tracer_start(struct trace_array *tr) @@ -649,6 +694,7 @@ static struct tracer wakeup_tracer __read_mostly = #endif .open = wakeup_trace_open, .close = wakeup_trace_close, + .allow_instances = true, .use_max_tr = true, }; @@ -659,7 +705,28 @@ static struct tracer wakeup_rt_tracer __read_mostly = .reset = wakeup_tracer_reset, .start = wakeup_tracer_start, .stop = wakeup_tracer_stop, - .wait_pipe = poll_wait_pipe, + .print_max = true, + .print_header = wakeup_print_header, + .print_line = wakeup_print_line, + .flags = &tracer_flags, + .set_flag = wakeup_set_flag, + .flag_changed = wakeup_flag_changed, +#ifdef CONFIG_FTRACE_SELFTEST + .selftest = trace_selftest_startup_wakeup, +#endif + .open = wakeup_trace_open, + .close = wakeup_trace_close, + .allow_instances = true, + .use_max_tr = true, +}; + +static struct tracer wakeup_dl_tracer __read_mostly = +{ + .name = "wakeup_dl", + .init = wakeup_dl_tracer_init, + .reset = wakeup_tracer_reset, + .start = wakeup_tracer_start, + .stop = wakeup_tracer_stop, .print_max = true, .print_header = wakeup_print_header, .print_line = wakeup_print_line, @@ -686,6 +753,10 @@ __init static int init_wakeup_tracer(void) if (ret) return ret; + ret = register_tracer(&wakeup_dl_tracer); + if (ret) + return ret; + return 0; } core_initcall(init_wakeup_tracer); diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index a7329b7902f..5ef60499dc8 100644 --- a/kernel/trace/trace_selftest.c 
+++ b/kernel/trace/trace_selftest.c @@ -65,7 +65,7 @@ static int trace_test_buffer(struct trace_buffer *buf, unsigned long *count) /* Don't allow flipping of max traces now */ local_irq_save(flags); - arch_spin_lock(&ftrace_max_lock); + arch_spin_lock(&buf->tr->max_lock); cnt = ring_buffer_entries(buf->buffer); @@ -83,7 +83,7 @@ static int trace_test_buffer(struct trace_buffer *buf, unsigned long *count) break; } tracing_on(); - arch_spin_unlock(&ftrace_max_lock); + arch_spin_unlock(&buf->tr->max_lock); local_irq_restore(flags); if (count) @@ -161,11 +161,6 @@ static struct ftrace_ops test_probe3 = { .flags = FTRACE_OPS_FL_RECURSION_SAFE, }; -static struct ftrace_ops test_global = { - .func = trace_selftest_test_global_func, - .flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE, -}; - static void print_counts(void) { printk("(%d %d %d %d %d) ", @@ -185,7 +180,7 @@ static void reset_counts(void) trace_selftest_test_dyn_cnt = 0; } -static int trace_selftest_ops(int cnt) +static int trace_selftest_ops(struct trace_array *tr, int cnt) { int save_ftrace_enabled = ftrace_enabled; struct ftrace_ops *dyn_ops; @@ -220,7 +215,11 @@ static int trace_selftest_ops(int cnt) register_ftrace_function(&test_probe1); register_ftrace_function(&test_probe2); register_ftrace_function(&test_probe3); - register_ftrace_function(&test_global); + /* First time we are running with main function */ + if (cnt > 1) { + ftrace_init_array_ops(tr, trace_selftest_test_global_func); + register_ftrace_function(tr->ops); + } DYN_FTRACE_TEST_NAME(); @@ -232,8 +231,10 @@ static int trace_selftest_ops(int cnt) goto out; if (trace_selftest_test_probe3_cnt != 1) goto out; - if (trace_selftest_test_global_cnt == 0) - goto out; + if (cnt > 1) { + if (trace_selftest_test_global_cnt == 0) + goto out; + } DYN_FTRACE_TEST_NAME2(); @@ -269,8 +270,10 @@ static int trace_selftest_ops(int cnt) goto out_free; if (trace_selftest_test_probe3_cnt != 3) goto out_free; - if (trace_selftest_test_global_cnt == 0) - goto out; + if (cnt > 1) { + if (trace_selftest_test_global_cnt == 0) + goto out; + } if (trace_selftest_test_dyn_cnt == 0) goto out_free; @@ -295,7 +298,9 @@ static int trace_selftest_ops(int cnt) unregister_ftrace_function(&test_probe1); unregister_ftrace_function(&test_probe2); unregister_ftrace_function(&test_probe3); - unregister_ftrace_function(&test_global); + if (cnt > 1) + unregister_ftrace_function(tr->ops); + ftrace_reset_array_ops(tr); /* Make sure everything is off */ reset_counts(); @@ -315,9 +320,9 @@ static int trace_selftest_ops(int cnt) } /* Test dynamic code modification and ftrace filters */ -int trace_selftest_startup_dynamic_tracing(struct tracer *trace, - struct trace_array *tr, - int (*func)(void)) +static int trace_selftest_startup_dynamic_tracing(struct tracer *trace, + struct trace_array *tr, + int (*func)(void)) { int save_ftrace_enabled = ftrace_enabled; unsigned long count; @@ -388,7 +393,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace, } /* Test the ops with global tracing running */ - ret = trace_selftest_ops(1); + ret = trace_selftest_ops(tr, 1); trace->reset(tr); out: @@ -399,7 +404,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace, /* Test the ops with global tracing off */ if (!ret) - ret = trace_selftest_ops(2); + ret = trace_selftest_ops(tr, 2); return ret; } @@ -802,7 +807,7 @@ out: int trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr) { - unsigned long save_max = tracing_max_latency; + unsigned long save_max = 
tr->max_latency; unsigned long count; int ret; @@ -814,7 +819,7 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr) } /* reset the max latency */ - tracing_max_latency = 0; + tr->max_latency = 0; /* disable interrupts for a bit */ local_irq_disable(); udelay(100); @@ -841,7 +846,7 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr) ret = -1; } - tracing_max_latency = save_max; + tr->max_latency = save_max; return ret; } @@ -851,7 +856,7 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr) int trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr) { - unsigned long save_max = tracing_max_latency; + unsigned long save_max = tr->max_latency; unsigned long count; int ret; @@ -876,7 +881,7 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr) } /* reset the max latency */ - tracing_max_latency = 0; + tr->max_latency = 0; /* disable preemption for a bit */ preempt_disable(); udelay(100); @@ -903,7 +908,7 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr) ret = -1; } - tracing_max_latency = save_max; + tr->max_latency = save_max; return ret; } @@ -913,7 +918,7 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr) int trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *tr) { - unsigned long save_max = tracing_max_latency; + unsigned long save_max = tr->max_latency; unsigned long count; int ret; @@ -938,7 +943,7 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array * } /* reset the max latency */ - tracing_max_latency = 0; + tr->max_latency = 0; /* disable preemption and interrupts for a bit */ preempt_disable(); @@ -973,7 +978,7 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array * } /* do the test by disabling interrupts first this time */ - tracing_max_latency = 0; + tr->max_latency = 0; tracing_start(); trace->start(tr); @@ -1004,7 +1009,7 @@ out: tracing_start(); out_no_start: trace->reset(tr); - tracing_max_latency = save_max; + tr->max_latency = save_max; return ret; } @@ -1022,11 +1027,16 @@ trace_selftest_startup_nop(struct tracer *trace, struct trace_array *tr) #ifdef CONFIG_SCHED_TRACER static int trace_wakeup_test_thread(void *data) { - /* Make this a RT thread, doesn't need to be too high */ - static const struct sched_param param = { .sched_priority = 5 }; + /* Make this a -deadline thread */ + static const struct sched_attr attr = { + .sched_policy = SCHED_DEADLINE, + .sched_runtime = 100000ULL, + .sched_deadline = 10000000ULL, + .sched_period = 10000000ULL + }; struct completion *x = data; - sched_setscheduler(current, SCHED_FIFO, ¶m); + sched_setattr(current, &attr); /* Make it know we have a new prio */ complete(x); @@ -1040,8 +1050,8 @@ static int trace_wakeup_test_thread(void *data) /* we are awake, now wait to disappear */ while (!kthread_should_stop()) { /* - * This is an RT task, do short sleeps to let - * others run. + * This will likely be the system top priority + * task, do short sleeps to let others run. 
*/ msleep(100); } @@ -1052,23 +1062,23 @@ static int trace_wakeup_test_thread(void *data) int trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr) { - unsigned long save_max = tracing_max_latency; + unsigned long save_max = tr->max_latency; struct task_struct *p; - struct completion isrt; + struct completion is_ready; unsigned long count; int ret; - init_completion(&isrt); + init_completion(&is_ready); - /* create a high prio thread */ - p = kthread_run(trace_wakeup_test_thread, &isrt, "ftrace-test"); + /* create a -deadline thread */ + p = kthread_run(trace_wakeup_test_thread, &is_ready, "ftrace-test"); if (IS_ERR(p)) { printk(KERN_CONT "Failed to create ftrace wakeup test thread "); return -1; } - /* make sure the thread is running at an RT prio */ - wait_for_completion(&isrt); + /* make sure the thread is running at -deadline policy */ + wait_for_completion(&is_ready); /* start the tracing */ ret = tracer_init(trace, tr); @@ -1078,23 +1088,23 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr) } /* reset the max latency */ - tracing_max_latency = 0; + tr->max_latency = 0; while (p->on_rq) { /* - * Sleep to make sure the RT thread is asleep too. + * Sleep to make sure the -deadline thread is asleep too. * On virtual machines we can't rely on timings, * but we want to make sure this test still works. */ msleep(100); } - init_completion(&isrt); + init_completion(&is_ready); wake_up_process(p); /* Wait for the task to wake up */ - wait_for_completion(&isrt); + wait_for_completion(&is_ready); /* stop the tracing. */ tracing_stop(); @@ -1108,7 +1118,7 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr) trace->reset(tr); tracing_start(); - tracing_max_latency = save_max; + tr->max_latency = save_max; /* kill the thread */ kthread_stop(p); diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index b20428c5efe..8a4e5cb66a4 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -13,6 +13,7 @@ #include <linux/sysctl.h> #include <linux/init.h> #include <linux/fs.h> +#include <linux/magic.h> #include <asm/setup.h> @@ -50,11 +51,33 @@ static DEFINE_MUTEX(stack_sysctl_mutex); int stack_tracer_enabled; static int last_stack_tracer_enabled; +static inline void print_max_stack(void) +{ + long i; + int size; + + pr_emerg(" Depth Size Location (%d entries)\n" + " ----- ---- --------\n", + max_stack_trace.nr_entries - 1); + + for (i = 0; i < max_stack_trace.nr_entries; i++) { + if (stack_dump_trace[i] == ULONG_MAX) + break; + if (i+1 == max_stack_trace.nr_entries || + stack_dump_trace[i+1] == ULONG_MAX) + size = stack_dump_index[i]; + else + size = stack_dump_index[i] - stack_dump_index[i+1]; + + pr_emerg("%3ld) %8d %5d %pS\n", i, stack_dump_index[i], + size, (void *)stack_dump_trace[i]); + } +} + static inline void check_stack(unsigned long ip, unsigned long *stack) { - unsigned long this_size, flags; - unsigned long *p, *top, *start; + unsigned long this_size, flags; unsigned long *p, *top, *start; static int tracer_frame; int frame_size = ACCESS_ONCE(tracer_frame); int i; @@ -84,8 +107,12 @@ check_stack(unsigned long ip, unsigned long *stack) max_stack_size = this_size; - max_stack_trace.nr_entries = 0; - max_stack_trace.skip = 3; + max_stack_trace.nr_entries = 0; + + if (using_ftrace_ops_list_func()) + max_stack_trace.skip = 4; + else + max_stack_trace.skip = 3; save_stack_trace(&max_stack_trace); @@ -144,6 +171,12 @@ check_stack(unsigned long ip, unsigned long *stack) i++; } + if ((current 
!= &init_task && + *(end_of_stack(current)) != STACK_END_MAGIC)) { + print_max_stack(); + BUG(); + } + out: arch_spin_unlock(&max_stack_lock); local_irq_restore(flags); @@ -382,7 +415,7 @@ static const struct file_operations stack_trace_filter_fops = { .open = stack_trace_filter_open, .read = seq_read, .write = ftrace_filter_write, - .llseek = ftrace_filter_lseek, + .llseek = tracing_lseek, .release = ftrace_regex_release, }; diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c index 847f88a6194..7af67360b33 100644 --- a/kernel/trace/trace_stat.c +++ b/kernel/trace/trace_stat.c @@ -43,46 +43,15 @@ static DEFINE_MUTEX(all_stat_sessions_mutex); /* The root directory for all stat files */ static struct dentry *stat_dir; -/* - * Iterate through the rbtree using a post order traversal path - * to release the next node. - * It won't necessary release one at each iteration - * but it will at least advance closer to the next one - * to be released. - */ -static struct rb_node *release_next(struct tracer_stat *ts, - struct rb_node *node) +static void __reset_stat_session(struct stat_session *session) { - struct stat_node *snode; - struct rb_node *parent = rb_parent(node); - - if (node->rb_left) - return node->rb_left; - else if (node->rb_right) - return node->rb_right; - else { - if (!parent) - ; - else if (parent->rb_left == node) - parent->rb_left = NULL; - else - parent->rb_right = NULL; + struct stat_node *snode, *n; - snode = container_of(node, struct stat_node, node); - if (ts->stat_release) - ts->stat_release(snode->stat); + rbtree_postorder_for_each_entry_safe(snode, n, &session->stat_root, node) { + if (session->ts->stat_release) + session->ts->stat_release(snode->stat); kfree(snode); - - return parent; } -} - -static void __reset_stat_session(struct stat_session *session) -{ - struct rb_node *node = session->stat_root.rb_node; - - while (node) - node = release_next(session->ts, node); session->stat_root = RB_ROOT; } diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 559329d9bd2..759d5e00451 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -302,6 +302,7 @@ static int __init syscall_exit_define_fields(struct ftrace_event_call *call) static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) { struct trace_array *tr = data; + struct ftrace_event_file *ftrace_file; struct syscall_trace_enter *entry; struct syscall_metadata *sys_data; struct ring_buffer_event *event; @@ -314,7 +315,13 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) syscall_nr = trace_get_syscall_nr(current, regs); if (syscall_nr < 0) return; - if (!test_bit(syscall_nr, tr->enabled_enter_syscalls)) + + /* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE) */ + ftrace_file = rcu_dereference_sched(tr->enter_syscall_files[syscall_nr]); + if (!ftrace_file) + return; + + if (ftrace_trigger_soft_disabled(ftrace_file)) return; sys_data = syscall_nr_to_meta(syscall_nr); @@ -336,15 +343,14 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) entry->nr = syscall_nr; syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args); - if (!filter_current_check_discard(buffer, sys_data->enter_event, - entry, event)) - trace_current_buffer_unlock_commit(buffer, event, - irq_flags, pc); + event_trigger_unlock_commit(ftrace_file, buffer, event, entry, + irq_flags, pc); } static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) { struct trace_array *tr = 
data; + struct ftrace_event_file *ftrace_file; struct syscall_trace_exit *entry; struct syscall_metadata *sys_data; struct ring_buffer_event *event; @@ -356,7 +362,13 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) syscall_nr = trace_get_syscall_nr(current, regs); if (syscall_nr < 0) return; - if (!test_bit(syscall_nr, tr->enabled_exit_syscalls)) + + /* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE()) */ + ftrace_file = rcu_dereference_sched(tr->exit_syscall_files[syscall_nr]); + if (!ftrace_file) + return; + + if (ftrace_trigger_soft_disabled(ftrace_file)) return; sys_data = syscall_nr_to_meta(syscall_nr); @@ -377,10 +389,8 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) entry->nr = syscall_nr; entry->ret = syscall_get_return_value(current, regs); - if (!filter_current_check_discard(buffer, sys_data->exit_event, - entry, event)) - trace_current_buffer_unlock_commit(buffer, event, - irq_flags, pc); + event_trigger_unlock_commit(ftrace_file, buffer, event, entry, + irq_flags, pc); } static int reg_event_syscall_enter(struct ftrace_event_file *file, @@ -397,7 +407,7 @@ static int reg_event_syscall_enter(struct ftrace_event_file *file, if (!tr->sys_refcount_enter) ret = register_trace_sys_enter(ftrace_syscall_enter, tr); if (!ret) { - set_bit(num, tr->enabled_enter_syscalls); + rcu_assign_pointer(tr->enter_syscall_files[num], file); tr->sys_refcount_enter++; } mutex_unlock(&syscall_trace_lock); @@ -415,7 +425,7 @@ static void unreg_event_syscall_enter(struct ftrace_event_file *file, return; mutex_lock(&syscall_trace_lock); tr->sys_refcount_enter--; - clear_bit(num, tr->enabled_enter_syscalls); + rcu_assign_pointer(tr->enter_syscall_files[num], NULL); if (!tr->sys_refcount_enter) unregister_trace_sys_enter(ftrace_syscall_enter, tr); mutex_unlock(&syscall_trace_lock); @@ -435,7 +445,7 @@ static int reg_event_syscall_exit(struct ftrace_event_file *file, if (!tr->sys_refcount_exit) ret = register_trace_sys_exit(ftrace_syscall_exit, tr); if (!ret) { - set_bit(num, tr->enabled_exit_syscalls); + rcu_assign_pointer(tr->exit_syscall_files[num], file); tr->sys_refcount_exit++; } mutex_unlock(&syscall_trace_lock); @@ -453,7 +463,7 @@ static void unreg_event_syscall_exit(struct ftrace_event_file *file, return; mutex_lock(&syscall_trace_lock); tr->sys_refcount_exit--; - clear_bit(num, tr->enabled_exit_syscalls); + rcu_assign_pointer(tr->exit_syscall_files[num], NULL); if (!tr->sys_refcount_exit) unregister_trace_sys_exit(ftrace_syscall_exit, tr); mutex_unlock(&syscall_trace_lock); diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 272261b5f94..3c9b97e6b1f 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -51,22 +51,17 @@ struct trace_uprobe_filter { */ struct trace_uprobe { struct list_head list; - struct ftrace_event_class class; - struct ftrace_event_call call; struct trace_uprobe_filter filter; struct uprobe_consumer consumer; struct inode *inode; char *filename; unsigned long offset; unsigned long nhit; - unsigned int flags; /* For TP_FLAG_* */ - ssize_t size; /* trace entry size */ - unsigned int nr_args; - struct probe_arg args[]; + struct trace_probe tp; }; -#define SIZEOF_TRACE_UPROBE(n) \ - (offsetof(struct trace_uprobe, args) + \ +#define SIZEOF_TRACE_UPROBE(n) \ + (offsetof(struct trace_uprobe, tp.args) + \ (sizeof(struct probe_arg) * (n))) static int register_uprobe_event(struct trace_uprobe *tu); @@ -75,10 +70,151 @@ static int unregister_uprobe_event(struct 
trace_uprobe *tu); static DEFINE_MUTEX(uprobe_lock); static LIST_HEAD(uprobe_list); +struct uprobe_dispatch_data { + struct trace_uprobe *tu; + unsigned long bp_addr; +}; + static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs); static int uretprobe_dispatcher(struct uprobe_consumer *con, unsigned long func, struct pt_regs *regs); +#ifdef CONFIG_STACK_GROWSUP +static unsigned long adjust_stack_addr(unsigned long addr, unsigned int n) +{ + return addr - (n * sizeof(long)); +} +#else +static unsigned long adjust_stack_addr(unsigned long addr, unsigned int n) +{ + return addr + (n * sizeof(long)); +} +#endif + +static unsigned long get_user_stack_nth(struct pt_regs *regs, unsigned int n) +{ + unsigned long ret; + unsigned long addr = user_stack_pointer(regs); + + addr = adjust_stack_addr(addr, n); + + if (copy_from_user(&ret, (void __force __user *) addr, sizeof(ret))) + return 0; + + return ret; +} + +/* + * Uprobes-specific fetch functions + */ +#define DEFINE_FETCH_stack(type) \ +static void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs, \ + void *offset, void *dest) \ +{ \ + *(type *)dest = (type)get_user_stack_nth(regs, \ + ((unsigned long)offset)); \ +} +DEFINE_BASIC_FETCH_FUNCS(stack) +/* No string on the stack entry */ +#define fetch_stack_string NULL +#define fetch_stack_string_size NULL + +#define DEFINE_FETCH_memory(type) \ +static void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs, \ + void *addr, void *dest) \ +{ \ + type retval; \ + void __user *vaddr = (void __force __user *) addr; \ + \ + if (copy_from_user(&retval, vaddr, sizeof(type))) \ + *(type *)dest = 0; \ + else \ + *(type *) dest = retval; \ +} +DEFINE_BASIC_FETCH_FUNCS(memory) +/* + * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max + * length and relative data location. 
+ */ +static void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs, + void *addr, void *dest) +{ + long ret; + u32 rloc = *(u32 *)dest; + int maxlen = get_rloc_len(rloc); + u8 *dst = get_rloc_data(dest); + void __user *src = (void __force __user *) addr; + + if (!maxlen) + return; + + ret = strncpy_from_user(dst, src, maxlen); + + if (ret < 0) { /* Failed to fetch string */ + ((u8 *)get_rloc_data(dest))[0] = '\0'; + *(u32 *)dest = make_data_rloc(0, get_rloc_offs(rloc)); + } else { + *(u32 *)dest = make_data_rloc(ret, get_rloc_offs(rloc)); + } +} + +static void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs, + void *addr, void *dest) +{ + int len; + void __user *vaddr = (void __force __user *) addr; + + len = strnlen_user(vaddr, MAX_STRING_SIZE); + + if (len == 0 || len > MAX_STRING_SIZE) /* Failed to check length */ + *(u32 *)dest = 0; + else + *(u32 *)dest = len; +} + +static unsigned long translate_user_vaddr(void *file_offset) +{ + unsigned long base_addr; + struct uprobe_dispatch_data *udd; + + udd = (void *) current->utask->vaddr; + + base_addr = udd->bp_addr - udd->tu->offset; + return base_addr + (unsigned long)file_offset; +} + +#define DEFINE_FETCH_file_offset(type) \ +static void FETCH_FUNC_NAME(file_offset, type)(struct pt_regs *regs, \ + void *offset, void *dest)\ +{ \ + void *vaddr = (void *)translate_user_vaddr(offset); \ + \ + FETCH_FUNC_NAME(memory, type)(regs, vaddr, dest); \ +} +DEFINE_BASIC_FETCH_FUNCS(file_offset) +DEFINE_FETCH_file_offset(string) +DEFINE_FETCH_file_offset(string_size) + +/* Fetch type information table */ +const struct fetch_type uprobes_fetch_type_table[] = { + /* Special types */ + [FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string, + sizeof(u32), 1, "__data_loc char[]"), + [FETCH_TYPE_STRSIZE] = __ASSIGN_FETCH_TYPE("string_size", u32, + string_size, sizeof(u32), 0, "u32"), + /* Basic types */ + ASSIGN_FETCH_TYPE(u8, u8, 0), + ASSIGN_FETCH_TYPE(u16, u16, 0), + ASSIGN_FETCH_TYPE(u32, u32, 0), + ASSIGN_FETCH_TYPE(u64, u64, 0), + ASSIGN_FETCH_TYPE(s8, u8, 1), + ASSIGN_FETCH_TYPE(s16, u16, 1), + ASSIGN_FETCH_TYPE(s32, u32, 1), + ASSIGN_FETCH_TYPE(s64, u64, 1), + + ASSIGN_FETCH_TYPE_END +}; + static inline void init_trace_uprobe_filter(struct trace_uprobe_filter *filter) { rwlock_init(&filter->rwlock); @@ -114,24 +250,26 @@ alloc_trace_uprobe(const char *group, const char *event, int nargs, bool is_ret) if (!tu) return ERR_PTR(-ENOMEM); - tu->call.class = &tu->class; - tu->call.name = kstrdup(event, GFP_KERNEL); - if (!tu->call.name) + tu->tp.call.class = &tu->tp.class; + tu->tp.call.name = kstrdup(event, GFP_KERNEL); + if (!tu->tp.call.name) goto error; - tu->class.system = kstrdup(group, GFP_KERNEL); - if (!tu->class.system) + tu->tp.class.system = kstrdup(group, GFP_KERNEL); + if (!tu->tp.class.system) goto error; INIT_LIST_HEAD(&tu->list); + INIT_LIST_HEAD(&tu->tp.files); tu->consumer.handler = uprobe_dispatcher; if (is_ret) tu->consumer.ret_handler = uretprobe_dispatcher; init_trace_uprobe_filter(&tu->filter); + tu->tp.call.flags |= TRACE_EVENT_FL_USE_CALL_FILTER; return tu; error: - kfree(tu->call.name); + kfree(tu->tp.call.name); kfree(tu); return ERR_PTR(-ENOMEM); @@ -141,12 +279,12 @@ static void free_trace_uprobe(struct trace_uprobe *tu) { int i; - for (i = 0; i < tu->nr_args; i++) - traceprobe_free_probe_arg(&tu->args[i]); + for (i = 0; i < tu->tp.nr_args; i++) + traceprobe_free_probe_arg(&tu->tp.args[i]); iput(tu->inode); - kfree(tu->call.class->system); - kfree(tu->call.name); + kfree(tu->tp.call.class->system); 
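
find_fetch_type() above now walks its table until it reaches an entry with a NULL name, and the new uprobes_fetch_type_table is closed by ASSIGN_FETCH_TYPE_END (an empty initializer) for exactly that reason: the shared parser no longer needs ARRAY_SIZE() of a table it does not own, so kprobes and uprobes can each supply their own weakly-linked table. Below is a small userspace sketch of that sentinel-terminated lookup; the type names, sizes and fetch callbacks are invented stand-ins for the kernel's fetch_type/fetch_func_t machinery, not the real definitions.

#include <inttypes.h>
#include <stdio.h>
#include <string.h>

typedef void (*fetch_fn_t)(const void *src, void *dest);

struct fetch_type {
	const char *name;	/* a NULL name terminates the table */
	size_t size;
	fetch_fn_t fetch;
};

static void fetch_u8(const void *src, void *dest)
{
	*(uint8_t *)dest = *(const uint8_t *)src;
}

static void fetch_u32(const void *src, void *dest)
{
	memcpy(dest, src, sizeof(uint32_t));
}

/* Two independently owned tables, each closed by a sentinel entry, so the
 * shared lookup never has to know how long either of them is. */
static const struct fetch_type table_a[] = {
	{ "u8",  sizeof(uint8_t),  fetch_u8  },
	{ "u32", sizeof(uint32_t), fetch_u32 },
	{ NULL }	/* plays the role of ASSIGN_FETCH_TYPE_END */
};

static const struct fetch_type table_b[] = {
	{ "u32", sizeof(uint32_t), fetch_u32 },
	{ NULL }
};

static const struct fetch_type *find_type(const char *name,
					  const struct fetch_type *tbl)
{
	int i;

	for (i = 0; tbl[i].name; i++)
		if (strcmp(name, tbl[i].name) == 0)
			return &tbl[i];
	return NULL;
}

int main(void)
{
	uint32_t val = 0x12345678, out = 0;
	const struct fetch_type *t = find_type("u32", table_a);

	if (t) {
		t->fetch(&val, &out);
		printf("%s: fetched 0x%" PRIx32 " (%zu bytes)\n",
		       t->name, out, t->size);
	}
	if (!find_type("u8", table_b))
		printf("u8 is not supported by table_b\n");
	return 0;
}

Picking table_a or table_b at the call site mirrors the "ftbl = is_kprobe ? kprobes_fetch_type_table : uprobes_fetch_type_table" selection done in parse_probe_arg() and traceprobe_parse_probe_arg() above.
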
+ kfree(tu->tp.call.name); kfree(tu->filename); kfree(tu); } @@ -156,8 +294,8 @@ static struct trace_uprobe *find_probe_event(const char *event, const char *grou struct trace_uprobe *tu; list_for_each_entry(tu, &uprobe_list, list) - if (strcmp(tu->call.name, event) == 0 && - strcmp(tu->call.class->system, group) == 0) + if (strcmp(ftrace_event_name(&tu->tp.call), event) == 0 && + strcmp(tu->tp.call.class->system, group) == 0) return tu; return NULL; @@ -180,16 +318,17 @@ static int unregister_trace_uprobe(struct trace_uprobe *tu) /* Register a trace_uprobe and probe_event */ static int register_trace_uprobe(struct trace_uprobe *tu) { - struct trace_uprobe *old_tp; + struct trace_uprobe *old_tu; int ret; mutex_lock(&uprobe_lock); /* register as an event */ - old_tp = find_probe_event(tu->call.name, tu->call.class->system); - if (old_tp) { + old_tu = find_probe_event(ftrace_event_name(&tu->tp.call), + tu->tp.call.class->system); + if (old_tu) { /* delete old event */ - ret = unregister_trace_uprobe(old_tp); + ret = unregister_trace_uprobe(old_tu); if (ret) goto end; } @@ -210,7 +349,7 @@ end: /* * Argument syntax: - * - Add uprobe: p|r[:[GRP/]EVENT] PATH:SYMBOL [FETCHARGS] + * - Add uprobe: p|r[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] * * - Remove uprobe: -:[GRP/]EVENT */ @@ -359,34 +498,36 @@ static int create_trace_uprobe(int argc, char **argv) /* parse arguments */ ret = 0; for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) { + struct probe_arg *parg = &tu->tp.args[i]; + /* Increment count for freeing args in error case */ - tu->nr_args++; + tu->tp.nr_args++; /* Parse argument name */ arg = strchr(argv[i], '='); if (arg) { *arg++ = '\0'; - tu->args[i].name = kstrdup(argv[i], GFP_KERNEL); + parg->name = kstrdup(argv[i], GFP_KERNEL); } else { arg = argv[i]; /* If argument name is omitted, set "argN" */ snprintf(buf, MAX_EVENT_NAME_LEN, "arg%d", i + 1); - tu->args[i].name = kstrdup(buf, GFP_KERNEL); + parg->name = kstrdup(buf, GFP_KERNEL); } - if (!tu->args[i].name) { + if (!parg->name) { pr_info("Failed to allocate argument[%d] name.\n", i); ret = -ENOMEM; goto error; } - if (!is_good_name(tu->args[i].name)) { - pr_info("Invalid argument[%d] name: %s\n", i, tu->args[i].name); + if (!is_good_name(parg->name)) { + pr_info("Invalid argument[%d] name: %s\n", i, parg->name); ret = -EINVAL; goto error; } - if (traceprobe_conflict_field_name(tu->args[i].name, tu->args, i)) { + if (traceprobe_conflict_field_name(parg->name, tu->tp.args, i)) { pr_info("Argument[%d] name '%s' conflicts with " "another field.\n", i, argv[i]); ret = -EINVAL; @@ -394,7 +535,8 @@ static int create_trace_uprobe(int argc, char **argv) } /* Parse fetch argument */ - ret = traceprobe_parse_probe_arg(arg, &tu->size, &tu->args[i], false, false); + ret = traceprobe_parse_probe_arg(arg, &tu->tp.size, parg, + is_return, false); if (ret) { pr_info("Parse error at argument[%d]. (%d)\n", i, ret); goto error; @@ -458,11 +600,12 @@ static int probes_seq_show(struct seq_file *m, void *v) char c = is_ret_probe(tu) ? 
'r' : 'p'; int i; - seq_printf(m, "%c:%s/%s", c, tu->call.class->system, tu->call.name); + seq_printf(m, "%c:%s/%s", c, tu->tp.call.class->system, + ftrace_event_name(&tu->tp.call)); seq_printf(m, " %s:0x%p", tu->filename, (void *)tu->offset); - for (i = 0; i < tu->nr_args; i++) - seq_printf(m, " %s=%s", tu->args[i].name, tu->args[i].comm); + for (i = 0; i < tu->tp.nr_args; i++) + seq_printf(m, " %s=%s", tu->tp.args[i].name, tu->tp.args[i].comm); seq_printf(m, "\n"); return 0; @@ -508,7 +651,8 @@ static int probes_profile_seq_show(struct seq_file *m, void *v) { struct trace_uprobe *tu = v; - seq_printf(m, " %s %-44s %15lu\n", tu->filename, tu->call.name, tu->nhit); + seq_printf(m, " %s %-44s %15lu\n", tu->filename, + ftrace_event_name(&tu->tp.call), tu->nhit); return 0; } @@ -532,19 +676,122 @@ static const struct file_operations uprobe_profile_ops = { .release = seq_release, }; -static void uprobe_trace_print(struct trace_uprobe *tu, - unsigned long func, struct pt_regs *regs) +struct uprobe_cpu_buffer { + struct mutex mutex; + void *buf; +}; +static struct uprobe_cpu_buffer __percpu *uprobe_cpu_buffer; +static int uprobe_buffer_refcnt; + +static int uprobe_buffer_init(void) +{ + int cpu, err_cpu; + + uprobe_cpu_buffer = alloc_percpu(struct uprobe_cpu_buffer); + if (uprobe_cpu_buffer == NULL) + return -ENOMEM; + + for_each_possible_cpu(cpu) { + struct page *p = alloc_pages_node(cpu_to_node(cpu), + GFP_KERNEL, 0); + if (p == NULL) { + err_cpu = cpu; + goto err; + } + per_cpu_ptr(uprobe_cpu_buffer, cpu)->buf = page_address(p); + mutex_init(&per_cpu_ptr(uprobe_cpu_buffer, cpu)->mutex); + } + + return 0; + +err: + for_each_possible_cpu(cpu) { + if (cpu == err_cpu) + break; + free_page((unsigned long)per_cpu_ptr(uprobe_cpu_buffer, cpu)->buf); + } + + free_percpu(uprobe_cpu_buffer); + return -ENOMEM; +} + +static int uprobe_buffer_enable(void) +{ + int ret = 0; + + BUG_ON(!mutex_is_locked(&event_mutex)); + + if (uprobe_buffer_refcnt++ == 0) { + ret = uprobe_buffer_init(); + if (ret < 0) + uprobe_buffer_refcnt--; + } + + return ret; +} + +static void uprobe_buffer_disable(void) +{ + int cpu; + + BUG_ON(!mutex_is_locked(&event_mutex)); + + if (--uprobe_buffer_refcnt == 0) { + for_each_possible_cpu(cpu) + free_page((unsigned long)per_cpu_ptr(uprobe_cpu_buffer, + cpu)->buf); + + free_percpu(uprobe_cpu_buffer); + uprobe_cpu_buffer = NULL; + } +} + +static struct uprobe_cpu_buffer *uprobe_buffer_get(void) +{ + struct uprobe_cpu_buffer *ucb; + int cpu; + + cpu = raw_smp_processor_id(); + ucb = per_cpu_ptr(uprobe_cpu_buffer, cpu); + + /* + * Use per-cpu buffers for fastest access, but we might migrate + * so the mutex makes sure we have sole access to it. 
+ */ + mutex_lock(&ucb->mutex); + + return ucb; +} + +static void uprobe_buffer_put(struct uprobe_cpu_buffer *ucb) +{ + mutex_unlock(&ucb->mutex); +} + +static void __uprobe_trace_func(struct trace_uprobe *tu, + unsigned long func, struct pt_regs *regs, + struct uprobe_cpu_buffer *ucb, int dsize, + struct ftrace_event_file *ftrace_file) { struct uprobe_trace_entry_head *entry; struct ring_buffer_event *event; struct ring_buffer *buffer; void *data; - int size, i; - struct ftrace_event_call *call = &tu->call; + int size, esize; + struct ftrace_event_call *call = &tu->tp.call; + + WARN_ON(call != ftrace_file->event_call); + + if (WARN_ON_ONCE(tu->tp.size + dsize > PAGE_SIZE)) + return; - size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); - event = trace_current_buffer_lock_reserve(&buffer, call->event.type, - size + tu->size, 0, 0); + if (ftrace_trigger_soft_disabled(ftrace_file)) + return; + + esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); + size = esize + tu->tp.size + dsize; + event = trace_event_buffer_lock_reserve(&buffer, ftrace_file, + call->event.type, size, 0, 0); if (!event) return; @@ -558,25 +805,38 @@ static void uprobe_trace_print(struct trace_uprobe *tu, data = DATAOF_TRACE_ENTRY(entry, false); } - for (i = 0; i < tu->nr_args; i++) - call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset); + memcpy(data, ucb->buf, tu->tp.size + dsize); - if (!filter_current_check_discard(buffer, call, entry, event)) - trace_buffer_unlock_commit(buffer, event, 0, 0); + event_trigger_unlock_commit(ftrace_file, buffer, event, entry, 0, 0); } /* uprobe handler */ -static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs) +static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs, + struct uprobe_cpu_buffer *ucb, int dsize) { - if (!is_ret_probe(tu)) - uprobe_trace_print(tu, 0, regs); + struct event_file_link *link; + + if (is_ret_probe(tu)) + return 0; + + rcu_read_lock(); + list_for_each_entry_rcu(link, &tu->tp.files, list) + __uprobe_trace_func(tu, 0, regs, ucb, dsize, link->file); + rcu_read_unlock(); + return 0; } static void uretprobe_trace_func(struct trace_uprobe *tu, unsigned long func, - struct pt_regs *regs) + struct pt_regs *regs, + struct uprobe_cpu_buffer *ucb, int dsize) { - uprobe_trace_print(tu, func, regs); + struct event_file_link *link; + + rcu_read_lock(); + list_for_each_entry_rcu(link, &tu->tp.files, list) + __uprobe_trace_func(tu, func, regs, ucb, dsize, link->file); + rcu_read_unlock(); } /* Event entry printers */ @@ -590,23 +850,26 @@ print_uprobe_event(struct trace_iterator *iter, int flags, struct trace_event *e int i; entry = (struct uprobe_trace_entry_head *)iter->ent; - tu = container_of(event, struct trace_uprobe, call.event); + tu = container_of(event, struct trace_uprobe, tp.call.event); if (is_ret_probe(tu)) { - if (!trace_seq_printf(s, "%s: (0x%lx <- 0x%lx)", tu->call.name, + if (!trace_seq_printf(s, "%s: (0x%lx <- 0x%lx)", + ftrace_event_name(&tu->tp.call), entry->vaddr[1], entry->vaddr[0])) goto partial; data = DATAOF_TRACE_ENTRY(entry, true); } else { - if (!trace_seq_printf(s, "%s: (0x%lx)", tu->call.name, + if (!trace_seq_printf(s, "%s: (0x%lx)", + ftrace_event_name(&tu->tp.call), entry->vaddr[0])) goto partial; data = DATAOF_TRACE_ENTRY(entry, false); } - for (i = 0; i < tu->nr_args; i++) { - if (!tu->args[i].type->print(s, tu->args[i].name, - data + tu->args[i].offset, entry)) + for (i = 0; i < tu->tp.nr_args; i++) { + struct probe_arg *parg = &tu->tp.args[i]; + + if (!parg->type->print(s, parg->name, data + 
parg->offset, entry)) goto partial; } @@ -617,43 +880,95 @@ partial: return TRACE_TYPE_PARTIAL_LINE; } -static inline bool is_trace_uprobe_enabled(struct trace_uprobe *tu) -{ - return tu->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE); -} - typedef bool (*filter_func_t)(struct uprobe_consumer *self, enum uprobe_filter_ctx ctx, struct mm_struct *mm); static int -probe_event_enable(struct trace_uprobe *tu, int flag, filter_func_t filter) +probe_event_enable(struct trace_uprobe *tu, struct ftrace_event_file *file, + filter_func_t filter) { - int ret = 0; + bool enabled = trace_probe_is_enabled(&tu->tp); + struct event_file_link *link = NULL; + int ret; + + if (file) { + if (tu->tp.flags & TP_FLAG_PROFILE) + return -EINTR; + + link = kmalloc(sizeof(*link), GFP_KERNEL); + if (!link) + return -ENOMEM; + + link->file = file; + list_add_tail_rcu(&link->list, &tu->tp.files); - if (is_trace_uprobe_enabled(tu)) - return -EINTR; + tu->tp.flags |= TP_FLAG_TRACE; + } else { + if (tu->tp.flags & TP_FLAG_TRACE) + return -EINTR; + + tu->tp.flags |= TP_FLAG_PROFILE; + } WARN_ON(!uprobe_filter_is_empty(&tu->filter)); - tu->flags |= flag; + if (enabled) + return 0; + + ret = uprobe_buffer_enable(); + if (ret) + goto err_flags; + tu->consumer.filter = filter; ret = uprobe_register(tu->inode, tu->offset, &tu->consumer); if (ret) - tu->flags &= ~flag; + goto err_buffer; + + return 0; + err_buffer: + uprobe_buffer_disable(); + + err_flags: + if (file) { + list_del(&link->list); + kfree(link); + tu->tp.flags &= ~TP_FLAG_TRACE; + } else { + tu->tp.flags &= ~TP_FLAG_PROFILE; + } return ret; } -static void probe_event_disable(struct trace_uprobe *tu, int flag) +static void +probe_event_disable(struct trace_uprobe *tu, struct ftrace_event_file *file) { - if (!is_trace_uprobe_enabled(tu)) + if (!trace_probe_is_enabled(&tu->tp)) return; + if (file) { + struct event_file_link *link; + + link = find_event_file_link(&tu->tp, file); + if (!link) + return; + + list_del_rcu(&link->list); + /* synchronize with u{,ret}probe_trace_func */ + synchronize_sched(); + kfree(link); + + if (!list_empty(&tu->tp.files)) + return; + } + WARN_ON(!uprobe_filter_is_empty(&tu->filter)); uprobe_unregister(tu->inode, tu->offset, &tu->consumer); - tu->flags &= ~flag; + tu->tp.flags &= file ? ~TP_FLAG_TRACE : ~TP_FLAG_PROFILE; + + uprobe_buffer_disable(); } static int uprobe_event_define_fields(struct ftrace_event_call *event_call) @@ -671,12 +986,12 @@ static int uprobe_event_define_fields(struct ftrace_event_call *event_call) size = SIZEOF_TRACE_ENTRY(false); } /* Set argument names as fields */ - for (i = 0; i < tu->nr_args; i++) { - ret = trace_define_field(event_call, tu->args[i].type->fmttype, - tu->args[i].name, - size + tu->args[i].offset, - tu->args[i].type->size, - tu->args[i].type->is_signed, + for (i = 0; i < tu->tp.nr_args; i++) { + struct probe_arg *parg = &tu->tp.args[i]; + + ret = trace_define_field(event_call, parg->type->fmttype, + parg->name, size + parg->offset, + parg->type->size, parg->type->is_signed, FILTER_OTHER); if (ret) @@ -685,59 +1000,6 @@ static int uprobe_event_define_fields(struct ftrace_event_call *event_call) return 0; } -#define LEN_OR_ZERO (len ? 
len - pos : 0) -static int __set_print_fmt(struct trace_uprobe *tu, char *buf, int len) -{ - const char *fmt, *arg; - int i; - int pos = 0; - - if (is_ret_probe(tu)) { - fmt = "(%lx <- %lx)"; - arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP; - } else { - fmt = "(%lx)"; - arg = "REC->" FIELD_STRING_IP; - } - - /* When len=0, we just calculate the needed length */ - - pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt); - - for (i = 0; i < tu->nr_args; i++) { - pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%s", - tu->args[i].name, tu->args[i].type->fmt); - } - - pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg); - - for (i = 0; i < tu->nr_args; i++) { - pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s", - tu->args[i].name); - } - - return pos; /* return the length of print_fmt */ -} -#undef LEN_OR_ZERO - -static int set_print_fmt(struct trace_uprobe *tu) -{ - char *print_fmt; - int len; - - /* First: called with 0 length to calculate the needed length */ - len = __set_print_fmt(tu, NULL, 0); - print_fmt = kmalloc(len + 1, GFP_KERNEL); - if (!print_fmt) - return -ENOMEM; - - /* Second: actually write the @print_fmt */ - __set_print_fmt(tu, print_fmt, len + 1); - tu->call.print_fmt = print_fmt; - - return 0; -} - #ifdef CONFIG_PERF_EVENTS static bool __uprobe_perf_filter(struct trace_uprobe_filter *filter, struct mm_struct *mm) @@ -761,56 +1023,60 @@ uprobe_filter_event(struct trace_uprobe *tu, struct perf_event *event) return __uprobe_perf_filter(&tu->filter, event->hw.tp_target->mm); } -static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event) +static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event) { bool done; write_lock(&tu->filter.rwlock); if (event->hw.tp_target) { - /* - * event->parent != NULL means copy_process(), we can avoid - * uprobe_apply(). current->mm must be probed and we can rely - * on dup_mmap() which preserves the already installed bp's. - * - * attr.enable_on_exec means that exec/mmap will install the - * breakpoints we need. - */ + list_del(&event->hw.tp_list); done = tu->filter.nr_systemwide || - event->parent || event->attr.enable_on_exec || + (event->hw.tp_target->flags & PF_EXITING) || uprobe_filter_event(tu, event); - list_add(&event->hw.tp_list, &tu->filter.perf_events); } else { + tu->filter.nr_systemwide--; done = tu->filter.nr_systemwide; - tu->filter.nr_systemwide++; } write_unlock(&tu->filter.rwlock); if (!done) - uprobe_apply(tu->inode, tu->offset, &tu->consumer, true); + return uprobe_apply(tu->inode, tu->offset, &tu->consumer, false); return 0; } -static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event) +static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event) { bool done; + int err; write_lock(&tu->filter.rwlock); if (event->hw.tp_target) { - list_del(&event->hw.tp_list); + /* + * event->parent != NULL means copy_process(), we can avoid + * uprobe_apply(). current->mm must be probed and we can rely + * on dup_mmap() which preserves the already installed bp's. + * + * attr.enable_on_exec means that exec/mmap will install the + * breakpoints we need. 
+ */ done = tu->filter.nr_systemwide || - (event->hw.tp_target->flags & PF_EXITING) || + event->parent || event->attr.enable_on_exec || uprobe_filter_event(tu, event); + list_add(&event->hw.tp_list, &tu->filter.perf_events); } else { - tu->filter.nr_systemwide--; done = tu->filter.nr_systemwide; + tu->filter.nr_systemwide++; } write_unlock(&tu->filter.rwlock); - if (!done) - uprobe_apply(tu->inode, tu->offset, &tu->consumer, false); - - return 0; + err = 0; + if (!done) { + err = uprobe_apply(tu->inode, tu->offset, &tu->consumer, true); + if (err) + uprobe_perf_close(tu, event); + } + return err; } static bool uprobe_perf_filter(struct uprobe_consumer *uc, @@ -827,17 +1093,23 @@ static bool uprobe_perf_filter(struct uprobe_consumer *uc, return ret; } -static void uprobe_perf_print(struct trace_uprobe *tu, - unsigned long func, struct pt_regs *regs) +static void __uprobe_perf_func(struct trace_uprobe *tu, + unsigned long func, struct pt_regs *regs, + struct uprobe_cpu_buffer *ucb, int dsize) { - struct ftrace_event_call *call = &tu->call; + struct ftrace_event_call *call = &tu->tp.call; struct uprobe_trace_entry_head *entry; struct hlist_head *head; void *data; - int size, rctx, i; + int size, esize; + int rctx; - size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); - size = ALIGN(size + tu->size + sizeof(u32), sizeof(u64)) - sizeof(u32); + esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); + + size = esize + tu->tp.size + dsize; + size = ALIGN(size + sizeof(u32), sizeof(u64)) - sizeof(u32); + if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough")) + return; preempt_disable(); head = this_cpu_ptr(call->perf_events); @@ -857,8 +1129,13 @@ static void uprobe_perf_print(struct trace_uprobe *tu, data = DATAOF_TRACE_ENTRY(entry, false); } - for (i = 0; i < tu->nr_args; i++) - call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset); + memcpy(data, ucb->buf, tu->tp.size + dsize); + + if (size - esize > tu->tp.size + dsize) { + int len = tu->tp.size + dsize; + + memset(data + len, 0, size - esize - len); + } perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL); out: @@ -866,42 +1143,46 @@ static void uprobe_perf_print(struct trace_uprobe *tu, } /* uprobe profile handler */ -static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs) +static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs, + struct uprobe_cpu_buffer *ucb, int dsize) { if (!uprobe_perf_filter(&tu->consumer, 0, current->mm)) return UPROBE_HANDLER_REMOVE; if (!is_ret_probe(tu)) - uprobe_perf_print(tu, 0, regs); + __uprobe_perf_func(tu, 0, regs, ucb, dsize); return 0; } static void uretprobe_perf_func(struct trace_uprobe *tu, unsigned long func, - struct pt_regs *regs) + struct pt_regs *regs, + struct uprobe_cpu_buffer *ucb, int dsize) { - uprobe_perf_print(tu, func, regs); + __uprobe_perf_func(tu, func, regs, ucb, dsize); } #endif /* CONFIG_PERF_EVENTS */ -static -int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type, void *data) +static int +trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type, + void *data) { struct trace_uprobe *tu = event->data; + struct ftrace_event_file *file = data; switch (type) { case TRACE_REG_REGISTER: - return probe_event_enable(tu, TP_FLAG_TRACE, NULL); + return probe_event_enable(tu, file, NULL); case TRACE_REG_UNREGISTER: - probe_event_disable(tu, TP_FLAG_TRACE); + probe_event_disable(tu, file); return 0; #ifdef CONFIG_PERF_EVENTS case TRACE_REG_PERF_REGISTER: - return probe_event_enable(tu, 
TP_FLAG_PROFILE, uprobe_perf_filter); + return probe_event_enable(tu, NULL, uprobe_perf_filter); case TRACE_REG_PERF_UNREGISTER: - probe_event_disable(tu, TP_FLAG_PROFILE); + probe_event_disable(tu, NULL); return 0; case TRACE_REG_PERF_OPEN: @@ -920,18 +1201,37 @@ int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type, static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs) { struct trace_uprobe *tu; + struct uprobe_dispatch_data udd; + struct uprobe_cpu_buffer *ucb; + int dsize, esize; int ret = 0; + tu = container_of(con, struct trace_uprobe, consumer); tu->nhit++; - if (tu->flags & TP_FLAG_TRACE) - ret |= uprobe_trace_func(tu, regs); + udd.tu = tu; + udd.bp_addr = instruction_pointer(regs); + + current->utask->vaddr = (unsigned long) &udd; + + if (WARN_ON_ONCE(!uprobe_cpu_buffer)) + return 0; + + dsize = __get_data_size(&tu->tp, regs); + esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); + + ucb = uprobe_buffer_get(); + store_trace_args(esize, &tu->tp, regs, ucb->buf, dsize); + + if (tu->tp.flags & TP_FLAG_TRACE) + ret |= uprobe_trace_func(tu, regs, ucb, dsize); #ifdef CONFIG_PERF_EVENTS - if (tu->flags & TP_FLAG_PROFILE) - ret |= uprobe_perf_func(tu, regs); + if (tu->tp.flags & TP_FLAG_PROFILE) + ret |= uprobe_perf_func(tu, regs, ucb, dsize); #endif + uprobe_buffer_put(ucb); return ret; } @@ -939,16 +1239,34 @@ static int uretprobe_dispatcher(struct uprobe_consumer *con, unsigned long func, struct pt_regs *regs) { struct trace_uprobe *tu; + struct uprobe_dispatch_data udd; + struct uprobe_cpu_buffer *ucb; + int dsize, esize; tu = container_of(con, struct trace_uprobe, consumer); - if (tu->flags & TP_FLAG_TRACE) - uretprobe_trace_func(tu, func, regs); + udd.tu = tu; + udd.bp_addr = func; + + current->utask->vaddr = (unsigned long) &udd; + + if (WARN_ON_ONCE(!uprobe_cpu_buffer)) + return 0; + + dsize = __get_data_size(&tu->tp, regs); + esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); + + ucb = uprobe_buffer_get(); + store_trace_args(esize, &tu->tp, regs, ucb->buf, dsize); + + if (tu->tp.flags & TP_FLAG_TRACE) + uretprobe_trace_func(tu, func, regs, ucb, dsize); #ifdef CONFIG_PERF_EVENTS - if (tu->flags & TP_FLAG_PROFILE) - uretprobe_perf_func(tu, func, regs); + if (tu->tp.flags & TP_FLAG_PROFILE) + uretprobe_perf_func(tu, func, regs, ucb, dsize); #endif + uprobe_buffer_put(ucb); return 0; } @@ -958,7 +1276,7 @@ static struct trace_event_functions uprobe_funcs = { static int register_uprobe_event(struct trace_uprobe *tu) { - struct ftrace_event_call *call = &tu->call; + struct ftrace_event_call *call = &tu->tp.call; int ret; /* Initialize ftrace_event_call */ @@ -966,7 +1284,7 @@ static int register_uprobe_event(struct trace_uprobe *tu) call->event.funcs = &uprobe_funcs; call->class->define_fields = uprobe_event_define_fields; - if (set_print_fmt(tu) < 0) + if (set_print_fmt(&tu->tp, is_ret_probe(tu)) < 0) return -ENOMEM; ret = register_ftrace_event(&call->event); @@ -980,7 +1298,8 @@ static int register_uprobe_event(struct trace_uprobe *tu) ret = trace_add_event_call(call); if (ret) { - pr_info("Failed to register uprobe event: %s\n", call->name); + pr_info("Failed to register uprobe event: %s\n", + ftrace_event_name(call)); kfree(call->print_fmt); unregister_ftrace_event(&call->event); } @@ -993,11 +1312,11 @@ static int unregister_uprobe_event(struct trace_uprobe *tu) int ret; /* tu->event is unregistered in trace_remove_event_call() */ - ret = trace_remove_event_call(&tu->call); + ret = trace_remove_event_call(&tu->tp.call); if (ret) return ret; 
-	kfree(tu->call.print_fmt);
-	tu->call.print_fmt = NULL;
+	kfree(tu->tp.call.print_fmt);
+	tu->tp.call.print_fmt = NULL;

	return 0;
}
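Context for the dispatcher hunks above: after this patch, uprobe_dispatcher() and uretprobe_dispatcher() fetch the probe arguments once (via __get_data_size() and store_trace_args()) into a per-CPU buffer obtained with uprobe_buffer_get(), and then hand the same buffer and size to both the ftrace path (uprobe_trace_func) and the perf path (uprobe_perf_func), instead of each path re-reading the registers with call_fetch(). The following is a minimal userspace sketch of that "fetch once, fan out" shape only; it is not kernel code, and every name in it (fetch_args, dispatch, trace_consumer, perf_consumer) is invented for illustration.

```c
/*
 * Illustrative sketch of the pattern the patch introduces in
 * uprobe_dispatcher(): arguments are copied into one buffer up
 * front and every enabled consumer sees that same buffer.
 * All identifiers here are made up for the example; none of them
 * are kernel APIs.
 */
#include <stdio.h>

#define BUF_SIZE 128	/* stand-in for the PAGE_SIZE per-CPU buffer */

typedef void (*consumer_fn)(const char *buf, int dsize);

/* Stand-in for the ftrace path (__uprobe_trace_func). */
static void trace_consumer(const char *buf, int dsize)
{
	printf("trace path got %d bytes: %s\n", dsize, buf);
}

/* Stand-in for the perf path (__uprobe_perf_func). */
static void perf_consumer(const char *buf, int dsize)
{
	printf("perf path got %d bytes: %s\n", dsize, buf);
}

/* Stand-in for store_trace_args(): copy the payload exactly once. */
static int fetch_args(char *buf, const char *src)
{
	int n = snprintf(buf, BUF_SIZE, "%s", src);

	return (n < BUF_SIZE ? n : BUF_SIZE - 1) + 1; /* bytes incl. NUL */
}

/* Stand-in for uprobe_dispatcher(): one fetch, many consumers. */
static void dispatch(const char *payload,
		     consumer_fn *consumers, int nr_consumers)
{
	char buf[BUF_SIZE];
	int dsize = fetch_args(buf, payload);
	int i;

	for (i = 0; i < nr_consumers; i++)
		consumers[i](buf, dsize);
}

int main(void)
{
	consumer_fn consumers[] = { trace_consumer, perf_consumer };

	dispatch("pid=1234 addr=0xdeadbeef",
		 consumers, sizeof(consumers) / sizeof(consumers[0]));
	return 0;
}
```

The kernel version additionally serializes buffer use with a per-CPU mutex taken in uprobe_buffer_get() and dropped in uprobe_buffer_put(), and the ftrace side fans out over the RCU-protected tu->tp.files list of event files; the sketch deliberately leaves both of those out.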
