Diffstat (limited to 'kernel/trace')
33 files changed, 5578 insertions, 1994 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 015f85aaca0..d4409356f40 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -424,6 +424,7 @@ config UPROBE_EVENT  	bool "Enable uprobes-based dynamic events"  	depends on ARCH_SUPPORTS_UPROBES  	depends on MMU +	depends on PERF_EVENTS  	select UPROBES  	select PROBE_EVENTS  	select TRACING @@ -534,6 +535,36 @@ config MMIOTRACE_TEST  	  Say N, unless you absolutely know what you are doing. +config TRACEPOINT_BENCHMARK +        bool "Add tracepoint that benchmarks tracepoints" +	help +	 This option creates the tracepoint "benchmark:benchmark_event". +	 When the tracepoint is enabled, it kicks off a kernel thread that +	 goes into an infinite loop (calling cond_sched() to let other tasks +	 run), and calls the tracepoint. Each iteration will record the time +	 it took to write to the tracepoint and the next iteration that +	 data will be passed to the tracepoint itself. That is, the tracepoint +	 will report the time it took to do the previous tracepoint. +	 The string written to the tracepoint is a static string of 128 bytes +	 to keep the time the same. The initial string is simply a write of +	 "START". The second string records the cold cache time of the first +	 write which is not added to the rest of the calculations. + +	 As it is a tight loop, it benchmarks as hot cache. That's fine because +	 we care most about hot paths that are probably in cache already. + +	 An example of the output: + +	      START +	      first=3672 [COLD CACHED] +	      last=632 first=3672 max=632 min=632 avg=316 std=446 std^2=199712 +	      last=278 first=3672 max=632 min=278 avg=303 std=316 std^2=100337 +	      last=277 first=3672 max=632 min=277 avg=296 std=258 std^2=67064 +	      last=273 first=3672 max=632 min=273 avg=292 std=224 std^2=50411 +	      last=273 first=3672 max=632 min=273 avg=288 std=200 std^2=40389 +	      last=281 first=3672 max=632 min=273 avg=287 std=183 std^2=33666 + +  config RING_BUFFER_BENCHMARK  	tristate "Ring buffer benchmark stress tester"  	depends on RING_BUFFER diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index d7e2068e4b7..2611613f14f 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -17,6 +17,7 @@ ifdef CONFIG_TRACING_BRANCHES  KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING  endif +CFLAGS_trace_benchmark.o := -I$(src)  CFLAGS_trace_events_filter.o := -I$(src)  obj-$(CONFIG_TRACE_CLOCK) += trace_clock.o @@ -50,6 +51,7 @@ ifeq ($(CONFIG_PERF_EVENTS),y)  obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o  endif  obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o +obj-$(CONFIG_EVENT_TRACING) += trace_events_trigger.o  obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o  obj-$(CONFIG_TRACEPOINTS) += power-traces.o  ifeq ($(CONFIG_PM_RUNTIME),y) @@ -61,4 +63,6 @@ endif  obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o  obj-$(CONFIG_UPROBE_EVENT) += trace_uprobe.o +obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o +  libftrace-y := ftrace.o diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index b8b8560bfb9..c1bd4ada2a0 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -26,6 +26,7 @@  #include <linux/export.h>  #include <linux/time.h>  #include <linux/uaccess.h> +#include <linux/list.h>  #include <trace/events/block.h> @@ -38,6 +39,9 @@ static unsigned int blktrace_seq __read_mostly = 1;  static struct trace_array *blk_tr;  static bool blk_tracer_enabled __read_mostly; +static LIST_HEAD(running_trace_list); +static __cacheline_aligned_in_smp 
DEFINE_SPINLOCK(running_trace_lock); +  /* Select an alternative, minimalistic output than the original one */  #define TRACE_BLK_OPT_CLASSIC	0x1 @@ -107,10 +111,18 @@ record_it:   * Send out a notify for this process, if we haven't done so since a trace   * started   */ -static void trace_note_tsk(struct blk_trace *bt, struct task_struct *tsk) +static void trace_note_tsk(struct task_struct *tsk)  { +	unsigned long flags; +	struct blk_trace *bt; +  	tsk->btrace_seq = blktrace_seq; -	trace_note(bt, tsk->pid, BLK_TN_PROCESS, tsk->comm, sizeof(tsk->comm)); +	spin_lock_irqsave(&running_trace_lock, flags); +	list_for_each_entry(bt, &running_trace_list, running_list) { +		trace_note(bt, tsk->pid, BLK_TN_PROCESS, tsk->comm, +			   sizeof(tsk->comm)); +	} +	spin_unlock_irqrestore(&running_trace_lock, flags);  }  static void trace_note_time(struct blk_trace *bt) @@ -229,16 +241,15 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,  		goto record_it;  	} +	if (unlikely(tsk->btrace_seq != blktrace_seq)) +		trace_note_tsk(tsk); +  	/*  	 * A word about the locking here - we disable interrupts to reserve  	 * some space in the relay per-cpu buffer, to prevent an irq  	 * from coming in and stepping on our toes.  	 */  	local_irq_save(flags); - -	if (unlikely(tsk->btrace_seq != blktrace_seq)) -		trace_note_tsk(bt, tsk); -  	t = relay_reserve(bt->rchan, sizeof(*t) + pdu_len);  	if (t) {  		sequence = per_cpu_ptr(bt->sequence, cpu); @@ -477,6 +488,7 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,  	bt->dir = dir;  	bt->dev = dev;  	atomic_set(&bt->dropped, 0); +	INIT_LIST_HEAD(&bt->running_list);  	ret = -EIO;  	bt->dropped_file = debugfs_create_file("dropped", 0444, dir, bt, @@ -567,13 +579,12 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name,  		.end_lba = cbuts.end_lba,  		.pid = cbuts.pid,  	}; -	memcpy(&buts.name, &cbuts.name, 32);  	ret = do_blk_trace_setup(q, name, dev, bdev, &buts);  	if (ret)  		return ret; -	if (copy_to_user(arg, &buts.name, 32)) { +	if (copy_to_user(arg, &buts.name, ARRAY_SIZE(buts.name))) {  		blk_trace_remove(q);  		return -EFAULT;  	} @@ -601,6 +612,9 @@ int blk_trace_startstop(struct request_queue *q, int start)  			blktrace_seq++;  			smp_mb();  			bt->trace_state = Blktrace_running; +			spin_lock_irq(&running_trace_lock); +			list_add(&bt->running_list, &running_trace_list); +			spin_unlock_irq(&running_trace_lock);  			trace_note_time(bt);  			ret = 0; @@ -608,6 +622,9 @@ int blk_trace_startstop(struct request_queue *q, int start)  	} else {  		if (bt->trace_state == Blktrace_running) {  			bt->trace_state = Blktrace_stopped; +			spin_lock_irq(&running_trace_lock); +			list_del_init(&bt->running_list); +			spin_unlock_irq(&running_trace_lock);  			relay_flush(bt->rchan);  			ret = 0;  		} @@ -685,6 +702,7 @@ void blk_trace_shutdown(struct request_queue *q)   * blk_add_trace_rq - Add a trace for a request oriented action   * @q:		queue the io is for   * @rq:		the source request + * @nr_bytes:	number of completed bytes   * @what:	the action   *   * Description: @@ -692,7 +710,7 @@ void blk_trace_shutdown(struct request_queue *q)   *   **/  static void blk_add_trace_rq(struct request_queue *q, struct request *rq, -			     u32 what) +			     unsigned int nr_bytes, u32 what)  {  	struct blk_trace *bt = q->blk_trace; @@ -701,11 +719,11 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq,  	if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {  		what |= BLK_TC_ACT(BLK_TC_PC); -		
__blk_add_trace(bt, 0, blk_rq_bytes(rq), rq->cmd_flags, +		__blk_add_trace(bt, 0, nr_bytes, rq->cmd_flags,  				what, rq->errors, rq->cmd_len, rq->cmd);  	} else  {  		what |= BLK_TC_ACT(BLK_TC_FS); -		__blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), +		__blk_add_trace(bt, blk_rq_pos(rq), nr_bytes,  				rq->cmd_flags, what, rq->errors, 0, NULL);  	}  } @@ -713,33 +731,34 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq,  static void blk_add_trace_rq_abort(void *ignore,  				   struct request_queue *q, struct request *rq)  { -	blk_add_trace_rq(q, rq, BLK_TA_ABORT); +	blk_add_trace_rq(q, rq, blk_rq_bytes(rq), BLK_TA_ABORT);  }  static void blk_add_trace_rq_insert(void *ignore,  				    struct request_queue *q, struct request *rq)  { -	blk_add_trace_rq(q, rq, BLK_TA_INSERT); +	blk_add_trace_rq(q, rq, blk_rq_bytes(rq), BLK_TA_INSERT);  }  static void blk_add_trace_rq_issue(void *ignore,  				   struct request_queue *q, struct request *rq)  { -	blk_add_trace_rq(q, rq, BLK_TA_ISSUE); +	blk_add_trace_rq(q, rq, blk_rq_bytes(rq), BLK_TA_ISSUE);  }  static void blk_add_trace_rq_requeue(void *ignore,  				     struct request_queue *q,  				     struct request *rq)  { -	blk_add_trace_rq(q, rq, BLK_TA_REQUEUE); +	blk_add_trace_rq(q, rq, blk_rq_bytes(rq), BLK_TA_REQUEUE);  }  static void blk_add_trace_rq_complete(void *ignore,  				      struct request_queue *q, -				      struct request *rq) +				      struct request *rq, +				      unsigned int nr_bytes)  { -	blk_add_trace_rq(q, rq, BLK_TA_COMPLETE); +	blk_add_trace_rq(q, rq, nr_bytes, BLK_TA_COMPLETE);  }  /** @@ -764,8 +783,8 @@ static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,  	if (!error && !bio_flagged(bio, BIO_UPTODATE))  		error = EIO; -	__blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, -			error, 0, NULL); +	__blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, +			bio->bi_rw, what, error, 0, NULL);  }  static void blk_add_trace_bio_bounce(void *ignore, @@ -868,8 +887,9 @@ static void blk_add_trace_split(void *ignore,  	if (bt) {  		__be64 rpdu = cpu_to_be64(pdu); -		__blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, -				BLK_TA_SPLIT, !bio_flagged(bio, BIO_UPTODATE), +		__blk_add_trace(bt, bio->bi_iter.bi_sector, +				bio->bi_iter.bi_size, bio->bi_rw, BLK_TA_SPLIT, +				!bio_flagged(bio, BIO_UPTODATE),  				sizeof(rpdu), &rpdu);  	}  } @@ -901,9 +921,9 @@ static void blk_add_trace_bio_remap(void *ignore,  	r.device_to   = cpu_to_be32(bio->bi_bdev->bd_dev);  	r.sector_from = cpu_to_be64(from); -	__blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, -			BLK_TA_REMAP, !bio_flagged(bio, BIO_UPTODATE), -			sizeof(r), &r); +	__blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, +			bio->bi_rw, BLK_TA_REMAP, +			!bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r);  }  /** @@ -1409,7 +1429,8 @@ static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter)  	return print_one_line(iter, true);  } -static int blk_tracer_set_flag(u32 old_flags, u32 bit, int set) +static int +blk_tracer_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)  {  	/* don't output context-info for blk_classic output */  	if (bit == TRACE_BLK_OPT_CLASSIC) { @@ -1472,6 +1493,9 @@ static int blk_trace_remove_queue(struct request_queue *q)  	if (atomic_dec_and_test(&blk_probes_ref))  		blk_unregister_tracepoints(); +	spin_lock_irq(&running_trace_lock); +	list_del(&bt->running_list); +	spin_unlock_irq(&running_trace_lock);  	blk_trace_free(bt);  
	return 0;  } diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 03cf44ac54d..ac9d1dad630 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -62,7 +62,7 @@  #define FTRACE_HASH_DEFAULT_BITS 10  #define FTRACE_HASH_MAX_BITS 12 -#define FL_GLOBAL_CONTROL_MASK (FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_CONTROL) +#define FL_GLOBAL_CONTROL_MASK (FTRACE_OPS_FL_CONTROL)  #ifdef CONFIG_DYNAMIC_FTRACE  #define INIT_REGEX_LOCK(opsname)	\ @@ -85,6 +85,8 @@ int function_trace_stop __read_mostly;  /* Current function tracing op */  struct ftrace_ops *function_trace_op __read_mostly = &ftrace_list_end; +/* What to set function_trace_op to */ +static struct ftrace_ops *set_function_trace_op;  /* List for set_ftrace_pid's pids. */  LIST_HEAD(ftrace_pids); @@ -101,7 +103,6 @@ static int ftrace_disabled __read_mostly;  static DEFINE_MUTEX(ftrace_lock); -static struct ftrace_ops *ftrace_global_list __read_mostly = &ftrace_list_end;  static struct ftrace_ops *ftrace_control_list __read_mostly = &ftrace_list_end;  static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end;  ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; @@ -169,23 +170,6 @@ int ftrace_nr_registered_ops(void)  	return cnt;  } -static void -ftrace_global_list_func(unsigned long ip, unsigned long parent_ip, -			struct ftrace_ops *op, struct pt_regs *regs) -{ -	int bit; - -	bit = trace_test_and_set_recursion(TRACE_GLOBAL_START, TRACE_GLOBAL_MAX); -	if (bit < 0) -		return; - -	do_for_each_ftrace_op(op, ftrace_global_list) { -		op->func(ip, parent_ip, op, regs); -	} while_for_each_ftrace_op(op); - -	trace_clear_recursion(bit); -} -  static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip,  			    struct ftrace_ops *op, struct pt_regs *regs)  { @@ -235,55 +219,33 @@ static int control_ops_alloc(struct ftrace_ops *ops)  	return 0;  } -static void control_ops_free(struct ftrace_ops *ops) -{ -	free_percpu(ops->disabled); -} - -static void update_global_ops(void) +static void ftrace_sync(struct work_struct *work)  { -	ftrace_func_t func; -  	/* -	 * If there's only one function registered, then call that -	 * function directly. Otherwise, we need to iterate over the -	 * registered callers. +	 * This function is just a stub to implement a hard force +	 * of synchronize_sched(). This requires synchronizing +	 * tasks even in userspace and idle. +	 * +	 * Yes, function tracing is rude.  	 */ -	if (ftrace_global_list == &ftrace_list_end || -	    ftrace_global_list->next == &ftrace_list_end) { -		func = ftrace_global_list->func; -		/* -		 * As we are calling the function directly. -		 * If it does not have recursion protection, -		 * the function_trace_op needs to be updated -		 * accordingly. -		 */ -		if (ftrace_global_list->flags & FTRACE_OPS_FL_RECURSION_SAFE) -			global_ops.flags |= FTRACE_OPS_FL_RECURSION_SAFE; -		else -			global_ops.flags &= ~FTRACE_OPS_FL_RECURSION_SAFE; -	} else { -		func = ftrace_global_list_func; -		/* The list has its own recursion protection. 
*/ -		global_ops.flags |= FTRACE_OPS_FL_RECURSION_SAFE; -	} - - -	/* If we filter on pids, update to use the pid function */ -	if (!list_empty(&ftrace_pids)) { -		set_ftrace_pid_function(func); -		func = ftrace_pid_func; -	} +} -	global_ops.func = func; +static void ftrace_sync_ipi(void *data) +{ +	/* Probably not needed, but do it anyway */ +	smp_rmb();  } +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +static void update_function_graph_func(void); +#else +static inline void update_function_graph_func(void) { } +#endif +  static void update_ftrace_function(void)  {  	ftrace_func_t func; -	update_global_ops(); -  	/*  	 * If we are at the end of the list and this ops is  	 * recursion safe and not dynamic and the arch supports passing ops, @@ -295,20 +257,67 @@ static void update_ftrace_function(void)  	     (ftrace_ops_list->flags & FTRACE_OPS_FL_RECURSION_SAFE) &&  	     !FTRACE_FORCE_LIST_FUNC)) {  		/* Set the ftrace_ops that the arch callback uses */ -		if (ftrace_ops_list == &global_ops) -			function_trace_op = ftrace_global_list; -		else -			function_trace_op = ftrace_ops_list; +		set_function_trace_op = ftrace_ops_list;  		func = ftrace_ops_list->func;  	} else {  		/* Just use the default ftrace_ops */ -		function_trace_op = &ftrace_list_end; +		set_function_trace_op = &ftrace_list_end;  		func = ftrace_ops_list_func;  	} +	update_function_graph_func(); + +	/* If there's no change, then do nothing more here */ +	if (ftrace_trace_function == func) +		return; + +	/* +	 * If we are using the list function, it doesn't care +	 * about the function_trace_ops. +	 */ +	if (func == ftrace_ops_list_func) { +		ftrace_trace_function = func; +		/* +		 * Don't even bother setting function_trace_ops, +		 * it would be racy to do so anyway. +		 */ +		return; +	} + +#ifndef CONFIG_DYNAMIC_FTRACE +	/* +	 * For static tracing, we need to be a bit more careful. +	 * The function change takes affect immediately. Thus, +	 * we need to coorditate the setting of the function_trace_ops +	 * with the setting of the ftrace_trace_function. +	 * +	 * Set the function to the list ops, which will call the +	 * function we want, albeit indirectly, but it handles the +	 * ftrace_ops and doesn't depend on function_trace_op. +	 */ +	ftrace_trace_function = ftrace_ops_list_func; +	/* +	 * Make sure all CPUs see this. Yes this is slow, but static +	 * tracing is slow and nasty to have enabled. +	 */ +	schedule_on_each_cpu(ftrace_sync); +	/* Now all cpus are using the list ops. */ +	function_trace_op = set_function_trace_op; +	/* Make sure the function_trace_op is visible on all CPUs */ +	smp_wmb(); +	/* Nasty way to force a rmb on all cpus */ +	smp_call_function(ftrace_sync_ipi, NULL, 1); +	/* OK, we are all set to update the ftrace_trace_function now! */ +#endif /* !CONFIG_DYNAMIC_FTRACE */ +  	ftrace_trace_function = func;  } +int using_ftrace_ops_list_func(void) +{ +	return ftrace_trace_function == ftrace_ops_list_func; +} +  static void add_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops)  {  	ops->next = *list; @@ -367,19 +376,12 @@ static int remove_ftrace_list_ops(struct ftrace_ops **list,  static int __register_ftrace_function(struct ftrace_ops *ops)  { -	if (unlikely(ftrace_disabled)) -		return -ENODEV; - -	if (FTRACE_WARN_ON(ops == &global_ops)) +	if (ops->flags & FTRACE_OPS_FL_DELETED)  		return -EINVAL;  	if (WARN_ON(ops->flags & FTRACE_OPS_FL_ENABLED))  		return -EBUSY; -	/* We don't support both control and global flags set. 
*/ -	if ((ops->flags & FL_GLOBAL_CONTROL_MASK) == FL_GLOBAL_CONTROL_MASK) -		return -EINVAL; -  #ifndef CONFIG_DYNAMIC_FTRACE_WITH_REGS  	/*  	 * If the ftrace_ops specifies SAVE_REGS, then it only can be used @@ -397,10 +399,7 @@ static int __register_ftrace_function(struct ftrace_ops *ops)  	if (!core_kernel_data((unsigned long)ops))  		ops->flags |= FTRACE_OPS_FL_DYNAMIC; -	if (ops->flags & FTRACE_OPS_FL_GLOBAL) { -		add_ftrace_list_ops(&ftrace_global_list, &global_ops, ops); -		ops->flags |= FTRACE_OPS_FL_ENABLED; -	} else if (ops->flags & FTRACE_OPS_FL_CONTROL) { +	if (ops->flags & FTRACE_OPS_FL_CONTROL) {  		if (control_ops_alloc(ops))  			return -ENOMEM;  		add_ftrace_list_ops(&ftrace_control_list, &control_ops, ops); @@ -413,52 +412,16 @@ static int __register_ftrace_function(struct ftrace_ops *ops)  	return 0;  } -static void ftrace_sync(struct work_struct *work) -{ -	/* -	 * This function is just a stub to implement a hard force -	 * of synchronize_sched(). This requires synchronizing -	 * tasks even in userspace and idle. -	 * -	 * Yes, function tracing is rude. -	 */ -} -  static int __unregister_ftrace_function(struct ftrace_ops *ops)  {  	int ret; -	if (ftrace_disabled) -		return -ENODEV; -  	if (WARN_ON(!(ops->flags & FTRACE_OPS_FL_ENABLED)))  		return -EBUSY; -	if (FTRACE_WARN_ON(ops == &global_ops)) -		return -EINVAL; - -	if (ops->flags & FTRACE_OPS_FL_GLOBAL) { -		ret = remove_ftrace_list_ops(&ftrace_global_list, -					     &global_ops, ops); -		if (!ret) -			ops->flags &= ~FTRACE_OPS_FL_ENABLED; -	} else if (ops->flags & FTRACE_OPS_FL_CONTROL) { +	if (ops->flags & FTRACE_OPS_FL_CONTROL) {  		ret = remove_ftrace_list_ops(&ftrace_control_list,  					     &control_ops, ops); -		if (!ret) { -			/* -			 * The ftrace_ops is now removed from the list, -			 * so there'll be no new users. We must ensure -			 * all current users are done before we free -			 * the control data. -			 * Note synchronize_sched() is not enough, as we -			 * use preempt_disable() to do RCU, but the function -			 * tracer can be called where RCU is not active -			 * (before user_exit()). -			 */ -			schedule_on_each_cpu(ftrace_sync); -			control_ops_free(ops); -		}  	} else  		ret = remove_ftrace_ops(&ftrace_ops_list, ops); @@ -468,17 +431,6 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)  	if (ftrace_enabled)  		update_ftrace_function(); -	/* -	 * Dynamic ops may be freed, we must make sure that all -	 * callers are done before leaving this function. -	 * -	 * Again, normal synchronize_sched() is not good enough. -	 * We need to do a hard force of sched synchronization. 
-	 */ -	if (ops->flags & FTRACE_OPS_FL_DYNAMIC) -		schedule_on_each_cpu(ftrace_sync); - -  	return 0;  } @@ -781,7 +733,7 @@ static int ftrace_profile_init(void)  	int cpu;  	int ret = 0; -	for_each_online_cpu(cpu) { +	for_each_possible_cpu(cpu) {  		ret = ftrace_profile_init_cpu(cpu);  		if (ret)  			break; @@ -870,7 +822,7 @@ function_profile_call(unsigned long ip, unsigned long parent_ip,  	local_irq_save(flags); -	stat = &__get_cpu_var(ftrace_profile_stats); +	stat = this_cpu_ptr(&ftrace_profile_stats);  	if (!stat->hash || !ftrace_profile_enabled)  		goto out; @@ -901,7 +853,7 @@ static void profile_graph_return(struct ftrace_graph_ret *trace)  	unsigned long flags;  	local_irq_save(flags); -	stat = &__get_cpu_var(ftrace_profile_stats); +	stat = this_cpu_ptr(&ftrace_profile_stats);  	if (!stat->hash || !ftrace_profile_enabled)  		goto out; @@ -1088,19 +1040,6 @@ static __init void ftrace_profile_debugfs(struct dentry *d_tracer)  static struct pid * const ftrace_swapper_pid = &init_struct_pid; -loff_t -ftrace_filter_lseek(struct file *file, loff_t offset, int whence) -{ -	loff_t ret; - -	if (file->f_mode & FMODE_READ) -		ret = seq_lseek(file, offset, whence); -	else -		file->f_pos = ret = 1; - -	return ret; -} -  #ifdef CONFIG_DYNAMIC_FTRACE  #ifndef CONFIG_FTRACE_MCOUNT_RECORD @@ -1157,8 +1096,6 @@ struct ftrace_page {  	int			size;  }; -static struct ftrace_page *ftrace_new_pgs; -  #define ENTRY_SIZE sizeof(struct dyn_ftrace)  #define ENTRIES_PER_PAGE (PAGE_SIZE / ENTRY_SIZE) @@ -1168,7 +1105,7 @@ static struct ftrace_page *ftrace_new_pgs;  static struct ftrace_page	*ftrace_pages_start;  static struct ftrace_page	*ftrace_pages; -static bool ftrace_hash_empty(struct ftrace_hash *hash) +static bool __always_inline ftrace_hash_empty(struct ftrace_hash *hash)  {  	return !hash || !hash->count;  } @@ -1545,7 +1482,7 @@ unsigned long ftrace_location(unsigned long ip)   * the function tracer. It checks the ftrace internal tables to   * determine if the address belongs or not.   */ -int ftrace_text_reserved(void *start, void *end) +int ftrace_text_reserved(const void *start, const void *end)  {  	unsigned long ret; @@ -1615,7 +1552,14 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops,  			in_other_hash = !!ftrace_lookup_ip(other_hash, rec->ip);  			/* +			 * If filter_hash is set, we want to match all functions +			 * that are in the hash but not in the other hash.  			 * +			 * If filter_hash is not set, then we are decrementing. +			 * That means we match anything that is in the hash +			 * and also in the other_hash. That is, we need to turn +			 * off functions in the other hash because they are disabled +			 * by this hash.  			 */  			if (filter_hash && in_hash && !in_other_hash)  				match = 1; @@ -1757,19 +1701,15 @@ static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update)  		/*  		 * If this record is being updated from a nop, then  		 *   return UPDATE_MAKE_CALL. -		 * Otherwise, if the EN flag is set, then return -		 *   UPDATE_MODIFY_CALL_REGS to tell the caller to convert -		 *   from the non-save regs, to a save regs function.  		 * Otherwise,  		 *   return UPDATE_MODIFY_CALL to tell the caller to convert -		 *   from the save regs, to a non-save regs function. +		 *   from the save regs, to a non-save regs function or +		 *   vice versa.  		 
*/  		if (flag & FTRACE_FL_ENABLED)  			return FTRACE_UPDATE_MAKE_CALL; -		else if (rec->flags & FTRACE_FL_REGS_EN) -			return FTRACE_UPDATE_MODIFY_CALL_REGS; -		else -			return FTRACE_UPDATE_MODIFY_CALL; + +		return FTRACE_UPDATE_MODIFY_CALL;  	}  	if (update) { @@ -1811,6 +1751,42 @@ int ftrace_test_record(struct dyn_ftrace *rec, int enable)  	return ftrace_check_record(rec, enable, 0);  } +/** + * ftrace_get_addr_new - Get the call address to set to + * @rec:  The ftrace record descriptor + * + * If the record has the FTRACE_FL_REGS set, that means that it + * wants to convert to a callback that saves all regs. If FTRACE_FL_REGS + * is not not set, then it wants to convert to the normal callback. + * + * Returns the address of the trampoline to set to + */ +unsigned long ftrace_get_addr_new(struct dyn_ftrace *rec) +{ +	if (rec->flags & FTRACE_FL_REGS) +		return (unsigned long)FTRACE_REGS_ADDR; +	else +		return (unsigned long)FTRACE_ADDR; +} + +/** + * ftrace_get_addr_curr - Get the call address that is already there + * @rec:  The ftrace record descriptor + * + * The FTRACE_FL_REGS_EN is set when the record already points to + * a function that saves all the regs. Basically the '_EN' version + * represents the current state of the function. + * + * Returns the address of the trampoline that is currently being called + */ +unsigned long ftrace_get_addr_curr(struct dyn_ftrace *rec) +{ +	if (rec->flags & FTRACE_FL_REGS_EN) +		return (unsigned long)FTRACE_REGS_ADDR; +	else +		return (unsigned long)FTRACE_ADDR; +} +  static int  __ftrace_replace_code(struct dyn_ftrace *rec, int enable)  { @@ -1818,12 +1794,12 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)  	unsigned long ftrace_addr;  	int ret; -	ret = ftrace_update_record(rec, enable); +	ftrace_addr = ftrace_get_addr_new(rec); -	if (rec->flags & FTRACE_FL_REGS) -		ftrace_addr = (unsigned long)FTRACE_REGS_ADDR; -	else -		ftrace_addr = (unsigned long)FTRACE_ADDR; +	/* This needs to be done before we call ftrace_update_record */ +	ftrace_old_addr = ftrace_get_addr_curr(rec); + +	ret = ftrace_update_record(rec, enable);  	switch (ret) {  	case FTRACE_UPDATE_IGNORE: @@ -1835,13 +1811,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)  	case FTRACE_UPDATE_MAKE_NOP:  		return ftrace_make_nop(NULL, rec, ftrace_addr); -	case FTRACE_UPDATE_MODIFY_CALL_REGS:  	case FTRACE_UPDATE_MODIFY_CALL: -		if (rec->flags & FTRACE_FL_REGS) -			ftrace_old_addr = (unsigned long)FTRACE_ADDR; -		else -			ftrace_old_addr = (unsigned long)FTRACE_REGS_ADDR; -  		return ftrace_modify_call(rec, ftrace_old_addr, ftrace_addr);  	} @@ -1979,6 +1949,7 @@ int __weak ftrace_arch_code_modify_post_process(void)  void ftrace_modify_all_code(int command)  {  	int update = command & FTRACE_UPDATE_TRACE_FUNC; +	int err = 0;  	/*  	 * If the ftrace_caller calls a ftrace_ops func directly, @@ -1990,21 +1961,33 @@ void ftrace_modify_all_code(int command)  	 * to make sure the ops are having the right functions  	 * traced.  	 
*/ -	if (update) -		ftrace_update_ftrace_func(ftrace_ops_list_func); +	if (update) { +		err = ftrace_update_ftrace_func(ftrace_ops_list_func); +		if (FTRACE_WARN_ON(err)) +			return; +	}  	if (command & FTRACE_UPDATE_CALLS)  		ftrace_replace_code(1);  	else if (command & FTRACE_DISABLE_CALLS)  		ftrace_replace_code(0); -	if (update && ftrace_trace_function != ftrace_ops_list_func) -		ftrace_update_ftrace_func(ftrace_trace_function); +	if (update && ftrace_trace_function != ftrace_ops_list_func) { +		function_trace_op = set_function_trace_op; +		smp_wmb(); +		/* If irqs are disabled, we are in stop machine */ +		if (!irqs_disabled()) +			smp_call_function(ftrace_sync_ipi, NULL, 1); +		err = ftrace_update_ftrace_func(ftrace_trace_function); +		if (FTRACE_WARN_ON(err)) +			return; +	}  	if (command & FTRACE_START_FUNC_RET) -		ftrace_enable_ftrace_graph_caller(); +		err = ftrace_enable_ftrace_graph_caller();  	else if (command & FTRACE_STOP_FUNC_RET) -		ftrace_disable_ftrace_graph_caller(); +		err = ftrace_disable_ftrace_graph_caller(); +	FTRACE_WARN_ON(err);  }  static int __ftrace_modify_code(void *data) @@ -2072,6 +2055,11 @@ static ftrace_func_t saved_ftrace_func;  static int ftrace_start_up;  static int global_start_up; +static void control_ops_free(struct ftrace_ops *ops) +{ +	free_percpu(ops->disabled); +} +  static void ftrace_startup_enable(int command)  {  	if (saved_ftrace_func != ftrace_trace_function) { @@ -2087,38 +2075,37 @@ static void ftrace_startup_enable(int command)  static int ftrace_startup(struct ftrace_ops *ops, int command)  { -	bool hash_enable = true; +	int ret;  	if (unlikely(ftrace_disabled))  		return -ENODEV; +	ret = __register_ftrace_function(ops); +	if (ret) +		return ret; +  	ftrace_start_up++;  	command |= FTRACE_UPDATE_CALLS; -	/* ops marked global share the filter hashes */ -	if (ops->flags & FTRACE_OPS_FL_GLOBAL) { -		ops = &global_ops; -		/* Don't update hash if global is already set */ -		if (global_start_up) -			hash_enable = false; -		global_start_up++; -	} -  	ops->flags |= FTRACE_OPS_FL_ENABLED; -	if (hash_enable) -		ftrace_hash_rec_enable(ops, 1); + +	ftrace_hash_rec_enable(ops, 1);  	ftrace_startup_enable(command);  	return 0;  } -static void ftrace_shutdown(struct ftrace_ops *ops, int command) +static int ftrace_shutdown(struct ftrace_ops *ops, int command)  { -	bool hash_disable = true; +	int ret;  	if (unlikely(ftrace_disabled)) -		return; +		return -ENODEV; + +	ret = __unregister_ftrace_function(ops); +	if (ret) +		return ret;  	ftrace_start_up--;  	/* @@ -2128,21 +2115,9 @@ static void ftrace_shutdown(struct ftrace_ops *ops, int command)  	 */  	WARN_ON_ONCE(ftrace_start_up < 0); -	if (ops->flags & FTRACE_OPS_FL_GLOBAL) { -		ops = &global_ops; -		global_start_up--; -		WARN_ON_ONCE(global_start_up < 0); -		/* Don't update hash if global still has users */ -		if (global_start_up) { -			WARN_ON_ONCE(!ftrace_start_up); -			hash_disable = false; -		} -	} - -	if (hash_disable) -		ftrace_hash_rec_disable(ops, 1); +	ftrace_hash_rec_disable(ops, 1); -	if (ops != &global_ops || !global_start_up) +	if (!global_start_up)  		ops->flags &= ~FTRACE_OPS_FL_ENABLED;  	command |= FTRACE_UPDATE_CALLS; @@ -2152,10 +2127,42 @@ static void ftrace_shutdown(struct ftrace_ops *ops, int command)  		command |= FTRACE_UPDATE_TRACE_FUNC;  	} -	if (!command || !ftrace_enabled) -		return; +	if (!command || !ftrace_enabled) { +		/* +		 * If these are control ops, they still need their +		 * per_cpu field freed. 
Since, function tracing is +		 * not currently active, we can just free them +		 * without synchronizing all CPUs. +		 */ +		if (ops->flags & FTRACE_OPS_FL_CONTROL) +			control_ops_free(ops); +		return 0; +	}  	ftrace_run_update_code(command); + +	/* +	 * Dynamic ops may be freed, we must make sure that all +	 * callers are done before leaving this function. +	 * The same goes for freeing the per_cpu data of the control +	 * ops. +	 * +	 * Again, normal synchronize_sched() is not good enough. +	 * We need to do a hard force of sched synchronization. +	 * This is because we use preempt_disable() to do RCU, but +	 * the function tracers can be called where RCU is not watching +	 * (like before user_exit()). We can not rely on the RCU +	 * infrastructure to do the synchronization, thus we must do it +	 * ourselves. +	 */ +	if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_CONTROL)) { +		schedule_on_each_cpu(ftrace_sync); + +		if (ops->flags & FTRACE_OPS_FL_CONTROL) +			control_ops_free(ops); +	} + +	return 0;  }  static void ftrace_startup_sysctl(void) @@ -2181,7 +2188,6 @@ static void ftrace_shutdown_sysctl(void)  }  static cycle_t		ftrace_update_time; -static unsigned long	ftrace_update_cnt;  unsigned long		ftrace_update_tot_cnt;  static inline int ops_traces_mod(struct ftrace_ops *ops) @@ -2237,11 +2243,12 @@ static int referenced_filters(struct dyn_ftrace *rec)  	return cnt;  } -static int ftrace_update_code(struct module *mod) +static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs)  {  	struct ftrace_page *pg;  	struct dyn_ftrace *p;  	cycle_t start, stop; +	unsigned long update_cnt = 0;  	unsigned long ref = 0;  	bool test = false;  	int i; @@ -2267,9 +2274,8 @@ static int ftrace_update_code(struct module *mod)  	}  	start = ftrace_now(raw_smp_processor_id()); -	ftrace_update_cnt = 0; -	for (pg = ftrace_new_pgs; pg; pg = pg->next) { +	for (pg = new_pgs; pg; pg = pg->next) {  		for (i = 0; i < pg->index; i++) {  			int cnt = ref; @@ -2290,7 +2296,7 @@ static int ftrace_update_code(struct module *mod)  			if (!ftrace_code_disable(mod, p))  				break; -			ftrace_update_cnt++; +			update_cnt++;  			/*  			 * If the tracing is enabled, go ahead and enable the record. @@ -2309,11 +2315,9 @@ static int ftrace_update_code(struct module *mod)  		}  	} -	ftrace_new_pgs = NULL; -  	stop = ftrace_now(raw_smp_processor_id());  	ftrace_update_time = stop - start; -	ftrace_update_tot_cnt += ftrace_update_cnt; +	ftrace_update_tot_cnt += update_cnt;  	return 0;  } @@ -2405,22 +2409,6 @@ ftrace_allocate_pages(unsigned long num_to_init)  	return NULL;  } -static int __init ftrace_dyn_table_alloc(unsigned long num_to_init) -{ -	int cnt; - -	if (!num_to_init) { -		pr_info("ftrace: No functions to be traced?\n"); -		return -1; -	} - -	cnt = num_to_init / ENTRIES_PER_PAGE; -	pr_info("ftrace: allocating %ld entries in %d pages\n", -		num_to_init, cnt + 1); - -	return 0; -} -  #define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */  struct ftrace_iterator { @@ -2734,7 +2722,7 @@ static void ftrace_filter_reset(struct ftrace_hash *hash)   * routine, you can use ftrace_filter_write() for the write   * routine if @flag has FTRACE_ITER_FILTER set, or   * ftrace_notrace_write() if @flag has FTRACE_ITER_NOTRACE set. - * ftrace_filter_lseek() should be used as the lseek routine, and + * tracing_lseek() should be used as the lseek routine, and   * release must call ftrace_regex_release().   
*/  int @@ -2808,7 +2796,9 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag,  static int  ftrace_filter_open(struct inode *inode, struct file *file)  { -	return ftrace_regex_open(&global_ops, +	struct ftrace_ops *ops = inode->i_private; + +	return ftrace_regex_open(ops,  			FTRACE_ITER_FILTER | FTRACE_ITER_DO_HASH,  			inode, file);  } @@ -2816,7 +2806,9 @@ ftrace_filter_open(struct inode *inode, struct file *file)  static int  ftrace_notrace_open(struct inode *inode, struct file *file)  { -	return ftrace_regex_open(&global_ops, FTRACE_ITER_NOTRACE, +	struct ftrace_ops *ops = inode->i_private; + +	return ftrace_regex_open(ops, FTRACE_ITER_NOTRACE,  				 inode, file);  } @@ -3060,16 +3052,13 @@ static void __enable_ftrace_function_probe(void)  	if (i == FTRACE_FUNC_HASHSIZE)  		return; -	ret = __register_ftrace_function(&trace_probe_ops); -	if (!ret) -		ret = ftrace_startup(&trace_probe_ops, 0); +	ret = ftrace_startup(&trace_probe_ops, 0);  	ftrace_probe_registered = 1;  }  static void __disable_ftrace_function_probe(void)  { -	int ret;  	int i;  	if (!ftrace_probe_registered) @@ -3082,9 +3071,7 @@ static void __disable_ftrace_function_probe(void)  	}  	/* no more funcs left */ -	ret = __unregister_ftrace_function(&trace_probe_ops); -	if (!ret) -		ftrace_shutdown(&trace_probe_ops, 0); +	ftrace_shutdown(&trace_probe_ops, 0);  	ftrace_probe_registered = 0;  } @@ -3307,7 +3294,11 @@ void unregister_ftrace_function_probe_all(char *glob)  static LIST_HEAD(ftrace_commands);  static DEFINE_MUTEX(ftrace_cmd_mutex); -int register_ftrace_command(struct ftrace_func_command *cmd) +/* + * Currently we only register ftrace commands from __init, so mark this + * __init too. + */ +__init int register_ftrace_command(struct ftrace_func_command *cmd)  {  	struct ftrace_func_command *p;  	int ret = 0; @@ -3326,7 +3317,11 @@ int register_ftrace_command(struct ftrace_func_command *cmd)  	return ret;  } -int unregister_ftrace_command(struct ftrace_func_command *cmd) +/* + * Currently we only unregister ftrace commands from __init, so mark + * this __init too. + */ +__init int unregister_ftrace_command(struct ftrace_func_command *cmd)  {  	struct ftrace_func_command *p, *n;  	int ret = -ENODEV; @@ -3466,10 +3461,6 @@ ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len,  	struct ftrace_hash *hash;  	int ret; -	/* All global ops uses the global ops filters */ -	if (ops->flags & FTRACE_OPS_FL_GLOBAL) -		ops = &global_ops; -  	if (unlikely(ftrace_disabled))  		return -ENODEV; @@ -3581,8 +3572,7 @@ int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf,  }  EXPORT_SYMBOL_GPL(ftrace_set_notrace);  /** - * ftrace_set_filter - set a function to filter on in ftrace - * @ops - the ops to set the filter with + * ftrace_set_global_filter - set a function to filter on with global tracers   * @buf - the string that holds the function filter text.   * @len - the length of the string.   * @reset - non zero to reset all filters before applying this filter. @@ -3597,8 +3587,7 @@ void ftrace_set_global_filter(unsigned char *buf, int len, int reset)  EXPORT_SYMBOL_GPL(ftrace_set_global_filter);  /** - * ftrace_set_notrace - set a function to not trace in ftrace - * @ops - the ops to set the notrace filter with + * ftrace_set_global_notrace - set a function to not trace with global tracers   * @buf - the string that holds the function notrace text.   * @len - the length of the string.   * @reset - non zero to reset all filters before applying this filter. 
@@ -3641,7 +3630,7 @@ __setup("ftrace_filter=", set_ftrace_filter);  #ifdef CONFIG_FUNCTION_GRAPH_TRACER  static char ftrace_graph_buf[FTRACE_FILTER_SIZE] __initdata; -static int ftrace_set_func(unsigned long *array, int *idx, char *buffer); +static int ftrace_set_func(unsigned long *array, int *idx, int size, char *buffer);  static int __init set_graph_function(char *str)  { @@ -3659,7 +3648,7 @@ static void __init set_ftrace_early_graph(char *buf)  		func = strsep(&buf, ",");  		/* we allow only one expression at a time */  		ret = ftrace_set_func(ftrace_graph_funcs, &ftrace_graph_count, -				      func); +				      FTRACE_GRAPH_MAX_FUNCS, func);  		if (ret)  			printk(KERN_DEBUG "ftrace: function %s not "  					  "traceable\n", func); @@ -3759,7 +3748,7 @@ static const struct file_operations ftrace_filter_fops = {  	.open = ftrace_filter_open,  	.read = seq_read,  	.write = ftrace_filter_write, -	.llseek = ftrace_filter_lseek, +	.llseek = tracing_lseek,  	.release = ftrace_regex_release,  }; @@ -3767,7 +3756,7 @@ static const struct file_operations ftrace_notrace_fops = {  	.open = ftrace_notrace_open,  	.read = seq_read,  	.write = ftrace_notrace_write, -	.llseek = ftrace_filter_lseek, +	.llseek = tracing_lseek,  	.release = ftrace_regex_release,  }; @@ -3776,15 +3765,25 @@ static const struct file_operations ftrace_notrace_fops = {  static DEFINE_MUTEX(graph_lock);  int ftrace_graph_count; -int ftrace_graph_filter_enabled; +int ftrace_graph_notrace_count;  unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly; +unsigned long ftrace_graph_notrace_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly; + +struct ftrace_graph_data { +	unsigned long *table; +	size_t size; +	int *count; +	const struct seq_operations *seq_ops; +};  static void *  __g_next(struct seq_file *m, loff_t *pos)  { -	if (*pos >= ftrace_graph_count) +	struct ftrace_graph_data *fgd = m->private; + +	if (*pos >= *fgd->count)  		return NULL; -	return &ftrace_graph_funcs[*pos]; +	return &fgd->table[*pos];  }  static void * @@ -3796,10 +3795,12 @@ g_next(struct seq_file *m, void *v, loff_t *pos)  static void *g_start(struct seq_file *m, loff_t *pos)  { +	struct ftrace_graph_data *fgd = m->private; +  	mutex_lock(&graph_lock);  	/* Nothing, tell g_show to print all functions are enabled */ -	if (!ftrace_graph_filter_enabled && !*pos) +	if (!*fgd->count && !*pos)  		return (void *)1;  	return __g_next(m, pos); @@ -3835,38 +3836,88 @@ static const struct seq_operations ftrace_graph_seq_ops = {  };  static int -ftrace_graph_open(struct inode *inode, struct file *file) +__ftrace_graph_open(struct inode *inode, struct file *file, +		    struct ftrace_graph_data *fgd)  {  	int ret = 0; -	if (unlikely(ftrace_disabled)) -		return -ENODEV; -  	mutex_lock(&graph_lock);  	if ((file->f_mode & FMODE_WRITE) &&  	    (file->f_flags & O_TRUNC)) { -		ftrace_graph_filter_enabled = 0; -		ftrace_graph_count = 0; -		memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs)); +		*fgd->count = 0; +		memset(fgd->table, 0, fgd->size * sizeof(*fgd->table));  	}  	mutex_unlock(&graph_lock); -	if (file->f_mode & FMODE_READ) -		ret = seq_open(file, &ftrace_graph_seq_ops); +	if (file->f_mode & FMODE_READ) { +		ret = seq_open(file, fgd->seq_ops); +		if (!ret) { +			struct seq_file *m = file->private_data; +			m->private = fgd; +		} +	} else +		file->private_data = fgd;  	return ret;  }  static int +ftrace_graph_open(struct inode *inode, struct file *file) +{ +	struct ftrace_graph_data *fgd; + +	if (unlikely(ftrace_disabled)) +		return -ENODEV; + +	
fgd = kmalloc(sizeof(*fgd), GFP_KERNEL); +	if (fgd == NULL) +		return -ENOMEM; + +	fgd->table = ftrace_graph_funcs; +	fgd->size = FTRACE_GRAPH_MAX_FUNCS; +	fgd->count = &ftrace_graph_count; +	fgd->seq_ops = &ftrace_graph_seq_ops; + +	return __ftrace_graph_open(inode, file, fgd); +} + +static int +ftrace_graph_notrace_open(struct inode *inode, struct file *file) +{ +	struct ftrace_graph_data *fgd; + +	if (unlikely(ftrace_disabled)) +		return -ENODEV; + +	fgd = kmalloc(sizeof(*fgd), GFP_KERNEL); +	if (fgd == NULL) +		return -ENOMEM; + +	fgd->table = ftrace_graph_notrace_funcs; +	fgd->size = FTRACE_GRAPH_MAX_FUNCS; +	fgd->count = &ftrace_graph_notrace_count; +	fgd->seq_ops = &ftrace_graph_seq_ops; + +	return __ftrace_graph_open(inode, file, fgd); +} + +static int  ftrace_graph_release(struct inode *inode, struct file *file)  { -	if (file->f_mode & FMODE_READ) +	if (file->f_mode & FMODE_READ) { +		struct seq_file *m = file->private_data; + +		kfree(m->private);  		seq_release(inode, file); +	} else { +		kfree(file->private_data); +	} +  	return 0;  }  static int -ftrace_set_func(unsigned long *array, int *idx, char *buffer) +ftrace_set_func(unsigned long *array, int *idx, int size, char *buffer)  {  	struct dyn_ftrace *rec;  	struct ftrace_page *pg; @@ -3879,7 +3930,7 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)  	/* decode regex */  	type = filter_parse_regex(buffer, strlen(buffer), &search, ¬); -	if (!not && *idx >= FTRACE_GRAPH_MAX_FUNCS) +	if (!not && *idx >= size)  		return -EBUSY;  	search_len = strlen(search); @@ -3907,7 +3958,7 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)  				fail = 0;  				if (!exists) {  					array[(*idx)++] = rec->ip; -					if (*idx >= FTRACE_GRAPH_MAX_FUNCS) +					if (*idx >= size)  						goto out;  				}  			} else { @@ -3925,8 +3976,6 @@ out:  	if (fail)  		return -EINVAL; -	ftrace_graph_filter_enabled = !!(*idx); -  	return 0;  } @@ -3935,36 +3984,33 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,  		   size_t cnt, loff_t *ppos)  {  	struct trace_parser parser; -	ssize_t read, ret; +	ssize_t read, ret = 0; +	struct ftrace_graph_data *fgd = file->private_data;  	if (!cnt)  		return 0; -	mutex_lock(&graph_lock); - -	if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX)) { -		ret = -ENOMEM; -		goto out_unlock; -	} +	if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX)) +		return -ENOMEM;  	read = trace_get_user(&parser, ubuf, cnt, ppos);  	if (read >= 0 && trace_parser_loaded((&parser))) {  		parser.buffer[parser.idx] = 0; +		mutex_lock(&graph_lock); +  		/* we allow only one expression at a time */ -		ret = ftrace_set_func(ftrace_graph_funcs, &ftrace_graph_count, -					parser.buffer); -		if (ret) -			goto out_free; +		ret = ftrace_set_func(fgd->table, fgd->count, fgd->size, +				      parser.buffer); + +		mutex_unlock(&graph_lock);  	} -	ret = read; +	if (!ret) +		ret = read; -out_free:  	trace_parser_put(&parser); -out_unlock: -	mutex_unlock(&graph_lock);  	return ret;  } @@ -3973,11 +4019,49 @@ static const struct file_operations ftrace_graph_fops = {  	.open		= ftrace_graph_open,  	.read		= seq_read,  	.write		= ftrace_graph_write, -	.llseek		= ftrace_filter_lseek, +	.llseek		= tracing_lseek, +	.release	= ftrace_graph_release, +}; + +static const struct file_operations ftrace_graph_notrace_fops = { +	.open		= ftrace_graph_notrace_open, +	.read		= seq_read, +	.write		= ftrace_graph_write, +	.llseek		= tracing_lseek,  	.release	= ftrace_graph_release,  };  #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ +void 
ftrace_create_filter_files(struct ftrace_ops *ops, +				struct dentry *parent) +{ + +	trace_create_file("set_ftrace_filter", 0644, parent, +			  ops, &ftrace_filter_fops); + +	trace_create_file("set_ftrace_notrace", 0644, parent, +			  ops, &ftrace_notrace_fops); +} + +/* + * The name "destroy_filter_files" is really a misnomer. Although + * in the future, it may actualy delete the files, but this is + * really intended to make sure the ops passed in are disabled + * and that when this function returns, the caller is free to + * free the ops. + * + * The "destroy" name is only to match the "create" name that this + * should be paired with. + */ +void ftrace_destroy_filter_files(struct ftrace_ops *ops) +{ +	mutex_lock(&ftrace_lock); +	if (ops->flags & FTRACE_OPS_FL_ENABLED) +		ftrace_shutdown(ops, 0); +	ops->flags |= FTRACE_OPS_FL_DELETED; +	mutex_unlock(&ftrace_lock); +} +  static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)  { @@ -3987,16 +4071,15 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)  	trace_create_file("enabled_functions", 0444,  			d_tracer, NULL, &ftrace_enabled_fops); -	trace_create_file("set_ftrace_filter", 0644, d_tracer, -			NULL, &ftrace_filter_fops); - -	trace_create_file("set_ftrace_notrace", 0644, d_tracer, -				    NULL, &ftrace_notrace_fops); +	ftrace_create_filter_files(&global_ops, d_tracer);  #ifdef CONFIG_FUNCTION_GRAPH_TRACER  	trace_create_file("set_graph_function", 0444, d_tracer,  				    NULL,  				    &ftrace_graph_fops); +	trace_create_file("set_graph_notrace", 0444, d_tracer, +				    NULL, +				    &ftrace_graph_notrace_fops);  #endif /* CONFIG_FUNCTION_GRAPH_TRACER */  	return 0; @@ -4104,9 +4187,6 @@ static int ftrace_process_locs(struct module *mod,  	/* Assign the last page to ftrace_pages */  	ftrace_pages = pg; -	/* These new locations need to be initialized */ -	ftrace_new_pgs = start_pg; -  	/*  	 * We only need to disable interrupts on start up  	 * because we are modifying code that an interrupt @@ -4117,7 +4197,7 @@ static int ftrace_process_locs(struct module *mod,  	 */  	if (!mod)  		local_irq_save(flags); -	ftrace_update_code(mod); +	ftrace_update_code(mod, start_pg);  	if (!mod)  		local_irq_restore(flags);  	ret = 0; @@ -4181,16 +4261,11 @@ static void ftrace_init_module(struct module *mod,  	ftrace_process_locs(mod, start, end);  } -static int ftrace_module_notify_enter(struct notifier_block *self, -				      unsigned long val, void *data) +void ftrace_module_init(struct module *mod)  { -	struct module *mod = data; - -	if (val == MODULE_STATE_COMING) -		ftrace_init_module(mod, mod->ftrace_callsites, -				   mod->ftrace_callsites + -				   mod->num_ftrace_callsites); -	return 0; +	ftrace_init_module(mod, mod->ftrace_callsites, +			   mod->ftrace_callsites + +			   mod->num_ftrace_callsites);  }  static int ftrace_module_notify_exit(struct notifier_block *self, @@ -4204,11 +4279,6 @@ static int ftrace_module_notify_exit(struct notifier_block *self,  	return 0;  }  #else -static int ftrace_module_notify_enter(struct notifier_block *self, -				      unsigned long val, void *data) -{ -	return 0; -}  static int ftrace_module_notify_exit(struct notifier_block *self,  				     unsigned long val, void *data)  { @@ -4216,40 +4286,32 @@ static int ftrace_module_notify_exit(struct notifier_block *self,  }  #endif /* CONFIG_MODULES */ -struct notifier_block ftrace_module_enter_nb = { -	.notifier_call = ftrace_module_notify_enter, -	.priority = INT_MAX,	/* Run before anything that can use kprobes */ -}; -  
struct notifier_block ftrace_module_exit_nb = {  	.notifier_call = ftrace_module_notify_exit,  	.priority = INT_MIN,	/* Run after anything that can remove kprobes */  }; -extern unsigned long __start_mcount_loc[]; -extern unsigned long __stop_mcount_loc[]; -  void __init ftrace_init(void)  { -	unsigned long count, addr, flags; +	extern unsigned long __start_mcount_loc[]; +	extern unsigned long __stop_mcount_loc[]; +	unsigned long count, flags;  	int ret; -	/* Keep the ftrace pointer to the stub */ -	addr = (unsigned long)ftrace_stub; -  	local_irq_save(flags); -	ftrace_dyn_arch_init(&addr); +	ret = ftrace_dyn_arch_init();  	local_irq_restore(flags); - -	/* ftrace_dyn_arch_init places the return code in addr */ -	if (addr) +	if (ret)  		goto failed;  	count = __stop_mcount_loc - __start_mcount_loc; - -	ret = ftrace_dyn_table_alloc(count); -	if (ret) +	if (!count) { +		pr_info("ftrace: No functions to be traced?\n");  		goto failed; +	} + +	pr_info("ftrace: allocating %ld entries in %ld pages\n", +		count, count / ENTRIES_PER_PAGE + 1);  	last_ftrace_enabled = ftrace_enabled = 1; @@ -4257,10 +4319,6 @@ void __init ftrace_init(void)  				  __start_mcount_loc,  				  __stop_mcount_loc); -	ret = register_module_notifier(&ftrace_module_enter_nb); -	if (ret) -		pr_warning("Failed to register trace ftrace module enter notifier\n"); -  	ret = register_module_notifier(&ftrace_module_exit_nb);  	if (ret)  		pr_warning("Failed to register trace ftrace module exit notifier\n"); @@ -4290,12 +4348,21 @@ core_initcall(ftrace_nodyn_init);  static inline int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { return 0; }  static inline void ftrace_startup_enable(int command) { }  /* Keep as macros so we do not need to define the commands */ -# define ftrace_startup(ops, command)			\ -	({						\ -		(ops)->flags |= FTRACE_OPS_FL_ENABLED;	\ -		0;					\ +# define ftrace_startup(ops, command)					\ +	({								\ +		int ___ret = __register_ftrace_function(ops);		\ +		if (!___ret)						\ +			(ops)->flags |= FTRACE_OPS_FL_ENABLED;		\ +		___ret;							\  	}) -# define ftrace_shutdown(ops, command)	do { } while (0) +# define ftrace_shutdown(ops, command)					\ +	({								\ +		int ___ret = __unregister_ftrace_function(ops);		\ +		if (!___ret)						\ +			(ops)->flags &= ~FTRACE_OPS_FL_ENABLED;		\ +		___ret;							\ +	}) +  # define ftrace_startup_sysctl()	do { } while (0)  # define ftrace_shutdown_sysctl()	do { } while (0) @@ -4307,6 +4374,34 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs)  #endif /* CONFIG_DYNAMIC_FTRACE */ +__init void ftrace_init_global_array_ops(struct trace_array *tr) +{ +	tr->ops = &global_ops; +	tr->ops->private = tr; +} + +void ftrace_init_array_ops(struct trace_array *tr, ftrace_func_t func) +{ +	/* If we filter on pids, update to use the pid function */ +	if (tr->flags & TRACE_ARRAY_FL_GLOBAL) { +		if (WARN_ON(tr->ops->func != ftrace_stub)) +			printk("ftrace ops had %pS for function\n", +			       tr->ops->func); +		/* Only the top level instance does pid tracing */ +		if (!list_empty(&ftrace_pids)) { +			set_ftrace_pid_function(func); +			func = ftrace_pid_func; +		} +	} +	tr->ops->func = func; +	tr->ops->private = tr; +} + +void ftrace_reset_array_ops(struct trace_array *tr) +{ +	tr->ops->func = ftrace_stub; +} +  static void  ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip,  			struct ftrace_ops *op, struct pt_regs *regs) @@ -4320,12 +4415,21 @@ ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip,  	 */  	preempt_disable_notrace();  
	trace_recursion_set(TRACE_CONTROL_BIT); + +	/* +	 * Control funcs (perf) uses RCU. Only trace if +	 * RCU is currently active. +	 */ +	if (!rcu_is_watching()) +		goto out; +  	do_for_each_ftrace_op(op, ftrace_control_list) {  		if (!(op->flags & FTRACE_OPS_FL_STUB) &&  		    !ftrace_function_local_disabled(op) &&  		    ftrace_ops_test(op, ip, regs))  			op->func(ip, parent_ip, op, regs);  	} while_for_each_ftrace_op(op); + out:  	trace_recursion_clear(TRACE_CONTROL_BIT);  	preempt_enable_notrace();  } @@ -4356,9 +4460,16 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,  	 */  	preempt_disable_notrace();  	do_for_each_ftrace_op(op, ftrace_ops_list) { -		if (ftrace_ops_test(op, ip, regs)) +		if (ftrace_ops_test(op, ip, regs)) { +			if (WARN_ON(!op->func)) { +				function_trace_stop = 1; +				printk("op=%p %pS\n", op, op); +				goto out; +			}  			op->func(ip, parent_ip, op, regs); +		}  	} while_for_each_ftrace_op(op); +out:  	preempt_enable_notrace();  	trace_clear_recursion(bit);  } @@ -4631,7 +4742,7 @@ static const struct file_operations ftrace_pid_fops = {  	.open		= ftrace_pid_open,  	.write		= ftrace_pid_write,  	.read		= seq_read, -	.llseek		= ftrace_filter_lseek, +	.llseek		= tracing_lseek,  	.release	= ftrace_pid_release,  }; @@ -4695,9 +4806,7 @@ int register_ftrace_function(struct ftrace_ops *ops)  	mutex_lock(&ftrace_lock); -	ret = __register_ftrace_function(ops); -	if (!ret) -		ret = ftrace_startup(ops, 0); +	ret = ftrace_startup(ops, 0);  	mutex_unlock(&ftrace_lock); @@ -4716,9 +4825,7 @@ int unregister_ftrace_function(struct ftrace_ops *ops)  	int ret;  	mutex_lock(&ftrace_lock); -	ret = __unregister_ftrace_function(ops); -	if (!ret) -		ftrace_shutdown(ops, 0); +	ret = ftrace_shutdown(ops, 0);  	mutex_unlock(&ftrace_lock);  	return ret; @@ -4767,7 +4874,6 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,  #ifdef CONFIG_FUNCTION_GRAPH_TRACER  static int ftrace_graph_active; -static struct notifier_block ftrace_suspend_notifier;  int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)  { @@ -4778,6 +4884,7 @@ int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)  trace_func_graph_ret_t ftrace_graph_return =  			(trace_func_graph_ret_t)ftrace_stub;  trace_func_graph_ent_t ftrace_graph_entry = ftrace_graph_entry_stub; +static trace_func_graph_ent_t __ftrace_graph_entry = ftrace_graph_entry_stub;  /* Try to assign a return stack array on FTRACE_RETSTACK_ALLOC_SIZE tasks. */  static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list) @@ -4912,6 +5019,34 @@ ftrace_suspend_notifier_call(struct notifier_block *bl, unsigned long state,  	return NOTIFY_DONE;  } +static int ftrace_graph_entry_test(struct ftrace_graph_ent *trace) +{ +	if (!ftrace_ops_test(&global_ops, trace->func, NULL)) +		return 0; +	return __ftrace_graph_entry(trace); +} + +/* + * The function graph tracer should only trace the functions defined + * by set_ftrace_filter and set_ftrace_notrace. If another function + * tracer ops is registered, the graph tracer requires testing the + * function against the global ops, and not just trace any function + * that any ftrace_ops registered. 
+ */ +static void update_function_graph_func(void) +{ +	if (ftrace_ops_list == &ftrace_list_end || +	    (ftrace_ops_list == &global_ops && +	     global_ops.next == &ftrace_list_end)) +		ftrace_graph_entry = __ftrace_graph_entry; +	else +		ftrace_graph_entry = ftrace_graph_entry_test; +} + +static struct notifier_block ftrace_suspend_notifier = { +	.notifier_call = ftrace_suspend_notifier_call, +}; +  int register_ftrace_graph(trace_func_graph_ret_t retfunc,  			trace_func_graph_ent_t entryfunc)  { @@ -4925,7 +5060,6 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,  		goto out;  	} -	ftrace_suspend_notifier.notifier_call = ftrace_suspend_notifier_call;  	register_pm_notifier(&ftrace_suspend_notifier);  	ftrace_graph_active++; @@ -4936,7 +5070,19 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,  	}  	ftrace_graph_return = retfunc; -	ftrace_graph_entry = entryfunc; + +	/* +	 * Update the indirect function to the entryfunc, and the +	 * function that gets called to the entry_test first. Then +	 * call the update fgraph entry function to determine if +	 * the entryfunc should be called directly or not. +	 */ +	__ftrace_graph_entry = entryfunc; +	ftrace_graph_entry = ftrace_graph_entry_test; +	update_function_graph_func(); + +	/* Function graph doesn't use the .func field of global_ops */ +	global_ops.flags |= FTRACE_OPS_FL_STUB;  	ret = ftrace_startup(&global_ops, FTRACE_START_FUNC_RET); @@ -4955,7 +5101,9 @@ void unregister_ftrace_graph(void)  	ftrace_graph_active--;  	ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;  	ftrace_graph_entry = ftrace_graph_entry_stub; +	__ftrace_graph_entry = ftrace_graph_entry_stub;  	ftrace_shutdown(&global_ops, FTRACE_STOP_FUNC_RET); +	global_ops.flags &= ~FTRACE_OPS_FL_STUB;  	unregister_pm_notifier(&ftrace_suspend_notifier);  	unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index cc2f66f68dc..ff7027199a9 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -543,7 +543,7 @@ static void rb_wake_up_waiters(struct irq_work *work)   * as data is added to any of the @buffer's cpu buffers. Otherwise   * it will wait for data to be added to a specific cpu buffer.   */ -void ring_buffer_wait(struct ring_buffer *buffer, int cpu) +int ring_buffer_wait(struct ring_buffer *buffer, int cpu)  {  	struct ring_buffer_per_cpu *cpu_buffer;  	DEFINE_WAIT(wait); @@ -557,6 +557,8 @@ void ring_buffer_wait(struct ring_buffer *buffer, int cpu)  	if (cpu == RING_BUFFER_ALL_CPUS)  		work = &buffer->irq_work;  	else { +		if (!cpumask_test_cpu(cpu, buffer->cpumask)) +			return -ENODEV;  		cpu_buffer = buffer->buffers[cpu];  		work = &cpu_buffer->irq_work;  	} @@ -591,6 +593,7 @@ void ring_buffer_wait(struct ring_buffer *buffer, int cpu)  		schedule();  	finish_wait(&work->waiters, &wait); +	return 0;  }  /** @@ -613,10 +616,6 @@ int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,  	struct ring_buffer_per_cpu *cpu_buffer;  	struct rb_irq_work *work; -	if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) || -	    (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu))) -		return POLLIN | POLLRDNORM; -  	if (cpu == RING_BUFFER_ALL_CPUS)  		work = &buffer->irq_work;  	else { @@ -1301,7 +1300,7 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,  	 * In that off case, we need to allocate for all possible cpus.  	 
*/  #ifdef CONFIG_HOTPLUG_CPU -	get_online_cpus(); +	cpu_notifier_register_begin();  	cpumask_copy(buffer->cpumask, cpu_online_mask);  #else  	cpumask_copy(buffer->cpumask, cpu_possible_mask); @@ -1324,10 +1323,10 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,  #ifdef CONFIG_HOTPLUG_CPU  	buffer->cpu_notify.notifier_call = rb_cpu_notify;  	buffer->cpu_notify.priority = 0; -	register_cpu_notifier(&buffer->cpu_notify); +	__register_cpu_notifier(&buffer->cpu_notify); +	cpu_notifier_register_done();  #endif -	put_online_cpus();  	mutex_init(&buffer->mutex);  	return buffer; @@ -1341,7 +1340,9 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,   fail_free_cpumask:  	free_cpumask_var(buffer->cpumask); -	put_online_cpus(); +#ifdef CONFIG_HOTPLUG_CPU +	cpu_notifier_register_done(); +#endif   fail_free_buffer:  	kfree(buffer); @@ -1358,16 +1359,17 @@ ring_buffer_free(struct ring_buffer *buffer)  {  	int cpu; -	get_online_cpus(); -  #ifdef CONFIG_HOTPLUG_CPU -	unregister_cpu_notifier(&buffer->cpu_notify); +	cpu_notifier_register_begin(); +	__unregister_cpu_notifier(&buffer->cpu_notify);  #endif  	for_each_buffer_cpu(buffer, cpu)  		rb_free_cpu_buffer(buffer->buffers[cpu]); -	put_online_cpus(); +#ifdef CONFIG_HOTPLUG_CPU +	cpu_notifier_register_done(); +#endif  	kfree(buffer->buffers);  	free_cpumask_var(buffer->cpumask); @@ -2397,6 +2399,13 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,  	write &= RB_WRITE_MASK;  	tail = write - length; +	/* +	 * If this is the first commit on the page, then it has the same +	 * timestamp as the page itself. +	 */ +	if (!tail) +		delta = 0; +  	/* See if we shot pass the end of this buffer page */  	if (unlikely(write > BUF_PAGE_SIZE))  		return rb_move_tail(cpu_buffer, length, tail, @@ -2558,7 +2567,7 @@ rb_reserve_next_event(struct ring_buffer *buffer,  		if (unlikely(test_time_stamp(delta))) {  			int local_clock_stable = 1;  #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK -			local_clock_stable = sched_clock_stable; +			local_clock_stable = sched_clock_stable();  #endif  			WARN_ONCE(delta > (1ULL << 59),  				  KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s", diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c index a5457d577b9..0434ff1b808 100644 --- a/kernel/trace/ring_buffer_benchmark.c +++ b/kernel/trace/ring_buffer_benchmark.c @@ -40,8 +40,8 @@ static int write_iteration = 50;  module_param(write_iteration, uint, 0644);  MODULE_PARM_DESC(write_iteration, "# of writes between timestamp readings"); -static int producer_nice = 19; -static int consumer_nice = 19; +static int producer_nice = MAX_NICE; +static int consumer_nice = MAX_NICE;  static int producer_fifo = -1;  static int consumer_fifo = -1; @@ -308,7 +308,7 @@ static void ring_buffer_producer(void)  	/* Let the user know that the test is running at low priority */  	if (producer_fifo < 0 && consumer_fifo < 0 && -	    producer_nice == 19 && consumer_nice == 19) +	    producer_nice == MAX_NICE && consumer_nice == MAX_NICE)  		trace_printk("WARNING!!! 
This test is running at lowest priority.\n");  	trace_printk("Time:     %lld (usecs)\n", time); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 7974ba20557..291397e6666 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -73,7 +73,8 @@ static struct tracer_flags dummy_tracer_flags = {  	.opts = dummy_tracer_opt  }; -static int dummy_set_flag(u32 old_flags, u32 bit, int set) +static int +dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)  {  	return 0;  } @@ -118,7 +119,7 @@ enum ftrace_dump_mode ftrace_dump_on_oops;  /* When set, tracing will stop when a WARN*() is hit */  int __disable_trace_on_warning; -static int tracing_set_tracer(const char *buf); +static int tracing_set_tracer(struct trace_array *tr, const char *buf);  #define MAX_TRACER_SIZE		100  static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata; @@ -180,6 +181,17 @@ static int __init set_trace_boot_options(char *str)  }  __setup("trace_options=", set_trace_boot_options); +static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata; +static char *trace_boot_clock __initdata; + +static int __init set_trace_boot_clock(char *str) +{ +	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE); +	trace_boot_clock = trace_boot_clock_buf; +	return 0; +} +__setup("trace_clock=", set_trace_boot_clock); +  unsigned long long ns2usecs(cycle_t nsec)  { @@ -235,15 +247,35 @@ void trace_array_put(struct trace_array *this_tr)  	mutex_unlock(&trace_types_lock);  } -int filter_current_check_discard(struct ring_buffer *buffer, -				 struct ftrace_event_call *call, void *rec, -				 struct ring_buffer_event *event) +int filter_check_discard(struct ftrace_event_file *file, void *rec, +			 struct ring_buffer *buffer, +			 struct ring_buffer_event *event)  { -	return filter_check_discard(call, rec, buffer, event); +	if (unlikely(file->flags & FTRACE_EVENT_FL_FILTERED) && +	    !filter_match_preds(file->filter, rec)) { +		ring_buffer_discard_commit(buffer, event); +		return 1; +	} + +	return 0;  } -EXPORT_SYMBOL_GPL(filter_current_check_discard); +EXPORT_SYMBOL_GPL(filter_check_discard); -cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu) +int call_filter_check_discard(struct ftrace_event_call *call, void *rec, +			      struct ring_buffer *buffer, +			      struct ring_buffer_event *event) +{ +	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) && +	    !filter_match_preds(call->filter, rec)) { +		ring_buffer_discard_commit(buffer, event); +		return 1; +	} + +	return 0; +} +EXPORT_SYMBOL_GPL(call_filter_check_discard); + +static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)  {  	u64 ts; @@ -434,13 +466,22 @@ int __trace_puts(unsigned long ip, const char *str, int size)  	struct print_entry *entry;  	unsigned long irq_flags;  	int alloc; +	int pc; + +	if (!(trace_flags & TRACE_ITER_PRINTK)) +		return 0; + +	pc = preempt_count(); + +	if (unlikely(tracing_selftest_running || tracing_disabled)) +		return 0;  	alloc = sizeof(*entry) + size + 2; /* possible \n added */  	local_save_flags(irq_flags);  	buffer = global_trace.trace_buffer.buffer;  	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,  -					  irq_flags, preempt_count()); +					  irq_flags, pc);  	if (!event)  		return 0; @@ -457,6 +498,7 @@ int __trace_puts(unsigned long ip, const char *str, int size)  		entry->buf[size] = '\0';  	__buffer_unlock_commit(buffer, event); +	ftrace_trace_stack(buffer, irq_flags, 4, pc);  	return size;  } @@ -474,11 +516,20 @@ int __trace_bputs(unsigned long ip, const char *str)  	
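
filter_check_discard() and call_filter_check_discard() above share one shape: if filtering is enabled on the file (or call) and the freshly reserved record does not match the predicates, discard the reserved ring buffer event and tell the caller not to commit it. A compilable sketch of that shape, with all names invented for the example:

/*
 * Sketch of the filter-then-discard pattern: a return of 1 means the event
 * was discarded and the caller must not commit it; 0 means commit as usual.
 */
#include <stdio.h>

#define FL_FILTERED	0x1

struct event_file {
	unsigned flags;
	int (*match)(const void *rec);	/* stand-in for filter_match_preds() */
};

static void discard_commit(void)
{
	/* stand-in for ring_buffer_discard_commit(buffer, event) */
}

static int check_discard(struct event_file *file, const void *rec)
{
	if ((file->flags & FL_FILTERED) && !file->match(rec)) {
		discard_commit();
		return 1;
	}
	return 0;
}

static int match_nonzero(const void *rec)
{
	return *(const int *)rec != 0;
}

int main(void)
{
	struct event_file file = { .flags = FL_FILTERED, .match = match_nonzero };
	int rec = 0;

	if (!check_discard(&file, &rec))
		printf("committed\n");
	else
		printf("discarded\n");	/* this record fails the predicate */
	return 0;
}
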
struct bputs_entry *entry;  	unsigned long irq_flags;  	int size = sizeof(struct bputs_entry); +	int pc; + +	if (!(trace_flags & TRACE_ITER_PRINTK)) +		return 0; + +	pc = preempt_count(); + +	if (unlikely(tracing_selftest_running || tracing_disabled)) +		return 0;  	local_save_flags(irq_flags);  	buffer = global_trace.trace_buffer.buffer;  	event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size, -					  irq_flags, preempt_count()); +					  irq_flags, pc);  	if (!event)  		return 0; @@ -487,6 +538,7 @@ int __trace_bputs(unsigned long ip, const char *str)  	entry->str			= str;  	__buffer_unlock_commit(buffer, event); +	ftrace_trace_stack(buffer, irq_flags, 4, pc);  	return 1;  } @@ -561,7 +613,7 @@ static int alloc_snapshot(struct trace_array *tr)  	return 0;  } -void free_snapshot(struct trace_array *tr) +static void free_snapshot(struct trace_array *tr)  {  	/*  	 * We don't free the ring buffer. instead, resize it because @@ -575,6 +627,28 @@ void free_snapshot(struct trace_array *tr)  }  /** + * tracing_alloc_snapshot - allocate snapshot buffer. + * + * This only allocates the snapshot buffer if it isn't already + * allocated - it doesn't also take a snapshot. + * + * This is meant to be used in cases where the snapshot buffer needs + * to be set up for events that can't sleep but need to be able to + * trigger a snapshot. + */ +int tracing_alloc_snapshot(void) +{ +	struct trace_array *tr = &global_trace; +	int ret; + +	ret = alloc_snapshot(tr); +	WARN_ON(ret < 0); + +	return ret; +} +EXPORT_SYMBOL_GPL(tracing_alloc_snapshot); + +/**   * trace_snapshot_alloc - allocate and take a snapshot of the current buffer.   *   * This is similar to trace_snapshot(), but it will allocate the @@ -587,11 +661,10 @@ void free_snapshot(struct trace_array *tr)   */  void tracing_snapshot_alloc(void)  { -	struct trace_array *tr = &global_trace;  	int ret; -	ret = alloc_snapshot(tr); -	if (WARN_ON(ret < 0)) +	ret = tracing_alloc_snapshot(); +	if (ret < 0)  		return;  	tracing_snapshot(); @@ -603,6 +676,12 @@ void tracing_snapshot(void)  	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");  }  EXPORT_SYMBOL_GPL(tracing_snapshot); +int tracing_alloc_snapshot(void) +{ +	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used"); +	return -ENODEV; +} +EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);  void tracing_snapshot_alloc(void)  {  	/* Give warning */ @@ -744,7 +823,7 @@ static struct {  	{ trace_clock_local,	"local",	1 },  	{ trace_clock_global,	"global",	1 },  	{ trace_clock_counter,	"counter",	0 }, -	{ trace_clock_jiffies,	"uptime",	1 }, +	{ trace_clock_jiffies,	"uptime",	0 },  	{ trace_clock,		"perf",		1 },  	ARCH_TRACE_CLOCKS  }; @@ -843,9 +922,12 @@ int trace_get_user(struct trace_parser *parser, const char __user *ubuf,  	if (isspace(ch)) {  		parser->buffer[parser->idx] = 0;  		parser->cont = false; -	} else { +	} else if (parser->idx < parser->size - 1) {  		parser->cont = true;  		parser->buffer[parser->idx++] = ch; +	} else { +		ret = -EINVAL; +		goto out;  	}  	*ppos += read; @@ -895,27 +977,9 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)  	return cnt;  } -/* - * ftrace_max_lock is used to protect the swapping of buffers - * when taking a max snapshot. The buffers themselves are - * protected by per_cpu spinlocks. But the action of the swap - * needs its own lock. - * - * This is defined as a arch_spinlock_t in order to help - * with performance when lockdep debugging is enabled. 
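
The kerneldoc above spells out the intended split: tracing_alloc_snapshot() prepares the snapshot buffer from a context that may sleep, and tracing_snapshot() can then be fired from places that must not allocate. A hedged usage fragment, not part of this patch; it assumes CONFIG_TRACER_SNAPSHOT=y and that the prototypes are visible to the caller (they were declared next to trace_printk() in linux/kernel.h in this era):

/* Hedged usage sketch of the alloc-then-snapshot split described above. */
static int my_tracer_setup(void)
{
	int ret;

	ret = tracing_alloc_snapshot();	/* may sleep; returns <0 on failure */
	if (ret < 0)
		return ret;
	return 0;
}

static void my_critical_event(void)
{
	/* the buffer already exists, so this is usable where sleeping is not */
	tracing_snapshot();
}
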
- * - * It is also used in other places outside the update_max_tr - * so it needs to be defined outside of the - * CONFIG_TRACER_MAX_TRACE. - */ -static arch_spinlock_t ftrace_max_lock = -	(arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; -  unsigned long __read_mostly	tracing_thresh;  #ifdef CONFIG_TRACER_MAX_TRACE -unsigned long __read_mostly	tracing_max_latency; -  /*   * Copy the new maximum trace into the separate maximum-trace   * structure. (this way the maximum trace is permanently saved, @@ -932,7 +996,7 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)  	max_buf->cpu = cpu;  	max_buf->time_start = data->preempt_timestamp; -	max_data->saved_latency = tracing_max_latency; +	max_data->saved_latency = tr->max_latency;  	max_data->critical_start = data->critical_start;  	max_data->critical_end = data->critical_end; @@ -980,14 +1044,14 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)  		return;  	} -	arch_spin_lock(&ftrace_max_lock); +	arch_spin_lock(&tr->max_lock);  	buf = tr->trace_buffer.buffer;  	tr->trace_buffer.buffer = tr->max_buffer.buffer;  	tr->max_buffer.buffer = buf;  	__update_max_tr(tr, tsk, cpu); -	arch_spin_unlock(&ftrace_max_lock); +	arch_spin_unlock(&tr->max_lock);  }  /** @@ -1013,7 +1077,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)  		return;  	} -	arch_spin_lock(&ftrace_max_lock); +	arch_spin_lock(&tr->max_lock);  	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu); @@ -1031,17 +1095,17 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)  	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);  	__update_max_tr(tr, tsk, cpu); -	arch_spin_unlock(&ftrace_max_lock); +	arch_spin_unlock(&tr->max_lock);  }  #endif /* CONFIG_TRACER_MAX_TRACE */ -static void default_wait_pipe(struct trace_iterator *iter) +static int wait_on_pipe(struct trace_iterator *iter)  {  	/* Iterators are static, they should be filled or empty */  	if (trace_buffer_iter(iter, iter->cpu_file)) -		return; +		return 0; -	ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file); +	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file);  }  #ifdef CONFIG_FTRACE_STARTUP_TEST @@ -1152,8 +1216,6 @@ int register_tracer(struct tracer *type)  	else  		if (!type->flags->opts)  			type->flags->opts = dummy_tracer_opt; -	if (!type->wait_pipe) -		type->wait_pipe = default_wait_pipe;  	ret = run_tracer_selftest(type);  	if (ret < 0) @@ -1174,7 +1236,7 @@ int register_tracer(struct tracer *type)  	printk(KERN_INFO "Starting tracer '%s'\n", type->name);  	/* Do we want this tracer to start on bootup? */ -	tracing_set_tracer(type->name); +	tracing_set_tracer(&global_trace, type->name);  	default_bootup_tracer = NULL;  	/* disable other selftests, since this will break it. 
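
update_max_tr() above takes the per-array tr->max_lock only around the pointer exchange of the live and max buffers; the buffers themselves are protected elsewhere. A compilable userspace model of that swap, using a C11 atomic_flag where the kernel uses arch_spinlock_t (names are illustrative):

/* Model of the buffer swap done under tr->max_lock. */
#include <stdatomic.h>
#include <stdio.h>

struct buffer { const char *name; };

struct trace_array_model {
	struct buffer *trace_buffer;
	struct buffer *max_buffer;
	atomic_flag max_lock;		/* plays the role of arch_spinlock_t max_lock */
};

static void swap_to_max(struct trace_array_model *tr)
{
	struct buffer *tmp;

	while (atomic_flag_test_and_set(&tr->max_lock))
		;			/* spin until the lock is ours */
	tmp = tr->trace_buffer;
	tr->trace_buffer = tr->max_buffer;
	tr->max_buffer = tmp;
	atomic_flag_clear(&tr->max_lock);
}

int main(void)
{
	struct buffer live = { "live" }, max = { "max" };
	struct trace_array_model tr = { &live, &max, ATOMIC_FLAG_INIT };

	swap_to_max(&tr);
	printf("trace=%s max=%s\n", tr.trace_buffer->name, tr.max_buffer->name);
	return 0;
}
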
*/  	tracing_selftest_disabled = true; @@ -1237,42 +1299,76 @@ void tracing_reset_all_online_cpus(void)  	}  } -#define SAVED_CMDLINES 128 +#define SAVED_CMDLINES_DEFAULT 128  #define NO_CMDLINE_MAP UINT_MAX -static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1]; -static unsigned map_cmdline_to_pid[SAVED_CMDLINES]; -static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN]; -static int cmdline_idx;  static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED; +struct saved_cmdlines_buffer { +	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1]; +	unsigned *map_cmdline_to_pid; +	unsigned cmdline_num; +	int cmdline_idx; +	char *saved_cmdlines; +}; +static struct saved_cmdlines_buffer *savedcmd;  /* temporary disable recording */  static atomic_t trace_record_cmdline_disabled __read_mostly; -static void trace_init_cmdlines(void) +static inline char *get_saved_cmdlines(int idx)  { -	memset(&map_pid_to_cmdline, NO_CMDLINE_MAP, sizeof(map_pid_to_cmdline)); -	memset(&map_cmdline_to_pid, NO_CMDLINE_MAP, sizeof(map_cmdline_to_pid)); -	cmdline_idx = 0; +	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];  } -int is_tracing_stopped(void) +static inline void set_cmdline(int idx, const char *cmdline)  { -	return global_trace.stop_count; +	memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);  } -/** - * ftrace_off_permanent - disable all ftrace code permanently - * - * This should only be called when a serious anomally has - * been detected.  This will turn off the function tracing, - * ring buffers, and other tracing utilites. It takes no - * locks and can be called from any context. - */ -void ftrace_off_permanent(void) +static int allocate_cmdlines_buffer(unsigned int val, +				    struct saved_cmdlines_buffer *s)  { -	tracing_disabled = 1; -	ftrace_stop(); -	tracing_off_permanent(); +	s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid), +					GFP_KERNEL); +	if (!s->map_cmdline_to_pid) +		return -ENOMEM; + +	s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL); +	if (!s->saved_cmdlines) { +		kfree(s->map_cmdline_to_pid); +		return -ENOMEM; +	} + +	s->cmdline_idx = 0; +	s->cmdline_num = val; +	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP, +	       sizeof(s->map_pid_to_cmdline)); +	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP, +	       val * sizeof(*s->map_cmdline_to_pid)); + +	return 0; +} + +static int trace_create_savedcmd(void) +{ +	int ret; + +	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL); +	if (!savedcmd) +		return -ENOMEM; + +	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd); +	if (ret < 0) { +		kfree(savedcmd); +		savedcmd = NULL; +		return -ENOMEM; +	} + +	return 0; +} + +int is_tracing_stopped(void) +{ +	return global_trace.stop_count;  }  /** @@ -1300,7 +1396,7 @@ void tracing_start(void)  	}  	/* Prevent the buffers from switching */ -	arch_spin_lock(&ftrace_max_lock); +	arch_spin_lock(&global_trace.max_lock);  	buffer = global_trace.trace_buffer.buffer;  	if (buffer) @@ -1312,9 +1408,8 @@ void tracing_start(void)  		ring_buffer_record_enable(buffer);  #endif -	arch_spin_unlock(&ftrace_max_lock); +	arch_spin_unlock(&global_trace.max_lock); -	ftrace_start();   out:  	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);  } @@ -1361,13 +1456,12 @@ void tracing_stop(void)  	struct ring_buffer *buffer;  	unsigned long flags; -	ftrace_stop();  	raw_spin_lock_irqsave(&global_trace.start_lock, flags);  	if (global_trace.stop_count++)  		goto out;  	/* Prevent the buffers from switching */ -	arch_spin_lock(&ftrace_max_lock); +	
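
The saved_cmdlines_buffer introduced above keeps two maps (pid to slot and slot back to pid) plus a flat array of TASK_COMM_LEN-sized slots, which is what makes the cache resizable at runtime. A compilable miniature of that layout and of the slot recycling trace_save_cmdline() performs (sizes shrunk, names invented for the example):

/* Miniature of the saved_cmdlines cache: recycle slots, unmap the old pid. */
#include <stdio.h>
#include <string.h>

#define COMM_LEN	16
#define MAX_PID		32
#define NO_MAP		((unsigned)-1)
#define CMDLINE_NUM	4	/* tiny stand-in for saved_cmdlines_size */

static unsigned map_pid_to_slot[MAX_PID + 1];
static unsigned map_slot_to_pid[CMDLINE_NUM];
static char saved[CMDLINE_NUM][COMM_LEN];
static int slot_idx;

static void save_cmdline(unsigned pid, const char *comm)
{
	unsigned idx = map_pid_to_slot[pid];

	if (idx == NO_MAP) {
		idx = (slot_idx + 1) % CMDLINE_NUM;
		/* slot is being recycled: forget the pid that owned it */
		if (map_slot_to_pid[idx] != NO_MAP)
			map_pid_to_slot[map_slot_to_pid[idx]] = NO_MAP;
		map_slot_to_pid[idx] = pid;
		map_pid_to_slot[pid] = idx;
		slot_idx = idx;
	}
	snprintf(saved[idx], COMM_LEN, "%s", comm);
}

int main(void)
{
	/* all-0xff bytes == NO_MAP, same trick as NO_CMDLINE_MAP in the kernel */
	memset(map_pid_to_slot, 0xff, sizeof(map_pid_to_slot));
	memset(map_slot_to_pid, 0xff, sizeof(map_slot_to_pid));

	save_cmdline(10, "bash");
	save_cmdline(11, "sshd");
	printf("pid 10 -> %s\n", saved[map_pid_to_slot[10]]);
	return 0;
}
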
arch_spin_lock(&global_trace.max_lock);  	buffer = global_trace.trace_buffer.buffer;  	if (buffer) @@ -1379,7 +1473,7 @@ void tracing_stop(void)  		ring_buffer_record_disable(buffer);  #endif -	arch_spin_unlock(&ftrace_max_lock); +	arch_spin_unlock(&global_trace.max_lock);   out:  	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags); @@ -1408,12 +1502,12 @@ static void tracing_stop_tr(struct trace_array *tr)  void trace_stop_cmdline_recording(void); -static void trace_save_cmdline(struct task_struct *tsk) +static int trace_save_cmdline(struct task_struct *tsk)  {  	unsigned pid, idx;  	if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT)) -		return; +		return 0;  	/*  	 * It's not the end of the world if we don't get @@ -1422,11 +1516,11 @@ static void trace_save_cmdline(struct task_struct *tsk)  	 * so if we miss here, then better luck next time.  	 */  	if (!arch_spin_trylock(&trace_cmdline_lock)) -		return; +		return 0; -	idx = map_pid_to_cmdline[tsk->pid]; +	idx = savedcmd->map_pid_to_cmdline[tsk->pid];  	if (idx == NO_CMDLINE_MAP) { -		idx = (cmdline_idx + 1) % SAVED_CMDLINES; +		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;  		/*  		 * Check whether the cmdline buffer at idx has a pid @@ -1434,22 +1528,24 @@ static void trace_save_cmdline(struct task_struct *tsk)  		 * need to clear the map_pid_to_cmdline. Otherwise we  		 * would read the new comm for the old pid.  		 */ -		pid = map_cmdline_to_pid[idx]; +		pid = savedcmd->map_cmdline_to_pid[idx];  		if (pid != NO_CMDLINE_MAP) -			map_pid_to_cmdline[pid] = NO_CMDLINE_MAP; +			savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP; -		map_cmdline_to_pid[idx] = tsk->pid; -		map_pid_to_cmdline[tsk->pid] = idx; +		savedcmd->map_cmdline_to_pid[idx] = tsk->pid; +		savedcmd->map_pid_to_cmdline[tsk->pid] = idx; -		cmdline_idx = idx; +		savedcmd->cmdline_idx = idx;  	} -	memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN); +	set_cmdline(idx, tsk->comm);  	arch_spin_unlock(&trace_cmdline_lock); + +	return 1;  } -void trace_find_cmdline(int pid, char comm[]) +static void __trace_find_cmdline(int pid, char comm[])  {  	unsigned map; @@ -1468,13 +1564,19 @@ void trace_find_cmdline(int pid, char comm[])  		return;  	} -	preempt_disable(); -	arch_spin_lock(&trace_cmdline_lock); -	map = map_pid_to_cmdline[pid]; +	map = savedcmd->map_pid_to_cmdline[pid];  	if (map != NO_CMDLINE_MAP) -		strcpy(comm, saved_cmdlines[map]); +		strcpy(comm, get_saved_cmdlines(map));  	else  		strcpy(comm, "<...>"); +} + +void trace_find_cmdline(int pid, char comm[]) +{ +	preempt_disable(); +	arch_spin_lock(&trace_cmdline_lock); + +	__trace_find_cmdline(pid, comm);  	arch_spin_unlock(&trace_cmdline_lock);  	preempt_enable(); @@ -1488,9 +1590,8 @@ void tracing_record_cmdline(struct task_struct *tsk)  	if (!__this_cpu_read(trace_cmdline_save))  		return; -	__this_cpu_write(trace_cmdline_save, false); - -	trace_save_cmdline(tsk); +	if (trace_save_cmdline(tsk)) +		__this_cpu_write(trace_cmdline_save, false);  }  void @@ -1509,7 +1610,8 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,  #endif  		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |  		((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) | -		(need_resched() ? TRACE_FLAG_NEED_RESCHED : 0); +		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) | +		(test_preempt_need_resched() ? 
TRACE_FLAG_PREEMPT_RESCHED : 0);  }  EXPORT_SYMBOL_GPL(tracing_generic_entry_update); @@ -1558,15 +1660,31 @@ void trace_buffer_unlock_commit(struct ring_buffer *buffer,  }  EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit); +static struct ring_buffer *temp_buffer; +  struct ring_buffer_event *  trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,  			  struct ftrace_event_file *ftrace_file,  			  int type, unsigned long len,  			  unsigned long flags, int pc)  { +	struct ring_buffer_event *entry; +  	*current_rb = ftrace_file->tr->trace_buffer.buffer; -	return trace_buffer_lock_reserve(*current_rb, +	entry = trace_buffer_lock_reserve(*current_rb,  					 type, len, flags, pc); +	/* +	 * If tracing is off, but we have triggers enabled +	 * we still need to look at the event data. Use the temp_buffer +	 * to store the trace event for the tigger to use. It's recusive +	 * safe and will not be recorded anywhere. +	 */ +	if (!entry && ftrace_file->flags & FTRACE_EVENT_FL_TRIGGER_COND) { +		*current_rb = temp_buffer; +		entry = trace_buffer_lock_reserve(*current_rb, +						  type, len, flags, pc); +	} +	return entry;  }  EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve); @@ -1630,7 +1748,7 @@ trace_function(struct trace_array *tr,  	entry->ip			= ip;  	entry->parent_ip		= parent_ip; -	if (!filter_check_discard(call, entry, buffer, event)) +	if (!call_filter_check_discard(call, entry, buffer, event))  		__buffer_unlock_commit(buffer, event);  } @@ -1676,7 +1794,7 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer,  	 */  	barrier();  	if (use_stack == 1) { -		trace.entries		= &__get_cpu_var(ftrace_stack).calls[0]; +		trace.entries		= this_cpu_ptr(ftrace_stack.calls);  		trace.max_entries	= FTRACE_STACK_MAX_ENTRIES;  		if (regs) @@ -1714,7 +1832,7 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer,  	entry->size = trace.nr_entries; -	if (!filter_check_discard(call, entry, buffer, event)) +	if (!call_filter_check_discard(call, entry, buffer, event))  		__buffer_unlock_commit(buffer, event);   out: @@ -1816,7 +1934,7 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)  	trace.entries		= entry->caller;  	save_stack_trace_user(&trace); -	if (!filter_check_discard(call, entry, buffer, event)) +	if (!call_filter_check_discard(call, entry, buffer, event))  		__buffer_unlock_commit(buffer, event);   out_drop_count: @@ -1925,7 +2043,21 @@ void trace_printk_init_buffers(void)  	if (alloc_percpu_trace_buffer())  		return; -	pr_info("ftrace: Allocated trace_printk buffers\n"); +	/* trace_printk() is for debug use only. Don't use it in production. */ + +	pr_warning("\n**********************************************************\n"); +	pr_warning("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n"); +	pr_warning("**                                                      **\n"); +	pr_warning("** trace_printk() being used. Allocating extra memory.  **\n"); +	pr_warning("**                                                      **\n"); +	pr_warning("** This means that this is a DEBUG kernel and it is     **\n"); +	pr_warning("** unsafe for produciton use.                           **\n"); +	pr_warning("**                                                      **\n"); +	pr_warning("** If you see this message and you are not debugging    **\n"); +	pr_warning("** the kernel, report this immediately to your vendor!  
**\n"); +	pr_warning("**                                                      **\n"); +	pr_warning("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n"); +	pr_warning("**********************************************************\n");  	/* Expand the buffers to set size */  	tracing_update_buffers(); @@ -2008,7 +2140,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)  	entry->fmt			= fmt;  	memcpy(entry->buf, tbuffer, sizeof(u32) * len); -	if (!filter_check_discard(call, entry, buffer, event)) { +	if (!call_filter_check_discard(call, entry, buffer, event)) {  		__buffer_unlock_commit(buffer, event);  		ftrace_trace_stack(buffer, flags, 6, pc);  	} @@ -2063,7 +2195,7 @@ __trace_array_vprintk(struct ring_buffer *buffer,  	memcpy(&entry->buf, tbuffer, len);  	entry->buf[len] = '\0'; -	if (!filter_check_discard(call, entry, buffer, event)) { +	if (!call_filter_check_discard(call, entry, buffer, event)) {  		__buffer_unlock_commit(buffer, event);  		ftrace_trace_stack(buffer, flags, 6, pc);  	} @@ -2760,7 +2892,7 @@ static void show_snapshot_main_help(struct seq_file *m)  	seq_printf(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n");  	seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");  	seq_printf(m, "#                      Takes a snapshot of the main buffer.\n"); -	seq_printf(m, "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate)\n"); +	seq_printf(m, "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n");  	seq_printf(m, "#                      (Doesn't have to be '2' works with any number that\n");  	seq_printf(m, "#                       is not a '0' or '1')\n");  } @@ -2964,6 +3096,11 @@ int tracing_open_generic(struct inode *inode, struct file *filp)  	return 0;  } +bool tracing_is_disabled(void) +{ +	return (tracing_disabled) ? true: false; +} +  /*   * Open and update trace_array ref count.   * Must have the current trace_array passed to it. @@ -3074,27 +3211,52 @@ static int tracing_open(struct inode *inode, struct file *file)  	return ret;  } +/* + * Some tracers are not suitable for instance buffers. + * A tracer is always available for the global array (toplevel) + * or if it explicitly states that it is. 
+ */ +static bool +trace_ok_for_array(struct tracer *t, struct trace_array *tr) +{ +	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances; +} + +/* Find the next tracer that this trace array may use */ +static struct tracer * +get_tracer_for_array(struct trace_array *tr, struct tracer *t) +{ +	while (t && !trace_ok_for_array(t, tr)) +		t = t->next; + +	return t; +} +  static void *  t_next(struct seq_file *m, void *v, loff_t *pos)  { +	struct trace_array *tr = m->private;  	struct tracer *t = v;  	(*pos)++;  	if (t) -		t = t->next; +		t = get_tracer_for_array(tr, t->next);  	return t;  }  static void *t_start(struct seq_file *m, loff_t *pos)  { +	struct trace_array *tr = m->private;  	struct tracer *t;  	loff_t l = 0;  	mutex_lock(&trace_types_lock); -	for (t = trace_types; t && l < *pos; t = t_next(m, t, &l)) -		; + +	t = get_tracer_for_array(tr, trace_types); +	for (; t && l < *pos; t = t_next(m, t, &l)) +			;  	return t;  } @@ -3129,10 +3291,21 @@ static const struct seq_operations show_traces_seq_ops = {  static int show_traces_open(struct inode *inode, struct file *file)  { +	struct trace_array *tr = inode->i_private; +	struct seq_file *m; +	int ret; +  	if (tracing_disabled)  		return -ENODEV; -	return seq_open(file, &show_traces_seq_ops); +	ret = seq_open(file, &show_traces_seq_ops); +	if (ret) +		return ret; + +	m = file->private_data; +	m->private = tr; + +	return 0;  }  static ssize_t @@ -3142,19 +3315,23 @@ tracing_write_stub(struct file *filp, const char __user *ubuf,  	return count;  } -static loff_t tracing_seek(struct file *file, loff_t offset, int origin) +loff_t tracing_lseek(struct file *file, loff_t offset, int whence)  { +	int ret; +  	if (file->f_mode & FMODE_READ) -		return seq_lseek(file, offset, origin); +		ret = seq_lseek(file, offset, whence);  	else -		return 0; +		file->f_pos = ret = 0; + +	return ret;  }  static const struct file_operations tracing_fops = {  	.open		= tracing_open,  	.read		= seq_read,  	.write		= tracing_write_stub, -	.llseek		= tracing_seek, +	.llseek		= tracing_lseek,  	.release	= tracing_release,  }; @@ -3218,7 +3395,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,  	mutex_lock(&tracing_cpumask_update_lock);  	local_irq_disable(); -	arch_spin_lock(&ftrace_max_lock); +	arch_spin_lock(&tr->max_lock);  	for_each_tracing_cpu(cpu) {  		/*  		 * Increase/decrease the disabled counter if we are @@ -3235,7 +3412,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,  			ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);  		}  	} -	arch_spin_unlock(&ftrace_max_lock); +	arch_spin_unlock(&tr->max_lock);  	local_irq_enable();  	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new); @@ -3288,13 +3465,14 @@ static int tracing_trace_options_show(struct seq_file *m, void *v)  	return 0;  } -static int __set_tracer_option(struct tracer *trace, +static int __set_tracer_option(struct trace_array *tr,  			       struct tracer_flags *tracer_flags,  			       struct tracer_opt *opts, int neg)  { +	struct tracer *trace = tr->current_trace;  	int ret; -	ret = trace->set_flag(tracer_flags->val, opts->bit, !neg); +	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);  	if (ret)  		return ret; @@ -3306,8 +3484,9 @@ static int __set_tracer_option(struct tracer *trace,  }  /* Try to assign a tracer specific option */ -static int set_tracer_option(struct tracer *trace, char *cmp, int neg) +static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)  { +	struct tracer *trace = 
tr->current_trace;  	struct tracer_flags *tracer_flags = trace->flags;  	struct tracer_opt *opts = NULL;  	int i; @@ -3316,8 +3495,7 @@ static int set_tracer_option(struct tracer *trace, char *cmp, int neg)  		opts = &tracer_flags->opts[i];  		if (strcmp(cmp, opts->name) == 0) -			return __set_tracer_option(trace, trace->flags, -						   opts, neg); +			return __set_tracer_option(tr, trace->flags, opts, neg);  	}  	return -EINVAL; @@ -3340,7 +3518,7 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)  	/* Give the tracer a chance to approve the change */  	if (tr->current_trace->flag_changed) -		if (tr->current_trace->flag_changed(tr->current_trace, mask, !!enabled)) +		if (tr->current_trace->flag_changed(tr, mask, !!enabled))  			return -EINVAL;  	if (enabled) @@ -3389,7 +3567,7 @@ static int trace_set_options(struct trace_array *tr, char *option)  	/* If no option could be set, test the specific tracer options */  	if (!trace_options[i]) -		ret = set_tracer_option(tr->current_trace, cmp, neg); +		ret = set_tracer_option(tr, cmp, neg);  	mutex_unlock(&trace_types_lock); @@ -3474,60 +3652,106 @@ static const char readme_msg[] =  	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"  	"\t\t\t  Remove sub-buffer with rmdir\n"  	"  trace_options\t\t- Set format or modify how tracing happens\n" -	"\t\t\t  Disable an option by adding a suffix 'no' to the option name\n" +	"\t\t\t  Disable an option by adding a suffix 'no' to the\n" +	"\t\t\t  option name\n" +	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"  #ifdef CONFIG_DYNAMIC_FTRACE  	"\n  available_filter_functions - list of functions that can be filtered on\n" -	"  set_ftrace_filter\t- echo function name in here to only trace these functions\n" -	"            accepts: func_full_name, *func_end, func_begin*, *func_middle*\n" -	"            modules: Can select a group via module\n" -	"             Format: :mod:<module-name>\n" -	"             example: echo :mod:ext3 > set_ftrace_filter\n" -	"            triggers: a command to perform when function is hit\n" -	"              Format: <function>:<trigger>[:count]\n" -	"             trigger: traceon, traceoff\n" -	"                      enable_event:<system>:<event>\n" -	"                      disable_event:<system>:<event>\n" +	"  set_ftrace_filter\t- echo function name in here to only trace these\n" +	"\t\t\t  functions\n" +	"\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n" +	"\t     modules: Can select a group via module\n" +	"\t      Format: :mod:<module-name>\n" +	"\t     example: echo :mod:ext3 > set_ftrace_filter\n" +	"\t    triggers: a command to perform when function is hit\n" +	"\t      Format: <function>:<trigger>[:count]\n" +	"\t     trigger: traceon, traceoff\n" +	"\t\t      enable_event:<system>:<event>\n" +	"\t\t      disable_event:<system>:<event>\n"  #ifdef CONFIG_STACKTRACE -	"                      stacktrace\n" +	"\t\t      stacktrace\n"  #endif  #ifdef CONFIG_TRACER_SNAPSHOT -	"                      snapshot\n" +	"\t\t      snapshot\n"  #endif -	"             example: echo do_fault:traceoff > set_ftrace_filter\n" -	"                      echo do_trap:traceoff:3 > set_ftrace_filter\n" -	"             The first one will disable tracing every time do_fault is hit\n" -	"             The second will disable tracing at most 3 times when do_trap is hit\n" -	"               The first time do trap is hit and it disables tracing, the counter\n" -	"               will decrement to 2. 
If tracing is already disabled, the counter\n" -	"               will not decrement. It only decrements when the trigger did work\n" -	"             To remove trigger without count:\n" -	"               echo '!<function>:<trigger> > set_ftrace_filter\n" -	"             To remove trigger with a count:\n" -	"               echo '!<function>:<trigger>:0 > set_ftrace_filter\n" +	"\t\t      dump\n" +	"\t\t      cpudump\n" +	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n" +	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n" +	"\t     The first one will disable tracing every time do_fault is hit\n" +	"\t     The second will disable tracing at most 3 times when do_trap is hit\n" +	"\t       The first time do trap is hit and it disables tracing, the\n" +	"\t       counter will decrement to 2. If tracing is already disabled,\n" +	"\t       the counter will not decrement. It only decrements when the\n" +	"\t       trigger did work\n" +	"\t     To remove trigger without count:\n" +	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n" +	"\t     To remove trigger with a count:\n" +	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"  	"  set_ftrace_notrace\t- echo function name in here to never trace.\n" -	"            accepts: func_full_name, *func_end, func_begin*, *func_middle*\n" -	"            modules: Can select a group via module command :mod:\n" -	"            Does not accept triggers\n" +	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n" +	"\t    modules: Can select a group via module command :mod:\n" +	"\t    Does not accept triggers\n"  #endif /* CONFIG_DYNAMIC_FTRACE */  #ifdef CONFIG_FUNCTION_TRACER -	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids (function)\n" +	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n" +	"\t\t    (function)\n"  #endif  #ifdef CONFIG_FUNCTION_GRAPH_TRACER  	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"  	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"  #endif  #ifdef CONFIG_TRACER_SNAPSHOT -	"\n  snapshot\t\t- Like 'trace' but shows the content of the static snapshot buffer\n" -	"\t\t\t  Read the contents for more information\n" +	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n" +	"\t\t\t  snapshot buffer. 
Read the contents for more\n" +	"\t\t\t  information\n"  #endif  #ifdef CONFIG_STACK_TRACER  	"  stack_trace\t\t- Shows the max stack trace when active\n"  	"  stack_max_size\t- Shows current max stack size that was traced\n" -	"\t\t\t  Write into this file to reset the max size (trigger a new trace)\n" +	"\t\t\t  Write into this file to reset the max size (trigger a\n" +	"\t\t\t  new trace)\n"  #ifdef CONFIG_DYNAMIC_FTRACE -	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace traces\n" +	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n" +	"\t\t\t  traces\n"  #endif  #endif /* CONFIG_STACK_TRACER */ +	"  events/\t\t- Directory containing all trace event subsystems:\n" +	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n" +	"  events/<system>/\t- Directory containing all trace events for <system>:\n" +	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n" +	"\t\t\t  events\n" +	"      filter\t\t- If set, only events passing filter are traced\n" +	"  events/<system>/<event>/\t- Directory containing control files for\n" +	"\t\t\t  <event>:\n" +	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n" +	"      filter\t\t- If set, only events passing filter are traced\n" +	"      trigger\t\t- If set, a command to perform when event is hit\n" +	"\t    Format: <trigger>[:count][if <filter>]\n" +	"\t   trigger: traceon, traceoff\n" +	"\t            enable_event:<system>:<event>\n" +	"\t            disable_event:<system>:<event>\n" +#ifdef CONFIG_STACKTRACE +	"\t\t    stacktrace\n" +#endif +#ifdef CONFIG_TRACER_SNAPSHOT +	"\t\t    snapshot\n" +#endif +	"\t   example: echo traceoff > events/block/block_unplug/trigger\n" +	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n" +	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n" +	"\t                  events/block/block_unplug/trigger\n" +	"\t   The first disables tracing every time block_unplug is hit.\n" +	"\t   The second disables tracing the first 3 times block_unplug is hit.\n" +	"\t   The third enables the kmalloc event the first 3 times block_unplug\n" +	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n" +	"\t   Like function triggers, the counter is only decremented if it\n" +	"\t    enabled or disabled tracing.\n" +	"\t   To remove a trigger without a count:\n" +	"\t     echo '!<trigger> > <system>/<event>/trigger\n" +	"\t   To remove a trigger with a count:\n" +	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n" +	"\t   Filters can be ignored when removing a trigger.\n"  ;  static ssize_t @@ -3544,55 +3768,153 @@ static const struct file_operations tracing_readme_fops = {  	.llseek		= generic_file_llseek,  }; +static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos) +{ +	unsigned int *ptr = v; + +	if (*pos || m->count) +		ptr++; + +	(*pos)++; + +	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num]; +	     ptr++) { +		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP) +			continue; + +		return ptr; +	} + +	return NULL; +} + +static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos) +{ +	void *v; +	loff_t l = 0; + +	preempt_disable(); +	arch_spin_lock(&trace_cmdline_lock); + +	v = &savedcmd->map_cmdline_to_pid[0]; +	while (l <= *pos) { +		v = saved_cmdlines_next(m, v, &l); +		if (!v) +			return NULL; +	} + +	return v; +} + +static void saved_cmdlines_stop(struct seq_file *m, void *v) +{ +	arch_spin_unlock(&trace_cmdline_lock); +	
preempt_enable(); +} + +static int saved_cmdlines_show(struct seq_file *m, void *v) +{ +	char buf[TASK_COMM_LEN]; +	unsigned int *pid = v; + +	__trace_find_cmdline(*pid, buf); +	seq_printf(m, "%d %s\n", *pid, buf); +	return 0; +} + +static const struct seq_operations tracing_saved_cmdlines_seq_ops = { +	.start		= saved_cmdlines_start, +	.next		= saved_cmdlines_next, +	.stop		= saved_cmdlines_stop, +	.show		= saved_cmdlines_show, +}; + +static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp) +{ +	if (tracing_disabled) +		return -ENODEV; + +	return seq_open(filp, &tracing_saved_cmdlines_seq_ops); +} + +static const struct file_operations tracing_saved_cmdlines_fops = { +	.open		= tracing_saved_cmdlines_open, +	.read		= seq_read, +	.llseek		= seq_lseek, +	.release	= seq_release, +}; +  static ssize_t -tracing_saved_cmdlines_read(struct file *file, char __user *ubuf, -				size_t cnt, loff_t *ppos) +tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf, +				 size_t cnt, loff_t *ppos)  { -	char *buf_comm; -	char *file_buf; -	char *buf; -	int len = 0; -	int pid; -	int i; +	char buf[64]; +	int r; + +	arch_spin_lock(&trace_cmdline_lock); +	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num); +	arch_spin_unlock(&trace_cmdline_lock); -	file_buf = kmalloc(SAVED_CMDLINES*(16+TASK_COMM_LEN), GFP_KERNEL); -	if (!file_buf) +	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); +} + +static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s) +{ +	kfree(s->saved_cmdlines); +	kfree(s->map_cmdline_to_pid); +	kfree(s); +} + +static int tracing_resize_saved_cmdlines(unsigned int val) +{ +	struct saved_cmdlines_buffer *s, *savedcmd_temp; + +	s = kmalloc(sizeof(*s), GFP_KERNEL); +	if (!s)  		return -ENOMEM; -	buf_comm = kmalloc(TASK_COMM_LEN, GFP_KERNEL); -	if (!buf_comm) { -		kfree(file_buf); +	if (allocate_cmdlines_buffer(val, s) < 0) { +		kfree(s);  		return -ENOMEM;  	} -	buf = file_buf; +	arch_spin_lock(&trace_cmdline_lock); +	savedcmd_temp = savedcmd; +	savedcmd = s; +	arch_spin_unlock(&trace_cmdline_lock); +	free_saved_cmdlines_buffer(savedcmd_temp); + +	return 0; +} -	for (i = 0; i < SAVED_CMDLINES; i++) { -		int r; +static ssize_t +tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf, +				  size_t cnt, loff_t *ppos) +{ +	unsigned long val; +	int ret; -		pid = map_cmdline_to_pid[i]; -		if (pid == -1 || pid == NO_CMDLINE_MAP) -			continue; +	ret = kstrtoul_from_user(ubuf, cnt, 10, &val); +	if (ret) +		return ret; -		trace_find_cmdline(pid, buf_comm); -		r = sprintf(buf, "%d %s\n", pid, buf_comm); -		buf += r; -		len += r; -	} +	/* must have at least 1 entry or less than PID_MAX_DEFAULT */ +	if (!val || val > PID_MAX_DEFAULT) +		return -EINVAL; -	len = simple_read_from_buffer(ubuf, cnt, ppos, -				      file_buf, len); +	ret = tracing_resize_saved_cmdlines((unsigned int)val); +	if (ret < 0) +		return ret; -	kfree(file_buf); -	kfree(buf_comm); +	*ppos += cnt; -	return len; +	return cnt;  } -static const struct file_operations tracing_saved_cmdlines_fops = { -    .open       = tracing_open_generic, -    .read       = tracing_saved_cmdlines_read, -    .llseek	= generic_file_llseek, +static const struct file_operations tracing_saved_cmdlines_size_fops = { +	.open		= tracing_open_generic, +	.read		= tracing_saved_cmdlines_size_read, +	.write		= tracing_saved_cmdlines_size_write,  };  static ssize_t @@ -3775,10 +4097,26 @@ create_trace_option_files(struct trace_array *tr, struct tracer *tracer);  static void  
destroy_trace_option_files(struct trace_option_dentry *topts); -static int tracing_set_tracer(const char *buf) +/* + * Used to clear out the tracer before deletion of an instance. + * Must have trace_types_lock held. + */ +static void tracing_set_nop(struct trace_array *tr) +{ +	if (tr->current_trace == &nop_trace) +		return; +	 +	tr->current_trace->enabled--; + +	if (tr->current_trace->reset) +		tr->current_trace->reset(tr); + +	tr->current_trace = &nop_trace; +} + +static int tracing_set_tracer(struct trace_array *tr, const char *buf)  {  	static struct trace_option_dentry *topts; -	struct trace_array *tr = &global_trace;  	struct tracer *t;  #ifdef CONFIG_TRACER_MAX_TRACE  	bool had_max_tr; @@ -3806,9 +4144,15 @@ static int tracing_set_tracer(const char *buf)  	if (t == tr->current_trace)  		goto out; +	/* Some tracers are only allowed for the top level buffer */ +	if (!trace_ok_for_array(t, tr)) { +		ret = -EINVAL; +		goto out; +	} +  	trace_branch_disable(); -	tr->current_trace->enabled = false; +	tr->current_trace->enabled--;  	if (tr->current_trace->reset)  		tr->current_trace->reset(tr); @@ -3831,9 +4175,11 @@ static int tracing_set_tracer(const char *buf)  		free_snapshot(tr);  	}  #endif -	destroy_trace_option_files(topts); - -	topts = create_trace_option_files(tr, t); +	/* Currently, only the top instance has options */ +	if (tr->flags & TRACE_ARRAY_FL_GLOBAL) { +		destroy_trace_option_files(topts); +		topts = create_trace_option_files(tr, t); +	}  #ifdef CONFIG_TRACER_MAX_TRACE  	if (t->use_max_tr && !had_max_tr) { @@ -3850,7 +4196,7 @@ static int tracing_set_tracer(const char *buf)  	}  	tr->current_trace = t; -	tr->current_trace->enabled = true; +	tr->current_trace->enabled++;  	trace_branch_enable(tr);   out:  	mutex_unlock(&trace_types_lock); @@ -3862,6 +4208,7 @@ static ssize_t  tracing_set_trace_write(struct file *filp, const char __user *ubuf,  			size_t cnt, loff_t *ppos)  { +	struct trace_array *tr = filp->private_data;  	char buf[MAX_TRACER_SIZE+1];  	int i;  	size_t ret; @@ -3881,7 +4228,7 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,  	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)  		buf[i] = 0; -	err = tracing_set_tracer(buf); +	err = tracing_set_tracer(tr, buf);  	if (err)  		return err; @@ -4039,29 +4386,11 @@ tracing_poll_pipe(struct file *filp, poll_table *poll_table)  	return trace_poll(iter, filp, poll_table);  } -/* - * This is a make-shift waitqueue. - * A tracer might use this callback on some rare cases: - * - *  1) the current tracer might hold the runqueue lock when it wakes up - *     a reader, hence a deadlock (sched, function, and function graph tracers) - *  2) the function tracers, trace all functions, we don't want - *     the overhead of calling wake_up and friends - *     (and tracing them too) - * - *     Anyway, this is really very primitive wakeup. - */ -void poll_wait_pipe(struct trace_iterator *iter) -{ -	set_current_state(TASK_INTERRUPTIBLE); -	/* sleep for 100 msecs, and try again. */ -	schedule_timeout(HZ / 10); -} -  /* Must be called with trace_types_lock mutex held. 
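
tracing_set_tracer() and tracing_set_nop() above do enabled++/enabled-- rather than flipping a bool, because with instance buffers the same tracer can be the current tracer of several trace_arrays at once. A small compilable model of that counting (all names invented for the example):

/* Model of tracer->enabled as a per-tracer use count across arrays. */
#include <stdio.h>

struct tracer_model { const char *name; int enabled; };

struct array_model { struct tracer_model *current_trace; };

static struct tracer_model nop = { "nop", 0 };

static void set_tracer(struct array_model *tr, struct tracer_model *t)
{
	if (tr->current_trace)
		tr->current_trace->enabled--;
	t->enabled++;
	tr->current_trace = t;
}

static void set_nop(struct array_model *tr)
{
	set_tracer(tr, &nop);
}

int main(void)
{
	struct tracer_model function = { "function", 0 };
	struct array_model top = { &nop }, inst = { &nop };

	nop.enabled = 2;	/* both arrays start on nop */
	set_tracer(&top, &function);
	set_tracer(&inst, &function);
	printf("function enabled=%d\n", function.enabled);	/* 2 */
	set_nop(&inst);
	printf("function enabled=%d\n", function.enabled);	/* 1 */
	return 0;
}
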
*/  static int tracing_wait_pipe(struct file *filp)  {  	struct trace_iterator *iter = filp->private_data; +	int ret;  	while (trace_empty(iter)) { @@ -4069,15 +4398,6 @@ static int tracing_wait_pipe(struct file *filp)  			return -EAGAIN;  		} -		mutex_unlock(&iter->mutex); - -		iter->trace->wait_pipe(iter); - -		mutex_lock(&iter->mutex); - -		if (signal_pending(current)) -			return -EINTR; -  		/*  		 * We block until we read something and tracing is disabled.  		 * We still block if tracing is disabled, but we have never @@ -4089,6 +4409,18 @@ static int tracing_wait_pipe(struct file *filp)  		 */  		if (!tracing_is_on() && iter->pos)  			break; + +		mutex_unlock(&iter->mutex); + +		ret = wait_on_pipe(iter); + +		mutex_lock(&iter->mutex); + +		if (ret) +			return ret; + +		if (signal_pending(current)) +			return -EINTR;  	}  	return 1; @@ -4198,12 +4530,6 @@ out:  	return sret;  } -static void tracing_pipe_buf_release(struct pipe_inode_info *pipe, -				     struct pipe_buffer *buf) -{ -	__free_page(buf->page); -} -  static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,  				     unsigned int idx)  { @@ -4212,10 +4538,8 @@ static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,  static const struct pipe_buf_operations tracing_pipe_buf_ops = {  	.can_merge		= 0, -	.map			= generic_pipe_buf_map, -	.unmap			= generic_pipe_buf_unmap,  	.confirm		= generic_pipe_buf_confirm, -	.release		= tracing_pipe_buf_release, +	.release		= generic_pipe_buf_release,  	.steal			= generic_pipe_buf_steal,  	.get			= generic_pipe_buf_get,  }; @@ -4308,7 +4632,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,  	trace_access_lock(iter->cpu_file);  	/* Fill as many pages as possible. */ -	for (i = 0, rem = len; i < pipe->buffers && rem; i++) { +	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {  		spd.pages[i] = alloc_page(GFP_KERNEL);  		if (!spd.pages[i])  			break; @@ -4595,25 +4919,10 @@ static int tracing_clock_show(struct seq_file *m, void *v)  	return 0;  } -static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf, -				   size_t cnt, loff_t *fpos) +static int tracing_set_clock(struct trace_array *tr, const char *clockstr)  { -	struct seq_file *m = filp->private_data; -	struct trace_array *tr = m->private; -	char buf[64]; -	const char *clockstr;  	int i; -	if (cnt >= sizeof(buf)) -		return -EINVAL; - -	if (copy_from_user(&buf, ubuf, cnt)) -		return -EFAULT; - -	buf[cnt] = 0; - -	clockstr = strstrip(buf); -  	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {  		if (strcmp(trace_clocks[i].name, clockstr) == 0)  			break; @@ -4641,6 +4950,32 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,  	mutex_unlock(&trace_types_lock); +	return 0; +} + +static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf, +				   size_t cnt, loff_t *fpos) +{ +	struct seq_file *m = filp->private_data; +	struct trace_array *tr = m->private; +	char buf[64]; +	const char *clockstr; +	int ret; + +	if (cnt >= sizeof(buf)) +		return -EINVAL; + +	if (copy_from_user(&buf, ubuf, cnt)) +		return -EFAULT; + +	buf[cnt] = 0; + +	clockstr = strstrip(buf); + +	ret = tracing_set_clock(tr, clockstr); +	if (ret) +		return ret; +  	*fpos += cnt;  	return cnt; @@ -4899,7 +5234,7 @@ static const struct file_operations snapshot_fops = {  	.open		= tracing_snapshot_open,  	.read		= seq_read,  	.write		= tracing_snapshot_write, -	.llseek		= tracing_seek, +	.llseek		= tracing_lseek,  	.release	= tracing_snapshot_release,  }; @@ -5008,8 
+5343,12 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,  				goto out_unlock;  			}  			mutex_unlock(&trace_types_lock); -			iter->trace->wait_pipe(iter); +			ret = wait_on_pipe(iter);  			mutex_lock(&trace_types_lock); +			if (ret) { +				size = ret; +				goto out_unlock; +			}  			if (signal_pending(current)) {  				size = -EINTR;  				goto out_unlock; @@ -5090,8 +5429,6 @@ static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,  /* Pipe buffer operations for a buffer. */  static const struct pipe_buf_operations buffer_pipe_buf_ops = {  	.can_merge		= 0, -	.map			= generic_pipe_buf_map, -	.unmap			= generic_pipe_buf_unmap,  	.confirm		= generic_pipe_buf_confirm,  	.release		= buffer_pipe_buf_release,  	.steal			= generic_pipe_buf_steal, @@ -5167,7 +5504,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,  	trace_access_lock(iter->cpu_file);  	entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file); -	for (i = 0; i < pipe->buffers && len && entries; i++, len -= PAGE_SIZE) { +	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {  		struct page *page;  		int r; @@ -5221,8 +5558,10 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,  			goto out;  		}  		mutex_unlock(&trace_types_lock); -		iter->trace->wait_pipe(iter); +		ret = wait_on_pipe(iter);  		mutex_lock(&trace_types_lock); +		if (ret) +			goto out;  		if (signal_pending(current)) {  			ret = -EINTR;  			goto out; @@ -5454,12 +5793,12 @@ static struct ftrace_func_command ftrace_snapshot_cmd = {  	.func			= ftrace_trace_snapshot_callback,  }; -static int register_snapshot_cmd(void) +static __init int register_snapshot_cmd(void)  {  	return register_ftrace_command(&ftrace_snapshot_cmd);  }  #else -static inline int register_snapshot_cmd(void) { return 0; } +static inline __init int register_snapshot_cmd(void) { return 0; }  #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */  struct dentry *tracing_init_dentry_tr(struct trace_array *tr) @@ -5601,7 +5940,7 @@ trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,  	if (!!(topt->flags->val & topt->opt->bit) != val) {  		mutex_lock(&trace_types_lock); -		ret = __set_tracer_option(topt->tr->current_trace, topt->flags, +		ret = __set_tracer_option(topt->tr, topt->flags,  					  topt->opt, !val);  		mutex_unlock(&trace_types_lock);  		if (ret) @@ -5869,6 +6208,8 @@ allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size  	rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? 
RB_FL_OVERWRITE : 0; +	buf->tr = tr; +  	buf->buffer = ring_buffer_alloc(size, rb_flags);  	if (!buf->buffer)  		return -ENOMEM; @@ -5913,6 +6254,28 @@ static int allocate_trace_buffers(struct trace_array *tr, int size)  	return 0;  } +static void free_trace_buffer(struct trace_buffer *buf) +{ +	if (buf->buffer) { +		ring_buffer_free(buf->buffer); +		buf->buffer = NULL; +		free_percpu(buf->data); +		buf->data = NULL; +	} +} + +static void free_trace_buffers(struct trace_array *tr) +{ +	if (!tr) +		return; + +	free_trace_buffer(&tr->trace_buffer); + +#ifdef CONFIG_TRACER_MAX_TRACE +	free_trace_buffer(&tr->max_buffer); +#endif +} +  static int new_instance_create(const char *name)  {  	struct trace_array *tr; @@ -5942,6 +6305,8 @@ static int new_instance_create(const char *name)  	raw_spin_lock_init(&tr->start_lock); +	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; +  	tr->current_trace = &nop_trace;  	INIT_LIST_HEAD(&tr->systems); @@ -5969,8 +6334,7 @@ static int new_instance_create(const char *name)  	return 0;   out_free_tr: -	if (tr->trace_buffer.buffer) -		ring_buffer_free(tr->trace_buffer.buffer); +	free_trace_buffers(tr);  	free_cpumask_var(tr->tracing_cpumask);  	kfree(tr->name);  	kfree(tr); @@ -6006,10 +6370,11 @@ static int instance_delete(const char *name)  	list_del(&tr->list); +	tracing_set_nop(tr);  	event_trace_del_tracer(tr); +	ftrace_destroy_function_files(tr);  	debugfs_remove_recursive(tr->dir); -	free_percpu(tr->trace_buffer.data); -	ring_buffer_free(tr->trace_buffer.buffer); +	free_trace_buffers(tr);  	kfree(tr->name);  	kfree(tr); @@ -6101,6 +6466,12 @@ init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)  {  	int cpu; +	trace_create_file("available_tracers", 0444, d_tracer, +			tr, &show_traces_fops); + +	trace_create_file("current_tracer", 0644, d_tracer, +			tr, &set_tracer_fops); +  	trace_create_file("tracing_cpumask", 0644, d_tracer,  			  tr, &tracing_cpumask_fops); @@ -6131,6 +6502,14 @@ init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)  	trace_create_file("tracing_on", 0644, d_tracer,  			  tr, &rb_simple_fops); +#ifdef CONFIG_TRACER_MAX_TRACE +	trace_create_file("tracing_max_latency", 0644, d_tracer, +			&tr->max_latency, &tracing_max_lat_fops); +#endif + +	if (ftrace_create_function_files(tr, d_tracer)) +		WARN(1, "Could not allocate function filter files"); +  #ifdef CONFIG_TRACER_SNAPSHOT  	trace_create_file("snapshot", 0644, d_tracer,  			  tr, &snapshot_fops); @@ -6153,17 +6532,6 @@ static __init int tracer_init_debugfs(void)  	init_tracer_debugfs(&global_trace, d_tracer); -	trace_create_file("available_tracers", 0444, d_tracer, -			&global_trace, &show_traces_fops); - -	trace_create_file("current_tracer", 0644, d_tracer, -			&global_trace, &set_tracer_fops); - -#ifdef CONFIG_TRACER_MAX_TRACE -	trace_create_file("tracing_max_latency", 0644, d_tracer, -			&tracing_max_latency, &tracing_max_lat_fops); -#endif -  	trace_create_file("tracing_thresh", 0644, d_tracer,  			&tracing_thresh, &tracing_max_lat_fops); @@ -6173,6 +6541,9 @@ static __init int tracer_init_debugfs(void)  	trace_create_file("saved_cmdlines", 0444, d_tracer,  			NULL, &tracing_saved_cmdlines_fops); +	trace_create_file("saved_cmdlines_size", 0644, d_tracer, +			  NULL, &tracing_saved_cmdlines_size_fops); +  #ifdef CONFIG_DYNAMIC_FTRACE  	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,  			&ftrace_update_tot_cnt, &tracing_dyn_info_fops); @@ -6253,6 +6624,17 @@ void trace_init_global_iter(struct trace_iterator *iter)  	iter->trace = 
iter->tr->current_trace;  	iter->cpu_file = RING_BUFFER_ALL_CPUS;  	iter->trace_buffer = &global_trace.trace_buffer; + +	if (iter->trace && iter->trace->open) +		iter->trace->open(iter); + +	/* Annotate start of buffers if we had overruns */ +	if (ring_buffer_overruns(iter->trace_buffer->buffer)) +		iter->iter_flags |= TRACE_FILE_ANNOTATE; + +	/* Output in nanoseconds only if we are using a clock in nanoseconds. */ +	if (trace_clocks[iter->tr->clock_id].in_ns) +		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;  }  void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) @@ -6393,17 +6775,30 @@ __init static int tracer_alloc_buffers(void)  	raw_spin_lock_init(&global_trace.start_lock); +	/* Used for event triggers */ +	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE); +	if (!temp_buffer) +		goto out_free_cpumask; + +	if (trace_create_savedcmd() < 0) +		goto out_free_temp_buffer; +  	/* TODO: make the number of buffers hot pluggable with CPUS */  	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {  		printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");  		WARN_ON(1); -		goto out_free_cpumask; +		goto out_free_savedcmd;  	}  	if (global_trace.buffer_disabled)  		tracing_off(); -	trace_init_cmdlines(); +	if (trace_boot_clock) { +		ret = tracing_set_clock(&global_trace, trace_boot_clock); +		if (ret < 0) +			pr_warning("Trace clock %s not defined, going back to default\n", +				   trace_boot_clock); +	}  	/*  	 * register_tracer() might reference current_trace, so it @@ -6412,6 +6807,10 @@ __init static int tracer_alloc_buffers(void)  	 */  	global_trace.current_trace = &nop_trace; +	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; + +	ftrace_init_global_array_ops(&global_trace); +  	register_tracer(&nop_trace);  	/* All seems OK, enable tracing */ @@ -6439,11 +6838,11 @@ __init static int tracer_alloc_buffers(void)  	return 0; +out_free_savedcmd: +	free_saved_cmdlines_buffer(savedcmd); +out_free_temp_buffer: +	ring_buffer_free(temp_buffer);  out_free_cpumask: -	free_percpu(global_trace.trace_buffer.data); -#ifdef CONFIG_TRACER_MAX_TRACE -	free_percpu(global_trace.max_buffer.data); -#endif  	free_cpumask_var(global_trace.tracing_cpumask);  out_free_buffer_mask:  	free_cpumask_var(tracing_buffer_mask); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 10c86fb7a2b..9258f5a815d 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1,3 +1,4 @@ +  #ifndef _LINUX_KERNEL_TRACE_H  #define _LINUX_KERNEL_TRACE_H @@ -12,6 +13,7 @@  #include <linux/hw_breakpoint.h>  #include <linux/trace_seq.h>  #include <linux/ftrace_event.h> +#include <linux/compiler.h>  #ifdef CONFIG_FTRACE_SYSCALLS  #include <asm/unistd.h>		/* For NR_SYSCALLS	     */ @@ -124,6 +126,7 @@ enum trace_flag_type {  	TRACE_FLAG_NEED_RESCHED		= 0x04,  	TRACE_FLAG_HARDIRQ		= 0x08,  	TRACE_FLAG_SOFTIRQ		= 0x10, +	TRACE_FLAG_PREEMPT_RESCHED	= 0x20,  };  #define TRACE_BUF_SIZE		1024 @@ -187,13 +190,28 @@ struct trace_array {  	 */  	struct trace_buffer	max_buffer;  	bool			allocated_snapshot; +	unsigned long		max_latency;  #endif +	/* +	 * max_lock is used to protect the swapping of buffers +	 * when taking a max snapshot. The buffers themselves are +	 * protected by per_cpu spinlocks. But the action of the swap +	 * needs its own lock. +	 * +	 * This is defined as a arch_spinlock_t in order to help +	 * with performance when lockdep debugging is enabled. 
+	 * +	 * It is also used in other places outside the update_max_tr +	 * so it needs to be defined outside of the +	 * CONFIG_TRACER_MAX_TRACE. +	 */ +	arch_spinlock_t		max_lock;  	int			buffer_disabled;  #ifdef CONFIG_FTRACE_SYSCALLS  	int			sys_refcount_enter;  	int			sys_refcount_exit; -	DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls); -	DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls); +	struct ftrace_event_file __rcu *enter_syscall_files[NR_syscalls]; +	struct ftrace_event_file __rcu *exit_syscall_files[NR_syscalls];  #endif  	int			stop_count;  	int			clock_id; @@ -208,6 +226,11 @@ struct trace_array {  	struct list_head	events;  	cpumask_var_t		tracing_cpumask; /* only trace on set CPUs */  	int			ref; +#ifdef CONFIG_FUNCTION_TRACER +	struct ftrace_ops	*ops; +	/* function tracing enabled */ +	int			function_enabled; +#endif  };  enum { @@ -229,6 +252,9 @@ static inline struct trace_array *top_trace_array(void)  {  	struct trace_array *tr; +	if (list_empty(&ftrace_trace_arrays)) +		return NULL; +  	tr = list_entry(ftrace_trace_arrays.prev,  			typeof(*tr), list);  	WARN_ON(!(tr->flags & TRACE_ARRAY_FL_GLOBAL)); @@ -315,7 +341,6 @@ struct tracer_flags {   * @stop: called when tracing is paused (echo 0 > tracing_enabled)   * @open: called when the trace file is opened   * @pipe_open: called when the trace_pipe file is opened - * @wait_pipe: override how the user waits for traces on trace_pipe   * @close: called when the trace file is released   * @pipe_close: called when the trace_pipe file is released   * @read: override the default read callback on trace_pipe @@ -334,7 +359,6 @@ struct tracer {  	void			(*stop)(struct trace_array *tr);  	void			(*open)(struct trace_iterator *iter);  	void			(*pipe_open)(struct trace_iterator *iter); -	void			(*wait_pipe)(struct trace_iterator *iter);  	void			(*close)(struct trace_iterator *iter);  	void			(*pipe_close)(struct trace_iterator *iter);  	ssize_t			(*read)(struct trace_iterator *iter, @@ -353,14 +377,16 @@ struct tracer {  	void			(*print_header)(struct seq_file *m);  	enum print_line_t	(*print_line)(struct trace_iterator *iter);  	/* If you handled the flag setting, return 0 */ -	int			(*set_flag)(u32 old_flags, u32 bit, int set); +	int			(*set_flag)(struct trace_array *tr, +					    u32 old_flags, u32 bit, int set);  	/* Return 0 if OK with change, else return non-zero */ -	int			(*flag_changed)(struct tracer *tracer, +	int			(*flag_changed)(struct trace_array *tr,  						u32 mask, int set);  	struct tracer		*next;  	struct tracer_flags	*flags; +	int			enabled;  	bool			print_max; -	bool			enabled; +	bool			allow_instances;  #ifdef CONFIG_TRACER_MAX_TRACE  	bool			use_max_tr;  #endif @@ -406,13 +432,7 @@ enum {  	TRACE_FTRACE_IRQ_BIT,  	TRACE_FTRACE_SIRQ_BIT, -	/* GLOBAL_BITs must be greater than FTRACE_BITs */ -	TRACE_GLOBAL_BIT, -	TRACE_GLOBAL_NMI_BIT, -	TRACE_GLOBAL_IRQ_BIT, -	TRACE_GLOBAL_SIRQ_BIT, - -	/* INTERNAL_BITs must be greater than GLOBAL_BITs */ +	/* INTERNAL_BITs must be greater than FTRACE_BITs */  	TRACE_INTERNAL_BIT,  	TRACE_INTERNAL_NMI_BIT,  	TRACE_INTERNAL_IRQ_BIT, @@ -439,9 +459,6 @@ enum {  #define TRACE_FTRACE_START	TRACE_FTRACE_BIT  #define TRACE_FTRACE_MAX	((1 << (TRACE_FTRACE_START + TRACE_CONTEXT_BITS)) - 1) -#define TRACE_GLOBAL_START	TRACE_GLOBAL_BIT -#define TRACE_GLOBAL_MAX	((1 << (TRACE_GLOBAL_START + TRACE_CONTEXT_BITS)) - 1) -  #define TRACE_LIST_START	TRACE_INTERNAL_BIT  #define TRACE_LIST_MAX		((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1) @@ -514,6 +531,7 @@ void 
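
Because set_flag() and flag_changed() now receive the trace_array they are acting on, a tracer can keep its option state per instance instead of in file-scope globals. A hedged sketch of what a callback pair might look like under the new signatures; the tracer and its flag are invented:

#include <linux/errno.h>

#define MY_VERBOSE_FLAG	0x1		/* invented option bit */

static int my_tracer_set_flag(struct trace_array *tr,
			      u32 old_flags, u32 bit, int set)
{
	if (bit == MY_VERBOSE_FLAG) {
		/* per-instance state would be stashed on 'tr' here */
		return 0;		/* 0 = flag change handled */
	}

	return -EINVAL;
}

static int my_tracer_flag_changed(struct trace_array *tr, u32 mask, int set)
{
	/* return non-zero to veto a trace_options change for this instance */
	return 0;
}
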
tracing_reset_online_cpus(struct trace_buffer *buf);  void tracing_reset_current(int cpu);  void tracing_reset_all_online_cpus(void);  int tracing_open_generic(struct inode *inode, struct file *filp); +bool tracing_is_disabled(void);  struct dentry *trace_create_file(const char *name,  				 umode_t mode,  				 struct dentry *parent, @@ -549,8 +567,6 @@ void trace_init_global_iter(struct trace_iterator *iter);  void tracing_iter_reset(struct trace_iterator *iter, int cpu); -void poll_wait_pipe(struct trace_iterator *iter); -  void tracing_sched_switch_trace(struct trace_array *tr,  				struct task_struct *prev,  				struct task_struct *next, @@ -585,6 +601,8 @@ void tracing_start_sched_switch_record(void);  int register_tracer(struct tracer *type);  int is_tracing_stopped(void); +loff_t tracing_lseek(struct file *file, loff_t offset, int whence); +  extern cpumask_var_t __read_mostly tracing_buffer_mask;  #define for_each_tracing_cpu(cpu)	\ @@ -595,8 +613,6 @@ extern unsigned long nsecs_to_usecs(unsigned long nsecs);  extern unsigned long tracing_thresh;  #ifdef CONFIG_TRACER_MAX_TRACE -extern unsigned long tracing_max_latency; -  void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);  void update_max_tr_single(struct trace_array *tr,  			  struct task_struct *tsk, int cpu); @@ -711,6 +727,10 @@ extern unsigned long trace_flags;  #define TRACE_GRAPH_PRINT_PROC          0x8  #define TRACE_GRAPH_PRINT_DURATION      0x10  #define TRACE_GRAPH_PRINT_ABS_TIME      0x20 +#define TRACE_GRAPH_PRINT_IRQS          0x40 +#define TRACE_GRAPH_PRINT_TAIL          0x80 +#define TRACE_GRAPH_PRINT_FILL_SHIFT	28 +#define TRACE_GRAPH_PRINT_FILL_MASK	(0x3 << TRACE_GRAPH_PRINT_FILL_SHIFT)  extern enum print_line_t  print_graph_function_flags(struct trace_iterator *iter, u32 flags); @@ -730,15 +750,16 @@ extern void __trace_graph_return(struct trace_array *tr,  #ifdef CONFIG_DYNAMIC_FTRACE  /* TODO: make this variable */  #define FTRACE_GRAPH_MAX_FUNCS		32 -extern int ftrace_graph_filter_enabled;  extern int ftrace_graph_count;  extern unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS]; +extern int ftrace_graph_notrace_count; +extern unsigned long ftrace_graph_notrace_funcs[FTRACE_GRAPH_MAX_FUNCS];  static inline int ftrace_graph_addr(unsigned long addr)  {  	int i; -	if (!ftrace_graph_filter_enabled) +	if (!ftrace_graph_count)  		return 1;  	for (i = 0; i < ftrace_graph_count; i++) { @@ -758,11 +779,31 @@ static inline int ftrace_graph_addr(unsigned long addr)  	return 0;  } + +static inline int ftrace_graph_notrace_addr(unsigned long addr) +{ +	int i; + +	if (!ftrace_graph_notrace_count) +		return 0; + +	for (i = 0; i < ftrace_graph_notrace_count; i++) { +		if (addr == ftrace_graph_notrace_funcs[i]) +			return 1; +	} + +	return 0; +}  #else  static inline int ftrace_graph_addr(unsigned long addr)  {  	return 1;  } + +static inline int ftrace_graph_notrace_addr(unsigned long addr) +{ +	return 0; +}  #endif /* CONFIG_DYNAMIC_FTRACE */  #else /* CONFIG_FUNCTION_GRAPH_TRACER */  static inline enum print_line_t @@ -784,13 +825,45 @@ static inline int ftrace_trace_task(struct task_struct *task)  	return test_tsk_trace_trace(task);  }  extern int ftrace_is_dead(void); +int ftrace_create_function_files(struct trace_array *tr, +				 struct dentry *parent); +void ftrace_destroy_function_files(struct trace_array *tr); +void ftrace_init_global_array_ops(struct trace_array *tr); +void ftrace_init_array_ops(struct trace_array *tr, ftrace_func_t func); +void ftrace_reset_array_ops(struct 
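
The graph tracer now has two independent filters: set_graph_function (ftrace_graph_addr(), where an empty list means trace everything) and the new set_graph_notrace (ftrace_graph_notrace_addr(), which suppresses matched functions and, in the real tracer, everything called beneath them). A simplified, illustrative combination of the two checks; the in-tree trace_graph_entry() logic additionally tracks depth and recursion state:

static int demo_graph_entry(unsigned long func_addr)
{
	/* set_graph_notrace wins: never start a trace at these functions */
	if (ftrace_graph_notrace_addr(func_addr))
		return 0;

	/* an empty set_graph_function filter returns 1 for every address */
	if (!ftrace_graph_addr(func_addr))
		return 0;

	return 1;	/* record this entry */
}
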
trace_array *tr); +int using_ftrace_ops_list_func(void);  #else  static inline int ftrace_trace_task(struct task_struct *task)  {  	return 1;  }  static inline int ftrace_is_dead(void) { return 0; } -#endif +static inline int +ftrace_create_function_files(struct trace_array *tr, +			     struct dentry *parent) +{ +	return 0; +} +static inline void ftrace_destroy_function_files(struct trace_array *tr) { } +static inline __init void +ftrace_init_global_array_ops(struct trace_array *tr) { } +static inline void ftrace_reset_array_ops(struct trace_array *tr) { } +/* ftace_func_t type is not defined, use macro instead of static inline */ +#define ftrace_init_array_ops(tr, func) do { } while (0) +#endif /* CONFIG_FUNCTION_TRACER */ + +#if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_DYNAMIC_FTRACE) +void ftrace_create_filter_files(struct ftrace_ops *ops, +				struct dentry *parent); +void ftrace_destroy_filter_files(struct ftrace_ops *ops); +#else +/* + * The ops parameter passed in is usually undefined. + * This must be a macro. + */ +#define ftrace_create_filter_files(ops, parent) do { } while (0) +#define ftrace_destroy_filter_files(ops) do { } while (0) +#endif /* CONFIG_FUNCTION_TRACER && CONFIG_DYNAMIC_FTRACE */  int ftrace_event_is_function(struct ftrace_event_call *call); @@ -986,40 +1059,216 @@ struct filter_pred {  extern enum regex_type  filter_parse_regex(char *buff, int len, char **search, int *not); -extern void print_event_filter(struct ftrace_event_call *call, +extern void print_event_filter(struct ftrace_event_file *file,  			       struct trace_seq *s); -extern int apply_event_filter(struct ftrace_event_call *call, +extern int apply_event_filter(struct ftrace_event_file *file,  			      char *filter_string);  extern int apply_subsystem_event_filter(struct ftrace_subsystem_dir *dir,  					char *filter_string);  extern void print_subsystem_event_filter(struct event_subsystem *system,  					 struct trace_seq *s);  extern int filter_assign_type(const char *type); +extern int create_event_filter(struct ftrace_event_call *call, +			       char *filter_str, bool set_str, +			       struct event_filter **filterp); +extern void free_event_filter(struct event_filter *filter);  struct ftrace_event_field *  trace_find_event_field(struct ftrace_event_call *call, char *name); -static inline int -filter_check_discard(struct ftrace_event_call *call, void *rec, -		     struct ring_buffer *buffer, -		     struct ring_buffer_event *event) -{ -	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) && -	    !filter_match_preds(call->filter, rec)) { -		ring_buffer_discard_commit(buffer, event); -		return 1; -	} - -	return 0; -} -  extern void trace_event_enable_cmd_record(bool enable);  extern int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr);  extern int event_trace_del_tracer(struct trace_array *tr); +extern struct ftrace_event_file *find_event_file(struct trace_array *tr, +						 const char *system, +						 const char *event); + +static inline void *event_file_data(struct file *filp) +{ +	return ACCESS_ONCE(file_inode(filp)->i_private); +} +  extern struct mutex event_mutex;  extern struct list_head ftrace_events; +extern const struct file_operations event_trigger_fops; + +extern int register_trigger_cmds(void); +extern void clear_event_triggers(struct trace_array *tr); + +struct event_trigger_data { +	unsigned long			count; +	int				ref; +	struct event_trigger_ops	*ops; +	struct event_command		*cmd_ops; +	struct event_filter __rcu	*filter; +	char				*filter_str; +	void		
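
event_file_data() reads the ftrace_event_file straight out of the inode's i_private, which can be cleared when an event, instance, or module goes away; the file operations later in this patch therefore re-fetch it under event_mutex and treat NULL as "event gone". A hedged sketch of that access pattern, with an invented read handler:

static ssize_t demo_event_read(struct file *filp, char __user *ubuf,
			       size_t cnt, loff_t *ppos)
{
	struct ftrace_event_file *file;
	ssize_t ret = -ENODEV;

	mutex_lock(&event_mutex);
	file = event_file_data(filp);	/* ACCESS_ONCE(i_private) */
	if (file) {
		/* 'file' stays valid while event_mutex is held */
		ret = 0;
	}
	mutex_unlock(&event_mutex);

	return ret;
}
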
		*private_data; +	struct list_head		list; +}; + +/** + * struct event_trigger_ops - callbacks for trace event triggers + * + * The methods in this structure provide per-event trigger hooks for + * various trigger operations. + * + * All the methods below, except for @init() and @free(), must be + * implemented. + * + * @func: The trigger 'probe' function called when the triggering + *	event occurs.  The data passed into this callback is the data + *	that was supplied to the event_command @reg() function that + *	registered the trigger (see struct event_command). + * + * @init: An optional initialization function called for the trigger + *	when the trigger is registered (via the event_command reg() + *	function).  This can be used to perform per-trigger + *	initialization such as incrementing a per-trigger reference + *	count, for instance.  This is usually implemented by the + *	generic utility function @event_trigger_init() (see + *	trace_event_triggers.c). + * + * @free: An optional de-initialization function called for the + *	trigger when the trigger is unregistered (via the + *	event_command @reg() function).  This can be used to perform + *	per-trigger de-initialization such as decrementing a + *	per-trigger reference count and freeing corresponding trigger + *	data, for instance.  This is usually implemented by the + *	generic utility function @event_trigger_free() (see + *	trace_event_triggers.c). + * + * @print: The callback function invoked to have the trigger print + *	itself.  This is usually implemented by a wrapper function + *	that calls the generic utility function @event_trigger_print() + *	(see trace_event_triggers.c). + */ +struct event_trigger_ops { +	void			(*func)(struct event_trigger_data *data); +	int			(*init)(struct event_trigger_ops *ops, +					struct event_trigger_data *data); +	void			(*free)(struct event_trigger_ops *ops, +					struct event_trigger_data *data); +	int			(*print)(struct seq_file *m, +					 struct event_trigger_ops *ops, +					 struct event_trigger_data *data); +}; + +/** + * struct event_command - callbacks and data members for event commands + * + * Event commands are invoked by users by writing the command name + * into the 'trigger' file associated with a trace event.  The + * parameters associated with a specific invocation of an event + * command are used to create an event trigger instance, which is + * added to the list of trigger instances associated with that trace + * event.  When the event is hit, the set of triggers associated with + * that event is invoked. + * + * The data members in this structure provide per-event command data + * for various event commands. + * + * All the data members below, except for @post_trigger, must be set + * for each event command. + * + * @name: The unique name that identifies the event command.  This is + *	the name used when setting triggers via trigger files. + * + * @trigger_type: A unique id that identifies the event command + *	'type'.  This value has two purposes, the first to ensure that + *	only one trigger of the same type can be set at a given time + *	for a particular event e.g. it doesn't make sense to have both + *	a traceon and traceoff trigger attached to a single event at + *	the same time, so traceon and traceoff have the same type + *	though they have different names.  The @trigger_type value is + *	also used as a bit value for deferring the actual trigger + *	action until after the current event is finished.  
Some + *	commands need to do this if they themselves log to the trace + *	buffer (see the @post_trigger() member below).  @trigger_type + *	values are defined by adding new values to the trigger_type + *	enum in include/linux/ftrace_event.h. + * + * @post_trigger: A flag that says whether or not this command needs + *	to have its action delayed until after the current event has + *	been closed.  Some triggers need to avoid being invoked while + *	an event is currently in the process of being logged, since + *	the trigger may itself log data into the trace buffer.  Thus + *	we make sure the current event is committed before invoking + *	those triggers.  To do that, the trigger invocation is split + *	in two - the first part checks the filter using the current + *	trace record; if a command has the @post_trigger flag set, it + *	sets a bit for itself in the return value, otherwise it + *	directly invokes the trigger.  Once all commands have been + *	either invoked or set their return flag, the current record is + *	either committed or discarded.  At that point, if any commands + *	have deferred their triggers, those commands are finally + *	invoked following the close of the current event.  In other + *	words, if the event_trigger_ops @func() probe implementation + *	itself logs to the trace buffer, this flag should be set, + *	otherwise it can be left unspecified. + * + * All the methods below, except for @set_filter(), must be + * implemented. + * + * @func: The callback function responsible for parsing and + *	registering the trigger written to the 'trigger' file by the + *	user.  It allocates the trigger instance and registers it with + *	the appropriate trace event.  It makes use of the other + *	event_command callback functions to orchestrate this, and is + *	usually implemented by the generic utility function + *	@event_trigger_callback() (see trace_event_triggers.c). + * + * @reg: Adds the trigger to the list of triggers associated with the + *	event, and enables the event trigger itself, after + *	initializing it (via the event_trigger_ops @init() function). + *	This is also where commands can use the @trigger_type value to + *	make the decision as to whether or not multiple instances of + *	the trigger should be allowed.  This is usually implemented by + *	the generic utility function @register_trigger() (see + *	trace_event_triggers.c). + * + * @unreg: Removes the trigger from the list of triggers associated + *	with the event, and disables the event trigger itself, after + *	initializing it (via the event_trigger_ops @free() function). + *	This is usually implemented by the generic utility function + *	@unregister_trigger() (see trace_event_triggers.c). + * + * @set_filter: An optional function called to parse and set a filter + *	for the trigger.  If no @set_filter() method is set for the + *	event command, filters set by the user for the command will be + *	ignored.  This is usually implemented by the generic utility + *	function @set_trigger_filter() (see trace_event_triggers.c). + * + * @get_trigger_ops: The callback function invoked to retrieve the + *	event_trigger_ops implementation associated with the command. 
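
To make the two structures documented here concrete (struct event_trigger_ops above, struct event_command just below), here is a hedged sketch of a toy command; all "demo" names are invented, and in-tree commands would normally point .func/.reg/.unreg at the generic helpers named in the comments (event_trigger_callback(), register_trigger(), unregister_trigger()):

static void demo_trigger(struct event_trigger_data *data)
{
	pr_info("demo trigger fired, count=%lu\n", data->count);
}

static int demo_trigger_print(struct seq_file *m,
			      struct event_trigger_ops *ops,
			      struct event_trigger_data *data)
{
	seq_puts(m, "demo\n");
	return 0;
}

static struct event_trigger_ops demo_trigger_ops = {
	.func	= demo_trigger,
	.print	= demo_trigger_print,
	/* .init and .free are optional */
};

static struct event_trigger_ops *
demo_get_trigger_ops(char *cmd, char *param)
{
	return &demo_trigger_ops;
}

static struct event_command demo_cmd = {
	.name			= "demo",
	/* .trigger_type would be a new bit in enum event_trigger_type */
	.post_trigger		= false,	/* does not log to the buffer itself */
	.get_trigger_ops	= demo_get_trigger_ops,
	/* .func, .reg, .unreg: typically the generic helpers named above */
};
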
+ */ +struct event_command { +	struct list_head	list; +	char			*name; +	enum event_trigger_type	trigger_type; +	bool			post_trigger; +	int			(*func)(struct event_command *cmd_ops, +					struct ftrace_event_file *file, +					char *glob, char *cmd, char *params); +	int			(*reg)(char *glob, +				       struct event_trigger_ops *ops, +				       struct event_trigger_data *data, +				       struct ftrace_event_file *file); +	void			(*unreg)(char *glob, +					 struct event_trigger_ops *ops, +					 struct event_trigger_data *data, +					 struct ftrace_event_file *file); +	int			(*set_filter)(char *filter_str, +					      struct event_trigger_data *data, +					      struct ftrace_event_file *file); +	struct event_trigger_ops *(*get_trigger_ops)(char *cmd, char *param); +}; + +extern int trace_event_enable_disable(struct ftrace_event_file *file, +				      int enable, int soft_disable); +extern int tracing_alloc_snapshot(void); +  extern const char *__start___trace_bprintk_fmt[];  extern const char *__stop___trace_bprintk_fmt[]; @@ -1045,7 +1294,7 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled);  #undef FTRACE_ENTRY  #define FTRACE_ENTRY(call, struct_name, id, tstruct, print, filter)	\  	extern struct ftrace_event_call					\ -	__attribute__((__aligned__(4))) event_##call; +	__aligned(4) event_##call;  #undef FTRACE_ENTRY_DUP  #define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print, filter)	\  	FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print), \ diff --git a/kernel/trace/trace_benchmark.c b/kernel/trace/trace_benchmark.c new file mode 100644 index 00000000000..40a14cbcf8e --- /dev/null +++ b/kernel/trace/trace_benchmark.c @@ -0,0 +1,198 @@ +#include <linux/delay.h> +#include <linux/module.h> +#include <linux/kthread.h> +#include <linux/trace_clock.h> + +#define CREATE_TRACE_POINTS +#include "trace_benchmark.h" + +static struct task_struct *bm_event_thread; + +static char bm_str[BENCHMARK_EVENT_STRLEN] = "START"; + +static u64 bm_total; +static u64 bm_totalsq; +static u64 bm_last; +static u64 bm_max; +static u64 bm_min; +static u64 bm_first; +static u64 bm_cnt; +static u64 bm_stddev; +static unsigned int bm_avg; +static unsigned int bm_std; + +/* + * This gets called in a loop recording the time it took to write + * the tracepoint. What it writes is the time statistics of the last + * tracepoint write. As there is nothing to write the first time + * it simply writes "START". As the first write is cold cache and + * the rest is hot, we save off that time in bm_first and it is + * reported as "first", which is shown in the second write to the + * tracepoint. The "first" field is writen within the statics from + * then on but never changes. + */ +static void trace_do_benchmark(void) +{ +	u64 start; +	u64 stop; +	u64 delta; +	u64 stddev; +	u64 seed; +	u64 last_seed; +	unsigned int avg; +	unsigned int std = 0; + +	/* Only run if the tracepoint is actually active */ +	if (!trace_benchmark_event_enabled()) +		return; + +	local_irq_disable(); +	start = trace_clock_local(); +	trace_benchmark_event(bm_str); +	stop = trace_clock_local(); +	local_irq_enable(); + +	bm_cnt++; + +	delta = stop - start; + +	/* +	 * The first read is cold cached, keep it separate from the +	 * other calculations. 
+	 */ +	if (bm_cnt == 1) { +		bm_first = delta; +		scnprintf(bm_str, BENCHMARK_EVENT_STRLEN, +			  "first=%llu [COLD CACHED]", bm_first); +		return; +	} + +	bm_last = delta; + +	if (delta > bm_max) +		bm_max = delta; +	if (!bm_min || delta < bm_min) +		bm_min = delta; + +	/* +	 * When bm_cnt is greater than UINT_MAX, it breaks the statistics +	 * accounting. Freeze the statistics when that happens. +	 * We should have enough data for the avg and stddev anyway. +	 */ +	if (bm_cnt > UINT_MAX) { +		scnprintf(bm_str, BENCHMARK_EVENT_STRLEN, +		    "last=%llu first=%llu max=%llu min=%llu ** avg=%u std=%d std^2=%lld", +			  bm_last, bm_first, bm_max, bm_min, bm_avg, bm_std, bm_stddev); +		return; +	} + +	bm_total += delta; +	bm_totalsq += delta * delta; + + +	if (bm_cnt > 1) { +		/* +		 * Apply Welford's method to calculate standard deviation: +		 * s^2 = 1 / (n * (n-1)) * (n * \Sum (x_i)^2 - (\Sum x_i)^2) +		 */ +		stddev = (u64)bm_cnt * bm_totalsq - bm_total * bm_total; +		do_div(stddev, (u32)bm_cnt); +		do_div(stddev, (u32)bm_cnt - 1); +	} else +		stddev = 0; + +	delta = bm_total; +	do_div(delta, bm_cnt); +	avg = delta; + +	if (stddev > 0) { +		int i = 0; +		/* +		 * stddev is the square of standard deviation but +		 * we want the actualy number. Use the average +		 * as our seed to find the std. +		 * +		 * The next try is: +		 *  x = (x + N/x) / 2 +		 * +		 * Where N is the squared number to find the square +		 * root of. +		 */ +		seed = avg; +		do { +			last_seed = seed; +			seed = stddev; +			if (!last_seed) +				break; +			do_div(seed, last_seed); +			seed += last_seed; +			do_div(seed, 2); +		} while (i++ < 10 && last_seed != seed); + +		std = seed; +	} + +	scnprintf(bm_str, BENCHMARK_EVENT_STRLEN, +		  "last=%llu first=%llu max=%llu min=%llu avg=%u std=%d std^2=%lld", +		  bm_last, bm_first, bm_max, bm_min, avg, std, stddev); + +	bm_std = std; +	bm_avg = avg; +	bm_stddev = stddev; +} + +static int benchmark_event_kthread(void *arg) +{ +	/* sleep a bit to make sure the tracepoint gets activated */ +	msleep(100); + +	while (!kthread_should_stop()) { + +		trace_do_benchmark(); + +		/* +		 * We don't go to sleep, but let others +		 * run as well. +		 */ +		cond_resched(); +	} + +	return 0; +} + +/* + * When the benchmark tracepoint is enabled, it calls this + * function and the thread that calls the tracepoint is created. + */ +void trace_benchmark_reg(void) +{ +	bm_event_thread = kthread_run(benchmark_event_kthread, +				      NULL, "event_benchmark"); +	WARN_ON(!bm_event_thread); +} + +/* + * When the benchmark tracepoint is disabled, it calls this + * function and the thread that calls the tracepoint is deleted + * and all the numbers are reset. 
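
Because the loop above only keeps running sums, the variance comes from the identity s^2 = (n * sum(x_i^2) - (sum x_i)^2) / (n * (n - 1)), and the standard deviation from an integer square root computed with the x = (x + N/x) / 2 iteration seeded by the average. A standalone userspace sketch of the same arithmetic, using made-up sample values:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t samples[] = { 632, 278, 277, 273, 273, 281 };
	uint64_t n = sizeof(samples) / sizeof(samples[0]);
	uint64_t total = 0, totalsq = 0;

	for (uint64_t i = 0; i < n; i++) {
		total += samples[i];
		totalsq += samples[i] * samples[i];
	}

	uint64_t avg = total / n;
	/* s^2 = (n * sum(x^2) - (sum x)^2) / (n * (n - 1)) */
	uint64_t var = (n * totalsq - total * total) / (n * (n - 1));

	/* Newton/Babylonian iteration for the integer square root of var */
	uint64_t seed = avg ? avg : 1, last;
	for (int i = 0; i < 10; i++) {
		last = seed;
		seed = (last + var / last) / 2;
		if (seed == last)
			break;
	}

	printf("avg=%llu std=%llu std^2=%llu\n",
	       (unsigned long long)avg, (unsigned long long)seed,
	       (unsigned long long)var);
	return 0;
}

Seeding with the running average keeps the iteration close to the answer, which is why a cap of ten rounds is enough in practice.
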
+ */ +void trace_benchmark_unreg(void) +{ +	if (!bm_event_thread) +		return; + +	kthread_stop(bm_event_thread); + +	strcpy(bm_str, "START"); +	bm_total = 0; +	bm_totalsq = 0; +	bm_last = 0; +	bm_max = 0; +	bm_min = 0; +	bm_cnt = 0; +	/* These don't need to be reset but reset them anyway */ +	bm_first = 0; +	bm_std = 0; +	bm_avg = 0; +	bm_stddev = 0; +} diff --git a/kernel/trace/trace_benchmark.h b/kernel/trace/trace_benchmark.h new file mode 100644 index 00000000000..3c1df1df4e2 --- /dev/null +++ b/kernel/trace/trace_benchmark.h @@ -0,0 +1,41 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM benchmark + +#if !defined(_TRACE_BENCHMARK_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_BENCHMARK_H + +#include <linux/tracepoint.h> + +extern void trace_benchmark_reg(void); +extern void trace_benchmark_unreg(void); + +#define BENCHMARK_EVENT_STRLEN		128 + +TRACE_EVENT_FN(benchmark_event, + +	TP_PROTO(const char *str), + +	TP_ARGS(str), + +	TP_STRUCT__entry( +		__array(	char,	str,	BENCHMARK_EVENT_STRLEN	) +	), + +	TP_fast_assign( +		memcpy(__entry->str, str, BENCHMARK_EVENT_STRLEN); +	), + +	TP_printk("%s", __entry->str), + +	trace_benchmark_reg, trace_benchmark_unreg +); + +#endif /* _TRACE_BENCHMARK_H */ + +#undef TRACE_INCLUDE_FILE +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_benchmark + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index d594da0dc03..697fb9bac8f 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -78,7 +78,7 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)  	entry->line = f->line;  	entry->correct = val == expect; -	if (!filter_check_discard(call, entry, buffer, event)) +	if (!call_filter_check_discard(call, entry, buffer, event))  		__buffer_unlock_commit(buffer, event);   out: diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c index 26dc348332b..57b67b1f24d 100644 --- a/kernel/trace/trace_clock.c +++ b/kernel/trace/trace_clock.c @@ -59,13 +59,14 @@ u64 notrace trace_clock(void)  /*   * trace_jiffy_clock(): Simply use jiffies as a clock counter. + * Note that this use of jiffies_64 is not completely safe on + * 32-bit systems. But the window is tiny, and the effect if + * we are affected is that we will have an obviously bogus + * timestamp on a trace event - i.e. not life threatening.   */  u64 notrace trace_clock_jiffies(void)  { -	u64 jiffy = jiffies - INITIAL_JIFFIES; - -	/* Return nsecs */ -	return (u64)jiffies_to_usecs(jiffy) * 1000ULL; +	return jiffies_64_to_clock_t(jiffies_64 - INITIAL_JIFFIES);  }  /* diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 80c36bcf66e..5d12bb407b4 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -24,10 +24,32 @@ static int	total_ref_count;  static int perf_trace_event_perm(struct ftrace_event_call *tp_event,  				 struct perf_event *p_event)  { +	if (tp_event->perf_perm) { +		int ret = tp_event->perf_perm(tp_event, p_event); +		if (ret) +			return ret; +	} +  	/* The ftrace function trace is allowed only for root. 
*/ -	if (ftrace_event_is_function(tp_event) && -	    perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN)) -		return -EPERM; +	if (ftrace_event_is_function(tp_event)) { +		if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN)) +			return -EPERM; + +		/* +		 * We don't allow user space callchains for  function trace +		 * event, due to issues with page faults while tracing page +		 * fault handler and its overall trickiness nature. +		 */ +		if (!p_event->attr.exclude_callchain_user) +			return -EINVAL; + +		/* +		 * Same reason to disable user stack dump as for user space +		 * callchains above. +		 */ +		if (p_event->attr.sample_type & PERF_SAMPLE_STACK_USER) +			return -EINVAL; +	}  	/* No tracing, just counting, so no obvious leak */  	if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW)) @@ -173,7 +195,7 @@ static int perf_trace_event_init(struct ftrace_event_call *tp_event,  int perf_trace_init(struct perf_event *p_event)  {  	struct ftrace_event_call *tp_event; -	int event_id = p_event->attr.config; +	u64 event_id = p_event->attr.config;  	int ret = -EINVAL;  	mutex_lock(&event_mutex); @@ -226,8 +248,8 @@ void perf_trace_del(struct perf_event *p_event, int flags)  	tp_event->class->reg(tp_event, TRACE_REG_PERF_DEL, p_event);  } -__kprobes void *perf_trace_buf_prepare(int size, unsigned short type, -				       struct pt_regs *regs, int *rctxp) +void *perf_trace_buf_prepare(int size, unsigned short type, +			     struct pt_regs *regs, int *rctxp)  {  	struct trace_entry *entry;  	unsigned long flags; @@ -259,6 +281,7 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type,  	return raw_data;  }  EXPORT_SYMBOL_GPL(perf_trace_buf_prepare); +NOKPROBE_SYMBOL(perf_trace_buf_prepare);  #ifdef CONFIG_FUNCTION_TRACER  static void diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 368a4d50cc3..2de53628689 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -27,12 +27,6 @@  DEFINE_MUTEX(event_mutex); -DEFINE_MUTEX(event_storage_mutex); -EXPORT_SYMBOL_GPL(event_storage_mutex); - -char event_storage[EVENT_STORAGE_SIZE]; -EXPORT_SYMBOL_GPL(event_storage); -  LIST_HEAD(ftrace_events);  static LIST_HEAD(ftrace_common_fields); @@ -194,29 +188,60 @@ int trace_event_raw_init(struct ftrace_event_call *call)  }  EXPORT_SYMBOL_GPL(trace_event_raw_init); +void *ftrace_event_buffer_reserve(struct ftrace_event_buffer *fbuffer, +				  struct ftrace_event_file *ftrace_file, +				  unsigned long len) +{ +	struct ftrace_event_call *event_call = ftrace_file->event_call; + +	local_save_flags(fbuffer->flags); +	fbuffer->pc = preempt_count(); +	fbuffer->ftrace_file = ftrace_file; + +	fbuffer->event = +		trace_event_buffer_lock_reserve(&fbuffer->buffer, ftrace_file, +						event_call->event.type, len, +						fbuffer->flags, fbuffer->pc); +	if (!fbuffer->event) +		return NULL; + +	fbuffer->entry = ring_buffer_event_data(fbuffer->event); +	return fbuffer->entry; +} +EXPORT_SYMBOL_GPL(ftrace_event_buffer_reserve); + +void ftrace_event_buffer_commit(struct ftrace_event_buffer *fbuffer) +{ +	event_trigger_unlock_commit(fbuffer->ftrace_file, fbuffer->buffer, +				    fbuffer->event, fbuffer->entry, +				    fbuffer->flags, fbuffer->pc); +} +EXPORT_SYMBOL_GPL(ftrace_event_buffer_commit); +  int ftrace_event_reg(struct ftrace_event_call *call,  		     enum trace_reg type, void *data)  {  	struct ftrace_event_file *file = data; +	WARN_ON(!(call->flags & TRACE_EVENT_FL_TRACEPOINT));  	switch (type) {  	case TRACE_REG_REGISTER: -		return 
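
The new ftrace_event_buffer_reserve()/ftrace_event_buffer_commit() pair wraps the reserve, fill-in, and trigger-aware commit steps that every event probe repeats. A hedged sketch of how a probe could use it; the entry layout and probe are invented, and the real callers are the macro-generated trace-event probes:

struct demo_entry {
	struct trace_entry	ent;
	unsigned long		value;
};

static void demo_probe(void *data, unsigned long value)
{
	struct ftrace_event_file *ftrace_file = data;
	struct ftrace_event_buffer fbuffer;
	struct demo_entry *entry;

	entry = ftrace_event_buffer_reserve(&fbuffer, ftrace_file,
					    sizeof(*entry));
	if (!entry)
		return;

	entry->value = value;

	/* commits the record and runs any deferred ("post") triggers */
	ftrace_event_buffer_commit(&fbuffer);
}
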
tracepoint_probe_register(call->name, +		return tracepoint_probe_register(call->tp,  						 call->class->probe,  						 file);  	case TRACE_REG_UNREGISTER: -		tracepoint_probe_unregister(call->name, +		tracepoint_probe_unregister(call->tp,  					    call->class->probe,  					    file);  		return 0;  #ifdef CONFIG_PERF_EVENTS  	case TRACE_REG_PERF_REGISTER: -		return tracepoint_probe_register(call->name, +		return tracepoint_probe_register(call->tp,  						 call->class->perf_probe,  						 call);  	case TRACE_REG_PERF_UNREGISTER: -		tracepoint_probe_unregister(call->name, +		tracepoint_probe_unregister(call->tp,  					    call->class->perf_probe,  					    call);  		return 0; @@ -328,7 +353,7 @@ static int __ftrace_event_enable_disable(struct ftrace_event_file *file,  			if (ret) {  				tracing_stop_cmdline_record();  				pr_info("event trace: Could not enable event " -					"%s\n", call->name); +					"%s\n", ftrace_event_name(call));  				break;  			}  			set_bit(FTRACE_EVENT_FL_ENABLED_BIT, &file->flags); @@ -342,6 +367,12 @@ static int __ftrace_event_enable_disable(struct ftrace_event_file *file,  	return ret;  } +int trace_event_enable_disable(struct ftrace_event_file *file, +			       int enable, int soft_disable) +{ +	return __ftrace_event_enable_disable(file, enable, soft_disable); +} +  static int ftrace_event_enable_disable(struct ftrace_event_file *file,  				       int enable)  { @@ -421,11 +452,6 @@ static void remove_subsystem(struct ftrace_subsystem_dir *dir)  	}  } -static void *event_file_data(struct file *filp) -{ -	return ACCESS_ONCE(file_inode(filp)->i_private); -} -  static void remove_event_file_dir(struct ftrace_event_file *file)  {  	struct dentry *dir = file->dir; @@ -444,6 +470,7 @@ static void remove_event_file_dir(struct ftrace_event_file *file)  	list_del(&file->list);  	remove_subsystem(file->system); +	free_event_filter(file->filter);  	kmem_cache_free(file_cachep, file);  } @@ -456,27 +483,29 @@ __ftrace_set_clr_event_nolock(struct trace_array *tr, const char *match,  {  	struct ftrace_event_file *file;  	struct ftrace_event_call *call; +	const char *name;  	int ret = -EINVAL;  	list_for_each_entry(file, &tr->events, list) {  		call = file->event_call; +		name = ftrace_event_name(call); -		if (!call->name || !call->class || !call->class->reg) +		if (!name || !call->class || !call->class->reg)  			continue;  		if (call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)  			continue;  		if (match && -		    strcmp(match, call->name) != 0 && +		    strcmp(match, name) != 0 &&  		    strcmp(match, call->class->system) != 0)  			continue;  		if (sub && strcmp(sub, call->class->system) != 0)  			continue; -		if (event && strcmp(event, call->name) != 0) +		if (event && strcmp(event, name) != 0)  			continue;  		ftrace_event_enable_disable(file, set); @@ -546,6 +575,9 @@ int trace_set_clr_event(const char *system, const char *event, int set)  {  	struct trace_array *tr = top_trace_array(); +	if (!tr) +		return -ENODEV; +  	return __ftrace_set_clr_event(tr, NULL, system, event, set);  }  EXPORT_SYMBOL_GPL(trace_set_clr_event); @@ -674,7 +706,7 @@ static int t_show(struct seq_file *m, void *v)  	if (strcmp(call->class->system, TRACE_SYSTEM) != 0)  		seq_printf(m, "%s:", call->class->system); -	seq_printf(m, "%s\n", call->name); +	seq_printf(m, "%s\n", ftrace_event_name(call));  	return 0;  } @@ -767,7 +799,7 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,  	mutex_lock(&event_mutex);  	list_for_each_entry(file, &tr->events, list) {  		call = file->event_call; -	
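
Most of the churn in this file is s/call->name/ftrace_event_name(call)/, because tracepoint-backed events now carry their name in call->tp rather than call->name. The accessor itself lives in include/linux/ftrace_event.h and is not part of this hunk; the following is a reconstruction of its likely shape, inferred from the call sites and the TRACE_EVENT_FL_TRACEPOINT warning above:

static inline const char *demo_event_name(struct ftrace_event_call *call)
{
	if (call->flags & TRACE_EVENT_FL_TRACEPOINT)
		return call->tp ? call->tp->name : NULL;

	return call->name;
}
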
	if (!call->name || !call->class || !call->class->reg) +		if (!ftrace_event_name(call) || !call->class || !call->class->reg)  			continue;  		if (system && strcmp(call->class->system, system->name) != 0) @@ -882,7 +914,7 @@ static int f_show(struct seq_file *m, void *v)  	switch ((unsigned long)v) {  	case FORMAT_HEADER: -		seq_printf(m, "name: %s\n", call->name); +		seq_printf(m, "name: %s\n", ftrace_event_name(call));  		seq_printf(m, "ID: %d\n", call->event.type);  		seq_printf(m, "format:\n");  		return 0; @@ -989,7 +1021,7 @@ static ssize_t  event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,  		  loff_t *ppos)  { -	struct ftrace_event_call *call; +	struct ftrace_event_file *file;  	struct trace_seq *s;  	int r = -ENODEV; @@ -1004,12 +1036,12 @@ event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,  	trace_seq_init(s);  	mutex_lock(&event_mutex); -	call = event_file_data(filp); -	if (call) -		print_event_filter(call, s); +	file = event_file_data(filp); +	if (file) +		print_event_filter(file, s);  	mutex_unlock(&event_mutex); -	if (call) +	if (file)  		r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);  	kfree(s); @@ -1021,7 +1053,7 @@ static ssize_t  event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,  		   loff_t *ppos)  { -	struct ftrace_event_call *call; +	struct ftrace_event_file *file;  	char *buf;  	int err = -ENODEV; @@ -1039,9 +1071,9 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,  	buf[cnt] = '\0';  	mutex_lock(&event_mutex); -	call = event_file_data(filp); -	if (call) -		err = apply_event_filter(call, buf); +	file = event_file_data(filp); +	if (file) +		err = apply_event_filter(file, buf);  	mutex_unlock(&event_mutex);  	free_page((unsigned long) buf); @@ -1062,6 +1094,9 @@ static int subsystem_open(struct inode *inode, struct file *filp)  	struct trace_array *tr;  	int ret; +	if (tracing_is_disabled()) +		return -ENODEV; +  	/* Make sure the system still exists */  	mutex_lock(&trace_types_lock);  	mutex_lock(&event_mutex); @@ -1108,6 +1143,9 @@ static int system_tr_open(struct inode *inode, struct file *filp)  	struct trace_array *tr = inode->i_private;  	int ret; +	if (tracing_is_disabled()) +		return -ENODEV; +  	if (trace_array_get(tr) < 0)  		return -ENODEV; @@ -1124,11 +1162,12 @@ static int system_tr_open(struct inode *inode, struct file *filp)  	if (ret < 0) {  		trace_array_put(tr);  		kfree(dir); +		return ret;  	}  	filp->private_data = dir; -	return ret; +	return 0;  }  static int subsystem_release(struct inode *inode, struct file *file) @@ -1495,6 +1534,7 @@ event_create_dir(struct dentry *parent, struct ftrace_event_file *file)  	struct trace_array *tr = file->tr;  	struct list_head *head;  	struct dentry *d_events; +	const char *name;  	int ret;  	/* @@ -1508,10 +1548,11 @@ event_create_dir(struct dentry *parent, struct ftrace_event_file *file)  	} else  		d_events = parent; -	file->dir = debugfs_create_dir(call->name, d_events); +	name = ftrace_event_name(call); +	file->dir = debugfs_create_dir(name, d_events);  	if (!file->dir) {  		pr_warning("Could not create debugfs '%s' directory\n", -			   call->name); +			   name);  		return -1;  	} @@ -1535,13 +1576,16 @@ event_create_dir(struct dentry *parent, struct ftrace_event_file *file)  		ret = call->class->define_fields(call);  		if (ret < 0) {  			pr_warning("Could not initialize trace point" -				   " events/%s\n", call->name); +				   " events/%s\n", name);  			return -1;  		}  	} -	trace_create_file("filter", 
0644, file->dir, call, +	trace_create_file("filter", 0644, file->dir, file,  			  &ftrace_event_filter_fops); +	trace_create_file("trigger", 0644, file->dir, file, +			  &event_trigger_fops); +  	trace_create_file("format", 0444, file->dir, call,  			  &ftrace_event_format_fops); @@ -1577,6 +1621,7 @@ static void event_remove(struct ftrace_event_call *call)  		if (file->event_call != call)  			continue;  		ftrace_event_enable_disable(file, 0); +		destroy_preds(file);  		/*  		 * The do_for_each_event_file() is  		 * a double loop. After finding the call for this @@ -1595,15 +1640,17 @@ static void event_remove(struct ftrace_event_call *call)  static int event_init(struct ftrace_event_call *call)  {  	int ret = 0; +	const char *name; -	if (WARN_ON(!call->name)) +	name = ftrace_event_name(call); +	if (WARN_ON(!name))  		return -EINVAL;  	if (call->class->raw_init) {  		ret = call->class->raw_init(call);  		if (ret < 0 && ret != -ENOSYS)  			pr_warn("Could not initialize trace events/%s\n", -				call->name); +				name);  	}  	return ret; @@ -1637,6 +1684,8 @@ trace_create_new_event(struct ftrace_event_call *call,  	file->event_call = call;  	file->tr = tr;  	atomic_set(&file->sm_ref, 0); +	atomic_set(&file->tm_ref, 0); +	INIT_LIST_HEAD(&file->triggers);  	list_add(&file->list, &tr->events);  	return file; @@ -1700,7 +1749,7 @@ static void __trace_remove_event_call(struct ftrace_event_call *call)  {  	event_remove(call);  	trace_destroy_fields(call); -	destroy_preds(call); +	destroy_call_preds(call);  }  static int probe_remove_event_call(struct ftrace_event_call *call) @@ -1763,6 +1812,16 @@ static void trace_module_add_events(struct module *mod)  {  	struct ftrace_event_call **call, **start, **end; +	if (!mod->num_trace_events) +		return; + +	/* Don't add infrastructure for mods without tracepoints */ +	if (trace_module_has_bad_taint(mod)) { +		pr_err("%s: module has bad taint, not creating trace events\n", +		       mod->name); +		return; +	} +  	start = mod->trace_events;  	end = mod->trace_events + mod->num_trace_events; @@ -1837,46 +1896,48 @@ __trace_add_event_dirs(struct trace_array *tr)  		ret = __trace_add_new_event(call, tr);  		if (ret < 0)  			pr_warning("Could not create directory for event %s\n", -				   call->name); +				   ftrace_event_name(call));  	}  } -#ifdef CONFIG_DYNAMIC_FTRACE - -/* Avoid typos */ -#define ENABLE_EVENT_STR	"enable_event" -#define DISABLE_EVENT_STR	"disable_event" - -struct event_probe_data { -	struct ftrace_event_file	*file; -	unsigned long			count; -	int				ref; -	bool				enable; -}; - -static struct ftrace_event_file * +struct ftrace_event_file *  find_event_file(struct trace_array *tr, const char *system,  const char *event)  {  	struct ftrace_event_file *file;  	struct ftrace_event_call *call; +	const char *name;  	list_for_each_entry(file, &tr->events, list) {  		call = file->event_call; +		name = ftrace_event_name(call); -		if (!call->name || !call->class || !call->class->reg) +		if (!name || !call->class || !call->class->reg)  			continue;  		if (call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)  			continue; -		if (strcmp(event, call->name) == 0 && +		if (strcmp(event, name) == 0 &&  		    strcmp(system, call->class->system) == 0)  			return file;  	}  	return NULL;  } +#ifdef CONFIG_DYNAMIC_FTRACE + +/* Avoid typos */ +#define ENABLE_EVENT_STR	"enable_event" +#define DISABLE_EVENT_STR	"disable_event" + +struct event_probe_data { +	struct ftrace_event_file	*file; +	unsigned long			count; +	int				ref; +	bool				enable; +}; +  static void  
event_enable_probe(unsigned long ip, unsigned long parent_ip, void **_data)  { @@ -1925,7 +1986,7 @@ event_enable_print(struct seq_file *m, unsigned long ip,  	seq_printf(m, "%s:%s:%s",  		   data->enable ? ENABLE_EVENT_STR : DISABLE_EVENT_STR,  		   data->file->event_call->class->system, -		   data->file->event_call->name); +		   ftrace_event_name(data->file->event_call));  	if (data->count == -1)  		seq_printf(m, ":unlimited\n"); @@ -2008,6 +2069,9 @@ event_enable_func(struct ftrace_hash *hash,  	bool enable;  	int ret; +	if (!tr) +		return -ENODEV; +  	/* hash funcs only work with set_ftrace_filter */  	if (!enabled || !param)  		return -EINVAL; @@ -2145,7 +2209,7 @@ __trace_early_add_event_dirs(struct trace_array *tr)  		ret = event_create_dir(tr->event_dir, file);  		if (ret < 0)  			pr_warning("Could not create directory for event %s\n", -				   file->event_call->name); +				   ftrace_event_name(file->event_call));  	}  } @@ -2169,7 +2233,7 @@ __trace_early_add_events(struct trace_array *tr)  		ret = __trace_early_add_new_event(call, tr);  		if (ret < 0)  			pr_warning("Could not create early event %s\n", -				   call->name); +				   ftrace_event_name(call));  	}  } @@ -2303,9 +2367,15 @@ int event_trace_del_tracer(struct trace_array *tr)  {  	mutex_lock(&event_mutex); +	/* Disable any event triggers and associated soft-disabled events */ +	clear_event_triggers(tr); +  	/* Disable any running events */  	__ftrace_set_clr_event_nolock(tr, NULL, NULL, NULL, 0); +	/* Access to events are within rcu_read_lock_sched() */ +	synchronize_sched(); +  	down_write(&trace_event_sem);  	__trace_remove_event_dirs(tr);  	debugfs_remove_recursive(tr->event_dir); @@ -2333,6 +2403,9 @@ static __init int event_trace_enable(void)  	char *token;  	int ret; +	if (!tr) +		return -ENODEV; +  	for_each_event(iter, __start_ftrace_events, __stop_ftrace_events) {  		call = *iter; @@ -2366,6 +2439,8 @@ static __init int event_trace_enable(void)  	register_event_cmds(); +	register_trigger_cmds(); +  	return 0;  } @@ -2377,6 +2452,8 @@ static __init int event_trace_init(void)  	int ret;  	tr = top_trace_array(); +	if (!tr) +		return -ENODEV;  	d_tracer = tracing_init_dentry();  	if (!d_tracer) @@ -2470,6 +2547,8 @@ static __init void event_trace_self_tests(void)  	int ret;  	tr = top_trace_array(); +	if (!tr) +		return;  	pr_info("Running tests on trace events:\n"); @@ -2493,7 +2572,7 @@ static __init void event_trace_self_tests(void)  			continue;  #endif -		pr_info("Testing event %s: ", call->name); +		pr_info("Testing event %s: ", ftrace_event_name(call));  		/*  		 * If an event is already enabled, someone is using diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 97daa8cf958..8a8631926a0 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -637,10 +637,18 @@ static void append_filter_err(struct filter_parse_state *ps,  	free_page((unsigned long) buf);  } +static inline struct event_filter *event_filter(struct ftrace_event_file *file) +{ +	if (file->event_call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) +		return file->event_call->filter; +	else +		return file->filter; +} +  /* caller must hold event_mutex */ -void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s) +void print_event_filter(struct ftrace_event_file *file, struct trace_seq *s)  { -	struct event_filter *filter = call->filter; +	struct event_filter *filter = event_filter(file);  	if (filter && filter->filter_string)  		trace_seq_printf(s, "%s\n", 
filter->filter_string); @@ -766,11 +774,21 @@ static void __free_preds(struct event_filter *filter)  	filter->n_preds = 0;  } -static void filter_disable(struct ftrace_event_call *call) +static void call_filter_disable(struct ftrace_event_call *call)  {  	call->flags &= ~TRACE_EVENT_FL_FILTERED;  } +static void filter_disable(struct ftrace_event_file *file) +{ +	struct ftrace_event_call *call = file->event_call; + +	if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) +		call_filter_disable(call); +	else +		file->flags &= ~FTRACE_EVENT_FL_FILTERED; +} +  static void __free_filter(struct event_filter *filter)  {  	if (!filter) @@ -781,16 +799,35 @@ static void __free_filter(struct event_filter *filter)  	kfree(filter);  } +void free_event_filter(struct event_filter *filter) +{ +	__free_filter(filter); +} + +void destroy_call_preds(struct ftrace_event_call *call) +{ +	__free_filter(call->filter); +	call->filter = NULL; +} + +static void destroy_file_preds(struct ftrace_event_file *file) +{ +	__free_filter(file->filter); +	file->filter = NULL; +} +  /* - * Called when destroying the ftrace_event_call. - * The call is being freed, so we do not need to worry about - * the call being currently used. This is for module code removing + * Called when destroying the ftrace_event_file. + * The file is being freed, so we do not need to worry about + * the file being currently used. This is for module code removing   * the tracepoints from within it.   */ -void destroy_preds(struct ftrace_event_call *call) +void destroy_preds(struct ftrace_event_file *file)  { -	__free_filter(call->filter); -	call->filter = NULL; +	if (file->event_call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) +		destroy_call_preds(file->event_call); +	else +		destroy_file_preds(file);  }  static struct event_filter *__alloc_filter(void) @@ -825,28 +862,56 @@ static int __alloc_preds(struct event_filter *filter, int n_preds)  	return 0;  } -static void filter_free_subsystem_preds(struct event_subsystem *system) +static inline void __remove_filter(struct ftrace_event_file *file)  { +	struct ftrace_event_call *call = file->event_call; + +	filter_disable(file); +	if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) +		remove_filter_string(call->filter); +	else +		remove_filter_string(file->filter); +} + +static void filter_free_subsystem_preds(struct event_subsystem *system, +					struct trace_array *tr) +{ +	struct ftrace_event_file *file;  	struct ftrace_event_call *call; -	list_for_each_entry(call, &ftrace_events, list) { +	list_for_each_entry(file, &tr->events, list) { +		call = file->event_call;  		if (strcmp(call->class->system, system->name) != 0)  			continue; -		filter_disable(call); -		remove_filter_string(call->filter); +		__remove_filter(file);  	}  } -static void filter_free_subsystem_filters(struct event_subsystem *system) +static inline void __free_subsystem_filter(struct ftrace_event_file *file)  { +	struct ftrace_event_call *call = file->event_call; + +	if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) { +		__free_filter(call->filter); +		call->filter = NULL; +	} else { +		__free_filter(file->filter); +		file->filter = NULL; +	} +} + +static void filter_free_subsystem_filters(struct event_subsystem *system, +					  struct trace_array *tr) +{ +	struct ftrace_event_file *file;  	struct ftrace_event_call *call; -	list_for_each_entry(call, &ftrace_events, list) { +	list_for_each_entry(file, &tr->events, list) { +		call = file->event_call;  		if (strcmp(call->class->system, system->name) != 0)  			continue; -		
__free_filter(call->filter); -		call->filter = NULL; +		__free_subsystem_filter(file);  	}  } @@ -1617,15 +1682,85 @@ fail:  	return err;  } +static inline void event_set_filtered_flag(struct ftrace_event_file *file) +{ +	struct ftrace_event_call *call = file->event_call; + +	if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) +		call->flags |= TRACE_EVENT_FL_FILTERED; +	else +		file->flags |= FTRACE_EVENT_FL_FILTERED; +} + +static inline void event_set_filter(struct ftrace_event_file *file, +				    struct event_filter *filter) +{ +	struct ftrace_event_call *call = file->event_call; + +	if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) +		rcu_assign_pointer(call->filter, filter); +	else +		rcu_assign_pointer(file->filter, filter); +} + +static inline void event_clear_filter(struct ftrace_event_file *file) +{ +	struct ftrace_event_call *call = file->event_call; + +	if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) +		RCU_INIT_POINTER(call->filter, NULL); +	else +		RCU_INIT_POINTER(file->filter, NULL); +} + +static inline void +event_set_no_set_filter_flag(struct ftrace_event_file *file) +{ +	struct ftrace_event_call *call = file->event_call; + +	if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) +		call->flags |= TRACE_EVENT_FL_NO_SET_FILTER; +	else +		file->flags |= FTRACE_EVENT_FL_NO_SET_FILTER; +} + +static inline void +event_clear_no_set_filter_flag(struct ftrace_event_file *file) +{ +	struct ftrace_event_call *call = file->event_call; + +	if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) +		call->flags &= ~TRACE_EVENT_FL_NO_SET_FILTER; +	else +		file->flags &= ~FTRACE_EVENT_FL_NO_SET_FILTER; +} + +static inline bool +event_no_set_filter_flag(struct ftrace_event_file *file) +{ +	struct ftrace_event_call *call = file->event_call; + +	if (file->flags & FTRACE_EVENT_FL_NO_SET_FILTER) +		return true; + +	if ((call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) && +	    (call->flags & TRACE_EVENT_FL_NO_SET_FILTER)) +		return true; + +	return false; +} +  struct filter_list {  	struct list_head	list;  	struct event_filter	*filter;  };  static int replace_system_preds(struct event_subsystem *system, +				struct trace_array *tr,  				struct filter_parse_state *ps,  				char *filter_string)  { +	struct ftrace_event_file *file;  	struct ftrace_event_call *call;  	struct filter_list *filter_item;  	struct filter_list *tmp; @@ -1633,8 +1768,8 @@ static int replace_system_preds(struct event_subsystem *system,  	bool fail = true;  	int err; -	list_for_each_entry(call, &ftrace_events, list) { - +	list_for_each_entry(file, &tr->events, list) { +		call = file->event_call;  		if (strcmp(call->class->system, system->name) != 0)  			continue; @@ -1644,18 +1779,20 @@ static int replace_system_preds(struct event_subsystem *system,  		 */  		err = replace_preds(call, NULL, ps, filter_string, true);  		if (err) -			call->flags |= TRACE_EVENT_FL_NO_SET_FILTER; +			event_set_no_set_filter_flag(file);  		else -			call->flags &= ~TRACE_EVENT_FL_NO_SET_FILTER; +			event_clear_no_set_filter_flag(file);  	} -	list_for_each_entry(call, &ftrace_events, list) { +	list_for_each_entry(file, &tr->events, list) {  		struct event_filter *filter; +		call = file->event_call; +  		if (strcmp(call->class->system, system->name) != 0)  			continue; -		if (call->flags & TRACE_EVENT_FL_NO_SET_FILTER) +		if (event_no_set_filter_flag(file))  			continue;  		filter_item = kzalloc(sizeof(*filter_item), GFP_KERNEL); @@ -1676,17 +1813,17 @@ static int replace_system_preds(struct event_subsystem *system,  		err = replace_preds(call, filter, ps, 
filter_string, false);  		if (err) { -			filter_disable(call); +			filter_disable(file);  			parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);  			append_filter_err(ps, filter);  		} else -			call->flags |= TRACE_EVENT_FL_FILTERED; +			event_set_filtered_flag(file);  		/*  		 * Regardless of if this returned an error, we still  		 * replace the filter for the call.  		 */ -		filter = call->filter; -		rcu_assign_pointer(call->filter, filter_item->filter); +		filter = event_filter(file); +		event_set_filter(file, filter_item->filter);  		filter_item->filter = filter;  		fail = false; @@ -1806,6 +1943,13 @@ static int create_filter(struct ftrace_event_call *call,  	return err;  } +int create_event_filter(struct ftrace_event_call *call, +			char *filter_str, bool set_str, +			struct event_filter **filterp) +{ +	return create_filter(call, filter_str, set_str, filterp); +} +  /**   * create_system_filter - create a filter for an event_subsystem   * @system: event_subsystem to create a filter for @@ -1816,6 +1960,7 @@ static int create_filter(struct ftrace_event_call *call,   * and always remembers @filter_str.   */  static int create_system_filter(struct event_subsystem *system, +				struct trace_array *tr,  				char *filter_str, struct event_filter **filterp)  {  	struct event_filter *filter = NULL; @@ -1824,7 +1969,7 @@ static int create_system_filter(struct event_subsystem *system,  	err = create_filter_start(filter_str, true, &ps, &filter);  	if (!err) { -		err = replace_system_preds(system, ps, filter_str); +		err = replace_system_preds(system, tr, ps, filter_str);  		if (!err) {  			/* System filters just show a default message */  			kfree(filter->filter_string); @@ -1840,20 +1985,25 @@ static int create_system_filter(struct event_subsystem *system,  }  /* caller must hold event_mutex */ -int apply_event_filter(struct ftrace_event_call *call, char *filter_string) +int apply_event_filter(struct ftrace_event_file *file, char *filter_string)  { +	struct ftrace_event_call *call = file->event_call;  	struct event_filter *filter;  	int err;  	if (!strcmp(strstrip(filter_string), "0")) { -		filter_disable(call); -		filter = call->filter; +		filter_disable(file); +		filter = event_filter(file); +  		if (!filter)  			return 0; -		RCU_INIT_POINTER(call->filter, NULL); + +		event_clear_filter(file); +  		/* Make sure the filter is not being used */  		synchronize_sched();  		__free_filter(filter); +  		return 0;  	} @@ -1866,14 +2016,15 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)  	 * string  	 */  	if (filter) { -		struct event_filter *tmp = call->filter; +		struct event_filter *tmp; +		tmp = event_filter(file);  		if (!err) -			call->flags |= TRACE_EVENT_FL_FILTERED; +			event_set_filtered_flag(file);  		else -			filter_disable(call); +			filter_disable(file); -		rcu_assign_pointer(call->filter, filter); +		event_set_filter(file, filter);  		if (tmp) {  			/* Make sure the call is done with the filter */ @@ -1889,6 +2040,7 @@ int apply_subsystem_event_filter(struct ftrace_subsystem_dir *dir,  				 char *filter_string)  {  	struct event_subsystem *system = dir->subsystem; +	struct trace_array *tr = dir->tr;  	struct event_filter *filter;  	int err = 0; @@ -1901,18 +2053,18 @@ int apply_subsystem_event_filter(struct ftrace_subsystem_dir *dir,  	}  	if (!strcmp(strstrip(filter_string), "0")) { -		filter_free_subsystem_preds(system); +		filter_free_subsystem_preds(system, tr);  		remove_filter_string(system->filter);  		filter = system->filter;  		system->filter = 
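
Both the per-event and the subsystem paths follow the same publish-then-free discipline: unhook the old filter with an RCU-safe pointer update, wait out readers with synchronize_sched() (filter_match_preds() runs under rcu_read_lock_sched()), and only then free it. A generic sketch of that pattern with an invented helper; it assumes event_mutex is held, as the callers above require:

#include <linux/rcupdate.h>

static void demo_replace_filter(struct event_filter __rcu **slot,
				struct event_filter *new_filter)
{
	struct event_filter *old;

	old = rcu_dereference_protected(*slot,
					lockdep_is_held(&event_mutex));
	rcu_assign_pointer(*slot, new_filter);

	if (old) {
		synchronize_sched();	/* wait for filter_match_preds() users */
		free_event_filter(old);
	}
}
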
NULL;  		/* Ensure all filters are no longer used */  		synchronize_sched(); -		filter_free_subsystem_filters(system); +		filter_free_subsystem_filters(system, tr);  		__free_filter(filter);  		goto out_unlock;  	} -	err = create_system_filter(system, filter_string, &filter); +	err = create_system_filter(system, tr, filter_string, &filter);  	if (filter) {  		/*  		 * No event actually uses the system filter diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c new file mode 100644 index 00000000000..4747b476a03 --- /dev/null +++ b/kernel/trace/trace_events_trigger.c @@ -0,0 +1,1437 @@ +/* + * trace_events_trigger - trace event triggers + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) 2013 Tom Zanussi <tom.zanussi@linux.intel.com> + */ + +#include <linux/module.h> +#include <linux/ctype.h> +#include <linux/mutex.h> +#include <linux/slab.h> + +#include "trace.h" + +static LIST_HEAD(trigger_commands); +static DEFINE_MUTEX(trigger_cmd_mutex); + +static void +trigger_data_free(struct event_trigger_data *data) +{ +	if (data->cmd_ops->set_filter) +		data->cmd_ops->set_filter(NULL, data, NULL); + +	synchronize_sched(); /* make sure current triggers exit before free */ +	kfree(data); +} + +/** + * event_triggers_call - Call triggers associated with a trace event + * @file: The ftrace_event_file associated with the event + * @rec: The trace entry for the event, NULL for unconditional invocation + * + * For each trigger associated with an event, invoke the trigger + * function registered with the associated trigger command.  If rec is + * non-NULL, it means that the trigger requires further processing and + * shouldn't be unconditionally invoked.  If rec is non-NULL and the + * trigger has a filter associated with it, rec will checked against + * the filter and if the record matches the trigger will be invoked. + * If the trigger is a 'post_trigger', meaning it shouldn't be invoked + * in any case until the current event is written, the trigger + * function isn't invoked but the bit associated with the deferred + * trigger is set in the return value. + * + * Returns an enum event_trigger_type value containing a set bit for + * any trigger that should be deferred, ETT_NONE if nothing to defer. + * + * Called from tracepoint handlers (with rcu_read_lock_sched() held). + * + * Return: an enum event_trigger_type value containing a set bit for + * any trigger that should be deferred, ETT_NONE if nothing to defer. 
+ */ +enum event_trigger_type +event_triggers_call(struct ftrace_event_file *file, void *rec) +{ +	struct event_trigger_data *data; +	enum event_trigger_type tt = ETT_NONE; +	struct event_filter *filter; + +	if (list_empty(&file->triggers)) +		return tt; + +	list_for_each_entry_rcu(data, &file->triggers, list) { +		if (!rec) { +			data->ops->func(data); +			continue; +		} +		filter = rcu_dereference_sched(data->filter); +		if (filter && !filter_match_preds(filter, rec)) +			continue; +		if (data->cmd_ops->post_trigger) { +			tt |= data->cmd_ops->trigger_type; +			continue; +		} +		data->ops->func(data); +	} +	return tt; +} +EXPORT_SYMBOL_GPL(event_triggers_call); + +/** + * event_triggers_post_call - Call 'post_triggers' for a trace event + * @file: The ftrace_event_file associated with the event + * @tt: enum event_trigger_type containing a set bit for each trigger to invoke + * + * For each trigger associated with an event, invoke the trigger + * function registered with the associated trigger command, if the + * corresponding bit is set in the tt enum passed into this function. + * See @event_triggers_call for details on how those bits are set. + * + * Called from tracepoint handlers (with rcu_read_lock_sched() held). + */ +void +event_triggers_post_call(struct ftrace_event_file *file, +			 enum event_trigger_type tt) +{ +	struct event_trigger_data *data; + +	list_for_each_entry_rcu(data, &file->triggers, list) { +		if (data->cmd_ops->trigger_type & tt) +			data->ops->func(data); +	} +} +EXPORT_SYMBOL_GPL(event_triggers_post_call); + +#define SHOW_AVAILABLE_TRIGGERS	(void *)(1UL) + +static void *trigger_next(struct seq_file *m, void *t, loff_t *pos) +{ +	struct ftrace_event_file *event_file = event_file_data(m->private); + +	if (t == SHOW_AVAILABLE_TRIGGERS) +		return NULL; + +	return seq_list_next(t, &event_file->triggers, pos); +} + +static void *trigger_start(struct seq_file *m, loff_t *pos) +{ +	struct ftrace_event_file *event_file; + +	/* ->stop() is called even if ->start() fails */ +	mutex_lock(&event_mutex); +	event_file = event_file_data(m->private); +	if (unlikely(!event_file)) +		return ERR_PTR(-ENODEV); + +	if (list_empty(&event_file->triggers)) +		return *pos == 0 ? 
SHOW_AVAILABLE_TRIGGERS : NULL; + +	return seq_list_start(&event_file->triggers, *pos); +} + +static void trigger_stop(struct seq_file *m, void *t) +{ +	mutex_unlock(&event_mutex); +} + +static int trigger_show(struct seq_file *m, void *v) +{ +	struct event_trigger_data *data; +	struct event_command *p; + +	if (v == SHOW_AVAILABLE_TRIGGERS) { +		seq_puts(m, "# Available triggers:\n"); +		seq_putc(m, '#'); +		mutex_lock(&trigger_cmd_mutex); +		list_for_each_entry_reverse(p, &trigger_commands, list) +			seq_printf(m, " %s", p->name); +		seq_putc(m, '\n'); +		mutex_unlock(&trigger_cmd_mutex); +		return 0; +	} + +	data = list_entry(v, struct event_trigger_data, list); +	data->ops->print(m, data->ops, data); + +	return 0; +} + +static const struct seq_operations event_triggers_seq_ops = { +	.start = trigger_start, +	.next = trigger_next, +	.stop = trigger_stop, +	.show = trigger_show, +}; + +static int event_trigger_regex_open(struct inode *inode, struct file *file) +{ +	int ret = 0; + +	mutex_lock(&event_mutex); + +	if (unlikely(!event_file_data(file))) { +		mutex_unlock(&event_mutex); +		return -ENODEV; +	} + +	if (file->f_mode & FMODE_READ) { +		ret = seq_open(file, &event_triggers_seq_ops); +		if (!ret) { +			struct seq_file *m = file->private_data; +			m->private = file; +		} +	} + +	mutex_unlock(&event_mutex); + +	return ret; +} + +static int trigger_process_regex(struct ftrace_event_file *file, char *buff) +{ +	char *command, *next = buff; +	struct event_command *p; +	int ret = -EINVAL; + +	command = strsep(&next, ": \t"); +	command = (command[0] != '!') ? command : command + 1; + +	mutex_lock(&trigger_cmd_mutex); +	list_for_each_entry(p, &trigger_commands, list) { +		if (strcmp(p->name, command) == 0) { +			ret = p->func(p, file, buff, command, next); +			goto out_unlock; +		} +	} + out_unlock: +	mutex_unlock(&trigger_cmd_mutex); + +	return ret; +} + +static ssize_t event_trigger_regex_write(struct file *file, +					 const char __user *ubuf, +					 size_t cnt, loff_t *ppos) +{ +	struct ftrace_event_file *event_file; +	ssize_t ret; +	char *buf; + +	if (!cnt) +		return 0; + +	if (cnt >= PAGE_SIZE) +		return -EINVAL; + +	buf = (char *)__get_free_page(GFP_TEMPORARY); +	if (!buf) +		return -ENOMEM; + +	if (copy_from_user(buf, ubuf, cnt)) { +		free_page((unsigned long)buf); +		return -EFAULT; +	} +	buf[cnt] = '\0'; +	strim(buf); + +	mutex_lock(&event_mutex); +	event_file = event_file_data(file); +	if (unlikely(!event_file)) { +		mutex_unlock(&event_mutex); +		free_page((unsigned long)buf); +		return -ENODEV; +	} +	ret = trigger_process_regex(event_file, buf); +	mutex_unlock(&event_mutex); + +	free_page((unsigned long)buf); +	if (ret < 0) +		goto out; + +	*ppos += cnt; +	ret = cnt; + out: +	return ret; +} + +static int event_trigger_regex_release(struct inode *inode, struct file *file) +{ +	mutex_lock(&event_mutex); + +	if (file->f_mode & FMODE_READ) +		seq_release(inode, file); + +	mutex_unlock(&event_mutex); + +	return 0; +} + +static ssize_t +event_trigger_write(struct file *filp, const char __user *ubuf, +		    size_t cnt, loff_t *ppos) +{ +	return event_trigger_regex_write(filp, ubuf, cnt, ppos); +} + +static int +event_trigger_open(struct inode *inode, struct file *filp) +{ +	return event_trigger_regex_open(inode, filp); +} + +static int +event_trigger_release(struct inode *inode, struct file *file) +{ +	return event_trigger_regex_release(inode, file); +} + +const struct file_operations event_trigger_fops = { +	.open = event_trigger_open, +	.read = seq_read, +	.write = 
event_trigger_write, +	.llseek = tracing_lseek, +	.release = event_trigger_release, +}; + +/* + * Currently we only register event commands from __init, so mark this + * __init too. + */ +static __init int register_event_command(struct event_command *cmd) +{ +	struct event_command *p; +	int ret = 0; + +	mutex_lock(&trigger_cmd_mutex); +	list_for_each_entry(p, &trigger_commands, list) { +		if (strcmp(cmd->name, p->name) == 0) { +			ret = -EBUSY; +			goto out_unlock; +		} +	} +	list_add(&cmd->list, &trigger_commands); + out_unlock: +	mutex_unlock(&trigger_cmd_mutex); + +	return ret; +} + +/* + * Currently we only unregister event commands from __init, so mark + * this __init too. + */ +static __init int unregister_event_command(struct event_command *cmd) +{ +	struct event_command *p, *n; +	int ret = -ENODEV; + +	mutex_lock(&trigger_cmd_mutex); +	list_for_each_entry_safe(p, n, &trigger_commands, list) { +		if (strcmp(cmd->name, p->name) == 0) { +			ret = 0; +			list_del_init(&p->list); +			goto out_unlock; +		} +	} + out_unlock: +	mutex_unlock(&trigger_cmd_mutex); + +	return ret; +} + +/** + * event_trigger_print - Generic event_trigger_ops @print implementation + * @name: The name of the event trigger + * @m: The seq_file being printed to + * @data: Trigger-specific data + * @filter_str: filter_str to print, if present + * + * Common implementation for event triggers to print themselves. + * + * Usually wrapped by a function that simply sets the @name of the + * trigger command and then invokes this. + * + * Return: 0 on success, errno otherwise + */ +static int +event_trigger_print(const char *name, struct seq_file *m, +		    void *data, char *filter_str) +{ +	long count = (long)data; + +	seq_printf(m, "%s", name); + +	if (count == -1) +		seq_puts(m, ":unlimited"); +	else +		seq_printf(m, ":count=%ld", count); + +	if (filter_str) +		seq_printf(m, " if %s\n", filter_str); +	else +		seq_puts(m, "\n"); + +	return 0; +} + +/** + * event_trigger_init - Generic event_trigger_ops @init implementation + * @ops: The trigger ops associated with the trigger + * @data: Trigger-specific data + * + * Common implementation of event trigger initialization. + * + * Usually used directly as the @init method in event trigger + * implementations. + * + * Return: 0 on success, errno otherwise + */ +static int +event_trigger_init(struct event_trigger_ops *ops, +		   struct event_trigger_data *data) +{ +	data->ref++; +	return 0; +} + +/** + * event_trigger_free - Generic event_trigger_ops @free implementation + * @ops: The trigger ops associated with the trigger + * @data: Trigger-specific data + * + * Common implementation of event trigger de-initialization. + * + * Usually used directly as the @free method in event trigger + * implementations. 
+ */ +static void +event_trigger_free(struct event_trigger_ops *ops, +		   struct event_trigger_data *data) +{ +	if (WARN_ON_ONCE(data->ref <= 0)) +		return; + +	data->ref--; +	if (!data->ref) +		trigger_data_free(data); +} + +static int trace_event_trigger_enable_disable(struct ftrace_event_file *file, +					      int trigger_enable) +{ +	int ret = 0; + +	if (trigger_enable) { +		if (atomic_inc_return(&file->tm_ref) > 1) +			return ret; +		set_bit(FTRACE_EVENT_FL_TRIGGER_MODE_BIT, &file->flags); +		ret = trace_event_enable_disable(file, 1, 1); +	} else { +		if (atomic_dec_return(&file->tm_ref) > 0) +			return ret; +		clear_bit(FTRACE_EVENT_FL_TRIGGER_MODE_BIT, &file->flags); +		ret = trace_event_enable_disable(file, 0, 1); +	} + +	return ret; +} + +/** + * clear_event_triggers - Clear all triggers associated with a trace array + * @tr: The trace array to clear + * + * For each trigger, the triggering event has its tm_ref decremented + * via trace_event_trigger_enable_disable(), and any associated event + * (in the case of enable/disable_event triggers) will have its sm_ref + * decremented via free()->trace_event_enable_disable().  That + * combination effectively reverses the soft-mode/trigger state added + * by trigger registration. + * + * Must be called with event_mutex held. + */ +void +clear_event_triggers(struct trace_array *tr) +{ +	struct ftrace_event_file *file; + +	list_for_each_entry(file, &tr->events, list) { +		struct event_trigger_data *data; +		list_for_each_entry_rcu(data, &file->triggers, list) { +			trace_event_trigger_enable_disable(file, 0); +			if (data->ops->free) +				data->ops->free(data->ops, data); +		} +	} +} + +/** + * update_cond_flag - Set or reset the TRIGGER_COND bit + * @file: The ftrace_event_file associated with the event + * + * If an event has triggers and any of those triggers has a filter or + * a post_trigger, trigger invocation needs to be deferred until after + * the current event has logged its data, and the event should have + * its TRIGGER_COND bit set, otherwise the TRIGGER_COND bit should be + * cleared. + */ +static void update_cond_flag(struct ftrace_event_file *file) +{ +	struct event_trigger_data *data; +	bool set_cond = false; + +	list_for_each_entry_rcu(data, &file->triggers, list) { +		if (data->filter || data->cmd_ops->post_trigger) { +			set_cond = true; +			break; +		} +	} + +	if (set_cond) +		set_bit(FTRACE_EVENT_FL_TRIGGER_COND_BIT, &file->flags); +	else +		clear_bit(FTRACE_EVENT_FL_TRIGGER_COND_BIT, &file->flags); +} + +/** + * register_trigger - Generic event_command @reg implementation + * @glob: The raw string used to register the trigger + * @ops: The trigger ops associated with the trigger + * @data: Trigger-specific data to associate with the trigger + * @file: The ftrace_event_file associated with the event + * + * Common implementation for event trigger registration. + * + * Usually used directly as the @reg method in event command + * implementations. 
+ * + * Return: 0 on success, errno otherwise + */ +static int register_trigger(char *glob, struct event_trigger_ops *ops, +			    struct event_trigger_data *data, +			    struct ftrace_event_file *file) +{ +	struct event_trigger_data *test; +	int ret = 0; + +	list_for_each_entry_rcu(test, &file->triggers, list) { +		if (test->cmd_ops->trigger_type == data->cmd_ops->trigger_type) { +			ret = -EEXIST; +			goto out; +		} +	} + +	if (data->ops->init) { +		ret = data->ops->init(data->ops, data); +		if (ret < 0) +			goto out; +	} + +	list_add_rcu(&data->list, &file->triggers); +	ret++; + +	if (trace_event_trigger_enable_disable(file, 1) < 0) { +		list_del_rcu(&data->list); +		ret--; +	} +	update_cond_flag(file); +out: +	return ret; +} + +/** + * unregister_trigger - Generic event_command @unreg implementation + * @glob: The raw string used to register the trigger + * @ops: The trigger ops associated with the trigger + * @test: Trigger-specific data used to find the trigger to remove + * @file: The ftrace_event_file associated with the event + * + * Common implementation for event trigger unregistration. + * + * Usually used directly as the @unreg method in event command + * implementations. + */ +static void unregister_trigger(char *glob, struct event_trigger_ops *ops, +			       struct event_trigger_data *test, +			       struct ftrace_event_file *file) +{ +	struct event_trigger_data *data; +	bool unregistered = false; + +	list_for_each_entry_rcu(data, &file->triggers, list) { +		if (data->cmd_ops->trigger_type == test->cmd_ops->trigger_type) { +			unregistered = true; +			list_del_rcu(&data->list); +			update_cond_flag(file); +			trace_event_trigger_enable_disable(file, 0); +			break; +		} +	} + +	if (unregistered && data->ops->free) +		data->ops->free(data->ops, data); +} + +/** + * event_trigger_callback - Generic event_command @func implementation + * @cmd_ops: The command ops, used for trigger registration + * @file: The ftrace_event_file associated with the event + * @glob: The raw string used to register the trigger + * @cmd: The cmd portion of the string used to register the trigger + * @param: The params portion of the string used to register the trigger + * + * Common implementation for event command parsing and trigger + * instantiation. + * + * Usually used directly as the @func method in event command + * implementations. + * + * Return: 0 on success, errno otherwise + */ +static int +event_trigger_callback(struct event_command *cmd_ops, +		       struct ftrace_event_file *file, +		       char *glob, char *cmd, char *param) +{ +	struct event_trigger_data *trigger_data; +	struct event_trigger_ops *trigger_ops; +	char *trigger = NULL; +	char *number; +	int ret; + +	/* separate the trigger from the filter (t:n [if filter]) */ +	if (param && isdigit(param[0])) +		trigger = strsep(&param, " \t"); + +	trigger_ops = cmd_ops->get_trigger_ops(cmd, trigger); + +	ret = -ENOMEM; +	trigger_data = kzalloc(sizeof(*trigger_data), GFP_KERNEL); +	if (!trigger_data) +		goto out; + +	trigger_data->count = -1; +	trigger_data->ops = trigger_ops; +	trigger_data->cmd_ops = cmd_ops; +	INIT_LIST_HEAD(&trigger_data->list); + +	if (glob[0] == '!') { +		cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file); +		kfree(trigger_data); +		ret = 0; +		goto out; +	} + +	if (trigger) { +		number = strsep(&trigger, ":"); + +		ret = -EINVAL; +		if (!strlen(number)) +			goto out_free; + +		/* +		 * We use the callback data field (which is a pointer) +		 * as our counter. 
+		 */ +		ret = kstrtoul(number, 0, &trigger_data->count); +		if (ret) +			goto out_free; +	} + +	if (!param) /* if param is non-empty, it's supposed to be a filter */ +		goto out_reg; + +	if (!cmd_ops->set_filter) +		goto out_reg; + +	ret = cmd_ops->set_filter(param, trigger_data, file); +	if (ret < 0) +		goto out_free; + + out_reg: +	ret = cmd_ops->reg(glob, trigger_ops, trigger_data, file); +	/* +	 * The above returns on success the # of functions enabled, +	 * but if it didn't find any functions it returns zero. +	 * Consider no functions a failure too. +	 */ +	if (!ret) { +		ret = -ENOENT; +		goto out_free; +	} else if (ret < 0) +		goto out_free; +	ret = 0; + out: +	return ret; + + out_free: +	if (cmd_ops->set_filter) +		cmd_ops->set_filter(NULL, trigger_data, NULL); +	kfree(trigger_data); +	goto out; +} + +/** + * set_trigger_filter - Generic event_command @set_filter implementation + * @filter_str: The filter string for the trigger, NULL to remove filter + * @trigger_data: Trigger-specific data + * @file: The ftrace_event_file associated with the event + * + * Common implementation for event command filter parsing and filter + * instantiation. + * + * Usually used directly as the @set_filter method in event command + * implementations. + * + * Also used to remove a filter (if filter_str = NULL). + * + * Return: 0 on success, errno otherwise + */ +static int set_trigger_filter(char *filter_str, +			      struct event_trigger_data *trigger_data, +			      struct ftrace_event_file *file) +{ +	struct event_trigger_data *data = trigger_data; +	struct event_filter *filter = NULL, *tmp; +	int ret = -EINVAL; +	char *s; + +	if (!filter_str) /* clear the current filter */ +		goto assign; + +	s = strsep(&filter_str, " \t"); + +	if (!strlen(s) || strcmp(s, "if") != 0) +		goto out; + +	if (!filter_str) +		goto out; + +	/* The filter is for the 'trigger' event, not the triggered event */ +	ret = create_event_filter(file->event_call, filter_str, false, &filter); +	if (ret) +		goto out; + assign: +	tmp = rcu_access_pointer(data->filter); + +	rcu_assign_pointer(data->filter, filter); + +	if (tmp) { +		/* Make sure the call is done with the filter */ +		synchronize_sched(); +		free_event_filter(tmp); +	} + +	kfree(data->filter_str); +	data->filter_str = NULL; + +	if (filter_str) { +		data->filter_str = kstrdup(filter_str, GFP_KERNEL); +		if (!data->filter_str) { +			free_event_filter(rcu_access_pointer(data->filter)); +			data->filter = NULL; +			ret = -ENOMEM; +		} +	} + out: +	return ret; +} + +static void +traceon_trigger(struct event_trigger_data *data) +{ +	if (tracing_is_on()) +		return; + +	tracing_on(); +} + +static void +traceon_count_trigger(struct event_trigger_data *data) +{ +	if (tracing_is_on()) +		return; + +	if (!data->count) +		return; + +	if (data->count != -1) +		(data->count)--; + +	tracing_on(); +} + +static void +traceoff_trigger(struct event_trigger_data *data) +{ +	if (!tracing_is_on()) +		return; + +	tracing_off(); +} + +static void +traceoff_count_trigger(struct event_trigger_data *data) +{ +	if (!tracing_is_on()) +		return; + +	if (!data->count) +		return; + +	if (data->count != -1) +		(data->count)--; + +	tracing_off(); +} + +static int +traceon_trigger_print(struct seq_file *m, struct event_trigger_ops *ops, +		      struct event_trigger_data *data) +{ +	return event_trigger_print("traceon", m, (void *)data->count, +				   data->filter_str); +} + +static int +traceoff_trigger_print(struct seq_file *m, struct event_trigger_ops *ops, +		       struct event_trigger_data 
*data) +{ +	return event_trigger_print("traceoff", m, (void *)data->count, +				   data->filter_str); +} + +static struct event_trigger_ops traceon_trigger_ops = { +	.func			= traceon_trigger, +	.print			= traceon_trigger_print, +	.init			= event_trigger_init, +	.free			= event_trigger_free, +}; + +static struct event_trigger_ops traceon_count_trigger_ops = { +	.func			= traceon_count_trigger, +	.print			= traceon_trigger_print, +	.init			= event_trigger_init, +	.free			= event_trigger_free, +}; + +static struct event_trigger_ops traceoff_trigger_ops = { +	.func			= traceoff_trigger, +	.print			= traceoff_trigger_print, +	.init			= event_trigger_init, +	.free			= event_trigger_free, +}; + +static struct event_trigger_ops traceoff_count_trigger_ops = { +	.func			= traceoff_count_trigger, +	.print			= traceoff_trigger_print, +	.init			= event_trigger_init, +	.free			= event_trigger_free, +}; + +static struct event_trigger_ops * +onoff_get_trigger_ops(char *cmd, char *param) +{ +	struct event_trigger_ops *ops; + +	/* we register both traceon and traceoff to this callback */ +	if (strcmp(cmd, "traceon") == 0) +		ops = param ? &traceon_count_trigger_ops : +			&traceon_trigger_ops; +	else +		ops = param ? &traceoff_count_trigger_ops : +			&traceoff_trigger_ops; + +	return ops; +} + +static struct event_command trigger_traceon_cmd = { +	.name			= "traceon", +	.trigger_type		= ETT_TRACE_ONOFF, +	.func			= event_trigger_callback, +	.reg			= register_trigger, +	.unreg			= unregister_trigger, +	.get_trigger_ops	= onoff_get_trigger_ops, +	.set_filter		= set_trigger_filter, +}; + +static struct event_command trigger_traceoff_cmd = { +	.name			= "traceoff", +	.trigger_type		= ETT_TRACE_ONOFF, +	.func			= event_trigger_callback, +	.reg			= register_trigger, +	.unreg			= unregister_trigger, +	.get_trigger_ops	= onoff_get_trigger_ops, +	.set_filter		= set_trigger_filter, +}; + +#ifdef CONFIG_TRACER_SNAPSHOT +static void +snapshot_trigger(struct event_trigger_data *data) +{ +	tracing_snapshot(); +} + +static void +snapshot_count_trigger(struct event_trigger_data *data) +{ +	if (!data->count) +		return; + +	if (data->count != -1) +		(data->count)--; + +	snapshot_trigger(data); +} + +static int +register_snapshot_trigger(char *glob, struct event_trigger_ops *ops, +			  struct event_trigger_data *data, +			  struct ftrace_event_file *file) +{ +	int ret = register_trigger(glob, ops, data, file); + +	if (ret > 0 && tracing_alloc_snapshot() != 0) { +		unregister_trigger(glob, ops, data, file); +		ret = 0; +	} + +	return ret; +} + +static int +snapshot_trigger_print(struct seq_file *m, struct event_trigger_ops *ops, +		       struct event_trigger_data *data) +{ +	return event_trigger_print("snapshot", m, (void *)data->count, +				   data->filter_str); +} + +static struct event_trigger_ops snapshot_trigger_ops = { +	.func			= snapshot_trigger, +	.print			= snapshot_trigger_print, +	.init			= event_trigger_init, +	.free			= event_trigger_free, +}; + +static struct event_trigger_ops snapshot_count_trigger_ops = { +	.func			= snapshot_count_trigger, +	.print			= snapshot_trigger_print, +	.init			= event_trigger_init, +	.free			= event_trigger_free, +}; + +static struct event_trigger_ops * +snapshot_get_trigger_ops(char *cmd, char *param) +{ +	return param ? 
&snapshot_count_trigger_ops : &snapshot_trigger_ops; +} + +static struct event_command trigger_snapshot_cmd = { +	.name			= "snapshot", +	.trigger_type		= ETT_SNAPSHOT, +	.func			= event_trigger_callback, +	.reg			= register_snapshot_trigger, +	.unreg			= unregister_trigger, +	.get_trigger_ops	= snapshot_get_trigger_ops, +	.set_filter		= set_trigger_filter, +}; + +static __init int register_trigger_snapshot_cmd(void) +{ +	int ret; + +	ret = register_event_command(&trigger_snapshot_cmd); +	WARN_ON(ret < 0); + +	return ret; +} +#else +static __init int register_trigger_snapshot_cmd(void) { return 0; } +#endif /* CONFIG_TRACER_SNAPSHOT */ + +#ifdef CONFIG_STACKTRACE +/* + * Skip 3: + *   stacktrace_trigger() + *   event_triggers_post_call() + *   ftrace_raw_event_xxx() + */ +#define STACK_SKIP 3 + +static void +stacktrace_trigger(struct event_trigger_data *data) +{ +	trace_dump_stack(STACK_SKIP); +} + +static void +stacktrace_count_trigger(struct event_trigger_data *data) +{ +	if (!data->count) +		return; + +	if (data->count != -1) +		(data->count)--; + +	stacktrace_trigger(data); +} + +static int +stacktrace_trigger_print(struct seq_file *m, struct event_trigger_ops *ops, +			 struct event_trigger_data *data) +{ +	return event_trigger_print("stacktrace", m, (void *)data->count, +				   data->filter_str); +} + +static struct event_trigger_ops stacktrace_trigger_ops = { +	.func			= stacktrace_trigger, +	.print			= stacktrace_trigger_print, +	.init			= event_trigger_init, +	.free			= event_trigger_free, +}; + +static struct event_trigger_ops stacktrace_count_trigger_ops = { +	.func			= stacktrace_count_trigger, +	.print			= stacktrace_trigger_print, +	.init			= event_trigger_init, +	.free			= event_trigger_free, +}; + +static struct event_trigger_ops * +stacktrace_get_trigger_ops(char *cmd, char *param) +{ +	return param ? 
&stacktrace_count_trigger_ops : &stacktrace_trigger_ops; +} + +static struct event_command trigger_stacktrace_cmd = { +	.name			= "stacktrace", +	.trigger_type		= ETT_STACKTRACE, +	.post_trigger		= true, +	.func			= event_trigger_callback, +	.reg			= register_trigger, +	.unreg			= unregister_trigger, +	.get_trigger_ops	= stacktrace_get_trigger_ops, +	.set_filter		= set_trigger_filter, +}; + +static __init int register_trigger_stacktrace_cmd(void) +{ +	int ret; + +	ret = register_event_command(&trigger_stacktrace_cmd); +	WARN_ON(ret < 0); + +	return ret; +} +#else +static __init int register_trigger_stacktrace_cmd(void) { return 0; } +#endif /* CONFIG_STACKTRACE */ + +static __init void unregister_trigger_traceon_traceoff_cmds(void) +{ +	unregister_event_command(&trigger_traceon_cmd); +	unregister_event_command(&trigger_traceoff_cmd); +} + +/* Avoid typos */ +#define ENABLE_EVENT_STR	"enable_event" +#define DISABLE_EVENT_STR	"disable_event" + +struct enable_trigger_data { +	struct ftrace_event_file	*file; +	bool				enable; +}; + +static void +event_enable_trigger(struct event_trigger_data *data) +{ +	struct enable_trigger_data *enable_data = data->private_data; + +	if (enable_data->enable) +		clear_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &enable_data->file->flags); +	else +		set_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &enable_data->file->flags); +} + +static void +event_enable_count_trigger(struct event_trigger_data *data) +{ +	struct enable_trigger_data *enable_data = data->private_data; + +	if (!data->count) +		return; + +	/* Skip if the event is in a state we want to switch to */ +	if (enable_data->enable == !(enable_data->file->flags & FTRACE_EVENT_FL_SOFT_DISABLED)) +		return; + +	if (data->count != -1) +		(data->count)--; + +	event_enable_trigger(data); +} + +static int +event_enable_trigger_print(struct seq_file *m, struct event_trigger_ops *ops, +			   struct event_trigger_data *data) +{ +	struct enable_trigger_data *enable_data = data->private_data; + +	seq_printf(m, "%s:%s:%s", +		   enable_data->enable ? 
ENABLE_EVENT_STR : DISABLE_EVENT_STR, +		   enable_data->file->event_call->class->system, +		   ftrace_event_name(enable_data->file->event_call)); + +	if (data->count == -1) +		seq_puts(m, ":unlimited"); +	else +		seq_printf(m, ":count=%ld", data->count); + +	if (data->filter_str) +		seq_printf(m, " if %s\n", data->filter_str); +	else +		seq_puts(m, "\n"); + +	return 0; +} + +static void +event_enable_trigger_free(struct event_trigger_ops *ops, +			  struct event_trigger_data *data) +{ +	struct enable_trigger_data *enable_data = data->private_data; + +	if (WARN_ON_ONCE(data->ref <= 0)) +		return; + +	data->ref--; +	if (!data->ref) { +		/* Remove the SOFT_MODE flag */ +		trace_event_enable_disable(enable_data->file, 0, 1); +		module_put(enable_data->file->event_call->mod); +		trigger_data_free(data); +		kfree(enable_data); +	} +} + +static struct event_trigger_ops event_enable_trigger_ops = { +	.func			= event_enable_trigger, +	.print			= event_enable_trigger_print, +	.init			= event_trigger_init, +	.free			= event_enable_trigger_free, +}; + +static struct event_trigger_ops event_enable_count_trigger_ops = { +	.func			= event_enable_count_trigger, +	.print			= event_enable_trigger_print, +	.init			= event_trigger_init, +	.free			= event_enable_trigger_free, +}; + +static struct event_trigger_ops event_disable_trigger_ops = { +	.func			= event_enable_trigger, +	.print			= event_enable_trigger_print, +	.init			= event_trigger_init, +	.free			= event_enable_trigger_free, +}; + +static struct event_trigger_ops event_disable_count_trigger_ops = { +	.func			= event_enable_count_trigger, +	.print			= event_enable_trigger_print, +	.init			= event_trigger_init, +	.free			= event_enable_trigger_free, +}; + +static int +event_enable_trigger_func(struct event_command *cmd_ops, +			  struct ftrace_event_file *file, +			  char *glob, char *cmd, char *param) +{ +	struct ftrace_event_file *event_enable_file; +	struct enable_trigger_data *enable_data; +	struct event_trigger_data *trigger_data; +	struct event_trigger_ops *trigger_ops; +	struct trace_array *tr = file->tr; +	const char *system; +	const char *event; +	char *trigger; +	char *number; +	bool enable; +	int ret; + +	if (!param) +		return -EINVAL; + +	/* separate the trigger from the filter (s:e:n [if filter]) */ +	trigger = strsep(&param, " \t"); +	if (!trigger) +		return -EINVAL; + +	system = strsep(&trigger, ":"); +	if (!trigger) +		return -EINVAL; + +	event = strsep(&trigger, ":"); + +	ret = -EINVAL; +	event_enable_file = find_event_file(tr, system, event); +	if (!event_enable_file) +		goto out; + +	enable = strcmp(cmd, ENABLE_EVENT_STR) == 0; + +	trigger_ops = cmd_ops->get_trigger_ops(cmd, trigger); + +	ret = -ENOMEM; +	trigger_data = kzalloc(sizeof(*trigger_data), GFP_KERNEL); +	if (!trigger_data) +		goto out; + +	enable_data = kzalloc(sizeof(*enable_data), GFP_KERNEL); +	if (!enable_data) { +		kfree(trigger_data); +		goto out; +	} + +	trigger_data->count = -1; +	trigger_data->ops = trigger_ops; +	trigger_data->cmd_ops = cmd_ops; +	INIT_LIST_HEAD(&trigger_data->list); +	RCU_INIT_POINTER(trigger_data->filter, NULL); + +	enable_data->enable = enable; +	enable_data->file = event_enable_file; +	trigger_data->private_data = enable_data; + +	if (glob[0] == '!') { +		cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file); +		kfree(trigger_data); +		kfree(enable_data); +		ret = 0; +		goto out; +	} + +	if (trigger) { +		number = strsep(&trigger, ":"); + +		ret = -EINVAL; +		if (!strlen(number)) +			goto out_free; + +		/* +		 * We use the callback data 
field (which is a pointer) +		 * as our counter. +		 */ +		ret = kstrtoul(number, 0, &trigger_data->count); +		if (ret) +			goto out_free; +	} + +	if (!param) /* if param is non-empty, it's supposed to be a filter */ +		goto out_reg; + +	if (!cmd_ops->set_filter) +		goto out_reg; + +	ret = cmd_ops->set_filter(param, trigger_data, file); +	if (ret < 0) +		goto out_free; + + out_reg: +	/* Don't let event modules unload while probe registered */ +	ret = try_module_get(event_enable_file->event_call->mod); +	if (!ret) { +		ret = -EBUSY; +		goto out_free; +	} + +	ret = trace_event_enable_disable(event_enable_file, 1, 1); +	if (ret < 0) +		goto out_put; +	ret = cmd_ops->reg(glob, trigger_ops, trigger_data, file); +	/* +	 * The above returns on success the # of functions enabled, +	 * but if it didn't find any functions it returns zero. +	 * Consider no functions a failure too. +	 */ +	if (!ret) { +		ret = -ENOENT; +		goto out_disable; +	} else if (ret < 0) +		goto out_disable; +	/* Just return zero, not the number of enabled functions */ +	ret = 0; + out: +	return ret; + + out_disable: +	trace_event_enable_disable(event_enable_file, 0, 1); + out_put: +	module_put(event_enable_file->event_call->mod); + out_free: +	if (cmd_ops->set_filter) +		cmd_ops->set_filter(NULL, trigger_data, NULL); +	kfree(trigger_data); +	kfree(enable_data); +	goto out; +} + +static int event_enable_register_trigger(char *glob, +					 struct event_trigger_ops *ops, +					 struct event_trigger_data *data, +					 struct ftrace_event_file *file) +{ +	struct enable_trigger_data *enable_data = data->private_data; +	struct enable_trigger_data *test_enable_data; +	struct event_trigger_data *test; +	int ret = 0; + +	list_for_each_entry_rcu(test, &file->triggers, list) { +		test_enable_data = test->private_data; +		if (test_enable_data && +		    (test_enable_data->file == enable_data->file)) { +			ret = -EEXIST; +			goto out; +		} +	} + +	if (data->ops->init) { +		ret = data->ops->init(data->ops, data); +		if (ret < 0) +			goto out; +	} + +	list_add_rcu(&data->list, &file->triggers); +	ret++; + +	if (trace_event_trigger_enable_disable(file, 1) < 0) { +		list_del_rcu(&data->list); +		ret--; +	} +	update_cond_flag(file); +out: +	return ret; +} + +static void event_enable_unregister_trigger(char *glob, +					    struct event_trigger_ops *ops, +					    struct event_trigger_data *test, +					    struct ftrace_event_file *file) +{ +	struct enable_trigger_data *test_enable_data = test->private_data; +	struct enable_trigger_data *enable_data; +	struct event_trigger_data *data; +	bool unregistered = false; + +	list_for_each_entry_rcu(data, &file->triggers, list) { +		enable_data = data->private_data; +		if (enable_data && +		    (enable_data->file == test_enable_data->file)) { +			unregistered = true; +			list_del_rcu(&data->list); +			update_cond_flag(file); +			trace_event_trigger_enable_disable(file, 0); +			break; +		} +	} + +	if (unregistered && data->ops->free) +		data->ops->free(data->ops, data); +} + +static struct event_trigger_ops * +event_enable_get_trigger_ops(char *cmd, char *param) +{ +	struct event_trigger_ops *ops; +	bool enable; + +	enable = strcmp(cmd, ENABLE_EVENT_STR) == 0; + +	if (enable) +		ops = param ? &event_enable_count_trigger_ops : +			&event_enable_trigger_ops; +	else +		ops = param ? 
&event_disable_count_trigger_ops : +			&event_disable_trigger_ops; + +	return ops; +} + +static struct event_command trigger_enable_cmd = { +	.name			= ENABLE_EVENT_STR, +	.trigger_type		= ETT_EVENT_ENABLE, +	.func			= event_enable_trigger_func, +	.reg			= event_enable_register_trigger, +	.unreg			= event_enable_unregister_trigger, +	.get_trigger_ops	= event_enable_get_trigger_ops, +	.set_filter		= set_trigger_filter, +}; + +static struct event_command trigger_disable_cmd = { +	.name			= DISABLE_EVENT_STR, +	.trigger_type		= ETT_EVENT_ENABLE, +	.func			= event_enable_trigger_func, +	.reg			= event_enable_register_trigger, +	.unreg			= event_enable_unregister_trigger, +	.get_trigger_ops	= event_enable_get_trigger_ops, +	.set_filter		= set_trigger_filter, +}; + +static __init void unregister_trigger_enable_disable_cmds(void) +{ +	unregister_event_command(&trigger_enable_cmd); +	unregister_event_command(&trigger_disable_cmd); +} + +static __init int register_trigger_enable_disable_cmds(void) +{ +	int ret; + +	ret = register_event_command(&trigger_enable_cmd); +	if (WARN_ON(ret < 0)) +		return ret; +	ret = register_event_command(&trigger_disable_cmd); +	if (WARN_ON(ret < 0)) +		unregister_trigger_enable_disable_cmds(); + +	return ret; +} + +static __init int register_trigger_traceon_traceoff_cmds(void) +{ +	int ret; + +	ret = register_event_command(&trigger_traceon_cmd); +	if (WARN_ON(ret < 0)) +		return ret; +	ret = register_event_command(&trigger_traceoff_cmd); +	if (WARN_ON(ret < 0)) +		unregister_trigger_traceon_traceoff_cmds(); + +	return ret; +} + +__init int register_trigger_cmds(void) +{ +	register_trigger_traceon_traceoff_cmds(); +	register_trigger_snapshot_cmd(); +	register_trigger_stacktrace_cmd(); +	register_trigger_enable_disable_cmds(); + +	return 0; +} diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index d21a7467008..d4ddde28a81 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -95,15 +95,12 @@ static void __always_unused ____ftrace_check_##name(void)		\  #undef __array  #define __array(type, item, len)					\  	do {								\ +		char *type_str = #type"["__stringify(len)"]";		\  		BUILD_BUG_ON(len > MAX_FILTER_STR_VAL);			\ -		mutex_lock(&event_storage_mutex);			\ -		snprintf(event_storage, sizeof(event_storage),		\ -			 "%s[%d]", #type, len);				\ -		ret = trace_define_field(event_call, event_storage, #item, \ +		ret = trace_define_field(event_call, type_str, #item,	\  				 offsetof(typeof(field), item),		\  				 sizeof(field.item),			\  				 is_signed_type(type), filter_type);	\ -		mutex_unlock(&event_storage_mutex);			\  		if (ret)						\  			return ret;					\  	} while (0); @@ -176,11 +173,13 @@ struct ftrace_event_class __refdata event_class_ftrace_##call = {	\  };									\  									\  struct ftrace_event_call __used event_##call = {			\ -	.name			= #call,				\ -	.event.type		= etype,				\  	.class			= &event_class_ftrace_##call,		\ +	{								\ +		.name			= #call,			\ +	},								\ +	.event.type		= etype,				\  	.print_fmt		= print,				\ -	.flags			= TRACE_EVENT_FL_IGNORE_ENABLE,		\ +	.flags			= TRACE_EVENT_FL_IGNORE_ENABLE | TRACE_EVENT_FL_USE_CALL_FILTER, \  };									\  struct ftrace_event_call __used						\  __attribute__((section("_ftrace_events"))) *__event_##call = &event_##call; diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 38fe1483c50..57f0ec962d2 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -13,33 +13,106 @@  #include <linux/debugfs.h>  
#include <linux/uaccess.h>  #include <linux/ftrace.h> +#include <linux/slab.h>  #include <linux/fs.h>  #include "trace.h" -/* function tracing enabled */ -static int			ftrace_function_enabled; +static void tracing_start_function_trace(struct trace_array *tr); +static void tracing_stop_function_trace(struct trace_array *tr); +static void +function_trace_call(unsigned long ip, unsigned long parent_ip, +		    struct ftrace_ops *op, struct pt_regs *pt_regs); +static void +function_stack_trace_call(unsigned long ip, unsigned long parent_ip, +			  struct ftrace_ops *op, struct pt_regs *pt_regs); +static struct tracer_flags func_flags; + +/* Our option */ +enum { +	TRACE_FUNC_OPT_STACK	= 0x1, +}; + +static int allocate_ftrace_ops(struct trace_array *tr) +{ +	struct ftrace_ops *ops; -static struct trace_array	*func_trace; +	ops = kzalloc(sizeof(*ops), GFP_KERNEL); +	if (!ops) +		return -ENOMEM; + +	/* Currently only the non stack verision is supported */ +	ops->func = function_trace_call; +	ops->flags = FTRACE_OPS_FL_RECURSION_SAFE; + +	tr->ops = ops; +	ops->private = tr; +	return 0; +} -static void tracing_start_function_trace(void); -static void tracing_stop_function_trace(void); + +int ftrace_create_function_files(struct trace_array *tr, +				 struct dentry *parent) +{ +	int ret; + +	/* +	 * The top level array uses the "global_ops", and the files are +	 * created on boot up. +	 */ +	if (tr->flags & TRACE_ARRAY_FL_GLOBAL) +		return 0; + +	ret = allocate_ftrace_ops(tr); +	if (ret) +		return ret; + +	ftrace_create_filter_files(tr->ops, parent); + +	return 0; +} + +void ftrace_destroy_function_files(struct trace_array *tr) +{ +	ftrace_destroy_filter_files(tr->ops); +	kfree(tr->ops); +	tr->ops = NULL; +}  static int function_trace_init(struct trace_array *tr)  { -	func_trace = tr; +	ftrace_func_t func; + +	/* +	 * Instance trace_arrays get their ops allocated +	 * at instance creation. Unless it failed +	 * the allocation. 
+	 */ +	if (!tr->ops) +		return -ENOMEM; + +	/* Currently only the global instance can do stack tracing */ +	if (tr->flags & TRACE_ARRAY_FL_GLOBAL && +	    func_flags.val & TRACE_FUNC_OPT_STACK) +		func = function_stack_trace_call; +	else +		func = function_trace_call; + +	ftrace_init_array_ops(tr, func); +  	tr->trace_buffer.cpu = get_cpu();  	put_cpu();  	tracing_start_cmdline_record(); -	tracing_start_function_trace(); +	tracing_start_function_trace(tr);  	return 0;  }  static void function_trace_reset(struct trace_array *tr)  { -	tracing_stop_function_trace(); +	tracing_stop_function_trace(tr);  	tracing_stop_cmdline_record(); +	ftrace_reset_array_ops(tr);  }  static void function_trace_start(struct trace_array *tr) @@ -47,25 +120,18 @@ static void function_trace_start(struct trace_array *tr)  	tracing_reset_online_cpus(&tr->trace_buffer);  } -/* Our option */ -enum { -	TRACE_FUNC_OPT_STACK	= 0x1, -}; - -static struct tracer_flags func_flags; -  static void  function_trace_call(unsigned long ip, unsigned long parent_ip,  		    struct ftrace_ops *op, struct pt_regs *pt_regs)  { -	struct trace_array *tr = func_trace; +	struct trace_array *tr = op->private;  	struct trace_array_cpu *data;  	unsigned long flags;  	int bit;  	int cpu;  	int pc; -	if (unlikely(!ftrace_function_enabled)) +	if (unlikely(!tr->function_enabled))  		return;  	pc = preempt_count(); @@ -91,14 +157,14 @@ static void  function_stack_trace_call(unsigned long ip, unsigned long parent_ip,  			  struct ftrace_ops *op, struct pt_regs *pt_regs)  { -	struct trace_array *tr = func_trace; +	struct trace_array *tr = op->private;  	struct trace_array_cpu *data;  	unsigned long flags;  	long disabled;  	int cpu;  	int pc; -	if (unlikely(!ftrace_function_enabled)) +	if (unlikely(!tr->function_enabled))  		return;  	/* @@ -128,19 +194,6 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip,  	local_irq_restore(flags);  } - -static struct ftrace_ops trace_ops __read_mostly = -{ -	.func = function_trace_call, -	.flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE, -}; - -static struct ftrace_ops trace_stack_ops __read_mostly = -{ -	.func = function_stack_trace_call, -	.flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE, -}; -  static struct tracer_opt func_opts[] = {  #ifdef CONFIG_STACKTRACE  	{ TRACER_OPT(func_stack_trace, TRACE_FUNC_OPT_STACK) }, @@ -153,29 +206,21 @@ static struct tracer_flags func_flags = {  	.opts = func_opts  }; -static void tracing_start_function_trace(void) +static void tracing_start_function_trace(struct trace_array *tr)  { -	ftrace_function_enabled = 0; - -	if (func_flags.val & TRACE_FUNC_OPT_STACK) -		register_ftrace_function(&trace_stack_ops); -	else -		register_ftrace_function(&trace_ops); - -	ftrace_function_enabled = 1; +	tr->function_enabled = 0; +	register_ftrace_function(tr->ops); +	tr->function_enabled = 1;  } -static void tracing_stop_function_trace(void) +static void tracing_stop_function_trace(struct trace_array *tr)  { -	ftrace_function_enabled = 0; - -	if (func_flags.val & TRACE_FUNC_OPT_STACK) -		unregister_ftrace_function(&trace_stack_ops); -	else -		unregister_ftrace_function(&trace_ops); +	tr->function_enabled = 0; +	unregister_ftrace_function(tr->ops);  } -static int func_set_flag(u32 old_flags, u32 bit, int set) +static int +func_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)  {  	switch (bit) {  	case TRACE_FUNC_OPT_STACK: @@ -183,12 +228,14 @@ static int func_set_flag(u32 old_flags, u32 bit, int set)  		if (!!set == 
!!(func_flags.val & TRACE_FUNC_OPT_STACK))  			break; +		unregister_ftrace_function(tr->ops); +  		if (set) { -			unregister_ftrace_function(&trace_ops); -			register_ftrace_function(&trace_stack_ops); +			tr->ops->func = function_stack_trace_call; +			register_ftrace_function(tr->ops);  		} else { -			unregister_ftrace_function(&trace_stack_ops); -			register_ftrace_function(&trace_ops); +			tr->ops->func = function_trace_call; +			register_ftrace_function(tr->ops);  		}  		break; @@ -205,9 +252,9 @@ static struct tracer function_trace __tracer_data =  	.init		= function_trace_init,  	.reset		= function_trace_reset,  	.start		= function_trace_start, -	.wait_pipe	= poll_wait_pipe,  	.flags		= &func_flags,  	.set_flag	= func_set_flag, +	.allow_instances = true,  #ifdef CONFIG_FTRACE_SELFTEST  	.selftest	= trace_selftest_startup_function,  #endif diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index b5c09242683..4de3e57f723 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -38,15 +38,6 @@ struct fgraph_data {  #define TRACE_GRAPH_INDENT	2 -/* Flag options */ -#define TRACE_GRAPH_PRINT_OVERRUN	0x1 -#define TRACE_GRAPH_PRINT_CPU		0x2 -#define TRACE_GRAPH_PRINT_OVERHEAD	0x4 -#define TRACE_GRAPH_PRINT_PROC		0x8 -#define TRACE_GRAPH_PRINT_DURATION	0x10 -#define TRACE_GRAPH_PRINT_ABS_TIME	0x20 -#define TRACE_GRAPH_PRINT_IRQS		0x40 -  static unsigned int max_depth;  static struct tracer_opt trace_opts[] = { @@ -64,11 +55,13 @@ static struct tracer_opt trace_opts[] = {  	{ TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) },  	/* Display interrupts */  	{ TRACER_OPT(funcgraph-irqs, TRACE_GRAPH_PRINT_IRQS) }, +	/* Display function name after trailing } */ +	{ TRACER_OPT(funcgraph-tail, TRACE_GRAPH_PRINT_TAIL) },  	{ } /* Empty entry */  };  static struct tracer_flags tracer_flags = { -	/* Don't display overruns and proc by default */ +	/* Don't display overruns, proc, or tail by default */  	.val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD |  	       TRACE_GRAPH_PRINT_DURATION | TRACE_GRAPH_PRINT_IRQS,  	.opts = trace_opts @@ -82,9 +75,9 @@ static struct trace_array *graph_array;   * to fill in space into DURATION column.   */  enum { -	DURATION_FILL_FULL  = -1, -	DURATION_FILL_START = -2, -	DURATION_FILL_END   = -3, +	FLAGS_FILL_FULL  = 1 << TRACE_GRAPH_PRINT_FILL_SHIFT, +	FLAGS_FILL_START = 2 << TRACE_GRAPH_PRINT_FILL_SHIFT, +	FLAGS_FILL_END   = 3 << TRACE_GRAPH_PRINT_FILL_SHIFT,  };  static enum print_line_t @@ -114,16 +107,37 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,  		return -EBUSY;  	} +	/* +	 * The curr_ret_stack is an index to ftrace return stack of +	 * current task.  Its value should be in [0, FTRACE_RETFUNC_ +	 * DEPTH) when the function graph tracer is used.  To support +	 * filtering out specific functions, it makes the index +	 * negative by subtracting huge value (FTRACE_NOTRACE_DEPTH) +	 * so when it sees a negative index the ftrace will ignore +	 * the record.  And the index gets recovered when returning +	 * from the filtered function by adding the FTRACE_NOTRACE_ +	 * DEPTH and then it'll continue to record functions normally. +	 * +	 * The curr_ret_stack is initialized to -1 and get increased +	 * in this function.  So it can be less than -1 only if it was +	 * filtered out via ftrace_graph_notrace_addr() which can be +	 * set from set_graph_notrace file in debugfs by user. 
+	 */ +	if (current->curr_ret_stack < -1) +		return -EBUSY; +  	calltime = trace_clock_local();  	index = ++current->curr_ret_stack; +	if (ftrace_graph_notrace_addr(func)) +		current->curr_ret_stack -= FTRACE_NOTRACE_DEPTH;  	barrier();  	current->ret_stack[index].ret = ret;  	current->ret_stack[index].func = func;  	current->ret_stack[index].calltime = calltime;  	current->ret_stack[index].subtime = 0;  	current->ret_stack[index].fp = frame_pointer; -	*depth = index; +	*depth = current->curr_ret_stack;  	return 0;  } @@ -137,7 +151,17 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,  	index = current->curr_ret_stack; -	if (unlikely(index < 0)) { +	/* +	 * A negative index here means that it's just returned from a +	 * notrace'd function.  Recover index to get an original +	 * return address.  See ftrace_push_return_trace(). +	 * +	 * TODO: Need to check whether the stack gets corrupted. +	 */ +	if (index < 0) +		index += FTRACE_NOTRACE_DEPTH; + +	if (unlikely(index < 0 || index >= FTRACE_RETFUNC_DEPTH)) {  		ftrace_graph_stop();  		WARN_ON(1);  		/* Might as well panic, otherwise we have no where to go */ @@ -193,6 +217,15 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer)  	trace.rettime = trace_clock_local();  	barrier();  	current->curr_ret_stack--; +	/* +	 * The curr_ret_stack can be less than -1 only if it was +	 * filtered out and it's about to return from the function. +	 * Recover the index and continue to trace normal functions. +	 */ +	if (current->curr_ret_stack < -1) { +		current->curr_ret_stack += FTRACE_NOTRACE_DEPTH; +		return ret; +	}  	/*  	 * The trace should run after decrementing the ret counter @@ -230,7 +263,7 @@ int __trace_graph_entry(struct trace_array *tr,  		return 0;  	entry	= ring_buffer_event_data(event);  	entry->graph_ent			= *trace; -	if (!filter_current_check_discard(buffer, call, entry, event)) +	if (!call_filter_check_discard(call, entry, buffer, event))  		__buffer_unlock_commit(buffer, event);  	return 1; @@ -259,10 +292,20 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)  	/* trace it when it is-nested-in or is a function enabled. */  	if ((!(trace->depth || ftrace_graph_addr(trace->func)) || -	     ftrace_graph_ignore_irqs()) || +	     ftrace_graph_ignore_irqs()) || (trace->depth < 0) ||  	    (max_depth && trace->depth >= max_depth))  		return 0; +	/* +	 * Do not trace a function if it's filtered by set_graph_notrace. +	 * Make the index of ret stack negative to indicate that it should +	 * ignore further functions.  But it needs its own ret stack entry +	 * to recover the original index in order to continue tracing after +	 * returning from the function. 
+	 */ +	if (ftrace_graph_notrace_addr(trace->func)) +		return 1; +  	local_irq_save(flags);  	cpu = raw_smp_processor_id();  	data = per_cpu_ptr(tr->trace_buffer.data, cpu); @@ -335,7 +378,7 @@ void __trace_graph_return(struct trace_array *tr,  		return;  	entry	= ring_buffer_event_data(event);  	entry->ret				= *trace; -	if (!filter_current_check_discard(buffer, call, entry, event)) +	if (!call_filter_check_discard(call, entry, buffer, event))  		__buffer_unlock_commit(buffer, event);  } @@ -652,7 +695,7 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr,  	}  	/* No overhead */ -	ret = print_graph_duration(DURATION_FILL_START, s, flags); +	ret = print_graph_duration(0, s, flags | FLAGS_FILL_START);  	if (ret != TRACE_TYPE_HANDLED)  		return ret; @@ -664,7 +707,7 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr,  	if (!ret)  		return TRACE_TYPE_PARTIAL_LINE; -	ret = print_graph_duration(DURATION_FILL_END, s, flags); +	ret = print_graph_duration(0, s, flags | FLAGS_FILL_END);  	if (ret != TRACE_TYPE_HANDLED)  		return ret; @@ -729,14 +772,14 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s,  			return TRACE_TYPE_HANDLED;  	/* No real adata, just filling the column with spaces */ -	switch (duration) { -	case DURATION_FILL_FULL: +	switch (flags & TRACE_GRAPH_PRINT_FILL_MASK) { +	case FLAGS_FILL_FULL:  		ret = trace_seq_puts(s, "              |  ");  		return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; -	case DURATION_FILL_START: +	case FLAGS_FILL_START:  		ret = trace_seq_puts(s, "  ");  		return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; -	case DURATION_FILL_END: +	case FLAGS_FILL_END:  		ret = trace_seq_puts(s, " |");  		return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;  	} @@ -852,7 +895,7 @@ print_graph_entry_nested(struct trace_iterator *iter,  	}  	/* No time */ -	ret = print_graph_duration(DURATION_FILL_FULL, s, flags); +	ret = print_graph_duration(0, s, flags | FLAGS_FILL_FULL);  	if (ret != TRACE_TYPE_HANDLED)  		return ret; @@ -1126,9 +1169,10 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,  	 * If the return function does not have a matching entry,  	 * then the entry was lost. Instead of just printing  	 * the '}' and letting the user guess what function this -	 * belongs to, write out the function name. +	 * belongs to, write out the function name. Always do +	 * that if the funcgraph-tail option is enabled.  	 
*/ -	if (func_match) { +	if (func_match && !(flags & TRACE_GRAPH_PRINT_TAIL)) {  		ret = trace_seq_puts(s, "}\n");  		if (!ret)  			return TRACE_TYPE_PARTIAL_LINE; @@ -1172,7 +1216,7 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,  		return TRACE_TYPE_PARTIAL_LINE;  	/* No time */ -	ret = print_graph_duration(DURATION_FILL_FULL, s, flags); +	ret = print_graph_duration(0, s, flags | FLAGS_FILL_FULL);  	if (ret != TRACE_TYPE_HANDLED)  		return ret; @@ -1426,7 +1470,8 @@ void graph_trace_close(struct trace_iterator *iter)  	}  } -static int func_graph_set_flag(u32 old_flags, u32 bit, int set) +static int +func_graph_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)  {  	if (bit == TRACE_GRAPH_PRINT_IRQS)  		ftrace_graph_skip_irqs = !set; @@ -1454,7 +1499,6 @@ static struct tracer graph_trace __tracer_data = {  	.pipe_open	= graph_trace_open,  	.close		= graph_trace_close,  	.pipe_close	= graph_trace_close, -	.wait_pipe	= poll_wait_pipe,  	.init		= graph_trace_init,  	.reset		= graph_trace_reset,  	.print_line	= print_graph_function, diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 2aefbee93a6..9bb104f748d 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -151,16 +151,11 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip,  	atomic_dec(&data->disabled);  } - -static struct ftrace_ops trace_ops __read_mostly = -{ -	.func = irqsoff_tracer_call, -	.flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE, -};  #endif /* CONFIG_FUNCTION_TRACER */  #ifdef CONFIG_FUNCTION_GRAPH_TRACER -static int irqsoff_set_flag(u32 old_flags, u32 bit, int set) +static int +irqsoff_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)  {  	int cpu; @@ -175,7 +170,7 @@ static int irqsoff_set_flag(u32 old_flags, u32 bit, int set)  	for_each_possible_cpu(cpu)  		per_cpu(tracing_cpu, cpu) = 0; -	tracing_max_latency = 0; +	tr->max_latency = 0;  	tracing_reset_online_cpus(&irqsoff_trace->trace_buffer);  	return start_irqsoff_tracer(irqsoff_trace, set); @@ -266,7 +261,8 @@ __trace_function(struct trace_array *tr,  #else  #define __trace_function trace_function -static int irqsoff_set_flag(u32 old_flags, u32 bit, int set) +static int +irqsoff_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)  {  	return -EINVAL;  } @@ -301,13 +297,13 @@ static void irqsoff_print_header(struct seq_file *s)  /*   * Should this new latency be reported/recorded?   
*/ -static int report_latency(cycle_t delta) +static int report_latency(struct trace_array *tr, cycle_t delta)  {  	if (tracing_thresh) {  		if (delta < tracing_thresh)  			return 0;  	} else { -		if (delta <= tracing_max_latency) +		if (delta <= tr->max_latency)  			return 0;  	}  	return 1; @@ -331,13 +327,13 @@ check_critical_timing(struct trace_array *tr,  	pc = preempt_count(); -	if (!report_latency(delta)) +	if (!report_latency(tr, delta))  		goto out;  	raw_spin_lock_irqsave(&max_trace_lock, flags);  	/* check if we are still the max latency */ -	if (!report_latency(delta)) +	if (!report_latency(tr, delta))  		goto out_unlock;  	__trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); @@ -350,7 +346,7 @@ check_critical_timing(struct trace_array *tr,  	data->critical_end = parent_ip;  	if (likely(!is_tracing_stopped())) { -		tracing_max_latency = delta; +		tr->max_latency = delta;  		update_max_tr_single(tr, current, cpu);  	} @@ -498,14 +494,14 @@ void trace_hardirqs_off(void)  }  EXPORT_SYMBOL(trace_hardirqs_off); -void trace_hardirqs_on_caller(unsigned long caller_addr) +__visible void trace_hardirqs_on_caller(unsigned long caller_addr)  {  	if (!preempt_trace() && irq_trace())  		stop_critical_timing(CALLER_ADDR0, caller_addr);  }  EXPORT_SYMBOL(trace_hardirqs_on_caller); -void trace_hardirqs_off_caller(unsigned long caller_addr) +__visible void trace_hardirqs_off_caller(unsigned long caller_addr)  {  	if (!preempt_trace() && irq_trace())  		start_critical_timing(CALLER_ADDR0, caller_addr); @@ -529,7 +525,7 @@ void trace_preempt_off(unsigned long a0, unsigned long a1)  }  #endif /* CONFIG_PREEMPT_TRACER */ -static int register_irqsoff_function(int graph, int set) +static int register_irqsoff_function(struct trace_array *tr, int graph, int set)  {  	int ret; @@ -541,7 +537,7 @@ static int register_irqsoff_function(int graph, int set)  		ret = register_ftrace_graph(&irqsoff_graph_return,  					    &irqsoff_graph_entry);  	else -		ret = register_ftrace_function(&trace_ops); +		ret = register_ftrace_function(tr->ops);  	if (!ret)  		function_enabled = true; @@ -549,7 +545,7 @@ static int register_irqsoff_function(int graph, int set)  	return ret;  } -static void unregister_irqsoff_function(int graph) +static void unregister_irqsoff_function(struct trace_array *tr, int graph)  {  	if (!function_enabled)  		return; @@ -557,23 +553,25 @@ static void unregister_irqsoff_function(int graph)  	if (graph)  		unregister_ftrace_graph();  	else -		unregister_ftrace_function(&trace_ops); +		unregister_ftrace_function(tr->ops);  	function_enabled = false;  } -static void irqsoff_function_set(int set) +static void irqsoff_function_set(struct trace_array *tr, int set)  {  	if (set) -		register_irqsoff_function(is_graph(), 1); +		register_irqsoff_function(tr, is_graph(), 1);  	else -		unregister_irqsoff_function(is_graph()); +		unregister_irqsoff_function(tr, is_graph());  } -static int irqsoff_flag_changed(struct tracer *tracer, u32 mask, int set) +static int irqsoff_flag_changed(struct trace_array *tr, u32 mask, int set)  { +	struct tracer *tracer = tr->current_trace; +  	if (mask & TRACE_ITER_FUNCTION) -		irqsoff_function_set(set); +		irqsoff_function_set(tr, set);  	return trace_keep_overwrite(tracer, mask, set);  } @@ -582,7 +580,7 @@ static int start_irqsoff_tracer(struct trace_array *tr, int graph)  {  	int ret; -	ret = register_irqsoff_function(graph, 0); +	ret = register_irqsoff_function(tr, graph, 0);  	if (!ret && tracing_is_enabled())  		tracer_enabled = 1; @@ -596,25 +594,37 @@ static 
void stop_irqsoff_tracer(struct trace_array *tr, int graph)  {  	tracer_enabled = 0; -	unregister_irqsoff_function(graph); +	unregister_irqsoff_function(tr, graph);  } -static void __irqsoff_tracer_init(struct trace_array *tr) +static bool irqsoff_busy; + +static int __irqsoff_tracer_init(struct trace_array *tr)  { +	if (irqsoff_busy) +		return -EBUSY; +  	save_flags = trace_flags;  	/* non overwrite screws up the latency tracers */  	set_tracer_flag(tr, TRACE_ITER_OVERWRITE, 1);  	set_tracer_flag(tr, TRACE_ITER_LATENCY_FMT, 1); -	tracing_max_latency = 0; +	tr->max_latency = 0;  	irqsoff_trace = tr;  	/* make sure that the tracer is visible */  	smp_wmb();  	tracing_reset_online_cpus(&tr->trace_buffer); -	if (start_irqsoff_tracer(tr, is_graph())) +	ftrace_init_array_ops(tr, irqsoff_tracer_call); + +	/* Only toplevel instance supports graph tracing */ +	if (start_irqsoff_tracer(tr, (tr->flags & TRACE_ARRAY_FL_GLOBAL && +				      is_graph())))  		printk(KERN_ERR "failed to start irqsoff tracer\n"); + +	irqsoff_busy = true; +	return 0;  }  static void irqsoff_tracer_reset(struct trace_array *tr) @@ -626,6 +636,9 @@ static void irqsoff_tracer_reset(struct trace_array *tr)  	set_tracer_flag(tr, TRACE_ITER_LATENCY_FMT, lat_flag);  	set_tracer_flag(tr, TRACE_ITER_OVERWRITE, overwrite_flag); +	ftrace_reset_array_ops(tr); + +	irqsoff_busy = false;  }  static void irqsoff_tracer_start(struct trace_array *tr) @@ -643,8 +656,7 @@ static int irqsoff_tracer_init(struct trace_array *tr)  {  	trace_type = TRACER_IRQS_OFF; -	__irqsoff_tracer_init(tr); -	return 0; +	return __irqsoff_tracer_init(tr);  }  static struct tracer irqsoff_tracer __read_mostly =  { @@ -664,6 +676,7 @@ static struct tracer irqsoff_tracer __read_mostly =  #endif  	.open           = irqsoff_trace_open,  	.close          = irqsoff_trace_close, +	.allow_instances = true,  	.use_max_tr	= true,  };  # define register_irqsoff(trace) register_tracer(&trace) @@ -676,8 +689,7 @@ static int preemptoff_tracer_init(struct trace_array *tr)  {  	trace_type = TRACER_PREEMPT_OFF; -	__irqsoff_tracer_init(tr); -	return 0; +	return __irqsoff_tracer_init(tr);  }  static struct tracer preemptoff_tracer __read_mostly = @@ -698,6 +710,7 @@ static struct tracer preemptoff_tracer __read_mostly =  #endif  	.open		= irqsoff_trace_open,  	.close		= irqsoff_trace_close, +	.allow_instances = true,  	.use_max_tr	= true,  };  # define register_preemptoff(trace) register_tracer(&trace) @@ -712,8 +725,7 @@ static int preemptirqsoff_tracer_init(struct trace_array *tr)  {  	trace_type = TRACER_IRQS_OFF | TRACER_PREEMPT_OFF; -	__irqsoff_tracer_init(tr); -	return 0; +	return __irqsoff_tracer_init(tr);  }  static struct tracer preemptirqsoff_tracer __read_mostly = @@ -734,6 +746,7 @@ static struct tracer preemptirqsoff_tracer __read_mostly =  #endif  	.open		= irqsoff_trace_open,  	.close		= irqsoff_trace_close, +	.allow_instances = true,  	.use_max_tr	= true,  }; diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 243f6834d02..282f6e4e553 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -27,75 +27,54 @@  /**   * Kprobe event core functions   */ -struct trace_probe { +struct trace_kprobe {  	struct list_head	list;  	struct kretprobe	rp;	/* Use rp.kp for kprobe use */  	unsigned long 		nhit; -	unsigned int		flags;	/* For TP_FLAG_* */  	const char		*symbol;	/* symbol name */ -	struct ftrace_event_class	class; -	struct ftrace_event_call	call; -	struct list_head	files; -	ssize_t			size;		/* trace entry size */ -	unsigned 
int		nr_args; -	struct probe_arg	args[]; +	struct trace_probe	tp;  }; -struct event_file_link { -	struct ftrace_event_file	*file; -	struct list_head		list; -}; - -#define SIZEOF_TRACE_PROBE(n)			\ -	(offsetof(struct trace_probe, args) +	\ +#define SIZEOF_TRACE_KPROBE(n)				\ +	(offsetof(struct trace_kprobe, tp.args) +	\  	(sizeof(struct probe_arg) * (n))) -static __kprobes bool trace_probe_is_return(struct trace_probe *tp) -{ -	return tp->rp.handler != NULL; -} - -static __kprobes const char *trace_probe_symbol(struct trace_probe *tp) -{ -	return tp->symbol ? tp->symbol : "unknown"; -} - -static __kprobes unsigned long trace_probe_offset(struct trace_probe *tp) +static nokprobe_inline bool trace_kprobe_is_return(struct trace_kprobe *tk)  { -	return tp->rp.kp.offset; +	return tk->rp.handler != NULL;  } -static __kprobes bool trace_probe_is_enabled(struct trace_probe *tp) +static nokprobe_inline const char *trace_kprobe_symbol(struct trace_kprobe *tk)  { -	return !!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE)); +	return tk->symbol ? tk->symbol : "unknown";  } -static __kprobes bool trace_probe_is_registered(struct trace_probe *tp) +static nokprobe_inline unsigned long trace_kprobe_offset(struct trace_kprobe *tk)  { -	return !!(tp->flags & TP_FLAG_REGISTERED); +	return tk->rp.kp.offset;  } -static __kprobes bool trace_probe_has_gone(struct trace_probe *tp) +static nokprobe_inline bool trace_kprobe_has_gone(struct trace_kprobe *tk)  { -	return !!(kprobe_gone(&tp->rp.kp)); +	return !!(kprobe_gone(&tk->rp.kp));  } -static __kprobes bool trace_probe_within_module(struct trace_probe *tp, -						struct module *mod) +static nokprobe_inline bool trace_kprobe_within_module(struct trace_kprobe *tk, +						 struct module *mod)  {  	int len = strlen(mod->name); -	const char *name = trace_probe_symbol(tp); +	const char *name = trace_kprobe_symbol(tk);  	return strncmp(mod->name, name, len) == 0 && name[len] == ':';  } -static __kprobes bool trace_probe_is_on_module(struct trace_probe *tp) +static nokprobe_inline bool trace_kprobe_is_on_module(struct trace_kprobe *tk)  { -	return !!strchr(trace_probe_symbol(tp), ':'); +	return !!strchr(trace_kprobe_symbol(tk), ':');  } -static int register_probe_event(struct trace_probe *tp); -static int unregister_probe_event(struct trace_probe *tp); +static int register_kprobe_event(struct trace_kprobe *tk); +static int unregister_kprobe_event(struct trace_kprobe *tk);  static DEFINE_MUTEX(probe_lock);  static LIST_HEAD(probe_list); @@ -104,45 +83,231 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);  static int kretprobe_dispatcher(struct kretprobe_instance *ri,  				struct pt_regs *regs); +/* Memory fetching by symbol */ +struct symbol_cache { +	char		*symbol; +	long		offset; +	unsigned long	addr; +}; + +unsigned long update_symbol_cache(struct symbol_cache *sc) +{ +	sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol); + +	if (sc->addr) +		sc->addr += sc->offset; + +	return sc->addr; +} + +void free_symbol_cache(struct symbol_cache *sc) +{ +	kfree(sc->symbol); +	kfree(sc); +} + +struct symbol_cache *alloc_symbol_cache(const char *sym, long offset) +{ +	struct symbol_cache *sc; + +	if (!sym || strlen(sym) == 0) +		return NULL; + +	sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL); +	if (!sc) +		return NULL; + +	sc->symbol = kstrdup(sym, GFP_KERNEL); +	if (!sc->symbol) { +		kfree(sc); +		return NULL; +	} +	sc->offset = offset; +	update_symbol_cache(sc); + +	return sc; +} + +/* + * Kprobes-specific fetch functions + */ +#define 
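The symbol_cache helpers above are no longer static, apparently because the generic code that stays in trace_probe.c still calls update_symbol_cache()/free_symbol_cache() from its deref handling. For illustration only, a minimal sketch of resolving a "symbol+offset" address with them; the wrapper function below is invented and is not code from this patch:

static unsigned long resolve_symbol_arg(const char *sym, long offset)
{
	struct symbol_cache *sc;
	unsigned long addr = 0;

	sc = alloc_symbol_cache(sym, offset);	/* kstrdup()s sym and looks it up */
	if (sc) {
		addr = update_symbol_cache(sc);	/* redo the lookup; 0 if unresolved */
		free_symbol_cache(sc);
	}
	return addr;
}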
DEFINE_FETCH_stack(type)					\ +static void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,		\ +					  void *offset, void *dest)	\ +{									\ +	*(type *)dest = (type)regs_get_kernel_stack_nth(regs,		\ +				(unsigned int)((unsigned long)offset));	\ +}									\ +NOKPROBE_SYMBOL(FETCH_FUNC_NAME(stack, type)); + +DEFINE_BASIC_FETCH_FUNCS(stack) +/* No string on the stack entry */ +#define fetch_stack_string	NULL +#define fetch_stack_string_size	NULL + +#define DEFINE_FETCH_memory(type)					\ +static void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,		\ +					  void *addr, void *dest)	\ +{									\ +	type retval;							\ +	if (probe_kernel_address(addr, retval))				\ +		*(type *)dest = 0;					\ +	else								\ +		*(type *)dest = retval;					\ +}									\ +NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, type)); + +DEFINE_BASIC_FETCH_FUNCS(memory) +/* + * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max + * length and relative data location. + */ +static void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs, +					    void *addr, void *dest) +{ +	long ret; +	int maxlen = get_rloc_len(*(u32 *)dest); +	u8 *dst = get_rloc_data(dest); +	u8 *src = addr; +	mm_segment_t old_fs = get_fs(); + +	if (!maxlen) +		return; + +	/* +	 * Try to get string again, since the string can be changed while +	 * probing. +	 */ +	set_fs(KERNEL_DS); +	pagefault_disable(); + +	do +		ret = __copy_from_user_inatomic(dst++, src++, 1); +	while (dst[-1] && ret == 0 && src - (u8 *)addr < maxlen); + +	dst[-1] = '\0'; +	pagefault_enable(); +	set_fs(old_fs); + +	if (ret < 0) {	/* Failed to fetch string */ +		((u8 *)get_rloc_data(dest))[0] = '\0'; +		*(u32 *)dest = make_data_rloc(0, get_rloc_offs(*(u32 *)dest)); +	} else { +		*(u32 *)dest = make_data_rloc(src - (u8 *)addr, +					      get_rloc_offs(*(u32 *)dest)); +	} +} +NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, string)); + +/* Return the length of string -- including null terminal byte */ +static void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs, +						 void *addr, void *dest) +{ +	mm_segment_t old_fs; +	int ret, len = 0; +	u8 c; + +	old_fs = get_fs(); +	set_fs(KERNEL_DS); +	pagefault_disable(); + +	do { +		ret = __copy_from_user_inatomic(&c, (u8 *)addr + len, 1); +		len++; +	} while (c && ret == 0 && len < MAX_STRING_SIZE); + +	pagefault_enable(); +	set_fs(old_fs); + +	if (ret < 0)	/* Failed to check the length */ +		*(u32 *)dest = 0; +	else +		*(u32 *)dest = len; +} +NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, string_size)); + +#define DEFINE_FETCH_symbol(type)					\ +void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs, void *data, void *dest)\ +{									\ +	struct symbol_cache *sc = data;					\ +	if (sc->addr)							\ +		fetch_memory_##type(regs, (void *)sc->addr, dest);	\ +	else								\ +		*(type *)dest = 0;					\ +}									\ +NOKPROBE_SYMBOL(FETCH_FUNC_NAME(symbol, type)); + +DEFINE_BASIC_FETCH_FUNCS(symbol) +DEFINE_FETCH_symbol(string) +DEFINE_FETCH_symbol(string_size) + +/* kprobes don't support file_offset fetch methods */ +#define fetch_file_offset_u8		NULL +#define fetch_file_offset_u16		NULL +#define fetch_file_offset_u32		NULL +#define fetch_file_offset_u64		NULL +#define fetch_file_offset_string	NULL +#define fetch_file_offset_string_size	NULL + +/* Fetch type information table */ +const struct fetch_type kprobes_fetch_type_table[] = { +	/* Special types */ +	[FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string, +					sizeof(u32), 1, "__data_loc char[]"), +	[FETCH_TYPE_STRSIZE] = 
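The string fetchers above require the caller to prime *(u32 *)dest with the maximum length and the relative data location. Assuming the usual trace_probe.h encoding (length in the upper 16 bits, offset in the lower 16, consistent with the ">> 16" used by the string printer elsewhere in this series), a minimal sketch of that contract; the function below is purely illustrative:

static inline void rloc_encoding_example(void)
{
	u32 dl = make_data_rloc(32, 8);		/* room for 32 bytes, data at offset 8 */

	WARN_ON(get_rloc_len(dl) != 32);	/* length comes back from bits 31..16 */
	WARN_ON(get_rloc_offs(dl) != 8);	/* offset comes back from bits 15..0 */
}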
__ASSIGN_FETCH_TYPE("string_size", u32, +					string_size, sizeof(u32), 0, "u32"), +	/* Basic types */ +	ASSIGN_FETCH_TYPE(u8,  u8,  0), +	ASSIGN_FETCH_TYPE(u16, u16, 0), +	ASSIGN_FETCH_TYPE(u32, u32, 0), +	ASSIGN_FETCH_TYPE(u64, u64, 0), +	ASSIGN_FETCH_TYPE(s8,  u8,  1), +	ASSIGN_FETCH_TYPE(s16, u16, 1), +	ASSIGN_FETCH_TYPE(s32, u32, 1), +	ASSIGN_FETCH_TYPE(s64, u64, 1), + +	ASSIGN_FETCH_TYPE_END +}; +  /*   * Allocate new trace_probe and initialize it (including kprobes).   */ -static struct trace_probe *alloc_trace_probe(const char *group, +static struct trace_kprobe *alloc_trace_kprobe(const char *group,  					     const char *event,  					     void *addr,  					     const char *symbol,  					     unsigned long offs,  					     int nargs, bool is_return)  { -	struct trace_probe *tp; +	struct trace_kprobe *tk;  	int ret = -ENOMEM; -	tp = kzalloc(SIZEOF_TRACE_PROBE(nargs), GFP_KERNEL); -	if (!tp) +	tk = kzalloc(SIZEOF_TRACE_KPROBE(nargs), GFP_KERNEL); +	if (!tk)  		return ERR_PTR(ret);  	if (symbol) { -		tp->symbol = kstrdup(symbol, GFP_KERNEL); -		if (!tp->symbol) +		tk->symbol = kstrdup(symbol, GFP_KERNEL); +		if (!tk->symbol)  			goto error; -		tp->rp.kp.symbol_name = tp->symbol; -		tp->rp.kp.offset = offs; +		tk->rp.kp.symbol_name = tk->symbol; +		tk->rp.kp.offset = offs;  	} else -		tp->rp.kp.addr = addr; +		tk->rp.kp.addr = addr;  	if (is_return) -		tp->rp.handler = kretprobe_dispatcher; +		tk->rp.handler = kretprobe_dispatcher;  	else -		tp->rp.kp.pre_handler = kprobe_dispatcher; +		tk->rp.kp.pre_handler = kprobe_dispatcher;  	if (!event || !is_good_name(event)) {  		ret = -EINVAL;  		goto error;  	} -	tp->call.class = &tp->class; -	tp->call.name = kstrdup(event, GFP_KERNEL); -	if (!tp->call.name) +	tk->tp.call.class = &tk->tp.class; +	tk->tp.call.name = kstrdup(event, GFP_KERNEL); +	if (!tk->tp.call.name)  		goto error;  	if (!group || !is_good_name(group)) { @@ -150,42 +315,42 @@ static struct trace_probe *alloc_trace_probe(const char *group,  		goto error;  	} -	tp->class.system = kstrdup(group, GFP_KERNEL); -	if (!tp->class.system) +	tk->tp.class.system = kstrdup(group, GFP_KERNEL); +	if (!tk->tp.class.system)  		goto error; -	INIT_LIST_HEAD(&tp->list); -	INIT_LIST_HEAD(&tp->files); -	return tp; +	INIT_LIST_HEAD(&tk->list); +	INIT_LIST_HEAD(&tk->tp.files); +	return tk;  error: -	kfree(tp->call.name); -	kfree(tp->symbol); -	kfree(tp); +	kfree(tk->tp.call.name); +	kfree(tk->symbol); +	kfree(tk);  	return ERR_PTR(ret);  } -static void free_trace_probe(struct trace_probe *tp) +static void free_trace_kprobe(struct trace_kprobe *tk)  {  	int i; -	for (i = 0; i < tp->nr_args; i++) -		traceprobe_free_probe_arg(&tp->args[i]); +	for (i = 0; i < tk->tp.nr_args; i++) +		traceprobe_free_probe_arg(&tk->tp.args[i]); -	kfree(tp->call.class->system); -	kfree(tp->call.name); -	kfree(tp->symbol); -	kfree(tp); +	kfree(tk->tp.call.class->system); +	kfree(tk->tp.call.name); +	kfree(tk->symbol); +	kfree(tk);  } -static struct trace_probe *find_trace_probe(const char *event, -					    const char *group) +static struct trace_kprobe *find_trace_kprobe(const char *event, +					      const char *group)  { -	struct trace_probe *tp; +	struct trace_kprobe *tk; -	list_for_each_entry(tp, &probe_list, list) -		if (strcmp(tp->call.name, event) == 0 && -		    strcmp(tp->call.class->system, group) == 0) -			return tp; +	list_for_each_entry(tk, &probe_list, list) +		if (strcmp(ftrace_event_name(&tk->tp.call), event) == 0 && +		    strcmp(tk->tp.call.class->system, group) == 0) +			return tk;  	return NULL;  } @@ 
-194,7 +359,7 @@ static struct trace_probe *find_trace_probe(const char *event,   * if the file is NULL, enable "perf" handler, or enable "trace" handler.   */  static int -enable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) +enable_trace_kprobe(struct trace_kprobe *tk, struct ftrace_event_file *file)  {  	int ret = 0; @@ -208,47 +373,35 @@ enable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)  		}  		link->file = file; -		list_add_tail_rcu(&link->list, &tp->files); +		list_add_tail_rcu(&link->list, &tk->tp.files); -		tp->flags |= TP_FLAG_TRACE; +		tk->tp.flags |= TP_FLAG_TRACE;  	} else -		tp->flags |= TP_FLAG_PROFILE; +		tk->tp.flags |= TP_FLAG_PROFILE; -	if (trace_probe_is_registered(tp) && !trace_probe_has_gone(tp)) { -		if (trace_probe_is_return(tp)) -			ret = enable_kretprobe(&tp->rp); +	if (trace_probe_is_registered(&tk->tp) && !trace_kprobe_has_gone(tk)) { +		if (trace_kprobe_is_return(tk)) +			ret = enable_kretprobe(&tk->rp);  		else -			ret = enable_kprobe(&tp->rp.kp); +			ret = enable_kprobe(&tk->rp.kp);  	}   out:  	return ret;  } -static struct event_file_link * -find_event_file_link(struct trace_probe *tp, struct ftrace_event_file *file) -{ -	struct event_file_link *link; - -	list_for_each_entry(link, &tp->files, list) -		if (link->file == file) -			return link; - -	return NULL; -} -  /*   * Disable trace_probe   * if the file is NULL, disable "perf" handler, or disable "trace" handler.   */  static int -disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) +disable_trace_kprobe(struct trace_kprobe *tk, struct ftrace_event_file *file)  {  	struct event_file_link *link = NULL;  	int wait = 0;  	int ret = 0;  	if (file) { -		link = find_event_file_link(tp, file); +		link = find_event_file_link(&tk->tp, file);  		if (!link) {  			ret = -EINVAL;  			goto out; @@ -256,18 +409,18 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)  		list_del_rcu(&link->list);  		wait = 1; -		if (!list_empty(&tp->files)) +		if (!list_empty(&tk->tp.files))  			goto out; -		tp->flags &= ~TP_FLAG_TRACE; +		tk->tp.flags &= ~TP_FLAG_TRACE;  	} else -		tp->flags &= ~TP_FLAG_PROFILE; +		tk->tp.flags &= ~TP_FLAG_PROFILE; -	if (!trace_probe_is_enabled(tp) && trace_probe_is_registered(tp)) { -		if (trace_probe_is_return(tp)) -			disable_kretprobe(&tp->rp); +	if (!trace_probe_is_enabled(&tk->tp) && trace_probe_is_registered(&tk->tp)) { +		if (trace_kprobe_is_return(tk)) +			disable_kretprobe(&tk->rp);  		else -			disable_kprobe(&tp->rp.kp); +			disable_kprobe(&tk->rp.kp);  		wait = 1;  	}   out: @@ -288,40 +441,40 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)  }  /* Internal register function - just handle k*probes and flags */ -static int __register_trace_probe(struct trace_probe *tp) +static int __register_trace_kprobe(struct trace_kprobe *tk)  {  	int i, ret; -	if (trace_probe_is_registered(tp)) +	if (trace_probe_is_registered(&tk->tp))  		return -EINVAL; -	for (i = 0; i < tp->nr_args; i++) -		traceprobe_update_arg(&tp->args[i]); +	for (i = 0; i < tk->tp.nr_args; i++) +		traceprobe_update_arg(&tk->tp.args[i]);  	/* Set/clear disabled flag according to tp->flag */ -	if (trace_probe_is_enabled(tp)) -		tp->rp.kp.flags &= ~KPROBE_FLAG_DISABLED; +	if (trace_probe_is_enabled(&tk->tp)) +		tk->rp.kp.flags &= ~KPROBE_FLAG_DISABLED;  	else -		tp->rp.kp.flags |= KPROBE_FLAG_DISABLED; +		tk->rp.kp.flags |= KPROBE_FLAG_DISABLED; -	if (trace_probe_is_return(tp)) -		ret = 
register_kretprobe(&tp->rp); +	if (trace_kprobe_is_return(tk)) +		ret = register_kretprobe(&tk->rp);  	else -		ret = register_kprobe(&tp->rp.kp); +		ret = register_kprobe(&tk->rp.kp);  	if (ret == 0) -		tp->flags |= TP_FLAG_REGISTERED; +		tk->tp.flags |= TP_FLAG_REGISTERED;  	else {  		pr_warning("Could not insert probe at %s+%lu: %d\n", -			   trace_probe_symbol(tp), trace_probe_offset(tp), ret); -		if (ret == -ENOENT && trace_probe_is_on_module(tp)) { +			   trace_kprobe_symbol(tk), trace_kprobe_offset(tk), ret); +		if (ret == -ENOENT && trace_kprobe_is_on_module(tk)) {  			pr_warning("This probe might be able to register after"  				   "target module is loaded. Continue.\n");  			ret = 0;  		} else if (ret == -EILSEQ) {  			pr_warning("Probing address(0x%p) is not an "  				   "instruction boundary.\n", -				   tp->rp.kp.addr); +				   tk->rp.kp.addr);  			ret = -EINVAL;  		}  	} @@ -330,67 +483,68 @@ static int __register_trace_probe(struct trace_probe *tp)  }  /* Internal unregister function - just handle k*probes and flags */ -static void __unregister_trace_probe(struct trace_probe *tp) +static void __unregister_trace_kprobe(struct trace_kprobe *tk)  { -	if (trace_probe_is_registered(tp)) { -		if (trace_probe_is_return(tp)) -			unregister_kretprobe(&tp->rp); +	if (trace_probe_is_registered(&tk->tp)) { +		if (trace_kprobe_is_return(tk)) +			unregister_kretprobe(&tk->rp);  		else -			unregister_kprobe(&tp->rp.kp); -		tp->flags &= ~TP_FLAG_REGISTERED; +			unregister_kprobe(&tk->rp.kp); +		tk->tp.flags &= ~TP_FLAG_REGISTERED;  		/* Cleanup kprobe for reuse */ -		if (tp->rp.kp.symbol_name) -			tp->rp.kp.addr = NULL; +		if (tk->rp.kp.symbol_name) +			tk->rp.kp.addr = NULL;  	}  }  /* Unregister a trace_probe and probe_event: call with locking probe_lock */ -static int unregister_trace_probe(struct trace_probe *tp) +static int unregister_trace_kprobe(struct trace_kprobe *tk)  {  	/* Enabled event can not be unregistered */ -	if (trace_probe_is_enabled(tp)) +	if (trace_probe_is_enabled(&tk->tp))  		return -EBUSY;  	/* Will fail if probe is being used by ftrace or perf */ -	if (unregister_probe_event(tp)) +	if (unregister_kprobe_event(tk))  		return -EBUSY; -	__unregister_trace_probe(tp); -	list_del(&tp->list); +	__unregister_trace_kprobe(tk); +	list_del(&tk->list);  	return 0;  }  /* Register a trace_probe and probe_event */ -static int register_trace_probe(struct trace_probe *tp) +static int register_trace_kprobe(struct trace_kprobe *tk)  { -	struct trace_probe *old_tp; +	struct trace_kprobe *old_tk;  	int ret;  	mutex_lock(&probe_lock);  	/* Delete old (same name) event if exist */ -	old_tp = find_trace_probe(tp->call.name, tp->call.class->system); -	if (old_tp) { -		ret = unregister_trace_probe(old_tp); +	old_tk = find_trace_kprobe(ftrace_event_name(&tk->tp.call), +			tk->tp.call.class->system); +	if (old_tk) { +		ret = unregister_trace_kprobe(old_tk);  		if (ret < 0)  			goto end; -		free_trace_probe(old_tp); +		free_trace_kprobe(old_tk);  	}  	/* Register new event */ -	ret = register_probe_event(tp); +	ret = register_kprobe_event(tk);  	if (ret) {  		pr_warning("Failed to register probe event(%d)\n", ret);  		goto end;  	}  	/* Register k*probe */ -	ret = __register_trace_probe(tp); +	ret = __register_trace_kprobe(tk);  	if (ret < 0) -		unregister_probe_event(tp); +		unregister_kprobe_event(tk);  	else -		list_add_tail(&tp->list, &probe_list); +		list_add_tail(&tk->list, &probe_list);  end:  	mutex_unlock(&probe_lock); @@ -398,11 +552,11 @@ end:  }  /* Module notifier call back, 
checking event on the module */ -static int trace_probe_module_callback(struct notifier_block *nb, +static int trace_kprobe_module_callback(struct notifier_block *nb,  				       unsigned long val, void *data)  {  	struct module *mod = data; -	struct trace_probe *tp; +	struct trace_kprobe *tk;  	int ret;  	if (val != MODULE_STATE_COMING) @@ -410,15 +564,16 @@ static int trace_probe_module_callback(struct notifier_block *nb,  	/* Update probes on coming module */  	mutex_lock(&probe_lock); -	list_for_each_entry(tp, &probe_list, list) { -		if (trace_probe_within_module(tp, mod)) { +	list_for_each_entry(tk, &probe_list, list) { +		if (trace_kprobe_within_module(tk, mod)) {  			/* Don't need to check busy - this should have gone. */ -			__unregister_trace_probe(tp); -			ret = __register_trace_probe(tp); +			__unregister_trace_kprobe(tk); +			ret = __register_trace_kprobe(tk);  			if (ret)  				pr_warning("Failed to re-register probe %s on"  					   "%s: %d\n", -					   tp->call.name, mod->name, ret); +					   ftrace_event_name(&tk->tp.call), +					   mod->name, ret);  		}  	}  	mutex_unlock(&probe_lock); @@ -426,12 +581,12 @@ static int trace_probe_module_callback(struct notifier_block *nb,  	return NOTIFY_DONE;  } -static struct notifier_block trace_probe_module_nb = { -	.notifier_call = trace_probe_module_callback, +static struct notifier_block trace_kprobe_module_nb = { +	.notifier_call = trace_kprobe_module_callback,  	.priority = 1	/* Invoked after kprobe module callback */  }; -static int create_trace_probe(int argc, char **argv) +static int create_trace_kprobe(int argc, char **argv)  {  	/*  	 * Argument syntax: @@ -451,7 +606,7 @@ static int create_trace_probe(int argc, char **argv)  	 * Type of args:  	 *  FETCHARG:TYPE : use TYPE instead of unsigned long.  	 */ -	struct trace_probe *tp; +	struct trace_kprobe *tk;  	int i, ret = 0;  	bool is_return = false, is_delete = false;  	char *symbol = NULL, *event = NULL, *group = NULL; @@ -498,16 +653,16 @@ static int create_trace_probe(int argc, char **argv)  			return -EINVAL;  		}  		mutex_lock(&probe_lock); -		tp = find_trace_probe(event, group); -		if (!tp) { +		tk = find_trace_kprobe(event, group); +		if (!tk) {  			mutex_unlock(&probe_lock);  			pr_info("Event %s/%s doesn't exist.\n", group, event);  			return -ENOENT;  		}  		/* delete an event */ -		ret = unregister_trace_probe(tp); +		ret = unregister_trace_kprobe(tk);  		if (ret == 0) -			free_trace_probe(tp); +			free_trace_kprobe(tk);  		mutex_unlock(&probe_lock);  		return ret;  	} @@ -554,47 +709,49 @@ static int create_trace_probe(int argc, char **argv)  				 is_return ? 
'r' : 'p', addr);  		event = buf;  	} -	tp = alloc_trace_probe(group, event, addr, symbol, offset, argc, +	tk = alloc_trace_kprobe(group, event, addr, symbol, offset, argc,  			       is_return); -	if (IS_ERR(tp)) { +	if (IS_ERR(tk)) {  		pr_info("Failed to allocate trace_probe.(%d)\n", -			(int)PTR_ERR(tp)); -		return PTR_ERR(tp); +			(int)PTR_ERR(tk)); +		return PTR_ERR(tk);  	}  	/* parse arguments */  	ret = 0;  	for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) { +		struct probe_arg *parg = &tk->tp.args[i]; +  		/* Increment count for freeing args in error case */ -		tp->nr_args++; +		tk->tp.nr_args++;  		/* Parse argument name */  		arg = strchr(argv[i], '=');  		if (arg) {  			*arg++ = '\0'; -			tp->args[i].name = kstrdup(argv[i], GFP_KERNEL); +			parg->name = kstrdup(argv[i], GFP_KERNEL);  		} else {  			arg = argv[i];  			/* If argument name is omitted, set "argN" */  			snprintf(buf, MAX_EVENT_NAME_LEN, "arg%d", i + 1); -			tp->args[i].name = kstrdup(buf, GFP_KERNEL); +			parg->name = kstrdup(buf, GFP_KERNEL);  		} -		if (!tp->args[i].name) { +		if (!parg->name) {  			pr_info("Failed to allocate argument[%d] name.\n", i);  			ret = -ENOMEM;  			goto error;  		} -		if (!is_good_name(tp->args[i].name)) { +		if (!is_good_name(parg->name)) {  			pr_info("Invalid argument[%d] name: %s\n", -				i, tp->args[i].name); +				i, parg->name);  			ret = -EINVAL;  			goto error;  		} -		if (traceprobe_conflict_field_name(tp->args[i].name, -							tp->args, i)) { +		if (traceprobe_conflict_field_name(parg->name, +							tk->tp.args, i)) {  			pr_info("Argument[%d] name '%s' conflicts with "  				"another field.\n", i, argv[i]);  			ret = -EINVAL; @@ -602,7 +759,7 @@ static int create_trace_probe(int argc, char **argv)  		}  		/* Parse fetch argument */ -		ret = traceprobe_parse_probe_arg(arg, &tp->size, &tp->args[i], +		ret = traceprobe_parse_probe_arg(arg, &tk->tp.size, parg,  						is_return, true);  		if (ret) {  			pr_info("Parse error at argument[%d]. (%d)\n", i, ret); @@ -610,35 +767,35 @@ static int create_trace_probe(int argc, char **argv)  		}  	} -	ret = register_trace_probe(tp); +	ret = register_trace_kprobe(tk);  	if (ret)  		goto error;  	return 0;  error: -	free_trace_probe(tp); +	free_trace_kprobe(tk);  	return ret;  } -static int release_all_trace_probes(void) +static int release_all_trace_kprobes(void)  { -	struct trace_probe *tp; +	struct trace_kprobe *tk;  	int ret = 0;  	mutex_lock(&probe_lock);  	/* Ensure no probe is in use. */ -	list_for_each_entry(tp, &probe_list, list) -		if (trace_probe_is_enabled(tp)) { +	list_for_each_entry(tk, &probe_list, list) +		if (trace_probe_is_enabled(&tk->tp)) {  			ret = -EBUSY;  			goto end;  		}  	/* TODO: Use batch unregistration */  	while (!list_empty(&probe_list)) { -		tp = list_entry(probe_list.next, struct trace_probe, list); -		ret = unregister_trace_probe(tp); +		tk = list_entry(probe_list.next, struct trace_kprobe, list); +		ret = unregister_trace_kprobe(tk);  		if (ret)  			goto end; -		free_trace_probe(tp); +		free_trace_kprobe(tk);  	}  end: @@ -666,22 +823,23 @@ static void probes_seq_stop(struct seq_file *m, void *v)  static int probes_seq_show(struct seq_file *m, void *v)  { -	struct trace_probe *tp = v; +	struct trace_kprobe *tk = v;  	int i; -	seq_printf(m, "%c", trace_probe_is_return(tp) ? 'r' : 'p'); -	seq_printf(m, ":%s/%s", tp->call.class->system, tp->call.name); +	seq_printf(m, "%c", trace_kprobe_is_return(tk) ? 
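The argument-syntax comment at the top of create_trace_kprobe() above describes the format accepted through the kprobe_events file. As a self-contained sketch (the group, event, probed symbol and x86 register names are invented for the example), the same parser can be driven from C the way the self-test at the bottom of this file does:

static int __init add_example_probe(void)
{
	/* p[:[GRP/]EVENT] SYMBOL FETCHARGS -- all names here are made up */
	return traceprobe_command("p:mygroup/myopen do_sys_open "
				  "dfd=%ax filename=+0(%si):string",
				  create_trace_kprobe);
}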
'r' : 'p'); +	seq_printf(m, ":%s/%s", tk->tp.call.class->system, +			ftrace_event_name(&tk->tp.call)); -	if (!tp->symbol) -		seq_printf(m, " 0x%p", tp->rp.kp.addr); -	else if (tp->rp.kp.offset) -		seq_printf(m, " %s+%u", trace_probe_symbol(tp), -			   tp->rp.kp.offset); +	if (!tk->symbol) +		seq_printf(m, " 0x%p", tk->rp.kp.addr); +	else if (tk->rp.kp.offset) +		seq_printf(m, " %s+%u", trace_kprobe_symbol(tk), +			   tk->rp.kp.offset);  	else -		seq_printf(m, " %s", trace_probe_symbol(tp)); +		seq_printf(m, " %s", trace_kprobe_symbol(tk)); -	for (i = 0; i < tp->nr_args; i++) -		seq_printf(m, " %s=%s", tp->args[i].name, tp->args[i].comm); +	for (i = 0; i < tk->tp.nr_args; i++) +		seq_printf(m, " %s=%s", tk->tp.args[i].name, tk->tp.args[i].comm);  	seq_printf(m, "\n");  	return 0; @@ -699,7 +857,7 @@ static int probes_open(struct inode *inode, struct file *file)  	int ret;  	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) { -		ret = release_all_trace_probes(); +		ret = release_all_trace_kprobes();  		if (ret < 0)  			return ret;  	} @@ -711,7 +869,7 @@ static ssize_t probes_write(struct file *file, const char __user *buffer,  			    size_t count, loff_t *ppos)  {  	return traceprobe_probes_write(file, buffer, count, ppos, -			create_trace_probe); +			create_trace_kprobe);  }  static const struct file_operations kprobe_events_ops = { @@ -726,10 +884,11 @@ static const struct file_operations kprobe_events_ops = {  /* Probes profiling interfaces */  static int probes_profile_seq_show(struct seq_file *m, void *v)  { -	struct trace_probe *tp = v; +	struct trace_kprobe *tk = v; -	seq_printf(m, "  %-44s %15lu %15lu\n", tp->call.name, tp->nhit, -		   tp->rp.kp.nmissed); +	seq_printf(m, "  %-44s %15lu %15lu\n", +		   ftrace_event_name(&tk->tp.call), tk->nhit, +		   tk->rp.kp.nmissed);  	return 0;  } @@ -754,57 +913,9 @@ static const struct file_operations kprobe_profile_ops = {  	.release        = seq_release,  }; -/* Sum up total data length for dynamic arraies (strings) */ -static __kprobes int __get_data_size(struct trace_probe *tp, -				     struct pt_regs *regs) -{ -	int i, ret = 0; -	u32 len; - -	for (i = 0; i < tp->nr_args; i++) -		if (unlikely(tp->args[i].fetch_size.fn)) { -			call_fetch(&tp->args[i].fetch_size, regs, &len); -			ret += len; -		} - -	return ret; -} - -/* Store the value of each argument */ -static __kprobes void store_trace_args(int ent_size, struct trace_probe *tp, -				       struct pt_regs *regs, -				       u8 *data, int maxlen) -{ -	int i; -	u32 end = tp->size; -	u32 *dl;	/* Data (relative) location */ - -	for (i = 0; i < tp->nr_args; i++) { -		if (unlikely(tp->args[i].fetch_size.fn)) { -			/* -			 * First, we set the relative location and -			 * maximum data length to *dl -			 */ -			dl = (u32 *)(data + tp->args[i].offset); -			*dl = make_data_rloc(maxlen, end - tp->args[i].offset); -			/* Then try to fetch string or dynamic array data */ -			call_fetch(&tp->args[i].fetch, regs, dl); -			/* Reduce maximum length */ -			end += get_rloc_len(*dl); -			maxlen -= get_rloc_len(*dl); -			/* Trick here, convert data_rloc to data_loc */ -			*dl = convert_rloc_to_loc(*dl, -				 ent_size + tp->args[i].offset); -		} else -			/* Just fetching data normally */ -			call_fetch(&tp->args[i].fetch, regs, -				   data + tp->args[i].offset); -	} -} -  /* Kprobe handler */ -static __kprobes void -__kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs, +static nokprobe_inline void +__kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs,  		    struct 
ftrace_event_file *ftrace_file)  {  	struct kprobe_trace_entry_head *entry; @@ -812,18 +923,18 @@ __kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs,  	struct ring_buffer *buffer;  	int size, dsize, pc;  	unsigned long irq_flags; -	struct ftrace_event_call *call = &tp->call; +	struct ftrace_event_call *call = &tk->tp.call;  	WARN_ON(call != ftrace_file->event_call); -	if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &ftrace_file->flags)) +	if (ftrace_trigger_soft_disabled(ftrace_file))  		return;  	local_save_flags(irq_flags);  	pc = preempt_count(); -	dsize = __get_data_size(tp, regs); -	size = sizeof(*entry) + tp->size + dsize; +	dsize = __get_data_size(&tk->tp, regs); +	size = sizeof(*entry) + tk->tp.size + dsize;  	event = trace_event_buffer_lock_reserve(&buffer, ftrace_file,  						call->event.type, @@ -832,26 +943,26 @@ __kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs,  		return;  	entry = ring_buffer_event_data(event); -	entry->ip = (unsigned long)tp->rp.kp.addr; -	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); +	entry->ip = (unsigned long)tk->rp.kp.addr; +	store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize); -	if (!filter_current_check_discard(buffer, call, entry, event)) -		trace_buffer_unlock_commit_regs(buffer, event, -						irq_flags, pc, regs); +	event_trigger_unlock_commit_regs(ftrace_file, buffer, event, +					 entry, irq_flags, pc, regs);  } -static __kprobes void -kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs) +static void +kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs)  {  	struct event_file_link *link; -	list_for_each_entry_rcu(link, &tp->files, list) -		__kprobe_trace_func(tp, regs, link->file); +	list_for_each_entry_rcu(link, &tk->tp.files, list) +		__kprobe_trace_func(tk, regs, link->file);  } +NOKPROBE_SYMBOL(kprobe_trace_func);  /* Kretprobe handler */ -static __kprobes void -__kretprobe_trace_func(struct trace_probe *tp, struct kretprobe_instance *ri, +static nokprobe_inline void +__kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,  		       struct pt_regs *regs,  		       struct ftrace_event_file *ftrace_file)  { @@ -860,18 +971,18 @@ __kretprobe_trace_func(struct trace_probe *tp, struct kretprobe_instance *ri,  	struct ring_buffer *buffer;  	int size, pc, dsize;  	unsigned long irq_flags; -	struct ftrace_event_call *call = &tp->call; +	struct ftrace_event_call *call = &tk->tp.call;  	WARN_ON(call != ftrace_file->event_call); -	if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &ftrace_file->flags)) +	if (ftrace_trigger_soft_disabled(ftrace_file))  		return;  	local_save_flags(irq_flags);  	pc = preempt_count(); -	dsize = __get_data_size(tp, regs); -	size = sizeof(*entry) + tp->size + dsize; +	dsize = __get_data_size(&tk->tp, regs); +	size = sizeof(*entry) + tk->tp.size + dsize;  	event = trace_event_buffer_lock_reserve(&buffer, ftrace_file,  						call->event.type, @@ -880,24 +991,24 @@ __kretprobe_trace_func(struct trace_probe *tp, struct kretprobe_instance *ri,  		return;  	entry = ring_buffer_event_data(event); -	entry->func = (unsigned long)tp->rp.kp.addr; +	entry->func = (unsigned long)tk->rp.kp.addr;  	entry->ret_ip = (unsigned long)ri->ret_addr; -	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); +	store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize); -	if (!filter_current_check_discard(buffer, call, entry, event)) -		trace_buffer_unlock_commit_regs(buffer, event, -						irq_flags, pc, regs); 
+	event_trigger_unlock_commit_regs(ftrace_file, buffer, event, +					 entry, irq_flags, pc, regs);  } -static __kprobes void -kretprobe_trace_func(struct trace_probe *tp, struct kretprobe_instance *ri, +static void +kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,  		     struct pt_regs *regs)  {  	struct event_file_link *link; -	list_for_each_entry_rcu(link, &tp->files, list) -		__kretprobe_trace_func(tp, ri, regs, link->file); +	list_for_each_entry_rcu(link, &tk->tp.files, list) +		__kretprobe_trace_func(tk, ri, regs, link->file);  } +NOKPROBE_SYMBOL(kretprobe_trace_func);  /* Event entry printers */  static enum print_line_t @@ -913,7 +1024,7 @@ print_kprobe_event(struct trace_iterator *iter, int flags,  	field = (struct kprobe_trace_entry_head *)iter->ent;  	tp = container_of(event, struct trace_probe, call.event); -	if (!trace_seq_printf(s, "%s: (", tp->call.name)) +	if (!trace_seq_printf(s, "%s: (", ftrace_event_name(&tp->call)))  		goto partial;  	if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET)) @@ -949,7 +1060,7 @@ print_kretprobe_event(struct trace_iterator *iter, int flags,  	field = (struct kretprobe_trace_entry_head *)iter->ent;  	tp = container_of(event, struct trace_probe, call.event); -	if (!trace_seq_printf(s, "%s: (", tp->call.name)) +	if (!trace_seq_printf(s, "%s: (", ftrace_event_name(&tp->call)))  		goto partial;  	if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET)) @@ -983,16 +1094,18 @@ static int kprobe_event_define_fields(struct ftrace_event_call *event_call)  {  	int ret, i;  	struct kprobe_trace_entry_head field; -	struct trace_probe *tp = (struct trace_probe *)event_call->data; +	struct trace_kprobe *tk = (struct trace_kprobe *)event_call->data;  	DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);  	/* Set argument names as fields */ -	for (i = 0; i < tp->nr_args; i++) { -		ret = trace_define_field(event_call, tp->args[i].type->fmttype, -					 tp->args[i].name, -					 sizeof(field) + tp->args[i].offset, -					 tp->args[i].type->size, -					 tp->args[i].type->is_signed, +	for (i = 0; i < tk->tp.nr_args; i++) { +		struct probe_arg *parg = &tk->tp.args[i]; + +		ret = trace_define_field(event_call, parg->type->fmttype, +					 parg->name, +					 sizeof(field) + parg->offset, +					 parg->type->size, +					 parg->type->is_signed,  					 FILTER_OTHER);  		if (ret)  			return ret; @@ -1004,17 +1117,19 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)  {  	int ret, i;  	struct kretprobe_trace_entry_head field; -	struct trace_probe *tp = (struct trace_probe *)event_call->data; +	struct trace_kprobe *tk = (struct trace_kprobe *)event_call->data;  	DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);  	DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);  	/* Set argument names as fields */ -	for (i = 0; i < tp->nr_args; i++) { -		ret = trace_define_field(event_call, tp->args[i].type->fmttype, -					 tp->args[i].name, -					 sizeof(field) + tp->args[i].offset, -					 tp->args[i].type->size, -					 tp->args[i].type->is_signed, +	for (i = 0; i < tk->tp.nr_args; i++) { +		struct probe_arg *parg = &tk->tp.args[i]; + +		ret = trace_define_field(event_call, parg->type->fmttype, +					 parg->name, +					 sizeof(field) + parg->offset, +					 parg->type->size, +					 parg->type->is_signed,  					 FILTER_OTHER);  		if (ret)  			return ret; @@ -1022,74 +1137,13 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)  	return 0;  } -static int 
__set_print_fmt(struct trace_probe *tp, char *buf, int len) -{ -	int i; -	int pos = 0; - -	const char *fmt, *arg; - -	if (!trace_probe_is_return(tp)) { -		fmt = "(%lx)"; -		arg = "REC->" FIELD_STRING_IP; -	} else { -		fmt = "(%lx <- %lx)"; -		arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP; -	} - -	/* When len=0, we just calculate the needed length */ -#define LEN_OR_ZERO (len ? len - pos : 0) - -	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt); - -	for (i = 0; i < tp->nr_args; i++) { -		pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%s", -				tp->args[i].name, tp->args[i].type->fmt); -	} - -	pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg); - -	for (i = 0; i < tp->nr_args; i++) { -		if (strcmp(tp->args[i].type->name, "string") == 0) -			pos += snprintf(buf + pos, LEN_OR_ZERO, -					", __get_str(%s)", -					tp->args[i].name); -		else -			pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s", -					tp->args[i].name); -	} - -#undef LEN_OR_ZERO - -	/* return the length of print_fmt */ -	return pos; -} - -static int set_print_fmt(struct trace_probe *tp) -{ -	int len; -	char *print_fmt; - -	/* First: called with 0 length to calculate the needed length */ -	len = __set_print_fmt(tp, NULL, 0); -	print_fmt = kmalloc(len + 1, GFP_KERNEL); -	if (!print_fmt) -		return -ENOMEM; - -	/* Second: actually write the @print_fmt */ -	__set_print_fmt(tp, print_fmt, len + 1); -	tp->call.print_fmt = print_fmt; - -	return 0; -} -  #ifdef CONFIG_PERF_EVENTS  /* Kprobe profile handler */ -static __kprobes void -kprobe_perf_func(struct trace_probe *tp, struct pt_regs *regs) +static void +kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)  { -	struct ftrace_event_call *call = &tp->call; +	struct ftrace_event_call *call = &tk->tp.call;  	struct kprobe_trace_entry_head *entry;  	struct hlist_head *head;  	int size, __size, dsize; @@ -1099,8 +1153,8 @@ kprobe_perf_func(struct trace_probe *tp, struct pt_regs *regs)  	if (hlist_empty(head))  		return; -	dsize = __get_data_size(tp, regs); -	__size = sizeof(*entry) + tp->size + dsize; +	dsize = __get_data_size(&tk->tp, regs); +	__size = sizeof(*entry) + tk->tp.size + dsize;  	size = ALIGN(__size + sizeof(u32), sizeof(u64));  	size -= sizeof(u32); @@ -1108,18 +1162,19 @@ kprobe_perf_func(struct trace_probe *tp, struct pt_regs *regs)  	if (!entry)  		return; -	entry->ip = (unsigned long)tp->rp.kp.addr; +	entry->ip = (unsigned long)tk->rp.kp.addr;  	memset(&entry[1], 0, dsize); -	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); +	store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);  	perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL);  } +NOKPROBE_SYMBOL(kprobe_perf_func);  /* Kretprobe profile handler */ -static __kprobes void -kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri, +static void +kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,  		    struct pt_regs *regs)  { -	struct ftrace_event_call *call = &tp->call; +	struct ftrace_event_call *call = &tk->tp.call;  	struct kretprobe_trace_entry_head *entry;  	struct hlist_head *head;  	int size, __size, dsize; @@ -1129,8 +1184,8 @@ kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri,  	if (hlist_empty(head))  		return; -	dsize = __get_data_size(tp, regs); -	__size = sizeof(*entry) + tp->size + dsize; +	dsize = __get_data_size(&tk->tp, regs); +	__size = sizeof(*entry) + tk->tp.size + dsize;  	size = ALIGN(__size + sizeof(u32), sizeof(u64));  	size -= sizeof(u32); @@ 
-1138,11 +1193,12 @@ kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri,  	if (!entry)  		return; -	entry->func = (unsigned long)tp->rp.kp.addr; +	entry->func = (unsigned long)tk->rp.kp.addr;  	entry->ret_ip = (unsigned long)ri->ret_addr; -	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); +	store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);  	perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL);  } +NOKPROBE_SYMBOL(kretprobe_perf_func);  #endif	/* CONFIG_PERF_EVENTS */  /* @@ -1151,24 +1207,23 @@ kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri,   * kprobe_trace_self_tests_init() does enable_trace_probe/disable_trace_probe   * lockless, but we can't race with this __init function.   */ -static __kprobes -int kprobe_register(struct ftrace_event_call *event, -		    enum trace_reg type, void *data) +static int kprobe_register(struct ftrace_event_call *event, +			   enum trace_reg type, void *data)  { -	struct trace_probe *tp = (struct trace_probe *)event->data; +	struct trace_kprobe *tk = (struct trace_kprobe *)event->data;  	struct ftrace_event_file *file = data;  	switch (type) {  	case TRACE_REG_REGISTER: -		return enable_trace_probe(tp, file); +		return enable_trace_kprobe(tk, file);  	case TRACE_REG_UNREGISTER: -		return disable_trace_probe(tp, file); +		return disable_trace_kprobe(tk, file);  #ifdef CONFIG_PERF_EVENTS  	case TRACE_REG_PERF_REGISTER: -		return enable_trace_probe(tp, NULL); +		return enable_trace_kprobe(tk, NULL);  	case TRACE_REG_PERF_UNREGISTER: -		return disable_trace_probe(tp, NULL); +		return disable_trace_kprobe(tk, NULL);  	case TRACE_REG_PERF_OPEN:  	case TRACE_REG_PERF_CLOSE:  	case TRACE_REG_PERF_ADD: @@ -1179,37 +1234,38 @@ int kprobe_register(struct ftrace_event_call *event,  	return 0;  } -static __kprobes -int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) +static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)  { -	struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); +	struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp); -	tp->nhit++; +	tk->nhit++; -	if (tp->flags & TP_FLAG_TRACE) -		kprobe_trace_func(tp, regs); +	if (tk->tp.flags & TP_FLAG_TRACE) +		kprobe_trace_func(tk, regs);  #ifdef CONFIG_PERF_EVENTS -	if (tp->flags & TP_FLAG_PROFILE) -		kprobe_perf_func(tp, regs); +	if (tk->tp.flags & TP_FLAG_PROFILE) +		kprobe_perf_func(tk, regs);  #endif  	return 0;	/* We don't tweek kernel, so just return 0 */  } +NOKPROBE_SYMBOL(kprobe_dispatcher); -static __kprobes -int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs) +static int +kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)  { -	struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); +	struct trace_kprobe *tk = container_of(ri->rp, struct trace_kprobe, rp); -	tp->nhit++; +	tk->nhit++; -	if (tp->flags & TP_FLAG_TRACE) -		kretprobe_trace_func(tp, ri, regs); +	if (tk->tp.flags & TP_FLAG_TRACE) +		kretprobe_trace_func(tk, ri, regs);  #ifdef CONFIG_PERF_EVENTS -	if (tp->flags & TP_FLAG_PROFILE) -		kretprobe_perf_func(tp, ri, regs); +	if (tk->tp.flags & TP_FLAG_PROFILE) +		kretprobe_perf_func(tk, ri, regs);  #endif  	return 0;	/* We don't tweek kernel, so just return 0 */  } +NOKPROBE_SYMBOL(kretprobe_dispatcher);  static struct trace_event_functions kretprobe_funcs = {  	.trace		= print_kretprobe_event @@ -1219,21 +1275,21 @@ static struct trace_event_functions kprobe_funcs = {  	.trace		
= print_kprobe_event  }; -static int register_probe_event(struct trace_probe *tp) +static int register_kprobe_event(struct trace_kprobe *tk)  { -	struct ftrace_event_call *call = &tp->call; +	struct ftrace_event_call *call = &tk->tp.call;  	int ret;  	/* Initialize ftrace_event_call */  	INIT_LIST_HEAD(&call->class->fields); -	if (trace_probe_is_return(tp)) { +	if (trace_kprobe_is_return(tk)) {  		call->event.funcs = &kretprobe_funcs;  		call->class->define_fields = kretprobe_event_define_fields;  	} else {  		call->event.funcs = &kprobe_funcs;  		call->class->define_fields = kprobe_event_define_fields;  	} -	if (set_print_fmt(tp) < 0) +	if (set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0)  		return -ENOMEM;  	ret = register_ftrace_event(&call->event);  	if (!ret) { @@ -1242,24 +1298,25 @@ static int register_probe_event(struct trace_probe *tp)  	}  	call->flags = 0;  	call->class->reg = kprobe_register; -	call->data = tp; +	call->data = tk;  	ret = trace_add_event_call(call);  	if (ret) { -		pr_info("Failed to register kprobe event: %s\n", call->name); +		pr_info("Failed to register kprobe event: %s\n", +			ftrace_event_name(call));  		kfree(call->print_fmt);  		unregister_ftrace_event(&call->event);  	}  	return ret;  } -static int unregister_probe_event(struct trace_probe *tp) +static int unregister_kprobe_event(struct trace_kprobe *tk)  {  	int ret;  	/* tp->event is unregistered in trace_remove_event_call() */ -	ret = trace_remove_event_call(&tp->call); +	ret = trace_remove_event_call(&tk->tp.call);  	if (!ret) -		kfree(tp->call.print_fmt); +		kfree(tk->tp.call.print_fmt);  	return ret;  } @@ -1269,7 +1326,7 @@ static __init int init_kprobe_trace(void)  	struct dentry *d_tracer;  	struct dentry *entry; -	if (register_module_notifier(&trace_probe_module_nb)) +	if (register_module_notifier(&trace_kprobe_module_nb))  		return -EINVAL;  	d_tracer = tracing_init_dentry(); @@ -1309,72 +1366,75 @@ static __used int kprobe_trace_selftest_target(int a1, int a2, int a3,  }  static struct ftrace_event_file * -find_trace_probe_file(struct trace_probe *tp, struct trace_array *tr) +find_trace_probe_file(struct trace_kprobe *tk, struct trace_array *tr)  {  	struct ftrace_event_file *file;  	list_for_each_entry(file, &tr->events, list) -		if (file->event_call == &tp->call) +		if (file->event_call == &tk->tp.call)  			return file;  	return NULL;  }  /* - * Nobody but us can call enable_trace_probe/disable_trace_probe at this + * Nobody but us can call enable_trace_kprobe/disable_trace_kprobe at this   * stage, we can do this lockless.   
*/  static __init int kprobe_trace_self_tests_init(void)  {  	int ret, warn = 0;  	int (*target)(int, int, int, int, int, int); -	struct trace_probe *tp; +	struct trace_kprobe *tk;  	struct ftrace_event_file *file; +	if (tracing_is_disabled()) +		return -ENODEV; +  	target = kprobe_trace_selftest_target;  	pr_info("Testing kprobe tracing: ");  	ret = traceprobe_command("p:testprobe kprobe_trace_selftest_target "  				  "$stack $stack0 +0($stack)", -				  create_trace_probe); +				  create_trace_kprobe);  	if (WARN_ON_ONCE(ret)) {  		pr_warn("error on probing function entry.\n");  		warn++;  	} else {  		/* Enable trace point */ -		tp = find_trace_probe("testprobe", KPROBE_EVENT_SYSTEM); -		if (WARN_ON_ONCE(tp == NULL)) { +		tk = find_trace_kprobe("testprobe", KPROBE_EVENT_SYSTEM); +		if (WARN_ON_ONCE(tk == NULL)) {  			pr_warn("error on getting new probe.\n");  			warn++;  		} else { -			file = find_trace_probe_file(tp, top_trace_array()); +			file = find_trace_probe_file(tk, top_trace_array());  			if (WARN_ON_ONCE(file == NULL)) {  				pr_warn("error on getting probe file.\n");  				warn++;  			} else -				enable_trace_probe(tp, file); +				enable_trace_kprobe(tk, file);  		}  	}  	ret = traceprobe_command("r:testprobe2 kprobe_trace_selftest_target " -				  "$retval", create_trace_probe); +				  "$retval", create_trace_kprobe);  	if (WARN_ON_ONCE(ret)) {  		pr_warn("error on probing function return.\n");  		warn++;  	} else {  		/* Enable trace point */ -		tp = find_trace_probe("testprobe2", KPROBE_EVENT_SYSTEM); -		if (WARN_ON_ONCE(tp == NULL)) { +		tk = find_trace_kprobe("testprobe2", KPROBE_EVENT_SYSTEM); +		if (WARN_ON_ONCE(tk == NULL)) {  			pr_warn("error on getting 2nd new probe.\n");  			warn++;  		} else { -			file = find_trace_probe_file(tp, top_trace_array()); +			file = find_trace_probe_file(tk, top_trace_array());  			if (WARN_ON_ONCE(file == NULL)) {  				pr_warn("error on getting probe file.\n");  				warn++;  			} else -				enable_trace_probe(tp, file); +				enable_trace_kprobe(tk, file);  		}  	} @@ -1384,46 +1444,46 @@ static __init int kprobe_trace_self_tests_init(void)  	ret = target(1, 2, 3, 4, 5, 6);  	/* Disable trace points before removing it */ -	tp = find_trace_probe("testprobe", KPROBE_EVENT_SYSTEM); -	if (WARN_ON_ONCE(tp == NULL)) { +	tk = find_trace_kprobe("testprobe", KPROBE_EVENT_SYSTEM); +	if (WARN_ON_ONCE(tk == NULL)) {  		pr_warn("error on getting test probe.\n");  		warn++;  	} else { -		file = find_trace_probe_file(tp, top_trace_array()); +		file = find_trace_probe_file(tk, top_trace_array());  		if (WARN_ON_ONCE(file == NULL)) {  			pr_warn("error on getting probe file.\n");  			warn++;  		} else -			disable_trace_probe(tp, file); +			disable_trace_kprobe(tk, file);  	} -	tp = find_trace_probe("testprobe2", KPROBE_EVENT_SYSTEM); -	if (WARN_ON_ONCE(tp == NULL)) { +	tk = find_trace_kprobe("testprobe2", KPROBE_EVENT_SYSTEM); +	if (WARN_ON_ONCE(tk == NULL)) {  		pr_warn("error on getting 2nd test probe.\n");  		warn++;  	} else { -		file = find_trace_probe_file(tp, top_trace_array()); +		file = find_trace_probe_file(tk, top_trace_array());  		if (WARN_ON_ONCE(file == NULL)) {  			pr_warn("error on getting probe file.\n");  			warn++;  		} else -			disable_trace_probe(tp, file); +			disable_trace_kprobe(tk, file);  	} -	ret = traceprobe_command("-:testprobe", create_trace_probe); +	ret = traceprobe_command("-:testprobe", create_trace_kprobe);  	if (WARN_ON_ONCE(ret)) {  		pr_warn("error on deleting a probe.\n");  		warn++;  	} -	ret = 
traceprobe_command("-:testprobe2", create_trace_probe); +	ret = traceprobe_command("-:testprobe2", create_trace_kprobe);  	if (WARN_ON_ONCE(ret)) {  		pr_warn("error on deleting a probe.\n");  		warn++;  	}  end: -	release_all_trace_probes(); +	release_all_trace_kprobes();  	if (warn)  		pr_cont("NG: Some tests are failed. Please check them.\n");  	else diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index b3dcfb2f0fe..0abd9b86347 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -323,7 +323,7 @@ static void __trace_mmiotrace_rw(struct trace_array *tr,  	entry	= ring_buffer_event_data(event);  	entry->rw			= *rw; -	if (!filter_check_discard(call, entry, buffer, event)) +	if (!call_filter_check_discard(call, entry, buffer, event))  		trace_buffer_unlock_commit(buffer, event, 0, pc);  } @@ -353,7 +353,7 @@ static void __trace_mmiotrace_map(struct trace_array *tr,  	entry	= ring_buffer_event_data(event);  	entry->map			= *map; -	if (!filter_check_discard(call, entry, buffer, event)) +	if (!call_filter_check_discard(call, entry, buffer, event))  		trace_buffer_unlock_commit(buffer, event, 0, pc);  } diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c index 394f94417e2..fcf0a9e4891 100644 --- a/kernel/trace/trace_nop.c +++ b/kernel/trace/trace_nop.c @@ -62,7 +62,7 @@ static void nop_trace_reset(struct trace_array *tr)   * If you don't implement it, then the flag setting will be   * automatically accepted.   */ -static int nop_set_flag(u32 old_flags, u32 bit, int set) +static int nop_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)  {  	/*  	 * Note that you don't need to update nop_flags.val yourself. @@ -91,11 +91,11 @@ struct tracer nop_trace __read_mostly =  	.name		= "nop",  	.init		= nop_trace_init,  	.reset		= nop_trace_reset, -	.wait_pipe	= poll_wait_pipe,  #ifdef CONFIG_FTRACE_SELFTEST  	.selftest	= trace_selftest_startup_nop,  #endif  	.flags		= &nop_flags, -	.set_flag	= nop_set_flag +	.set_flag	= nop_set_flag, +	.allow_instances = true,  }; diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 34e7cbac0c9..f3dad80c20b 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -126,6 +126,34 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)  EXPORT_SYMBOL_GPL(trace_seq_printf);  /** + * trace_seq_bitmask - put a list of longs as a bitmask print output + * @s:		trace sequence descriptor + * @maskp:	points to an array of unsigned longs that represent a bitmask + * @nmaskbits:	The number of bits that are valid in @maskp + * + * It returns 0 if the trace oversizes the buffer's free + * space, 1 otherwise. + * + * Writes a ASCII representation of a bitmask string into @s. 
+ */ +int +trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp, +		  int nmaskbits) +{ +	int len = (PAGE_SIZE - 1) - s->len; +	int ret; + +	if (s->full || !len) +		return 0; + +	ret = bitmap_scnprintf(s->buffer, len, maskp, nmaskbits); +	s->len += ret; + +	return 1; +} +EXPORT_SYMBOL_GPL(trace_seq_bitmask); + +/**   * trace_seq_vprintf - sequence printing of trace information   * @s: trace sequence descriptor   * @fmt: printf format string @@ -399,6 +427,19 @@ EXPORT_SYMBOL(ftrace_print_symbols_seq_u64);  #endif  const char * +ftrace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr, +			 unsigned int bitmask_size) +{ +	const char *ret = p->buffer + p->len; + +	trace_seq_bitmask(p, bitmask_ptr, bitmask_size * 8); +	trace_seq_putc(p, 0); + +	return ret; +} +EXPORT_SYMBOL_GPL(ftrace_print_bitmask_seq); + +const char *  ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len)  {  	int i; @@ -431,7 +472,7 @@ int ftrace_raw_output_prep(struct trace_iterator *iter,  	}  	trace_seq_init(p); -	ret = trace_seq_printf(s, "%s: ", event->name); +	ret = trace_seq_printf(s, "%s: ", ftrace_event_name(event));  	if (!ret)  		return TRACE_TYPE_PARTIAL_LINE; @@ -439,6 +480,37 @@ int ftrace_raw_output_prep(struct trace_iterator *iter,  }  EXPORT_SYMBOL(ftrace_raw_output_prep); +static int ftrace_output_raw(struct trace_iterator *iter, char *name, +			     char *fmt, va_list ap) +{ +	struct trace_seq *s = &iter->seq; +	int ret; + +	ret = trace_seq_printf(s, "%s: ", name); +	if (!ret) +		return TRACE_TYPE_PARTIAL_LINE; + +	ret = trace_seq_vprintf(s, fmt, ap); + +	if (!ret) +		return TRACE_TYPE_PARTIAL_LINE; + +	return TRACE_TYPE_HANDLED; +} + +int ftrace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...) +{ +	va_list ap; +	int ret; + +	va_start(ap, fmt); +	ret = ftrace_output_raw(iter, name, fmt, ap); +	va_end(ap); + +	return ret; +} +EXPORT_SYMBOL_GPL(ftrace_output_call); +  #ifdef CONFIG_KRETPROBES  static inline const char *kretprobed(const char *name)  { @@ -618,8 +690,23 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)  		(entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' :  		(entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 'X' :  		'.'; -	need_resched = -		(entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.'; + +	switch (entry->flags & (TRACE_FLAG_NEED_RESCHED | +				TRACE_FLAG_PREEMPT_RESCHED)) { +	case TRACE_FLAG_NEED_RESCHED | TRACE_FLAG_PREEMPT_RESCHED: +		need_resched = 'N'; +		break; +	case TRACE_FLAG_NEED_RESCHED: +		need_resched = 'n'; +		break; +	case TRACE_FLAG_PREEMPT_RESCHED: +		need_resched = 'p'; +		break; +	default: +		need_resched = '.'; +		break; +	} +  	hardsoft_irq =  		(hardirq && softirq) ? 'H' :  		hardirq ? 
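trace_seq_bitmask() and ftrace_print_bitmask_seq() added above let trace output render a bitmask as text. A minimal usage sketch, assuming the standard cpumask accessors; the function and its "mask" argument are hypothetical, not part of this patch:

static void print_cpus(struct trace_seq *s, const struct cpumask *mask)
{
	/* prints the mask in hex via bitmap_scnprintf(); returns 0 only if the buffer was full */
	trace_seq_bitmask(s, cpumask_bits(mask), nr_cpumask_bits);
}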
'h' : diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 412e959709b..d4b9fc22cd2 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -35,48 +35,28 @@ const char *reserved_field_names[] = {  	FIELD_STRING_FUNC,  }; -/* Printing function type */ -#define PRINT_TYPE_FUNC_NAME(type)	print_type_##type -#define PRINT_TYPE_FMT_NAME(type)	print_type_format_##type -  /* Printing  in basic type function template */ -#define DEFINE_BASIC_PRINT_TYPE_FUNC(type, fmt, cast)			\ -static __kprobes int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s,	\ -						const char *name,	\ -						void *data, void *ent)\ +#define DEFINE_BASIC_PRINT_TYPE_FUNC(type, fmt)				\ +int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, const char *name,	\ +				void *data, void *ent)			\  {									\ -	return trace_seq_printf(s, " %s=" fmt, name, (cast)*(type *)data);\ +	return trace_seq_printf(s, " %s=" fmt, name, *(type *)data);	\  }									\ -static const char PRINT_TYPE_FMT_NAME(type)[] = fmt; - -DEFINE_BASIC_PRINT_TYPE_FUNC(u8, "%x", unsigned int) -DEFINE_BASIC_PRINT_TYPE_FUNC(u16, "%x", unsigned int) -DEFINE_BASIC_PRINT_TYPE_FUNC(u32, "%lx", unsigned long) -DEFINE_BASIC_PRINT_TYPE_FUNC(u64, "%llx", unsigned long long) -DEFINE_BASIC_PRINT_TYPE_FUNC(s8, "%d", int) -DEFINE_BASIC_PRINT_TYPE_FUNC(s16, "%d", int) -DEFINE_BASIC_PRINT_TYPE_FUNC(s32, "%ld", long) -DEFINE_BASIC_PRINT_TYPE_FUNC(s64, "%lld", long long) - -static inline void *get_rloc_data(u32 *dl) -{ -	return (u8 *)dl + get_rloc_offs(*dl); -} - -/* For data_loc conversion */ -static inline void *get_loc_data(u32 *dl, void *ent) -{ -	return (u8 *)ent + get_rloc_offs(*dl); -} - -/* For defining macros, define string/string_size types */ -typedef u32 string; -typedef u32 string_size; +const char PRINT_TYPE_FMT_NAME(type)[] = fmt;				\ +NOKPROBE_SYMBOL(PRINT_TYPE_FUNC_NAME(type)); + +DEFINE_BASIC_PRINT_TYPE_FUNC(u8 , "0x%x") +DEFINE_BASIC_PRINT_TYPE_FUNC(u16, "0x%x") +DEFINE_BASIC_PRINT_TYPE_FUNC(u32, "0x%x") +DEFINE_BASIC_PRINT_TYPE_FUNC(u64, "0x%Lx") +DEFINE_BASIC_PRINT_TYPE_FUNC(s8,  "%d") +DEFINE_BASIC_PRINT_TYPE_FUNC(s16, "%d") +DEFINE_BASIC_PRINT_TYPE_FUNC(s32, "%d") +DEFINE_BASIC_PRINT_TYPE_FUNC(s64, "%Ld")  /* Print type function for string type */ -static __kprobes int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s, -						  const char *name, -						  void *data, void *ent) +int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s, const char *name, +				 void *data, void *ent)  {  	int len = *(u32 *)data >> 16; @@ -86,19 +66,9 @@ static __kprobes int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s,  		return trace_seq_printf(s, " %s=\"%s\"", name,  					(const char *)get_loc_data(data, ent));  } +NOKPROBE_SYMBOL(PRINT_TYPE_FUNC_NAME(string)); -static const char PRINT_TYPE_FMT_NAME(string)[] = "\\\"%s\\\""; - -#define FETCH_FUNC_NAME(method, type)	fetch_##method##_##type -/* - * Define macro for basic types - we don't need to define s* types, because - * we have to care only about bitwidth at recording time. 
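The reworked DEFINE_BASIC_PRINT_TYPE_FUNC() above drops the per-type cast and makes the printers non-static. For reference, a sketch of what DEFINE_BASIC_PRINT_TYPE_FUNC(u32, "0x%x") expands to, using the print_type_/print_type_format_ name templates removed from this file (and leaving out the NOKPROBE_SYMBOL() line):

int print_type_u32(struct trace_seq *s, const char *name, void *data, void *ent)
{
	return trace_seq_printf(s, " %s=" "0x%x", name, *(u32 *)data);
}
const char print_type_format_u32[] = "0x%x";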
- */ -#define DEFINE_BASIC_FETCH_FUNCS(method) \ -DEFINE_FETCH_##method(u8)		\ -DEFINE_FETCH_##method(u16)		\ -DEFINE_FETCH_##method(u32)		\ -DEFINE_FETCH_##method(u64) +const char PRINT_TYPE_FMT_NAME(string)[] = "\\\"%s\\\"";  #define CHECK_FETCH_FUNCS(method, fn)			\  	(((FETCH_FUNC_NAME(method, u8) == fn) ||	\ @@ -111,208 +81,79 @@ DEFINE_FETCH_##method(u64)  /* Data fetch function templates */  #define DEFINE_FETCH_reg(type)						\ -static __kprobes void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs,	\ -					void *offset, void *dest)	\ +void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs, void *offset, void *dest)	\  {									\  	*(type *)dest = (type)regs_get_register(regs,			\  				(unsigned int)((unsigned long)offset));	\ -} +}									\ +NOKPROBE_SYMBOL(FETCH_FUNC_NAME(reg, type));  DEFINE_BASIC_FETCH_FUNCS(reg)  /* No string on the register */  #define fetch_reg_string	NULL  #define fetch_reg_string_size	NULL -#define DEFINE_FETCH_stack(type)					\ -static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\ -					  void *offset, void *dest)	\ -{									\ -	*(type *)dest = (type)regs_get_kernel_stack_nth(regs,		\ -				(unsigned int)((unsigned long)offset));	\ -} -DEFINE_BASIC_FETCH_FUNCS(stack) -/* No string on the stack entry */ -#define fetch_stack_string	NULL -#define fetch_stack_string_size	NULL -  #define DEFINE_FETCH_retval(type)					\ -static __kprobes void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs,\ -					  void *dummy, void *dest)	\ +void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs,		\ +				   void *dummy, void *dest)		\  {									\  	*(type *)dest = (type)regs_return_value(regs);			\ -} +}									\ +NOKPROBE_SYMBOL(FETCH_FUNC_NAME(retval, type));  DEFINE_BASIC_FETCH_FUNCS(retval)  /* No string on the retval */  #define fetch_retval_string		NULL  #define fetch_retval_string_size	NULL -#define DEFINE_FETCH_memory(type)					\ -static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\ -					  void *addr, void *dest)	\ -{									\ -	type retval;							\ -	if (probe_kernel_address(addr, retval))				\ -		*(type *)dest = 0;					\ -	else								\ -		*(type *)dest = retval;					\ -} -DEFINE_BASIC_FETCH_FUNCS(memory) -/* - * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max - * length and relative data location. - */ -static __kprobes void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs, -						      void *addr, void *dest) -{ -	long ret; -	int maxlen = get_rloc_len(*(u32 *)dest); -	u8 *dst = get_rloc_data(dest); -	u8 *src = addr; -	mm_segment_t old_fs = get_fs(); - -	if (!maxlen) -		return; - -	/* -	 * Try to get string again, since the string can be changed while -	 * probing. 
-	 */ -	set_fs(KERNEL_DS); -	pagefault_disable(); - -	do -		ret = __copy_from_user_inatomic(dst++, src++, 1); -	while (dst[-1] && ret == 0 && src - (u8 *)addr < maxlen); - -	dst[-1] = '\0'; -	pagefault_enable(); -	set_fs(old_fs); - -	if (ret < 0) {	/* Failed to fetch string */ -		((u8 *)get_rloc_data(dest))[0] = '\0'; -		*(u32 *)dest = make_data_rloc(0, get_rloc_offs(*(u32 *)dest)); -	} else { -		*(u32 *)dest = make_data_rloc(src - (u8 *)addr, -					      get_rloc_offs(*(u32 *)dest)); -	} -} - -/* Return the length of string -- including null terminal byte */ -static __kprobes void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs, -							void *addr, void *dest) -{ -	mm_segment_t old_fs; -	int ret, len = 0; -	u8 c; - -	old_fs = get_fs(); -	set_fs(KERNEL_DS); -	pagefault_disable(); - -	do { -		ret = __copy_from_user_inatomic(&c, (u8 *)addr + len, 1); -		len++; -	} while (c && ret == 0 && len < MAX_STRING_SIZE); - -	pagefault_enable(); -	set_fs(old_fs); - -	if (ret < 0)	/* Failed to check the length */ -		*(u32 *)dest = 0; -	else -		*(u32 *)dest = len; -} - -/* Memory fetching by symbol */ -struct symbol_cache { -	char		*symbol; -	long		offset; -	unsigned long	addr; -}; - -static unsigned long update_symbol_cache(struct symbol_cache *sc) -{ -	sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol); - -	if (sc->addr) -		sc->addr += sc->offset; - -	return sc->addr; -} - -static void free_symbol_cache(struct symbol_cache *sc) -{ -	kfree(sc->symbol); -	kfree(sc); -} - -static struct symbol_cache *alloc_symbol_cache(const char *sym, long offset) -{ -	struct symbol_cache *sc; - -	if (!sym || strlen(sym) == 0) -		return NULL; - -	sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL); -	if (!sc) -		return NULL; - -	sc->symbol = kstrdup(sym, GFP_KERNEL); -	if (!sc->symbol) { -		kfree(sc); -		return NULL; -	} -	sc->offset = offset; -	update_symbol_cache(sc); - -	return sc; -} - -#define DEFINE_FETCH_symbol(type)					\ -static __kprobes void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs,\ -					  void *data, void *dest)	\ -{									\ -	struct symbol_cache *sc = data;					\ -	if (sc->addr)							\ -		fetch_memory_##type(regs, (void *)sc->addr, dest);	\ -	else								\ -		*(type *)dest = 0;					\ -} -DEFINE_BASIC_FETCH_FUNCS(symbol) -DEFINE_FETCH_symbol(string) -DEFINE_FETCH_symbol(string_size) -  /* Dereference memory access function */  struct deref_fetch_param {  	struct fetch_param	orig;  	long			offset; +	fetch_func_t		fetch; +	fetch_func_t		fetch_size;  };  #define DEFINE_FETCH_deref(type)					\ -static __kprobes void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs,\ -					    void *data, void *dest)	\ +void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs,			\ +				  void *data, void *dest)		\  {									\  	struct deref_fetch_param *dprm = data;				\  	unsigned long addr;						\  	call_fetch(&dprm->orig, regs, &addr);				\  	if (addr) {							\  		addr += dprm->offset;					\ -		fetch_memory_##type(regs, (void *)addr, dest);		\ +		dprm->fetch(regs, (void *)addr, dest);			\  	} else								\  		*(type *)dest = 0;					\ -} +}									\ +NOKPROBE_SYMBOL(FETCH_FUNC_NAME(deref, type));  DEFINE_BASIC_FETCH_FUNCS(deref)  DEFINE_FETCH_deref(string) -DEFINE_FETCH_deref(string_size) -static __kprobes void update_deref_fetch_param(struct deref_fetch_param *data) +void FETCH_FUNC_NAME(deref, string_size)(struct pt_regs *regs, +					 void *data, void *dest) +{ +	struct deref_fetch_param *dprm = data; +	unsigned long addr; + +	call_fetch(&dprm->orig, regs, &addr); +	if (addr && 
dprm->fetch_size) { +		addr += dprm->offset; +		dprm->fetch_size(regs, (void *)addr, dest); +	} else +		*(string_size *)dest = 0; +} +NOKPROBE_SYMBOL(FETCH_FUNC_NAME(deref, string_size)); + +static void update_deref_fetch_param(struct deref_fetch_param *data)  {  	if (CHECK_FETCH_FUNCS(deref, data->orig.fn))  		update_deref_fetch_param(data->orig.data);  	else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))  		update_symbol_cache(data->orig.data);  } +NOKPROBE_SYMBOL(update_deref_fetch_param); -static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data) +static void free_deref_fetch_param(struct deref_fetch_param *data)  {  	if (CHECK_FETCH_FUNCS(deref, data->orig.fn))  		free_deref_fetch_param(data->orig.data); @@ -320,6 +161,7 @@ static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data)  		free_symbol_cache(data->orig.data);  	kfree(data);  } +NOKPROBE_SYMBOL(free_deref_fetch_param);  /* Bitfield fetch function */  struct bitfield_fetch_param { @@ -329,8 +171,8 @@ struct bitfield_fetch_param {  };  #define DEFINE_FETCH_bitfield(type)					\ -static __kprobes void FETCH_FUNC_NAME(bitfield, type)(struct pt_regs *regs,\ -					    void *data, void *dest)	\ +void FETCH_FUNC_NAME(bitfield, type)(struct pt_regs *regs,		\ +				     void *data, void *dest)		\  {									\  	struct bitfield_fetch_param *bprm = data;			\  	type buf = 0;							\ @@ -340,13 +182,13 @@ static __kprobes void FETCH_FUNC_NAME(bitfield, type)(struct pt_regs *regs,\  		buf >>= bprm->low_shift;				\  	}								\  	*(type *)dest = buf;						\ -} - +}									\ +NOKPROBE_SYMBOL(FETCH_FUNC_NAME(bitfield, type));  DEFINE_BASIC_FETCH_FUNCS(bitfield)  #define fetch_bitfield_string		NULL  #define fetch_bitfield_string_size	NULL -static __kprobes void +static void  update_bitfield_fetch_param(struct bitfield_fetch_param *data)  {  	/* @@ -359,7 +201,7 @@ update_bitfield_fetch_param(struct bitfield_fetch_param *data)  		update_symbol_cache(data->orig.data);  } -static __kprobes void +static void  free_bitfield_fetch_param(struct bitfield_fetch_param *data)  {  	/* @@ -374,58 +216,8 @@ free_bitfield_fetch_param(struct bitfield_fetch_param *data)  	kfree(data);  } -/* Default (unsigned long) fetch type */ -#define __DEFAULT_FETCH_TYPE(t) u##t -#define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t) -#define DEFAULT_FETCH_TYPE _DEFAULT_FETCH_TYPE(BITS_PER_LONG) -#define DEFAULT_FETCH_TYPE_STR __stringify(DEFAULT_FETCH_TYPE) - -#define ASSIGN_FETCH_FUNC(method, type)	\ -	[FETCH_MTD_##method] = FETCH_FUNC_NAME(method, type) - -#define __ASSIGN_FETCH_TYPE(_name, ptype, ftype, _size, sign, _fmttype)	\ -	{.name = _name,				\ -	 .size = _size,					\ -	 .is_signed = sign,				\ -	 .print = PRINT_TYPE_FUNC_NAME(ptype),		\ -	 .fmt = PRINT_TYPE_FMT_NAME(ptype),		\ -	 .fmttype = _fmttype,				\ -	 .fetch = {					\ -ASSIGN_FETCH_FUNC(reg, ftype),				\ -ASSIGN_FETCH_FUNC(stack, ftype),			\ -ASSIGN_FETCH_FUNC(retval, ftype),			\ -ASSIGN_FETCH_FUNC(memory, ftype),			\ -ASSIGN_FETCH_FUNC(symbol, ftype),			\ -ASSIGN_FETCH_FUNC(deref, ftype),			\ -ASSIGN_FETCH_FUNC(bitfield, ftype),			\ -	  }						\ -	} - -#define ASSIGN_FETCH_TYPE(ptype, ftype, sign)			\ -	__ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, #ptype) - -#define FETCH_TYPE_STRING	0 -#define FETCH_TYPE_STRSIZE	1 - -/* Fetch type information table */ -static const struct fetch_type fetch_type_table[] = { -	/* Special types */ -	[FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string, -					sizeof(u32), 1, "__data_loc char[]"), -	
[FETCH_TYPE_STRSIZE] = __ASSIGN_FETCH_TYPE("string_size", u32, -					string_size, sizeof(u32), 0, "u32"), -	/* Basic types */ -	ASSIGN_FETCH_TYPE(u8,  u8,  0), -	ASSIGN_FETCH_TYPE(u16, u16, 0), -	ASSIGN_FETCH_TYPE(u32, u32, 0), -	ASSIGN_FETCH_TYPE(u64, u64, 0), -	ASSIGN_FETCH_TYPE(s8,  u8,  1), -	ASSIGN_FETCH_TYPE(s16, u16, 1), -	ASSIGN_FETCH_TYPE(s32, u32, 1), -	ASSIGN_FETCH_TYPE(s64, u64, 1), -}; - -static const struct fetch_type *find_fetch_type(const char *type) +static const struct fetch_type *find_fetch_type(const char *type, +						const struct fetch_type *ftbl)  {  	int i; @@ -446,44 +238,52 @@ static const struct fetch_type *find_fetch_type(const char *type)  		switch (bs) {  		case 8: -			return find_fetch_type("u8"); +			return find_fetch_type("u8", ftbl);  		case 16: -			return find_fetch_type("u16"); +			return find_fetch_type("u16", ftbl);  		case 32: -			return find_fetch_type("u32"); +			return find_fetch_type("u32", ftbl);  		case 64: -			return find_fetch_type("u64"); +			return find_fetch_type("u64", ftbl);  		default:  			goto fail;  		}  	} -	for (i = 0; i < ARRAY_SIZE(fetch_type_table); i++) -		if (strcmp(type, fetch_type_table[i].name) == 0) -			return &fetch_type_table[i]; +	for (i = 0; ftbl[i].name; i++) { +		if (strcmp(type, ftbl[i].name) == 0) +			return &ftbl[i]; +	}  fail:  	return NULL;  }  /* Special function : only accept unsigned long */ -static __kprobes void fetch_stack_address(struct pt_regs *regs, -					void *dummy, void *dest) +static void fetch_kernel_stack_address(struct pt_regs *regs, void *dummy, void *dest)  {  	*(unsigned long *)dest = kernel_stack_pointer(regs);  } +NOKPROBE_SYMBOL(fetch_kernel_stack_address); + +static void fetch_user_stack_address(struct pt_regs *regs, void *dummy, void *dest) +{ +	*(unsigned long *)dest = user_stack_pointer(regs); +} +NOKPROBE_SYMBOL(fetch_user_stack_address);  static fetch_func_t get_fetch_size_function(const struct fetch_type *type, -					fetch_func_t orig_fn) +					    fetch_func_t orig_fn, +					    const struct fetch_type *ftbl)  {  	int i; -	if (type != &fetch_type_table[FETCH_TYPE_STRING]) +	if (type != &ftbl[FETCH_TYPE_STRING])  		return NULL;	/* Only string type needs size function */  	for (i = 0; i < FETCH_MTD_END; i++)  		if (type->fetch[i] == orig_fn) -			return fetch_type_table[FETCH_TYPE_STRSIZE].fetch[i]; +			return ftbl[FETCH_TYPE_STRSIZE].fetch[i];  	WARN_ON(1);	/* This should not happen */ @@ -516,7 +316,8 @@ int traceprobe_split_symbol_offset(char *symbol, unsigned long *offset)  #define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long))  static int parse_probe_vars(char *arg, const struct fetch_type *t, -			    struct fetch_param *f, bool is_return) +			    struct fetch_param *f, bool is_return, +			    bool is_kprobe)  {  	int ret = 0;  	unsigned long param; @@ -528,13 +329,16 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t,  			ret = -EINVAL;  	} else if (strncmp(arg, "stack", 5) == 0) {  		if (arg[5] == '\0') { -			if (strcmp(t->name, DEFAULT_FETCH_TYPE_STR) == 0) -				f->fn = fetch_stack_address; +			if (strcmp(t->name, DEFAULT_FETCH_TYPE_STR)) +				return -EINVAL; + +			if (is_kprobe) +				f->fn = fetch_kernel_stack_address;  			else -				ret = -EINVAL; +				f->fn = fetch_user_stack_address;  		} else if (isdigit(arg[5])) {  			ret = kstrtoul(arg + 5, 10, &param); -			if (ret || param > PARAM_MAX_STACK) +			if (ret || (is_kprobe && param > PARAM_MAX_STACK))  				ret = -EINVAL;  			else {  				f->fn = t->fetch[FETCH_MTD_stack]; @@ -552,20 +356,18 @@ static int 
parse_probe_vars(char *arg, const struct fetch_type *t,  static int parse_probe_arg(char *arg, const struct fetch_type *t,  		     struct fetch_param *f, bool is_return, bool is_kprobe)  { +	const struct fetch_type *ftbl;  	unsigned long param;  	long offset;  	char *tmp; -	int ret; +	int ret = 0; -	ret = 0; - -	/* Until uprobe_events supports only reg arguments */ -	if (!is_kprobe && arg[0] != '%') -		return -EINVAL; +	ftbl = is_kprobe ? kprobes_fetch_type_table : uprobes_fetch_type_table; +	BUG_ON(ftbl == NULL);  	switch (arg[0]) {  	case '$': -		ret = parse_probe_vars(arg + 1, t, f, is_return); +		ret = parse_probe_vars(arg + 1, t, f, is_return, is_kprobe);  		break;  	case '%':	/* named register */ @@ -577,7 +379,7 @@ static int parse_probe_arg(char *arg, const struct fetch_type *t,  		}  		break; -	case '@':	/* memory or symbol */ +	case '@':	/* memory, file-offset or symbol */  		if (isdigit(arg[1])) {  			ret = kstrtoul(arg + 1, 0, &param);  			if (ret) @@ -585,7 +387,22 @@ static int parse_probe_arg(char *arg, const struct fetch_type *t,  			f->fn = t->fetch[FETCH_MTD_memory];  			f->data = (void *)param; +		} else if (arg[1] == '+') { +			/* kprobes don't support file offsets */ +			if (is_kprobe) +				return -EINVAL; + +			ret = kstrtol(arg + 2, 0, &offset); +			if (ret) +				break; + +			f->fn = t->fetch[FETCH_MTD_file_offset]; +			f->data = (void *)offset;  		} else { +			/* uprobes don't support symbols */ +			if (!is_kprobe) +				return -EINVAL; +  			ret = traceprobe_split_symbol_offset(arg + 1, &offset);  			if (ret)  				break; @@ -616,7 +433,7 @@ static int parse_probe_arg(char *arg, const struct fetch_type *t,  			struct deref_fetch_param	*dprm;  			const struct fetch_type		*t2; -			t2 = find_fetch_type(NULL); +			t2 = find_fetch_type(NULL, ftbl);  			*tmp = '\0';  			dprm = kzalloc(sizeof(struct deref_fetch_param), GFP_KERNEL); @@ -624,6 +441,9 @@ static int parse_probe_arg(char *arg, const struct fetch_type *t,  				return -ENOMEM;  			dprm->offset = offset; +			dprm->fetch = t->fetch[FETCH_MTD_memory]; +			dprm->fetch_size = get_fetch_size_function(t, +							dprm->fetch, ftbl);  			ret = parse_probe_arg(arg, t2, &dprm->orig, is_return,  							is_kprobe);  			if (ret) @@ -685,9 +505,13 @@ static int __parse_bitfield_probe_arg(const char *bf,  int traceprobe_parse_probe_arg(char *arg, ssize_t *size,  		struct probe_arg *parg, bool is_return, bool is_kprobe)  { +	const struct fetch_type *ftbl;  	const char *t;  	int ret; +	ftbl = is_kprobe ? 
kprobes_fetch_type_table : uprobes_fetch_type_table; +	BUG_ON(ftbl == NULL); +  	if (strlen(arg) > MAX_ARGSTR_LEN) {  		pr_info("Argument is too long.: %s\n",  arg);  		return -ENOSPC; @@ -702,7 +526,7 @@ int traceprobe_parse_probe_arg(char *arg, ssize_t *size,  		arg[t - parg->comm] = '\0';  		t++;  	} -	parg->type = find_fetch_type(t); +	parg->type = find_fetch_type(t, ftbl);  	if (!parg->type) {  		pr_info("Unsupported type: %s\n", t);  		return -EINVAL; @@ -716,7 +540,8 @@ int traceprobe_parse_probe_arg(char *arg, ssize_t *size,  	if (ret >= 0) {  		parg->fetch_size.fn = get_fetch_size_function(parg->type, -							      parg->fetch.fn); +							      parg->fetch.fn, +							      ftbl);  		parg->fetch_size.data = parg->fetch.data;  	} @@ -837,3 +662,65 @@ out:  	return ret;  } + +static int __set_print_fmt(struct trace_probe *tp, char *buf, int len, +			   bool is_return) +{ +	int i; +	int pos = 0; + +	const char *fmt, *arg; + +	if (!is_return) { +		fmt = "(%lx)"; +		arg = "REC->" FIELD_STRING_IP; +	} else { +		fmt = "(%lx <- %lx)"; +		arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP; +	} + +	/* When len=0, we just calculate the needed length */ +#define LEN_OR_ZERO (len ? len - pos : 0) + +	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt); + +	for (i = 0; i < tp->nr_args; i++) { +		pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%s", +				tp->args[i].name, tp->args[i].type->fmt); +	} + +	pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg); + +	for (i = 0; i < tp->nr_args; i++) { +		if (strcmp(tp->args[i].type->name, "string") == 0) +			pos += snprintf(buf + pos, LEN_OR_ZERO, +					", __get_str(%s)", +					tp->args[i].name); +		else +			pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s", +					tp->args[i].name); +	} + +#undef LEN_OR_ZERO + +	/* return the length of print_fmt */ +	return pos; +} + +int set_print_fmt(struct trace_probe *tp, bool is_return) +{ +	int len; +	char *print_fmt; + +	/* First: called with 0 length to calculate the needed length */ +	len = __set_print_fmt(tp, NULL, 0, is_return); +	print_fmt = kmalloc(len + 1, GFP_KERNEL); +	if (!print_fmt) +		return -ENOMEM; + +	/* Second: actually write the @print_fmt */ +	__set_print_fmt(tp, print_fmt, len + 1, is_return); +	tp->call.print_fmt = print_fmt; + +	return 0; +} diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 5c7e09d10d7..4f815fbce16 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -81,6 +81,17 @@   */  #define convert_rloc_to_loc(dl, offs)	((u32)(dl) + (offs)) +static nokprobe_inline void *get_rloc_data(u32 *dl) +{ +	return (u8 *)dl + get_rloc_offs(*dl); +} + +/* For data_loc conversion */ +static nokprobe_inline void *get_loc_data(u32 *dl, void *ent) +{ +	return (u8 *)ent + get_rloc_offs(*dl); +} +  /* Data fetch function type */  typedef	void (*fetch_func_t)(struct pt_regs *, void *, void *);  /* Printing function type */ @@ -95,6 +106,7 @@ enum {  	FETCH_MTD_symbol,  	FETCH_MTD_deref,  	FETCH_MTD_bitfield, +	FETCH_MTD_file_offset,  	FETCH_MTD_END,  }; @@ -115,6 +127,147 @@ struct fetch_param {  	void 			*data;  }; +/* For defining macros, define string/string_size types */ +typedef u32 string; +typedef u32 string_size; + +#define PRINT_TYPE_FUNC_NAME(type)	print_type_##type +#define PRINT_TYPE_FMT_NAME(type)	print_type_format_##type + +/* Printing  in basic type function template */ +#define DECLARE_BASIC_PRINT_TYPE_FUNC(type)				\ +int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, const char *name,	\ +				void *data, void *ent);			\ 
+extern const char PRINT_TYPE_FMT_NAME(type)[] + +DECLARE_BASIC_PRINT_TYPE_FUNC(u8); +DECLARE_BASIC_PRINT_TYPE_FUNC(u16); +DECLARE_BASIC_PRINT_TYPE_FUNC(u32); +DECLARE_BASIC_PRINT_TYPE_FUNC(u64); +DECLARE_BASIC_PRINT_TYPE_FUNC(s8); +DECLARE_BASIC_PRINT_TYPE_FUNC(s16); +DECLARE_BASIC_PRINT_TYPE_FUNC(s32); +DECLARE_BASIC_PRINT_TYPE_FUNC(s64); +DECLARE_BASIC_PRINT_TYPE_FUNC(string); + +#define FETCH_FUNC_NAME(method, type)	fetch_##method##_##type + +/* Declare macro for basic types */ +#define DECLARE_FETCH_FUNC(method, type)				\ +extern void FETCH_FUNC_NAME(method, type)(struct pt_regs *regs, 	\ +					  void *data, void *dest) + +#define DECLARE_BASIC_FETCH_FUNCS(method) 	\ +DECLARE_FETCH_FUNC(method, u8);			\ +DECLARE_FETCH_FUNC(method, u16);		\ +DECLARE_FETCH_FUNC(method, u32);		\ +DECLARE_FETCH_FUNC(method, u64) + +DECLARE_BASIC_FETCH_FUNCS(reg); +#define fetch_reg_string			NULL +#define fetch_reg_string_size			NULL + +DECLARE_BASIC_FETCH_FUNCS(retval); +#define fetch_retval_string			NULL +#define fetch_retval_string_size		NULL + +DECLARE_BASIC_FETCH_FUNCS(symbol); +DECLARE_FETCH_FUNC(symbol, string); +DECLARE_FETCH_FUNC(symbol, string_size); + +DECLARE_BASIC_FETCH_FUNCS(deref); +DECLARE_FETCH_FUNC(deref, string); +DECLARE_FETCH_FUNC(deref, string_size); + +DECLARE_BASIC_FETCH_FUNCS(bitfield); +#define fetch_bitfield_string			NULL +#define fetch_bitfield_string_size		NULL + +/* + * Define macro for basic types - we don't need to define s* types, because + * we have to care only about bitwidth at recording time. + */ +#define DEFINE_BASIC_FETCH_FUNCS(method) \ +DEFINE_FETCH_##method(u8)		\ +DEFINE_FETCH_##method(u16)		\ +DEFINE_FETCH_##method(u32)		\ +DEFINE_FETCH_##method(u64) + +/* Default (unsigned long) fetch type */ +#define __DEFAULT_FETCH_TYPE(t) u##t +#define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t) +#define DEFAULT_FETCH_TYPE _DEFAULT_FETCH_TYPE(BITS_PER_LONG) +#define DEFAULT_FETCH_TYPE_STR __stringify(DEFAULT_FETCH_TYPE) + +#define ASSIGN_FETCH_FUNC(method, type)	\ +	[FETCH_MTD_##method] = FETCH_FUNC_NAME(method, type) + +#define __ASSIGN_FETCH_TYPE(_name, ptype, ftype, _size, sign, _fmttype)	\ +	{.name = _name,				\ +	 .size = _size,					\ +	 .is_signed = sign,				\ +	 .print = PRINT_TYPE_FUNC_NAME(ptype),		\ +	 .fmt = PRINT_TYPE_FMT_NAME(ptype),		\ +	 .fmttype = _fmttype,				\ +	 .fetch = {					\ +ASSIGN_FETCH_FUNC(reg, ftype),				\ +ASSIGN_FETCH_FUNC(stack, ftype),			\ +ASSIGN_FETCH_FUNC(retval, ftype),			\ +ASSIGN_FETCH_FUNC(memory, ftype),			\ +ASSIGN_FETCH_FUNC(symbol, ftype),			\ +ASSIGN_FETCH_FUNC(deref, ftype),			\ +ASSIGN_FETCH_FUNC(bitfield, ftype),			\ +ASSIGN_FETCH_FUNC(file_offset, ftype),			\ +	  }						\ +	} + +#define ASSIGN_FETCH_TYPE(ptype, ftype, sign)			\ +	__ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, #ptype) + +#define ASSIGN_FETCH_TYPE_END {} + +#define FETCH_TYPE_STRING	0 +#define FETCH_TYPE_STRSIZE	1 + +/* + * Fetch type information table. + * It's declared as a weak symbol due to conditional compilation. 
+ */ +extern __weak const struct fetch_type kprobes_fetch_type_table[]; +extern __weak const struct fetch_type uprobes_fetch_type_table[]; + +#ifdef CONFIG_KPROBE_EVENT +struct symbol_cache; +unsigned long update_symbol_cache(struct symbol_cache *sc); +void free_symbol_cache(struct symbol_cache *sc); +struct symbol_cache *alloc_symbol_cache(const char *sym, long offset); +#else +/* uprobes do not support symbol fetch methods */ +#define fetch_symbol_u8			NULL +#define fetch_symbol_u16		NULL +#define fetch_symbol_u32		NULL +#define fetch_symbol_u64		NULL +#define fetch_symbol_string		NULL +#define fetch_symbol_string_size	NULL + +struct symbol_cache { +}; +static inline unsigned long __used update_symbol_cache(struct symbol_cache *sc) +{ +	return 0; +} + +static inline void __used free_symbol_cache(struct symbol_cache *sc) +{ +} + +static inline struct symbol_cache * __used +alloc_symbol_cache(const char *sym, long offset) +{ +	return NULL; +} +#endif /* CONFIG_KPROBE_EVENT */ +  struct probe_arg {  	struct fetch_param	fetch;  	struct fetch_param	fetch_size; @@ -124,7 +277,32 @@ struct probe_arg {  	const struct fetch_type	*type;	/* Type of this argument */  }; -static inline __kprobes void call_fetch(struct fetch_param *fprm, +struct trace_probe { +	unsigned int			flags;	/* For TP_FLAG_* */ +	struct ftrace_event_class	class; +	struct ftrace_event_call	call; +	struct list_head 		files; +	ssize_t				size;	/* trace entry size */ +	unsigned int			nr_args; +	struct probe_arg		args[]; +}; + +struct event_file_link { +	struct ftrace_event_file	*file; +	struct list_head		list; +}; + +static inline bool trace_probe_is_enabled(struct trace_probe *tp) +{ +	return !!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE)); +} + +static inline bool trace_probe_is_registered(struct trace_probe *tp) +{ +	return !!(tp->flags & TP_FLAG_REGISTERED); +} + +static nokprobe_inline void call_fetch(struct fetch_param *fprm,  				 struct pt_regs *regs, void *dest)  {  	return fprm->fn(regs, fprm->data, dest); @@ -142,6 +320,18 @@ static inline int is_good_name(const char *name)  	return 1;  } +static inline struct event_file_link * +find_event_file_link(struct trace_probe *tp, struct ftrace_event_file *file) +{ +	struct event_file_link *link; + +	list_for_each_entry(link, &tp->files, list) +		if (link->file == file) +			return link; + +	return NULL; +} +  extern int traceprobe_parse_probe_arg(char *arg, ssize_t *size,  		   struct probe_arg *parg, bool is_return, bool is_kprobe); @@ -158,3 +348,53 @@ extern ssize_t traceprobe_probes_write(struct file *file,  		int (*createfn)(int, char**));  extern int traceprobe_command(const char *buf, int (*createfn)(int, char**)); + +/* Sum up total data length for dynamic arraies (strings) */ +static nokprobe_inline int +__get_data_size(struct trace_probe *tp, struct pt_regs *regs) +{ +	int i, ret = 0; +	u32 len; + +	for (i = 0; i < tp->nr_args; i++) +		if (unlikely(tp->args[i].fetch_size.fn)) { +			call_fetch(&tp->args[i].fetch_size, regs, &len); +			ret += len; +		} + +	return ret; +} + +/* Store the value of each argument */ +static nokprobe_inline void +store_trace_args(int ent_size, struct trace_probe *tp, struct pt_regs *regs, +		 u8 *data, int maxlen) +{ +	int i; +	u32 end = tp->size; +	u32 *dl;	/* Data (relative) location */ + +	for (i = 0; i < tp->nr_args; i++) { +		if (unlikely(tp->args[i].fetch_size.fn)) { +			/* +			 * First, we set the relative location and +			 * maximum data length to *dl +			 */ +			dl = (u32 *)(data + tp->args[i].offset); +			*dl = 
make_data_rloc(maxlen, end - tp->args[i].offset); +			/* Then try to fetch string or dynamic array data */ +			call_fetch(&tp->args[i].fetch, regs, dl); +			/* Reduce maximum length */ +			end += get_rloc_len(*dl); +			maxlen -= get_rloc_len(*dl); +			/* Trick here, convert data_rloc to data_loc */ +			*dl = convert_rloc_to_loc(*dl, +				 ent_size + tp->args[i].offset); +		} else +			/* Just fetching data normally */ +			call_fetch(&tp->args[i].fetch, regs, +				   data + tp->args[i].offset); +	} +} + +extern int set_print_fmt(struct trace_probe *tp, bool is_return); diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 4e98e3b257a..3f34dc9b40f 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -45,7 +45,7 @@ tracing_sched_switch_trace(struct trace_array *tr,  	entry->next_state		= next->state;  	entry->next_cpu	= task_cpu(next); -	if (!filter_check_discard(call, entry, buffer, event)) +	if (!call_filter_check_discard(call, entry, buffer, event))  		trace_buffer_unlock_commit(buffer, event, flags, pc);  } @@ -101,7 +101,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr,  	entry->next_state		= wakee->state;  	entry->next_cpu			= task_cpu(wakee); -	if (!filter_check_discard(call, entry, buffer, event)) +	if (!call_filter_check_discard(call, entry, buffer, event))  		trace_buffer_unlock_commit(buffer, event, flags, pc);  } diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index fee77e15d81..19bd8928ce9 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -16,6 +16,7 @@  #include <linux/uaccess.h>  #include <linux/ftrace.h>  #include <linux/sched/rt.h> +#include <linux/sched/deadline.h>  #include <trace/events/sched.h>  #include "trace.h" @@ -27,6 +28,8 @@ static int			wakeup_cpu;  static int			wakeup_current_cpu;  static unsigned			wakeup_prio = -1;  static int			wakeup_rt; +static int			wakeup_dl; +static int			tracing_dl = 0;  static arch_spinlock_t wakeup_lock =  	(arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; @@ -127,15 +130,9 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip,  	atomic_dec(&data->disabled);  	preempt_enable_notrace();  } - -static struct ftrace_ops trace_ops __read_mostly = -{ -	.func = wakeup_tracer_call, -	.flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE, -};  #endif /* CONFIG_FUNCTION_TRACER */ -static int register_wakeup_function(int graph, int set) +static int register_wakeup_function(struct trace_array *tr, int graph, int set)  {  	int ret; @@ -147,7 +144,7 @@ static int register_wakeup_function(int graph, int set)  		ret = register_ftrace_graph(&wakeup_graph_return,  					    &wakeup_graph_entry);  	else -		ret = register_ftrace_function(&trace_ops); +		ret = register_ftrace_function(tr->ops);  	if (!ret)  		function_enabled = true; @@ -155,7 +152,7 @@ static int register_wakeup_function(int graph, int set)  	return ret;  } -static void unregister_wakeup_function(int graph) +static void unregister_wakeup_function(struct trace_array *tr, int graph)  {  	if (!function_enabled)  		return; @@ -163,32 +160,34 @@ static void unregister_wakeup_function(int graph)  	if (graph)  		unregister_ftrace_graph();  	else -		unregister_ftrace_function(&trace_ops); +		unregister_ftrace_function(tr->ops);  	function_enabled = false;  } -static void wakeup_function_set(int set) +static void wakeup_function_set(struct trace_array *tr, int set)  {  	if (set) -		register_wakeup_function(is_graph(), 1); +		
register_wakeup_function(tr, is_graph(), 1);  	else -		unregister_wakeup_function(is_graph()); +		unregister_wakeup_function(tr, is_graph());  } -static int wakeup_flag_changed(struct tracer *tracer, u32 mask, int set) +static int wakeup_flag_changed(struct trace_array *tr, u32 mask, int set)  { +	struct tracer *tracer = tr->current_trace; +  	if (mask & TRACE_ITER_FUNCTION) -		wakeup_function_set(set); +		wakeup_function_set(tr, set);  	return trace_keep_overwrite(tracer, mask, set);  } -static int start_func_tracer(int graph) +static int start_func_tracer(struct trace_array *tr, int graph)  {  	int ret; -	ret = register_wakeup_function(graph, 0); +	ret = register_wakeup_function(tr, graph, 0);  	if (!ret && tracing_is_enabled())  		tracer_enabled = 1; @@ -198,15 +197,16 @@ static int start_func_tracer(int graph)  	return ret;  } -static void stop_func_tracer(int graph) +static void stop_func_tracer(struct trace_array *tr, int graph)  {  	tracer_enabled = 0; -	unregister_wakeup_function(graph); +	unregister_wakeup_function(tr, graph);  }  #ifdef CONFIG_FUNCTION_GRAPH_TRACER -static int wakeup_set_flag(u32 old_flags, u32 bit, int set) +static int +wakeup_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)  {  	if (!(bit & TRACE_DISPLAY_GRAPH)) @@ -215,12 +215,12 @@ static int wakeup_set_flag(u32 old_flags, u32 bit, int set)  	if (!(is_graph() ^ set))  		return 0; -	stop_func_tracer(!set); +	stop_func_tracer(tr, !set);  	wakeup_reset(wakeup_trace); -	tracing_max_latency = 0; +	tr->max_latency = 0; -	return start_func_tracer(set); +	return start_func_tracer(tr, set);  }  static int wakeup_graph_entry(struct ftrace_graph_ent *trace) @@ -308,7 +308,8 @@ __trace_function(struct trace_array *tr,  #else  #define __trace_function trace_function -static int wakeup_set_flag(u32 old_flags, u32 bit, int set) +static int +wakeup_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)  {  	return -EINVAL;  } @@ -343,13 +344,13 @@ static void wakeup_print_header(struct seq_file *s)  /*   * Should this new latency be reported/recorded?   */ -static int report_latency(cycle_t delta) +static int report_latency(struct trace_array *tr, cycle_t delta)  {  	if (tracing_thresh) {  		if (delta < tracing_thresh)  			return 0;  	} else { -		if (delta <= tracing_max_latency) +		if (delta <= tr->max_latency)  			return 0;  	}  	return 1; @@ -417,11 +418,11 @@ probe_wakeup_sched_switch(void *ignore,  	T1 = ftrace_now(cpu);  	delta = T1-T0; -	if (!report_latency(delta)) +	if (!report_latency(wakeup_trace, delta))  		goto out_unlock;  	if (likely(!is_tracing_stopped())) { -		tracing_max_latency = delta; +		wakeup_trace->max_latency = delta;  		update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu);  	} @@ -437,6 +438,7 @@ static void __wakeup_reset(struct trace_array *tr)  {  	wakeup_cpu = -1;  	wakeup_prio = -1; +	tracing_dl = 0;  	if (wakeup_task)  		put_task_struct(wakeup_task); @@ -472,9 +474,17 @@ probe_wakeup(void *ignore, struct task_struct *p, int success)  	tracing_record_cmdline(p);  	tracing_record_cmdline(current); -	if ((wakeup_rt && !rt_task(p)) || -			p->prio >= wakeup_prio || -			p->prio >= current->prio) +	/* +	 * Semantic is like this: +	 *  - wakeup tracer handles all tasks in the system, independently +	 *    from their scheduling class; +	 *  - wakeup_rt tracer handles tasks belonging to sched_dl and +	 *    sched_rt class; +	 *  - wakeup_dl handles tasks belonging to sched_dl class only. 
+	 */ +	if (tracing_dl || (wakeup_dl && !dl_task(p)) || +	    (wakeup_rt && !dl_task(p) && !rt_task(p)) || +	    (!dl_task(p) && (p->prio >= wakeup_prio || p->prio >= current->prio)))  		return;  	pc = preempt_count(); @@ -486,7 +496,8 @@ probe_wakeup(void *ignore, struct task_struct *p, int success)  	arch_spin_lock(&wakeup_lock);  	/* check for races. */ -	if (!tracer_enabled || p->prio >= wakeup_prio) +	if (!tracer_enabled || tracing_dl || +	    (!dl_task(p) && p->prio >= wakeup_prio))  		goto out_locked;  	/* reset the trace */ @@ -496,6 +507,15 @@ probe_wakeup(void *ignore, struct task_struct *p, int success)  	wakeup_current_cpu = wakeup_cpu;  	wakeup_prio = p->prio; +	/* +	 * Once you start tracing a -deadline task, don't bother tracing +	 * another task until the first one wakes up. +	 */ +	if (dl_task(p)) +		tracing_dl = 1; +	else +		tracing_dl = 0; +  	wakeup_task = p;  	get_task_struct(wakeup_task); @@ -561,7 +581,7 @@ static void start_wakeup_tracer(struct trace_array *tr)  	 */  	smp_wmb(); -	if (start_func_tracer(is_graph())) +	if (start_func_tracer(tr, is_graph()))  		printk(KERN_ERR "failed to start wakeup tracer\n");  	return; @@ -574,13 +594,15 @@ fail_deprobe:  static void stop_wakeup_tracer(struct trace_array *tr)  {  	tracer_enabled = 0; -	stop_func_tracer(is_graph()); +	stop_func_tracer(tr, is_graph());  	unregister_trace_sched_switch(probe_wakeup_sched_switch, NULL);  	unregister_trace_sched_wakeup_new(probe_wakeup, NULL);  	unregister_trace_sched_wakeup(probe_wakeup, NULL);  	unregister_trace_sched_migrate_task(probe_wakeup_migrate_task, NULL);  } +static bool wakeup_busy; +  static int __wakeup_tracer_init(struct trace_array *tr)  {  	save_flags = trace_flags; @@ -589,24 +611,45 @@ static int __wakeup_tracer_init(struct trace_array *tr)  	set_tracer_flag(tr, TRACE_ITER_OVERWRITE, 1);  	set_tracer_flag(tr, TRACE_ITER_LATENCY_FMT, 1); -	tracing_max_latency = 0; +	tr->max_latency = 0;  	wakeup_trace = tr; +	ftrace_init_array_ops(tr, wakeup_tracer_call);  	start_wakeup_tracer(tr); + +	wakeup_busy = true;  	return 0;  }  static int wakeup_tracer_init(struct trace_array *tr)  { +	if (wakeup_busy) +		return -EBUSY; + +	wakeup_dl = 0;  	wakeup_rt = 0;  	return __wakeup_tracer_init(tr);  }  static int wakeup_rt_tracer_init(struct trace_array *tr)  { +	if (wakeup_busy) +		return -EBUSY; + +	wakeup_dl = 0;  	wakeup_rt = 1;  	return __wakeup_tracer_init(tr);  } +static int wakeup_dl_tracer_init(struct trace_array *tr) +{ +	if (wakeup_busy) +		return -EBUSY; + +	wakeup_dl = 1; +	wakeup_rt = 0; +	return __wakeup_tracer_init(tr); +} +  static void wakeup_tracer_reset(struct trace_array *tr)  {  	int lat_flag = save_flags & TRACE_ITER_LATENCY_FMT; @@ -618,6 +661,8 @@ static void wakeup_tracer_reset(struct trace_array *tr)  	set_tracer_flag(tr, TRACE_ITER_LATENCY_FMT, lat_flag);  	set_tracer_flag(tr, TRACE_ITER_OVERWRITE, overwrite_flag); +	ftrace_reset_array_ops(tr); +	wakeup_busy = false;  }  static void wakeup_tracer_start(struct trace_array *tr) @@ -649,6 +694,7 @@ static struct tracer wakeup_tracer __read_mostly =  #endif  	.open		= wakeup_trace_open,  	.close		= wakeup_trace_close, +	.allow_instances = true,  	.use_max_tr	= true,  }; @@ -659,7 +705,28 @@ static struct tracer wakeup_rt_tracer __read_mostly =  	.reset		= wakeup_tracer_reset,  	.start		= wakeup_tracer_start,  	.stop		= wakeup_tracer_stop, -	.wait_pipe	= poll_wait_pipe, +	.print_max	= true, +	.print_header	= wakeup_print_header, +	.print_line	= wakeup_print_line, +	.flags		= &tracer_flags, +	.set_flag	= 
wakeup_set_flag, +	.flag_changed	= wakeup_flag_changed, +#ifdef CONFIG_FTRACE_SELFTEST +	.selftest    = trace_selftest_startup_wakeup, +#endif +	.open		= wakeup_trace_open, +	.close		= wakeup_trace_close, +	.allow_instances = true, +	.use_max_tr	= true, +}; + +static struct tracer wakeup_dl_tracer __read_mostly = +{ +	.name		= "wakeup_dl", +	.init		= wakeup_dl_tracer_init, +	.reset		= wakeup_tracer_reset, +	.start		= wakeup_tracer_start, +	.stop		= wakeup_tracer_stop,  	.print_max	= true,  	.print_header	= wakeup_print_header,  	.print_line	= wakeup_print_line, @@ -686,6 +753,10 @@ __init static int init_wakeup_tracer(void)  	if (ret)  		return ret; +	ret = register_tracer(&wakeup_dl_tracer); +	if (ret) +		return ret; +  	return 0;  }  core_initcall(init_wakeup_tracer); diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index a7329b7902f..5ef60499dc8 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -65,7 +65,7 @@ static int trace_test_buffer(struct trace_buffer *buf, unsigned long *count)  	/* Don't allow flipping of max traces now */  	local_irq_save(flags); -	arch_spin_lock(&ftrace_max_lock); +	arch_spin_lock(&buf->tr->max_lock);  	cnt = ring_buffer_entries(buf->buffer); @@ -83,7 +83,7 @@ static int trace_test_buffer(struct trace_buffer *buf, unsigned long *count)  			break;  	}  	tracing_on(); -	arch_spin_unlock(&ftrace_max_lock); +	arch_spin_unlock(&buf->tr->max_lock);  	local_irq_restore(flags);  	if (count) @@ -161,11 +161,6 @@ static struct ftrace_ops test_probe3 = {  	.flags			= FTRACE_OPS_FL_RECURSION_SAFE,  }; -static struct ftrace_ops test_global = { -	.func		= trace_selftest_test_global_func, -	.flags		= FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE, -}; -  static void print_counts(void)  {  	printk("(%d %d %d %d %d) ", @@ -185,7 +180,7 @@ static void reset_counts(void)  	trace_selftest_test_dyn_cnt = 0;  } -static int trace_selftest_ops(int cnt) +static int trace_selftest_ops(struct trace_array *tr, int cnt)  {  	int save_ftrace_enabled = ftrace_enabled;  	struct ftrace_ops *dyn_ops; @@ -220,7 +215,11 @@ static int trace_selftest_ops(int cnt)  	register_ftrace_function(&test_probe1);  	register_ftrace_function(&test_probe2);  	register_ftrace_function(&test_probe3); -	register_ftrace_function(&test_global); +	/* First time we are running with main function */ +	if (cnt > 1) { +		ftrace_init_array_ops(tr, trace_selftest_test_global_func); +		register_ftrace_function(tr->ops); +	}  	DYN_FTRACE_TEST_NAME(); @@ -232,8 +231,10 @@ static int trace_selftest_ops(int cnt)  		goto out;  	if (trace_selftest_test_probe3_cnt != 1)  		goto out; -	if (trace_selftest_test_global_cnt == 0) -		goto out; +	if (cnt > 1) { +		if (trace_selftest_test_global_cnt == 0) +			goto out; +	}  	DYN_FTRACE_TEST_NAME2(); @@ -269,8 +270,10 @@ static int trace_selftest_ops(int cnt)  		goto out_free;  	if (trace_selftest_test_probe3_cnt != 3)  		goto out_free; -	if (trace_selftest_test_global_cnt == 0) -		goto out; +	if (cnt > 1) { +		if (trace_selftest_test_global_cnt == 0) +			goto out; +	}  	if (trace_selftest_test_dyn_cnt == 0)  		goto out_free; @@ -295,7 +298,9 @@ static int trace_selftest_ops(int cnt)  	unregister_ftrace_function(&test_probe1);  	unregister_ftrace_function(&test_probe2);  	unregister_ftrace_function(&test_probe3); -	unregister_ftrace_function(&test_global); +	if (cnt > 1) +		unregister_ftrace_function(tr->ops); +	ftrace_reset_array_ops(tr);  	/* Make sure everything is off */  	reset_counts(); @@ -315,9 +320,9 @@ static int 
trace_selftest_ops(int cnt)  }  /* Test dynamic code modification and ftrace filters */ -int trace_selftest_startup_dynamic_tracing(struct tracer *trace, -					   struct trace_array *tr, -					   int (*func)(void)) +static int trace_selftest_startup_dynamic_tracing(struct tracer *trace, +						  struct trace_array *tr, +						  int (*func)(void))  {  	int save_ftrace_enabled = ftrace_enabled;  	unsigned long count; @@ -388,7 +393,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,  	}  	/* Test the ops with global tracing running */ -	ret = trace_selftest_ops(1); +	ret = trace_selftest_ops(tr, 1);  	trace->reset(tr);   out: @@ -399,7 +404,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,  	/* Test the ops with global tracing off */  	if (!ret) -		ret = trace_selftest_ops(2); +		ret = trace_selftest_ops(tr, 2);  	return ret;  } @@ -802,7 +807,7 @@ out:  int  trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)  { -	unsigned long save_max = tracing_max_latency; +	unsigned long save_max = tr->max_latency;  	unsigned long count;  	int ret; @@ -814,7 +819,7 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)  	}  	/* reset the max latency */ -	tracing_max_latency = 0; +	tr->max_latency = 0;  	/* disable interrupts for a bit */  	local_irq_disable();  	udelay(100); @@ -841,7 +846,7 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)  		ret = -1;  	} -	tracing_max_latency = save_max; +	tr->max_latency = save_max;  	return ret;  } @@ -851,7 +856,7 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)  int  trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr)  { -	unsigned long save_max = tracing_max_latency; +	unsigned long save_max = tr->max_latency;  	unsigned long count;  	int ret; @@ -876,7 +881,7 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr)  	}  	/* reset the max latency */ -	tracing_max_latency = 0; +	tr->max_latency = 0;  	/* disable preemption for a bit */  	preempt_disable();  	udelay(100); @@ -903,7 +908,7 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr)  		ret = -1;  	} -	tracing_max_latency = save_max; +	tr->max_latency = save_max;  	return ret;  } @@ -913,7 +918,7 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr)  int  trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *tr)  { -	unsigned long save_max = tracing_max_latency; +	unsigned long save_max = tr->max_latency;  	unsigned long count;  	int ret; @@ -938,7 +943,7 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *  	}  	/* reset the max latency */ -	tracing_max_latency = 0; +	tr->max_latency = 0;  	/* disable preemption and interrupts for a bit */  	preempt_disable(); @@ -973,7 +978,7 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *  	}  	/* do the test by disabling interrupts first this time */ -	tracing_max_latency = 0; +	tr->max_latency = 0;  	tracing_start();  	trace->start(tr); @@ -1004,7 +1009,7 @@ out:  	tracing_start();  out_no_start:  	trace->reset(tr); -	tracing_max_latency = save_max; +	tr->max_latency = save_max;  	return ret;  } @@ -1022,11 +1027,16 @@ trace_selftest_startup_nop(struct tracer *trace, struct trace_array *tr)  #ifdef CONFIG_SCHED_TRACER  static int trace_wakeup_test_thread(void *data)  { -	/* Make this a RT thread, doesn't need to be too 
high */ -	static const struct sched_param param = { .sched_priority = 5 }; +	/* Make this a -deadline thread */ +	static const struct sched_attr attr = { +		.sched_policy = SCHED_DEADLINE, +		.sched_runtime = 100000ULL, +		.sched_deadline = 10000000ULL, +		.sched_period = 10000000ULL +	};  	struct completion *x = data; -	sched_setscheduler(current, SCHED_FIFO, &param); +	sched_setattr(current, &attr);  	/* Make it know we have a new prio */  	complete(x); @@ -1040,8 +1050,8 @@ static int trace_wakeup_test_thread(void *data)  	/* we are awake, now wait to disappear */  	while (!kthread_should_stop()) {  		/* -		 * This is an RT task, do short sleeps to let -		 * others run. +		 * This will likely be the system top priority +		 * task, do short sleeps to let others run.  		 */  		msleep(100);  	} @@ -1052,23 +1062,23 @@ static int trace_wakeup_test_thread(void *data)  int  trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)  { -	unsigned long save_max = tracing_max_latency; +	unsigned long save_max = tr->max_latency;  	struct task_struct *p; -	struct completion isrt; +	struct completion is_ready;  	unsigned long count;  	int ret; -	init_completion(&isrt); +	init_completion(&is_ready); -	/* create a high prio thread */ -	p = kthread_run(trace_wakeup_test_thread, &isrt, "ftrace-test"); +	/* create a -deadline thread */ +	p = kthread_run(trace_wakeup_test_thread, &is_ready, "ftrace-test");  	if (IS_ERR(p)) {  		printk(KERN_CONT "Failed to create ftrace wakeup test thread ");  		return -1;  	} -	/* make sure the thread is running at an RT prio */ -	wait_for_completion(&isrt); +	/* make sure the thread is running at -deadline policy */ +	wait_for_completion(&is_ready);  	/* start the tracing */  	ret = tracer_init(trace, tr); @@ -1078,23 +1088,23 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)  	}  	/* reset the max latency */ -	tracing_max_latency = 0; +	tr->max_latency = 0;  	while (p->on_rq) {  		/* -		 * Sleep to make sure the RT thread is asleep too. +		 * Sleep to make sure the -deadline thread is asleep too.  		 * On virtual machines we can't rely on timings,  		 * but we want to make sure this test still works.  		 */  		msleep(100);  	} -	init_completion(&isrt); +	init_completion(&is_ready);  	wake_up_process(p);  	/* Wait for the task to wake up */ -	wait_for_completion(&isrt); +	wait_for_completion(&is_ready);  	/* stop the tracing. 
*/  	tracing_stop(); @@ -1108,7 +1118,7 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)  	trace->reset(tr);  	tracing_start(); -	tracing_max_latency = save_max; +	tr->max_latency = save_max;  	/* kill the thread */  	kthread_stop(p); diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index b20428c5efe..8a4e5cb66a4 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -13,6 +13,7 @@  #include <linux/sysctl.h>  #include <linux/init.h>  #include <linux/fs.h> +#include <linux/magic.h>  #include <asm/setup.h> @@ -50,11 +51,33 @@ static DEFINE_MUTEX(stack_sysctl_mutex);  int stack_tracer_enabled;  static int last_stack_tracer_enabled; +static inline void print_max_stack(void) +{ +	long i; +	int size; + +	pr_emerg("        Depth    Size   Location    (%d entries)\n" +			   "        -----    ----   --------\n", +			   max_stack_trace.nr_entries - 1); + +	for (i = 0; i < max_stack_trace.nr_entries; i++) { +		if (stack_dump_trace[i] == ULONG_MAX) +			break; +		if (i+1 == max_stack_trace.nr_entries || +				stack_dump_trace[i+1] == ULONG_MAX) +			size = stack_dump_index[i]; +		else +			size = stack_dump_index[i] - stack_dump_index[i+1]; + +		pr_emerg("%3ld) %8d   %5d   %pS\n", i, stack_dump_index[i], +				size, (void *)stack_dump_trace[i]); +	} +} +  static inline void  check_stack(unsigned long ip, unsigned long *stack)  { -	unsigned long this_size, flags; -	unsigned long *p, *top, *start; +	unsigned long this_size, flags; unsigned long *p, *top, *start;  	static int tracer_frame;  	int frame_size = ACCESS_ONCE(tracer_frame);  	int i; @@ -84,8 +107,12 @@ check_stack(unsigned long ip, unsigned long *stack)  	max_stack_size = this_size; -	max_stack_trace.nr_entries	= 0; -	max_stack_trace.skip		= 3; +	max_stack_trace.nr_entries = 0; + +	if (using_ftrace_ops_list_func()) +		max_stack_trace.skip = 4; +	else +		max_stack_trace.skip = 3;  	save_stack_trace(&max_stack_trace); @@ -144,6 +171,12 @@ check_stack(unsigned long ip, unsigned long *stack)  			i++;  	} +	if ((current != &init_task && +		*(end_of_stack(current)) != STACK_END_MAGIC)) { +		print_max_stack(); +		BUG(); +	} +   out:  	arch_spin_unlock(&max_stack_lock);  	local_irq_restore(flags); @@ -382,7 +415,7 @@ static const struct file_operations stack_trace_filter_fops = {  	.open = stack_trace_filter_open,  	.read = seq_read,  	.write = ftrace_filter_write, -	.llseek = ftrace_filter_lseek, +	.llseek = tracing_lseek,  	.release = ftrace_regex_release,  }; diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c index 847f88a6194..7af67360b33 100644 --- a/kernel/trace/trace_stat.c +++ b/kernel/trace/trace_stat.c @@ -43,46 +43,15 @@ static DEFINE_MUTEX(all_stat_sessions_mutex);  /* The root directory for all stat files */  static struct dentry		*stat_dir; -/* - * Iterate through the rbtree using a post order traversal path - * to release the next node. - * It won't necessary release one at each iteration - * but it will at least advance closer to the next one - * to be released. 
- */ -static struct rb_node *release_next(struct tracer_stat *ts, -				    struct rb_node *node) +static void __reset_stat_session(struct stat_session *session)  { -	struct stat_node *snode; -	struct rb_node *parent = rb_parent(node); - -	if (node->rb_left) -		return node->rb_left; -	else if (node->rb_right) -		return node->rb_right; -	else { -		if (!parent) -			; -		else if (parent->rb_left == node) -			parent->rb_left = NULL; -		else -			parent->rb_right = NULL; +	struct stat_node *snode, *n; -		snode = container_of(node, struct stat_node, node); -		if (ts->stat_release) -			ts->stat_release(snode->stat); +	rbtree_postorder_for_each_entry_safe(snode, n, &session->stat_root, node) { +		if (session->ts->stat_release) +			session->ts->stat_release(snode->stat);  		kfree(snode); - -		return parent;  	} -} - -static void __reset_stat_session(struct stat_session *session) -{ -	struct rb_node *node = session->stat_root.rb_node; - -	while (node) -		node = release_next(session->ts, node);  	session->stat_root = RB_ROOT;  } diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 559329d9bd2..759d5e00451 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -302,6 +302,7 @@ static int __init syscall_exit_define_fields(struct ftrace_event_call *call)  static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)  {  	struct trace_array *tr = data; +	struct ftrace_event_file *ftrace_file;  	struct syscall_trace_enter *entry;  	struct syscall_metadata *sys_data;  	struct ring_buffer_event *event; @@ -314,7 +315,13 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)  	syscall_nr = trace_get_syscall_nr(current, regs);  	if (syscall_nr < 0)  		return; -	if (!test_bit(syscall_nr, tr->enabled_enter_syscalls)) + +	/* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE) */ +	ftrace_file = rcu_dereference_sched(tr->enter_syscall_files[syscall_nr]); +	if (!ftrace_file) +		return; + +	if (ftrace_trigger_soft_disabled(ftrace_file))  		return;  	sys_data = syscall_nr_to_meta(syscall_nr); @@ -336,15 +343,14 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)  	entry->nr = syscall_nr;  	syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args); -	if (!filter_current_check_discard(buffer, sys_data->enter_event, -					  entry, event)) -		trace_current_buffer_unlock_commit(buffer, event, -						   irq_flags, pc); +	event_trigger_unlock_commit(ftrace_file, buffer, event, entry, +				    irq_flags, pc);  }  static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)  {  	struct trace_array *tr = data; +	struct ftrace_event_file *ftrace_file;  	struct syscall_trace_exit *entry;  	struct syscall_metadata *sys_data;  	struct ring_buffer_event *event; @@ -356,7 +362,13 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)  	syscall_nr = trace_get_syscall_nr(current, regs);  	if (syscall_nr < 0)  		return; -	if (!test_bit(syscall_nr, tr->enabled_exit_syscalls)) + +	/* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE()) */ +	ftrace_file = rcu_dereference_sched(tr->exit_syscall_files[syscall_nr]); +	if (!ftrace_file) +		return; + +	if (ftrace_trigger_soft_disabled(ftrace_file))  		return;  	sys_data = syscall_nr_to_meta(syscall_nr); @@ -377,10 +389,8 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)  	entry->nr = syscall_nr;  	entry->ret = syscall_get_return_value(current, regs); -	if 
(!filter_current_check_discard(buffer, sys_data->exit_event, -					  entry, event)) -		trace_current_buffer_unlock_commit(buffer, event, -						   irq_flags, pc); +	event_trigger_unlock_commit(ftrace_file, buffer, event, entry, +				    irq_flags, pc);  }  static int reg_event_syscall_enter(struct ftrace_event_file *file, @@ -397,7 +407,7 @@ static int reg_event_syscall_enter(struct ftrace_event_file *file,  	if (!tr->sys_refcount_enter)  		ret = register_trace_sys_enter(ftrace_syscall_enter, tr);  	if (!ret) { -		set_bit(num, tr->enabled_enter_syscalls); +		rcu_assign_pointer(tr->enter_syscall_files[num], file);  		tr->sys_refcount_enter++;  	}  	mutex_unlock(&syscall_trace_lock); @@ -415,7 +425,7 @@ static void unreg_event_syscall_enter(struct ftrace_event_file *file,  		return;  	mutex_lock(&syscall_trace_lock);  	tr->sys_refcount_enter--; -	clear_bit(num, tr->enabled_enter_syscalls); +	rcu_assign_pointer(tr->enter_syscall_files[num], NULL);  	if (!tr->sys_refcount_enter)  		unregister_trace_sys_enter(ftrace_syscall_enter, tr);  	mutex_unlock(&syscall_trace_lock); @@ -435,7 +445,7 @@ static int reg_event_syscall_exit(struct ftrace_event_file *file,  	if (!tr->sys_refcount_exit)  		ret = register_trace_sys_exit(ftrace_syscall_exit, tr);  	if (!ret) { -		set_bit(num, tr->enabled_exit_syscalls); +		rcu_assign_pointer(tr->exit_syscall_files[num], file);  		tr->sys_refcount_exit++;  	}  	mutex_unlock(&syscall_trace_lock); @@ -453,7 +463,7 @@ static void unreg_event_syscall_exit(struct ftrace_event_file *file,  		return;  	mutex_lock(&syscall_trace_lock);  	tr->sys_refcount_exit--; -	clear_bit(num, tr->enabled_exit_syscalls); +	rcu_assign_pointer(tr->exit_syscall_files[num], NULL);  	if (!tr->sys_refcount_exit)  		unregister_trace_sys_exit(ftrace_syscall_exit, tr);  	mutex_unlock(&syscall_trace_lock); diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 272261b5f94..3c9b97e6b1f 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -51,22 +51,17 @@ struct trace_uprobe_filter {   */  struct trace_uprobe {  	struct list_head		list; -	struct ftrace_event_class	class; -	struct ftrace_event_call	call;  	struct trace_uprobe_filter	filter;  	struct uprobe_consumer		consumer;  	struct inode			*inode;  	char				*filename;  	unsigned long			offset;  	unsigned long			nhit; -	unsigned int			flags;	/* For TP_FLAG_* */ -	ssize_t				size;	/* trace entry size */ -	unsigned int			nr_args; -	struct probe_arg		args[]; +	struct trace_probe		tp;  }; -#define SIZEOF_TRACE_UPROBE(n)			\ -	(offsetof(struct trace_uprobe, args) +	\ +#define SIZEOF_TRACE_UPROBE(n)				\ +	(offsetof(struct trace_uprobe, tp.args) +	\  	(sizeof(struct probe_arg) * (n)))  static int register_uprobe_event(struct trace_uprobe *tu); @@ -75,10 +70,151 @@ static int unregister_uprobe_event(struct trace_uprobe *tu);  static DEFINE_MUTEX(uprobe_lock);  static LIST_HEAD(uprobe_list); +struct uprobe_dispatch_data { +	struct trace_uprobe	*tu; +	unsigned long		bp_addr; +}; +  static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs);  static int uretprobe_dispatcher(struct uprobe_consumer *con,  				unsigned long func, struct pt_regs *regs); +#ifdef CONFIG_STACK_GROWSUP +static unsigned long adjust_stack_addr(unsigned long addr, unsigned int n) +{ +	return addr - (n * sizeof(long)); +} +#else +static unsigned long adjust_stack_addr(unsigned long addr, unsigned int n) +{ +	return addr + (n * sizeof(long)); +} +#endif + +static unsigned long get_user_stack_nth(struct pt_regs *regs, 
unsigned int n) +{ +	unsigned long ret; +	unsigned long addr = user_stack_pointer(regs); + +	addr = adjust_stack_addr(addr, n); + +	if (copy_from_user(&ret, (void __force __user *) addr, sizeof(ret))) +		return 0; + +	return ret; +} + +/* + * Uprobes-specific fetch functions + */ +#define DEFINE_FETCH_stack(type)					\ +static void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,		\ +					 void *offset, void *dest)	\ +{									\ +	*(type *)dest = (type)get_user_stack_nth(regs,			\ +					      ((unsigned long)offset)); \ +} +DEFINE_BASIC_FETCH_FUNCS(stack) +/* No string on the stack entry */ +#define fetch_stack_string	NULL +#define fetch_stack_string_size	NULL + +#define DEFINE_FETCH_memory(type)					\ +static void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,		\ +					  void *addr, void *dest)	\ +{									\ +	type retval;							\ +	void __user *vaddr = (void __force __user *) addr;		\ +									\ +	if (copy_from_user(&retval, vaddr, sizeof(type)))		\ +		*(type *)dest = 0;					\ +	else								\ +		*(type *) dest = retval;				\ +} +DEFINE_BASIC_FETCH_FUNCS(memory) +/* + * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max + * length and relative data location. + */ +static void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs, +					    void *addr, void *dest) +{ +	long ret; +	u32 rloc = *(u32 *)dest; +	int maxlen  = get_rloc_len(rloc); +	u8 *dst = get_rloc_data(dest); +	void __user *src = (void __force __user *) addr; + +	if (!maxlen) +		return; + +	ret = strncpy_from_user(dst, src, maxlen); + +	if (ret < 0) {	/* Failed to fetch string */ +		((u8 *)get_rloc_data(dest))[0] = '\0'; +		*(u32 *)dest = make_data_rloc(0, get_rloc_offs(rloc)); +	} else { +		*(u32 *)dest = make_data_rloc(ret, get_rloc_offs(rloc)); +	} +} + +static void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs, +						 void *addr, void *dest) +{ +	int len; +	void __user *vaddr = (void __force __user *) addr; + +	len = strnlen_user(vaddr, MAX_STRING_SIZE); + +	if (len == 0 || len > MAX_STRING_SIZE)  /* Failed to check length */ +		*(u32 *)dest = 0; +	else +		*(u32 *)dest = len; +} + +static unsigned long translate_user_vaddr(void *file_offset) +{ +	unsigned long base_addr; +	struct uprobe_dispatch_data *udd; + +	udd = (void *) current->utask->vaddr; + +	base_addr = udd->bp_addr - udd->tu->offset; +	return base_addr + (unsigned long)file_offset; +} + +#define DEFINE_FETCH_file_offset(type)					\ +static void FETCH_FUNC_NAME(file_offset, type)(struct pt_regs *regs,	\ +					       void *offset, void *dest)\ +{									\ +	void *vaddr = (void *)translate_user_vaddr(offset);		\ +									\ +	FETCH_FUNC_NAME(memory, type)(regs, vaddr, dest);		\ +} +DEFINE_BASIC_FETCH_FUNCS(file_offset) +DEFINE_FETCH_file_offset(string) +DEFINE_FETCH_file_offset(string_size) + +/* Fetch type information table */ +const struct fetch_type uprobes_fetch_type_table[] = { +	/* Special types */ +	[FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string, +					sizeof(u32), 1, "__data_loc char[]"), +	[FETCH_TYPE_STRSIZE] = __ASSIGN_FETCH_TYPE("string_size", u32, +					string_size, sizeof(u32), 0, "u32"), +	/* Basic types */ +	ASSIGN_FETCH_TYPE(u8,  u8,  0), +	ASSIGN_FETCH_TYPE(u16, u16, 0), +	ASSIGN_FETCH_TYPE(u32, u32, 0), +	ASSIGN_FETCH_TYPE(u64, u64, 0), +	ASSIGN_FETCH_TYPE(s8,  u8,  1), +	ASSIGN_FETCH_TYPE(s16, u16, 1), +	ASSIGN_FETCH_TYPE(s32, u32, 1), +	ASSIGN_FETCH_TYPE(s64, u64, 1), + +	ASSIGN_FETCH_TYPE_END +}; +  static inline void init_trace_uprobe_filter(struct trace_uprobe_filter *filter)  {  
	rwlock_init(&filter->rwlock); @@ -114,24 +250,26 @@ alloc_trace_uprobe(const char *group, const char *event, int nargs, bool is_ret)  	if (!tu)  		return ERR_PTR(-ENOMEM); -	tu->call.class = &tu->class; -	tu->call.name = kstrdup(event, GFP_KERNEL); -	if (!tu->call.name) +	tu->tp.call.class = &tu->tp.class; +	tu->tp.call.name = kstrdup(event, GFP_KERNEL); +	if (!tu->tp.call.name)  		goto error; -	tu->class.system = kstrdup(group, GFP_KERNEL); -	if (!tu->class.system) +	tu->tp.class.system = kstrdup(group, GFP_KERNEL); +	if (!tu->tp.class.system)  		goto error;  	INIT_LIST_HEAD(&tu->list); +	INIT_LIST_HEAD(&tu->tp.files);  	tu->consumer.handler = uprobe_dispatcher;  	if (is_ret)  		tu->consumer.ret_handler = uretprobe_dispatcher;  	init_trace_uprobe_filter(&tu->filter); +	tu->tp.call.flags |= TRACE_EVENT_FL_USE_CALL_FILTER;  	return tu;  error: -	kfree(tu->call.name); +	kfree(tu->tp.call.name);  	kfree(tu);  	return ERR_PTR(-ENOMEM); @@ -141,12 +279,12 @@ static void free_trace_uprobe(struct trace_uprobe *tu)  {  	int i; -	for (i = 0; i < tu->nr_args; i++) -		traceprobe_free_probe_arg(&tu->args[i]); +	for (i = 0; i < tu->tp.nr_args; i++) +		traceprobe_free_probe_arg(&tu->tp.args[i]);  	iput(tu->inode); -	kfree(tu->call.class->system); -	kfree(tu->call.name); +	kfree(tu->tp.call.class->system); +	kfree(tu->tp.call.name);  	kfree(tu->filename);  	kfree(tu);  } @@ -156,8 +294,8 @@ static struct trace_uprobe *find_probe_event(const char *event, const char *grou  	struct trace_uprobe *tu;  	list_for_each_entry(tu, &uprobe_list, list) -		if (strcmp(tu->call.name, event) == 0 && -		    strcmp(tu->call.class->system, group) == 0) +		if (strcmp(ftrace_event_name(&tu->tp.call), event) == 0 && +		    strcmp(tu->tp.call.class->system, group) == 0)  			return tu;  	return NULL; @@ -180,16 +318,17 @@ static int unregister_trace_uprobe(struct trace_uprobe *tu)  /* Register a trace_uprobe and probe_event */  static int register_trace_uprobe(struct trace_uprobe *tu)  { -	struct trace_uprobe *old_tp; +	struct trace_uprobe *old_tu;  	int ret;  	mutex_lock(&uprobe_lock);  	/* register as an event */ -	old_tp = find_probe_event(tu->call.name, tu->call.class->system); -	if (old_tp) { +	old_tu = find_probe_event(ftrace_event_name(&tu->tp.call), +			tu->tp.call.class->system); +	if (old_tu) {  		/* delete old event */ -		ret = unregister_trace_uprobe(old_tp); +		ret = unregister_trace_uprobe(old_tu);  		if (ret)  			goto end;  	} @@ -210,7 +349,7 @@ end:  /*   * Argument syntax: - *  - Add uprobe: p|r[:[GRP/]EVENT] PATH:SYMBOL [FETCHARGS] + *  - Add uprobe: p|r[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS]   *   *  - Remove uprobe: -:[GRP/]EVENT   */ @@ -359,34 +498,36 @@ static int create_trace_uprobe(int argc, char **argv)  	/* parse arguments */  	ret = 0;  	for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) { +		struct probe_arg *parg = &tu->tp.args[i]; +  		/* Increment count for freeing args in error case */ -		tu->nr_args++; +		tu->tp.nr_args++;  		/* Parse argument name */  		arg = strchr(argv[i], '=');  		if (arg) {  			*arg++ = '\0'; -			tu->args[i].name = kstrdup(argv[i], GFP_KERNEL); +			parg->name = kstrdup(argv[i], GFP_KERNEL);  		} else {  			arg = argv[i];  			/* If argument name is omitted, set "argN" */  			snprintf(buf, MAX_EVENT_NAME_LEN, "arg%d", i + 1); -			tu->args[i].name = kstrdup(buf, GFP_KERNEL); +			parg->name = kstrdup(buf, GFP_KERNEL);  		} -		if (!tu->args[i].name) { +		if (!parg->name) {  			pr_info("Failed to allocate argument[%d] name.\n", i);  			ret = -ENOMEM;  			goto error;  		} -		if 
(!is_good_name(tu->args[i].name)) { -			pr_info("Invalid argument[%d] name: %s\n", i, tu->args[i].name); +		if (!is_good_name(parg->name)) { +			pr_info("Invalid argument[%d] name: %s\n", i, parg->name);  			ret = -EINVAL;  			goto error;  		} -		if (traceprobe_conflict_field_name(tu->args[i].name, tu->args, i)) { +		if (traceprobe_conflict_field_name(parg->name, tu->tp.args, i)) {  			pr_info("Argument[%d] name '%s' conflicts with "  				"another field.\n", i, argv[i]);  			ret = -EINVAL; @@ -394,7 +535,8 @@ static int create_trace_uprobe(int argc, char **argv)  		}  		/* Parse fetch argument */ -		ret = traceprobe_parse_probe_arg(arg, &tu->size, &tu->args[i], false, false); +		ret = traceprobe_parse_probe_arg(arg, &tu->tp.size, parg, +						 is_return, false);  		if (ret) {  			pr_info("Parse error at argument[%d]. (%d)\n", i, ret);  			goto error; @@ -458,11 +600,12 @@ static int probes_seq_show(struct seq_file *m, void *v)  	char c = is_ret_probe(tu) ? 'r' : 'p';  	int i; -	seq_printf(m, "%c:%s/%s", c, tu->call.class->system, tu->call.name); +	seq_printf(m, "%c:%s/%s", c, tu->tp.call.class->system, +			ftrace_event_name(&tu->tp.call));  	seq_printf(m, " %s:0x%p", tu->filename, (void *)tu->offset); -	for (i = 0; i < tu->nr_args; i++) -		seq_printf(m, " %s=%s", tu->args[i].name, tu->args[i].comm); +	for (i = 0; i < tu->tp.nr_args; i++) +		seq_printf(m, " %s=%s", tu->tp.args[i].name, tu->tp.args[i].comm);  	seq_printf(m, "\n");  	return 0; @@ -508,7 +651,8 @@ static int probes_profile_seq_show(struct seq_file *m, void *v)  {  	struct trace_uprobe *tu = v; -	seq_printf(m, "  %s %-44s %15lu\n", tu->filename, tu->call.name, tu->nhit); +	seq_printf(m, "  %s %-44s %15lu\n", tu->filename, +			ftrace_event_name(&tu->tp.call), tu->nhit);  	return 0;  } @@ -532,19 +676,122 @@ static const struct file_operations uprobe_profile_ops = {  	.release	= seq_release,  }; -static void uprobe_trace_print(struct trace_uprobe *tu, -				unsigned long func, struct pt_regs *regs) +struct uprobe_cpu_buffer { +	struct mutex mutex; +	void *buf; +}; +static struct uprobe_cpu_buffer __percpu *uprobe_cpu_buffer; +static int uprobe_buffer_refcnt; + +static int uprobe_buffer_init(void) +{ +	int cpu, err_cpu; + +	uprobe_cpu_buffer = alloc_percpu(struct uprobe_cpu_buffer); +	if (uprobe_cpu_buffer == NULL) +		return -ENOMEM; + +	for_each_possible_cpu(cpu) { +		struct page *p = alloc_pages_node(cpu_to_node(cpu), +						  GFP_KERNEL, 0); +		if (p == NULL) { +			err_cpu = cpu; +			goto err; +		} +		per_cpu_ptr(uprobe_cpu_buffer, cpu)->buf = page_address(p); +		mutex_init(&per_cpu_ptr(uprobe_cpu_buffer, cpu)->mutex); +	} + +	return 0; + +err: +	for_each_possible_cpu(cpu) { +		if (cpu == err_cpu) +			break; +		free_page((unsigned long)per_cpu_ptr(uprobe_cpu_buffer, cpu)->buf); +	} + +	free_percpu(uprobe_cpu_buffer); +	return -ENOMEM; +} + +static int uprobe_buffer_enable(void) +{ +	int ret = 0; + +	BUG_ON(!mutex_is_locked(&event_mutex)); + +	if (uprobe_buffer_refcnt++ == 0) { +		ret = uprobe_buffer_init(); +		if (ret < 0) +			uprobe_buffer_refcnt--; +	} + +	return ret; +} + +static void uprobe_buffer_disable(void) +{ +	int cpu; + +	BUG_ON(!mutex_is_locked(&event_mutex)); + +	if (--uprobe_buffer_refcnt == 0) { +		for_each_possible_cpu(cpu) +			free_page((unsigned long)per_cpu_ptr(uprobe_cpu_buffer, +							     cpu)->buf); + +		free_percpu(uprobe_cpu_buffer); +		uprobe_cpu_buffer = NULL; +	} +} + +static struct uprobe_cpu_buffer *uprobe_buffer_get(void) +{ +	struct uprobe_cpu_buffer *ucb; +	int cpu; + +	cpu = 
raw_smp_processor_id(); +	ucb = per_cpu_ptr(uprobe_cpu_buffer, cpu); + +	/* +	 * Use per-cpu buffers for fastest access, but we might migrate +	 * so the mutex makes sure we have sole access to it. +	 */ +	mutex_lock(&ucb->mutex); + +	return ucb; +} + +static void uprobe_buffer_put(struct uprobe_cpu_buffer *ucb) +{ +	mutex_unlock(&ucb->mutex); +} + +static void __uprobe_trace_func(struct trace_uprobe *tu, +				unsigned long func, struct pt_regs *regs, +				struct uprobe_cpu_buffer *ucb, int dsize, +				struct ftrace_event_file *ftrace_file)  {  	struct uprobe_trace_entry_head *entry;  	struct ring_buffer_event *event;  	struct ring_buffer *buffer;  	void *data; -	int size, i; -	struct ftrace_event_call *call = &tu->call; +	int size, esize; +	struct ftrace_event_call *call = &tu->tp.call; + +	WARN_ON(call != ftrace_file->event_call); + +	if (WARN_ON_ONCE(tu->tp.size + dsize > PAGE_SIZE)) +		return; -	size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); -	event = trace_current_buffer_lock_reserve(&buffer, call->event.type, -						  size + tu->size, 0, 0); +	if (ftrace_trigger_soft_disabled(ftrace_file)) +		return; + +	esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); +	size = esize + tu->tp.size + dsize; +	event = trace_event_buffer_lock_reserve(&buffer, ftrace_file, +						call->event.type, size, 0, 0);  	if (!event)  		return; @@ -558,25 +805,38 @@ static void uprobe_trace_print(struct trace_uprobe *tu,  		data = DATAOF_TRACE_ENTRY(entry, false);  	} -	for (i = 0; i < tu->nr_args; i++) -		call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset); +	memcpy(data, ucb->buf, tu->tp.size + dsize); -	if (!filter_current_check_discard(buffer, call, entry, event)) -		trace_buffer_unlock_commit(buffer, event, 0, 0); +	event_trigger_unlock_commit(ftrace_file, buffer, event, entry, 0, 0);  }  /* uprobe handler */ -static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs) +static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs, +			     struct uprobe_cpu_buffer *ucb, int dsize)  { -	if (!is_ret_probe(tu)) -		uprobe_trace_print(tu, 0, regs); +	struct event_file_link *link; + +	if (is_ret_probe(tu)) +		return 0; + +	rcu_read_lock(); +	list_for_each_entry_rcu(link, &tu->tp.files, list) +		__uprobe_trace_func(tu, 0, regs, ucb, dsize, link->file); +	rcu_read_unlock(); +  	return 0;  }  static void uretprobe_trace_func(struct trace_uprobe *tu, unsigned long func, -				struct pt_regs *regs) +				 struct pt_regs *regs, +				 struct uprobe_cpu_buffer *ucb, int dsize)  { -	uprobe_trace_print(tu, func, regs); +	struct event_file_link *link; + +	rcu_read_lock(); +	list_for_each_entry_rcu(link, &tu->tp.files, list) +		__uprobe_trace_func(tu, func, regs, ucb, dsize, link->file); +	rcu_read_unlock();  }  /* Event entry printers */ @@ -590,23 +850,26 @@ print_uprobe_event(struct trace_iterator *iter, int flags, struct trace_event *e  	int i;  	entry = (struct uprobe_trace_entry_head *)iter->ent; -	tu = container_of(event, struct trace_uprobe, call.event); +	tu = container_of(event, struct trace_uprobe, tp.call.event);  	if (is_ret_probe(tu)) { -		if (!trace_seq_printf(s, "%s: (0x%lx <- 0x%lx)", tu->call.name, +		if (!trace_seq_printf(s, "%s: (0x%lx <- 0x%lx)", +					ftrace_event_name(&tu->tp.call),  					entry->vaddr[1], entry->vaddr[0]))  			goto partial;  		data = DATAOF_TRACE_ENTRY(entry, true);  	} else { -		if (!trace_seq_printf(s, "%s: (0x%lx)", tu->call.name, +		if (!trace_seq_printf(s, "%s: (0x%lx)", +					ftrace_event_name(&tu->tp.call),  					entry->vaddr[0]))  			goto 
partial;  		data = DATAOF_TRACE_ENTRY(entry, false);  	} -	for (i = 0; i < tu->nr_args; i++) { -		if (!tu->args[i].type->print(s, tu->args[i].name, -					     data + tu->args[i].offset, entry)) +	for (i = 0; i < tu->tp.nr_args; i++) { +		struct probe_arg *parg = &tu->tp.args[i]; + +		if (!parg->type->print(s, parg->name, data + parg->offset, entry))  			goto partial;  	} @@ -617,43 +880,95 @@ partial:  	return TRACE_TYPE_PARTIAL_LINE;  } -static inline bool is_trace_uprobe_enabled(struct trace_uprobe *tu) -{ -	return tu->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE); -} -  typedef bool (*filter_func_t)(struct uprobe_consumer *self,  				enum uprobe_filter_ctx ctx,  				struct mm_struct *mm);  static int -probe_event_enable(struct trace_uprobe *tu, int flag, filter_func_t filter) +probe_event_enable(struct trace_uprobe *tu, struct ftrace_event_file *file, +		   filter_func_t filter)  { -	int ret = 0; +	bool enabled = trace_probe_is_enabled(&tu->tp); +	struct event_file_link *link = NULL; +	int ret; + +	if (file) { +		if (tu->tp.flags & TP_FLAG_PROFILE) +			return -EINTR; + +		link = kmalloc(sizeof(*link), GFP_KERNEL); +		if (!link) +			return -ENOMEM; + +		link->file = file; +		list_add_tail_rcu(&link->list, &tu->tp.files); -	if (is_trace_uprobe_enabled(tu)) -		return -EINTR; +		tu->tp.flags |= TP_FLAG_TRACE; +	} else { +		if (tu->tp.flags & TP_FLAG_TRACE) +			return -EINTR; + +		tu->tp.flags |= TP_FLAG_PROFILE; +	}  	WARN_ON(!uprobe_filter_is_empty(&tu->filter)); -	tu->flags |= flag; +	if (enabled) +		return 0; + +	ret = uprobe_buffer_enable(); +	if (ret) +		goto err_flags; +  	tu->consumer.filter = filter;  	ret = uprobe_register(tu->inode, tu->offset, &tu->consumer);  	if (ret) -		tu->flags &= ~flag; +		goto err_buffer; + +	return 0; + err_buffer: +	uprobe_buffer_disable(); + + err_flags: +	if (file) { +		list_del(&link->list); +		kfree(link); +		tu->tp.flags &= ~TP_FLAG_TRACE; +	} else { +		tu->tp.flags &= ~TP_FLAG_PROFILE; +	}  	return ret;  } -static void probe_event_disable(struct trace_uprobe *tu, int flag) +static void +probe_event_disable(struct trace_uprobe *tu, struct ftrace_event_file *file)  { -	if (!is_trace_uprobe_enabled(tu)) +	if (!trace_probe_is_enabled(&tu->tp))  		return; +	if (file) { +		struct event_file_link *link; + +		link = find_event_file_link(&tu->tp, file); +		if (!link) +			return; + +		list_del_rcu(&link->list); +		/* synchronize with u{,ret}probe_trace_func */ +		synchronize_sched(); +		kfree(link); + +		if (!list_empty(&tu->tp.files)) +			return; +	} +  	WARN_ON(!uprobe_filter_is_empty(&tu->filter));  	uprobe_unregister(tu->inode, tu->offset, &tu->consumer); -	tu->flags &= ~flag; +	tu->tp.flags &= file ? 
~TP_FLAG_TRACE : ~TP_FLAG_PROFILE; + +	uprobe_buffer_disable();  }  static int uprobe_event_define_fields(struct ftrace_event_call *event_call) @@ -671,12 +986,12 @@ static int uprobe_event_define_fields(struct ftrace_event_call *event_call)  		size = SIZEOF_TRACE_ENTRY(false);  	}  	/* Set argument names as fields */ -	for (i = 0; i < tu->nr_args; i++) { -		ret = trace_define_field(event_call, tu->args[i].type->fmttype, -					 tu->args[i].name, -					 size + tu->args[i].offset, -					 tu->args[i].type->size, -					 tu->args[i].type->is_signed, +	for (i = 0; i < tu->tp.nr_args; i++) { +		struct probe_arg *parg = &tu->tp.args[i]; + +		ret = trace_define_field(event_call, parg->type->fmttype, +					 parg->name, size + parg->offset, +					 parg->type->size, parg->type->is_signed,  					 FILTER_OTHER);  		if (ret) @@ -685,59 +1000,6 @@ static int uprobe_event_define_fields(struct ftrace_event_call *event_call)  	return 0;  } -#define LEN_OR_ZERO		(len ? len - pos : 0) -static int __set_print_fmt(struct trace_uprobe *tu, char *buf, int len) -{ -	const char *fmt, *arg; -	int i; -	int pos = 0; - -	if (is_ret_probe(tu)) { -		fmt = "(%lx <- %lx)"; -		arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP; -	} else { -		fmt = "(%lx)"; -		arg = "REC->" FIELD_STRING_IP; -	} - -	/* When len=0, we just calculate the needed length */ - -	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt); - -	for (i = 0; i < tu->nr_args; i++) { -		pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%s", -				tu->args[i].name, tu->args[i].type->fmt); -	} - -	pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg); - -	for (i = 0; i < tu->nr_args; i++) { -		pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s", -				tu->args[i].name); -	} - -	return pos;	/* return the length of print_fmt */ -} -#undef LEN_OR_ZERO - -static int set_print_fmt(struct trace_uprobe *tu) -{ -	char *print_fmt; -	int len; - -	/* First: called with 0 length to calculate the needed length */ -	len = __set_print_fmt(tu, NULL, 0); -	print_fmt = kmalloc(len + 1, GFP_KERNEL); -	if (!print_fmt) -		return -ENOMEM; - -	/* Second: actually write the @print_fmt */ -	__set_print_fmt(tu, print_fmt, len + 1); -	tu->call.print_fmt = print_fmt; - -	return 0; -} -  #ifdef CONFIG_PERF_EVENTS  static bool  __uprobe_perf_filter(struct trace_uprobe_filter *filter, struct mm_struct *mm) @@ -761,56 +1023,60 @@ uprobe_filter_event(struct trace_uprobe *tu, struct perf_event *event)  	return __uprobe_perf_filter(&tu->filter, event->hw.tp_target->mm);  } -static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event) +static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event)  {  	bool done;  	write_lock(&tu->filter.rwlock);  	if (event->hw.tp_target) { -		/* -		 * event->parent != NULL means copy_process(), we can avoid -		 * uprobe_apply(). current->mm must be probed and we can rely -		 * on dup_mmap() which preserves the already installed bp's. -		 * -		 * attr.enable_on_exec means that exec/mmap will install the -		 * breakpoints we need. 
-		 */ +		list_del(&event->hw.tp_list);  		done = tu->filter.nr_systemwide || -			event->parent || event->attr.enable_on_exec || +			(event->hw.tp_target->flags & PF_EXITING) ||  			uprobe_filter_event(tu, event); -		list_add(&event->hw.tp_list, &tu->filter.perf_events);  	} else { +		tu->filter.nr_systemwide--;  		done = tu->filter.nr_systemwide; -		tu->filter.nr_systemwide++;  	}  	write_unlock(&tu->filter.rwlock);  	if (!done) -		uprobe_apply(tu->inode, tu->offset, &tu->consumer, true); +		return uprobe_apply(tu->inode, tu->offset, &tu->consumer, false);  	return 0;  } -static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event) +static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event)  {  	bool done; +	int err;  	write_lock(&tu->filter.rwlock);  	if (event->hw.tp_target) { -		list_del(&event->hw.tp_list); +		/* +		 * event->parent != NULL means copy_process(), we can avoid +		 * uprobe_apply(). current->mm must be probed and we can rely +		 * on dup_mmap() which preserves the already installed bp's. +		 * +		 * attr.enable_on_exec means that exec/mmap will install the +		 * breakpoints we need. +		 */  		done = tu->filter.nr_systemwide || -			(event->hw.tp_target->flags & PF_EXITING) || +			event->parent || event->attr.enable_on_exec ||  			uprobe_filter_event(tu, event); +		list_add(&event->hw.tp_list, &tu->filter.perf_events);  	} else { -		tu->filter.nr_systemwide--;  		done = tu->filter.nr_systemwide; +		tu->filter.nr_systemwide++;  	}  	write_unlock(&tu->filter.rwlock); -	if (!done) -		uprobe_apply(tu->inode, tu->offset, &tu->consumer, false); - -	return 0; +	err = 0; +	if (!done) { +		err = uprobe_apply(tu->inode, tu->offset, &tu->consumer, true); +		if (err) +			uprobe_perf_close(tu, event); +	} +	return err;  }  static bool uprobe_perf_filter(struct uprobe_consumer *uc, @@ -827,17 +1093,23 @@ static bool uprobe_perf_filter(struct uprobe_consumer *uc,  	return ret;  } -static void uprobe_perf_print(struct trace_uprobe *tu, -				unsigned long func, struct pt_regs *regs) +static void __uprobe_perf_func(struct trace_uprobe *tu, +			       unsigned long func, struct pt_regs *regs, +			       struct uprobe_cpu_buffer *ucb, int dsize)  { -	struct ftrace_event_call *call = &tu->call; +	struct ftrace_event_call *call = &tu->tp.call;  	struct uprobe_trace_entry_head *entry;  	struct hlist_head *head;  	void *data; -	int size, rctx, i; +	int size, esize; +	int rctx; -	size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); -	size = ALIGN(size + tu->size + sizeof(u32), sizeof(u64)) - sizeof(u32); +	esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); + +	size = esize + tu->tp.size + dsize; +	size = ALIGN(size + sizeof(u32), sizeof(u64)) - sizeof(u32); +	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough")) +		return;  	preempt_disable();  	head = this_cpu_ptr(call->perf_events); @@ -857,8 +1129,13 @@ static void uprobe_perf_print(struct trace_uprobe *tu,  		data = DATAOF_TRACE_ENTRY(entry, false);  	} -	for (i = 0; i < tu->nr_args; i++) -		call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset); +	memcpy(data, ucb->buf, tu->tp.size + dsize); + +	if (size - esize > tu->tp.size + dsize) { +		int len = tu->tp.size + dsize; + +		memset(data + len, 0, size - esize - len); +	}  	perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL);   out: @@ -866,42 +1143,46 @@ static void uprobe_perf_print(struct trace_uprobe *tu,  }  /* uprobe profile handler */ -static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs) 
+static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs, +			    struct uprobe_cpu_buffer *ucb, int dsize)  {  	if (!uprobe_perf_filter(&tu->consumer, 0, current->mm))  		return UPROBE_HANDLER_REMOVE;  	if (!is_ret_probe(tu)) -		uprobe_perf_print(tu, 0, regs); +		__uprobe_perf_func(tu, 0, regs, ucb, dsize);  	return 0;  }  static void uretprobe_perf_func(struct trace_uprobe *tu, unsigned long func, -				struct pt_regs *regs) +				struct pt_regs *regs, +				struct uprobe_cpu_buffer *ucb, int dsize)  { -	uprobe_perf_print(tu, func, regs); +	__uprobe_perf_func(tu, func, regs, ucb, dsize);  }  #endif	/* CONFIG_PERF_EVENTS */ -static -int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type, void *data) +static int +trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type, +		      void *data)  {  	struct trace_uprobe *tu = event->data; +	struct ftrace_event_file *file = data;  	switch (type) {  	case TRACE_REG_REGISTER: -		return probe_event_enable(tu, TP_FLAG_TRACE, NULL); +		return probe_event_enable(tu, file, NULL);  	case TRACE_REG_UNREGISTER: -		probe_event_disable(tu, TP_FLAG_TRACE); +		probe_event_disable(tu, file);  		return 0;  #ifdef CONFIG_PERF_EVENTS  	case TRACE_REG_PERF_REGISTER: -		return probe_event_enable(tu, TP_FLAG_PROFILE, uprobe_perf_filter); +		return probe_event_enable(tu, NULL, uprobe_perf_filter);  	case TRACE_REG_PERF_UNREGISTER: -		probe_event_disable(tu, TP_FLAG_PROFILE); +		probe_event_disable(tu, NULL);  		return 0;  	case TRACE_REG_PERF_OPEN: @@ -920,18 +1201,37 @@ int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type,  static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)  {  	struct trace_uprobe *tu; +	struct uprobe_dispatch_data udd; +	struct uprobe_cpu_buffer *ucb; +	int dsize, esize;  	int ret = 0; +  	tu = container_of(con, struct trace_uprobe, consumer);  	tu->nhit++; -	if (tu->flags & TP_FLAG_TRACE) -		ret |= uprobe_trace_func(tu, regs); +	udd.tu = tu; +	udd.bp_addr = instruction_pointer(regs); + +	current->utask->vaddr = (unsigned long) &udd; + +	if (WARN_ON_ONCE(!uprobe_cpu_buffer)) +		return 0; + +	dsize = __get_data_size(&tu->tp, regs); +	esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); + +	ucb = uprobe_buffer_get(); +	store_trace_args(esize, &tu->tp, regs, ucb->buf, dsize); + +	if (tu->tp.flags & TP_FLAG_TRACE) +		ret |= uprobe_trace_func(tu, regs, ucb, dsize);  #ifdef CONFIG_PERF_EVENTS -	if (tu->flags & TP_FLAG_PROFILE) -		ret |= uprobe_perf_func(tu, regs); +	if (tu->tp.flags & TP_FLAG_PROFILE) +		ret |= uprobe_perf_func(tu, regs, ucb, dsize);  #endif +	uprobe_buffer_put(ucb);  	return ret;  } @@ -939,16 +1239,34 @@ static int uretprobe_dispatcher(struct uprobe_consumer *con,  				unsigned long func, struct pt_regs *regs)  {  	struct trace_uprobe *tu; +	struct uprobe_dispatch_data udd; +	struct uprobe_cpu_buffer *ucb; +	int dsize, esize;  	tu = container_of(con, struct trace_uprobe, consumer); -	if (tu->flags & TP_FLAG_TRACE) -		uretprobe_trace_func(tu, func, regs); +	udd.tu = tu; +	udd.bp_addr = func; + +	current->utask->vaddr = (unsigned long) &udd; + +	if (WARN_ON_ONCE(!uprobe_cpu_buffer)) +		return 0; + +	dsize = __get_data_size(&tu->tp, regs); +	esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); + +	ucb = uprobe_buffer_get(); +	store_trace_args(esize, &tu->tp, regs, ucb->buf, dsize); + +	if (tu->tp.flags & TP_FLAG_TRACE) +		uretprobe_trace_func(tu, func, regs, ucb, dsize);  #ifdef CONFIG_PERF_EVENTS -	if (tu->flags & TP_FLAG_PROFILE) -		
uretprobe_perf_func(tu, func, regs); +	if (tu->tp.flags & TP_FLAG_PROFILE) +		uretprobe_perf_func(tu, func, regs, ucb, dsize);  #endif +	uprobe_buffer_put(ucb);  	return 0;  } @@ -958,7 +1276,7 @@ static struct trace_event_functions uprobe_funcs = {  static int register_uprobe_event(struct trace_uprobe *tu)  { -	struct ftrace_event_call *call = &tu->call; +	struct ftrace_event_call *call = &tu->tp.call;  	int ret;  	/* Initialize ftrace_event_call */ @@ -966,7 +1284,7 @@ static int register_uprobe_event(struct trace_uprobe *tu)  	call->event.funcs = &uprobe_funcs;  	call->class->define_fields = uprobe_event_define_fields; -	if (set_print_fmt(tu) < 0) +	if (set_print_fmt(&tu->tp, is_ret_probe(tu)) < 0)  		return -ENOMEM;  	ret = register_ftrace_event(&call->event); @@ -980,7 +1298,8 @@ static int register_uprobe_event(struct trace_uprobe *tu)  	ret = trace_add_event_call(call);  	if (ret) { -		pr_info("Failed to register uprobe event: %s\n", call->name); +		pr_info("Failed to register uprobe event: %s\n", +			ftrace_event_name(call));  		kfree(call->print_fmt);  		unregister_ftrace_event(&call->event);  	} @@ -993,11 +1312,11 @@ static int unregister_uprobe_event(struct trace_uprobe *tu)  	int ret;  	/* tu->event is unregistered in trace_remove_event_call() */ -	ret = trace_remove_event_call(&tu->call); +	ret = trace_remove_event_call(&tu->tp.call);  	if (ret)  		return ret; -	kfree(tu->call.print_fmt); -	tu->call.print_fmt = NULL; +	kfree(tu->tp.call.print_fmt); +	tu->tp.call.print_fmt = NULL;  	return 0;  }  | 
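A note on the fetch-function machinery added near the top of this file: each DEFINE_FETCH_* macro stamps out one small copier per basic type, and uprobes_fetch_type_table then maps parsed type names onto those functions. As an illustrative expansion only (assuming FETCH_FUNC_NAME(method, type) pastes names as fetch_<method>_<type>, per its definition in trace_probe.h, which is not part of this diff), DEFINE_FETCH_memory(u32) produces roughly:

static void fetch_memory_u32(struct pt_regs *regs, void *addr, void *dest)
{
	u32 retval;
	/* for uprobes the address handed in is a user-space pointer */
	void __user *vaddr = (void __force __user *) addr;

	/* copy_from_user() (linux/uaccess.h) returns non-zero on fault */
	if (copy_from_user(&retval, vaddr, sizeof(u32)))
		*(u32 *)dest = 0;	/* store 0 when the read faults */
	else
		*(u32 *)dest = retval;
}

The point of giving uprobes its own table is visible in the bodies above: every fetch goes through the user-space accessors (copy_from_user, strncpy_from_user, strnlen_user), and the file_offset variants first translate a file offset into a virtual address via the uprobe_dispatch_data stashed in current->utask->vaddr.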

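The other structural change worth a sketch is the per-CPU scratch buffer: the dispatchers now fetch argument data once (store_trace_args() into ucb->buf) and the trace and perf output paths simply memcpy that staged data, instead of re-running the fetch functions per consumer. A condensed, illustrative version of the allocate/get/put pattern follows; it keeps the patch's names but, unlike the real code, allocates with __get_free_page() rather than alloc_pages_node(), and omits the refcounted enable/disable and the error unwinding.

#include <linux/percpu.h>
#include <linux/mutex.h>
#include <linux/gfp.h>
#include <linux/smp.h>
#include <linux/errno.h>

struct uprobe_cpu_buffer {
	struct mutex	mutex;
	void		*buf;		/* one page of staged argument data */
};
static struct uprobe_cpu_buffer __percpu *uprobe_cpu_buffer;

static int uprobe_buffer_init(void)
{
	int cpu;

	uprobe_cpu_buffer = alloc_percpu(struct uprobe_cpu_buffer);
	if (!uprobe_cpu_buffer)
		return -ENOMEM;

	for_each_possible_cpu(cpu) {
		struct uprobe_cpu_buffer *ucb = per_cpu_ptr(uprobe_cpu_buffer, cpu);

		ucb->buf = (void *)__get_free_page(GFP_KERNEL);
		if (!ucb->buf)
			return -ENOMEM;	/* the real code frees earlier CPUs first */
		mutex_init(&ucb->mutex);
	}
	return 0;
}

static struct uprobe_cpu_buffer *uprobe_buffer_get(void)
{
	struct uprobe_cpu_buffer *ucb;

	ucb = per_cpu_ptr(uprobe_cpu_buffer, raw_smp_processor_id());
	/*
	 * Per-CPU for locality, but the task may migrate after
	 * raw_smp_processor_id(): the mutex keeps the buffer exclusively
	 * ours until uprobe_buffer_put().
	 */
	mutex_lock(&ucb->mutex);
	return ucb;
}

static void uprobe_buffer_put(struct uprobe_cpu_buffer *ucb)
{
	mutex_unlock(&ucb->mutex);
}

Using a mutex rather than disabling preemption fits the context: uprobe handlers run in the traced task's own context and the fetch functions may fault in user pages, so sleeping on the lock is acceptable, and migrating to another CPU after picking the buffer costs only locality, not correctness.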