Diffstat (limited to 'kernel/trace/ftrace.c'):
 kernel/trace/ftrace.c | 3161
 1 file changed, 2446 insertions(+), 715 deletions(-)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index f3dadae8388..ac9d1dad630 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -10,7 +10,7 @@   * Based on code in the latency_tracer, that is:   *   *  Copyright (C) 2004-2006 Ingo Molnar - *  Copyright (C) 2004 William Lee Irwin III + *  Copyright (C) 2004 Nadia Yvette Chambers   */  #include <linux/stop_machine.h> @@ -22,44 +22,71 @@  #include <linux/hardirq.h>  #include <linux/kthread.h>  #include <linux/uaccess.h> +#include <linux/bsearch.h> +#include <linux/module.h>  #include <linux/ftrace.h>  #include <linux/sysctl.h>  #include <linux/slab.h>  #include <linux/ctype.h> +#include <linux/sort.h>  #include <linux/list.h>  #include <linux/hash.h>  #include <linux/rcupdate.h>  #include <trace/events/sched.h> -#include <asm/ftrace.h>  #include <asm/setup.h>  #include "trace_output.h"  #include "trace_stat.h"  #define FTRACE_WARN_ON(cond)			\ -	do {					\ -		if (WARN_ON(cond))		\ +	({					\ +		int ___r = cond;		\ +		if (WARN_ON(___r))		\  			ftrace_kill();		\ -	} while (0) +		___r;				\ +	})  #define FTRACE_WARN_ON_ONCE(cond)		\ -	do {					\ -		if (WARN_ON_ONCE(cond))		\ +	({					\ +		int ___r = cond;		\ +		if (WARN_ON_ONCE(___r))		\  			ftrace_kill();		\ -	} while (0) +		___r;				\ +	})  /* hash bits for specific function selection */  #define FTRACE_HASH_BITS 7  #define FTRACE_FUNC_HASHSIZE (1 << FTRACE_HASH_BITS) +#define FTRACE_HASH_DEFAULT_BITS 10 +#define FTRACE_HASH_MAX_BITS 12 + +#define FL_GLOBAL_CONTROL_MASK (FTRACE_OPS_FL_CONTROL) + +#ifdef CONFIG_DYNAMIC_FTRACE +#define INIT_REGEX_LOCK(opsname)	\ +	.regex_lock	= __MUTEX_INITIALIZER(opsname.regex_lock), +#else +#define INIT_REGEX_LOCK(opsname) +#endif + +static struct ftrace_ops ftrace_list_end __read_mostly = { +	.func		= ftrace_stub, +	.flags		= FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_STUB, +};  /* ftrace_enabled is a method to turn ftrace on or off */  int ftrace_enabled __read_mostly;  static int last_ftrace_enabled;  /* Quick disabling of function tracer. */ -int function_trace_stop; +int function_trace_stop __read_mostly; + +/* Current function tracing op */ +struct ftrace_ops *function_trace_op __read_mostly = &ftrace_list_end; +/* What to set function_trace_op to */ +static struct ftrace_ops *set_function_trace_op;  /* List for set_ftrace_pid's pids. */  LIST_HEAD(ftrace_pids); @@ -76,41 +103,80 @@ static int ftrace_disabled __read_mostly;  static DEFINE_MUTEX(ftrace_lock); -static struct ftrace_ops ftrace_list_end __read_mostly = -{ -	.func		= ftrace_stub, -}; - -static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end; +static struct ftrace_ops *ftrace_control_list __read_mostly = &ftrace_list_end; +static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end;  ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; -ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub;  ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub; +static struct ftrace_ops global_ops; +static struct ftrace_ops control_ops; + +#if ARCH_SUPPORTS_FTRACE_OPS +static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, +				 struct ftrace_ops *op, struct pt_regs *regs); +#else +/* See comment below, where ftrace_ops_list_func is defined */ +static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip); +#define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops) +#endif  /* - * Traverse the ftrace_list, invoking all entries.  
The reason that we - * can use rcu_dereference_raw() is that elements removed from this list + * Traverse the ftrace_global_list, invoking all entries.  The reason that we + * can use rcu_dereference_raw_notrace() is that elements removed from this list   * are simply leaked, so there is no need to interact with a grace-period - * mechanism.  The rcu_dereference_raw() calls are needed to handle - * concurrent insertions into the ftrace_list. + * mechanism.  The rcu_dereference_raw_notrace() calls are needed to handle + * concurrent insertions into the ftrace_global_list.   *   * Silly Alpha and silly pointer-speculation compiler optimizations!   */ -static void ftrace_list_func(unsigned long ip, unsigned long parent_ip) +#define do_for_each_ftrace_op(op, list)			\ +	op = rcu_dereference_raw_notrace(list);			\ +	do + +/* + * Optimized for just a single item in the list (as that is the normal case). + */ +#define while_for_each_ftrace_op(op)				\ +	while (likely(op = rcu_dereference_raw_notrace((op)->next)) &&	\ +	       unlikely((op) != &ftrace_list_end)) + +static inline void ftrace_ops_init(struct ftrace_ops *ops) +{ +#ifdef CONFIG_DYNAMIC_FTRACE +	if (!(ops->flags & FTRACE_OPS_FL_INITIALIZED)) { +		mutex_init(&ops->regex_lock); +		ops->flags |= FTRACE_OPS_FL_INITIALIZED; +	} +#endif +} + +/** + * ftrace_nr_registered_ops - return number of ops registered + * + * Returns the number of ftrace_ops registered and tracing functions + */ +int ftrace_nr_registered_ops(void)  { -	struct ftrace_ops *op = rcu_dereference_raw(ftrace_list); /*see above*/ +	struct ftrace_ops *ops; +	int cnt = 0; + +	mutex_lock(&ftrace_lock); + +	for (ops = ftrace_ops_list; +	     ops != &ftrace_list_end; ops = ops->next) +		cnt++; -	while (op != &ftrace_list_end) { -		op->func(ip, parent_ip); -		op = rcu_dereference_raw(op->next); /*see above*/ -	}; +	mutex_unlock(&ftrace_lock); + +	return cnt;  } -static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip) +static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip, +			    struct ftrace_ops *op, struct pt_regs *regs)  {  	if (!test_tsk_trace_trace(current))  		return; -	ftrace_pid_function(ip, parent_ip); +	ftrace_pid_function(ip, parent_ip, op, regs);  }  static void set_ftrace_pid_function(ftrace_func_t func) @@ -129,64 +195,142 @@ static void set_ftrace_pid_function(ftrace_func_t func)  void clear_ftrace_function(void)  {  	ftrace_trace_function = ftrace_stub; -	__ftrace_trace_function = ftrace_stub;  	ftrace_pid_function = ftrace_stub;  } -#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST -/* - * For those archs that do not test ftrace_trace_stop in their - * mcount call site, we need to do it from C. - */ -static void ftrace_test_stop_func(unsigned long ip, unsigned long parent_ip) +static void control_ops_disable_all(struct ftrace_ops *ops)  { -	if (function_trace_stop) -		return; +	int cpu; -	__ftrace_trace_function(ip, parent_ip); +	for_each_possible_cpu(cpu) +		*per_cpu_ptr(ops->disabled, cpu) = 1;  } -#endif -static int __register_ftrace_function(struct ftrace_ops *ops) +static int control_ops_alloc(struct ftrace_ops *ops) +{ +	int __percpu *disabled; + +	disabled = alloc_percpu(int); +	if (!disabled) +		return -ENOMEM; + +	ops->disabled = disabled; +	control_ops_disable_all(ops); +	return 0; +} + +static void ftrace_sync(struct work_struct *work)  { -	ops->next = ftrace_list;  	/* -	 * We are entering ops into the ftrace_list but another -	 * CPU might be walking that list. 
We need to make sure -	 * the ops->next pointer is valid before another CPU sees -	 * the ops pointer included into the ftrace_list. +	 * This function is just a stub to implement a hard force +	 * of synchronize_sched(). This requires synchronizing +	 * tasks even in userspace and idle. +	 * +	 * Yes, function tracing is rude.  	 */ -	rcu_assign_pointer(ftrace_list, ops); +} -	if (ftrace_enabled) { -		ftrace_func_t func; +static void ftrace_sync_ipi(void *data) +{ +	/* Probably not needed, but do it anyway */ +	smp_rmb(); +} -		if (ops->next == &ftrace_list_end) -			func = ops->func; -		else -			func = ftrace_list_func; +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +static void update_function_graph_func(void); +#else +static inline void update_function_graph_func(void) { } +#endif -		if (!list_empty(&ftrace_pids)) { -			set_ftrace_pid_function(func); -			func = ftrace_pid_func; -		} +static void update_ftrace_function(void) +{ +	ftrace_func_t func; + +	/* +	 * If we are at the end of the list and this ops is +	 * recursion safe and not dynamic and the arch supports passing ops, +	 * then have the mcount trampoline call the function directly. +	 */ +	if (ftrace_ops_list == &ftrace_list_end || +	    (ftrace_ops_list->next == &ftrace_list_end && +	     !(ftrace_ops_list->flags & FTRACE_OPS_FL_DYNAMIC) && +	     (ftrace_ops_list->flags & FTRACE_OPS_FL_RECURSION_SAFE) && +	     !FTRACE_FORCE_LIST_FUNC)) { +		/* Set the ftrace_ops that the arch callback uses */ +		set_function_trace_op = ftrace_ops_list; +		func = ftrace_ops_list->func; +	} else { +		/* Just use the default ftrace_ops */ +		set_function_trace_op = &ftrace_list_end; +		func = ftrace_ops_list_func; +	} + +	update_function_graph_func(); +	/* If there's no change, then do nothing more here */ +	if (ftrace_trace_function == func) +		return; + +	/* +	 * If we are using the list function, it doesn't care +	 * about the function_trace_ops. +	 */ +	if (func == ftrace_ops_list_func) { +		ftrace_trace_function = func;  		/* -		 * For one func, simply call it directly. -		 * For more than one func, call the chain. +		 * Don't even bother setting function_trace_ops, +		 * it would be racy to do so anyway.  		 */ -#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST -		ftrace_trace_function = func; -#else -		__ftrace_trace_function = func; -		ftrace_trace_function = ftrace_test_stop_func; -#endif +		return;  	} -	return 0; +#ifndef CONFIG_DYNAMIC_FTRACE +	/* +	 * For static tracing, we need to be a bit more careful. +	 * The function change takes affect immediately. Thus, +	 * we need to coorditate the setting of the function_trace_ops +	 * with the setting of the ftrace_trace_function. +	 * +	 * Set the function to the list ops, which will call the +	 * function we want, albeit indirectly, but it handles the +	 * ftrace_ops and doesn't depend on function_trace_op. +	 */ +	ftrace_trace_function = ftrace_ops_list_func; +	/* +	 * Make sure all CPUs see this. Yes this is slow, but static +	 * tracing is slow and nasty to have enabled. +	 */ +	schedule_on_each_cpu(ftrace_sync); +	/* Now all cpus are using the list ops. */ +	function_trace_op = set_function_trace_op; +	/* Make sure the function_trace_op is visible on all CPUs */ +	smp_wmb(); +	/* Nasty way to force a rmb on all cpus */ +	smp_call_function(ftrace_sync_ipi, NULL, 1); +	/* OK, we are all set to update the ftrace_trace_function now! 
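Aside on the static-tracing switch just above: a minimal userspace sketch of the same two-step hand-over, written with C11 atomics instead of the kernel's schedule_on_each_cpu()/IPI machinery (so the cross-CPU synchronization is compressed into release/acquire ordering — an analogy only). All names here (current_op, trace_fn, switch_to) are illustrative, not kernel symbols.

#include <stdatomic.h>
#include <stdio.h>

struct op {
	void (*func)(long ip);
};

/* the pointer the fast handler depends on (analogue of function_trace_op) */
static _Atomic(struct op *) current_op;

static void list_handler(long ip)
{
	/* safe to run with any (even stale) value of current_op */
	printf("list handler: %ld\n", ip);
}

static void fast_handler(long ip)
{
	struct op *op = atomic_load_explicit(&current_op, memory_order_acquire);
	printf("fast handler via op %p: %ld\n", (void *)op, ip);
}

static struct op fast_op = { .func = fast_handler };

/* the pointer call sites invoke (analogue of ftrace_trace_function) */
static _Atomic(void (*)(long)) trace_fn = list_handler;

static void switch_to(struct op *new_op, void (*new_fn)(long))
{
	/* step 1: park every caller on the handler that tolerates a stale op */
	atomic_store(&trace_fn, list_handler);
	/* step 2: publish the op the fast handler will rely on */
	atomic_store_explicit(&current_op, new_op, memory_order_release);
	/* step 3: only now is the fast handler safe to install */
	atomic_store(&trace_fn, new_fn);
}

int main(void)
{
	switch_to(&fast_op, fast_handler);
	void (*fn)(long) = atomic_load(&trace_fn);
	fn(7);
	return 0;
}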
*/ +#endif /* !CONFIG_DYNAMIC_FTRACE */ + +	ftrace_trace_function = func;  } -static int __unregister_ftrace_function(struct ftrace_ops *ops) +int using_ftrace_ops_list_func(void) +{ +	return ftrace_trace_function == ftrace_ops_list_func; +} + +static void add_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops) +{ +	ops->next = *list; +	/* +	 * We are entering ops into the list but another +	 * CPU might be walking that list. We need to make sure +	 * the ops->next pointer is valid before another CPU sees +	 * the ops pointer included into the list. +	 */ +	rcu_assign_pointer(*list, ops); +} + +static int remove_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops)  {  	struct ftrace_ops **p; @@ -194,13 +338,12 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)  	 * If we are removing the last function, then simply point  	 * to the ftrace_stub.  	 */ -	if (ftrace_list == ops && ops->next == &ftrace_list_end) { -		ftrace_trace_function = ftrace_stub; -		ftrace_list = &ftrace_list_end; +	if (*list == ops && ops->next == &ftrace_list_end) { +		*list = &ftrace_list_end;  		return 0;  	} -	for (p = &ftrace_list; *p != &ftrace_list_end; p = &(*p)->next) +	for (p = list; *p != &ftrace_list_end; p = &(*p)->next)  		if (*p == ops)  			break; @@ -208,53 +351,96 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)  		return -1;  	*p = (*p)->next; +	return 0; +} -	if (ftrace_enabled) { -		/* If we only have one func left, then call that directly */ -		if (ftrace_list->next == &ftrace_list_end) { -			ftrace_func_t func = ftrace_list->func; +static void add_ftrace_list_ops(struct ftrace_ops **list, +				struct ftrace_ops *main_ops, +				struct ftrace_ops *ops) +{ +	int first = *list == &ftrace_list_end; +	add_ftrace_ops(list, ops); +	if (first) +		add_ftrace_ops(&ftrace_ops_list, main_ops); +} -			if (!list_empty(&ftrace_pids)) { -				set_ftrace_pid_function(func); -				func = ftrace_pid_func; -			} -#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST -			ftrace_trace_function = func; -#else -			__ftrace_trace_function = func; +static int remove_ftrace_list_ops(struct ftrace_ops **list, +				  struct ftrace_ops *main_ops, +				  struct ftrace_ops *ops) +{ +	int ret = remove_ftrace_ops(list, ops); +	if (!ret && *list == &ftrace_list_end) +		ret = remove_ftrace_ops(&ftrace_ops_list, main_ops); +	return ret; +} + +static int __register_ftrace_function(struct ftrace_ops *ops) +{ +	if (ops->flags & FTRACE_OPS_FL_DELETED) +		return -EINVAL; + +	if (WARN_ON(ops->flags & FTRACE_OPS_FL_ENABLED)) +		return -EBUSY; + +#ifndef CONFIG_DYNAMIC_FTRACE_WITH_REGS +	/* +	 * If the ftrace_ops specifies SAVE_REGS, then it only can be used +	 * if the arch supports it, or SAVE_REGS_IF_SUPPORTED is also set. +	 * Setting SAVE_REGS_IF_SUPPORTED makes SAVE_REGS irrelevant. 
+	 */ +	if (ops->flags & FTRACE_OPS_FL_SAVE_REGS && +	    !(ops->flags & FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED)) +		return -EINVAL; + +	if (ops->flags & FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED) +		ops->flags |= FTRACE_OPS_FL_SAVE_REGS;  #endif -		} -	} + +	if (!core_kernel_data((unsigned long)ops)) +		ops->flags |= FTRACE_OPS_FL_DYNAMIC; + +	if (ops->flags & FTRACE_OPS_FL_CONTROL) { +		if (control_ops_alloc(ops)) +			return -ENOMEM; +		add_ftrace_list_ops(&ftrace_control_list, &control_ops, ops); +	} else +		add_ftrace_ops(&ftrace_ops_list, ops); + +	if (ftrace_enabled) +		update_ftrace_function();  	return 0;  } -static void ftrace_update_pid_func(void) +static int __unregister_ftrace_function(struct ftrace_ops *ops)  { -	ftrace_func_t func; +	int ret; -	if (ftrace_trace_function == ftrace_stub) -		return; +	if (WARN_ON(!(ops->flags & FTRACE_OPS_FL_ENABLED))) +		return -EBUSY; -#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST -	func = ftrace_trace_function; -#else -	func = __ftrace_trace_function; -#endif +	if (ops->flags & FTRACE_OPS_FL_CONTROL) { +		ret = remove_ftrace_list_ops(&ftrace_control_list, +					     &control_ops, ops); +	} else +		ret = remove_ftrace_ops(&ftrace_ops_list, ops); -	if (!list_empty(&ftrace_pids)) { -		set_ftrace_pid_function(func); -		func = ftrace_pid_func; -	} else { -		if (func == ftrace_pid_func) -			func = ftrace_pid_function; -	} +	if (ret < 0) +		return ret; -#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST -	ftrace_trace_function = func; -#else -	__ftrace_trace_function = func; -#endif +	if (ftrace_enabled) +		update_ftrace_function(); + +	return 0; +} + +static void ftrace_update_pid_func(void) +{ +	/* Only do something if we are tracing something */ +	if (ftrace_trace_function == ftrace_stub) +		return; + +	update_ftrace_function();  }  #ifdef CONFIG_FUNCTION_PROFILER @@ -288,7 +474,6 @@ struct ftrace_profile_stat {  #define PROFILES_PER_PAGE					\  	(PROFILE_RECORDS_SIZE / sizeof(struct ftrace_profile)) -static int ftrace_profile_bits __read_mostly;  static int ftrace_profile_enabled __read_mostly;  /* ftrace_profile_lock - synchronize the enable and disable of the profiler */ @@ -296,7 +481,8 @@ static DEFINE_MUTEX(ftrace_profile_lock);  static DEFINE_PER_CPU(struct ftrace_profile_stat, ftrace_profile_stats); -#define FTRACE_PROFILE_HASH_SIZE 1024 /* must be power of 2 */ +#define FTRACE_PROFILE_HASH_BITS 10 +#define FTRACE_PROFILE_HASH_SIZE (1 << FTRACE_PROFILE_HASH_BITS)  static void *  function_stat_next(void *v, int idx) @@ -407,12 +593,18 @@ static int function_stat_show(struct seq_file *m, void *v)  	if (rec->counter <= 1)  		stddev = 0;  	else { -		stddev = rec->time_squared - rec->counter * avg * avg; +		/* +		 * Apply Welford's method: +		 * s^2 = 1 / (n * (n-1)) * (n * \Sum (x_i)^2 - (\Sum x_i)^2) +		 */ +		stddev = rec->counter * rec->time_squared - +			 rec->time * rec->time; +  		/*  		 * Divide only 1000 for ns^2 -> us^2 conversion.  		 * trace_print_graph_duration will divide 1000 again.  		 
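The comment above labels this "Welford's method", but the expression actually used is the textbook sum-of-squares identity s^2 = (n * sum(x_i^2) - (sum x_i)^2) / (n * (n - 1)), computed from the running count, sum and sum of squares kept per record (the extra /1000 is only the ns^2 -> us^2 unit change). A small userspace check of that identity, with made-up sample values:

#include <math.h>
#include <stdio.h>

int main(void)
{
	double samples[] = { 12.0, 15.0, 11.0, 14.0, 13.0 };
	int n = sizeof(samples) / sizeof(samples[0]);
	double sum = 0.0, sum_sq = 0.0;

	/* the same running totals the profiler keeps: count, sum, sum of squares */
	for (int i = 0; i < n; i++) {
		sum += samples[i];
		sum_sq += samples[i] * samples[i];
	}

	double var = (n * sum_sq - sum * sum) / ((double)n * (n - 1));
	printf("mean=%.2f stddev=%.2f\n", sum / n, sqrt(var));
	return 0;
}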
*/ -		do_div(stddev, (rec->counter - 1) * 1000); +		do_div(stddev, rec->counter * (rec->counter - 1) * 1000);  	}  	trace_seq_init(&s); @@ -478,7 +670,7 @@ int ftrace_profile_pages_init(struct ftrace_profile_stat *stat)  	pages = DIV_ROUND_UP(functions, PROFILES_PER_PAGE); -	for (i = 0; i < pages; i++) { +	for (i = 1; i < pages; i++) {  		pg->next = (void *)get_zeroed_page(GFP_KERNEL);  		if (!pg->next)  			goto out_free; @@ -496,7 +688,6 @@ int ftrace_profile_pages_init(struct ftrace_profile_stat *stat)  		free_page(tmp);  	} -	free_page((unsigned long)stat->pages);  	stat->pages = NULL;  	stat->start = NULL; @@ -527,13 +718,6 @@ static int ftrace_profile_init_cpu(int cpu)  	if (!stat->hash)  		return -ENOMEM; -	if (!ftrace_profile_bits) { -		size--; - -		for (; size; size >>= 1) -			ftrace_profile_bits++; -	} -  	/* Preallocate the function profiling pages */  	if (ftrace_profile_pages_init(stat) < 0) {  		kfree(stat->hash); @@ -549,7 +733,7 @@ static int ftrace_profile_init(void)  	int cpu;  	int ret = 0; -	for_each_online_cpu(cpu) { +	for_each_possible_cpu(cpu) {  		ret = ftrace_profile_init_cpu(cpu);  		if (ret)  			break; @@ -564,16 +748,15 @@ ftrace_find_profiled_func(struct ftrace_profile_stat *stat, unsigned long ip)  {  	struct ftrace_profile *rec;  	struct hlist_head *hhd; -	struct hlist_node *n;  	unsigned long key; -	key = hash_long(ip, ftrace_profile_bits); +	key = hash_long(ip, FTRACE_PROFILE_HASH_BITS);  	hhd = &stat->hash[key];  	if (hlist_empty(hhd))  		return NULL; -	hlist_for_each_entry_rcu(rec, n, hhd, node) { +	hlist_for_each_entry_rcu_notrace(rec, hhd, node) {  		if (rec->ip == ip)  			return rec;  	} @@ -586,7 +769,7 @@ static void ftrace_add_profile(struct ftrace_profile_stat *stat,  {  	unsigned long key; -	key = hash_long(rec->ip, ftrace_profile_bits); +	key = hash_long(rec->ip, FTRACE_PROFILE_HASH_BITS);  	hlist_add_head_rcu(&rec->node, &stat->hash[key]);  } @@ -627,7 +810,8 @@ ftrace_profile_alloc(struct ftrace_profile_stat *stat, unsigned long ip)  }  static void -function_profile_call(unsigned long ip, unsigned long parent_ip) +function_profile_call(unsigned long ip, unsigned long parent_ip, +		      struct ftrace_ops *ops, struct pt_regs *regs)  {  	struct ftrace_profile_stat *stat;  	struct ftrace_profile *rec; @@ -638,7 +822,7 @@ function_profile_call(unsigned long ip, unsigned long parent_ip)  	local_irq_save(flags); -	stat = &__get_cpu_var(ftrace_profile_stats); +	stat = this_cpu_ptr(&ftrace_profile_stats);  	if (!stat->hash || !ftrace_profile_enabled)  		goto out; @@ -657,7 +841,7 @@ function_profile_call(unsigned long ip, unsigned long parent_ip)  #ifdef CONFIG_FUNCTION_GRAPH_TRACER  static int profile_graph_entry(struct ftrace_graph_ent *trace)  { -	function_profile_call(trace->func, 0); +	function_profile_call(trace->func, 0, NULL, NULL);  	return 1;  } @@ -669,7 +853,7 @@ static void profile_graph_return(struct ftrace_graph_ret *trace)  	unsigned long flags;  	local_irq_save(flags); -	stat = &__get_cpu_var(ftrace_profile_stats); +	stat = this_cpu_ptr(&ftrace_profile_stats);  	if (!stat->hash || !ftrace_profile_enabled)  		goto out; @@ -715,9 +899,10 @@ static void unregister_ftrace_profiler(void)  	unregister_ftrace_graph();  }  #else -static struct ftrace_ops ftrace_profile_ops __read_mostly = -{ +static struct ftrace_ops ftrace_profile_ops __read_mostly = {  	.func		= function_profile_call, +	.flags		= FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED, +	INIT_REGEX_LOCK(ftrace_profile_ops)  };  static int register_ftrace_profiler(void) @@ 
-736,19 +921,10 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf,  		     size_t cnt, loff_t *ppos)  {  	unsigned long val; -	char buf[64];		/* big enough to hold a number */  	int ret; -	if (cnt >= sizeof(buf)) -		return -EINVAL; - -	if (copy_from_user(&buf, ubuf, cnt)) -		return -EFAULT; - -	buf[cnt] = 0; - -	ret = strict_strtoul(buf, 10, &val); -	if (ret < 0) +	ret = kstrtoul_from_user(ubuf, cnt, 10, &val); +	if (ret)  		return ret;  	val = !!val; @@ -878,31 +1054,50 @@ struct ftrace_func_probe {  	unsigned long		flags;  	unsigned long		ip;  	void			*data; -	struct rcu_head		rcu; +	struct list_head	free_list;  }; -enum { -	FTRACE_ENABLE_CALLS		= (1 << 0), -	FTRACE_DISABLE_CALLS		= (1 << 1), -	FTRACE_UPDATE_TRACE_FUNC	= (1 << 2), -	FTRACE_START_FUNC_RET		= (1 << 3), -	FTRACE_STOP_FUNC_RET		= (1 << 4), +struct ftrace_func_entry { +	struct hlist_node hlist; +	unsigned long ip;  }; -static int ftrace_filtered; - -static struct dyn_ftrace *ftrace_new_addrs; +struct ftrace_hash { +	unsigned long		size_bits; +	struct hlist_head	*buckets; +	unsigned long		count; +	struct rcu_head		rcu; +}; -static DEFINE_MUTEX(ftrace_regex_lock); +/* + * We make these constant because no one should touch them, + * but they are used as the default "empty hash", to avoid allocating + * it all the time. These are in a read only section such that if + * anyone does try to modify it, it will cause an exception. + */ +static const struct hlist_head empty_buckets[1]; +static const struct ftrace_hash empty_hash = { +	.buckets = (struct hlist_head *)empty_buckets, +}; +#define EMPTY_HASH	((struct ftrace_hash *)&empty_hash) + +static struct ftrace_ops global_ops = { +	.func			= ftrace_stub, +	.notrace_hash		= EMPTY_HASH, +	.filter_hash		= EMPTY_HASH, +	.flags			= FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED, +	INIT_REGEX_LOCK(global_ops) +};  struct ftrace_page {  	struct ftrace_page	*next; +	struct dyn_ftrace	*records;  	int			index; -	struct dyn_ftrace	records[]; +	int			size;  }; -#define ENTRIES_PER_PAGE \ -  ((PAGE_SIZE - sizeof(struct ftrace_page)) / sizeof(struct dyn_ftrace)) +#define ENTRY_SIZE sizeof(struct dyn_ftrace) +#define ENTRIES_PER_PAGE (PAGE_SIZE / ENTRY_SIZE)  /* estimate from running different kernels */  #define NR_TO_INIT		10000 @@ -910,7 +1105,308 @@ struct ftrace_page {  static struct ftrace_page	*ftrace_pages_start;  static struct ftrace_page	*ftrace_pages; -static struct dyn_ftrace *ftrace_free_records; +static bool __always_inline ftrace_hash_empty(struct ftrace_hash *hash) +{ +	return !hash || !hash->count; +} + +static struct ftrace_func_entry * +ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip) +{ +	unsigned long key; +	struct ftrace_func_entry *entry; +	struct hlist_head *hhd; + +	if (ftrace_hash_empty(hash)) +		return NULL; + +	if (hash->size_bits > 0) +		key = hash_long(ip, hash->size_bits); +	else +		key = 0; + +	hhd = &hash->buckets[key]; + +	hlist_for_each_entry_rcu_notrace(entry, hhd, hlist) { +		if (entry->ip == ip) +			return entry; +	} +	return NULL; +} + +static void __add_hash_entry(struct ftrace_hash *hash, +			     struct ftrace_func_entry *entry) +{ +	struct hlist_head *hhd; +	unsigned long key; + +	if (hash->size_bits) +		key = hash_long(entry->ip, hash->size_bits); +	else +		key = 0; + +	hhd = &hash->buckets[key]; +	hlist_add_head(&entry->hlist, hhd); +	hash->count++; +} + +static int add_hash_entry(struct ftrace_hash *hash, unsigned long ip) +{ +	struct ftrace_func_entry *entry; + +	entry = kmalloc(sizeof(*entry), GFP_KERNEL); +	if 
(!entry) +		return -ENOMEM; + +	entry->ip = ip; +	__add_hash_entry(hash, entry); + +	return 0; +} + +static void +free_hash_entry(struct ftrace_hash *hash, +		  struct ftrace_func_entry *entry) +{ +	hlist_del(&entry->hlist); +	kfree(entry); +	hash->count--; +} + +static void +remove_hash_entry(struct ftrace_hash *hash, +		  struct ftrace_func_entry *entry) +{ +	hlist_del(&entry->hlist); +	hash->count--; +} + +static void ftrace_hash_clear(struct ftrace_hash *hash) +{ +	struct hlist_head *hhd; +	struct hlist_node *tn; +	struct ftrace_func_entry *entry; +	int size = 1 << hash->size_bits; +	int i; + +	if (!hash->count) +		return; + +	for (i = 0; i < size; i++) { +		hhd = &hash->buckets[i]; +		hlist_for_each_entry_safe(entry, tn, hhd, hlist) +			free_hash_entry(hash, entry); +	} +	FTRACE_WARN_ON(hash->count); +} + +static void free_ftrace_hash(struct ftrace_hash *hash) +{ +	if (!hash || hash == EMPTY_HASH) +		return; +	ftrace_hash_clear(hash); +	kfree(hash->buckets); +	kfree(hash); +} + +static void __free_ftrace_hash_rcu(struct rcu_head *rcu) +{ +	struct ftrace_hash *hash; + +	hash = container_of(rcu, struct ftrace_hash, rcu); +	free_ftrace_hash(hash); +} + +static void free_ftrace_hash_rcu(struct ftrace_hash *hash) +{ +	if (!hash || hash == EMPTY_HASH) +		return; +	call_rcu_sched(&hash->rcu, __free_ftrace_hash_rcu); +} + +void ftrace_free_filter(struct ftrace_ops *ops) +{ +	ftrace_ops_init(ops); +	free_ftrace_hash(ops->filter_hash); +	free_ftrace_hash(ops->notrace_hash); +} + +static struct ftrace_hash *alloc_ftrace_hash(int size_bits) +{ +	struct ftrace_hash *hash; +	int size; + +	hash = kzalloc(sizeof(*hash), GFP_KERNEL); +	if (!hash) +		return NULL; + +	size = 1 << size_bits; +	hash->buckets = kcalloc(size, sizeof(*hash->buckets), GFP_KERNEL); + +	if (!hash->buckets) { +		kfree(hash); +		return NULL; +	} + +	hash->size_bits = size_bits; + +	return hash; +} + +static struct ftrace_hash * +alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash) +{ +	struct ftrace_func_entry *entry; +	struct ftrace_hash *new_hash; +	int size; +	int ret; +	int i; + +	new_hash = alloc_ftrace_hash(size_bits); +	if (!new_hash) +		return NULL; + +	/* Empty hash? */ +	if (ftrace_hash_empty(hash)) +		return new_hash; + +	size = 1 << hash->size_bits; +	for (i = 0; i < size; i++) { +		hlist_for_each_entry(entry, &hash->buckets[i], hlist) { +			ret = add_hash_entry(new_hash, entry->ip); +			if (ret < 0) +				goto free_hash; +		} +	} + +	FTRACE_WARN_ON(new_hash->count != hash->count); + +	return new_hash; + + free_hash: +	free_ftrace_hash(new_hash); +	return NULL; +} + +static void +ftrace_hash_rec_disable(struct ftrace_ops *ops, int filter_hash); +static void +ftrace_hash_rec_enable(struct ftrace_ops *ops, int filter_hash); + +static int +ftrace_hash_move(struct ftrace_ops *ops, int enable, +		 struct ftrace_hash **dst, struct ftrace_hash *src) +{ +	struct ftrace_func_entry *entry; +	struct hlist_node *tn; +	struct hlist_head *hhd; +	struct ftrace_hash *old_hash; +	struct ftrace_hash *new_hash; +	int size = src->count; +	int bits = 0; +	int ret; +	int i; + +	/* +	 * Remove the current set, update the hash and add +	 * them back. +	 */ +	ftrace_hash_rec_disable(ops, enable); + +	/* +	 * If the new source is empty, just free dst and assign it +	 * the empty_hash. 
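A hedged userspace sketch of the ftrace_hash idea introduced above: a power-of-two number of buckets selected by a hash of the instruction address, with chained entries per bucket. hash_ip() below stands in for the kernel's hash_long(); structure and function names are illustrative only.

#include <stdio.h>
#include <stdlib.h>

#define HASH_BITS 4
#define HASH_SIZE (1u << HASH_BITS)

struct entry {
	struct entry *next;
	unsigned long ip;
};

static struct entry *buckets[HASH_SIZE];

static unsigned int hash_ip(unsigned long ip)
{
	/* golden-ratio multiply, keep the top HASH_BITS bits */
	return (unsigned int)((ip * 0x61C8864680B583EBull) >> (64 - HASH_BITS));
}

static int add_ip(unsigned long ip)
{
	struct entry *e = malloc(sizeof(*e));

	if (!e)
		return -1;
	e->ip = ip;
	e->next = buckets[hash_ip(ip)];
	buckets[hash_ip(ip)] = e;
	return 0;
}

static struct entry *lookup_ip(unsigned long ip)
{
	for (struct entry *e = buckets[hash_ip(ip)]; e; e = e->next)
		if (e->ip == ip)
			return e;
	return NULL;
}

int main(void)
{
	add_ip(0x81000100UL);
	add_ip(0x81000200UL);
	printf("found: %d\n", lookup_ip(0x81000200UL) != NULL);
	return 0;
}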
+	 */ +	if (!src->count) { +		free_ftrace_hash_rcu(*dst); +		rcu_assign_pointer(*dst, EMPTY_HASH); +		/* still need to update the function records */ +		ret = 0; +		goto out; +	} + +	/* +	 * Make the hash size about 1/2 the # found +	 */ +	for (size /= 2; size; size >>= 1) +		bits++; + +	/* Don't allocate too much */ +	if (bits > FTRACE_HASH_MAX_BITS) +		bits = FTRACE_HASH_MAX_BITS; + +	ret = -ENOMEM; +	new_hash = alloc_ftrace_hash(bits); +	if (!new_hash) +		goto out; + +	size = 1 << src->size_bits; +	for (i = 0; i < size; i++) { +		hhd = &src->buckets[i]; +		hlist_for_each_entry_safe(entry, tn, hhd, hlist) { +			remove_hash_entry(src, entry); +			__add_hash_entry(new_hash, entry); +		} +	} + +	old_hash = *dst; +	rcu_assign_pointer(*dst, new_hash); +	free_ftrace_hash_rcu(old_hash); + +	ret = 0; + out: +	/* +	 * Enable regardless of ret: +	 *  On success, we enable the new hash. +	 *  On failure, we re-enable the original hash. +	 */ +	ftrace_hash_rec_enable(ops, enable); + +	return ret; +} + +/* + * Test the hashes for this ops to see if we want to call + * the ops->func or not. + * + * It's a match if the ip is in the ops->filter_hash or + * the filter_hash does not exist or is empty, + *  AND + * the ip is not in the ops->notrace_hash. + * + * This needs to be called with preemption disabled as + * the hashes are freed with call_rcu_sched(). + */ +static int +ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs) +{ +	struct ftrace_hash *filter_hash; +	struct ftrace_hash *notrace_hash; +	int ret; + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +	/* +	 * There's a small race when adding ops that the ftrace handler +	 * that wants regs, may be called without them. We can not +	 * allow that handler to be called if regs is NULL. +	 */ +	if (regs == NULL && (ops->flags & FTRACE_OPS_FL_SAVE_REGS)) +		return 0; +#endif + +	filter_hash = rcu_dereference_raw_notrace(ops->filter_hash); +	notrace_hash = rcu_dereference_raw_notrace(ops->notrace_hash); + +	if ((ftrace_hash_empty(filter_hash) || +	     ftrace_lookup_ip(filter_hash, ip)) && +	    (ftrace_hash_empty(notrace_hash) || +	     !ftrace_lookup_ip(notrace_hash, ip))) +		ret = 1; +	else +		ret = 0; + +	return ret; +}  /*   * This is a double for. 
Do not use 'break' to break out of the loop, @@ -926,63 +1422,186 @@ static struct dyn_ftrace *ftrace_free_records;  		}				\  	} -static void ftrace_free_rec(struct dyn_ftrace *rec) + +static int ftrace_cmp_recs(const void *a, const void *b)  { -	rec->freelist = ftrace_free_records; -	ftrace_free_records = rec; -	rec->flags |= FTRACE_FL_FREE; +	const struct dyn_ftrace *key = a; +	const struct dyn_ftrace *rec = b; + +	if (key->flags < rec->ip) +		return -1; +	if (key->ip >= rec->ip + MCOUNT_INSN_SIZE) +		return 1; +	return 0;  } -static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) +static unsigned long ftrace_location_range(unsigned long start, unsigned long end)  { +	struct ftrace_page *pg;  	struct dyn_ftrace *rec; +	struct dyn_ftrace key; -	/* First check for freed records */ -	if (ftrace_free_records) { -		rec = ftrace_free_records; - -		if (unlikely(!(rec->flags & FTRACE_FL_FREE))) { -			FTRACE_WARN_ON_ONCE(1); -			ftrace_free_records = NULL; -			return NULL; -		} +	key.ip = start; +	key.flags = end;	/* overload flags, as it is unsigned long */ -		ftrace_free_records = rec->freelist; -		memset(rec, 0, sizeof(*rec)); -		return rec; +	for (pg = ftrace_pages_start; pg; pg = pg->next) { +		if (end < pg->records[0].ip || +		    start >= (pg->records[pg->index - 1].ip + MCOUNT_INSN_SIZE)) +			continue; +		rec = bsearch(&key, pg->records, pg->index, +			      sizeof(struct dyn_ftrace), +			      ftrace_cmp_recs); +		if (rec) +			return rec->ip;  	} -	if (ftrace_pages->index == ENTRIES_PER_PAGE) { -		if (!ftrace_pages->next) { -			/* allocate another page */ -			ftrace_pages->next = -				(void *)get_zeroed_page(GFP_KERNEL); -			if (!ftrace_pages->next) -				return NULL; -		} -		ftrace_pages = ftrace_pages->next; -	} +	return 0; +} -	return &ftrace_pages->records[ftrace_pages->index++]; +/** + * ftrace_location - return true if the ip giving is a traced location + * @ip: the instruction pointer to check + * + * Returns rec->ip if @ip given is a pointer to a ftrace location. + * That is, the instruction that is either a NOP or call to + * the function tracer. It checks the ftrace internal tables to + * determine if the address belongs or not. + */ +unsigned long ftrace_location(unsigned long ip) +{ +	return ftrace_location_range(ip, ip);  } -static struct dyn_ftrace * -ftrace_record_ip(unsigned long ip) +/** + * ftrace_text_reserved - return true if range contains an ftrace location + * @start: start of range to search + * @end: end of range to search (inclusive). @end points to the last byte to check. + * + * Returns 1 if @start and @end contains a ftrace location. + * That is, the instruction that is either a NOP or call to + * the function tracer. It checks the ftrace internal tables to + * determine if the address belongs or not. + */ +int ftrace_text_reserved(const void *start, const void *end)  { +	unsigned long ret; + +	ret = ftrace_location_range((unsigned long)start, +				    (unsigned long)end); + +	return (int)!!ret; +} + +static void __ftrace_hash_rec_update(struct ftrace_ops *ops, +				     int filter_hash, +				     bool inc) +{ +	struct ftrace_hash *hash; +	struct ftrace_hash *other_hash; +	struct ftrace_page *pg;  	struct dyn_ftrace *rec; +	int count = 0; +	int all = 0; -	if (ftrace_disabled) -		return NULL; +	/* Only update if the ops has been registered */ +	if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) +		return; -	rec = ftrace_alloc_dyn_node(ip); -	if (!rec) -		return NULL; +	/* +	 * In the filter_hash case: +	 *   If the count is zero, we update all records. 
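As an aside to ftrace_location_range() above: it binary-searches the sorted per-page record arrays with a comparator whose key is really a [start, end] range (the end is smuggled in through the key's flags field). A hedged userspace sketch of that range-style bsearch, with an explicit "end" field and illustrative sizes instead of dyn_ftrace/MCOUNT_INSN_SIZE:

#include <stdio.h>
#include <stdlib.h>

#define INSN_SIZE 4	/* stand-in for MCOUNT_INSN_SIZE */

struct rec {
	unsigned long ip;	/* record start; for the key: start of range */
	unsigned long end;	/* used by the key only: end of range */
};

/* 0 when the key's [ip, end] overlaps rec's [ip, ip + INSN_SIZE) */
static int cmp_recs(const void *a, const void *b)
{
	const struct rec *key = a;
	const struct rec *rec = b;

	if (key->end < rec->ip)
		return -1;
	if (key->ip >= rec->ip + INSN_SIZE)
		return 1;
	return 0;
}

int main(void)
{
	/* must be sorted by ip, like ftrace's record pages */
	struct rec recs[] = {
		{ 0x1000, 0 }, { 0x1040, 0 }, { 0x1080, 0 }, { 0x10c0, 0 },
	};
	struct rec key = { .ip = 0x1042, .end = 0x1042 };	/* point lookup */
	struct rec *hit;

	hit = bsearch(&key, recs, sizeof(recs) / sizeof(recs[0]),
		      sizeof(recs[0]), cmp_recs);
	printf("found rec at ip=%#lx\n", hit ? hit->ip : 0UL);
	return 0;
}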
+	 *   Otherwise we just update the items in the hash. +	 * +	 * In the notrace_hash case: +	 *   We enable the update in the hash. +	 *   As disabling notrace means enabling the tracing, +	 *   and enabling notrace means disabling, the inc variable +	 *   gets inversed. +	 */ +	if (filter_hash) { +		hash = ops->filter_hash; +		other_hash = ops->notrace_hash; +		if (ftrace_hash_empty(hash)) +			all = 1; +	} else { +		inc = !inc; +		hash = ops->notrace_hash; +		other_hash = ops->filter_hash; +		/* +		 * If the notrace hash has no items, +		 * then there's nothing to do. +		 */ +		if (ftrace_hash_empty(hash)) +			return; +	} -	rec->ip = ip; -	rec->newlist = ftrace_new_addrs; -	ftrace_new_addrs = rec; +	do_for_each_ftrace_rec(pg, rec) { +		int in_other_hash = 0; +		int in_hash = 0; +		int match = 0; -	return rec; +		if (all) { +			/* +			 * Only the filter_hash affects all records. +			 * Update if the record is not in the notrace hash. +			 */ +			if (!other_hash || !ftrace_lookup_ip(other_hash, rec->ip)) +				match = 1; +		} else { +			in_hash = !!ftrace_lookup_ip(hash, rec->ip); +			in_other_hash = !!ftrace_lookup_ip(other_hash, rec->ip); + +			/* +			 * If filter_hash is set, we want to match all functions +			 * that are in the hash but not in the other hash. +			 * +			 * If filter_hash is not set, then we are decrementing. +			 * That means we match anything that is in the hash +			 * and also in the other_hash. That is, we need to turn +			 * off functions in the other hash because they are disabled +			 * by this hash. +			 */ +			if (filter_hash && in_hash && !in_other_hash) +				match = 1; +			else if (!filter_hash && in_hash && +				 (in_other_hash || ftrace_hash_empty(other_hash))) +				match = 1; +		} +		if (!match) +			continue; + +		if (inc) { +			rec->flags++; +			if (FTRACE_WARN_ON((rec->flags & ~FTRACE_FL_MASK) == FTRACE_REF_MAX)) +				return; +			/* +			 * If any ops wants regs saved for this function +			 * then all ops will get saved regs. +			 */ +			if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) +				rec->flags |= FTRACE_FL_REGS; +		} else { +			if (FTRACE_WARN_ON((rec->flags & ~FTRACE_FL_MASK) == 0)) +				return; +			rec->flags--; +		} +		count++; +		/* Shortcut, if we handled all records, we are done. */ +		if (!all && count == hash->count) +			return; +	} while_for_each_ftrace_rec(); +} + +static void ftrace_hash_rec_disable(struct ftrace_ops *ops, +				    int filter_hash) +{ +	__ftrace_hash_rec_update(ops, filter_hash, 0); +} + +static void ftrace_hash_rec_enable(struct ftrace_ops *ops, +				   int filter_hash) +{ +	__ftrace_hash_rec_update(ops, filter_hash, 1);  }  static void print_ip_ins(const char *fmt, unsigned char *p) @@ -995,7 +1614,19 @@ static void print_ip_ins(const char *fmt, unsigned char *p)  		printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]);  } -static void ftrace_bug(int failed, unsigned long ip) +/** + * ftrace_bug - report and shutdown function tracer + * @failed: The failed type (EFAULT, EINVAL, EPERM) + * @ip: The address that failed + * + * The arch code that enables or disables the function tracing + * can call ftrace_bug() when it has detected a problem in + * modifying the code. 
@failed should be one of either: + * EFAULT - if the problem happens on reading the @ip address + * EINVAL - if what is read at @ip is not what was expected + * EPERM - if the problem happens on writting to the @ip address + */ +void ftrace_bug(int failed, unsigned long ip)  {  	switch (failed) {  	case -EFAULT: @@ -1022,76 +1653,183 @@ static void ftrace_bug(int failed, unsigned long ip)  	}  } +static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update) +{ +	unsigned long flag = 0UL; + +	/* +	 * If we are updating calls: +	 * +	 *   If the record has a ref count, then we need to enable it +	 *   because someone is using it. +	 * +	 *   Otherwise we make sure its disabled. +	 * +	 * If we are disabling calls, then disable all records that +	 * are enabled. +	 */ +	if (enable && (rec->flags & ~FTRACE_FL_MASK)) +		flag = FTRACE_FL_ENABLED; + +	/* +	 * If enabling and the REGS flag does not match the REGS_EN, then +	 * do not ignore this record. Set flags to fail the compare against +	 * ENABLED. +	 */ +	if (flag && +	    (!(rec->flags & FTRACE_FL_REGS) != !(rec->flags & FTRACE_FL_REGS_EN))) +		flag |= FTRACE_FL_REGS; + +	/* If the state of this record hasn't changed, then do nothing */ +	if ((rec->flags & FTRACE_FL_ENABLED) == flag) +		return FTRACE_UPDATE_IGNORE; + +	if (flag) { +		/* Save off if rec is being enabled (for return value) */ +		flag ^= rec->flags & FTRACE_FL_ENABLED; + +		if (update) { +			rec->flags |= FTRACE_FL_ENABLED; +			if (flag & FTRACE_FL_REGS) { +				if (rec->flags & FTRACE_FL_REGS) +					rec->flags |= FTRACE_FL_REGS_EN; +				else +					rec->flags &= ~FTRACE_FL_REGS_EN; +			} +		} + +		/* +		 * If this record is being updated from a nop, then +		 *   return UPDATE_MAKE_CALL. +		 * Otherwise, +		 *   return UPDATE_MODIFY_CALL to tell the caller to convert +		 *   from the save regs, to a non-save regs function or +		 *   vice versa. +		 */ +		if (flag & FTRACE_FL_ENABLED) +			return FTRACE_UPDATE_MAKE_CALL; + +		return FTRACE_UPDATE_MODIFY_CALL; +	} + +	if (update) { +		/* If there's no more users, clear all flags */ +		if (!(rec->flags & ~FTRACE_FL_MASK)) +			rec->flags = 0; +		else +			/* Just disable the record (keep REGS state) */ +			rec->flags &= ~FTRACE_FL_ENABLED; +	} -/* Return 1 if the address range is reserved for ftrace */ -int ftrace_text_reserved(void *start, void *end) +	return FTRACE_UPDATE_MAKE_NOP; +} + +/** + * ftrace_update_record, set a record that now is tracing or not + * @rec: the record to update + * @enable: set to 1 if the record is tracing, zero to force disable + * + * The records that represent all functions that can be traced need + * to be updated when tracing has been enabled. + */ +int ftrace_update_record(struct dyn_ftrace *rec, int enable)  { -	struct dyn_ftrace *rec; -	struct ftrace_page *pg; +	return ftrace_check_record(rec, enable, 1); +} -	do_for_each_ftrace_rec(pg, rec) { -		if (rec->ip <= (unsigned long)end && -		    rec->ip + MCOUNT_INSN_SIZE > (unsigned long)start) -			return 1; -	} while_for_each_ftrace_rec(); -	return 0; +/** + * ftrace_test_record, check if the record has been enabled or not + * @rec: the record to test + * @enable: set to 1 to check if enabled, 0 if it is disabled + * + * The arch code may need to test if a record is already set to + * tracing to determine how to modify the function code that it + * represents. 
+ */ +int ftrace_test_record(struct dyn_ftrace *rec, int enable) +{ +	return ftrace_check_record(rec, enable, 0);  } +/** + * ftrace_get_addr_new - Get the call address to set to + * @rec:  The ftrace record descriptor + * + * If the record has the FTRACE_FL_REGS set, that means that it + * wants to convert to a callback that saves all regs. If FTRACE_FL_REGS + * is not not set, then it wants to convert to the normal callback. + * + * Returns the address of the trampoline to set to + */ +unsigned long ftrace_get_addr_new(struct dyn_ftrace *rec) +{ +	if (rec->flags & FTRACE_FL_REGS) +		return (unsigned long)FTRACE_REGS_ADDR; +	else +		return (unsigned long)FTRACE_ADDR; +} + +/** + * ftrace_get_addr_curr - Get the call address that is already there + * @rec:  The ftrace record descriptor + * + * The FTRACE_FL_REGS_EN is set when the record already points to + * a function that saves all the regs. Basically the '_EN' version + * represents the current state of the function. + * + * Returns the address of the trampoline that is currently being called + */ +unsigned long ftrace_get_addr_curr(struct dyn_ftrace *rec) +{ +	if (rec->flags & FTRACE_FL_REGS_EN) +		return (unsigned long)FTRACE_REGS_ADDR; +	else +		return (unsigned long)FTRACE_ADDR; +}  static int  __ftrace_replace_code(struct dyn_ftrace *rec, int enable)  { +	unsigned long ftrace_old_addr;  	unsigned long ftrace_addr; -	unsigned long flag = 0UL; +	int ret; -	ftrace_addr = (unsigned long)FTRACE_ADDR; +	ftrace_addr = ftrace_get_addr_new(rec); -	/* -	 * If this record is not to be traced or we want to disable it, -	 * then disable it. -	 * -	 * If we want to enable it and filtering is off, then enable it. -	 * -	 * If we want to enable it and filtering is on, enable it only if -	 * it's filtered -	 */ -	if (enable && !(rec->flags & FTRACE_FL_NOTRACE)) { -		if (!ftrace_filtered || (rec->flags & FTRACE_FL_FILTER)) -			flag = FTRACE_FL_ENABLED; -	} +	/* This needs to be done before we call ftrace_update_record */ +	ftrace_old_addr = ftrace_get_addr_curr(rec); -	/* If the state of this record hasn't changed, then do nothing */ -	if ((rec->flags & FTRACE_FL_ENABLED) == flag) +	ret = ftrace_update_record(rec, enable); + +	switch (ret) { +	case FTRACE_UPDATE_IGNORE:  		return 0; -	if (flag) { -		rec->flags |= FTRACE_FL_ENABLED; +	case FTRACE_UPDATE_MAKE_CALL:  		return ftrace_make_call(rec, ftrace_addr); + +	case FTRACE_UPDATE_MAKE_NOP: +		return ftrace_make_nop(NULL, rec, ftrace_addr); + +	case FTRACE_UPDATE_MODIFY_CALL: +		return ftrace_modify_call(rec, ftrace_old_addr, ftrace_addr);  	} -	rec->flags &= ~FTRACE_FL_ENABLED; -	return ftrace_make_nop(NULL, rec, ftrace_addr); +	return -1; /* unknow ftrace bug */  } -static void ftrace_replace_code(int enable) +void __weak ftrace_replace_code(int enable)  {  	struct dyn_ftrace *rec;  	struct ftrace_page *pg;  	int failed; -	do_for_each_ftrace_rec(pg, rec) { -		/* -		 * Skip over free records, records that have -		 * failed and not converted. 
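The decision table implemented by ftrace_check_record() above (IGNORE / MAKE_CALL / MAKE_NOP / MODIFY_CALL, driven by the reference count and the REGS vs. REGS_EN bits) can be exercised in isolation. Below is a hedged userspace sketch that loosely mirrors that logic; the flag bit values and names are illustrative, not the kernel's FTRACE_FL_* definitions.

#include <stdio.h>

#define FL_ENABLED	(1u << 31)
#define FL_REGS		(1u << 30)
#define FL_REGS_EN	(1u << 29)
#define FL_MASK		(FL_ENABLED | FL_REGS | FL_REGS_EN)
#define REF_MASK	(~FL_MASK)	/* low bits hold the reference count */

enum update { IGNORE, MAKE_CALL, MAKE_NOP, MODIFY_CALL };

static enum update check_record(unsigned int flags, int enable)
{
	unsigned int want = 0;

	/* a record should be enabled iff someone references it */
	if (enable && (flags & REF_MASK))
		want = FL_ENABLED;

	/* regs request changed while enabled -> the call site must change */
	if (want && (!(flags & FL_REGS) != !(flags & FL_REGS_EN)))
		want |= FL_REGS;

	if ((flags & FL_ENABLED) == want)
		return IGNORE;

	if (want)
		return (flags & FL_ENABLED) ? MODIFY_CALL : MAKE_CALL;

	return MAKE_NOP;
}

int main(void)
{
	printf("%d\n", check_record(1, 1));			   /* 1 ref, off  -> MAKE_CALL */
	printf("%d\n", check_record(FL_ENABLED | 1, 1));	   /* already on  -> IGNORE */
	printf("%d\n", check_record(FL_ENABLED | FL_REGS | 1, 1)); /* needs regs  -> MODIFY_CALL */
	printf("%d\n", check_record(FL_ENABLED, 1));		   /* no refs left -> MAKE_NOP */
	return 0;
}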
-		 */ -		if (rec->flags & FTRACE_FL_FREE || -		    rec->flags & FTRACE_FL_FAILED || -		    !(rec->flags & FTRACE_FL_CONVERTED)) -			continue; +	if (unlikely(ftrace_disabled)) +		return; +	do_for_each_ftrace_rec(pg, rec) {  		failed = __ftrace_replace_code(rec, enable);  		if (failed) { -			rec->flags |= FTRACE_FL_FAILED;  			ftrace_bug(failed, rec->ip);  			/* Stop processing */  			return; @@ -1099,6 +1837,78 @@ static void ftrace_replace_code(int enable)  	} while_for_each_ftrace_rec();  } +struct ftrace_rec_iter { +	struct ftrace_page	*pg; +	int			index; +}; + +/** + * ftrace_rec_iter_start, start up iterating over traced functions + * + * Returns an iterator handle that is used to iterate over all + * the records that represent address locations where functions + * are traced. + * + * May return NULL if no records are available. + */ +struct ftrace_rec_iter *ftrace_rec_iter_start(void) +{ +	/* +	 * We only use a single iterator. +	 * Protected by the ftrace_lock mutex. +	 */ +	static struct ftrace_rec_iter ftrace_rec_iter; +	struct ftrace_rec_iter *iter = &ftrace_rec_iter; + +	iter->pg = ftrace_pages_start; +	iter->index = 0; + +	/* Could have empty pages */ +	while (iter->pg && !iter->pg->index) +		iter->pg = iter->pg->next; + +	if (!iter->pg) +		return NULL; + +	return iter; +} + +/** + * ftrace_rec_iter_next, get the next record to process. + * @iter: The handle to the iterator. + * + * Returns the next iterator after the given iterator @iter. + */ +struct ftrace_rec_iter *ftrace_rec_iter_next(struct ftrace_rec_iter *iter) +{ +	iter->index++; + +	if (iter->index >= iter->pg->index) { +		iter->pg = iter->pg->next; +		iter->index = 0; + +		/* Could have empty pages */ +		while (iter->pg && !iter->pg->index) +			iter->pg = iter->pg->next; +	} + +	if (!iter->pg) +		return NULL; + +	return iter; +} + +/** + * ftrace_rec_iter_record, get the record at the iterator location + * @iter: The current iterator location + * + * Returns the record that the current @iter is at. + */ +struct dyn_ftrace *ftrace_rec_iter_record(struct ftrace_rec_iter *iter) +{ +	return &iter->pg->records[iter->index]; +} +  static int  ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec)  { @@ -1107,10 +1917,12 @@ ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec)  	ip = rec->ip; +	if (unlikely(ftrace_disabled)) +		return 0; +  	ret = ftrace_make_nop(mod, rec, MCOUNT_ADDR);  	if (ret) {  		ftrace_bug(ret, ip); -		rec->flags |= FTRACE_FL_FAILED;  		return 0;  	}  	return 1; @@ -1134,26 +1946,83 @@ int __weak ftrace_arch_code_modify_post_process(void)  	return 0;  } -static int __ftrace_modify_code(void *data) +void ftrace_modify_all_code(int command)  { -	int *command = data; +	int update = command & FTRACE_UPDATE_TRACE_FUNC; +	int err = 0; + +	/* +	 * If the ftrace_caller calls a ftrace_ops func directly, +	 * we need to make sure that it only traces functions it +	 * expects to trace. When doing the switch of functions, +	 * we need to update to the ftrace_ops_list_func first +	 * before the transition between old and new calls are set, +	 * as the ftrace_ops_list_func will check the ops hashes +	 * to make sure the ops are having the right functions +	 * traced. 
+	 */ +	if (update) { +		err = ftrace_update_ftrace_func(ftrace_ops_list_func); +		if (FTRACE_WARN_ON(err)) +			return; +	} -	if (*command & FTRACE_ENABLE_CALLS) +	if (command & FTRACE_UPDATE_CALLS)  		ftrace_replace_code(1); -	else if (*command & FTRACE_DISABLE_CALLS) +	else if (command & FTRACE_DISABLE_CALLS)  		ftrace_replace_code(0); -	if (*command & FTRACE_UPDATE_TRACE_FUNC) -		ftrace_update_ftrace_func(ftrace_trace_function); +	if (update && ftrace_trace_function != ftrace_ops_list_func) { +		function_trace_op = set_function_trace_op; +		smp_wmb(); +		/* If irqs are disabled, we are in stop machine */ +		if (!irqs_disabled()) +			smp_call_function(ftrace_sync_ipi, NULL, 1); +		err = ftrace_update_ftrace_func(ftrace_trace_function); +		if (FTRACE_WARN_ON(err)) +			return; +	} -	if (*command & FTRACE_START_FUNC_RET) -		ftrace_enable_ftrace_graph_caller(); -	else if (*command & FTRACE_STOP_FUNC_RET) -		ftrace_disable_ftrace_graph_caller(); +	if (command & FTRACE_START_FUNC_RET) +		err = ftrace_enable_ftrace_graph_caller(); +	else if (command & FTRACE_STOP_FUNC_RET) +		err = ftrace_disable_ftrace_graph_caller(); +	FTRACE_WARN_ON(err); +} + +static int __ftrace_modify_code(void *data) +{ +	int *command = data; + +	ftrace_modify_all_code(*command);  	return 0;  } +/** + * ftrace_run_stop_machine, go back to the stop machine method + * @command: The command to tell ftrace what to do + * + * If an arch needs to fall back to the stop machine method, the + * it can call this function. + */ +void ftrace_run_stop_machine(int command) +{ +	stop_machine(__ftrace_modify_code, &command, NULL); +} + +/** + * arch_ftrace_update_code, modify the code to trace or not trace + * @command: The command that needs to be done + * + * Archs can override this function if it does not need to + * run stop_machine() to modify code. + */ +void __weak arch_ftrace_update_code(int command) +{ +	ftrace_run_stop_machine(command); +} +  static void ftrace_run_update_code(int command)  {  	int ret; @@ -1162,8 +2031,21 @@ static void ftrace_run_update_code(int command)  	FTRACE_WARN_ON(ret);  	if (ret)  		return; +	/* +	 * Do not call function tracer while we update the code. +	 * We are in stop machine. +	 */ +	function_trace_stop++; -	stop_machine(__ftrace_modify_code, &command, NULL); +	/* +	 * By default we use stop_machine() to modify the code. +	 * But archs can do what ever they want as long as it +	 * is safe. The stop_machine() is the safest, but also +	 * produces the most overhead. 
+	 */ +	arch_ftrace_update_code(command); + +	function_trace_stop--;  	ret = ftrace_arch_code_modify_post_process();  	FTRACE_WARN_ON(ret); @@ -1171,6 +2053,12 @@ static void ftrace_run_update_code(int command)  static ftrace_func_t saved_ftrace_func;  static int ftrace_start_up; +static int global_start_up; + +static void control_ops_free(struct ftrace_ops *ops) +{ +	free_percpu(ops->disabled); +}  static void ftrace_startup_enable(int command)  { @@ -1185,21 +2073,39 @@ static void ftrace_startup_enable(int command)  	ftrace_run_update_code(command);  } -static void ftrace_startup(int command) +static int ftrace_startup(struct ftrace_ops *ops, int command)  { +	int ret; +  	if (unlikely(ftrace_disabled)) -		return; +		return -ENODEV; + +	ret = __register_ftrace_function(ops); +	if (ret) +		return ret;  	ftrace_start_up++; -	command |= FTRACE_ENABLE_CALLS; +	command |= FTRACE_UPDATE_CALLS; + +	ops->flags |= FTRACE_OPS_FL_ENABLED; + +	ftrace_hash_rec_enable(ops, 1);  	ftrace_startup_enable(command); + +	return 0;  } -static void ftrace_shutdown(int command) +static int ftrace_shutdown(struct ftrace_ops *ops, int command)  { +	int ret; +  	if (unlikely(ftrace_disabled)) -		return; +		return -ENODEV; + +	ret = __unregister_ftrace_function(ops); +	if (ret) +		return ret;  	ftrace_start_up--;  	/* @@ -1209,18 +2115,54 @@ static void ftrace_shutdown(int command)  	 */  	WARN_ON_ONCE(ftrace_start_up < 0); -	if (!ftrace_start_up) -		command |= FTRACE_DISABLE_CALLS; +	ftrace_hash_rec_disable(ops, 1); + +	if (!global_start_up) +		ops->flags &= ~FTRACE_OPS_FL_ENABLED; + +	command |= FTRACE_UPDATE_CALLS;  	if (saved_ftrace_func != ftrace_trace_function) {  		saved_ftrace_func = ftrace_trace_function;  		command |= FTRACE_UPDATE_TRACE_FUNC;  	} -	if (!command || !ftrace_enabled) -		return; +	if (!command || !ftrace_enabled) { +		/* +		 * If these are control ops, they still need their +		 * per_cpu field freed. Since, function tracing is +		 * not currently active, we can just free them +		 * without synchronizing all CPUs. +		 */ +		if (ops->flags & FTRACE_OPS_FL_CONTROL) +			control_ops_free(ops); +		return 0; +	}  	ftrace_run_update_code(command); + +	/* +	 * Dynamic ops may be freed, we must make sure that all +	 * callers are done before leaving this function. +	 * The same goes for freeing the per_cpu data of the control +	 * ops. +	 * +	 * Again, normal synchronize_sched() is not good enough. +	 * We need to do a hard force of sched synchronization. +	 * This is because we use preempt_disable() to do RCU, but +	 * the function tracers can be called where RCU is not watching +	 * (like before user_exit()). We can not rely on the RCU +	 * infrastructure to do the synchronization, thus we must do it +	 * ourselves. 
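The arch_ftrace_update_code()/ftrace_run_stop_machine() pair above relies on the __weak default-plus-override pattern: the core provides a weak fallback, and an architecture may ship a strong definition that the linker prefers. A small sketch of that pattern with GCC/Clang attributes; arch_update_code is an illustrative name, not a kernel symbol.

#include <stdio.h>

/* weak fallback, analogous to the stop_machine()-based default */
void __attribute__((weak)) arch_update_code(int command)
{
	printf("generic update, command=%d\n", command);
}

/*
 * A separate, arch-specific object file may define a strong
 * arch_update_code(int) of its own; at link time that definition
 * replaces the weak default above.
 */

int main(void)
{
	arch_update_code(3);
	return 0;
}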
+	 */ +	if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_CONTROL)) { +		schedule_on_each_cpu(ftrace_sync); + +		if (ops->flags & FTRACE_OPS_FL_CONTROL) +			control_ops_free(ops); +	} + +	return 0;  }  static void ftrace_startup_sysctl(void) @@ -1232,7 +2174,7 @@ static void ftrace_startup_sysctl(void)  	saved_ftrace_func = NULL;  	/* ftrace_start_up is true if we want ftrace running */  	if (ftrace_start_up) -		ftrace_run_update_code(FTRACE_ENABLE_CALLS); +		ftrace_run_update_code(FTRACE_UPDATE_CALLS);  }  static void ftrace_shutdown_sysctl(void) @@ -1246,115 +2188,226 @@ static void ftrace_shutdown_sysctl(void)  }  static cycle_t		ftrace_update_time; -static unsigned long	ftrace_update_cnt;  unsigned long		ftrace_update_tot_cnt; -static int ftrace_update_code(struct module *mod) +static inline int ops_traces_mod(struct ftrace_ops *ops) +{ +	/* +	 * Filter_hash being empty will default to trace module. +	 * But notrace hash requires a test of individual module functions. +	 */ +	return ftrace_hash_empty(ops->filter_hash) && +		ftrace_hash_empty(ops->notrace_hash); +} + +/* + * Check if the current ops references the record. + * + * If the ops traces all functions, then it was already accounted for. + * If the ops does not trace the current record function, skip it. + * If the ops ignores the function via notrace filter, skip it. + */ +static inline bool +ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec)  { +	/* If ops isn't enabled, ignore it */ +	if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) +		return 0; + +	/* If ops traces all mods, we already accounted for it */ +	if (ops_traces_mod(ops)) +		return 0; + +	/* The function must be in the filter */ +	if (!ftrace_hash_empty(ops->filter_hash) && +	    !ftrace_lookup_ip(ops->filter_hash, rec->ip)) +		return 0; + +	/* If in notrace hash, we ignore it too */ +	if (ftrace_lookup_ip(ops->notrace_hash, rec->ip)) +		return 0; + +	return 1; +} + +static int referenced_filters(struct dyn_ftrace *rec) +{ +	struct ftrace_ops *ops; +	int cnt = 0; + +	for (ops = ftrace_ops_list; ops != &ftrace_list_end; ops = ops->next) { +		if (ops_references_rec(ops, rec)) +		    cnt++; +	} + +	return cnt; +} + +static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs) +{ +	struct ftrace_page *pg;  	struct dyn_ftrace *p;  	cycle_t start, stop; +	unsigned long update_cnt = 0; +	unsigned long ref = 0; +	bool test = false; +	int i; + +	/* +	 * When adding a module, we need to check if tracers are +	 * currently enabled and if they are set to trace all functions. +	 * If they are, we need to enable the module functions as well +	 * as update the reference counts for those function records. 
+	 */ +	if (mod) { +		struct ftrace_ops *ops; + +		for (ops = ftrace_ops_list; +		     ops != &ftrace_list_end; ops = ops->next) { +			if (ops->flags & FTRACE_OPS_FL_ENABLED) { +				if (ops_traces_mod(ops)) +					ref++; +				else +					test = true; +			} +		} +	}  	start = ftrace_now(raw_smp_processor_id()); -	ftrace_update_cnt = 0; -	while (ftrace_new_addrs) { +	for (pg = new_pgs; pg; pg = pg->next) { -		/* If something went wrong, bail without enabling anything */ -		if (unlikely(ftrace_disabled)) -			return -1; +		for (i = 0; i < pg->index; i++) { +			int cnt = ref; -		p = ftrace_new_addrs; -		ftrace_new_addrs = p->newlist; -		p->flags = 0L; +			/* If something went wrong, bail without enabling anything */ +			if (unlikely(ftrace_disabled)) +				return -1; -		/* -		 * Do the initial record convertion from mcount jump -		 * to the NOP instructions. -		 */ -		if (!ftrace_code_disable(mod, p)) { -			ftrace_free_rec(p); -			continue; -		} +			p = &pg->records[i]; +			if (test) +				cnt += referenced_filters(p); +			p->flags = cnt; -		p->flags |= FTRACE_FL_CONVERTED; -		ftrace_update_cnt++; +			/* +			 * Do the initial record conversion from mcount jump +			 * to the NOP instructions. +			 */ +			if (!ftrace_code_disable(mod, p)) +				break; -		/* -		 * If the tracing is enabled, go ahead and enable the record. -		 * -		 * The reason not to enable the record immediatelly is the -		 * inherent check of ftrace_make_nop/ftrace_make_call for -		 * correct previous instructions.  Making first the NOP -		 * conversion puts the module to the correct state, thus -		 * passing the ftrace_make_call check. -		 */ -		if (ftrace_start_up) { -			int failed = __ftrace_replace_code(p, 1); -			if (failed) { -				ftrace_bug(failed, p->ip); -				ftrace_free_rec(p); +			update_cnt++; + +			/* +			 * If the tracing is enabled, go ahead and enable the record. +			 * +			 * The reason not to enable the record immediatelly is the +			 * inherent check of ftrace_make_nop/ftrace_make_call for +			 * correct previous instructions.  Making first the NOP +			 * conversion puts the module to the correct state, thus +			 * passing the ftrace_make_call check. +			 */ +			if (ftrace_start_up && cnt) { +				int failed = __ftrace_replace_code(p, 1); +				if (failed) +					ftrace_bug(failed, p->ip);  			}  		}  	}  	stop = ftrace_now(raw_smp_processor_id());  	ftrace_update_time = stop - start; -	ftrace_update_tot_cnt += ftrace_update_cnt; +	ftrace_update_tot_cnt += update_cnt;  	return 0;  } -static int __init ftrace_dyn_table_alloc(unsigned long num_to_init) +static int ftrace_allocate_records(struct ftrace_page *pg, int count)  { -	struct ftrace_page *pg; +	int order;  	int cnt; -	int i; -	/* allocate a few pages */ -	ftrace_pages_start = (void *)get_zeroed_page(GFP_KERNEL); -	if (!ftrace_pages_start) -		return -1; +	if (WARN_ON(!count)) +		return -EINVAL; + +	order = get_count_order(DIV_ROUND_UP(count, ENTRIES_PER_PAGE));  	/* -	 * Allocate a few more pages. -	 * -	 * TODO: have some parser search vmlinux before -	 *   final linking to find all calls to ftrace. -	 *   Then we can: -	 *    a) know how many pages to allocate. -	 *     and/or -	 *    b) set up the table then. -	 * -	 *  The dynamic code is still necessary for -	 *  modules. +	 * We want to fill as much as possible. No more than a page +	 * may be empty.  	 
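The page-order sizing done by ftrace_allocate_records() below (round the record count up to pages, take the count order, then shrink the order while more than a page would sit empty) is easy to check in userspace. A hedged sketch follows; ENTRY_SIZE is an illustrative stand-in for sizeof(struct dyn_ftrace), and any records that do not fit simply spill onto the next ftrace_page, as the caller's loop does.

#include <stdio.h>

#define PAGE_SIZE	4096UL
#define ENTRY_SIZE	32UL	/* illustrative record size */
#define ENTRIES_PER_PAGE (PAGE_SIZE / ENTRY_SIZE)

/* smallest order with (1 << order) >= n, like get_count_order() */
static int count_order(unsigned long n)
{
	int order = 0;

	while ((1UL << order) < n)
		order++;
	return order;
}

static int pick_order(unsigned long count)
{
	unsigned long pages = (count + ENTRIES_PER_PAGE - 1) / ENTRIES_PER_PAGE;
	int order = count_order(pages);

	/* shrink while the allocation would leave more than a page empty */
	while (order > 0 &&
	       (PAGE_SIZE << order) / ENTRY_SIZE >= count + ENTRIES_PER_PAGE)
		order--;
	return order;
}

int main(void)
{
	unsigned long counts[] = { 100, 129, 600, 5000 };

	for (int i = 0; i < 4; i++)
		printf("%lu records -> order %d (%lu entries fit)\n",
		       counts[i], pick_order(counts[i]),
		       (PAGE_SIZE << pick_order(counts[i])) / ENTRY_SIZE);
	return 0;
}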
*/ +	while ((PAGE_SIZE << order) / ENTRY_SIZE >= count + ENTRIES_PER_PAGE) +		order--; -	pg = ftrace_pages = ftrace_pages_start; + again: +	pg->records = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order); + +	if (!pg->records) { +		/* if we can't allocate this size, try something smaller */ +		if (!order) +			return -ENOMEM; +		order >>= 1; +		goto again; +	} -	cnt = num_to_init / ENTRIES_PER_PAGE; -	pr_info("ftrace: allocating %ld entries in %d pages\n", -		num_to_init, cnt + 1); +	cnt = (PAGE_SIZE << order) / ENTRY_SIZE; +	pg->size = cnt; -	for (i = 0; i < cnt; i++) { -		pg->next = (void *)get_zeroed_page(GFP_KERNEL); +	if (cnt > count) +		cnt = count; -		/* If we fail, we'll try later anyway */ -		if (!pg->next) +	return cnt; +} + +static struct ftrace_page * +ftrace_allocate_pages(unsigned long num_to_init) +{ +	struct ftrace_page *start_pg; +	struct ftrace_page *pg; +	int order; +	int cnt; + +	if (!num_to_init) +		return 0; + +	start_pg = pg = kzalloc(sizeof(*pg), GFP_KERNEL); +	if (!pg) +		return NULL; + +	/* +	 * Try to allocate as much as possible in one continues +	 * location that fills in all of the space. We want to +	 * waste as little space as possible. +	 */ +	for (;;) { +		cnt = ftrace_allocate_records(pg, num_to_init); +		if (cnt < 0) +			goto free_pages; + +		num_to_init -= cnt; +		if (!num_to_init)  			break; +		pg->next = kzalloc(sizeof(*pg), GFP_KERNEL); +		if (!pg->next) +			goto free_pages; +  		pg = pg->next;  	} -	return 0; -} +	return start_pg; -enum { -	FTRACE_ITER_FILTER	= (1 << 0), -	FTRACE_ITER_NOTRACE	= (1 << 1), -	FTRACE_ITER_FAILURES	= (1 << 2), -	FTRACE_ITER_PRINTALL	= (1 << 3), -	FTRACE_ITER_HASH	= (1 << 4), -}; + free_pages: +	while (start_pg) { +		order = get_count_order(pg->size / ENTRIES_PER_PAGE); +		free_pages((unsigned long)pg->records, order); +		start_pg = pg->next; +		kfree(pg); +		pg = start_pg; +	} +	pr_info("ftrace: FAILED to allocate memory for functions\n"); +	return NULL; +}  #define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ @@ -1365,6 +2418,8 @@ struct ftrace_iterator {  	struct dyn_ftrace		*func;  	struct ftrace_func_probe	*probe;  	struct trace_parser		parser; +	struct ftrace_hash		*hash; +	struct ftrace_ops		*ops;  	int				hidx;  	int				idx;  	unsigned			flags; @@ -1418,6 +2473,9 @@ static void *t_hash_start(struct seq_file *m, loff_t *pos)  	void *p = NULL;  	loff_t l; +	if (!(iter->flags & FTRACE_ITER_DO_HASH)) +		return NULL; +  	if (iter->func_pos > *pos)  		return NULL; @@ -1461,13 +2519,17 @@ static void *  t_next(struct seq_file *m, void *v, loff_t *pos)  {  	struct ftrace_iterator *iter = m->private; +	struct ftrace_ops *ops = iter->ops;  	struct dyn_ftrace *rec = NULL; +	if (unlikely(ftrace_disabled)) +		return NULL; +  	if (iter->flags & FTRACE_ITER_HASH)  		return t_hash_next(m, pos);  	(*pos)++; -	iter->pos = *pos; +	iter->pos = iter->func_pos = *pos;  	if (iter->flags & FTRACE_ITER_PRINTALL)  		return t_hash_start(m, pos); @@ -1481,19 +2543,15 @@ t_next(struct seq_file *m, void *v, loff_t *pos)  		}  	} else {  		rec = &iter->pg->records[iter->idx++]; -		if ((rec->flags & FTRACE_FL_FREE) || +		if (((iter->flags & FTRACE_ITER_FILTER) && +		     !(ftrace_lookup_ip(ops->filter_hash, rec->ip))) || -		    (!(iter->flags & FTRACE_ITER_FAILURES) && -		     (rec->flags & FTRACE_FL_FAILED)) || - -		    ((iter->flags & FTRACE_ITER_FAILURES) && -		     !(rec->flags & FTRACE_FL_FAILED)) || +		    ((iter->flags & FTRACE_ITER_NOTRACE) && +		     !ftrace_lookup_ip(ops->notrace_hash, rec->ip)) || -		    
((iter->flags & FTRACE_ITER_FILTER) && -		     !(rec->flags & FTRACE_FL_FILTER)) || +		    ((iter->flags & FTRACE_ITER_ENABLED) && +		     !(rec->flags & FTRACE_FL_ENABLED))) { -		    ((iter->flags & FTRACE_ITER_NOTRACE) && -		     !(rec->flags & FTRACE_FL_NOTRACE))) {  			rec = NULL;  			goto retry;  		} @@ -1502,7 +2560,6 @@ t_next(struct seq_file *m, void *v, loff_t *pos)  	if (!rec)  		return t_hash_start(m, pos); -	iter->func_pos = *pos;  	iter->func = rec;  	return iter; @@ -1512,16 +2569,21 @@ static void reset_iter_read(struct ftrace_iterator *iter)  {  	iter->pos = 0;  	iter->func_pos = 0; -	iter->flags &= ~(FTRACE_ITER_PRINTALL & FTRACE_ITER_HASH); +	iter->flags &= ~(FTRACE_ITER_PRINTALL | FTRACE_ITER_HASH);  }  static void *t_start(struct seq_file *m, loff_t *pos)  {  	struct ftrace_iterator *iter = m->private; +	struct ftrace_ops *ops = iter->ops;  	void *p = NULL;  	loff_t l;  	mutex_lock(&ftrace_lock); + +	if (unlikely(ftrace_disabled)) +		return NULL; +  	/*  	 * If an lseek was done, then reset and start from beginning.  	 */ @@ -1533,7 +2595,8 @@ static void *t_start(struct seq_file *m, loff_t *pos)  	 * off, we can short cut and just print out that all  	 * functions are enabled.  	 */ -	if (iter->flags & FTRACE_ITER_FILTER && !ftrace_filtered) { +	if (iter->flags & FTRACE_ITER_FILTER && +	    ftrace_hash_empty(ops->filter_hash)) {  		if (*pos > 0)  			return t_hash_start(m, pos);  		iter->flags |= FTRACE_ITER_PRINTALL; @@ -1558,12 +2621,8 @@ static void *t_start(struct seq_file *m, loff_t *pos)  			break;  	} -	if (!p) { -		if (iter->flags & FTRACE_ITER_FILTER) -			return t_hash_start(m, pos); - -		return NULL; -	} +	if (!p) +		return t_hash_start(m, pos);  	return iter;  } @@ -1591,7 +2650,12 @@ static int t_show(struct seq_file *m, void *v)  	if (!rec)  		return 0; -	seq_printf(m, "%ps\n", (void *)rec->ip); +	seq_printf(m, "%ps", (void *)rec->ip); +	if (iter->flags & FTRACE_ITER_ENABLED) +		seq_printf(m, " (%ld)%s", +			   rec->flags & ~FTRACE_FL_MASK, +			   rec->flags & FTRACE_FL_REGS ? " R" : ""); +	seq_printf(m, "\n");  	return 0;  } @@ -1607,70 +2671,70 @@ static int  ftrace_avail_open(struct inode *inode, struct file *file)  {  	struct ftrace_iterator *iter; -	int ret;  	if (unlikely(ftrace_disabled))  		return -ENODEV; -	iter = kzalloc(sizeof(*iter), GFP_KERNEL); -	if (!iter) -		return -ENOMEM; - -	iter->pg = ftrace_pages_start; - -	ret = seq_open(file, &show_ftrace_seq_ops); -	if (!ret) { -		struct seq_file *m = file->private_data; - -		m->private = iter; -	} else { -		kfree(iter); +	iter = __seq_open_private(file, &show_ftrace_seq_ops, sizeof(*iter)); +	if (iter) { +		iter->pg = ftrace_pages_start; +		iter->ops = &global_ops;  	} -	return ret; +	return iter ? 0 : -ENOMEM;  }  static int -ftrace_failures_open(struct inode *inode, struct file *file) +ftrace_enabled_open(struct inode *inode, struct file *file)  { -	int ret; -	struct seq_file *m;  	struct ftrace_iterator *iter; -	ret = ftrace_avail_open(inode, file); -	if (!ret) { -		m = file->private_data; -		iter = m->private; -		iter->flags = FTRACE_ITER_FAILURES; +	if (unlikely(ftrace_disabled)) +		return -ENODEV; + +	iter = __seq_open_private(file, &show_ftrace_seq_ops, sizeof(*iter)); +	if (iter) { +		iter->pg = ftrace_pages_start; +		iter->flags = FTRACE_ITER_ENABLED; +		iter->ops = &global_ops;  	} -	return ret; +	return iter ? 
0 : -ENOMEM;  } - -static void ftrace_filter_reset(int enable) +static void ftrace_filter_reset(struct ftrace_hash *hash)  { -	struct ftrace_page *pg; -	struct dyn_ftrace *rec; -	unsigned long type = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; -  	mutex_lock(&ftrace_lock); -	if (enable) -		ftrace_filtered = 0; -	do_for_each_ftrace_rec(pg, rec) { -		if (rec->flags & FTRACE_FL_FAILED) -			continue; -		rec->flags &= ~type; -	} while_for_each_ftrace_rec(); +	ftrace_hash_clear(hash);  	mutex_unlock(&ftrace_lock);  } -static int -ftrace_regex_open(struct inode *inode, struct file *file, int enable) +/** + * ftrace_regex_open - initialize function tracer filter files + * @ops: The ftrace_ops that hold the hash filters + * @flag: The type of filter to process + * @inode: The inode, usually passed in to your open routine + * @file: The file, usually passed in to your open routine + * + * ftrace_regex_open() initializes the filter files for the + * @ops. Depending on @flag it may process the filter hash or + * the notrace hash of @ops. With this called from the open + * routine, you can use ftrace_filter_write() for the write + * routine if @flag has FTRACE_ITER_FILTER set, or + * ftrace_notrace_write() if @flag has FTRACE_ITER_NOTRACE set. + * tracing_lseek() should be used as the lseek routine, and + * release must call ftrace_regex_release(). + */ +int +ftrace_regex_open(struct ftrace_ops *ops, int flag, +		  struct inode *inode, struct file *file)  {  	struct ftrace_iterator *iter; +	struct ftrace_hash *hash;  	int ret = 0; +	ftrace_ops_init(ops); +  	if (unlikely(ftrace_disabled))  		return -ENODEV; @@ -1683,27 +2747,48 @@ ftrace_regex_open(struct inode *inode, struct file *file, int enable)  		return -ENOMEM;  	} -	mutex_lock(&ftrace_regex_lock); +	iter->ops = ops; +	iter->flags = flag; + +	mutex_lock(&ops->regex_lock); + +	if (flag & FTRACE_ITER_NOTRACE) +		hash = ops->notrace_hash; +	else +		hash = ops->filter_hash; + +	if (file->f_mode & FMODE_WRITE) { +		iter->hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, hash); +		if (!iter->hash) { +			trace_parser_put(&iter->parser); +			kfree(iter); +			ret = -ENOMEM; +			goto out_unlock; +		} +	} +  	if ((file->f_mode & FMODE_WRITE) &&  	    (file->f_flags & O_TRUNC)) -		ftrace_filter_reset(enable); +		ftrace_filter_reset(iter->hash);  	if (file->f_mode & FMODE_READ) {  		iter->pg = ftrace_pages_start; -		iter->flags = enable ? 
FTRACE_ITER_FILTER : -			FTRACE_ITER_NOTRACE;  		ret = seq_open(file, &show_ftrace_seq_ops);  		if (!ret) {  			struct seq_file *m = file->private_data;  			m->private = iter;  		} else { +			/* Failed */ +			free_ftrace_hash(iter->hash);  			trace_parser_put(&iter->parser);  			kfree(iter);  		}  	} else  		file->private_data = iter; -	mutex_unlock(&ftrace_regex_lock); + + out_unlock: +	mutex_unlock(&ops->regex_lock);  	return ret;  } @@ -1711,26 +2796,20 @@ ftrace_regex_open(struct inode *inode, struct file *file, int enable)  static int  ftrace_filter_open(struct inode *inode, struct file *file)  { -	return ftrace_regex_open(inode, file, 1); +	struct ftrace_ops *ops = inode->i_private; + +	return ftrace_regex_open(ops, +			FTRACE_ITER_FILTER | FTRACE_ITER_DO_HASH, +			inode, file);  }  static int  ftrace_notrace_open(struct inode *inode, struct file *file)  { -	return ftrace_regex_open(inode, file, 0); -} - -static loff_t -ftrace_regex_lseek(struct file *file, loff_t offset, int origin) -{ -	loff_t ret; - -	if (file->f_mode & FMODE_READ) -		ret = seq_lseek(file, offset, origin); -	else -		file->f_pos = ret = 1; +	struct ftrace_ops *ops = inode->i_private; -	return ret; +	return ftrace_regex_open(ops, FTRACE_ITER_NOTRACE, +				 inode, file);  }  static int ftrace_match(char *str, char *regex, int len, int type) @@ -1762,86 +2841,98 @@ static int ftrace_match(char *str, char *regex, int len, int type)  }  static int -ftrace_match_record(struct dyn_ftrace *rec, char *regex, int len, int type) +enter_record(struct ftrace_hash *hash, struct dyn_ftrace *rec, int not) +{ +	struct ftrace_func_entry *entry; +	int ret = 0; + +	entry = ftrace_lookup_ip(hash, rec->ip); +	if (not) { +		/* Do nothing if it doesn't exist */ +		if (!entry) +			return 0; + +		free_hash_entry(hash, entry); +	} else { +		/* Do nothing if it exists */ +		if (entry) +			return 0; + +		ret = add_hash_entry(hash, rec->ip); +	} +	return ret; +} + +static int +ftrace_match_record(struct dyn_ftrace *rec, char *mod, +		    char *regex, int len, int type)  {  	char str[KSYM_SYMBOL_LEN]; +	char *modname; + +	kallsyms_lookup(rec->ip, NULL, NULL, &modname, str); + +	if (mod) { +		/* module lookup requires matching the module */ +		if (!modname || strcmp(modname, mod)) +			return 0; + +		/* blank search means to match all funcs in the mod */ +		if (!len) +			return 1; +	} -	kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);  	return ftrace_match(str, regex, len, type);  } -static int ftrace_match_records(char *buff, int len, int enable) +static int +match_records(struct ftrace_hash *hash, char *buff, +	      int len, char *mod, int not)  { -	unsigned int search_len; +	unsigned search_len = 0;  	struct ftrace_page *pg;  	struct dyn_ftrace *rec; -	unsigned long flag; -	char *search; -	int type; -	int not; +	int type = MATCH_FULL; +	char *search = buff;  	int found = 0; +	int ret; -	flag = enable ? 
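The kernel-doc for ftrace_regex_open() above spells out how a tracer wires up its own filter file. Below is a minimal sketch of that wiring, assuming the code lives in kernel/trace/ where the FTRACE_ITER_* flags, tracing_lseek() and the regex write/release helpers are visible; all "my_tracer_*" names are invented for illustration.

#include <linux/ftrace.h>
#include <linux/fs.h>
#include <linux/seq_file.h>
#include <linux/ptrace.h>

#include "trace.h"	/* FTRACE_ITER_* flags, tracing_lseek(), regex helpers */

static void my_tracer_call(unsigned long ip, unsigned long parent_ip,
			   struct ftrace_ops *op, struct pt_regs *regs)
{
	/* per-function tracing work goes here */
}

static struct ftrace_ops my_tracer_ops = {
	.func	= my_tracer_call,
	.flags	= FTRACE_OPS_FL_RECURSION_SAFE,
};

static int my_filter_open(struct inode *inode, struct file *file)
{
	/* hand our ops and the filter-hash flags to the common open helper */
	return ftrace_regex_open(&my_tracer_ops,
				 FTRACE_ITER_FILTER | FTRACE_ITER_DO_HASH,
				 inode, file);
}

static const struct file_operations my_filter_fops = {
	.open		= my_filter_open,
	.read		= seq_read,
	.write		= ftrace_filter_write,
	.llseek		= tracing_lseek,
	.release	= ftrace_regex_release,
};

ftrace_filter_open() and ftrace_notrace_open() later in this patch follow exactly this shape for the ops stored in the inode's i_private.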
FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
-	type = filter_parse_regex(buff, len, &search, &not);
-
-	search_len = strlen(search);
+	if (len) {
+		type = filter_parse_regex(buff, len, &search, &not);
+		search_len = strlen(search);
+	}
 
 	mutex_lock(&ftrace_lock);
-	do_for_each_ftrace_rec(pg, rec) {
-		if (rec->flags & FTRACE_FL_FAILED)
-			continue;
+	if (unlikely(ftrace_disabled))
+		goto out_unlock;
 
-		if (ftrace_match_record(rec, search, search_len, type)) {
-			if (not)
-				rec->flags &= ~flag;
-			else
-				rec->flags |= flag;
+	do_for_each_ftrace_rec(pg, rec) {
+		if (ftrace_match_record(rec, mod, search, search_len, type)) {
+			ret = enter_record(hash, rec, not);
+			if (ret < 0) {
+				found = ret;
+				goto out_unlock;
+			}
 			found = 1;
 		}
-		/*
-		 * Only enable filtering if we have a function that
-		 * is filtered on.
-		 */
-		if (enable && (rec->flags & FTRACE_FL_FILTER))
-			ftrace_filtered = 1;
 	} while_for_each_ftrace_rec();
+ out_unlock:
 	mutex_unlock(&ftrace_lock);
 
 	return found;
 }
 
 static int
-ftrace_match_module_record(struct dyn_ftrace *rec, char *mod,
-			   char *regex, int len, int type)
+ftrace_match_records(struct ftrace_hash *hash, char *buff, int len)
 {
-	char str[KSYM_SYMBOL_LEN];
-	char *modname;
-
-	kallsyms_lookup(rec->ip, NULL, NULL, &modname, str);
-
-	if (!modname || strcmp(modname, mod))
-		return 0;
-
-	/* blank search means to match all funcs in the mod */
-	if (len)
-		return ftrace_match(str, regex, len, type);
-	else
-		return 1;
+	return match_records(hash, buff, len, NULL, 0);
 }
 
-static int ftrace_match_module_records(char *buff, char *mod, int enable)
+static int
+ftrace_match_module_records(struct ftrace_hash *hash, char *buff, char *mod)
 {
-	unsigned search_len = 0;
-	struct ftrace_page *pg;
-	struct dyn_ftrace *rec;
-	int type = MATCH_FULL;
-	char *search = buff;
-	unsigned long flag;
 	int not = 0;
-	int found = 0;
-
-	flag = enable ? 
FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;  	/* blank or '*' mean the same */  	if (strcmp(buff, "*") == 0) @@ -1853,32 +2944,7 @@ static int ftrace_match_module_records(char *buff, char *mod, int enable)  		not = 1;  	} -	if (strlen(buff)) { -		type = filter_parse_regex(buff, strlen(buff), &search, ¬); -		search_len = strlen(search); -	} - -	mutex_lock(&ftrace_lock); -	do_for_each_ftrace_rec(pg, rec) { - -		if (rec->flags & FTRACE_FL_FAILED) -			continue; - -		if (ftrace_match_module_record(rec, mod, -					       search, search_len, type)) { -			if (not) -				rec->flags &= ~flag; -			else -				rec->flags |= flag; -			found = 1; -		} -		if (enable && (rec->flags & FTRACE_FL_FILTER)) -			ftrace_filtered = 1; - -	} while_for_each_ftrace_rec(); -	mutex_unlock(&ftrace_lock); - -	return found; +	return match_records(hash, buff, strlen(buff), mod, not);  }  /* @@ -1887,9 +2953,11 @@ static int ftrace_match_module_records(char *buff, char *mod, int enable)   */  static int -ftrace_mod_callback(char *func, char *cmd, char *param, int enable) +ftrace_mod_callback(struct ftrace_hash *hash, +		    char *func, char *cmd, char *param, int enable)  {  	char *mod; +	int ret = -EINVAL;  	/*  	 * cmd == 'mod' because we only registered this func @@ -1901,15 +2969,19 @@ ftrace_mod_callback(char *func, char *cmd, char *param, int enable)  	/* we must have a module name */  	if (!param) -		return -EINVAL; +		return ret;  	mod = strsep(¶m, ":");  	if (!strlen(mod)) -		return -EINVAL; +		return ret; -	if (ftrace_match_module_records(func, mod, enable)) -		return 0; -	return -EINVAL; +	ret = ftrace_match_module_records(hash, func, mod); +	if (!ret) +		ret = -EINVAL; +	if (ret < 0) +		return ret; + +	return 0;  }  static struct ftrace_func_command ftrace_mod_cmd = { @@ -1921,14 +2993,13 @@ static int __init ftrace_mod_cmd_init(void)  {  	return register_ftrace_command(&ftrace_mod_cmd);  } -device_initcall(ftrace_mod_cmd_init); +core_initcall(ftrace_mod_cmd_init); -static void -function_trace_probe_call(unsigned long ip, unsigned long parent_ip) +static void function_trace_probe_call(unsigned long ip, unsigned long parent_ip, +				      struct ftrace_ops *op, struct pt_regs *pt_regs)  {  	struct ftrace_func_probe *entry;  	struct hlist_head *hhd; -	struct hlist_node *n;  	unsigned long key;  	key = hash_long(ip, FTRACE_HASH_BITS); @@ -1944,7 +3015,7 @@ function_trace_probe_call(unsigned long ip, unsigned long parent_ip)  	 * on the hash. rcu_read_lock is too dangerous here.  	 
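For comparison with ftrace_mod_cmd above, this is roughly what registering an additional set_ftrace_filter command looks like. It is a sketch only: the command name "foo" and the callback body are invented, and (as a later hunk in this patch notes) commands are registered from __init context.

#include <linux/ftrace.h>
#include <linux/init.h>

static int ftrace_foo_callback(struct ftrace_hash *hash, char *func,
			       char *cmd, char *param, int enable)
{
	/* add or remove entries in @hash based on @func and @param */
	return 0;
}

static struct ftrace_func_command ftrace_foo_cmd = {
	.name	= "foo",
	.func	= ftrace_foo_callback,
};

static int __init ftrace_foo_cmd_init(void)
{
	return register_ftrace_command(&ftrace_foo_cmd);
}
core_initcall(ftrace_foo_cmd_init);

With that in place, writing "some_function:foo:args" to set_ftrace_filter reaches the callback with "some_function" in @func and "args" in @param, via the ftrace_process_regex() dispatch shown later in this section.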
*/  	preempt_disable_notrace(); -	hlist_for_each_entry_rcu(entry, n, hhd, node) { +	hlist_for_each_entry_rcu_notrace(entry, hhd, node) {  		if (entry->ip == ip)  			entry->ops->func(ip, parent_ip, &entry->data);  	} @@ -1954,16 +3025,23 @@ function_trace_probe_call(unsigned long ip, unsigned long parent_ip)  static struct ftrace_ops trace_probe_ops __read_mostly =  {  	.func		= function_trace_probe_call, +	.flags		= FTRACE_OPS_FL_INITIALIZED, +	INIT_REGEX_LOCK(trace_probe_ops)  };  static int ftrace_probe_registered;  static void __enable_ftrace_function_probe(void)  { +	int ret;  	int i; -	if (ftrace_probe_registered) +	if (ftrace_probe_registered) { +		/* still need to update the function call sites */ +		if (ftrace_enabled) +			ftrace_run_update_code(FTRACE_UPDATE_CALLS);  		return; +	}  	for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) {  		struct hlist_head *hhd = &ftrace_func_hash[i]; @@ -1974,8 +3052,8 @@ static void __enable_ftrace_function_probe(void)  	if (i == FTRACE_FUNC_HASHSIZE)  		return; -	__register_ftrace_function(&trace_probe_ops); -	ftrace_startup(0); +	ret = ftrace_startup(&trace_probe_ops, 0); +  	ftrace_probe_registered = 1;  } @@ -1993,34 +3071,33 @@ static void __disable_ftrace_function_probe(void)  	}  	/* no more funcs left */ -	__unregister_ftrace_function(&trace_probe_ops); -	ftrace_shutdown(0); +	ftrace_shutdown(&trace_probe_ops, 0); +  	ftrace_probe_registered = 0;  } -static void ftrace_free_entry_rcu(struct rcu_head *rhp) +static void ftrace_free_entry(struct ftrace_func_probe *entry)  { -	struct ftrace_func_probe *entry = -		container_of(rhp, struct ftrace_func_probe, rcu); -  	if (entry->ops->free) -		entry->ops->free(&entry->data); +		entry->ops->free(entry->ops, entry->ip, &entry->data);  	kfree(entry);  } -  int  register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,  			      void *data)  {  	struct ftrace_func_probe *entry; +	struct ftrace_hash **orig_hash = &trace_probe_ops.filter_hash; +	struct ftrace_hash *hash;  	struct ftrace_page *pg;  	struct dyn_ftrace *rec;  	int type, len, not;  	unsigned long key;  	int count = 0;  	char *search; +	int ret;  	type = filter_parse_regex(glob, strlen(glob), &search, ¬);  	len = strlen(search); @@ -2029,13 +3106,24 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,  	if (WARN_ON(not))  		return -EINVAL; +	mutex_lock(&trace_probe_ops.regex_lock); + +	hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash); +	if (!hash) { +		count = -ENOMEM; +		goto out; +	} + +	if (unlikely(ftrace_disabled)) { +		count = -ENODEV; +		goto out; +	} +  	mutex_lock(&ftrace_lock); -	do_for_each_ftrace_rec(pg, rec) { -		if (rec->flags & FTRACE_FL_FAILED) -			continue; +	do_for_each_ftrace_rec(pg, rec) { -		if (!ftrace_match_record(rec, search, len, type)) +		if (!ftrace_match_record(rec, NULL, search, len, type))  			continue;  		entry = kmalloc(sizeof(*entry), GFP_KERNEL); @@ -2055,14 +3143,21 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,  		 * for each function we find. We call the callback  		 * to give the caller an opportunity to do so.  		 
*/ -		if (ops->callback) { -			if (ops->callback(rec->ip, &entry->data) < 0) { +		if (ops->init) { +			if (ops->init(ops, rec->ip, &entry->data) < 0) {  				/* caller does not like this func */  				kfree(entry);  				continue;  			}  		} +		ret = enter_record(hash, rec, 0); +		if (ret < 0) { +			kfree(entry); +			count = ret; +			goto out_unlock; +		} +  		entry->ops = ops;  		entry->ip = rec->ip; @@ -2070,10 +3165,18 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,  		hlist_add_head_rcu(&entry->node, &ftrace_func_hash[key]);  	} while_for_each_ftrace_rec(); + +	ret = ftrace_hash_move(&trace_probe_ops, 1, orig_hash, hash); +	if (ret < 0) +		count = ret; +  	__enable_ftrace_function_probe();   out_unlock:  	mutex_unlock(&ftrace_lock); + out: +	mutex_unlock(&trace_probe_ops.regex_lock); +	free_ftrace_hash(hash);  	return count;  } @@ -2087,8 +3190,13 @@ static void  __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,  				  void *data, int flags)  { +	struct ftrace_func_entry *rec_entry;  	struct ftrace_func_probe *entry; -	struct hlist_node *n, *tmp; +	struct ftrace_func_probe *p; +	struct ftrace_hash **orig_hash = &trace_probe_ops.filter_hash; +	struct list_head free_list; +	struct ftrace_hash *hash; +	struct hlist_node *tmp;  	char str[KSYM_SYMBOL_LEN];  	int type = MATCH_FULL;  	int i, len = 0; @@ -2107,11 +3215,19 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,  			return;  	} -	mutex_lock(&ftrace_lock); +	mutex_lock(&trace_probe_ops.regex_lock); + +	hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash); +	if (!hash) +		/* Hmm, should report this somehow */ +		goto out_unlock; + +	INIT_LIST_HEAD(&free_list); +  	for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) {  		struct hlist_head *hhd = &ftrace_func_hash[i]; -		hlist_for_each_entry_safe(entry, n, tmp, hhd, node) { +		hlist_for_each_entry_safe(entry, tmp, hhd, node) {  			/* break up if statements for readability */  			if ((flags & PROBE_TEST_FUNC) && entry->ops != ops) @@ -2128,12 +3244,32 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,  					continue;  			} -			hlist_del(&entry->node); -			call_rcu(&entry->rcu, ftrace_free_entry_rcu); +			rec_entry = ftrace_lookup_ip(hash, entry->ip); +			/* It is possible more than one entry had this ip */ +			if (rec_entry) +				free_hash_entry(hash, rec_entry); + +			hlist_del_rcu(&entry->node); +			list_add(&entry->free_list, &free_list);  		}  	} +	mutex_lock(&ftrace_lock);  	__disable_ftrace_function_probe(); +	/* +	 * Remove after the disable is called. Otherwise, if the last +	 * probe is removed, a null hash means *all enabled*. +	 */ +	ftrace_hash_move(&trace_probe_ops, 1, orig_hash, hash); +	synchronize_sched(); +	list_for_each_entry_safe(entry, p, &free_list, free_list) { +		list_del(&entry->free_list); +		ftrace_free_entry(entry); +	}  	mutex_unlock(&ftrace_lock); +		 + out_unlock: +	mutex_unlock(&trace_probe_ops.regex_lock); +	free_ftrace_hash(hash);  }  void @@ -2158,7 +3294,11 @@ void unregister_ftrace_function_probe_all(char *glob)  static LIST_HEAD(ftrace_commands);  static DEFINE_MUTEX(ftrace_cmd_mutex); -int register_ftrace_command(struct ftrace_func_command *cmd) +/* + * Currently we only register ftrace commands from __init, so mark this + * __init too. 
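register_ftrace_function_probe() above creates one hash entry and one ftrace_func_probe per matching function. A sketch of a caller, assuming it sits in kernel/trace/ beside the existing traceon/traceoff probes where these declarations are visible; the "my_probe_*" names and the "vfs_*" glob are illustrative only.

#include <linux/ftrace.h>

static void my_probe_func(unsigned long ip, unsigned long parent_ip,
			  void **data)
{
	/* runs on entry to every matched function */
}

static int my_probe_init(struct ftrace_probe_ops *ops,
			 unsigned long ip, void **data)
{
	/* per-function setup; a negative return skips this function */
	return 0;
}

static struct ftrace_probe_ops my_probe_ops = {
	.func	= my_probe_func,
	.init	= my_probe_init,
};

static int my_probe_attach(void)
{
	/* returns the number of functions hooked, or a negative error */
	return register_ftrace_function_probe("vfs_*", &my_probe_ops, NULL);
}

The init() hook is the renamed callback() from the removed lines above, and an optional free() hook is invoked from ftrace_free_entry() when the probe is torn down.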
+ */ +__init int register_ftrace_command(struct ftrace_func_command *cmd)  {  	struct ftrace_func_command *p;  	int ret = 0; @@ -2177,7 +3317,11 @@ int register_ftrace_command(struct ftrace_func_command *cmd)  	return ret;  } -int unregister_ftrace_command(struct ftrace_func_command *cmd) +/* + * Currently we only unregister ftrace commands from __init, so mark + * this __init too. + */ +__init int unregister_ftrace_command(struct ftrace_func_command *cmd)  {  	struct ftrace_func_command *p, *n;  	int ret = -ENODEV; @@ -2196,7 +3340,8 @@ int unregister_ftrace_command(struct ftrace_func_command *cmd)  	return ret;  } -static int ftrace_process_regex(char *buff, int len, int enable) +static int ftrace_process_regex(struct ftrace_hash *hash, +				char *buff, int len, int enable)  {  	char *func, *command, *next = buff;  	struct ftrace_func_command *p; @@ -2205,9 +3350,12 @@ static int ftrace_process_regex(char *buff, int len, int enable)  	func = strsep(&next, ":");  	if (!next) { -		if (ftrace_match_records(func, len, enable)) -			return 0; -		return ret; +		ret = ftrace_match_records(hash, func, len); +		if (!ret) +			ret = -EINVAL; +		if (ret < 0) +			return ret; +		return 0;  	}  	/* command found */ @@ -2217,7 +3365,7 @@ static int ftrace_process_regex(char *buff, int len, int enable)  	mutex_lock(&ftrace_cmd_mutex);  	list_for_each_entry(p, &ftrace_commands, list) {  		if (strcmp(p->name, command) == 0) { -			ret = p->func(func, command, next, enable); +			ret = p->func(hash, func, command, next, enable);  			goto out_unlock;  		}  	} @@ -2238,63 +3386,158 @@ ftrace_regex_write(struct file *file, const char __user *ubuf,  	if (!cnt)  		return 0; -	mutex_lock(&ftrace_regex_lock); -  	if (file->f_mode & FMODE_READ) {  		struct seq_file *m = file->private_data;  		iter = m->private;  	} else  		iter = file->private_data; +	if (unlikely(ftrace_disabled)) +		return -ENODEV; + +	/* iter->hash is a local copy, so we don't need regex_lock */ +  	parser = &iter->parser;  	read = trace_get_user(parser, ubuf, cnt, ppos);  	if (read >= 0 && trace_parser_loaded(parser) &&  	    !trace_parser_cont(parser)) { -		ret = ftrace_process_regex(parser->buffer, +		ret = ftrace_process_regex(iter->hash, parser->buffer,  					   parser->idx, enable);  		trace_parser_clear(parser); -		if (ret) -			goto out_unlock; +		if (ret < 0) +			goto out;  	}  	ret = read; -out_unlock: -	mutex_unlock(&ftrace_regex_lock); - + out:  	return ret;  } -static ssize_t +ssize_t  ftrace_filter_write(struct file *file, const char __user *ubuf,  		    size_t cnt, loff_t *ppos)  {  	return ftrace_regex_write(file, ubuf, cnt, ppos, 1);  } -static ssize_t +ssize_t  ftrace_notrace_write(struct file *file, const char __user *ubuf,  		     size_t cnt, loff_t *ppos)  {  	return ftrace_regex_write(file, ubuf, cnt, ppos, 0);  } -static void -ftrace_set_regex(unsigned char *buf, int len, int reset, int enable) +static int +ftrace_match_addr(struct ftrace_hash *hash, unsigned long ip, int remove) +{ +	struct ftrace_func_entry *entry; + +	if (!ftrace_location(ip)) +		return -EINVAL; + +	if (remove) { +		entry = ftrace_lookup_ip(hash, ip); +		if (!entry) +			return -ENOENT; +		free_hash_entry(hash, entry); +		return 0; +	} + +	return add_hash_entry(hash, ip); +} + +static void ftrace_ops_update_code(struct ftrace_ops *ops)  { +	if (ops->flags & FTRACE_OPS_FL_ENABLED && ftrace_enabled) +		ftrace_run_update_code(FTRACE_UPDATE_CALLS); +} + +static int +ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len, +		unsigned long ip, int 
remove, int reset, int enable) +{ +	struct ftrace_hash **orig_hash; +	struct ftrace_hash *hash; +	int ret; +  	if (unlikely(ftrace_disabled)) -		return; +		return -ENODEV; + +	mutex_lock(&ops->regex_lock); + +	if (enable) +		orig_hash = &ops->filter_hash; +	else +		orig_hash = &ops->notrace_hash; + +	hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash); +	if (!hash) { +		ret = -ENOMEM; +		goto out_regex_unlock; +	} -	mutex_lock(&ftrace_regex_lock);  	if (reset) -		ftrace_filter_reset(enable); -	if (buf) -		ftrace_match_records(buf, len, enable); -	mutex_unlock(&ftrace_regex_lock); +		ftrace_filter_reset(hash); +	if (buf && !ftrace_match_records(hash, buf, len)) { +		ret = -EINVAL; +		goto out_regex_unlock; +	} +	if (ip) { +		ret = ftrace_match_addr(hash, ip, remove); +		if (ret < 0) +			goto out_regex_unlock; +	} + +	mutex_lock(&ftrace_lock); +	ret = ftrace_hash_move(ops, enable, orig_hash, hash); +	if (!ret) +		ftrace_ops_update_code(ops); + +	mutex_unlock(&ftrace_lock); + + out_regex_unlock: +	mutex_unlock(&ops->regex_lock); + +	free_ftrace_hash(hash); +	return ret; +} + +static int +ftrace_set_addr(struct ftrace_ops *ops, unsigned long ip, int remove, +		int reset, int enable) +{ +	return ftrace_set_hash(ops, 0, 0, ip, remove, reset, enable); +} + +/** + * ftrace_set_filter_ip - set a function to filter on in ftrace by address + * @ops - the ops to set the filter with + * @ip - the address to add to or remove from the filter. + * @remove - non zero to remove the ip from the filter + * @reset - non zero to reset all filters before applying this filter. + * + * Filters denote which functions should be enabled when tracing is enabled + * If @ip is NULL, it failes to update filter. + */ +int ftrace_set_filter_ip(struct ftrace_ops *ops, unsigned long ip, +			 int remove, int reset) +{ +	ftrace_ops_init(ops); +	return ftrace_set_addr(ops, ip, remove, reset, 1); +} +EXPORT_SYMBOL_GPL(ftrace_set_filter_ip); + +static int +ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len, +		 int reset, int enable) +{ +	return ftrace_set_hash(ops, buf, len, 0, 0, reset, enable);  }  /**   * ftrace_set_filter - set a function to filter on in ftrace + * @ops - the ops to set the filter with   * @buf - the string that holds the function filter text.   * @len - the length of the string.   * @reset - non zero to reset all filters before applying this filter. @@ -2302,13 +3545,17 @@ ftrace_set_regex(unsigned char *buf, int len, int reset, int enable)   * Filters denote which functions should be enabled when tracing is enabled.   * If @buf is NULL and reset is set, all functions will be enabled for tracing.   */ -void ftrace_set_filter(unsigned char *buf, int len, int reset) +int ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf, +		       int len, int reset)  { -	ftrace_set_regex(buf, len, reset, 1); +	ftrace_ops_init(ops); +	return ftrace_set_regex(ops, buf, len, reset, 1);  } +EXPORT_SYMBOL_GPL(ftrace_set_filter);  /**   * ftrace_set_notrace - set a function to not trace in ftrace + * @ops - the ops to set the notrace filter with   * @buf - the string that holds the function notrace text.   * @len - the length of the string.   * @reset - non zero to reset all filters before applying this filter. @@ -2317,10 +3564,43 @@ void ftrace_set_filter(unsigned char *buf, int len, int reset)   * is enabled. If @buf is NULL and reset is set, all functions will be enabled   * for tracing.   
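ftrace_set_filter_ip() together with register_ftrace_function() (exported further down in this patch) is enough for a module to hook one function. A minimal sketch under stated assumptions: the symbol "do_sys_open", the "my_*" names and the use of kallsyms_lookup_name() are examples rather than anything this patch defines, and error handling is trimmed.

#include <linux/module.h>
#include <linux/ftrace.h>
#include <linux/kallsyms.h>

static void my_trace_func(unsigned long ip, unsigned long parent_ip,
			  struct ftrace_ops *op, struct pt_regs *regs)
{
	/* keep this minimal: it runs on every entry to the hooked function */
}

static struct ftrace_ops my_ops = {
	.func	= my_trace_func,
	.flags	= FTRACE_OPS_FL_RECURSION_SAFE,
};

static unsigned long target_ip;

static int __init my_hook_init(void)
{
	int ret;

	target_ip = kallsyms_lookup_name("do_sys_open");
	if (!target_ip)
		return -ENOENT;

	/* remove=0, reset=1: the filter ends up containing only this address */
	ret = ftrace_set_filter_ip(&my_ops, target_ip, 0, 1);
	if (ret)
		return ret;

	return register_ftrace_function(&my_ops);
}

static void __exit my_hook_exit(void)
{
	unregister_ftrace_function(&my_ops);
	ftrace_set_filter_ip(&my_ops, target_ip, 1, 0);
}

module_init(my_hook_init);
module_exit(my_hook_exit);
MODULE_LICENSE("GPL");

Filtering by name instead would use the reworked ftrace_set_filter(&my_ops, "do_sys_open", strlen("do_sys_open"), 1) shown just below.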
*/ -void ftrace_set_notrace(unsigned char *buf, int len, int reset) +int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf, +			int len, int reset) +{ +	ftrace_ops_init(ops); +	return ftrace_set_regex(ops, buf, len, reset, 0); +} +EXPORT_SYMBOL_GPL(ftrace_set_notrace); +/** + * ftrace_set_global_filter - set a function to filter on with global tracers + * @buf - the string that holds the function filter text. + * @len - the length of the string. + * @reset - non zero to reset all filters before applying this filter. + * + * Filters denote which functions should be enabled when tracing is enabled. + * If @buf is NULL and reset is set, all functions will be enabled for tracing. + */ +void ftrace_set_global_filter(unsigned char *buf, int len, int reset)  { -	ftrace_set_regex(buf, len, reset, 0); +	ftrace_set_regex(&global_ops, buf, len, reset, 1);  } +EXPORT_SYMBOL_GPL(ftrace_set_global_filter); + +/** + * ftrace_set_global_notrace - set a function to not trace with global tracers + * @buf - the string that holds the function notrace text. + * @len - the length of the string. + * @reset - non zero to reset all filters before applying this filter. + * + * Notrace Filters denote which functions should not be enabled when tracing + * is enabled. If @buf is NULL and reset is set, all functions will be enabled + * for tracing. + */ +void ftrace_set_global_notrace(unsigned char *buf, int len, int reset) +{ +	ftrace_set_regex(&global_ops, buf, len, reset, 0); +} +EXPORT_SYMBOL_GPL(ftrace_set_global_notrace);  /*   * command line interface to allow users to set filters on boot up. @@ -2329,23 +3609,28 @@ void ftrace_set_notrace(unsigned char *buf, int len, int reset)  static char ftrace_notrace_buf[FTRACE_FILTER_SIZE] __initdata;  static char ftrace_filter_buf[FTRACE_FILTER_SIZE] __initdata; +/* Used by function selftest to not test if filter is set */ +bool ftrace_filter_param __initdata; +  static int __init set_ftrace_notrace(char *str)  { -	strncpy(ftrace_notrace_buf, str, FTRACE_FILTER_SIZE); +	ftrace_filter_param = true; +	strlcpy(ftrace_notrace_buf, str, FTRACE_FILTER_SIZE);  	return 1;  }  __setup("ftrace_notrace=", set_ftrace_notrace);  static int __init set_ftrace_filter(char *str)  { -	strncpy(ftrace_filter_buf, str, FTRACE_FILTER_SIZE); +	ftrace_filter_param = true; +	strlcpy(ftrace_filter_buf, str, FTRACE_FILTER_SIZE);  	return 1;  }  __setup("ftrace_filter=", set_ftrace_filter);  #ifdef CONFIG_FUNCTION_GRAPH_TRACER  static char ftrace_graph_buf[FTRACE_FILTER_SIZE] __initdata; -static int ftrace_set_func(unsigned long *array, int *idx, char *buffer); +static int ftrace_set_func(unsigned long *array, int *idx, int size, char *buffer);  static int __init set_graph_function(char *str)  { @@ -2363,7 +3648,7 @@ static void __init set_ftrace_early_graph(char *buf)  		func = strsep(&buf, ",");  		/* we allow only one expression at a time */  		ret = ftrace_set_func(ftrace_graph_funcs, &ftrace_graph_count, -				      func); +				      FTRACE_GRAPH_MAX_FUNCS, func);  		if (ret)  			printk(KERN_DEBUG "ftrace: function %s not "  					  "traceable\n", func); @@ -2371,39 +3656,42 @@ static void __init set_ftrace_early_graph(char *buf)  }  #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ -static void __init set_ftrace_early_filter(char *buf, int enable) +void __init +ftrace_set_early_filter(struct ftrace_ops *ops, char *buf, int enable)  {  	char *func; +	ftrace_ops_init(ops); +  	while (buf) {  		func = strsep(&buf, ","); -		ftrace_set_regex(func, strlen(func), 0, enable); +		
ftrace_set_regex(ops, func, strlen(func), 0, enable);  	}  }  static void __init set_ftrace_early_filters(void)  {  	if (ftrace_filter_buf[0]) -		set_ftrace_early_filter(ftrace_filter_buf, 1); +		ftrace_set_early_filter(&global_ops, ftrace_filter_buf, 1);  	if (ftrace_notrace_buf[0]) -		set_ftrace_early_filter(ftrace_notrace_buf, 0); +		ftrace_set_early_filter(&global_ops, ftrace_notrace_buf, 0);  #ifdef CONFIG_FUNCTION_GRAPH_TRACER  	if (ftrace_graph_buf[0])  		set_ftrace_early_graph(ftrace_graph_buf);  #endif /* CONFIG_FUNCTION_GRAPH_TRACER */  } -static int -ftrace_regex_release(struct inode *inode, struct file *file, int enable) +int ftrace_regex_release(struct inode *inode, struct file *file)  {  	struct seq_file *m = (struct seq_file *)file->private_data;  	struct ftrace_iterator *iter; +	struct ftrace_hash **orig_hash;  	struct trace_parser *parser; +	int filter_hash; +	int ret; -	mutex_lock(&ftrace_regex_lock);  	if (file->f_mode & FMODE_READ) {  		iter = m->private; -  		seq_release(inode, file);  	} else  		iter = file->private_data; @@ -2411,31 +3699,35 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable)  	parser = &iter->parser;  	if (trace_parser_loaded(parser)) {  		parser->buffer[parser->idx] = 0; -		ftrace_match_records(parser->buffer, parser->idx, enable); +		ftrace_match_records(iter->hash, parser->buffer, parser->idx);  	} -	mutex_lock(&ftrace_lock); -	if (ftrace_start_up && ftrace_enabled) -		ftrace_run_update_code(FTRACE_ENABLE_CALLS); -	mutex_unlock(&ftrace_lock); -  	trace_parser_put(parser); -	kfree(iter); -	mutex_unlock(&ftrace_regex_lock); -	return 0; -} +	mutex_lock(&iter->ops->regex_lock); -static int -ftrace_filter_release(struct inode *inode, struct file *file) -{ -	return ftrace_regex_release(inode, file, 1); -} +	if (file->f_mode & FMODE_WRITE) { +		filter_hash = !!(iter->flags & FTRACE_ITER_FILTER); -static int -ftrace_notrace_release(struct inode *inode, struct file *file) -{ -	return ftrace_regex_release(inode, file, 0); +		if (filter_hash) +			orig_hash = &iter->ops->filter_hash; +		else +			orig_hash = &iter->ops->notrace_hash; + +		mutex_lock(&ftrace_lock); +		ret = ftrace_hash_move(iter->ops, filter_hash, +				       orig_hash, iter->hash); +		if (!ret) +			ftrace_ops_update_code(iter->ops); + +		mutex_unlock(&ftrace_lock); +	} + +	mutex_unlock(&iter->ops->regex_lock); +	free_ftrace_hash(iter->hash); +	kfree(iter); + +	return 0;  }  static const struct file_operations ftrace_avail_fops = { @@ -2445,8 +3737,8 @@ static const struct file_operations ftrace_avail_fops = {  	.release = seq_release_private,  }; -static const struct file_operations ftrace_failures_fops = { -	.open = ftrace_failures_open, +static const struct file_operations ftrace_enabled_fops = { +	.open = ftrace_enabled_open,  	.read = seq_read,  	.llseek = seq_lseek,  	.release = seq_release_private, @@ -2456,16 +3748,16 @@ static const struct file_operations ftrace_filter_fops = {  	.open = ftrace_filter_open,  	.read = seq_read,  	.write = ftrace_filter_write, -	.llseek = ftrace_regex_lseek, -	.release = ftrace_filter_release, +	.llseek = tracing_lseek, +	.release = ftrace_regex_release,  };  static const struct file_operations ftrace_notrace_fops = {  	.open = ftrace_notrace_open,  	.read = seq_read,  	.write = ftrace_notrace_write, -	.llseek = ftrace_regex_lseek, -	.release = ftrace_notrace_release, +	.llseek = tracing_lseek, +	.release = ftrace_regex_release,  };  #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -2473,15 +3765,25 @@ static const struct file_operations 
ftrace_notrace_fops = {  static DEFINE_MUTEX(graph_lock);  int ftrace_graph_count; -int ftrace_graph_filter_enabled; +int ftrace_graph_notrace_count;  unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly; +unsigned long ftrace_graph_notrace_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly; + +struct ftrace_graph_data { +	unsigned long *table; +	size_t size; +	int *count; +	const struct seq_operations *seq_ops; +};  static void *  __g_next(struct seq_file *m, loff_t *pos)  { -	if (*pos >= ftrace_graph_count) +	struct ftrace_graph_data *fgd = m->private; + +	if (*pos >= *fgd->count)  		return NULL; -	return &ftrace_graph_funcs[*pos]; +	return &fgd->table[*pos];  }  static void * @@ -2493,10 +3795,12 @@ g_next(struct seq_file *m, void *v, loff_t *pos)  static void *g_start(struct seq_file *m, loff_t *pos)  { +	struct ftrace_graph_data *fgd = m->private; +  	mutex_lock(&graph_lock);  	/* Nothing, tell g_show to print all functions are enabled */ -	if (!ftrace_graph_filter_enabled && !*pos) +	if (!*fgd->count && !*pos)  		return (void *)1;  	return __g_next(m, pos); @@ -2532,38 +3836,88 @@ static const struct seq_operations ftrace_graph_seq_ops = {  };  static int -ftrace_graph_open(struct inode *inode, struct file *file) +__ftrace_graph_open(struct inode *inode, struct file *file, +		    struct ftrace_graph_data *fgd)  {  	int ret = 0; -	if (unlikely(ftrace_disabled)) -		return -ENODEV; -  	mutex_lock(&graph_lock);  	if ((file->f_mode & FMODE_WRITE) &&  	    (file->f_flags & O_TRUNC)) { -		ftrace_graph_filter_enabled = 0; -		ftrace_graph_count = 0; -		memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs)); +		*fgd->count = 0; +		memset(fgd->table, 0, fgd->size * sizeof(*fgd->table));  	}  	mutex_unlock(&graph_lock); -	if (file->f_mode & FMODE_READ) -		ret = seq_open(file, &ftrace_graph_seq_ops); +	if (file->f_mode & FMODE_READ) { +		ret = seq_open(file, fgd->seq_ops); +		if (!ret) { +			struct seq_file *m = file->private_data; +			m->private = fgd; +		} +	} else +		file->private_data = fgd;  	return ret;  }  static int +ftrace_graph_open(struct inode *inode, struct file *file) +{ +	struct ftrace_graph_data *fgd; + +	if (unlikely(ftrace_disabled)) +		return -ENODEV; + +	fgd = kmalloc(sizeof(*fgd), GFP_KERNEL); +	if (fgd == NULL) +		return -ENOMEM; + +	fgd->table = ftrace_graph_funcs; +	fgd->size = FTRACE_GRAPH_MAX_FUNCS; +	fgd->count = &ftrace_graph_count; +	fgd->seq_ops = &ftrace_graph_seq_ops; + +	return __ftrace_graph_open(inode, file, fgd); +} + +static int +ftrace_graph_notrace_open(struct inode *inode, struct file *file) +{ +	struct ftrace_graph_data *fgd; + +	if (unlikely(ftrace_disabled)) +		return -ENODEV; + +	fgd = kmalloc(sizeof(*fgd), GFP_KERNEL); +	if (fgd == NULL) +		return -ENOMEM; + +	fgd->table = ftrace_graph_notrace_funcs; +	fgd->size = FTRACE_GRAPH_MAX_FUNCS; +	fgd->count = &ftrace_graph_notrace_count; +	fgd->seq_ops = &ftrace_graph_seq_ops; + +	return __ftrace_graph_open(inode, file, fgd); +} + +static int  ftrace_graph_release(struct inode *inode, struct file *file)  { -	if (file->f_mode & FMODE_READ) +	if (file->f_mode & FMODE_READ) { +		struct seq_file *m = file->private_data; + +		kfree(m->private);  		seq_release(inode, file); +	} else { +		kfree(file->private_data); +	} +  	return 0;  }  static int -ftrace_set_func(unsigned long *array, int *idx, char *buffer) +ftrace_set_func(unsigned long *array, int *idx, int size, char *buffer)  {  	struct dyn_ftrace *rec;  	struct ftrace_page *pg; @@ -2574,23 +3928,23 @@ ftrace_set_func(unsigned long *array, int 
*idx, char *buffer)  	bool exists;  	int i; -	if (ftrace_disabled) -		return -ENODEV; -  	/* decode regex */  	type = filter_parse_regex(buffer, strlen(buffer), &search, ¬); -	if (!not && *idx >= FTRACE_GRAPH_MAX_FUNCS) +	if (!not && *idx >= size)  		return -EBUSY;  	search_len = strlen(search);  	mutex_lock(&ftrace_lock); -	do_for_each_ftrace_rec(pg, rec) { -		if (rec->flags & (FTRACE_FL_FAILED | FTRACE_FL_FREE)) -			continue; +	if (unlikely(ftrace_disabled)) { +		mutex_unlock(&ftrace_lock); +		return -ENODEV; +	} + +	do_for_each_ftrace_rec(pg, rec) { -		if (ftrace_match_record(rec, search, search_len, type)) { +		if (ftrace_match_record(rec, NULL, search, search_len, type)) {  			/* if it is in the array */  			exists = false;  			for (i = 0; i < *idx; i++) { @@ -2604,7 +3958,7 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)  				fail = 0;  				if (!exists) {  					array[(*idx)++] = rec->ip; -					if (*idx >= FTRACE_GRAPH_MAX_FUNCS) +					if (*idx >= size)  						goto out;  				}  			} else { @@ -2622,7 +3976,6 @@ out:  	if (fail)  		return -EINVAL; -	ftrace_graph_filter_enabled = 1;  	return 0;  } @@ -2631,36 +3984,33 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,  		   size_t cnt, loff_t *ppos)  {  	struct trace_parser parser; -	ssize_t read, ret; +	ssize_t read, ret = 0; +	struct ftrace_graph_data *fgd = file->private_data;  	if (!cnt)  		return 0; -	mutex_lock(&graph_lock); - -	if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX)) { -		ret = -ENOMEM; -		goto out_unlock; -	} +	if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX)) +		return -ENOMEM;  	read = trace_get_user(&parser, ubuf, cnt, ppos);  	if (read >= 0 && trace_parser_loaded((&parser))) {  		parser.buffer[parser.idx] = 0; +		mutex_lock(&graph_lock); +  		/* we allow only one expression at a time */ -		ret = ftrace_set_func(ftrace_graph_funcs, &ftrace_graph_count, -					parser.buffer); -		if (ret) -			goto out_free; +		ret = ftrace_set_func(fgd->table, fgd->count, fgd->size, +				      parser.buffer); + +		mutex_unlock(&graph_lock);  	} -	ret = read; +	if (!ret) +		ret = read; -out_free:  	trace_parser_put(&parser); -out_unlock: -	mutex_unlock(&graph_lock);  	return ret;  } @@ -2669,45 +4019,146 @@ static const struct file_operations ftrace_graph_fops = {  	.open		= ftrace_graph_open,  	.read		= seq_read,  	.write		= ftrace_graph_write, +	.llseek		= tracing_lseek, +	.release	= ftrace_graph_release, +}; + +static const struct file_operations ftrace_graph_notrace_fops = { +	.open		= ftrace_graph_notrace_open, +	.read		= seq_read, +	.write		= ftrace_graph_write, +	.llseek		= tracing_lseek,  	.release	= ftrace_graph_release, -	.llseek		= seq_lseek,  };  #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ +void ftrace_create_filter_files(struct ftrace_ops *ops, +				struct dentry *parent) +{ + +	trace_create_file("set_ftrace_filter", 0644, parent, +			  ops, &ftrace_filter_fops); + +	trace_create_file("set_ftrace_notrace", 0644, parent, +			  ops, &ftrace_notrace_fops); +} + +/* + * The name "destroy_filter_files" is really a misnomer. Although + * in the future, it may actualy delete the files, but this is + * really intended to make sure the ops passed in are disabled + * and that when this function returns, the caller is free to + * free the ops. + * + * The "destroy" name is only to match the "create" name that this + * should be paired with. 
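ftrace_create_filter_files() above, together with the ftrace_destroy_filter_files() whose body follows, gives an ops with its own directory a private pair of set_ftrace_filter/set_ftrace_notrace files. A brief sketch, for code inside kernel/trace/, with invented "my_instance_*" names and a dentry assumed to come from the caller:

#include <linux/ftrace.h>
#include <linux/fs.h>
#include <linux/ptrace.h>

static void my_instance_call(unsigned long ip, unsigned long parent_ip,
			     struct ftrace_ops *op, struct pt_regs *regs)
{
	/* instance callback */
}

static struct ftrace_ops my_instance_ops = {
	.func	= my_instance_call,
	.flags	= FTRACE_OPS_FL_RECURSION_SAFE,
};

static void my_instance_add_files(struct dentry *d_instance)
{
	/* creates set_ftrace_filter and set_ftrace_notrace under @d_instance */
	ftrace_create_filter_files(&my_instance_ops, d_instance);
}

static void my_instance_teardown(void)
{
	/* shuts the ops down and marks it DELETED; only then may it be freed */
	ftrace_destroy_filter_files(&my_instance_ops);
}

As the comment above stresses, "destroy" does not actually remove the files; it only guarantees the ops is disabled so the caller is free to release it.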
+ */ +void ftrace_destroy_filter_files(struct ftrace_ops *ops) +{ +	mutex_lock(&ftrace_lock); +	if (ops->flags & FTRACE_OPS_FL_ENABLED) +		ftrace_shutdown(ops, 0); +	ops->flags |= FTRACE_OPS_FL_DELETED; +	mutex_unlock(&ftrace_lock); +} +  static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)  {  	trace_create_file("available_filter_functions", 0444,  			d_tracer, NULL, &ftrace_avail_fops); -	trace_create_file("failures", 0444, -			d_tracer, NULL, &ftrace_failures_fops); - -	trace_create_file("set_ftrace_filter", 0644, d_tracer, -			NULL, &ftrace_filter_fops); +	trace_create_file("enabled_functions", 0444, +			d_tracer, NULL, &ftrace_enabled_fops); -	trace_create_file("set_ftrace_notrace", 0644, d_tracer, -				    NULL, &ftrace_notrace_fops); +	ftrace_create_filter_files(&global_ops, d_tracer);  #ifdef CONFIG_FUNCTION_GRAPH_TRACER  	trace_create_file("set_graph_function", 0444, d_tracer,  				    NULL,  				    &ftrace_graph_fops); +	trace_create_file("set_graph_notrace", 0444, d_tracer, +				    NULL, +				    &ftrace_graph_notrace_fops);  #endif /* CONFIG_FUNCTION_GRAPH_TRACER */  	return 0;  } +static int ftrace_cmp_ips(const void *a, const void *b) +{ +	const unsigned long *ipa = a; +	const unsigned long *ipb = b; + +	if (*ipa > *ipb) +		return 1; +	if (*ipa < *ipb) +		return -1; +	return 0; +} + +static void ftrace_swap_ips(void *a, void *b, int size) +{ +	unsigned long *ipa = a; +	unsigned long *ipb = b; +	unsigned long t; + +	t = *ipa; +	*ipa = *ipb; +	*ipb = t; +} +  static int ftrace_process_locs(struct module *mod,  			       unsigned long *start,  			       unsigned long *end)  { +	struct ftrace_page *start_pg; +	struct ftrace_page *pg; +	struct dyn_ftrace *rec; +	unsigned long count;  	unsigned long *p;  	unsigned long addr; -	unsigned long flags; +	unsigned long flags = 0; /* Shut up gcc */ +	int ret = -ENOMEM; + +	count = end - start; + +	if (!count) +		return 0; + +	sort(start, count, sizeof(*start), +	     ftrace_cmp_ips, ftrace_swap_ips); + +	start_pg = ftrace_allocate_pages(count); +	if (!start_pg) +		return -ENOMEM;  	mutex_lock(&ftrace_lock); + +	/* +	 * Core and each module needs their own pages, as +	 * modules will free them when they are removed. +	 * Force a new page to be allocated for modules. +	 */ +	if (!mod) { +		WARN_ON(ftrace_pages || ftrace_pages_start); +		/* First initialization */ +		ftrace_pages = ftrace_pages_start = start_pg; +	} else { +		if (!ftrace_pages) +			goto out; + +		if (WARN_ON(ftrace_pages->next)) { +			/* Hmm, we have free pages? */ +			while (ftrace_pages->next) +				ftrace_pages = ftrace_pages->next; +		} + +		ftrace_pages->next = start_pg; +	} +  	p = start; +	pg = start_pg;  	while (p < end) {  		addr = ftrace_call_adjust(*p++);  		/* @@ -2718,38 +4169,87 @@ static int ftrace_process_locs(struct module *mod,  		 */  		if (!addr)  			continue; -		ftrace_record_ip(addr); + +		if (pg->index == pg->size) { +			/* We should have allocated enough */ +			if (WARN_ON(!pg->next)) +				break; +			pg = pg->next; +		} + +		rec = &pg->records[pg->index++]; +		rec->ip = addr;  	} -	/* disable interrupts to prevent kstop machine */ -	local_irq_save(flags); -	ftrace_update_code(mod); -	local_irq_restore(flags); +	/* We should have used all pages */ +	WARN_ON(pg->next); + +	/* Assign the last page to ftrace_pages */ +	ftrace_pages = pg; + +	/* +	 * We only need to disable interrupts on start up +	 * because we are modifying code that an interrupt +	 * may execute, and the modification is not atomic. 
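Sorting the mcount table up front with sort() and the ftrace_cmp_ips()/ftrace_swap_ips() helpers above is what lets later lookups binary-search the record pages by address. A standalone userspace sketch of the same idea, with fabricated addresses:

#include <stdio.h>
#include <stdlib.h>

static int cmp_ips(const void *a, const void *b)
{
	const unsigned long *ipa = a, *ipb = b;

	if (*ipa > *ipb)
		return 1;
	if (*ipa < *ipb)
		return -1;
	return 0;
}

int main(void)
{
	unsigned long ips[] = { 0xc0103400UL, 0xc0100000UL, 0xc0101230UL };
	unsigned long key = 0xc0101230UL;
	unsigned long *hit;

	qsort(ips, 3, sizeof(ips[0]), cmp_ips);
	hit = bsearch(&key, ips, 3, sizeof(ips[0]), cmp_ips);
	printf("0x%lx %s\n", key, hit ? "found" : "not found");
	return 0;
}

The kernel version also passes ftrace_swap_ips() so that sort() swaps the raw unsigned longs directly instead of falling back to its generic byte swapper.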
+	 * But for modules, nothing runs the code we modify +	 * until we are finished with it, and there's no +	 * reason to cause large interrupt latencies while we do it. +	 */ +	if (!mod) +		local_irq_save(flags); +	ftrace_update_code(mod, start_pg); +	if (!mod) +		local_irq_restore(flags); +	ret = 0; + out:  	mutex_unlock(&ftrace_lock); -	return 0; +	return ret;  }  #ifdef CONFIG_MODULES + +#define next_to_ftrace_page(p) container_of(p, struct ftrace_page, next) +  void ftrace_release_mod(struct module *mod)  {  	struct dyn_ftrace *rec; +	struct ftrace_page **last_pg;  	struct ftrace_page *pg; +	int order; + +	mutex_lock(&ftrace_lock);  	if (ftrace_disabled) -		return; +		goto out_unlock; -	mutex_lock(&ftrace_lock); -	do_for_each_ftrace_rec(pg, rec) { +	/* +	 * Each module has its own ftrace_pages, remove +	 * them from the list. +	 */ +	last_pg = &ftrace_pages_start; +	for (pg = ftrace_pages_start; pg; pg = *last_pg) { +		rec = &pg->records[0];  		if (within_module_core(rec->ip, mod)) {  			/* -			 * rec->ip is changed in ftrace_free_rec() -			 * It should not between s and e if record was freed. +			 * As core pages are first, the first +			 * page should never be a module page.  			 */ -			FTRACE_WARN_ON(rec->flags & FTRACE_FL_FREE); -			ftrace_free_rec(rec); -		} -	} while_for_each_ftrace_rec(); +			if (WARN_ON(pg == ftrace_pages_start)) +				goto out_unlock; + +			/* Check if we are deleting the last page */ +			if (pg == ftrace_pages) +				ftrace_pages = next_to_ftrace_page(last_pg); + +			*last_pg = pg->next; +			order = get_count_order(pg->size / ENTRIES_PER_PAGE); +			free_pages((unsigned long)pg->records, order); +			kfree(pg); +		} else +			last_pg = &pg->next; +	} + out_unlock:  	mutex_unlock(&ftrace_lock);  } @@ -2761,61 +4261,57 @@ static void ftrace_init_module(struct module *mod,  	ftrace_process_locs(mod, start, end);  } -static int ftrace_module_notify(struct notifier_block *self, -				unsigned long val, void *data) +void ftrace_module_init(struct module *mod) +{ +	ftrace_init_module(mod, mod->ftrace_callsites, +			   mod->ftrace_callsites + +			   mod->num_ftrace_callsites); +} + +static int ftrace_module_notify_exit(struct notifier_block *self, +				     unsigned long val, void *data)  {  	struct module *mod = data; -	switch (val) { -	case MODULE_STATE_COMING: -		ftrace_init_module(mod, mod->ftrace_callsites, -				   mod->ftrace_callsites + -				   mod->num_ftrace_callsites); -		break; -	case MODULE_STATE_GOING: +	if (val == MODULE_STATE_GOING)  		ftrace_release_mod(mod); -		break; -	}  	return 0;  }  #else -static int ftrace_module_notify(struct notifier_block *self, -				unsigned long val, void *data) +static int ftrace_module_notify_exit(struct notifier_block *self, +				     unsigned long val, void *data)  {  	return 0;  }  #endif /* CONFIG_MODULES */ -struct notifier_block ftrace_module_nb = { -	.notifier_call = ftrace_module_notify, -	.priority = 0, +struct notifier_block ftrace_module_exit_nb = { +	.notifier_call = ftrace_module_notify_exit, +	.priority = INT_MIN,	/* Run after anything that can remove kprobes */  }; -extern unsigned long __start_mcount_loc[]; -extern unsigned long __stop_mcount_loc[]; -  void __init ftrace_init(void)  { -	unsigned long count, addr, flags; +	extern unsigned long __start_mcount_loc[]; +	extern unsigned long __stop_mcount_loc[]; +	unsigned long count, flags;  	int ret; -	/* Keep the ftrace pointer to the stub */ -	addr = (unsigned long)ftrace_stub; -  	local_irq_save(flags); -	ftrace_dyn_arch_init(&addr); +	ret = 
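The MODULE_STATE_GOING notifier registered above is a stock kernel pattern, so any subsystem holding per-module state can mirror it. Sketch only, with invented "my_*" names and a default priority rather than the INT_MIN used above.

#include <linux/module.h>
#include <linux/notifier.h>
#include <linux/init.h>

static void my_release_mod(struct module *mod)
{
	/* drop any state keyed by this module's text addresses */
}

static int my_module_notify(struct notifier_block *self,
			    unsigned long val, void *data)
{
	struct module *mod = data;

	if (val == MODULE_STATE_GOING)
		my_release_mod(mod);

	return NOTIFY_DONE;
}

static struct notifier_block my_module_nb = {
	.notifier_call	= my_module_notify,
	.priority	= 0,
};

static int __init my_module_notify_init(void)
{
	return register_module_notifier(&my_module_nb);
}
core_initcall(my_module_notify_init);

ftrace itself runs its exit notifier at INT_MIN so that kprobes and other users get to clean up before the records for the departing module disappear.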
ftrace_dyn_arch_init();  	local_irq_restore(flags); - -	/* ftrace_dyn_arch_init places the return code in addr */ -	if (addr) +	if (ret)  		goto failed;  	count = __stop_mcount_loc - __start_mcount_loc; - -	ret = ftrace_dyn_table_alloc(count); -	if (ret) +	if (!count) { +		pr_info("ftrace: No functions to be traced?\n");  		goto failed; +	} + +	pr_info("ftrace: allocating %ld entries in %ld pages\n", +		count, count / ENTRIES_PER_PAGE + 1);  	last_ftrace_enabled = ftrace_enabled = 1; @@ -2823,9 +4319,9 @@ void __init ftrace_init(void)  				  __start_mcount_loc,  				  __stop_mcount_loc); -	ret = register_module_notifier(&ftrace_module_nb); +	ret = register_module_notifier(&ftrace_module_exit_nb);  	if (ret) -		pr_warning("Failed to register trace ftrace module notifier\n"); +		pr_warning("Failed to register trace ftrace module exit notifier\n");  	set_ftrace_early_filters(); @@ -2836,22 +4332,174 @@ void __init ftrace_init(void)  #else +static struct ftrace_ops global_ops = { +	.func			= ftrace_stub, +	.flags			= FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED, +	INIT_REGEX_LOCK(global_ops) +}; +  static int __init ftrace_nodyn_init(void)  {  	ftrace_enabled = 1;  	return 0;  } -device_initcall(ftrace_nodyn_init); +core_initcall(ftrace_nodyn_init);  static inline int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { return 0; }  static inline void ftrace_startup_enable(int command) { }  /* Keep as macros so we do not need to define the commands */ -# define ftrace_startup(command)	do { } while (0) -# define ftrace_shutdown(command)	do { } while (0) +# define ftrace_startup(ops, command)					\ +	({								\ +		int ___ret = __register_ftrace_function(ops);		\ +		if (!___ret)						\ +			(ops)->flags |= FTRACE_OPS_FL_ENABLED;		\ +		___ret;							\ +	}) +# define ftrace_shutdown(ops, command)					\ +	({								\ +		int ___ret = __unregister_ftrace_function(ops);		\ +		if (!___ret)						\ +			(ops)->flags &= ~FTRACE_OPS_FL_ENABLED;		\ +		___ret;							\ +	}) +  # define ftrace_startup_sysctl()	do { } while (0)  # define ftrace_shutdown_sysctl()	do { } while (0) + +static inline int +ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs) +{ +	return 1; +} +  #endif /* CONFIG_DYNAMIC_FTRACE */ +__init void ftrace_init_global_array_ops(struct trace_array *tr) +{ +	tr->ops = &global_ops; +	tr->ops->private = tr; +} + +void ftrace_init_array_ops(struct trace_array *tr, ftrace_func_t func) +{ +	/* If we filter on pids, update to use the pid function */ +	if (tr->flags & TRACE_ARRAY_FL_GLOBAL) { +		if (WARN_ON(tr->ops->func != ftrace_stub)) +			printk("ftrace ops had %pS for function\n", +			       tr->ops->func); +		/* Only the top level instance does pid tracing */ +		if (!list_empty(&ftrace_pids)) { +			set_ftrace_pid_function(func); +			func = ftrace_pid_func; +		} +	} +	tr->ops->func = func; +	tr->ops->private = tr; +} + +void ftrace_reset_array_ops(struct trace_array *tr) +{ +	tr->ops->func = ftrace_stub; +} + +static void +ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip, +			struct ftrace_ops *op, struct pt_regs *regs) +{ +	if (unlikely(trace_recursion_test(TRACE_CONTROL_BIT))) +		return; + +	/* +	 * Some of the ops may be dynamically allocated, +	 * they must be freed after a synchronize_sched(). +	 */ +	preempt_disable_notrace(); +	trace_recursion_set(TRACE_CONTROL_BIT); + +	/* +	 * Control funcs (perf) uses RCU. Only trace if +	 * RCU is currently active. 
+	 */ +	if (!rcu_is_watching()) +		goto out; + +	do_for_each_ftrace_op(op, ftrace_control_list) { +		if (!(op->flags & FTRACE_OPS_FL_STUB) && +		    !ftrace_function_local_disabled(op) && +		    ftrace_ops_test(op, ip, regs)) +			op->func(ip, parent_ip, op, regs); +	} while_for_each_ftrace_op(op); + out: +	trace_recursion_clear(TRACE_CONTROL_BIT); +	preempt_enable_notrace(); +} + +static struct ftrace_ops control_ops = { +	.func	= ftrace_ops_control_func, +	.flags	= FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED, +	INIT_REGEX_LOCK(control_ops) +}; + +static inline void +__ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, +		       struct ftrace_ops *ignored, struct pt_regs *regs) +{ +	struct ftrace_ops *op; +	int bit; + +	if (function_trace_stop) +		return; + +	bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX); +	if (bit < 0) +		return; + +	/* +	 * Some of the ops may be dynamically allocated, +	 * they must be freed after a synchronize_sched(). +	 */ +	preempt_disable_notrace(); +	do_for_each_ftrace_op(op, ftrace_ops_list) { +		if (ftrace_ops_test(op, ip, regs)) { +			if (WARN_ON(!op->func)) { +				function_trace_stop = 1; +				printk("op=%p %pS\n", op, op); +				goto out; +			} +			op->func(ip, parent_ip, op, regs); +		} +	} while_for_each_ftrace_op(op); +out: +	preempt_enable_notrace(); +	trace_clear_recursion(bit); +} + +/* + * Some archs only support passing ip and parent_ip. Even though + * the list function ignores the op parameter, we do not want any + * C side effects, where a function is called without the caller + * sending a third parameter. + * Archs are to support both the regs and ftrace_ops at the same time. + * If they support ftrace_ops, it is assumed they support regs. + * If call backs want to use regs, they must either check for regs + * being NULL, or CONFIG_DYNAMIC_FTRACE_WITH_REGS. + * Note, CONFIG_DYNAMIC_FTRACE_WITH_REGS expects a full regs to be saved. + * An architecture can pass partial regs with ftrace_ops and still + * set the ARCH_SUPPORT_FTARCE_OPS. + */ +#if ARCH_SUPPORTS_FTRACE_OPS +static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, +				 struct ftrace_ops *op, struct pt_regs *regs) +{ +	__ftrace_ops_list_func(ip, parent_ip, NULL, regs); +} +#else +static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip) +{ +	__ftrace_ops_list_func(ip, parent_ip, NULL, NULL); +} +#endif +  static void clear_ftrace_swapper(void)  {  	struct task_struct *p; @@ -3072,7 +4720,7 @@ ftrace_pid_write(struct file *filp, const char __user *ubuf,  	if (strlen(tmp) == 0)  		return 1; -	ret = strict_strtol(tmp, 10, &val); +	ret = kstrtol(tmp, 10, &val);  	if (ret < 0)  		return ret; @@ -3094,7 +4742,7 @@ static const struct file_operations ftrace_pid_fops = {  	.open		= ftrace_pid_open,  	.write		= ftrace_pid_write,  	.read		= seq_read, -	.llseek		= seq_lseek, +	.llseek		= tracing_lseek,  	.release	= ftrace_pid_release,  }; @@ -3132,6 +4780,14 @@ void ftrace_kill(void)  }  /** + * Test if ftrace is dead or not. + */ +int ftrace_is_dead(void) +{ +	return ftrace_disabled; +} + +/**   * register_ftrace_function - register a function for profiling   * @ops - ops structure that holds the function for profiling.   
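The block comment above is the contract for callbacks: @regs is only meaningful when the architecture supports ftrace_ops and the ops asked for register saving; otherwise it may be NULL or partial. A sketch of a callback written to that rule, assuming the FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED flag from the same flag family (it is not shown in this hunk).

#include <linux/ftrace.h>
#include <linux/ptrace.h>

static void my_regs_callback(unsigned long ip, unsigned long parent_ip,
			     struct ftrace_ops *op, struct pt_regs *regs)
{
	if (!regs)
		return;		/* the arch passed no register state */

	/*
	 * Full, modifiable registers are only guaranteed when
	 * CONFIG_DYNAMIC_FTRACE_WITH_REGS is available; treat them as
	 * read-only otherwise.
	 */
}

static struct ftrace_ops my_regs_ops = {
	.func	= my_regs_callback,
	.flags	= FTRACE_OPS_FL_RECURSION_SAFE |
		  FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED,
};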
* @@ -3144,19 +4800,19 @@ void ftrace_kill(void)   */  int register_ftrace_function(struct ftrace_ops *ops)  { -	int ret; +	int ret = -1; -	if (unlikely(ftrace_disabled)) -		return -1; +	ftrace_ops_init(ops);  	mutex_lock(&ftrace_lock); -	ret = __register_ftrace_function(ops); -	ftrace_startup(0); +	ret = ftrace_startup(ops, 0);  	mutex_unlock(&ftrace_lock); +  	return ret;  } +EXPORT_SYMBOL_GPL(register_ftrace_function);  /**   * unregister_ftrace_function - unregister a function for profiling. @@ -3169,26 +4825,26 @@ int unregister_ftrace_function(struct ftrace_ops *ops)  	int ret;  	mutex_lock(&ftrace_lock); -	ret = __unregister_ftrace_function(ops); -	ftrace_shutdown(0); +	ret = ftrace_shutdown(ops, 0);  	mutex_unlock(&ftrace_lock);  	return ret;  } +EXPORT_SYMBOL_GPL(unregister_ftrace_function);  int  ftrace_enable_sysctl(struct ctl_table *table, int write,  		     void __user *buffer, size_t *lenp,  		     loff_t *ppos)  { -	int ret; - -	if (unlikely(ftrace_disabled)) -		return -ENODEV; +	int ret = -ENODEV;  	mutex_lock(&ftrace_lock); -	ret  = proc_dointvec(table, write, buffer, lenp, ppos); +	if (unlikely(ftrace_disabled)) +		goto out; + +	ret = proc_dointvec(table, write, buffer, lenp, ppos);  	if (ret || !write || (last_ftrace_enabled == !!ftrace_enabled))  		goto out; @@ -3200,12 +4856,8 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,  		ftrace_startup_sysctl();  		/* we are starting ftrace again */ -		if (ftrace_list != &ftrace_list_end) { -			if (ftrace_list->next == &ftrace_list_end) -				ftrace_trace_function = ftrace_list->func; -			else -				ftrace_trace_function = ftrace_list_func; -		} +		if (ftrace_ops_list != &ftrace_list_end) +			update_ftrace_function();  	} else {  		/* stopping ftrace calls (just send to ftrace_stub) */ @@ -3222,7 +4874,6 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,  #ifdef CONFIG_FUNCTION_GRAPH_TRACER  static int ftrace_graph_active; -static struct notifier_block ftrace_suspend_notifier;  int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)  { @@ -3233,6 +4884,7 @@ int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)  trace_func_graph_ret_t ftrace_graph_return =  			(trace_func_graph_ret_t)ftrace_stub;  trace_func_graph_ent_t ftrace_graph_entry = ftrace_graph_entry_stub; +static trace_func_graph_ent_t __ftrace_graph_entry = ftrace_graph_entry_stub;  /* Try to assign a return stack array on FTRACE_RETSTACK_ALLOC_SIZE tasks. */  static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list) @@ -3328,7 +4980,7 @@ static int start_graph_tracing(void)  	/* The cpu_boot init_task->ret_stack will never be freed */  	for_each_online_cpu(cpu) {  		if (!idle_task(cpu)->ret_stack) -			ftrace_graph_init_task(idle_task(cpu)); +			ftrace_graph_init_idle_task(idle_task(cpu), cpu);  	}  	do { @@ -3367,6 +5019,34 @@ ftrace_suspend_notifier_call(struct notifier_block *bl, unsigned long state,  	return NOTIFY_DONE;  } +static int ftrace_graph_entry_test(struct ftrace_graph_ent *trace) +{ +	if (!ftrace_ops_test(&global_ops, trace->func, NULL)) +		return 0; +	return __ftrace_graph_entry(trace); +} + +/* + * The function graph tracer should only trace the functions defined + * by set_ftrace_filter and set_ftrace_notrace. If another function + * tracer ops is registered, the graph tracer requires testing the + * function against the global ops, and not just trace any function + * that any ftrace_ops registered. 
+ */ +static void update_function_graph_func(void) +{ +	if (ftrace_ops_list == &ftrace_list_end || +	    (ftrace_ops_list == &global_ops && +	     global_ops.next == &ftrace_list_end)) +		ftrace_graph_entry = __ftrace_graph_entry; +	else +		ftrace_graph_entry = ftrace_graph_entry_test; +} + +static struct notifier_block ftrace_suspend_notifier = { +	.notifier_call = ftrace_suspend_notifier_call, +}; +  int register_ftrace_graph(trace_func_graph_ret_t retfunc,  			trace_func_graph_ent_t entryfunc)  { @@ -3380,7 +5060,6 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,  		goto out;  	} -	ftrace_suspend_notifier.notifier_call = ftrace_suspend_notifier_call;  	register_pm_notifier(&ftrace_suspend_notifier);  	ftrace_graph_active++; @@ -3391,9 +5070,21 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,  	}  	ftrace_graph_return = retfunc; -	ftrace_graph_entry = entryfunc; -	ftrace_startup(FTRACE_START_FUNC_RET); +	/* +	 * Update the indirect function to the entryfunc, and the +	 * function that gets called to the entry_test first. Then +	 * call the update fgraph entry function to determine if +	 * the entryfunc should be called directly or not. +	 */ +	__ftrace_graph_entry = entryfunc; +	ftrace_graph_entry = ftrace_graph_entry_test; +	update_function_graph_func(); + +	/* Function graph doesn't use the .func field of global_ops */ +	global_ops.flags |= FTRACE_OPS_FL_STUB; + +	ret = ftrace_startup(&global_ops, FTRACE_START_FUNC_RET);  out:  	mutex_unlock(&ftrace_lock); @@ -3410,7 +5101,9 @@ void unregister_ftrace_graph(void)  	ftrace_graph_active--;  	ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;  	ftrace_graph_entry = ftrace_graph_entry_stub; -	ftrace_shutdown(FTRACE_STOP_FUNC_RET); +	__ftrace_graph_entry = ftrace_graph_entry_stub; +	ftrace_shutdown(&global_ops, FTRACE_STOP_FUNC_RET); +	global_ops.flags &= ~FTRACE_OPS_FL_STUB;  	unregister_pm_notifier(&ftrace_suspend_notifier);  	unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL); @@ -3418,6 +5111,49 @@ void unregister_ftrace_graph(void)  	mutex_unlock(&ftrace_lock);  } +static DEFINE_PER_CPU(struct ftrace_ret_stack *, idle_ret_stack); + +static void +graph_init_task(struct task_struct *t, struct ftrace_ret_stack *ret_stack) +{ +	atomic_set(&t->tracing_graph_pause, 0); +	atomic_set(&t->trace_overrun, 0); +	t->ftrace_timestamp = 0; +	/* make curr_ret_stack visible before we add the ret_stack */ +	smp_wmb(); +	t->ret_stack = ret_stack; +} + +/* + * Allocate a return stack for the idle task. May be the first + * time through, or it may be done by CPU hotplug online. + */ +void ftrace_graph_init_idle_task(struct task_struct *t, int cpu) +{ +	t->curr_ret_stack = -1; +	/* +	 * The idle task has no parent, it either has its own +	 * stack or no stack at all. 
+	 */ +	if (t->ret_stack) +		WARN_ON(t->ret_stack != per_cpu(idle_ret_stack, cpu)); + +	if (ftrace_graph_active) { +		struct ftrace_ret_stack *ret_stack; + +		ret_stack = per_cpu(idle_ret_stack, cpu); +		if (!ret_stack) { +			ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH +					    * sizeof(struct ftrace_ret_stack), +					    GFP_KERNEL); +			if (!ret_stack) +				return; +			per_cpu(idle_ret_stack, cpu) = ret_stack; +		} +		graph_init_task(t, ret_stack); +	} +} +  /* Allocate a return stack for newly created task */  void ftrace_graph_init_task(struct task_struct *t)  { @@ -3433,12 +5169,7 @@ void ftrace_graph_init_task(struct task_struct *t)  				GFP_KERNEL);  		if (!ret_stack)  			return; -		atomic_set(&t->tracing_graph_pause, 0); -		atomic_set(&t->trace_overrun, 0); -		t->ftrace_timestamp = 0; -		/* make curr_ret_stack visable before we add the ret_stack */ -		smp_wmb(); -		t->ret_stack = ret_stack; +		graph_init_task(t, ret_stack);  	}  }  | 
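
The list walkers do_for_each_ftrace_op()/while_for_each_ftrace_op() used by __ftrace_ops_list_func() above are defined in kernel/trace/trace.h rather than in this file. Roughly (paraphrased from that era of the code, details may differ), they expand to a lockless, RCU-style walk of the singly linked ops list, which is exactly why the comment in __ftrace_ops_list_func() insists that dynamically allocated ops be freed only after synchronize_sched():

/*
 * Paraphrased sketch, not verbatim from this patch: writers update the
 * ops list with rcu_assign_pointer(); readers chase ->next with
 * preemption disabled and no lock, so a kmalloc()ed ftrace_ops may only
 * be kfree()d once synchronize_sched() guarantees all walkers are gone.
 */
#define do_for_each_ftrace_op(op, list)			\
	op = rcu_dereference_raw(list);			\
	do

#define while_for_each_ftrace_op(op)				\
	while (likely(op = rcu_dereference_raw((op)->next)) &&	\
	       unlikely((op) != &ftrace_list_end));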
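
With register_ftrace_function() and unregister_ftrace_function() now exported (EXPORT_SYMBOL_GPL) and taking the ftrace_ops to start and stop, a module can hook the function tracer directly. The sketch below is illustrative only: my_callback, my_ops, the init/exit names and the "do_sys_open" filter target are not part of this patch, and the exact ftrace_set_filter() prototype may vary between kernel versions.

#include <linux/ftrace.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/string.h>

/* Hypothetical callback; the signature matches the ops-based ftrace_func_t. */
static void my_callback(unsigned long ip, unsigned long parent_ip,
			struct ftrace_ops *op, struct pt_regs *regs)
{
	/*
	 * As the comment in the diff notes, regs may be NULL unless the
	 * architecture saves a full pt_regs (CONFIG_DYNAMIC_FTRACE_WITH_REGS).
	 */
	trace_printk("hit %pS called from %pS\n", (void *)ip, (void *)parent_ip);
}

static struct ftrace_ops my_ops = {
	.func	= my_callback,
	/* Declare that the callback guards against its own recursion. */
	.flags	= FTRACE_OPS_FL_RECURSION_SAFE,
};

static int __init my_tracer_init(void)
{
	/* Optional: limit this ops to a single function before enabling it. */
	ftrace_set_filter(&my_ops, "do_sys_open", strlen("do_sys_open"), 0);
	return register_ftrace_function(&my_ops);
}

static void __exit my_tracer_exit(void)
{
	unregister_ftrace_function(&my_ops);
}

module_init(my_tracer_init);
module_exit(my_tracer_exit);
MODULE_LICENSE("GPL");

Setting FTRACE_OPS_FL_RECURSION_SAFE is meant to tell ftrace that the callback handles its own recursion, so it can be called without an extra protective wrapper.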
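
register_ftrace_graph() now routes the entry callback through __ftrace_graph_entry and ftrace_graph_entry_test(), so a filter configured on global_ops (set_ftrace_filter/set_ftrace_notrace) also limits which functions reach the entry handler. A hedged sketch of an in-kernel caller follows; the handler names are made up, and the symbol is not exported to modules by this patch.

#include <linux/ftrace.h>

/* Hypothetical handlers; the types match the stubs shown in the diff. */
static int my_graph_entry(struct ftrace_graph_ent *trace)
{
	/* Nonzero: trace this function and its return; 0: skip it. */
	return 1;
}

static void my_graph_return(struct ftrace_graph_ret *trace)
{
	/* Runs when a traced function returns. */
}

static int start_my_graph_tracing(void)
{
	/*
	 * Entry and return handlers are installed together; tear down
	 * with unregister_ftrace_graph().
	 */
	return register_ftrace_graph(my_graph_return, my_graph_entry);
}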
