Diffstat (limited to 'kernel/kprobes.c')
-rw-r--r--  kernel/kprobes.c  1390
1 file changed, 895 insertions(+), 495 deletions(-)
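Note for readers skimming the diff below: one of the larger changes is that the per-cache instruction-slot allocator loses its external kprobe_insn_mutex/kprobe_optinsn_mutex wrappers and instead embeds a mutex plus alloc/free callbacks in struct kprobe_insn_cache, with callers going through __get_insn_slot()/__free_insn_slot() directly. The following sketch is illustrative only and is not part of the commit; the demo_* names are hypothetical, the struct field layout is inferred from the kprobe_insn_slots initializer in the diff, and it assumes an architecture that defines __ARCH_WANT_KPROBES_INSN_SLOT.

/*
 * Illustrative sketch (not from this commit): how an in-tree user could
 * declare its own slot cache on top of the reworked allocator.
 */
#include <linux/kprobes.h>
#include <linux/moduleloader.h>

static void *demo_alloc_insn_page(void)
{
	/* Same backing allocator the commit installs for kprobe_insn_slots */
	return module_alloc(PAGE_SIZE);
}

static void demo_free_insn_page(void *page)
{
	module_free(NULL, page);
}

/* Each cache now carries its own mutex, page list and alloc/free hooks */
static struct kprobe_insn_cache demo_insn_slots = {
	.mutex		= __MUTEX_INITIALIZER(demo_insn_slots.mutex),
	.alloc		= demo_alloc_insn_page,
	.free		= demo_free_insn_page,
	.pages		= LIST_HEAD_INIT(demo_insn_slots.pages),
	.insn_size	= MAX_INSN_SIZE,
	.nr_garbage	= 0,
};

static kprobe_opcode_t *demo_take_slot(void)
{
	/* Locking moved inside __get_insn_slot(); no external mutex needed */
	return __get_insn_slot(&demo_insn_slots);
}

static void demo_drop_slot(kprobe_opcode_t *slot)
{
	/* dirty=1 marks the slot for delayed garbage collection */
	__free_insn_slot(&demo_insn_slots, slot, 1);
}

The point of the design change, as visible in the hunks that follow, is that kprobe_insn_slots and kprobe_optinsn_slots become two instances of the same self-contained cache type instead of sharing one allocator guarded by two ad-hoc mutexes.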
diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 9737a76e106..734e9a7d280 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -36,7 +36,7 @@  #include <linux/init.h>  #include <linux/slab.h>  #include <linux/stddef.h> -#include <linux/module.h> +#include <linux/export.h>  #include <linux/moduleloader.h>  #include <linux/kallsyms.h>  #include <linux/freezer.h> @@ -78,29 +78,16 @@ static bool kprobes_all_disarmed;  static DEFINE_MUTEX(kprobe_mutex);  static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;  static struct { -	spinlock_t lock ____cacheline_aligned_in_smp; +	raw_spinlock_t lock ____cacheline_aligned_in_smp;  } kretprobe_table_locks[KPROBE_TABLE_SIZE]; -static spinlock_t *kretprobe_table_lock_ptr(unsigned long hash) +static raw_spinlock_t *kretprobe_table_lock_ptr(unsigned long hash)  {  	return &(kretprobe_table_locks[hash].lock);  } -/* - * Normally, functions that we'd want to prohibit kprobes in, are marked - * __kprobes. But, there are cases where such functions already belong to - * a different section (__sched for preempt_schedule) - * - * For such cases, we now have a blacklist - */ -static struct kprobe_blackpoint kprobe_blacklist[] = { -	{"preempt_schedule",}, -	{"native_get_debugreg",}, -	{"irq_entries_start",}, -	{"common_interrupt",}, -	{"mcount",},	/* mcount can be called from everywhere */ -	{NULL}    /* Terminator */ -}; +/* Blacklist -- list of struct kprobe_blacklist_entry */ +static LIST_HEAD(kprobe_blacklist);  #ifdef __ARCH_WANT_KPROBES_INSN_SLOT  /* @@ -112,6 +99,7 @@ static struct kprobe_blackpoint kprobe_blacklist[] = {  struct kprobe_insn_page {  	struct list_head list;  	kprobe_opcode_t *insns;		/* Page of instruction slots */ +	struct kprobe_insn_cache *cache;  	int nused;  	int ngarbage;  	char slot_used[]; @@ -121,12 +109,6 @@ struct kprobe_insn_page {  	(offsetof(struct kprobe_insn_page, slot_used) +	\  	 (sizeof(char) * (slots))) -struct kprobe_insn_cache { -	struct list_head pages;	/* list of kprobe_insn_page */ -	size_t insn_size;	/* size of instruction slot */ -	int nr_garbage; -}; -  static int slots_per_page(struct kprobe_insn_cache *c)  {  	return PAGE_SIZE/(c->insn_size * sizeof(kprobe_opcode_t)); @@ -138,22 +120,36 @@ enum kprobe_slot_state {  	SLOT_USED = 2,  }; -static DEFINE_MUTEX(kprobe_insn_mutex);	/* Protects kprobe_insn_slots */ -static struct kprobe_insn_cache kprobe_insn_slots = { +static void *alloc_insn_page(void) +{ +	return module_alloc(PAGE_SIZE); +} + +static void free_insn_page(void *page) +{ +	module_free(NULL, page); +} + +struct kprobe_insn_cache kprobe_insn_slots = { +	.mutex = __MUTEX_INITIALIZER(kprobe_insn_slots.mutex), +	.alloc = alloc_insn_page, +	.free = free_insn_page,  	.pages = LIST_HEAD_INIT(kprobe_insn_slots.pages),  	.insn_size = MAX_INSN_SIZE,  	.nr_garbage = 0,  }; -static int __kprobes collect_garbage_slots(struct kprobe_insn_cache *c); +static int collect_garbage_slots(struct kprobe_insn_cache *c);  /**   * __get_insn_slot() - Find a slot on an executable page for an instruction.   * We allocate an executable page if there's no room on existing ones.   
*/ -static kprobe_opcode_t __kprobes *__get_insn_slot(struct kprobe_insn_cache *c) +kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c)  {  	struct kprobe_insn_page *kip; +	kprobe_opcode_t *slot = NULL; +	mutex_lock(&c->mutex);   retry:  	list_for_each_entry(kip, &c->pages, list) {  		if (kip->nused < slots_per_page(c)) { @@ -162,7 +158,8 @@ static kprobe_opcode_t __kprobes *__get_insn_slot(struct kprobe_insn_cache *c)  				if (kip->slot_used[i] == SLOT_CLEAN) {  					kip->slot_used[i] = SLOT_USED;  					kip->nused++; -					return kip->insns + (i * c->insn_size); +					slot = kip->insns + (i * c->insn_size); +					goto out;  				}  			}  			/* kip->nused is broken. Fix it. */ @@ -178,41 +175,33 @@ static kprobe_opcode_t __kprobes *__get_insn_slot(struct kprobe_insn_cache *c)  	/* All out of space.  Need to allocate a new page. */  	kip = kmalloc(KPROBE_INSN_PAGE_SIZE(slots_per_page(c)), GFP_KERNEL);  	if (!kip) -		return NULL; +		goto out;  	/*  	 * Use module_alloc so this page is within +/- 2GB of where the  	 * kernel image and loaded module images reside. This is required  	 * so x86_64 can correctly handle the %rip-relative fixups.  	 */ -	kip->insns = module_alloc(PAGE_SIZE); +	kip->insns = c->alloc();  	if (!kip->insns) {  		kfree(kip); -		return NULL; +		goto out;  	}  	INIT_LIST_HEAD(&kip->list);  	memset(kip->slot_used, SLOT_CLEAN, slots_per_page(c));  	kip->slot_used[0] = SLOT_USED;  	kip->nused = 1;  	kip->ngarbage = 0; +	kip->cache = c;  	list_add(&kip->list, &c->pages); -	return kip->insns; -} - - -kprobe_opcode_t __kprobes *get_insn_slot(void) -{ -	kprobe_opcode_t *ret = NULL; - -	mutex_lock(&kprobe_insn_mutex); -	ret = __get_insn_slot(&kprobe_insn_slots); -	mutex_unlock(&kprobe_insn_mutex); - -	return ret; +	slot = kip->insns; +out: +	mutex_unlock(&c->mutex); +	return slot;  }  /* Return 1 if all garbages are collected, otherwise 0. */ -static int __kprobes collect_one_slot(struct kprobe_insn_page *kip, int idx) +static int collect_one_slot(struct kprobe_insn_page *kip, int idx)  {  	kip->slot_used[idx] = SLOT_CLEAN;  	kip->nused--; @@ -225,7 +214,7 @@ static int __kprobes collect_one_slot(struct kprobe_insn_page *kip, int idx)  		 */  		if (!list_is_singular(&kip->list)) {  			list_del(&kip->list); -			module_free(NULL, kip->insns); +			kip->cache->free(kip->insns);  			kfree(kip);  		}  		return 1; @@ -233,7 +222,7 @@ static int __kprobes collect_one_slot(struct kprobe_insn_page *kip, int idx)  	return 0;  } -static int __kprobes collect_garbage_slots(struct kprobe_insn_cache *c) +static int collect_garbage_slots(struct kprobe_insn_cache *c)  {  	struct kprobe_insn_page *kip, *next; @@ -255,11 +244,12 @@ static int __kprobes collect_garbage_slots(struct kprobe_insn_cache *c)  	return 0;  } -static void __kprobes __free_insn_slot(struct kprobe_insn_cache *c, -				       kprobe_opcode_t *slot, int dirty) +void __free_insn_slot(struct kprobe_insn_cache *c, +		      kprobe_opcode_t *slot, int dirty)  {  	struct kprobe_insn_page *kip; +	mutex_lock(&c->mutex);  	list_for_each_entry(kip, &c->pages, list) {  		long idx = ((long)slot - (long)kip->insns) /  				(c->insn_size * sizeof(kprobe_opcode_t)); @@ -272,57 +262,37 @@ static void __kprobes __free_insn_slot(struct kprobe_insn_cache *c,  					collect_garbage_slots(c);  			} else  				collect_one_slot(kip, idx); -			return; +			goto out;  		}  	}  	/* Could not free this slot. 
*/  	WARN_ON(1); +out: +	mutex_unlock(&c->mutex);  } -void __kprobes free_insn_slot(kprobe_opcode_t * slot, int dirty) -{ -	mutex_lock(&kprobe_insn_mutex); -	__free_insn_slot(&kprobe_insn_slots, slot, dirty); -	mutex_unlock(&kprobe_insn_mutex); -}  #ifdef CONFIG_OPTPROBES  /* For optimized_kprobe buffer */ -static DEFINE_MUTEX(kprobe_optinsn_mutex); /* Protects kprobe_optinsn_slots */ -static struct kprobe_insn_cache kprobe_optinsn_slots = { +struct kprobe_insn_cache kprobe_optinsn_slots = { +	.mutex = __MUTEX_INITIALIZER(kprobe_optinsn_slots.mutex), +	.alloc = alloc_insn_page, +	.free = free_insn_page,  	.pages = LIST_HEAD_INIT(kprobe_optinsn_slots.pages),  	/* .insn_size is initialized later */  	.nr_garbage = 0,  }; -/* Get a slot for optimized_kprobe buffer */ -kprobe_opcode_t __kprobes *get_optinsn_slot(void) -{ -	kprobe_opcode_t *ret = NULL; - -	mutex_lock(&kprobe_optinsn_mutex); -	ret = __get_insn_slot(&kprobe_optinsn_slots); -	mutex_unlock(&kprobe_optinsn_mutex); - -	return ret; -} - -void __kprobes free_optinsn_slot(kprobe_opcode_t * slot, int dirty) -{ -	mutex_lock(&kprobe_optinsn_mutex); -	__free_insn_slot(&kprobe_optinsn_slots, slot, dirty); -	mutex_unlock(&kprobe_optinsn_mutex); -}  #endif  #endif  /* We have preemption disabled.. so it is safe to use __ versions */  static inline void set_kprobe_instance(struct kprobe *kp)  { -	__get_cpu_var(kprobe_instance) = kp; +	__this_cpu_write(kprobe_instance, kp);  }  static inline void reset_kprobe_instance(void)  { -	__get_cpu_var(kprobe_instance) = NULL; +	__this_cpu_write(kprobe_instance, NULL);  }  /* @@ -331,22 +301,22 @@ static inline void reset_kprobe_instance(void)   * 				OR   * 	- with preemption disabled - from arch/xxx/kernel/kprobes.c   */ -struct kprobe __kprobes *get_kprobe(void *addr) +struct kprobe *get_kprobe(void *addr)  {  	struct hlist_head *head; -	struct hlist_node *node;  	struct kprobe *p;  	head = &kprobe_table[hash_ptr(addr, KPROBE_HASH_BITS)]; -	hlist_for_each_entry_rcu(p, node, head, hlist) { +	hlist_for_each_entry_rcu(p, head, hlist) {  		if (p->addr == addr)  			return p;  	}  	return NULL;  } +NOKPROBE_SYMBOL(get_kprobe); -static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs); +static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs);  /* Return true if the kprobe is an aggregator */  static inline int kprobe_aggrprobe(struct kprobe *p) @@ -354,13 +324,20 @@ static inline int kprobe_aggrprobe(struct kprobe *p)  	return p->pre_handler == aggr_pre_handler;  } +/* Return true(!0) if the kprobe is unused */ +static inline int kprobe_unused(struct kprobe *p) +{ +	return kprobe_aggrprobe(p) && kprobe_disabled(p) && +	       list_empty(&p->list); +} +  /*   * Keep all fields in the kprobe consistent   */ -static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p) +static inline void copy_kprobe(struct kprobe *ap, struct kprobe *p)  { -	memcpy(&p->opcode, &old_p->opcode, sizeof(kprobe_opcode_t)); -	memcpy(&p->ainsn, &old_p->ainsn, sizeof(struct arch_specific_insn)); +	memcpy(&p->opcode, &ap->opcode, sizeof(kprobe_opcode_t)); +	memcpy(&p->ainsn, &ap->ainsn, sizeof(struct arch_specific_insn));  }  #ifdef CONFIG_OPTPROBES @@ -371,7 +348,7 @@ static bool kprobes_allow_optimization;   * Call all pre_handler on the list, but ignores its return value.   * This must be called from arch-dep optimized caller.   
*/ -void __kprobes opt_pre_handler(struct kprobe *p, struct pt_regs *regs) +void opt_pre_handler(struct kprobe *p, struct pt_regs *regs)  {  	struct kprobe *kp; @@ -383,6 +360,18 @@ void __kprobes opt_pre_handler(struct kprobe *p, struct pt_regs *regs)  		reset_kprobe_instance();  	}  } +NOKPROBE_SYMBOL(opt_pre_handler); + +/* Free optimized instructions and optimized_kprobe */ +static void free_aggr_kprobe(struct kprobe *p) +{ +	struct optimized_kprobe *op; + +	op = container_of(p, struct optimized_kprobe, kp); +	arch_remove_optimized_kprobe(op); +	arch_remove_kprobe(p); +	kfree(op); +}  /* Return true(!0) if the kprobe is ready for optimization. */  static inline int kprobe_optready(struct kprobe *p) @@ -397,11 +386,38 @@ static inline int kprobe_optready(struct kprobe *p)  	return 0;  } +/* Return true(!0) if the kprobe is disarmed. Note: p must be on hash list */ +static inline int kprobe_disarmed(struct kprobe *p) +{ +	struct optimized_kprobe *op; + +	/* If kprobe is not aggr/opt probe, just return kprobe is disabled */ +	if (!kprobe_aggrprobe(p)) +		return kprobe_disabled(p); + +	op = container_of(p, struct optimized_kprobe, kp); + +	return kprobe_disabled(p) && list_empty(&op->list); +} + +/* Return true(!0) if the probe is queued on (un)optimizing lists */ +static int kprobe_queued(struct kprobe *p) +{ +	struct optimized_kprobe *op; + +	if (kprobe_aggrprobe(p)) { +		op = container_of(p, struct optimized_kprobe, kp); +		if (!list_empty(&op->list)) +			return 1; +	} +	return 0; +} +  /*   * Return an optimized kprobe whose optimizing code replaces   * instructions including addr (exclude breakpoint).   */ -static struct kprobe *__kprobes get_optimized_kprobe(unsigned long addr) +static struct kprobe *get_optimized_kprobe(unsigned long addr)  {  	int i;  	struct kprobe *p = NULL; @@ -422,30 +438,23 @@ static struct kprobe *__kprobes get_optimized_kprobe(unsigned long addr)  /* Optimization staging list, protected by kprobe_mutex */  static LIST_HEAD(optimizing_list); +static LIST_HEAD(unoptimizing_list); +static LIST_HEAD(freeing_list);  static void kprobe_optimizer(struct work_struct *work);  static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer);  #define OPTIMIZE_DELAY 5 -/* Kprobe jump optimizer */ -static __kprobes void kprobe_optimizer(struct work_struct *work) +/* + * Optimize (replace a breakpoint with a jump) kprobes listed on + * optimizing_list. + */ +static void do_optimize_kprobes(void)  { -	struct optimized_kprobe *op, *tmp; - -	/* Lock modules while optimizing kprobes */ -	mutex_lock(&module_mutex); -	mutex_lock(&kprobe_mutex); -	if (kprobes_all_disarmed || !kprobes_allow_optimization) -		goto end; - -	/* -	 * Wait for quiesence period to ensure all running interrupts -	 * are done. Because optprobe may modify multiple instructions -	 * there is a chance that Nth instruction is interrupted. In that -	 * case, running interrupt can return to 2nd-Nth byte of jump -	 * instruction. This wait is for avoiding it. 
-	 */ -	synchronize_sched(); +	/* Optimization never be done when disarmed */ +	if (kprobes_all_disarmed || !kprobes_allow_optimization || +	    list_empty(&optimizing_list)) +		return;  	/*  	 * The optimization/unoptimization refers online_cpus via @@ -459,21 +468,121 @@ static __kprobes void kprobe_optimizer(struct work_struct *work)  	 */  	get_online_cpus();  	mutex_lock(&text_mutex); -	list_for_each_entry_safe(op, tmp, &optimizing_list, list) { -		WARN_ON(kprobe_disabled(&op->kp)); -		if (arch_optimize_kprobe(op) < 0) -			op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; -		list_del_init(&op->list); +	arch_optimize_kprobes(&optimizing_list); +	mutex_unlock(&text_mutex); +	put_online_cpus(); +} + +/* + * Unoptimize (replace a jump with a breakpoint and remove the breakpoint + * if need) kprobes listed on unoptimizing_list. + */ +static void do_unoptimize_kprobes(void) +{ +	struct optimized_kprobe *op, *tmp; + +	/* Unoptimization must be done anytime */ +	if (list_empty(&unoptimizing_list)) +		return; + +	/* Ditto to do_optimize_kprobes */ +	get_online_cpus(); +	mutex_lock(&text_mutex); +	arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list); +	/* Loop free_list for disarming */ +	list_for_each_entry_safe(op, tmp, &freeing_list, list) { +		/* Disarm probes if marked disabled */ +		if (kprobe_disabled(&op->kp)) +			arch_disarm_kprobe(&op->kp); +		if (kprobe_unused(&op->kp)) { +			/* +			 * Remove unused probes from hash list. After waiting +			 * for synchronization, these probes are reclaimed. +			 * (reclaiming is done by do_free_cleaned_kprobes.) +			 */ +			hlist_del_rcu(&op->kp.hlist); +		} else +			list_del_init(&op->list);  	}  	mutex_unlock(&text_mutex);  	put_online_cpus(); -end: -	mutex_unlock(&kprobe_mutex); +} + +/* Reclaim all kprobes on the free_list */ +static void do_free_cleaned_kprobes(void) +{ +	struct optimized_kprobe *op, *tmp; + +	list_for_each_entry_safe(op, tmp, &freeing_list, list) { +		BUG_ON(!kprobe_unused(&op->kp)); +		list_del_init(&op->list); +		free_aggr_kprobe(&op->kp); +	} +} + +/* Start optimizer after OPTIMIZE_DELAY passed */ +static void kick_kprobe_optimizer(void) +{ +	schedule_delayed_work(&optimizing_work, OPTIMIZE_DELAY); +} + +/* Kprobe jump optimizer */ +static void kprobe_optimizer(struct work_struct *work) +{ +	mutex_lock(&kprobe_mutex); +	/* Lock modules while optimizing kprobes */ +	mutex_lock(&module_mutex); + +	/* +	 * Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed) +	 * kprobes before waiting for quiesence period. +	 */ +	do_unoptimize_kprobes(); + +	/* +	 * Step 2: Wait for quiesence period to ensure all running interrupts +	 * are done. Because optprobe may modify multiple instructions +	 * there is a chance that Nth instruction is interrupted. In that +	 * case, running interrupt can return to 2nd-Nth byte of jump +	 * instruction. This wait is for avoiding it. 
+	 */ +	synchronize_sched(); + +	/* Step 3: Optimize kprobes after quiesence period */ +	do_optimize_kprobes(); + +	/* Step 4: Free cleaned kprobes after quiesence period */ +	do_free_cleaned_kprobes(); +  	mutex_unlock(&module_mutex); +	mutex_unlock(&kprobe_mutex); + +	/* Step 5: Kick optimizer again if needed */ +	if (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list)) +		kick_kprobe_optimizer(); +} + +/* Wait for completing optimization and unoptimization */ +static void wait_for_kprobe_optimizer(void) +{ +	mutex_lock(&kprobe_mutex); + +	while (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list)) { +		mutex_unlock(&kprobe_mutex); + +		/* this will also make optimizing_work execute immmediately */ +		flush_delayed_work(&optimizing_work); +		/* @optimizing_work might not have been queued yet, relax */ +		cpu_relax(); + +		mutex_lock(&kprobe_mutex); +	} + +	mutex_unlock(&kprobe_mutex);  }  /* Optimize kprobe if p is ready to be optimized */ -static __kprobes void optimize_kprobe(struct kprobe *p) +static void optimize_kprobe(struct kprobe *p)  {  	struct optimized_kprobe *op; @@ -495,47 +604,115 @@ static __kprobes void optimize_kprobe(struct kprobe *p)  	/* Check if it is already optimized. */  	if (op->kp.flags & KPROBE_FLAG_OPTIMIZED)  		return; -  	op->kp.flags |= KPROBE_FLAG_OPTIMIZED; -	list_add(&op->list, &optimizing_list); -	if (!delayed_work_pending(&optimizing_work)) -		schedule_delayed_work(&optimizing_work, OPTIMIZE_DELAY); + +	if (!list_empty(&op->list)) +		/* This is under unoptimizing. Just dequeue the probe */ +		list_del_init(&op->list); +	else { +		list_add(&op->list, &optimizing_list); +		kick_kprobe_optimizer(); +	} +} + +/* Short cut to direct unoptimizing */ +static void force_unoptimize_kprobe(struct optimized_kprobe *op) +{ +	get_online_cpus(); +	arch_unoptimize_kprobe(op); +	put_online_cpus(); +	if (kprobe_disabled(&op->kp)) +		arch_disarm_kprobe(&op->kp);  }  /* Unoptimize a kprobe if p is optimized */ -static __kprobes void unoptimize_kprobe(struct kprobe *p) +static void unoptimize_kprobe(struct kprobe *p, bool force)  {  	struct optimized_kprobe *op; -	if ((p->flags & KPROBE_FLAG_OPTIMIZED) && kprobe_aggrprobe(p)) { -		op = container_of(p, struct optimized_kprobe, kp); -		if (!list_empty(&op->list)) -			/* Dequeue from the optimization queue */ +	if (!kprobe_aggrprobe(p) || kprobe_disarmed(p)) +		return; /* This is not an optprobe nor optimized */ + +	op = container_of(p, struct optimized_kprobe, kp); +	if (!kprobe_optimized(p)) { +		/* Unoptimized or unoptimizing case */ +		if (force && !list_empty(&op->list)) { +			/* +			 * Only if this is unoptimizing kprobe and forced, +			 * forcibly unoptimize it. 
(No need to unoptimize +			 * unoptimized kprobe again :) +			 */  			list_del_init(&op->list); -		else -			/* Replace jump with break */ -			arch_unoptimize_kprobe(op); -		op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; +			force_unoptimize_kprobe(op); +		} +		return;  	} + +	op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; +	if (!list_empty(&op->list)) { +		/* Dequeue from the optimization queue */ +		list_del_init(&op->list); +		return; +	} +	/* Optimized kprobe case */ +	if (force) +		/* Forcibly update the code: this is a special case */ +		force_unoptimize_kprobe(op); +	else { +		list_add(&op->list, &unoptimizing_list); +		kick_kprobe_optimizer(); +	} +} + +/* Cancel unoptimizing for reusing */ +static void reuse_unused_kprobe(struct kprobe *ap) +{ +	struct optimized_kprobe *op; + +	BUG_ON(!kprobe_unused(ap)); +	/* +	 * Unused kprobe MUST be on the way of delayed unoptimizing (means +	 * there is still a relative jump) and disabled. +	 */ +	op = container_of(ap, struct optimized_kprobe, kp); +	if (unlikely(list_empty(&op->list))) +		printk(KERN_WARNING "Warning: found a stray unused " +			"aggrprobe@%p\n", ap->addr); +	/* Enable the probe again */ +	ap->flags &= ~KPROBE_FLAG_DISABLED; +	/* Optimize it again (remove from op->list) */ +	BUG_ON(!kprobe_optready(ap)); +	optimize_kprobe(ap);  }  /* Remove optimized instructions */ -static void __kprobes kill_optimized_kprobe(struct kprobe *p) +static void kill_optimized_kprobe(struct kprobe *p)  {  	struct optimized_kprobe *op;  	op = container_of(p, struct optimized_kprobe, kp); -	if (!list_empty(&op->list)) { -		/* Dequeue from the optimization queue */ +	if (!list_empty(&op->list)) +		/* Dequeue from the (un)optimization queue */  		list_del_init(&op->list); -		op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; +	op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; + +	if (kprobe_unused(p)) { +		/* Enqueue if it is unused */ +		list_add(&op->list, &freeing_list); +		/* +		 * Remove unused probes from the hash list. After waiting +		 * for synchronization, this probe is reclaimed. +		 * (reclaiming is done by do_free_cleaned_kprobes().) +		 */ +		hlist_del_rcu(&op->kp.hlist);  	} -	/* Don't unoptimize, because the target code will be freed. */ + +	/* Don't touch the code, because it is already freed. 
*/  	arch_remove_optimized_kprobe(op);  }  /* Try to prepare optimized instructions */ -static __kprobes void prepare_optimized_kprobe(struct kprobe *p) +static void prepare_optimized_kprobe(struct kprobe *p)  {  	struct optimized_kprobe *op; @@ -543,18 +720,8 @@ static __kprobes void prepare_optimized_kprobe(struct kprobe *p)  	arch_prepare_optimized_kprobe(op);  } -/* Free optimized instructions and optimized_kprobe */ -static __kprobes void free_aggr_kprobe(struct kprobe *p) -{ -	struct optimized_kprobe *op; - -	op = container_of(p, struct optimized_kprobe, kp); -	arch_remove_optimized_kprobe(op); -	kfree(op); -} -  /* Allocate new optimized_kprobe and try to prepare optimized instructions */ -static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p) +static struct kprobe *alloc_aggr_kprobe(struct kprobe *p)  {  	struct optimized_kprobe *op; @@ -569,85 +736,98 @@ static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p)  	return &op->kp;  } -static void __kprobes init_aggr_kprobe(struct kprobe *ap, struct kprobe *p); +static void init_aggr_kprobe(struct kprobe *ap, struct kprobe *p);  /*   * Prepare an optimized_kprobe and optimize it   * NOTE: p must be a normal registered kprobe   */ -static __kprobes void try_to_optimize_kprobe(struct kprobe *p) +static void try_to_optimize_kprobe(struct kprobe *p)  {  	struct kprobe *ap;  	struct optimized_kprobe *op; +	/* Impossible to optimize ftrace-based kprobe */ +	if (kprobe_ftrace(p)) +		return; + +	/* For preparing optimization, jump_label_text_reserved() is called */ +	jump_label_lock(); +	mutex_lock(&text_mutex); +  	ap = alloc_aggr_kprobe(p);  	if (!ap) -		return; +		goto out;  	op = container_of(ap, struct optimized_kprobe, kp);  	if (!arch_prepared_optinsn(&op->optinsn)) {  		/* If failed to setup optimizing, fallback to kprobe */ -		free_aggr_kprobe(ap); -		return; +		arch_remove_optimized_kprobe(op); +		kfree(op); +		goto out;  	}  	init_aggr_kprobe(ap, p); -	optimize_kprobe(ap); +	optimize_kprobe(ap);	/* This just kicks optimizer thread */ + +out: +	mutex_unlock(&text_mutex); +	jump_label_unlock();  }  #ifdef CONFIG_SYSCTL -/* This should be called with kprobe_mutex locked */ -static void __kprobes optimize_all_kprobes(void) +static void optimize_all_kprobes(void)  {  	struct hlist_head *head; -	struct hlist_node *node;  	struct kprobe *p;  	unsigned int i; +	mutex_lock(&kprobe_mutex);  	/* If optimization is already allowed, just return */  	if (kprobes_allow_optimization) -		return; +		goto out;  	kprobes_allow_optimization = true;  	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {  		head = &kprobe_table[i]; -		hlist_for_each_entry_rcu(p, node, head, hlist) +		hlist_for_each_entry_rcu(p, head, hlist)  			if (!kprobe_disabled(p))  				optimize_kprobe(p);  	}  	printk(KERN_INFO "Kprobes globally optimized\n"); +out: +	mutex_unlock(&kprobe_mutex);  } -/* This should be called with kprobe_mutex locked */ -static void __kprobes unoptimize_all_kprobes(void) +static void unoptimize_all_kprobes(void)  {  	struct hlist_head *head; -	struct hlist_node *node;  	struct kprobe *p;  	unsigned int i; +	mutex_lock(&kprobe_mutex);  	/* If optimization is already prohibited, just return */ -	if (!kprobes_allow_optimization) +	if (!kprobes_allow_optimization) { +		mutex_unlock(&kprobe_mutex);  		return; +	}  	kprobes_allow_optimization = false; -	printk(KERN_INFO "Kprobes globally unoptimized\n"); -	get_online_cpus();	/* For avoiding text_mutex deadlock */ -	mutex_lock(&text_mutex);  	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {  		head = 
&kprobe_table[i]; -		hlist_for_each_entry_rcu(p, node, head, hlist) { +		hlist_for_each_entry_rcu(p, head, hlist) {  			if (!kprobe_disabled(p)) -				unoptimize_kprobe(p); +				unoptimize_kprobe(p, false);  		}  	} +	mutex_unlock(&kprobe_mutex); -	mutex_unlock(&text_mutex); -	put_online_cpus(); -	/* Allow all currently running kprobes to complete */ -	synchronize_sched(); +	/* Wait for unoptimizing completion */ +	wait_for_kprobe_optimizer(); +	printk(KERN_INFO "Kprobes globally unoptimized\n");  } +static DEFINE_MUTEX(kprobe_sysctl_mutex);  int sysctl_kprobes_optimization;  int proc_kprobes_optimization_handler(struct ctl_table *table, int write,  				      void __user *buffer, size_t *length, @@ -655,7 +835,7 @@ int proc_kprobes_optimization_handler(struct ctl_table *table, int write,  {  	int ret; -	mutex_lock(&kprobe_mutex); +	mutex_lock(&kprobe_sysctl_mutex);  	sysctl_kprobes_optimization = kprobes_allow_optimization ? 1 : 0;  	ret = proc_dointvec_minmax(table, write, buffer, length, ppos); @@ -663,62 +843,133 @@ int proc_kprobes_optimization_handler(struct ctl_table *table, int write,  		optimize_all_kprobes();  	else  		unoptimize_all_kprobes(); -	mutex_unlock(&kprobe_mutex); +	mutex_unlock(&kprobe_sysctl_mutex);  	return ret;  }  #endif /* CONFIG_SYSCTL */ -static void __kprobes __arm_kprobe(struct kprobe *p) +/* Put a breakpoint for a probe. Must be called with text_mutex locked */ +static void __arm_kprobe(struct kprobe *p)  { -	struct kprobe *old_p; +	struct kprobe *_p;  	/* Check collision with other optimized kprobes */ -	old_p = get_optimized_kprobe((unsigned long)p->addr); -	if (unlikely(old_p)) -		unoptimize_kprobe(old_p); /* Fallback to unoptimized kprobe */ +	_p = get_optimized_kprobe((unsigned long)p->addr); +	if (unlikely(_p)) +		/* Fallback to unoptimized kprobe */ +		unoptimize_kprobe(_p, true);  	arch_arm_kprobe(p);  	optimize_kprobe(p);	/* Try to optimize (add kprobe to a list) */  } -static void __kprobes __disarm_kprobe(struct kprobe *p) +/* Remove the breakpoint of a probe. Must be called with text_mutex locked */ +static void __disarm_kprobe(struct kprobe *p, bool reopt)  { -	struct kprobe *old_p; +	struct kprobe *_p; -	unoptimize_kprobe(p);	/* Try to unoptimize */ -	arch_disarm_kprobe(p); +	unoptimize_kprobe(p, false);	/* Try to unoptimize */ -	/* If another kprobe was blocked, optimize it. */ -	old_p = get_optimized_kprobe((unsigned long)p->addr); -	if (unlikely(old_p)) -		optimize_kprobe(old_p); +	if (!kprobe_queued(p)) { +		arch_disarm_kprobe(p); +		/* If another kprobe was blocked, optimize it. 
*/ +		_p = get_optimized_kprobe((unsigned long)p->addr); +		if (unlikely(_p) && reopt) +			optimize_kprobe(_p); +	} +	/* TODO: reoptimize others after unoptimized this probe */  }  #else /* !CONFIG_OPTPROBES */  #define optimize_kprobe(p)			do {} while (0) -#define unoptimize_kprobe(p)			do {} while (0) +#define unoptimize_kprobe(p, f)			do {} while (0)  #define kill_optimized_kprobe(p)		do {} while (0)  #define prepare_optimized_kprobe(p)		do {} while (0)  #define try_to_optimize_kprobe(p)		do {} while (0)  #define __arm_kprobe(p)				arch_arm_kprobe(p) -#define __disarm_kprobe(p)			arch_disarm_kprobe(p) +#define __disarm_kprobe(p, o)			arch_disarm_kprobe(p) +#define kprobe_disarmed(p)			kprobe_disabled(p) +#define wait_for_kprobe_optimizer()		do {} while (0) + +/* There should be no unused kprobes can be reused without optimization */ +static void reuse_unused_kprobe(struct kprobe *ap) +{ +	printk(KERN_ERR "Error: There should be no unused kprobe here.\n"); +	BUG_ON(kprobe_unused(ap)); +} -static __kprobes void free_aggr_kprobe(struct kprobe *p) +static void free_aggr_kprobe(struct kprobe *p)  { +	arch_remove_kprobe(p);  	kfree(p);  } -static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p) +static struct kprobe *alloc_aggr_kprobe(struct kprobe *p)  {  	return kzalloc(sizeof(struct kprobe), GFP_KERNEL);  }  #endif /* CONFIG_OPTPROBES */ +#ifdef CONFIG_KPROBES_ON_FTRACE +static struct ftrace_ops kprobe_ftrace_ops __read_mostly = { +	.func = kprobe_ftrace_handler, +	.flags = FTRACE_OPS_FL_SAVE_REGS, +}; +static int kprobe_ftrace_enabled; + +/* Must ensure p->addr is really on ftrace */ +static int prepare_kprobe(struct kprobe *p) +{ +	if (!kprobe_ftrace(p)) +		return arch_prepare_kprobe(p); + +	return arch_prepare_kprobe_ftrace(p); +} + +/* Caller must lock kprobe_mutex */ +static void arm_kprobe_ftrace(struct kprobe *p) +{ +	int ret; + +	ret = ftrace_set_filter_ip(&kprobe_ftrace_ops, +				   (unsigned long)p->addr, 0, 0); +	WARN(ret < 0, "Failed to arm kprobe-ftrace at %p (%d)\n", p->addr, ret); +	kprobe_ftrace_enabled++; +	if (kprobe_ftrace_enabled == 1) { +		ret = register_ftrace_function(&kprobe_ftrace_ops); +		WARN(ret < 0, "Failed to init kprobe-ftrace (%d)\n", ret); +	} +} + +/* Caller must lock kprobe_mutex */ +static void disarm_kprobe_ftrace(struct kprobe *p) +{ +	int ret; + +	kprobe_ftrace_enabled--; +	if (kprobe_ftrace_enabled == 0) { +		ret = unregister_ftrace_function(&kprobe_ftrace_ops); +		WARN(ret < 0, "Failed to init kprobe-ftrace (%d)\n", ret); +	} +	ret = ftrace_set_filter_ip(&kprobe_ftrace_ops, +			   (unsigned long)p->addr, 1, 0); +	WARN(ret < 0, "Failed to disarm kprobe-ftrace at %p (%d)\n", p->addr, ret); +} +#else	/* !CONFIG_KPROBES_ON_FTRACE */ +#define prepare_kprobe(p)	arch_prepare_kprobe(p) +#define arm_kprobe_ftrace(p)	do {} while (0) +#define disarm_kprobe_ftrace(p)	do {} while (0) +#endif +  /* Arm a kprobe with text_mutex */ -static void __kprobes arm_kprobe(struct kprobe *kp) +static void arm_kprobe(struct kprobe *kp)  { +	if (unlikely(kprobe_ftrace(kp))) { +		arm_kprobe_ftrace(kp); +		return; +	}  	/*  	 * Here, since __arm_kprobe() doesn't use stop_machine(),  	 * this doesn't cause deadlock on text_mutex. 
So, we don't @@ -730,20 +981,23 @@ static void __kprobes arm_kprobe(struct kprobe *kp)  }  /* Disarm a kprobe with text_mutex */ -static void __kprobes disarm_kprobe(struct kprobe *kp) +static void disarm_kprobe(struct kprobe *kp, bool reopt)  { -	get_online_cpus();	/* For avoiding text_mutex deadlock */ +	if (unlikely(kprobe_ftrace(kp))) { +		disarm_kprobe_ftrace(kp); +		return; +	} +	/* Ditto */  	mutex_lock(&text_mutex); -	__disarm_kprobe(kp); +	__disarm_kprobe(kp, reopt);  	mutex_unlock(&text_mutex); -	put_online_cpus();  }  /*   * Aggregate handlers for multiple kprobes support - these handlers   * take care of invoking the individual kprobe handlers on p->list   */ -static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs) +static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)  {  	struct kprobe *kp; @@ -757,9 +1011,10 @@ static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)  	}  	return 0;  } +NOKPROBE_SYMBOL(aggr_pre_handler); -static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs, -					unsigned long flags) +static void aggr_post_handler(struct kprobe *p, struct pt_regs *regs, +			      unsigned long flags)  {  	struct kprobe *kp; @@ -771,11 +1026,12 @@ static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs,  		}  	}  } +NOKPROBE_SYMBOL(aggr_post_handler); -static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs, -					int trapnr) +static int aggr_fault_handler(struct kprobe *p, struct pt_regs *regs, +			      int trapnr)  { -	struct kprobe *cur = __get_cpu_var(kprobe_instance); +	struct kprobe *cur = __this_cpu_read(kprobe_instance);  	/*  	 * if we faulted "during" the execution of a user specified @@ -787,10 +1043,11 @@ static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs,  	}  	return 0;  } +NOKPROBE_SYMBOL(aggr_fault_handler); -static int __kprobes aggr_break_handler(struct kprobe *p, struct pt_regs *regs) +static int aggr_break_handler(struct kprobe *p, struct pt_regs *regs)  { -	struct kprobe *cur = __get_cpu_var(kprobe_instance); +	struct kprobe *cur = __this_cpu_read(kprobe_instance);  	int ret = 0;  	if (cur && cur->break_handler) { @@ -800,9 +1057,10 @@ static int __kprobes aggr_break_handler(struct kprobe *p, struct pt_regs *regs)  	reset_kprobe_instance();  	return ret;  } +NOKPROBE_SYMBOL(aggr_break_handler);  /* Walks the list and increments nmissed count for multiprobe case */ -void __kprobes kprobes_inc_nmissed_count(struct kprobe *p) +void kprobes_inc_nmissed_count(struct kprobe *p)  {  	struct kprobe *kp;  	if (!kprobe_aggrprobe(p)) { @@ -813,9 +1071,10 @@ void __kprobes kprobes_inc_nmissed_count(struct kprobe *p)  	}  	return;  } +NOKPROBE_SYMBOL(kprobes_inc_nmissed_count); -void __kprobes recycle_rp_inst(struct kretprobe_instance *ri, -				struct hlist_head *head) +void recycle_rp_inst(struct kretprobe_instance *ri, +		     struct hlist_head *head)  {  	struct kretprobe *rp = ri->rp; @@ -823,52 +1082,57 @@ void __kprobes recycle_rp_inst(struct kretprobe_instance *ri,  	hlist_del(&ri->hlist);  	INIT_HLIST_NODE(&ri->hlist);  	if (likely(rp)) { -		spin_lock(&rp->lock); +		raw_spin_lock(&rp->lock);  		hlist_add_head(&ri->hlist, &rp->free_instances); -		spin_unlock(&rp->lock); +		raw_spin_unlock(&rp->lock);  	} else  		/* Unregistering */  		hlist_add_head(&ri->hlist, head);  } +NOKPROBE_SYMBOL(recycle_rp_inst); -void __kprobes kretprobe_hash_lock(struct task_struct *tsk, +void kretprobe_hash_lock(struct 
task_struct *tsk,  			 struct hlist_head **head, unsigned long *flags)  __acquires(hlist_lock)  {  	unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS); -	spinlock_t *hlist_lock; +	raw_spinlock_t *hlist_lock;  	*head = &kretprobe_inst_table[hash];  	hlist_lock = kretprobe_table_lock_ptr(hash); -	spin_lock_irqsave(hlist_lock, *flags); +	raw_spin_lock_irqsave(hlist_lock, *flags);  } +NOKPROBE_SYMBOL(kretprobe_hash_lock); -static void __kprobes kretprobe_table_lock(unsigned long hash, -	unsigned long *flags) +static void kretprobe_table_lock(unsigned long hash, +				 unsigned long *flags)  __acquires(hlist_lock)  { -	spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); -	spin_lock_irqsave(hlist_lock, *flags); +	raw_spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); +	raw_spin_lock_irqsave(hlist_lock, *flags);  } +NOKPROBE_SYMBOL(kretprobe_table_lock); -void __kprobes kretprobe_hash_unlock(struct task_struct *tsk, -	unsigned long *flags) +void kretprobe_hash_unlock(struct task_struct *tsk, +			   unsigned long *flags)  __releases(hlist_lock)  {  	unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS); -	spinlock_t *hlist_lock; +	raw_spinlock_t *hlist_lock;  	hlist_lock = kretprobe_table_lock_ptr(hash); -	spin_unlock_irqrestore(hlist_lock, *flags); +	raw_spin_unlock_irqrestore(hlist_lock, *flags);  } +NOKPROBE_SYMBOL(kretprobe_hash_unlock); -static void __kprobes kretprobe_table_unlock(unsigned long hash, -       unsigned long *flags) +static void kretprobe_table_unlock(unsigned long hash, +				   unsigned long *flags)  __releases(hlist_lock)  { -	spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); -	spin_unlock_irqrestore(hlist_lock, *flags); +	raw_spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); +	raw_spin_unlock_irqrestore(hlist_lock, *flags);  } +NOKPROBE_SYMBOL(kretprobe_table_unlock);  /*   * This function is called from finish_task_switch when task tk becomes dead, @@ -876,55 +1140,56 @@ __releases(hlist_lock)   * with this task. These left over instances represent probed functions   * that have been called but will never return.   */ -void __kprobes kprobe_flush_task(struct task_struct *tk) +void kprobe_flush_task(struct task_struct *tk)  {  	struct kretprobe_instance *ri;  	struct hlist_head *head, empty_rp; -	struct hlist_node *node, *tmp; +	struct hlist_node *tmp;  	unsigned long hash, flags = 0;  	if (unlikely(!kprobes_initialized))  		/* Early boot.  kretprobe_table_locks not yet initialized. 
*/  		return; +	INIT_HLIST_HEAD(&empty_rp);  	hash = hash_ptr(tk, KPROBE_HASH_BITS);  	head = &kretprobe_inst_table[hash];  	kretprobe_table_lock(hash, &flags); -	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { +	hlist_for_each_entry_safe(ri, tmp, head, hlist) {  		if (ri->task == tk)  			recycle_rp_inst(ri, &empty_rp);  	}  	kretprobe_table_unlock(hash, &flags); -	INIT_HLIST_HEAD(&empty_rp); -	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { +	hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {  		hlist_del(&ri->hlist);  		kfree(ri);  	}  } +NOKPROBE_SYMBOL(kprobe_flush_task);  static inline void free_rp_inst(struct kretprobe *rp)  {  	struct kretprobe_instance *ri; -	struct hlist_node *pos, *next; +	struct hlist_node *next; -	hlist_for_each_entry_safe(ri, pos, next, &rp->free_instances, hlist) { +	hlist_for_each_entry_safe(ri, next, &rp->free_instances, hlist) {  		hlist_del(&ri->hlist);  		kfree(ri);  	}  } -static void __kprobes cleanup_rp_inst(struct kretprobe *rp) +static void cleanup_rp_inst(struct kretprobe *rp)  {  	unsigned long flags, hash;  	struct kretprobe_instance *ri; -	struct hlist_node *pos, *next; +	struct hlist_node *next;  	struct hlist_head *head;  	/* No race here */  	for (hash = 0; hash < KPROBE_TABLE_SIZE; hash++) {  		kretprobe_table_lock(hash, &flags);  		head = &kretprobe_inst_table[hash]; -		hlist_for_each_entry_safe(ri, pos, next, head, hlist) { +		hlist_for_each_entry_safe(ri, next, head, hlist) {  			if (ri->rp == rp)  				ri->rp = NULL;  		} @@ -932,17 +1197,18 @@ static void __kprobes cleanup_rp_inst(struct kretprobe *rp)  	}  	free_rp_inst(rp);  } +NOKPROBE_SYMBOL(cleanup_rp_inst);  /*  * Add the new probe to ap->list. Fail if this is the  * second jprobe at the address - two jprobes can't coexist  */ -static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p) +static int add_new_kprobe(struct kprobe *ap, struct kprobe *p)  {  	BUG_ON(kprobe_gone(ap) || kprobe_gone(p));  	if (p->break_handler || p->post_handler) -		unoptimize_kprobe(ap);	/* Fall back to normal kprobe */ +		unoptimize_kprobe(ap, true);	/* Fall back to normal kprobe */  	if (p->break_handler) {  		if (ap->break_handler) @@ -954,12 +1220,6 @@ static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p)  	if (p->post_handler && !ap->post_handler)  		ap->post_handler = aggr_post_handler; -	if (kprobe_disabled(ap) && !kprobe_disabled(p)) { -		ap->flags &= ~KPROBE_FLAG_DISABLED; -		if (!kprobes_all_disarmed) -			/* Arm the breakpoint again. */ -			__arm_kprobe(ap); -	}  	return 0;  } @@ -967,7 +1227,7 @@ static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p)   * Fill in the required fields of the "manager kprobe". Replace the   * earlier kprobe in the hlist with the manager kprobe   */ -static void __kprobes init_aggr_kprobe(struct kprobe *ap, struct kprobe *p) +static void init_aggr_kprobe(struct kprobe *ap, struct kprobe *p)  {  	/* Copy p's insn slot to ap */  	copy_kprobe(p, ap); @@ -993,19 +1253,31 @@ static void __kprobes init_aggr_kprobe(struct kprobe *ap, struct kprobe *p)   * This is the second or subsequent kprobe at the address - handle   * the intricacies   */ -static int __kprobes register_aggr_kprobe(struct kprobe *old_p, -					  struct kprobe *p) +static int register_aggr_kprobe(struct kprobe *orig_p, struct kprobe *p)  {  	int ret = 0; -	struct kprobe *ap = old_p; +	struct kprobe *ap = orig_p; -	if (!kprobe_aggrprobe(old_p)) { -		/* If old_p is not an aggr_kprobe, create new aggr_kprobe. 
*/ -		ap = alloc_aggr_kprobe(old_p); -		if (!ap) -			return -ENOMEM; -		init_aggr_kprobe(ap, old_p); -	} +	/* For preparing optimization, jump_label_text_reserved() is called */ +	jump_label_lock(); +	/* +	 * Get online CPUs to avoid text_mutex deadlock.with stop machine, +	 * which is invoked by unoptimize_kprobe() in add_new_kprobe() +	 */ +	get_online_cpus(); +	mutex_lock(&text_mutex); + +	if (!kprobe_aggrprobe(orig_p)) { +		/* If orig_p is not an aggr_kprobe, create new aggr_kprobe. */ +		ap = alloc_aggr_kprobe(orig_p); +		if (!ap) { +			ret = -ENOMEM; +			goto out; +		} +		init_aggr_kprobe(ap, orig_p); +	} else if (kprobe_unused(ap)) +		/* This probe is going to die. Rescue it */ +		reuse_unused_kprobe(ap);  	if (kprobe_gone(ap)) {  		/* @@ -1021,7 +1293,7 @@ static int __kprobes register_aggr_kprobe(struct kprobe *old_p,  			 * free aggr_probe. It will be used next time, or  			 * freed by unregister_kprobe.  			 */ -			return ret; +			goto out;  		/* Prepare optimized instructions if possible. */  		prepare_optimized_kprobe(ap); @@ -1036,159 +1308,198 @@ static int __kprobes register_aggr_kprobe(struct kprobe *old_p,  	/* Copy ap's insn slot to p */  	copy_kprobe(ap, p); -	return add_new_kprobe(ap, p); -} +	ret = add_new_kprobe(ap, p); -/* Try to disable aggr_kprobe, and return 1 if succeeded.*/ -static int __kprobes try_to_disable_aggr_kprobe(struct kprobe *p) -{ -	struct kprobe *kp; +out: +	mutex_unlock(&text_mutex); +	put_online_cpus(); +	jump_label_unlock(); -	list_for_each_entry_rcu(kp, &p->list, list) { -		if (!kprobe_disabled(kp)) -			/* -			 * There is an active probe on the list. -			 * We can't disable aggr_kprobe. -			 */ -			return 0; +	if (ret == 0 && kprobe_disabled(ap) && !kprobe_disabled(p)) { +		ap->flags &= ~KPROBE_FLAG_DISABLED; +		if (!kprobes_all_disarmed) +			/* Arm the breakpoint again. */ +			arm_kprobe(ap);  	} -	p->flags |= KPROBE_FLAG_DISABLED; -	return 1; +	return ret;  } -static int __kprobes in_kprobes_functions(unsigned long addr) +bool __weak arch_within_kprobe_blacklist(unsigned long addr)  { -	struct kprobe_blackpoint *kb; +	/* The __kprobes marked functions and entry code must not be probed */ +	return addr >= (unsigned long)__kprobes_text_start && +	       addr < (unsigned long)__kprobes_text_end; +} -	if (addr >= (unsigned long)__kprobes_text_start && -	    addr < (unsigned long)__kprobes_text_end) -		return -EINVAL; +static bool within_kprobe_blacklist(unsigned long addr) +{ +	struct kprobe_blacklist_entry *ent; + +	if (arch_within_kprobe_blacklist(addr)) +		return true;  	/*  	 * If there exists a kprobe_blacklist, verify and  	 * fail any probe registration in the prohibited area  	 */ -	for (kb = kprobe_blacklist; kb->name != NULL; kb++) { -		if (kb->start_addr) { -			if (addr >= kb->start_addr && -			    addr < (kb->start_addr + kb->range)) -				return -EINVAL; -		} +	list_for_each_entry(ent, &kprobe_blacklist, list) { +		if (addr >= ent->start_addr && addr < ent->end_addr) +			return true;  	} -	return 0; + +	return false;  }  /*   * If we have a symbol_name argument, look it up and add the offset field   * to it. This way, we can specify a relative address to a symbol. + * This returns encoded errors if it fails to look up symbol or invalid + * combination of parameters.   
*/ -static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p) +static kprobe_opcode_t *kprobe_addr(struct kprobe *p)  {  	kprobe_opcode_t *addr = p->addr; + +	if ((p->symbol_name && p->addr) || +	    (!p->symbol_name && !p->addr)) +		goto invalid; +  	if (p->symbol_name) { -		if (addr) -			return NULL;  		kprobe_lookup_name(p->symbol_name, addr); +		if (!addr) +			return ERR_PTR(-ENOENT);  	} -	if (!addr) -		return NULL; -	return (kprobe_opcode_t *)(((char *)addr) + p->offset); +	addr = (kprobe_opcode_t *)(((char *)addr) + p->offset); +	if (addr) +		return addr; + +invalid: +	return ERR_PTR(-EINVAL);  }  /* Check passed kprobe is valid and return kprobe in kprobe_table. */ -static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p) +static struct kprobe *__get_valid_kprobe(struct kprobe *p)  { -	struct kprobe *old_p, *list_p; +	struct kprobe *ap, *list_p; -	old_p = get_kprobe(p->addr); -	if (unlikely(!old_p)) +	ap = get_kprobe(p->addr); +	if (unlikely(!ap))  		return NULL; -	if (p != old_p) { -		list_for_each_entry_rcu(list_p, &old_p->list, list) +	if (p != ap) { +		list_for_each_entry_rcu(list_p, &ap->list, list)  			if (list_p == p)  			/* kprobe p is a valid probe */  				goto valid;  		return NULL;  	}  valid: -	return old_p; +	return ap;  }  /* Return error if the kprobe is being re-registered */  static inline int check_kprobe_rereg(struct kprobe *p)  {  	int ret = 0; -	struct kprobe *old_p;  	mutex_lock(&kprobe_mutex); -	old_p = __get_valid_kprobe(p); -	if (old_p) +	if (__get_valid_kprobe(p))  		ret = -EINVAL;  	mutex_unlock(&kprobe_mutex); +  	return ret;  } -int __kprobes register_kprobe(struct kprobe *p) +static int check_kprobe_address_safe(struct kprobe *p, +				     struct module **probed_mod)  {  	int ret = 0; -	struct kprobe *old_p; -	struct module *probed_mod; -	kprobe_opcode_t *addr; +	unsigned long ftrace_addr; -	addr = kprobe_addr(p); -	if (!addr) +	/* +	 * If the address is located on a ftrace nop, set the +	 * breakpoint to the following instruction. +	 */ +	ftrace_addr = ftrace_location((unsigned long)p->addr); +	if (ftrace_addr) { +#ifdef CONFIG_KPROBES_ON_FTRACE +		/* Given address is not on the instruction boundary */ +		if ((unsigned long)p->addr != ftrace_addr) +			return -EILSEQ; +		p->flags |= KPROBE_FLAG_FTRACE; +#else	/* !CONFIG_KPROBES_ON_FTRACE */  		return -EINVAL; -	p->addr = addr; - -	ret = check_kprobe_rereg(p); -	if (ret) -		return ret; +#endif +	}  	jump_label_lock();  	preempt_disable(); -	if (!kernel_text_address((unsigned long) p->addr) || -	    in_kprobes_functions((unsigned long) p->addr) || -	    ftrace_text_reserved(p->addr, p->addr) || -	    jump_label_text_reserved(p->addr, p->addr)) -		goto fail_with_jump_label; -	/* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */ -	p->flags &= KPROBE_FLAG_DISABLED; +	/* Ensure it is not in reserved area nor out of text */ +	if (!kernel_text_address((unsigned long) p->addr) || +	    within_kprobe_blacklist((unsigned long) p->addr) || +	    jump_label_text_reserved(p->addr, p->addr)) { +		ret = -EINVAL; +		goto out; +	} -	/* -	 * Check if are we probing a module. -	 */ -	probed_mod = __module_text_address((unsigned long) p->addr); -	if (probed_mod) { +	/* Check if are we probing a module */ +	*probed_mod = __module_text_address((unsigned long) p->addr); +	if (*probed_mod) {  		/*  		 * We must hold a refcount of the probed module while updating  		 * its code to prohibit unexpected unloading.  		 
*/ -		if (unlikely(!try_module_get(probed_mod))) -			goto fail_with_jump_label; +		if (unlikely(!try_module_get(*probed_mod))) { +			ret = -ENOENT; +			goto out; +		}  		/*  		 * If the module freed .init.text, we couldn't insert  		 * kprobes in there.  		 */ -		if (within_module_init((unsigned long)p->addr, probed_mod) && -		    probed_mod->state != MODULE_STATE_COMING) { -			module_put(probed_mod); -			goto fail_with_jump_label; +		if (within_module_init((unsigned long)p->addr, *probed_mod) && +		    (*probed_mod)->state != MODULE_STATE_COMING) { +			module_put(*probed_mod); +			*probed_mod = NULL; +			ret = -ENOENT;  		}  	} +out:  	preempt_enable();  	jump_label_unlock(); +	return ret; +} + +int register_kprobe(struct kprobe *p) +{ +	int ret; +	struct kprobe *old_p; +	struct module *probed_mod; +	kprobe_opcode_t *addr; + +	/* Adjust probe address from symbol */ +	addr = kprobe_addr(p); +	if (IS_ERR(addr)) +		return PTR_ERR(addr); +	p->addr = addr; + +	ret = check_kprobe_rereg(p); +	if (ret) +		return ret; + +	/* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */ +	p->flags &= KPROBE_FLAG_DISABLED;  	p->nmissed = 0;  	INIT_LIST_HEAD(&p->list); -	mutex_lock(&kprobe_mutex); -	jump_label_lock(); /* needed to call jump_label_text_reserved() */ +	ret = check_kprobe_address_safe(p, &probed_mod); +	if (ret) +		return ret; -	get_online_cpus();	/* For avoiding text_mutex deadlock. */ -	mutex_lock(&text_mutex); +	mutex_lock(&kprobe_mutex);  	old_p = get_kprobe(p->addr);  	if (old_p) { @@ -1197,7 +1508,9 @@ int __kprobes register_kprobe(struct kprobe *p)  		goto out;  	} -	ret = arch_prepare_kprobe(p); +	mutex_lock(&text_mutex);	/* Avoiding text modification */ +	ret = prepare_kprobe(p); +	mutex_unlock(&text_mutex);  	if (ret)  		goto out; @@ -1206,93 +1519,139 @@ int __kprobes register_kprobe(struct kprobe *p)  		       &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);  	if (!kprobes_all_disarmed && !kprobe_disabled(p)) -		__arm_kprobe(p); +		arm_kprobe(p);  	/* Try to optimize kprobe */  	try_to_optimize_kprobe(p);  out: -	mutex_unlock(&text_mutex); -	put_online_cpus(); -	jump_label_unlock();  	mutex_unlock(&kprobe_mutex);  	if (probed_mod)  		module_put(probed_mod);  	return ret; - -fail_with_jump_label: -	preempt_enable(); -	jump_label_unlock(); -	return -EINVAL;  }  EXPORT_SYMBOL_GPL(register_kprobe); +/* Check if all probes on the aggrprobe are disabled */ +static int aggr_kprobe_disabled(struct kprobe *ap) +{ +	struct kprobe *kp; + +	list_for_each_entry_rcu(kp, &ap->list, list) +		if (!kprobe_disabled(kp)) +			/* +			 * There is an active probe on the list. +			 * We can't disable this ap. +			 */ +			return 0; + +	return 1; +} + +/* Disable one kprobe: Make sure called under kprobe_mutex is locked */ +static struct kprobe *__disable_kprobe(struct kprobe *p) +{ +	struct kprobe *orig_p; + +	/* Get an original kprobe for return */ +	orig_p = __get_valid_kprobe(p); +	if (unlikely(orig_p == NULL)) +		return NULL; + +	if (!kprobe_disabled(p)) { +		/* Disable probe if it is a child probe */ +		if (p != orig_p) +			p->flags |= KPROBE_FLAG_DISABLED; + +		/* Try to disarm and disable this/parent probe */ +		if (p == orig_p || aggr_kprobe_disabled(orig_p)) { +			disarm_kprobe(orig_p, true); +			orig_p->flags |= KPROBE_FLAG_DISABLED; +		} +	} + +	return orig_p; +} +  /*   * Unregister a kprobe without a scheduler synchronization.   
*/ -static int __kprobes __unregister_kprobe_top(struct kprobe *p) +static int __unregister_kprobe_top(struct kprobe *p)  { -	struct kprobe *old_p, *list_p; +	struct kprobe *ap, *list_p; -	old_p = __get_valid_kprobe(p); -	if (old_p == NULL) +	/* Disable kprobe. This will disarm it if needed. */ +	ap = __disable_kprobe(p); +	if (ap == NULL)  		return -EINVAL; -	if (old_p == p || -	    (kprobe_aggrprobe(old_p) && -	     list_is_singular(&old_p->list))) { +	if (ap == p)  		/* -		 * Only probe on the hash list. Disarm only if kprobes are -		 * enabled and not gone - otherwise, the breakpoint would -		 * already have been removed. We save on flushing icache. +		 * This probe is an independent(and non-optimized) kprobe +		 * (not an aggrprobe). Remove from the hash list.  		 */ -		if (!kprobes_all_disarmed && !kprobe_disabled(old_p)) -			disarm_kprobe(old_p); -		hlist_del_rcu(&old_p->hlist); -	} else { +		goto disarmed; + +	/* Following process expects this probe is an aggrprobe */ +	WARN_ON(!kprobe_aggrprobe(ap)); + +	if (list_is_singular(&ap->list) && kprobe_disarmed(ap)) +		/* +		 * !disarmed could be happen if the probe is under delayed +		 * unoptimizing. +		 */ +		goto disarmed; +	else { +		/* If disabling probe has special handlers, update aggrprobe */  		if (p->break_handler && !kprobe_gone(p)) -			old_p->break_handler = NULL; +			ap->break_handler = NULL;  		if (p->post_handler && !kprobe_gone(p)) { -			list_for_each_entry_rcu(list_p, &old_p->list, list) { +			list_for_each_entry_rcu(list_p, &ap->list, list) {  				if ((list_p != p) && (list_p->post_handler))  					goto noclean;  			} -			old_p->post_handler = NULL; +			ap->post_handler = NULL;  		}  noclean: +		/* +		 * Remove from the aggrprobe: this path will do nothing in +		 * __unregister_kprobe_bottom(). +		 */  		list_del_rcu(&p->list); -		if (!kprobe_disabled(old_p)) { -			try_to_disable_aggr_kprobe(old_p); -			if (!kprobes_all_disarmed) { -				if (kprobe_disabled(old_p)) -					disarm_kprobe(old_p); -				else -					/* Try to optimize this probe again */ -					optimize_kprobe(old_p); -			} -		} +		if (!kprobe_disabled(ap) && !kprobes_all_disarmed) +			/* +			 * Try to optimize this probe again, because post +			 * handler may have been changed. +			 */ +			optimize_kprobe(ap);  	}  	return 0; + +disarmed: +	BUG_ON(!kprobe_disarmed(ap)); +	hlist_del_rcu(&ap->hlist); +	return 0;  } -static void __kprobes __unregister_kprobe_bottom(struct kprobe *p) +static void __unregister_kprobe_bottom(struct kprobe *p)  { -	struct kprobe *old_p; +	struct kprobe *ap;  	if (list_empty(&p->list)) +		/* This is an independent kprobe */  		arch_remove_kprobe(p);  	else if (list_is_singular(&p->list)) { -		/* "p" is the last child of an aggr_kprobe */ -		old_p = list_entry(p->list.next, struct kprobe, list); +		/* This is the last child of an aggrprobe */ +		ap = list_entry(p->list.next, struct kprobe, list);  		list_del(&p->list); -		arch_remove_kprobe(old_p); -		free_aggr_kprobe(old_p); +		free_aggr_kprobe(ap);  	} +	/* Otherwise, do nothing. 
*/  } -int __kprobes register_kprobes(struct kprobe **kps, int num) +int register_kprobes(struct kprobe **kps, int num)  {  	int i, ret = 0; @@ -1310,13 +1669,13 @@ int __kprobes register_kprobes(struct kprobe **kps, int num)  }  EXPORT_SYMBOL_GPL(register_kprobes); -void __kprobes unregister_kprobe(struct kprobe *p) +void unregister_kprobe(struct kprobe *p)  {  	unregister_kprobes(&p, 1);  }  EXPORT_SYMBOL_GPL(unregister_kprobe); -void __kprobes unregister_kprobes(struct kprobe **kps, int num) +void unregister_kprobes(struct kprobe **kps, int num)  {  	int i; @@ -1345,7 +1704,7 @@ unsigned long __weak arch_deref_entry_point(void *entry)  	return (unsigned long)entry;  } -int __kprobes register_jprobes(struct jprobe **jps, int num) +int register_jprobes(struct jprobe **jps, int num)  {  	struct jprobe *jp;  	int ret = 0, i; @@ -1376,19 +1735,19 @@ int __kprobes register_jprobes(struct jprobe **jps, int num)  }  EXPORT_SYMBOL_GPL(register_jprobes); -int __kprobes register_jprobe(struct jprobe *jp) +int register_jprobe(struct jprobe *jp)  {  	return register_jprobes(&jp, 1);  }  EXPORT_SYMBOL_GPL(register_jprobe); -void __kprobes unregister_jprobe(struct jprobe *jp) +void unregister_jprobe(struct jprobe *jp)  {  	unregister_jprobes(&jp, 1);  }  EXPORT_SYMBOL_GPL(unregister_jprobe); -void __kprobes unregister_jprobes(struct jprobe **jps, int num) +void unregister_jprobes(struct jprobe **jps, int num)  {  	int i; @@ -1413,8 +1772,7 @@ EXPORT_SYMBOL_GPL(unregister_jprobes);   * This kprobe pre_handler is registered with every kretprobe. When probe   * hits it will set up the return probe.   */ -static int __kprobes pre_handler_kretprobe(struct kprobe *p, -					   struct pt_regs *regs) +static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)  {  	struct kretprobe *rp = container_of(p, struct kretprobe, kp);  	unsigned long hash, flags = 0; @@ -1422,18 +1780,22 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p,  	/*TODO: consider to only swap the RA after the last pre_handler fired */  	hash = hash_ptr(current, KPROBE_HASH_BITS); -	spin_lock_irqsave(&rp->lock, flags); +	raw_spin_lock_irqsave(&rp->lock, flags);  	if (!hlist_empty(&rp->free_instances)) {  		ri = hlist_entry(rp->free_instances.first,  				struct kretprobe_instance, hlist);  		hlist_del(&ri->hlist); -		spin_unlock_irqrestore(&rp->lock, flags); +		raw_spin_unlock_irqrestore(&rp->lock, flags);  		ri->rp = rp;  		ri->task = current; -		if (rp->entry_handler && rp->entry_handler(ri, regs)) +		if (rp->entry_handler && rp->entry_handler(ri, regs)) { +			raw_spin_lock_irqsave(&rp->lock, flags); +			hlist_add_head(&ri->hlist, &rp->free_instances); +			raw_spin_unlock_irqrestore(&rp->lock, flags);  			return 0; +		}  		arch_prepare_kretprobe(ri, regs); @@ -1444,12 +1806,13 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p,  		kretprobe_table_unlock(hash, &flags);  	} else {  		rp->nmissed++; -		spin_unlock_irqrestore(&rp->lock, flags); +		raw_spin_unlock_irqrestore(&rp->lock, flags);  	}  	return 0;  } +NOKPROBE_SYMBOL(pre_handler_kretprobe); -int __kprobes register_kretprobe(struct kretprobe *rp) +int register_kretprobe(struct kretprobe *rp)  {  	int ret = 0;  	struct kretprobe_instance *inst; @@ -1458,8 +1821,8 @@ int __kprobes register_kretprobe(struct kretprobe *rp)  	if (kretprobe_blacklist_size) {  		addr = kprobe_addr(&rp->kp); -		if (!addr) -			return -EINVAL; +		if (IS_ERR(addr)) +			return PTR_ERR(addr);  		for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {  			if 
(kretprobe_blacklist[i].addr == addr) @@ -1480,7 +1843,7 @@ int __kprobes register_kretprobe(struct kretprobe *rp)  		rp->maxactive = num_possible_cpus();  #endif  	} -	spin_lock_init(&rp->lock); +	raw_spin_lock_init(&rp->lock);  	INIT_HLIST_HEAD(&rp->free_instances);  	for (i = 0; i < rp->maxactive; i++) {  		inst = kmalloc(sizeof(struct kretprobe_instance) + @@ -1502,7 +1865,7 @@ int __kprobes register_kretprobe(struct kretprobe *rp)  }  EXPORT_SYMBOL_GPL(register_kretprobe); -int __kprobes register_kretprobes(struct kretprobe **rps, int num) +int register_kretprobes(struct kretprobe **rps, int num)  {  	int ret = 0, i; @@ -1520,13 +1883,13 @@ int __kprobes register_kretprobes(struct kretprobe **rps, int num)  }  EXPORT_SYMBOL_GPL(register_kretprobes); -void __kprobes unregister_kretprobe(struct kretprobe *rp) +void unregister_kretprobe(struct kretprobe *rp)  {  	unregister_kretprobes(&rp, 1);  }  EXPORT_SYMBOL_GPL(unregister_kretprobe); -void __kprobes unregister_kretprobes(struct kretprobe **rps, int num) +void unregister_kretprobes(struct kretprobe **rps, int num)  {  	int i; @@ -1549,38 +1912,38 @@ void __kprobes unregister_kretprobes(struct kretprobe **rps, int num)  EXPORT_SYMBOL_GPL(unregister_kretprobes);  #else /* CONFIG_KRETPROBES */ -int __kprobes register_kretprobe(struct kretprobe *rp) +int register_kretprobe(struct kretprobe *rp)  {  	return -ENOSYS;  }  EXPORT_SYMBOL_GPL(register_kretprobe); -int __kprobes register_kretprobes(struct kretprobe **rps, int num) +int register_kretprobes(struct kretprobe **rps, int num)  {  	return -ENOSYS;  }  EXPORT_SYMBOL_GPL(register_kretprobes); -void __kprobes unregister_kretprobe(struct kretprobe *rp) +void unregister_kretprobe(struct kretprobe *rp)  {  }  EXPORT_SYMBOL_GPL(unregister_kretprobe); -void __kprobes unregister_kretprobes(struct kretprobe **rps, int num) +void unregister_kretprobes(struct kretprobe **rps, int num)  {  }  EXPORT_SYMBOL_GPL(unregister_kretprobes); -static int __kprobes pre_handler_kretprobe(struct kprobe *p, -					   struct pt_regs *regs) +static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)  {  	return 0;  } +NOKPROBE_SYMBOL(pre_handler_kretprobe);  #endif /* CONFIG_KRETPROBES */  /* Set the kprobe gone and remove its instruction buffer. */ -static void __kprobes kill_kprobe(struct kprobe *p) +static void kill_kprobe(struct kprobe *p)  {  	struct kprobe *kp; @@ -1604,39 +1967,23 @@ static void __kprobes kill_kprobe(struct kprobe *p)  }  /* Disable one kprobe */ -int __kprobes disable_kprobe(struct kprobe *kp) +int disable_kprobe(struct kprobe *kp)  {  	int ret = 0; -	struct kprobe *p;  	mutex_lock(&kprobe_mutex); -	/* Check whether specified probe is valid. */ -	p = __get_valid_kprobe(kp); -	if (unlikely(p == NULL)) { +	/* Disable this kprobe */ +	if (__disable_kprobe(kp) == NULL)  		ret = -EINVAL; -		goto out; -	} - -	/* If the probe is already disabled (or gone), just return */ -	if (kprobe_disabled(kp)) -		goto out; - -	kp->flags |= KPROBE_FLAG_DISABLED; -	if (p != kp) -		/* When kp != p, p is always enabled. 
@@ -1604,39 +1967,23 @@ static void __kprobes kill_kprobe(struct kprobe *p)
 }

 /* Disable one kprobe */
-int __kprobes disable_kprobe(struct kprobe *kp)
+int disable_kprobe(struct kprobe *kp)
 {
 	int ret = 0;
-	struct kprobe *p;

 	mutex_lock(&kprobe_mutex);

-	/* Check whether specified probe is valid. */
-	p = __get_valid_kprobe(kp);
-	if (unlikely(p == NULL)) {
+	/* Disable this kprobe */
+	if (__disable_kprobe(kp) == NULL)
 		ret = -EINVAL;
-		goto out;
-	}
-
-	/* If the probe is already disabled (or gone), just return */
-	if (kprobe_disabled(kp))
-		goto out;
-
-	kp->flags |= KPROBE_FLAG_DISABLED;
-	if (p != kp)
-		/* When kp != p, p is always enabled. */
-		try_to_disable_aggr_kprobe(p);
-	if (!kprobes_all_disarmed && kprobe_disabled(p))
-		disarm_kprobe(p);
-out:
 	mutex_unlock(&kprobe_mutex);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(disable_kprobe);

 /* Enable one kprobe */
-int __kprobes enable_kprobe(struct kprobe *kp)
+int enable_kprobe(struct kprobe *kp)
 {
 	int ret = 0;
 	struct kprobe *p;
@@ -1669,20 +2016,56 @@ out:
 }
 EXPORT_SYMBOL_GPL(enable_kprobe);

-void __kprobes dump_kprobe(struct kprobe *kp)
+void dump_kprobe(struct kprobe *kp)
 {
 	printk(KERN_WARNING "Dumping kprobe:\n");
 	printk(KERN_WARNING "Name: %s\nAddress: %p\nOffset: %x\n",
 	       kp->symbol_name, kp->addr, kp->offset);
 }
+NOKPROBE_SYMBOL(dump_kprobe);
+
+/*
+ * Lookup and populate the kprobe_blacklist.
+ *
+ * Unlike the kretprobe blacklist, we'll need to determine
+ * the range of addresses that belong to the said functions,
+ * since a kprobe need not necessarily be at the beginning
+ * of a function.
+ */
+static int __init populate_kprobe_blacklist(unsigned long *start,
+					     unsigned long *end)
+{
+	unsigned long *iter;
+	struct kprobe_blacklist_entry *ent;
+	unsigned long entry, offset = 0, size = 0;
+
+	for (iter = start; iter < end; iter++) {
+		entry = arch_deref_entry_point((void *)*iter);
+
+		if (!kernel_text_address(entry) ||
+		    !kallsyms_lookup_size_offset(entry, &size, &offset)) {
+			pr_err("Failed to find blacklist at %p\n",
+				(void *)entry);
+			continue;
+		}
+
+		ent = kmalloc(sizeof(*ent), GFP_KERNEL);
+		if (!ent)
+			return -ENOMEM;
+		ent->start_addr = entry;
+		ent->end_addr = entry + size;
+		INIT_LIST_HEAD(&ent->list);
+		list_add_tail(&ent->list, &kprobe_blacklist);
+	}
+	return 0;
+}

 /* Module notifier call back, checking kprobes on the module */
-static int __kprobes kprobes_module_callback(struct notifier_block *nb,
-					     unsigned long val, void *data)
+static int kprobes_module_callback(struct notifier_block *nb,
+				   unsigned long val, void *data)
 {
 	struct module *mod = data;
 	struct hlist_head *head;
-	struct hlist_node *node;
 	struct kprobe *p;
 	unsigned int i;
 	int checkcore = (val == MODULE_STATE_GOING);
@@ -1699,7 +2082,7 @@ static int __kprobes kprobes_module_callback(struct notifier_block *nb,
 	mutex_lock(&kprobe_mutex);
 	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
 		head = &kprobe_table[i];
-		hlist_for_each_entry_rcu(p, node, head, hlist)
+		hlist_for_each_entry_rcu(p, head, hlist)
 			if (within_module_init((unsigned long)p->addr, mod) ||
 			    (checkcore &&
 			     within_module_core((unsigned long)p->addr, mod))) {
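The NOKPROBE_SYMBOL() annotations introduced throughout this patch record each marked function in a dedicated _kprobe_blacklist section, and populate_kprobe_blacklist() above turns those raw addresses into [start, end) ranges via kallsyms; the __start/__stop section markers are declared in the next hunk and handed in from init_kprobes(). The macro itself lives in the kprobes headers rather than in this file, so the following is only a plausible sketch of the mechanism, shown to make the data flow concrete:

/* Sketch: emit the symbol's address into the _kprobe_blacklist section,
 * where populate_kprobe_blacklist() will find it between the
 * __start_kprobe_blacklist and __stop_kprobe_blacklist markers that the
 * linker script places around the section. */
#define __NOKPROBE_SYMBOL(fname)					\
static unsigned long __used						\
	__attribute__((section("_kprobe_blacklist")))			\
	_kbl_addr_##fname = (unsigned long)fname;
#define NOKPROBE_SYMBOL(fname)	__NOKPROBE_SYMBOL(fname)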
@@ -1720,43 +2103,27 @@ static struct notifier_block kprobe_module_nb = {
 	.priority = 0
 };

+/* Markers of _kprobe_blacklist section */
+extern unsigned long __start_kprobe_blacklist[];
+extern unsigned long __stop_kprobe_blacklist[];
+
 static int __init init_kprobes(void)
 {
 	int i, err = 0;
-	unsigned long offset = 0, size = 0;
-	char *modname, namebuf[128];
-	const char *symbol_name;
-	void *addr;
-	struct kprobe_blackpoint *kb;

 	/* FIXME allocate the probe table, currently defined statically */
 	/* initialize all list heads */
 	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
 		INIT_HLIST_HEAD(&kprobe_table[i]);
 		INIT_HLIST_HEAD(&kretprobe_inst_table[i]);
-		spin_lock_init(&(kretprobe_table_locks[i].lock));
+		raw_spin_lock_init(&(kretprobe_table_locks[i].lock));
 	}

-	/*
-	 * Lookup and populate the kprobe_blacklist.
-	 *
-	 * Unlike the kretprobe blacklist, we'll need to determine
-	 * the range of addresses that belong to the said functions,
-	 * since a kprobe need not necessarily be at the beginning
-	 * of a function.
-	 */
-	for (kb = kprobe_blacklist; kb->name != NULL; kb++) {
-		kprobe_lookup_name(kb->name, addr);
-		if (!addr)
-			continue;
-
-		kb->start_addr = (unsigned long)addr;
-		symbol_name = kallsyms_lookup(kb->start_addr,
-				&size, &offset, &modname, namebuf);
-		if (!symbol_name)
-			kb->range = 0;
-		else
-			kb->range = size;
+	err = populate_kprobe_blacklist(__start_kprobe_blacklist,
+					__stop_kprobe_blacklist);
+	if (err) {
+		pr_err("kprobes: failed to populate blacklist: %d\n", err);
+		pr_err("Please take care of using kprobes.\n");
 	}

 	if (kretprobe_blacklist_size) {
@@ -1796,7 +2163,7 @@ static int __init init_kprobes(void)
 }

 #ifdef CONFIG_DEBUG_FS
-static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p,
+static void report_probe(struct seq_file *pi, struct kprobe *p,
 		const char *sym, int offset, char *modname, struct kprobe *pp)
 {
 	char *kprobe_type;
@@ -1818,18 +2185,19 @@ static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p,
 	if (!pp)
 		pp = p;

-	seq_printf(pi, "%s%s%s\n",
+	seq_printf(pi, "%s%s%s%s\n",
 		(kprobe_gone(p) ? "[GONE]" : ""),
 		((kprobe_disabled(p) && !kprobe_gone(p)) ?  "[DISABLED]" : ""),
-		(kprobe_optimized(pp) ? "[OPTIMIZED]" : ""));
+		(kprobe_optimized(pp) ? "[OPTIMIZED]" : ""),
+		(kprobe_ftrace(pp) ? "[FTRACE]" : ""));
 }

-static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos)
+static void *kprobe_seq_start(struct seq_file *f, loff_t *pos)
 {
 	return (*pos < KPROBE_TABLE_SIZE) ? pos : NULL;
 }

-static void __kprobes *kprobe_seq_next(struct seq_file *f, void *v, loff_t *pos)
+static void *kprobe_seq_next(struct seq_file *f, void *v, loff_t *pos)
 {
 	(*pos)++;
 	if (*pos >= KPROBE_TABLE_SIZE)
@@ -1837,24 +2205,23 @@ static void __kprobes *kprobe_seq_next(struct seq_file *f, void *v, loff_t *pos)
 	return pos;
 }

-static void __kprobes kprobe_seq_stop(struct seq_file *f, void *v)
+static void kprobe_seq_stop(struct seq_file *f, void *v)
 {
 	/* Nothing to do */
 }

-static int __kprobes show_kprobe_addr(struct seq_file *pi, void *v)
+static int show_kprobe_addr(struct seq_file *pi, void *v)
 {
 	struct hlist_head *head;
-	struct hlist_node *node;
 	struct kprobe *p, *kp;
 	const char *sym = NULL;
 	unsigned int i = *(loff_t *) v;
 	unsigned long offset = 0;
-	char *modname, namebuf[128];
+	char *modname, namebuf[KSYM_NAME_LEN];

 	head = &kprobe_table[i];
 	preempt_disable();
-	hlist_for_each_entry_rcu(p, node, head, hlist) {
+	hlist_for_each_entry_rcu(p, head, hlist) {
 		sym = kallsyms_lookup((unsigned long)p->addr, NULL,
 					&offset, &modname, namebuf);
 		if (kprobe_aggrprobe(p)) {
@@ -1874,7 +2241,7 @@ static const struct seq_operations kprobes_seq_ops = {
 	.show  = show_kprobe_addr
 };

-static int __kprobes kprobes_open(struct inode *inode, struct file *filp)
+static int kprobes_open(struct inode *inode, struct file *filp)
 {
 	return seq_open(filp, &kprobes_seq_ops);
 }
@@ -1886,10 +2253,49 @@ static const struct file_operations debugfs_kprobes_operations = {
 	.release        = seq_release,
 };

-static void __kprobes arm_all_kprobes(void)
+/* kprobes/blacklist -- shows which functions can not be probed */
+static void *kprobe_blacklist_seq_start(struct seq_file *m, loff_t *pos)
+{
+	return seq_list_start(&kprobe_blacklist, *pos);
+}
+
+static void *kprobe_blacklist_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	return seq_list_next(v, &kprobe_blacklist, pos);
+}
+
+static int kprobe_blacklist_seq_show(struct seq_file *m, void *v)
+{
+	struct kprobe_blacklist_entry *ent =
+		list_entry(v, struct kprobe_blacklist_entry, list);
+
+	seq_printf(m, "0x%p-0x%p\t%ps\n", (void *)ent->start_addr,
+		   (void *)ent->end_addr, (void *)ent->start_addr);
+	return 0;
+}
+
+static const struct seq_operations kprobe_blacklist_seq_ops = {
+	.start = kprobe_blacklist_seq_start,
+	.next  = kprobe_blacklist_seq_next,
+	.stop  = kprobe_seq_stop,	/* Reuse void function */
+	.show  = kprobe_blacklist_seq_show,
+};
+
+static int kprobe_blacklist_open(struct inode *inode, struct file *filp)
+{
+	return seq_open(filp, &kprobe_blacklist_seq_ops);
+}
+
+static const struct file_operations debugfs_kprobe_blacklist_ops = {
+	.open           = kprobe_blacklist_open,
+	.read           = seq_read,
+	.llseek         = seq_lseek,
+	.release        = seq_release,
+};
+
+static void arm_all_kprobes(void)
 {
 	struct hlist_head *head;
-	struct hlist_node *node;
 	struct kprobe *p;
 	unsigned int i;
@@ -1900,14 +2306,12 @@ static void __kprobes arm_all_kprobes(void)
 		goto already_enabled;

 	/* Arming kprobes doesn't optimize kprobe itself */
-	mutex_lock(&text_mutex);
 	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
 		head = &kprobe_table[i];
-		hlist_for_each_entry_rcu(p, node, head, hlist)
+		hlist_for_each_entry_rcu(p, head, hlist)
 			if (!kprobe_disabled(p))
-				__arm_kprobe(p);
+				arm_kprobe(p);
 	}
-	mutex_unlock(&text_mutex);

 	kprobes_all_disarmed = false;
 	printk(KERN_INFO "Kprobes globally enabled\n");
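The new debugfs seq_show above prints each blacklist entry as a [start_addr, end_addr) range plus the symbol it covers. The corresponding check on the registration path is not part of the hunks shown here; a lookup over the file-local kprobe_blacklist list could look like the sketch below (the helper name within_kprobe_blacklist is an assumption):

/* Sketch: relies on the file-local kprobe_blacklist list and on
 * struct kprobe_blacklist_entry from <linux/kprobes.h>. */
static bool within_kprobe_blacklist(unsigned long addr)
{
	struct kprobe_blacklist_entry *ent;

	/* Walk the ranges collected by populate_kprobe_blacklist(). */
	list_for_each_entry(ent, &kprobe_blacklist, list) {
		if (addr >= ent->start_addr && addr < ent->end_addr)
			return true;
	}
	return false;
}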
@@ -1917,46 +2321,34 @@ already_enabled:
 	return;
 }

-static void __kprobes disarm_all_kprobes(void)
+static void disarm_all_kprobes(void)
 {
 	struct hlist_head *head;
-	struct hlist_node *node;
 	struct kprobe *p;
 	unsigned int i;

 	mutex_lock(&kprobe_mutex);

 	/* If kprobes are already disarmed, just return */
-	if (kprobes_all_disarmed)
-		goto already_disabled;
+	if (kprobes_all_disarmed) {
+		mutex_unlock(&kprobe_mutex);
+		return;
+	}

 	kprobes_all_disarmed = true;
 	printk(KERN_INFO "Kprobes globally disabled\n");

-	/*
-	 * Here we call get_online_cpus() for avoiding text_mutex deadlock,
-	 * because disarming may also unoptimize kprobes.
-	 */
-	get_online_cpus();
-	mutex_lock(&text_mutex);
 	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
 		head = &kprobe_table[i];
-		hlist_for_each_entry_rcu(p, node, head, hlist) {
+		hlist_for_each_entry_rcu(p, head, hlist) {
 			if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p))
-				__disarm_kprobe(p);
+				disarm_kprobe(p, false);
 		}
 	}
-
-	mutex_unlock(&text_mutex);
-	put_online_cpus();
 	mutex_unlock(&kprobe_mutex);
-	/* Allow all currently running kprobes to complete */
-	synchronize_sched();
-	return;
-already_disabled:
-	mutex_unlock(&kprobe_mutex);
-	return;
+	/* Wait for disarming all kprobes by optimizer */
+	wait_for_kprobe_optimizer();
 }

 /*
@@ -1982,12 +2374,13 @@ static ssize_t write_enabled_file_bool(struct file *file,
 	       const char __user *user_buf, size_t count, loff_t *ppos)
 {
 	char buf[32];
-	int buf_size;
+	size_t buf_size;

 	buf_size = min(count, (sizeof(buf)-1));
 	if (copy_from_user(buf, user_buf, buf_size))
 		return -EFAULT;

+	buf[buf_size] = '\0';
 	switch (buf[0]) {
 	case 'y':
 	case 'Y':
@@ -1999,6 +2392,8 @@ static ssize_t write_enabled_file_bool(struct file *file,
 	case '0':
 		disarm_all_kprobes();
 		break;
+	default:
+		return -EINVAL;
 	}

 	return count;
@@ -2010,7 +2405,7 @@ static const struct file_operations fops_kp = {
 	.llseek =	default_llseek,
 };

-static int __kprobes debugfs_kprobe_init(void)
+static int __init debugfs_kprobe_init(void)
 {
 	struct dentry *dir, *file;
 	unsigned int value = 1;
@@ -2021,19 +2416,24 @@ static int __kprobes debugfs_kprobe_init(void)
 	file = debugfs_create_file("list", 0444, dir, NULL,
 				&debugfs_kprobes_operations);
-	if (!file) {
-		debugfs_remove(dir);
-		return -ENOMEM;
-	}
+	if (!file)
+		goto error;

 	file = debugfs_create_file("enabled", 0600, dir,
 					&value, &fops_kp);
-	if (!file) {
-		debugfs_remove(dir);
-		return -ENOMEM;
-	}
+	if (!file)
+		goto error;
+
+	file = debugfs_create_file("blacklist", 0444, dir, NULL,
+				&debugfs_kprobe_blacklist_ops);
+	if (!file)
+		goto error;

 	return 0;
+
+error:
+	debugfs_remove(dir);
+	return -ENOMEM;
 }

 late_initcall(debugfs_kprobe_init);
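With the "blacklist" file registered above next to "list" and "enabled", the whole debugfs control surface is reachable from userspace. A small userspace sketch follows; it assumes debugfs is mounted at /sys/kernel/debug, that the directory created earlier in this file is the usual "kprobes" one, and that it runs as root:

/* cc -o kprobes_ctl kprobes_ctl.c */
#include <stdio.h>

#define KPROBES_DIR "/sys/kernel/debug/kprobes"	/* assumed location */

static void dump_file(const char *name)
{
	char path[256], line[512];
	FILE *f;

	snprintf(path, sizeof(path), "%s/%s", KPROBES_DIR, name);
	f = fopen(path, "r");
	if (!f) {
		perror(path);
		return;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
}

static int set_enabled(int on)
{
	char path[256];
	FILE *f;

	snprintf(path, sizeof(path), "%s/enabled", KPROBES_DIR);
	f = fopen(path, "w");
	if (!f) {
		perror(path);
		return -1;
	}
	/* Anything other than y/Y/1 or n/N/0 now gets -EINVAL back. */
	fputs(on ? "1" : "0", f);
	fclose(f);
	return 0;
}

int main(void)
{
	dump_file("list");		/* registered probes and their flags */
	dump_file("blacklist");		/* address ranges that refuse probes */
	return set_enabled(1);		/* globally (re)arm kprobes */
}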
