Diffstat (limited to 'kernel/events/uprobes.c')
 kernel/events/uprobes.c | 374
 1 file changed, 251 insertions(+), 123 deletions(-)
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index ad8e1bdca70..6f3254e8c13 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -35,6 +35,8 @@
 #include <linux/kdebug.h>	/* notifier mechanism */
 #include "../../mm/internal.h"	/* munlock_vma_page */
 #include <linux/percpu-rwsem.h>
+#include <linux/task_work.h>
+#include <linux/shmem_fs.h>
 
 #include <linux/uprobes.h>
 
@@ -59,8 +61,6 @@ static struct percpu_rw_semaphore dup_mmap_sem;
 
 /* Have a copy of original instruction */
 #define UPROBE_COPY_INSN	0
-/* Can skip singlestep */
-#define UPROBE_SKIP_SSTEP	1
 
 struct uprobe {
 	struct rb_node		rb_node;	/* node in the rb tree */
@@ -72,6 +72,17 @@ struct uprobe {
 	struct inode		*inode;		/* Also hold a ref to inode */
 	loff_t			offset;
 	unsigned long		flags;
+
+	/*
+	 * The generic code assumes that it has two members of unknown type
+	 * owned by the arch-specific code:
+	 *
+	 * 	insn -	copy_insn() saves the original instruction here for
+	 *		arch_uprobe_analyze_insn().
+	 *
+	 *	ixol -	potentially modified instruction to execute out of
+	 *		line, copied to xol_area by xol_get_insn_slot().
+	 */
 	struct arch_uprobe	arch;
 };
 
@@ -85,6 +96,29 @@ struct return_instance {
 };
 
 /*
+ * Execute out of line area: anonymous executable mapping installed
+ * by the probed task to execute the copy of the original instruction
+ * mangled by set_swbp().
+ *
+ * On a breakpoint hit, thread contests for a slot.  It frees the
+ * slot after singlestep. Currently a fixed number of slots are
+ * allocated.
+ */
+struct xol_area {
+	wait_queue_head_t 	wq;		/* if all slots are busy */
+	atomic_t 		slot_count;	/* number of in-use slots */
+	unsigned long 		*bitmap;	/* 0 = free slot */
+	struct page 		*page;
+
+	/*
+	 * We keep the vma's vm_start rather than a pointer to the vma
+	 * itself.  The probed process or a naughty kernel module could make
+	 * the vma go away, and we must handle that reasonably gracefully.
+	 */
+	unsigned long 		vaddr;		/* Page(s) of instruction slots */
+};
+
+/*
  * valid_vma: Verify if the specified vma is an executable vma
  * Relax restrictions while unregistering: vm_flags might have
  * changed after breakpoint was inserted.
@@ -94,7 +128,7 @@ struct return_instance {
  */
 static bool valid_vma(struct vm_area_struct *vma, bool is_register)
 {
-	vm_flags_t flags = VM_HUGETLB | VM_MAYEXEC | VM_SHARED;
+	vm_flags_t flags = VM_HUGETLB | VM_MAYEXEC | VM_MAYSHARE;
 
 	if (is_register)
 		flags |= VM_WRITE;
@@ -244,23 +278,18 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t
  * the architecture. If an arch has variable length instruction and the
  * breakpoint instruction is not of the smallest length instruction
  * supported by that architecture then we need to modify is_trap_at_addr and
- * write_opcode accordingly. This would never be a problem for archs that
- * have fixed length instructions.
- */
-
-/*
- * write_opcode - write the opcode at a given virtual address.
+ * uprobe_write_opcode accordingly. This would never be a problem for archs
+ * that have fixed length instructions.
+ *
+ * uprobe_write_opcode - write the opcode at a given virtual address.
  * @mm: the probed process address space.
  * @vaddr: the virtual address to store the opcode.
  * @opcode: opcode to be written at @vaddr.
  *
- * Called with mm->mmap_sem held (for read and with a reference to
- * mm).
- *
- * For mm @mm, write the opcode at @vaddr.
+ * Called with mm->mmap_sem held for write.
  * Return 0 (success) or a negative errno.
  */
-static int write_opcode(struct mm_struct *mm, unsigned long vaddr,
+int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr,
 			uprobe_opcode_t opcode)
 {
 	struct page *old_page, *new_page;
@@ -277,21 +306,25 @@ retry:
 	if (ret <= 0)
 		goto put_old;
 
+	ret = anon_vma_prepare(vma);
+	if (ret)
+		goto put_old;
+
 	ret = -ENOMEM;
 	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr);
 	if (!new_page)
 		goto put_old;
 
-	__SetPageUptodate(new_page);
+	if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL))
+		goto put_new;
 
+	__SetPageUptodate(new_page);
 	copy_highpage(new_page, old_page);
 	copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
 
-	ret = anon_vma_prepare(vma);
-	if (ret)
-		goto put_new;
-
 	ret = __replace_page(vma, vaddr, old_page, new_page);
+	if (ret)
+		mem_cgroup_uncharge_page(new_page);
 
 put_new:
 	page_cache_release(new_page);
@@ -314,7 +347,7 @@ put_old:
  */
 int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr)
 {
-	return write_opcode(mm, vaddr, UPROBE_SWBP_INSN);
+	return uprobe_write_opcode(mm, vaddr, UPROBE_SWBP_INSN);
 }
 
 /**
@@ -329,7 +362,7 @@ int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned
 int __weak
 set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr)
 {
-	return write_opcode(mm, vaddr, *(uprobe_opcode_t *)auprobe->insn);
+	return uprobe_write_opcode(mm, vaddr, *(uprobe_opcode_t *)&auprobe->insn);
 }
 
 static int match_uprobe(struct uprobe *l, struct uprobe *r)
@@ -456,12 +489,9 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
 	uprobe->offset = offset;
 	init_rwsem(&uprobe->register_rwsem);
 	init_rwsem(&uprobe->consumer_rwsem);
-	/* For now assume that the instruction need not be single-stepped */
-	__set_bit(UPROBE_SKIP_SSTEP, &uprobe->flags);
 
 	/* add to uprobes_tree, sorted on inode:offset */
 	cur_uprobe = insert_uprobe(uprobe);
-
 	/* a uprobe exists for this inode:offset combination */
 	if (cur_uprobe) {
 		kfree(uprobe);
@@ -503,19 +533,19 @@ static bool consumer_del(struct uprobe *uprobe, struct uprobe_consumer *uc)
 	return ret;
 }
 
-static int
-__copy_insn(struct address_space *mapping, struct file *filp, char *insn,
-			unsigned long nbytes, loff_t offset)
+static int __copy_insn(struct address_space *mapping, struct file *filp,
+			void *insn, int nbytes, loff_t offset)
 {
 	struct page *page;
-
-	if (!mapping->a_ops->readpage)
-		return -EIO;
 	/*
-	 * Ensure that the page that has the original instruction is
-	 * populated and in page-cache.
+	 * Ensure that the page that has the original instruction is populated
+	 * and in page-cache. If ->readpage == NULL it must be shmem_mapping(),
+	 * see uprobe_register().
 	 */
-	page = read_mapping_page(mapping, offset >> PAGE_CACHE_SHIFT, filp);
+	if (mapping->a_ops->readpage)
+		page = read_mapping_page(mapping, offset >> PAGE_CACHE_SHIFT, filp);
+	else
+		page = shmem_read_mapping_page(mapping, offset >> PAGE_CACHE_SHIFT);
 	if (IS_ERR(page))
 		return PTR_ERR(page);
 
@@ -527,28 +557,28 @@ __copy_insn(struct address_space *mapping, struct file *filp, char *insn,
 
 static int copy_insn(struct uprobe *uprobe, struct file *filp)
 {
-	struct address_space *mapping;
-	unsigned long nbytes;
-	int bytes;
-
-	nbytes = PAGE_SIZE - (uprobe->offset & ~PAGE_MASK);
-	mapping = uprobe->inode->i_mapping;
+	struct address_space *mapping = uprobe->inode->i_mapping;
+	loff_t offs = uprobe->offset;
+	void *insn = &uprobe->arch.insn;
+	int size = sizeof(uprobe->arch.insn);
+	int len, err = -EIO;
 
-	/* Instruction at end of binary; copy only available bytes */
-	if (uprobe->offset + MAX_UINSN_BYTES > uprobe->inode->i_size)
-		bytes = uprobe->inode->i_size - uprobe->offset;
-	else
-		bytes = MAX_UINSN_BYTES;
+	/* Copy only available bytes, -EIO if nothing was read */
+	do {
+		if (offs >= i_size_read(uprobe->inode))
+			break;
 
-	/* Instruction at the page-boundary; copy bytes in second page */
-	if (nbytes < bytes) {
-		int err = __copy_insn(mapping, filp, uprobe->arch.insn + nbytes,
-				bytes - nbytes, uprobe->offset + nbytes);
+		len = min_t(int, size, PAGE_SIZE - (offs & ~PAGE_MASK));
+		err = __copy_insn(mapping, filp, insn, len, offs);
 		if (err)
-			return err;
-		bytes = nbytes;
-	}
-	return __copy_insn(mapping, filp, uprobe->arch.insn, bytes, uprobe->offset);
+			break;
+
+		insn += len;
+		offs += len;
+		size -= len;
+	} while (size);
+
+	return err;
 }
 
 static int prepare_uprobe(struct uprobe *uprobe, struct file *file,
@@ -569,14 +599,14 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file,
 		goto out;
 
 	ret = -ENOTSUPP;
-	if (is_trap_insn((uprobe_opcode_t *)uprobe->arch.insn))
+	if (is_trap_insn((uprobe_opcode_t *)&uprobe->arch.insn))
 		goto out;
 
 	ret = arch_uprobe_analyze_insn(&uprobe->arch, mm, vaddr);
 	if (ret)
 		goto out;
 
-	/* write_opcode() assumes we don't cross page boundary */
+	/* uprobe_write_opcode() assumes we don't cross page boundary */
 	BUG_ON((uprobe->offset & ~PAGE_MASK) +
 			UPROBE_SWBP_INSN_SIZE > PAGE_SIZE);
 
@@ -816,7 +846,7 @@ static void __uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *u
 {
 	int err;
 
-	if (!consumer_del(uprobe, uc))	/* WARN? */
+	if (WARN_ON(!consumer_del(uprobe, uc)))
 		return;
 
 	err = register_for_each_vma(uprobe, NULL);
@@ -851,6 +881,9 @@ int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *
 	if (!uc->handler && !uc->ret_handler)
 		return -EINVAL;
 
+	/* copy_insn() uses read_mapping_page() or shmem_read_mapping_page() */
+	if (!inode->i_mapping->a_ops->readpage && !shmem_mapping(inode->i_mapping))
+		return -EIO;
 	/* Racy, just to catch the obvious mistakes */
 	if (offset > i_size_read(inode))
 		return -EINVAL;
@@ -894,7 +927,7 @@ int uprobe_apply(struct inode *inode, loff_t offset,
 	int ret = -ENOENT;
 
 	uprobe = find_uprobe(inode, offset);
-	if (!uprobe)
+	if (WARN_ON(!uprobe))
 		return ret;
 
 	down_write(&uprobe->register_rwsem);
@@ -919,7 +952,7 @@ void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consume
 	struct uprobe *uprobe;
 
 	uprobe = find_uprobe(inode, offset);
-	if (!uprobe)
+	if (WARN_ON(!uprobe))
 		return;
 
 	down_write(&uprobe->register_rwsem);
@@ -1096,21 +1129,22 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon
 }
 
 /* Slot allocation for XOL */
-static int xol_add_vma(struct xol_area *area)
+static int xol_add_vma(struct mm_struct *mm, struct xol_area *area)
 {
-	struct mm_struct *mm = current->mm;
 	int ret = -EALREADY;
 
 	down_write(&mm->mmap_sem);
 	if (mm->uprobes_state.xol_area)
 		goto fail;
 
-	ret = -ENOMEM;
-	/* Try to map as high as possible, this is only a hint. */
-	area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE, PAGE_SIZE, 0, 0);
-	if (area->vaddr & ~PAGE_MASK) {
-		ret = area->vaddr;
-		goto fail;
+	if (!area->vaddr) {
+		/* Try to map as high as possible, this is only a hint. */
+		area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE,
						PAGE_SIZE, 0, 0);
+		if (area->vaddr & ~PAGE_MASK) {
+			ret = area->vaddr;
+			goto fail;
+		}
 	}
 
 	ret = install_special_mapping(mm, area->vaddr, PAGE_SIZE,
@@ -1120,30 +1154,19 @@ static int xol_add_vma(struct xol_area *area)
 	smp_wmb();	/* pairs with get_xol_area() */
 	mm->uprobes_state.xol_area = area;
-	ret = 0;
 fail:
 	up_write(&mm->mmap_sem);
 
 	return ret;
 }
 
-/*
- * get_xol_area - Allocate process's xol_area if necessary.
- * This area will be used for storing instructions for execution out of line.
- *
- * Returns the allocated area or NULL.
- */
-static struct xol_area *get_xol_area(void)
+static struct xol_area *__create_xol_area(unsigned long vaddr)
 {
 	struct mm_struct *mm = current->mm;
-	struct xol_area *area;
 	uprobe_opcode_t insn = UPROBE_SWBP_INSN;
+	struct xol_area *area;
 
-	area = mm->uprobes_state.xol_area;
-	if (area)
-		goto ret;
-
-	area = kzalloc(sizeof(*area), GFP_KERNEL);
+	area = kmalloc(sizeof(*area), GFP_KERNEL);
 	if (unlikely(!area))
 		goto out;
 
@@ -1155,13 +1178,14 @@ static struct xol_area *get_xol_area(void)
 	if (!area->page)
 		goto free_bitmap;
 
-	/* allocate first slot of task's xol_area for the return probes */
+	area->vaddr = vaddr;
+	init_waitqueue_head(&area->wq);
+	/* Reserve the 1st slot for get_trampoline_vaddr() */
 	set_bit(0, area->bitmap);
-	copy_to_page(area->page, 0, &insn, UPROBE_SWBP_INSN_SIZE);
 	atomic_set(&area->slot_count, 1);
-	init_waitqueue_head(&area->wq);
+	copy_to_page(area->page, 0, &insn, UPROBE_SWBP_INSN_SIZE);
 
-	if (!xol_add_vma(area))
+	if (!xol_add_vma(mm, area))
 		return area;
 
 	__free_page(area->page);
@@ -1170,9 +1194,25 @@ static struct xol_area *get_xol_area(void)
 free_area:
 	kfree(area);
 out:
+	return NULL;
+}
+
+/*
+ * get_xol_area - Allocate process's xol_area if necessary.
+ * This area will be used for storing instructions for execution out of line.
+ *
+ * Returns the allocated area or NULL.
+ */
+static struct xol_area *get_xol_area(void)
+{
+	struct mm_struct *mm = current->mm;
+	struct xol_area *area;
+
+	if (!mm->uprobes_state.xol_area)
+		__create_xol_area(0);
+
 	area = mm->uprobes_state.xol_area;
- ret:
-	smp_read_barrier_depends();     /* pairs with wmb in xol_add_vma() */
+	smp_read_barrier_depends();	/* pairs with wmb in xol_add_vma() */
 	return area;
 }
 
@@ -1255,13 +1295,8 @@ static unsigned long xol_get_insn_slot(struct uprobe *uprobe)
 	if (unlikely(!xol_vaddr))
 		return 0;
 
-	/* Initialize the slot */
-	copy_to_page(area->page, xol_vaddr, uprobe->arch.insn, MAX_UINSN_BYTES);
-	/*
-	 * We probably need flush_icache_user_range() but it needs vma.
-	 * This should work on supported architectures too.
-	 */
-	flush_dcache_page(area->page);
+	arch_uprobe_copy_ixol(area->page, xol_vaddr,
+			      &uprobe->arch.ixol, sizeof(uprobe->arch.ixol));
 
 	return xol_vaddr;
 }
@@ -1304,6 +1339,21 @@ static void xol_free_insn_slot(struct task_struct *tsk)
 	}
 }
 
+void __weak arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
+				  void *src, unsigned long len)
+{
+	/* Initialize the slot */
+	copy_to_page(page, vaddr, src, len);
+
+	/*
+	 * We probably need flush_icache_user_range() but it needs vma.
+	 * This should work on most of architectures by default. If
+	 * architecture needs to do something different it can define
+	 * its own version of the function.
+	 */
+	flush_dcache_page(page);
+}
+
 /**
 * uprobe_get_swbp_addr - compute address of swbp given post-swbp regs
 * @regs: Reflects the saved state of the task after it has hit a breakpoint
@@ -1315,6 +1365,16 @@ unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs)
 	return instruction_pointer(regs) - UPROBE_SWBP_INSN_SIZE;
 }
 
+unsigned long uprobe_get_trap_addr(struct pt_regs *regs)
+{
+	struct uprobe_task *utask = current->utask;
+
+	if (unlikely(utask && utask->active_uprobe))
+		return utask->vaddr;
+
+	return instruction_pointer(regs);
+}
+
 /*
 * Called with no locks held.
 * Called in context of a exiting or a exec-ing thread.
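
A note on the hunks above: arch_uprobe_copy_ixol() is added as a __weak definition, so the generic copy_to_page() + flush_dcache_page() body is used unless an architecture links in its own strong definition. The fragment below is a stand-alone, user-space sketch of that weak/strong override mechanism only; the names (copy_ixol, the fake 16-byte slot) are illustrative and not part of the patch.

#include <stdio.h>
#include <string.h>

/* Weak default: kept by the linker only if no strong definition exists. */
__attribute__((weak))
void copy_ixol(void *dst, const void *src, unsigned long len)
{
	memcpy(dst, src, len);
	printf("generic copy of %lu bytes\n", len);
}

/*
 * An "arch" object file could supply a strong definition such as:
 *
 *	void copy_ixol(void *dst, const void *src, unsigned long len)
 *	{
 *		memcpy(dst, src, len);
 *		// plus whatever extra cache maintenance the arch needs
 *	}
 *
 * and the weak default above would then be dropped at link time.
 */

int main(void)
{
	unsigned char slot[16];
	unsigned char insn[16] = { 0xcc };	/* pretend instruction copy */

	copy_ixol(slot, insn, sizeof(insn));
	return 0;
}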
@@ -1345,14 +1405,6 @@ void uprobe_free_utask(struct task_struct *t)
 }
 
 /*
- * Called in context of a new clone/fork from copy_process.
- */
-void uprobe_copy_process(struct task_struct *t)
-{
-	t->utask = NULL;
-}
-
-/*
 * Allocate a uprobe_task object for the task if if necessary.
 * Called when the thread hits a breakpoint.
 *
@@ -1367,6 +1419,82 @@ static struct uprobe_task *get_utask(void)
 	return current->utask;
 }
 
+static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask)
+{
+	struct uprobe_task *n_utask;
+	struct return_instance **p, *o, *n;
+
+	n_utask = kzalloc(sizeof(struct uprobe_task), GFP_KERNEL);
+	if (!n_utask)
+		return -ENOMEM;
+	t->utask = n_utask;
+
+	p = &n_utask->return_instances;
+	for (o = o_utask->return_instances; o; o = o->next) {
+		n = kmalloc(sizeof(struct return_instance), GFP_KERNEL);
+		if (!n)
+			return -ENOMEM;
+
+		*n = *o;
+		atomic_inc(&n->uprobe->ref);
+		n->next = NULL;
+
+		*p = n;
+		p = &n->next;
+		n_utask->depth++;
+	}
+
+	return 0;
+}
+
+static void uprobe_warn(struct task_struct *t, const char *msg)
+{
+	pr_warn("uprobe: %s:%d failed to %s\n",
+			current->comm, current->pid, msg);
+}
+
+static void dup_xol_work(struct callback_head *work)
+{
+	if (current->flags & PF_EXITING)
+		return;
+
+	if (!__create_xol_area(current->utask->dup_xol_addr))
+		uprobe_warn(current, "dup xol area");
+}
+
+/*
+ * Called in context of a new clone/fork from copy_process.
+ */
+void uprobe_copy_process(struct task_struct *t, unsigned long flags)
+{
+	struct uprobe_task *utask = current->utask;
+	struct mm_struct *mm = current->mm;
+	struct xol_area *area;
+
+	t->utask = NULL;
+
+	if (!utask || !utask->return_instances)
+		return;
+
+	if (mm == t->mm && !(flags & CLONE_VFORK))
+		return;
+
+	if (dup_utask(t, utask))
+		return uprobe_warn(t, "dup ret instances");
+
+	/* The task can fork() after dup_xol_work() fails */
+	area = mm->uprobes_state.xol_area;
+	if (!area)
+		return uprobe_warn(t, "dup xol area");
+
+	if (mm == t->mm)
+		return;
+
+	t->utask->dup_xol_addr = area->vaddr;
+	init_task_work(&t->utask->dup_xol_work, dup_xol_work);
+	task_work_add(t, &t->utask->dup_xol_work, true);
+}
+
 /*
 * Current area->vaddr notion assume the trampoline address is always
 * equal area->vaddr.
@@ -1518,20 +1646,6 @@ bool uprobe_deny_signal(void)
 	return true;
 }
 
-/*
- * Avoid singlestepping the original instruction if the original instruction
- * is a NOP or can be emulated.
- */
-static bool can_skip_sstep(struct uprobe *uprobe, struct pt_regs *regs)
-{
-	if (test_bit(UPROBE_SKIP_SSTEP, &uprobe->flags)) {
-		if (arch_uprobe_skip_sstep(&uprobe->arch, regs))
-			return true;
-		clear_bit(UPROBE_SKIP_SSTEP, &uprobe->flags);
-	}
-	return false;
-}
-
 static void mmf_recalc_uprobes(struct mm_struct *mm)
 {
 	struct vm_area_struct *vma;
@@ -1694,6 +1808,11 @@ static bool handle_trampoline(struct pt_regs *regs)
 	return true;
 }
 
+bool __weak arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs)
+{
+	return false;
+}
+
 /*
 * Run handler and ask thread to singlestep.
 * Ensure all non-fatal signals cannot interrupt thread while it singlesteps.
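
dup_utask() above duplicates the parent's return_instances chain with the pointer-to-a-pointer idiom (p = &n_utask->return_instances; ... *p = n; p = &n->next;), which preserves order in a single pass without a tail pointer. Below is a self-contained user-space sketch of the same idiom, with a generic node type and hypothetical names rather than the kernel structures.

#include <stdio.h>
#include <stdlib.h>

struct node {
	int val;
	struct node *next;
};

/* Copy a singly linked list in order, dup_utask()-style. */
static struct node *dup_list(const struct node *o)
{
	struct node *head = NULL, **p = &head, *n;

	for (; o; o = o->next) {
		n = malloc(sizeof(*n));
		if (!n)
			break;			/* caller gets a partial copy */
		*n = *o;
		n->next = NULL;

		*p = n;				/* link the new node in place */
		p = &n->next;			/* advance the link cursor */
	}
	return head;
}

int main(void)
{
	struct node c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };
	struct node *copy = dup_list(&a), *n;

	while (copy) {
		printf("%d\n", copy->val);
		n = copy->next;
		free(copy);
		copy = n;
	}
	return 0;
}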
@@ -1744,14 +1863,22 @@ static void handle_swbp(struct pt_regs *regs)
 	if (unlikely(!test_bit(UPROBE_COPY_INSN, &uprobe->flags)))
 		goto out;
 
+	/* Tracing handlers use ->utask to communicate with fetch methods */
+	if (!get_utask())
+		goto out;
+
+	if (arch_uprobe_ignore(&uprobe->arch, regs))
+		goto out;
+
 	handler_chain(uprobe, regs);
-	if (can_skip_sstep(uprobe, regs))
+
+	if (arch_uprobe_skip_sstep(&uprobe->arch, regs))
 		goto out;
 
 	if (!pre_ssout(uprobe, regs, bp_vaddr))
 		return;
 
-	/* can_skip_sstep() succeeded, or restart if can't singlestep */
+	/* arch_uprobe_skip_sstep() succeeded, or restart if can't singlestep */
 out:
 	put_uprobe(uprobe);
 }
@@ -1763,10 +1890,11 @@ out:
 static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs)
 {
 	struct uprobe *uprobe;
+	int err = 0;
 
 	uprobe = utask->active_uprobe;
 	if (utask->state == UTASK_SSTEP_ACK)
-		arch_uprobe_post_xol(&uprobe->arch, regs);
+		err = arch_uprobe_post_xol(&uprobe->arch, regs);
 	else if (utask->state == UTASK_SSTEP_TRAPPED)
 		arch_uprobe_abort_xol(&uprobe->arch, regs);
 	else
@@ -1780,6 +1908,11 @@ static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs)
 	spin_lock_irq(&current->sighand->siglock);
 	recalc_sigpending(); /* see uprobe_deny_signal() */
 	spin_unlock_irq(&current->sighand->siglock);
+
+	if (unlikely(err)) {
+		uprobe_warn(current, "execute the probed insn, sending SIGILL.");
+		force_sig_info(SIGILL, SEND_SIG_FORCED, current);
+	}
 }
 
 /*
@@ -1857,9 +1990,4 @@ static int __init init_uprobes(void)
 
 	return register_die_notifier(&uprobe_exception_nb);
 }
-module_init(init_uprobes);
-
-static void __exit exit_uprobes(void)
-{
-}
-module_exit(exit_uprobes);
+__initcall(init_uprobes);
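
One more detail worth spelling out from the copy_insn() rework earlier in the patch: the loop reads sizeof(arch.insn) bytes in at most two chunks, clipping each read at a page boundary via min_t(int, size, PAGE_SIZE - (offs & ~PAGE_MASK)). Below is a user-space sketch of just that arithmetic; the flat buffer standing in for the page cache and the helper name copy_chunk are made up for illustration.

#include <stdio.h>
#include <string.h>

#define PAGE_SIZE	4096L
#define PAGE_MASK	(~(PAGE_SIZE - 1))
#define MAX_UINSN_BYTES	16

/* Stand-in for __copy_insn(): "read" nbytes at offs from a flat file image. */
static void copy_chunk(const unsigned char *file, void *insn, int nbytes, long offs)
{
	memcpy(insn, file + offs, nbytes);
	printf("copied %d bytes at file offset %ld\n", nbytes, offs);
}

int main(void)
{
	unsigned char file[2 * PAGE_SIZE];
	unsigned char insn[MAX_UINSN_BYTES], *dst = insn;
	long offs = PAGE_SIZE - 5;	/* instruction straddles a page boundary */
	int size = sizeof(insn), len;

	memset(file, 0x90, sizeof(file));	/* pretend file contents */

	do {
		/* clip the read so it never crosses into the next page */
		len = size;
		if (len > PAGE_SIZE - (offs & ~PAGE_MASK))
			len = PAGE_SIZE - (offs & ~PAGE_MASK);

		copy_chunk(file, dst, len, offs);
		dst += len;
		offs += len;
		size -= len;
	} while (size);

	return 0;
}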
