From b68e0749100e1b901bf11330f149b321c082178e Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 13 Oct 2013 21:18:31 +0200 Subject: uprobes: Change the callsite of uprobe_copy_process() Preparation for the next patches. Move the callsite of uprobe_copy_process() in copy_process() down to the succesfull return. We do not care if copy_process() fails, uprobe_free_utask() won't be called in this case so the wrong ->utask != NULL doesn't matter. OTOH, with this change we know that copy_process() can't fail when uprobe_copy_process() is called, the new task should either return to user-mode or call do_exit(). This way uprobe_copy_process() can: 1. setup p->utask != NULL if necessary 2. setup uprobes_state.xol_area 3. use task_work_add(p) Also, move the definition of uprobe_copy_process() down so that it can see get_utask(). Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'kernel/events') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index ad8e1bdca70..db7a1dcb3dd 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1344,14 +1344,6 @@ void uprobe_free_utask(struct task_struct *t) t->utask = NULL; } -/* - * Called in context of a new clone/fork from copy_process. - */ -void uprobe_copy_process(struct task_struct *t) -{ - t->utask = NULL; -} - /* * Allocate a uprobe_task object for the task if if necessary. * Called when the thread hits a breakpoint. @@ -1367,6 +1359,14 @@ static struct uprobe_task *get_utask(void) return current->utask; } +/* + * Called in context of a new clone/fork from copy_process. + */ +void uprobe_copy_process(struct task_struct *t) +{ + t->utask = NULL; +} + /* * Current area->vaddr notion assume the trampoline address is always * equal area->vaddr. -- cgit v1.2.3-18-g5258 From 6441ec8b7c108b72789d120562b9f1d976e4aaaf Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 13 Oct 2013 21:18:35 +0200 Subject: uprobes: Introduce __create_xol_area() No functional changes, preparation. Extract the code which actually allocates/installs the new area into the new helper, __create_xol_area(). While at it remove the unnecessary "ret = ENOMEM" and "ret = 0" in xol_add_vma(), they both have no effect. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 47 +++++++++++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 22 deletions(-) (limited to 'kernel/events') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index db7a1dcb3dd..ad17d813e73 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1096,16 +1096,14 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon } /* Slot allocation for XOL */ -static int xol_add_vma(struct xol_area *area) +static int xol_add_vma(struct mm_struct *mm, struct xol_area *area) { - struct mm_struct *mm = current->mm; int ret = -EALREADY; down_write(&mm->mmap_sem); if (mm->uprobes_state.xol_area) goto fail; - ret = -ENOMEM; /* Try to map as high as possible, this is only a hint. */ area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE, PAGE_SIZE, 0, 0); if (area->vaddr & ~PAGE_MASK) { @@ -1120,28 +1118,17 @@ static int xol_add_vma(struct xol_area *area) smp_wmb(); /* pairs with get_xol_area() */ mm->uprobes_state.xol_area = area; - ret = 0; fail: up_write(&mm->mmap_sem); return ret; } -/* - * get_xol_area - Allocate process's xol_area if necessary. - * This area will be used for storing instructions for execution out of line. - * - * Returns the allocated area or NULL. - */ -static struct xol_area *get_xol_area(void) +static struct xol_area *__create_xol_area(void) { struct mm_struct *mm = current->mm; - struct xol_area *area; uprobe_opcode_t insn = UPROBE_SWBP_INSN; - - area = mm->uprobes_state.xol_area; - if (area) - goto ret; + struct xol_area *area; area = kzalloc(sizeof(*area), GFP_KERNEL); if (unlikely(!area)) @@ -1155,13 +1142,13 @@ static struct xol_area *get_xol_area(void) if (!area->page) goto free_bitmap; - /* allocate first slot of task's xol_area for the return probes */ + init_waitqueue_head(&area->wq); + /* Reserve the 1st slot for get_trampoline_vaddr() */ set_bit(0, area->bitmap); - copy_to_page(area->page, 0, &insn, UPROBE_SWBP_INSN_SIZE); atomic_set(&area->slot_count, 1); - init_waitqueue_head(&area->wq); + copy_to_page(area->page, 0, &insn, UPROBE_SWBP_INSN_SIZE); - if (!xol_add_vma(area)) + if (!xol_add_vma(mm, area)) return area; __free_page(area->page); @@ -1170,9 +1157,25 @@ static struct xol_area *get_xol_area(void) free_area: kfree(area); out: + return NULL; +} + +/* + * get_xol_area - Allocate process's xol_area if necessary. + * This area will be used for storing instructions for execution out of line. + * + * Returns the allocated area or NULL. + */ +static struct xol_area *get_xol_area(void) +{ + struct mm_struct *mm = current->mm; + struct xol_area *area; + + if (!mm->uprobes_state.xol_area) + __create_xol_area(); + area = mm->uprobes_state.xol_area; - ret: - smp_read_barrier_depends(); /* pairs with wmb in xol_add_vma() */ + smp_read_barrier_depends(); /* pairs with wmb in xol_add_vma() */ return area; } -- cgit v1.2.3-18-g5258 From af0d95af79773f7637107cd3871aaabcb425f15a Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 13 Oct 2013 21:18:38 +0200 Subject: uprobes: Teach __create_xol_area() to accept the predefined vaddr Currently xol_add_vma() uses get_unmapped_area() for area->vaddr, but the next patches need to use the fixed address. So this patch adds the new "vaddr" argument to __create_xol_area() which should be used as area->vaddr if it is nonzero. xol_add_vma() doesn't bother to verify that the predefined addr is not used, insert_vm_struct() should fail if find_vma_links() detects the overlap with the existing vma. Also, __create_xol_area() doesn't need __GFP_ZERO to allocate area. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'kernel/events') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index ad17d813e73..7d12a45842a 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1104,11 +1104,14 @@ static int xol_add_vma(struct mm_struct *mm, struct xol_area *area) if (mm->uprobes_state.xol_area) goto fail; - /* Try to map as high as possible, this is only a hint. */ - area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE, PAGE_SIZE, 0, 0); - if (area->vaddr & ~PAGE_MASK) { - ret = area->vaddr; - goto fail; + if (!area->vaddr) { + /* Try to map as high as possible, this is only a hint. */ + area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE, + PAGE_SIZE, 0, 0); + if (area->vaddr & ~PAGE_MASK) { + ret = area->vaddr; + goto fail; + } } ret = install_special_mapping(mm, area->vaddr, PAGE_SIZE, @@ -1124,13 +1127,13 @@ static int xol_add_vma(struct mm_struct *mm, struct xol_area *area) return ret; } -static struct xol_area *__create_xol_area(void) +static struct xol_area *__create_xol_area(unsigned long vaddr) { struct mm_struct *mm = current->mm; uprobe_opcode_t insn = UPROBE_SWBP_INSN; struct xol_area *area; - area = kzalloc(sizeof(*area), GFP_KERNEL); + area = kmalloc(sizeof(*area), GFP_KERNEL); if (unlikely(!area)) goto out; @@ -1142,6 +1145,7 @@ static struct xol_area *__create_xol_area(void) if (!area->page) goto free_bitmap; + area->vaddr = vaddr; init_waitqueue_head(&area->wq); /* Reserve the 1st slot for get_trampoline_vaddr() */ set_bit(0, area->bitmap); @@ -1172,7 +1176,7 @@ static struct xol_area *get_xol_area(void) struct xol_area *area; if (!mm->uprobes_state.xol_area) - __create_xol_area(); + __create_xol_area(0); area = mm->uprobes_state.xol_area; smp_read_barrier_depends(); /* pairs with wmb in xol_add_vma() */ -- cgit v1.2.3-18-g5258 From 248d3a7b2f100078c5f6878351177859380582e9 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 13 Oct 2013 21:18:41 +0200 Subject: uprobes: Change uprobe_copy_process() to dup return_instances uprobe_copy_process() assumes that the new child doesn't need ->utask, it should be allocated by demand. But this is not true if the forking task has the pending ret- probes, the child should report them as well and thus it needs the copy of parent's ->return_instances chain. Otherwise the child crashes when it returns from the probed function. Alternatively we could cleanup the child's stack, but this needs per-arch changes and this is not what we want. At least systemtap expects a .return in the child too. Note: this change alone doesn't fix the problem, see the next change. Reported-by: Martin Cermak Reported-by: David Smith Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'kernel/events') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 7d12a45842a..1c6cda68a55 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1366,12 +1366,55 @@ static struct uprobe_task *get_utask(void) return current->utask; } +static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask) +{ + struct uprobe_task *n_utask; + struct return_instance **p, *o, *n; + + n_utask = kzalloc(sizeof(struct uprobe_task), GFP_KERNEL); + if (!n_utask) + return -ENOMEM; + t->utask = n_utask; + + p = &n_utask->return_instances; + for (o = o_utask->return_instances; o; o = o->next) { + n = kmalloc(sizeof(struct return_instance), GFP_KERNEL); + if (!n) + return -ENOMEM; + + *n = *o; + atomic_inc(&n->uprobe->ref); + n->next = NULL; + + *p = n; + p = &n->next; + n_utask->depth++; + } + + return 0; +} + +static void uprobe_warn(struct task_struct *t, const char *msg) +{ + pr_warn("uprobe: %s:%d failed to %s\n", + current->comm, current->pid, msg); +} + /* * Called in context of a new clone/fork from copy_process. */ void uprobe_copy_process(struct task_struct *t) { + struct uprobe_task *utask = current->utask; + struct mm_struct *mm = current->mm; + t->utask = NULL; + + if (mm == t->mm || !utask || !utask->return_instances) + return; + + if (dup_utask(t, utask)) + return uprobe_warn(t, "dup ret instances"); } /* -- cgit v1.2.3-18-g5258 From aa59c53fd4599c91ccf9629af0c2777b89929076 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 13 Oct 2013 21:18:44 +0200 Subject: uprobes: Change uprobe_copy_process() to dup xol_area This finally fixes the serious bug in uretprobes: a forked child crashes if the parent called fork() with the pending ret probe. Trivial test-case: # perf probe -x /lib/libc.so.6 __fork%return # perf record -e probe_libc:__fork perl -le 'fork || print "OK"' (the child doesn't print "OK", it is killed by SIGSEGV) If the child returns from the probed function it actually returns to trampoline_vaddr, because it got the copy of parent's stack mangled by prepare_uretprobe() when the parent entered this func. It crashes because a) this address is not mapped and b) until the previous change it doesn't have the proper->return_instances info. This means that uprobe_copy_process() has to create xol_area which has the trampoline slot, and its vaddr should be equal to parent's xol_area->vaddr. Unfortunately, uprobe_copy_process() can not simply do __create_xol_area(child, xol_area->vaddr). This could actually work but perf_event_mmap() doesn't expect the usage of foreign ->mm. So we offload this to task_work_run(), and pass the argument via not yet used utask->vaddr. We know that this vaddr is fine for install_special_mapping(), the necessary hole was recently "created" by dup_mmap() which skips the parent's VM_DONTCOPY area, and nobody else could use the new mm. Unfortunately, this also means that we can not handle the errors properly, we obviously can not abort the already completed fork(). So we simply print the warning if GFP_KERNEL allocation (the only possible reason) fails. Reported-by: Martin Cermak Reported-by: David Smith Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'kernel/events') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 1c6cda68a55..9f282e14925 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -35,6 +35,7 @@ #include /* notifier mechanism */ #include "../../mm/internal.h" /* munlock_vma_page */ #include +#include #include @@ -1400,6 +1401,17 @@ static void uprobe_warn(struct task_struct *t, const char *msg) current->comm, current->pid, msg); } +static void dup_xol_work(struct callback_head *work) +{ + kfree(work); + + if (current->flags & PF_EXITING) + return; + + if (!__create_xol_area(current->utask->vaddr)) + uprobe_warn(current, "dup xol area"); +} + /* * Called in context of a new clone/fork from copy_process. */ @@ -1407,6 +1419,8 @@ void uprobe_copy_process(struct task_struct *t) { struct uprobe_task *utask = current->utask; struct mm_struct *mm = current->mm; + struct callback_head *work; + struct xol_area *area; t->utask = NULL; @@ -1415,6 +1429,20 @@ void uprobe_copy_process(struct task_struct *t) if (dup_utask(t, utask)) return uprobe_warn(t, "dup ret instances"); + + /* The task can fork() after dup_xol_work() fails */ + area = mm->uprobes_state.xol_area; + if (!area) + return uprobe_warn(t, "dup xol area"); + + /* TODO: move it into the union in uprobe_task */ + work = kmalloc(sizeof(*work), GFP_KERNEL); + if (!work) + return uprobe_warn(t, "dup xol area"); + + utask->vaddr = area->vaddr; + init_task_work(work, dup_xol_work); + task_work_add(t, work, true); } /* -- cgit v1.2.3-18-g5258 From 3ab679661721b1ec2aaad99a801870ed59ab1110 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 16 Oct 2013 19:39:37 +0200 Subject: uprobes: Teach uprobe_copy_process() to handle CLONE_VFORK uprobe_copy_process() does nothing if the child shares ->mm with the forking process, but there is a special case: CLONE_VFORK. In this case it would be more correct to do dup_utask() but avoid dup_xol(). This is not that important, the child should not unwind its stack too much, this can corrupt the parent's stack, but at least we need this to allow to ret-probe __vfork() itself. Note: in theory, it would be better to check task_pt_regs(p)->sp instead of CLONE_VFORK, we need to dup_utask() if and only if the child can return from the function called by the parent. But this needs the arch-dependant helper, and I think that nobody actually does clone(same_stack, CLONE_VM). Reported-by: Martin Cermak Reported-by: David Smith Signed-off-by: Oleg Nesterov --- kernel/events/uprobes.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'kernel/events') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 9f282e14925..ae9e1d2ef25 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1415,7 +1415,7 @@ static void dup_xol_work(struct callback_head *work) /* * Called in context of a new clone/fork from copy_process. */ -void uprobe_copy_process(struct task_struct *t) +void uprobe_copy_process(struct task_struct *t, unsigned long flags) { struct uprobe_task *utask = current->utask; struct mm_struct *mm = current->mm; @@ -1424,7 +1424,10 @@ void uprobe_copy_process(struct task_struct *t) t->utask = NULL; - if (mm == t->mm || !utask || !utask->return_instances) + if (!utask || !utask->return_instances) + return; + + if (mm == t->mm && !(flags & CLONE_VFORK)) return; if (dup_utask(t, utask)) @@ -1435,6 +1438,9 @@ void uprobe_copy_process(struct task_struct *t) if (!area) return uprobe_warn(t, "dup xol area"); + if (mm == t->mm) + return; + /* TODO: move it into the union in uprobe_task */ work = kmalloc(sizeof(*work), GFP_KERNEL); if (!work) -- cgit v1.2.3-18-g5258