diff options
Diffstat (limited to 'arch/x86/kernel/ptrace.c')
| -rw-r--r-- | arch/x86/kernel/ptrace.c | 1219 |
1 files changed, 579 insertions, 640 deletions
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index eb92ccbb350..678c0ada3b3 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -2,9 +2,6 @@ /* * Pentium III FXSR, SSE support * Gareth Hughes <gareth@valinux.com>, May 2000 - * - * BTS tracing - * Markus Metzger <markus.t.metzger@intel.com>, Dec 2007 */ #include <linux/kernel.h> @@ -12,34 +9,136 @@ #include <linux/mm.h> #include <linux/smp.h> #include <linux/errno.h> +#include <linux/slab.h> #include <linux/ptrace.h> #include <linux/regset.h> +#include <linux/tracehook.h> #include <linux/user.h> #include <linux/elf.h> #include <linux/security.h> #include <linux/audit.h> #include <linux/seccomp.h> #include <linux/signal.h> +#include <linux/perf_event.h> +#include <linux/hw_breakpoint.h> +#include <linux/rcupdate.h> +#include <linux/export.h> +#include <linux/context_tracking.h> #include <asm/uaccess.h> #include <asm/pgtable.h> -#include <asm/system.h> #include <asm/processor.h> #include <asm/i387.h> +#include <asm/fpu-internal.h> #include <asm/debugreg.h> #include <asm/ldt.h> #include <asm/desc.h> #include <asm/prctl.h> #include <asm/proto.h> -#include <asm/ds.h> +#include <asm/hw_breakpoint.h> +#include <asm/traps.h> #include "tls.h" +#define CREATE_TRACE_POINTS +#include <trace/events/syscalls.h> + enum x86_regset { REGSET_GENERAL, REGSET_FP, REGSET_XFP, + REGSET_IOPERM64 = REGSET_XFP, + REGSET_XSTATE, REGSET_TLS, + REGSET_IOPERM32, +}; + +struct pt_regs_offset { + const char *name; + int offset; +}; + +#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)} +#define REG_OFFSET_END {.name = NULL, .offset = 0} + +static const struct pt_regs_offset regoffset_table[] = { +#ifdef CONFIG_X86_64 + REG_OFFSET_NAME(r15), + REG_OFFSET_NAME(r14), + REG_OFFSET_NAME(r13), + REG_OFFSET_NAME(r12), + REG_OFFSET_NAME(r11), + REG_OFFSET_NAME(r10), + REG_OFFSET_NAME(r9), + REG_OFFSET_NAME(r8), +#endif + REG_OFFSET_NAME(bx), + REG_OFFSET_NAME(cx), + REG_OFFSET_NAME(dx), + REG_OFFSET_NAME(si), + REG_OFFSET_NAME(di), + REG_OFFSET_NAME(bp), + REG_OFFSET_NAME(ax), +#ifdef CONFIG_X86_32 + REG_OFFSET_NAME(ds), + REG_OFFSET_NAME(es), + REG_OFFSET_NAME(fs), + REG_OFFSET_NAME(gs), +#endif + REG_OFFSET_NAME(orig_ax), + REG_OFFSET_NAME(ip), + REG_OFFSET_NAME(cs), + REG_OFFSET_NAME(flags), + REG_OFFSET_NAME(sp), + REG_OFFSET_NAME(ss), + REG_OFFSET_END, +}; + +/** + * regs_query_register_offset() - query register offset from its name + * @name: the name of a register + * + * regs_query_register_offset() returns the offset of a register in struct + * pt_regs from its name. If the name is invalid, this returns -EINVAL; + */ +int regs_query_register_offset(const char *name) +{ + const struct pt_regs_offset *roff; + for (roff = regoffset_table; roff->name != NULL; roff++) + if (!strcmp(roff->name, name)) + return roff->offset; + return -EINVAL; +} + +/** + * regs_query_register_name() - query register name from its offset + * @offset: the offset of a register in struct pt_regs. + * + * regs_query_register_name() returns the name of a register from its + * offset in struct pt_regs. If the @offset is invalid, this returns NULL; + */ +const char *regs_query_register_name(unsigned int offset) +{ + const struct pt_regs_offset *roff; + for (roff = regoffset_table; roff->name != NULL; roff++) + if (roff->offset == offset) + return roff->name; + return NULL; +} + +static const int arg_offs_table[] = { +#ifdef CONFIG_X86_32 + [0] = offsetof(struct pt_regs, ax), + [1] = offsetof(struct pt_regs, dx), + [2] = offsetof(struct pt_regs, cx) +#else /* CONFIG_X86_64 */ + [0] = offsetof(struct pt_regs, di), + [1] = offsetof(struct pt_regs, si), + [2] = offsetof(struct pt_regs, dx), + [3] = offsetof(struct pt_regs, cx), + [4] = offsetof(struct pt_regs, r8), + [5] = offsetof(struct pt_regs, r9) +#endif }; /* @@ -69,13 +168,39 @@ static inline bool invalid_selector(u16 value) #define FLAG_MASK FLAG_MASK_32 -static long *pt_regs_access(struct pt_regs *regs, unsigned long regno) +/* + * X86_32 CPUs don't save ss and esp if the CPU is already in kernel mode + * when it traps. The previous stack will be directly underneath the saved + * registers, and 'sp/ss' won't even have been saved. Thus the '®s->sp'. + * + * Now, if the stack is empty, '®s->sp' is out of range. In this + * case we try to take the previous stack. To always return a non-null + * stack pointer we fall back to regs as stack if no previous stack + * exists. + * + * This is valid only for kernel mode traps. + */ +unsigned long kernel_stack_pointer(struct pt_regs *regs) +{ + unsigned long context = (unsigned long)regs & ~(THREAD_SIZE - 1); + unsigned long sp = (unsigned long)®s->sp; + u32 *prev_esp; + + if (context == (sp & ~(THREAD_SIZE - 1))) + return sp; + + prev_esp = (u32 *)(context); + if (prev_esp) + return (unsigned long)prev_esp; + + return (unsigned long)regs; +} +EXPORT_SYMBOL_GPL(kernel_stack_pointer); + +static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno) { BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0); - regno >>= 2; - if (regno > FS) - --regno; - return ®s->bx + regno; + return ®s->bx + (regno >> 2); } static u16 get_segment_reg(struct task_struct *task, unsigned long offset) @@ -87,9 +212,10 @@ static u16 get_segment_reg(struct task_struct *task, unsigned long offset) if (offset != offsetof(struct user_regs_struct, gs)) retval = *pt_regs_access(task_pt_regs(task), offset); else { - retval = task->thread.gs; if (task == current) - savesegment(gs, retval); + retval = get_user_gs(task_pt_regs(task)); + else + retval = task_user_gs(task); } return retval; } @@ -123,23 +249,15 @@ static int set_segment_reg(struct task_struct *task, break; case offsetof(struct user_regs_struct, gs): - task->thread.gs = value; if (task == current) - /* - * The user-mode %gs is not affected by - * kernel entry, so we must update the CPU. - */ - loadsegment(gs, value); + set_user_gs(task_pt_regs(task), value); + else + task_user_gs(task) = value; } return 0; } -static unsigned long debugreg_addr_limit(struct task_struct *task) -{ - return TASK_SIZE - 3; -} - #else /* CONFIG_X86_64 */ #define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT) @@ -264,15 +382,6 @@ static int set_segment_reg(struct task_struct *task, return 0; } -static unsigned long debugreg_addr_limit(struct task_struct *task) -{ -#ifdef CONFIG_IA32_EMULATION - if (test_tsk_thread_flag(task, TIF_IA32)) - return IA32_PAGE_OFFSET - 3; -#endif - return TASK_SIZE64 - 7; -} - #endif /* CONFIG_X86_32 */ static unsigned long get_flags(struct task_struct *task) @@ -323,16 +432,6 @@ static int putreg(struct task_struct *child, return set_flags(child, value); #ifdef CONFIG_X86_64 - /* - * Orig_ax is really just a flag with small positive and - * negative values, so make sure to always sign-extend it - * from 32 bits so that it works correctly regardless of - * whether we come from a 32-bit environment or not. - */ - case offsetof(struct user_regs_struct, orig_ax): - value = (long) (s32) value; - break; - case offsetof(struct user_regs_struct,fs_base): if (value >= TASK_SIZE_OF(child)) return -EIO; @@ -416,14 +515,14 @@ static int genregs_get(struct task_struct *target, { if (kbuf) { unsigned long *k = kbuf; - while (count > 0) { + while (count >= sizeof(*k)) { *k++ = getreg(target, pos); count -= sizeof(*k); pos += sizeof(*k); } } else { unsigned long __user *u = ubuf; - while (count > 0) { + while (count >= sizeof(*u)) { if (__put_user(getreg(target, pos), u++)) return -EFAULT; count -= sizeof(*u); @@ -442,14 +541,14 @@ static int genregs_set(struct task_struct *target, int ret = 0; if (kbuf) { const unsigned long *k = kbuf; - while (count > 0 && !ret) { + while (count >= sizeof(*k) && !ret) { ret = putreg(target, pos, *k++); count -= sizeof(*k); pos += sizeof(*k); } } else { const unsigned long __user *u = ubuf; - while (count > 0 && !ret) { + while (count >= sizeof(*u) && !ret) { unsigned long word; ret = __get_user(word, u++); if (ret) @@ -462,384 +561,245 @@ static int genregs_set(struct task_struct *target, return ret; } -/* - * This function is trivial and will be inlined by the compiler. - * Having it separates the implementation details of debug - * registers from the interface details of ptrace. - */ -static unsigned long ptrace_get_debugreg(struct task_struct *child, int n) +static void ptrace_triggered(struct perf_event *bp, + struct perf_sample_data *data, + struct pt_regs *regs) { - switch (n) { - case 0: return child->thread.debugreg0; - case 1: return child->thread.debugreg1; - case 2: return child->thread.debugreg2; - case 3: return child->thread.debugreg3; - case 6: return child->thread.debugreg6; - case 7: return child->thread.debugreg7; + int i; + struct thread_struct *thread = &(current->thread); + + /* + * Store in the virtual DR6 register the fact that the breakpoint + * was hit so the thread's debugger will see it. + */ + for (i = 0; i < HBP_NUM; i++) { + if (thread->ptrace_bps[i] == bp) + break; } - return 0; + + thread->debugreg6 |= (DR_TRAP0 << i); } -static int ptrace_set_debugreg(struct task_struct *child, - int n, unsigned long data) +/* + * Walk through every ptrace breakpoints for this thread and + * build the dr7 value on top of their attributes. + * + */ +static unsigned long ptrace_get_dr7(struct perf_event *bp[]) { int i; + int dr7 = 0; + struct arch_hw_breakpoint *info; - if (unlikely(n == 4 || n == 5)) - return -EIO; - - if (n < 4 && unlikely(data >= debugreg_addr_limit(child))) - return -EIO; - - switch (n) { - case 0: child->thread.debugreg0 = data; break; - case 1: child->thread.debugreg1 = data; break; - case 2: child->thread.debugreg2 = data; break; - case 3: child->thread.debugreg3 = data; break; - - case 6: - if ((data & ~0xffffffffUL) != 0) - return -EIO; - child->thread.debugreg6 = data; - break; - - case 7: - /* - * Sanity-check data. Take one half-byte at once with - * check = (val >> (16 + 4*i)) & 0xf. It contains the - * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits - * 2 and 3 are LENi. Given a list of invalid values, - * we do mask |= 1 << invalid_value, so that - * (mask >> check) & 1 is a correct test for invalid - * values. - * - * R/Wi contains the type of the breakpoint / - * watchpoint, LENi contains the length of the watched - * data in the watchpoint case. - * - * The invalid values are: - * - LENi == 0x10 (undefined), so mask |= 0x0f00. [32-bit] - * - R/Wi == 0x10 (break on I/O reads or writes), so - * mask |= 0x4444. - * - R/Wi == 0x00 && LENi != 0x00, so we have mask |= - * 0x1110. - * - * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54. - * - * See the Intel Manual "System Programming Guide", - * 15.2.4 - * - * Note that LENi == 0x10 is defined on x86_64 in long - * mode (i.e. even for 32-bit userspace software, but - * 64-bit kernel), so the x86_64 mask value is 0x5454. - * See the AMD manual no. 24593 (AMD64 System Programming) - */ -#ifdef CONFIG_X86_32 -#define DR7_MASK 0x5f54 -#else -#define DR7_MASK 0x5554 -#endif - data &= ~DR_CONTROL_RESERVED; - for (i = 0; i < 4; i++) - if ((DR7_MASK >> ((data >> (16 + 4*i)) & 0xf)) & 1) - return -EIO; - child->thread.debugreg7 = data; - if (data) - set_tsk_thread_flag(child, TIF_DEBUG); - else - clear_tsk_thread_flag(child, TIF_DEBUG); - break; + for (i = 0; i < HBP_NUM; i++) { + if (bp[i] && !bp[i]->attr.disabled) { + info = counter_arch_bp(bp[i]); + dr7 |= encode_dr7(i, info->len, info->type); + } } - return 0; + return dr7; } -#ifdef X86_BTS - -static int ptrace_bts_get_size(struct task_struct *child) +static int ptrace_fill_bp_fields(struct perf_event_attr *attr, + int len, int type, bool disabled) { - if (!child->thread.ds_area_msr) - return -ENXIO; + int err, bp_len, bp_type; - return ds_get_bts_index((void *)child->thread.ds_area_msr); + err = arch_bp_generic_fields(len, type, &bp_len, &bp_type); + if (!err) { + attr->bp_len = bp_len; + attr->bp_type = bp_type; + attr->disabled = disabled; + } + + return err; } -static int ptrace_bts_read_record(struct task_struct *child, - long index, - struct bts_struct __user *out) +static struct perf_event * +ptrace_register_breakpoint(struct task_struct *tsk, int len, int type, + unsigned long addr, bool disabled) { - struct bts_struct ret; - int retval; - int bts_end; - int bts_index; - - if (!child->thread.ds_area_msr) - return -ENXIO; - - if (index < 0) - return -EINVAL; - - bts_end = ds_get_bts_end((void *)child->thread.ds_area_msr); - if (bts_end <= index) - return -EINVAL; + struct perf_event_attr attr; + int err; - /* translate the ptrace bts index into the ds bts index */ - bts_index = ds_get_bts_index((void *)child->thread.ds_area_msr); - bts_index -= (index + 1); - if (bts_index < 0) - bts_index += bts_end; + ptrace_breakpoint_init(&attr); + attr.bp_addr = addr; - retval = ds_read_bts((void *)child->thread.ds_area_msr, - bts_index, &ret); - if (retval < 0) - return retval; - - if (copy_to_user(out, &ret, sizeof(ret))) - return -EFAULT; - - return sizeof(ret); -} - -static int ptrace_bts_clear(struct task_struct *child) -{ - if (!child->thread.ds_area_msr) - return -ENXIO; + err = ptrace_fill_bp_fields(&attr, len, type, disabled); + if (err) + return ERR_PTR(err); - return ds_clear((void *)child->thread.ds_area_msr); + return register_user_hw_breakpoint(&attr, ptrace_triggered, + NULL, tsk); } -static int ptrace_bts_drain(struct task_struct *child, - long size, - struct bts_struct __user *out) +static int ptrace_modify_breakpoint(struct perf_event *bp, int len, int type, + int disabled) { - int end, i; - void *ds = (void *)child->thread.ds_area_msr; - - if (!ds) - return -ENXIO; - - end = ds_get_bts_index(ds); - if (end <= 0) - return end; - - if (size < (end * sizeof(struct bts_struct))) - return -EIO; + struct perf_event_attr attr = bp->attr; + int err; - for (i = 0; i < end; i++, out++) { - struct bts_struct ret; - int retval; + err = ptrace_fill_bp_fields(&attr, len, type, disabled); + if (err) + return err; - retval = ds_read_bts(ds, i, &ret); - if (retval < 0) - return retval; - - if (copy_to_user(out, &ret, sizeof(ret))) - return -EFAULT; - } - - ds_clear(ds); - - return end; + return modify_user_hw_breakpoint(bp, &attr); } -static int ptrace_bts_config(struct task_struct *child, - long cfg_size, - const struct ptrace_bts_config __user *ucfg) +/* + * Handle ptrace writes to debug register 7. + */ +static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data) { - struct ptrace_bts_config cfg; - int bts_size, ret = 0; - void *ds; - - if (cfg_size < sizeof(cfg)) - return -EIO; - - if (copy_from_user(&cfg, ucfg, sizeof(cfg))) - return -EFAULT; + struct thread_struct *thread = &tsk->thread; + unsigned long old_dr7; + bool second_pass = false; + int i, rc, ret = 0; + + data &= ~DR_CONTROL_RESERVED; + old_dr7 = ptrace_get_dr7(thread->ptrace_bps); + +restore: + rc = 0; + for (i = 0; i < HBP_NUM; i++) { + unsigned len, type; + bool disabled = !decode_dr7(data, i, &len, &type); + struct perf_event *bp = thread->ptrace_bps[i]; + + if (!bp) { + if (disabled) + continue; + + bp = ptrace_register_breakpoint(tsk, + len, type, 0, disabled); + if (IS_ERR(bp)) { + rc = PTR_ERR(bp); + break; + } - if ((int)cfg.size < 0) - return -EINVAL; + thread->ptrace_bps[i] = bp; + continue; + } - bts_size = 0; - ds = (void *)child->thread.ds_area_msr; - if (ds) { - bts_size = ds_get_bts_size(ds); - if (bts_size < 0) - return bts_size; + rc = ptrace_modify_breakpoint(bp, len, type, disabled); + if (rc) + break; } - cfg.size = PAGE_ALIGN(cfg.size); - - if (bts_size != cfg.size) { - ret = ptrace_bts_realloc(child, cfg.size, - cfg.flags & PTRACE_BTS_O_CUT_SIZE); - if (ret < 0) - goto errout; - ds = (void *)child->thread.ds_area_msr; + /* Restore if the first pass failed, second_pass shouldn't fail. */ + if (rc && !WARN_ON(second_pass)) { + ret = rc; + data = old_dr7; + second_pass = true; + goto restore; } - if (cfg.flags & PTRACE_BTS_O_SIGNAL) - ret = ds_set_overflow(ds, DS_O_SIGNAL); - else - ret = ds_set_overflow(ds, DS_O_WRAP); - if (ret < 0) - goto errout; - - if (cfg.flags & PTRACE_BTS_O_TRACE) - child->thread.debugctlmsr |= ds_debugctl_mask(); - else - child->thread.debugctlmsr &= ~ds_debugctl_mask(); - - if (cfg.flags & PTRACE_BTS_O_SCHED) - set_tsk_thread_flag(child, TIF_BTS_TRACE_TS); - else - clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); - - ret = sizeof(cfg); - -out: - if (child->thread.debugctlmsr) - set_tsk_thread_flag(child, TIF_DEBUGCTLMSR); - else - clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); - return ret; - -errout: - child->thread.debugctlmsr &= ~ds_debugctl_mask(); - clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); - goto out; } -static int ptrace_bts_status(struct task_struct *child, - long cfg_size, - struct ptrace_bts_config __user *ucfg) +/* + * Handle PTRACE_PEEKUSR calls for the debug register area. + */ +static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) { - void *ds = (void *)child->thread.ds_area_msr; - struct ptrace_bts_config cfg; - - if (cfg_size < sizeof(cfg)) - return -EIO; - - memset(&cfg, 0, sizeof(cfg)); - - if (ds) { - cfg.size = ds_get_bts_size(ds); - - if (ds_get_overflow(ds) == DS_O_SIGNAL) - cfg.flags |= PTRACE_BTS_O_SIGNAL; - - if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) && - child->thread.debugctlmsr & ds_debugctl_mask()) - cfg.flags |= PTRACE_BTS_O_TRACE; - - if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS)) - cfg.flags |= PTRACE_BTS_O_SCHED; + struct thread_struct *thread = &tsk->thread; + unsigned long val = 0; + + if (n < HBP_NUM) { + struct perf_event *bp = thread->ptrace_bps[n]; + + if (bp) + val = bp->hw.info.address; + } else if (n == 6) { + val = thread->debugreg6; + } else if (n == 7) { + val = thread->ptrace_dr7; } - - cfg.bts_size = sizeof(struct bts_struct); - - if (copy_to_user(ucfg, &cfg, sizeof(cfg))) - return -EFAULT; - - return sizeof(cfg); + return val; } - -static int ptrace_bts_write_record(struct task_struct *child, - const struct bts_struct *in) +static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, + unsigned long addr) { - int retval; + struct thread_struct *t = &tsk->thread; + struct perf_event *bp = t->ptrace_bps[nr]; + int err = 0; - if (!child->thread.ds_area_msr) - return -ENXIO; + if (!bp) { + /* + * Put stub len and type to create an inactive but correct bp. + * + * CHECKME: the previous code returned -EIO if the addr wasn't + * a valid task virtual addr. The new one will return -EINVAL in + * this case. + * -EINVAL may be what we want for in-kernel breakpoints users, + * but -EIO looks better for ptrace, since we refuse a register + * writing for the user. And anyway this is the previous + * behaviour. + */ + bp = ptrace_register_breakpoint(tsk, + X86_BREAKPOINT_LEN_1, X86_BREAKPOINT_WRITE, + addr, true); + if (IS_ERR(bp)) + err = PTR_ERR(bp); + else + t->ptrace_bps[nr] = bp; + } else { + struct perf_event_attr attr = bp->attr; - retval = ds_write_bts((void *)child->thread.ds_area_msr, in); - if (retval) - return retval; + attr.bp_addr = addr; + err = modify_user_hw_breakpoint(bp, &attr); + } - return sizeof(*in); + return err; } -static int ptrace_bts_realloc(struct task_struct *child, - int size, int reduce_size) +/* + * Handle PTRACE_POKEUSR calls for the debug register area. + */ +static int ptrace_set_debugreg(struct task_struct *tsk, int n, + unsigned long val) { - unsigned long rlim, vm; - int ret, old_size; - - if (size < 0) - return -EINVAL; - - old_size = ds_get_bts_size((void *)child->thread.ds_area_msr); - if (old_size < 0) - return old_size; - - ret = ds_free((void **)&child->thread.ds_area_msr); - if (ret < 0) - goto out; - - size >>= PAGE_SHIFT; - old_size >>= PAGE_SHIFT; - - current->mm->total_vm -= old_size; - current->mm->locked_vm -= old_size; - - if (size == 0) - goto out; - - rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; - vm = current->mm->total_vm + size; - if (rlim < vm) { - ret = -ENOMEM; - - if (!reduce_size) - goto out; - - size = rlim - current->mm->total_vm; - if (size <= 0) - goto out; + struct thread_struct *thread = &tsk->thread; + /* There are no DR4 or DR5 registers */ + int rc = -EIO; + + if (n < HBP_NUM) { + rc = ptrace_set_breakpoint_addr(tsk, n, val); + } else if (n == 6) { + thread->debugreg6 = val; + rc = 0; + } else if (n == 7) { + rc = ptrace_write_dr7(tsk, val); + if (!rc) + thread->ptrace_dr7 = val; } + return rc; +} - rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; - vm = current->mm->locked_vm + size; - if (rlim < vm) { - ret = -ENOMEM; - - if (!reduce_size) - goto out; - - size = rlim - current->mm->locked_vm; - if (size <= 0) - goto out; - } - - ret = ds_allocate((void **)&child->thread.ds_area_msr, - size << PAGE_SHIFT); - if (ret < 0) - goto out; - - current->mm->total_vm += size; - current->mm->locked_vm += size; - -out: - if (child->thread.ds_area_msr) - set_tsk_thread_flag(child, TIF_DS_AREA_MSR); - else - clear_tsk_thread_flag(child, TIF_DS_AREA_MSR); - - return ret; +/* + * These access the current or another (stopped) task's io permission + * bitmap for debugging or core dump. + */ +static int ioperm_active(struct task_struct *target, + const struct user_regset *regset) +{ + return target->thread.io_bitmap_max / regset->size; } -void ptrace_bts_take_timestamp(struct task_struct *tsk, - enum bts_qualifier qualifier) +static int ioperm_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) { - struct bts_struct rec = { - .qualifier = qualifier, - .variant.jiffies = jiffies_64 - }; + if (!target->thread.io_bitmap_ptr) + return -ENXIO; - ptrace_bts_write_record(tsk, &rec); + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + target->thread.io_bitmap_ptr, + 0, IO_BITMAP_BYTES); } -#endif /* X86_BTS */ /* * Called by kernel/ptrace.c when detaching.. @@ -852,22 +812,14 @@ void ptrace_disable(struct task_struct *child) #ifdef TIF_SYSCALL_EMU clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); #endif - if (child->thread.ds_area_msr) { -#ifdef X86_BTS - ptrace_bts_realloc(child, 0, 0); -#endif - child->thread.debugctlmsr &= ~ds_debugctl_mask(); - if (!child->thread.debugctlmsr) - clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); - clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); - } } #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION static const struct user_regset_view user_x86_32_view; /* Initialized below. */ #endif -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { int ret; unsigned long __user *datap = (unsigned long __user *)data; @@ -878,8 +830,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) unsigned long tmp; ret = -EIO; - if ((addr & (sizeof(data) - 1)) || addr < 0 || - addr >= sizeof(struct user)) + if ((addr & (sizeof(data) - 1)) || addr >= sizeof(struct user)) break; tmp = 0; /* Default return condition */ @@ -896,8 +847,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ ret = -EIO; - if ((addr & (sizeof(data) - 1)) || addr < 0 || - addr >= sizeof(struct user)) + if ((addr & (sizeof(data) - 1)) || addr >= sizeof(struct user)) break; if (addr < sizeof(struct user_regs_struct)) @@ -943,28 +893,28 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) return copy_regset_to_user(child, &user_x86_32_view, REGSET_XFP, 0, sizeof(struct user_fxsr_struct), - datap); + datap) ? -EIO : 0; case PTRACE_SETFPXREGS: /* Set the child extended FPU state. */ return copy_regset_from_user(child, &user_x86_32_view, REGSET_XFP, 0, sizeof(struct user_fxsr_struct), - datap); + datap) ? -EIO : 0; #endif #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION case PTRACE_GET_THREAD_AREA: - if (addr < 0) + if ((int) addr < 0) return -EIO; ret = do_get_thread_area(child, addr, - (struct user_desc __user *) data); + (struct user_desc __user *)data); break; case PTRACE_SET_THREAD_AREA: - if (addr < 0) + if ((int) addr < 0) return -EIO; ret = do_set_thread_area(child, addr, - (struct user_desc __user *) data, 0); + (struct user_desc __user *)data, 0); break; #endif @@ -977,39 +927,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) break; #endif - /* - * These bits need more cooking - not enabled yet: - */ -#ifdef X86_BTS - case PTRACE_BTS_CONFIG: - ret = ptrace_bts_config - (child, data, (struct ptrace_bts_config __user *)addr); - break; - - case PTRACE_BTS_STATUS: - ret = ptrace_bts_status - (child, data, (struct ptrace_bts_config __user *)addr); - break; - - case PTRACE_BTS_SIZE: - ret = ptrace_bts_get_size(child); - break; - - case PTRACE_BTS_GET: - ret = ptrace_bts_read_record - (child, data, (struct bts_struct __user *) addr); - break; - - case PTRACE_BTS_CLEAR: - ret = ptrace_bts_clear(child); - break; - - case PTRACE_BTS_DRAIN: - ret = ptrace_bts_drain - (child, data, (struct bts_struct __user *) addr); - break; -#endif - default: ret = ptrace_request(child, request, addr, data); break; @@ -1061,10 +978,15 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value) case offsetof(struct user32, regs.orig_eax): /* - * Sign-extend the value so that orig_eax = -1 - * causes (long)orig_ax < 0 tests to fire correctly. + * A 32-bit debugger setting orig_eax means to restore + * the state of the task restarting a 32-bit syscall. + * Make sure we interpret the -ERESTART* codes correctly + * in case the task is not actually still sitting at the + * exit from a 32-bit syscall with TS_COMPAT still set. */ - regs->orig_ax = (long) (s32) value; + regs->orig_ax = value; + if (syscall_get_nr(child, regs) >= 0) + task_thread_info(child)->status |= TS_COMPAT; break; case offsetof(struct user32, regs.eflags): @@ -1159,14 +1081,14 @@ static int genregs32_get(struct task_struct *target, { if (kbuf) { compat_ulong_t *k = kbuf; - while (count > 0) { + while (count >= sizeof(*k)) { getreg32(target, pos, k++); count -= sizeof(*k); pos += sizeof(*k); } } else { compat_ulong_t __user *u = ubuf; - while (count > 0) { + while (count >= sizeof(*u)) { compat_ulong_t word; getreg32(target, pos, &word); if (__put_user(word, u++)) @@ -1187,14 +1109,14 @@ static int genregs32_set(struct task_struct *target, int ret = 0; if (kbuf) { const compat_ulong_t *k = kbuf; - while (count > 0 && !ret) { + while (count >= sizeof(*k) && !ret) { ret = putreg32(target, pos, *k++); count -= sizeof(*k); pos += sizeof(*k); } } else { const compat_ulong_t __user *u = ubuf; - while (count > 0 && !ret) { + while (count >= sizeof(*u) && !ret) { compat_ulong_t word; ret = __get_user(word, u++); if (ret) @@ -1207,95 +1129,107 @@ static int genregs32_set(struct task_struct *target, return ret; } -static long ptrace32_siginfo(unsigned request, u32 pid, u32 addr, u32 data) +#ifdef CONFIG_X86_X32_ABI +static long x32_arch_ptrace(struct task_struct *child, + compat_long_t request, compat_ulong_t caddr, + compat_ulong_t cdata) { - siginfo_t __user *si = compat_alloc_user_space(sizeof(siginfo_t)); - compat_siginfo_t __user *si32 = compat_ptr(data); - siginfo_t ssi; - int ret; - - if (request == PTRACE_SETSIGINFO) { - memset(&ssi, 0, sizeof(siginfo_t)); - ret = copy_siginfo_from_user32(&ssi, si32); - if (ret) - return ret; - if (copy_to_user(si, &ssi, sizeof(siginfo_t))) - return -EFAULT; - } - ret = sys_ptrace(request, pid, addr, (unsigned long)si); - if (ret) - return ret; - if (request == PTRACE_GETSIGINFO) { - if (copy_from_user(&ssi, si, sizeof(siginfo_t))) - return -EFAULT; - ret = copy_siginfo_to_user32(si32, &ssi); - } - return ret; -} - -asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data) -{ - struct task_struct *child; - struct pt_regs *childregs; + unsigned long addr = caddr; + unsigned long data = cdata; void __user *datap = compat_ptr(data); int ret; - __u32 val; switch (request) { - case PTRACE_TRACEME: - case PTRACE_ATTACH: - case PTRACE_KILL: - case PTRACE_CONT: - case PTRACE_SINGLESTEP: - case PTRACE_SINGLEBLOCK: - case PTRACE_DETACH: - case PTRACE_SYSCALL: - case PTRACE_OLDSETOPTIONS: - case PTRACE_SETOPTIONS: - case PTRACE_SET_THREAD_AREA: - case PTRACE_GET_THREAD_AREA: -#ifdef X86_BTS - case PTRACE_BTS_CONFIG: - case PTRACE_BTS_STATUS: - case PTRACE_BTS_SIZE: - case PTRACE_BTS_GET: - case PTRACE_BTS_CLEAR: - case PTRACE_BTS_DRAIN: -#endif - return sys_ptrace(request, pid, addr, data); + /* Read 32bits at location addr in the USER area. Only allow + to return the lower 32bits of segment and debug registers. */ + case PTRACE_PEEKUSR: { + u32 tmp; - default: - return -EINVAL; + ret = -EIO; + if ((addr & (sizeof(data) - 1)) || addr >= sizeof(struct user) || + addr < offsetof(struct user_regs_struct, cs)) + break; + + tmp = 0; /* Default return condition */ + if (addr < sizeof(struct user_regs_struct)) + tmp = getreg(child, addr); + else if (addr >= offsetof(struct user, u_debugreg[0]) && + addr <= offsetof(struct user, u_debugreg[7])) { + addr -= offsetof(struct user, u_debugreg[0]); + tmp = ptrace_get_debugreg(child, addr / sizeof(data)); + } + ret = put_user(tmp, (__u32 __user *)datap); + break; + } - case PTRACE_PEEKTEXT: - case PTRACE_PEEKDATA: - case PTRACE_POKEDATA: - case PTRACE_POKETEXT: + /* Write the word at location addr in the USER area. Only allow + to update segment and debug registers with the upper 32bits + zero-extended. */ case PTRACE_POKEUSR: - case PTRACE_PEEKUSR: - case PTRACE_GETREGS: - case PTRACE_SETREGS: - case PTRACE_SETFPREGS: - case PTRACE_GETFPREGS: - case PTRACE_SETFPXREGS: - case PTRACE_GETFPXREGS: - case PTRACE_GETEVENTMSG: + ret = -EIO; + if ((addr & (sizeof(data) - 1)) || addr >= sizeof(struct user) || + addr < offsetof(struct user_regs_struct, cs)) + break; + + if (addr < sizeof(struct user_regs_struct)) + ret = putreg(child, addr, data); + else if (addr >= offsetof(struct user, u_debugreg[0]) && + addr <= offsetof(struct user, u_debugreg[7])) { + addr -= offsetof(struct user, u_debugreg[0]); + ret = ptrace_set_debugreg(child, + addr / sizeof(data), data); + } break; - case PTRACE_SETSIGINFO: - case PTRACE_GETSIGINFO: - return ptrace32_siginfo(request, pid, addr, data); + case PTRACE_GETREGS: /* Get all gp regs from the child. */ + return copy_regset_to_user(child, + task_user_regset_view(current), + REGSET_GENERAL, + 0, sizeof(struct user_regs_struct), + datap); + + case PTRACE_SETREGS: /* Set all gp regs in the child. */ + return copy_regset_from_user(child, + task_user_regset_view(current), + REGSET_GENERAL, + 0, sizeof(struct user_regs_struct), + datap); + + case PTRACE_GETFPREGS: /* Get the child FPU state. */ + return copy_regset_to_user(child, + task_user_regset_view(current), + REGSET_FP, + 0, sizeof(struct user_i387_struct), + datap); + + case PTRACE_SETFPREGS: /* Set the child FPU state. */ + return copy_regset_from_user(child, + task_user_regset_view(current), + REGSET_FP, + 0, sizeof(struct user_i387_struct), + datap); + + default: + return compat_ptrace_request(child, request, addr, data); } - child = ptrace_get_task_struct(pid); - if (IS_ERR(child)) - return PTR_ERR(child); + return ret; +} +#endif - ret = ptrace_check_attach(child, request == PTRACE_KILL); - if (ret < 0) - goto out; +long compat_arch_ptrace(struct task_struct *child, compat_long_t request, + compat_ulong_t caddr, compat_ulong_t cdata) +{ + unsigned long addr = caddr; + unsigned long data = cdata; + void __user *datap = compat_ptr(data); + int ret; + __u32 val; - childregs = task_pt_regs(child); +#ifdef CONFIG_X86_X32_ABI + if (!is_ia32_task()) + return x32_arch_ptrace(child, request, caddr, cdata); +#endif switch (request) { case PTRACE_PEEKUSR: @@ -1343,12 +1277,14 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data) sizeof(struct user32_fxsr_struct), datap); + case PTRACE_GET_THREAD_AREA: + case PTRACE_SET_THREAD_AREA: + return arch_ptrace(child, request, addr, data); + default: return compat_ptrace_request(child, request, addr, data); } - out: - put_task_struct(child); return ret; } @@ -1356,7 +1292,7 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data) #ifdef CONFIG_X86_64 -static const struct user_regset x86_64_regsets[] = { +static struct user_regset x86_64_regsets[] __read_mostly = { [REGSET_GENERAL] = { .core_note_type = NT_PRSTATUS, .n = sizeof(struct user_regs_struct) / sizeof(long), @@ -1369,6 +1305,18 @@ static const struct user_regset x86_64_regsets[] = { .size = sizeof(long), .align = sizeof(long), .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set }, + [REGSET_XSTATE] = { + .core_note_type = NT_X86_XSTATE, + .size = sizeof(u64), .align = sizeof(u64), + .active = xstateregs_active, .get = xstateregs_get, + .set = xstateregs_set + }, + [REGSET_IOPERM64] = { + .core_note_type = NT_386_IOPERM, + .n = IO_BITMAP_LONGS, + .size = sizeof(long), .align = sizeof(long), + .active = ioperm_active, .get = ioperm_get + }, }; static const struct user_regset_view user_x86_64_view = { @@ -1385,7 +1333,7 @@ static const struct user_regset_view user_x86_64_view = { #endif /* CONFIG_X86_64 */ #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION -static const struct user_regset x86_32_regsets[] = { +static struct user_regset x86_32_regsets[] __read_mostly = { [REGSET_GENERAL] = { .core_note_type = NT_PRSTATUS, .n = sizeof(struct user_regs_struct32) / sizeof(u32), @@ -1394,16 +1342,22 @@ static const struct user_regset x86_32_regsets[] = { }, [REGSET_FP] = { .core_note_type = NT_PRFPREG, - .n = sizeof(struct user_i387_struct) / sizeof(u32), + .n = sizeof(struct user_i387_ia32_struct) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), .active = fpregs_active, .get = fpregs_get, .set = fpregs_set }, [REGSET_XFP] = { .core_note_type = NT_PRXFPREG, - .n = sizeof(struct user_i387_struct) / sizeof(u32), + .n = sizeof(struct user32_fxsr_struct) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set }, + [REGSET_XSTATE] = { + .core_note_type = NT_X86_XSTATE, + .size = sizeof(u64), .align = sizeof(u64), + .active = xstateregs_active, .get = xstateregs_get, + .set = xstateregs_set + }, [REGSET_TLS] = { .core_note_type = NT_386_TLS, .n = GDT_ENTRY_TLS_ENTRIES, .bias = GDT_ENTRY_TLS_MIN, @@ -1412,6 +1366,12 @@ static const struct user_regset x86_32_regsets[] = { .active = regset_tls_active, .get = regset_tls_get, .set = regset_tls_set }, + [REGSET_IOPERM32] = { + .core_note_type = NT_386_IOPERM, + .n = IO_BITMAP_BYTES / sizeof(u32), + .size = sizeof(u32), .align = sizeof(u32), + .active = ioperm_active, .get = ioperm_get + }, }; static const struct user_regset_view user_x86_32_view = { @@ -1420,6 +1380,23 @@ static const struct user_regset_view user_x86_32_view = { }; #endif +/* + * This represents bytes 464..511 in the memory layout exported through + * the REGSET_XSTATE interface. + */ +u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; + +void update_regset_xstate_info(unsigned int size, u64 xstate_mask) +{ +#ifdef CONFIG_X86_64 + x86_64_regsets[REGSET_XSTATE].n = size / sizeof(u64); +#endif +#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION + x86_32_regsets[REGSET_XSTATE].n = size / sizeof(u64); +#endif + xstate_fx_sw_bytes[USER_XSTATE_XCR0_WORD] = xstate_mask; +} + const struct user_regset_view *task_user_regset_view(struct task_struct *task) { #ifdef CONFIG_IA32_EMULATION @@ -1433,164 +1410,126 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task) #endif } -#ifdef CONFIG_X86_32 - -void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) +static void fill_sigtrap_info(struct task_struct *tsk, + struct pt_regs *regs, + int error_code, int si_code, + struct siginfo *info) { - struct siginfo info; - - tsk->thread.trap_no = 1; + tsk->thread.trap_nr = X86_TRAP_DB; tsk->thread.error_code = error_code; - memset(&info, 0, sizeof(info)); - info.si_signo = SIGTRAP; - info.si_code = TRAP_BRKPT; + memset(info, 0, sizeof(*info)); + info->si_signo = SIGTRAP; + info->si_code = si_code; + info->si_addr = user_mode_vm(regs) ? (void __user *)regs->ip : NULL; +} - /* User-mode ip? */ - info.si_addr = user_mode_vm(regs) ? (void __user *) regs->ip : NULL; +void user_single_step_siginfo(struct task_struct *tsk, + struct pt_regs *regs, + struct siginfo *info) +{ + fill_sigtrap_info(tsk, regs, 0, TRAP_BRKPT, info); +} +void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, + int error_code, int si_code) +{ + struct siginfo info; + + fill_sigtrap_info(tsk, regs, error_code, si_code, &info); /* Send us the fake SIGTRAP */ force_sig_info(SIGTRAP, &info, tsk); } -/* notification of system call entry/exit - * - triggered by current->work.syscall_trace + +#ifdef CONFIG_X86_32 +# define IS_IA32 1 +#elif defined CONFIG_IA32_EMULATION +# define IS_IA32 is_compat_task() +#else +# define IS_IA32 0 +#endif + +/* + * We must return the syscall number to actually look up in the table. + * This can be -1L to skip running any syscall at all. */ -__attribute__((regparm(3))) -int do_syscall_trace(struct pt_regs *regs, int entryexit) +long syscall_trace_enter(struct pt_regs *regs) { - int is_sysemu = test_thread_flag(TIF_SYSCALL_EMU); + long ret = 0; + + user_exit(); + /* - * With TIF_SYSCALL_EMU set we want to ignore TIF_SINGLESTEP for syscall - * interception + * If we stepped into a sysenter/syscall insn, it trapped in + * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP. + * If user-mode had set TF itself, then it's still clear from + * do_debug() and we need to set it again to restore the user + * state. If we entered on the slow path, TF was already set. */ - int is_singlestep = !is_sysemu && test_thread_flag(TIF_SINGLESTEP); - int ret = 0; + if (test_thread_flag(TIF_SINGLESTEP)) + regs->flags |= X86_EFLAGS_TF; /* do the secure computing check first */ - if (!entryexit) - secure_computing(regs->orig_ax); - - if (unlikely(current->audit_context)) { - if (entryexit) - audit_syscall_exit(AUDITSC_RESULT(regs->ax), - regs->ax); - /* Debug traps, when using PTRACE_SINGLESTEP, must be sent only - * on the syscall exit path. Normally, when TIF_SYSCALL_AUDIT is - * not used, entry.S will call us only on syscall exit, not - * entry; so when TIF_SYSCALL_AUDIT is used we must avoid - * calling send_sigtrap() on syscall entry. - * - * Note that when PTRACE_SYSEMU_SINGLESTEP is used, - * is_singlestep is false, despite his name, so we will still do - * the correct thing. - */ - else if (is_singlestep) - goto out; - } - - if (!(current->ptrace & PT_PTRACED)) + if (secure_computing(regs->orig_ax)) { + /* seccomp failures shouldn't expose any additional code. */ + ret = -1L; goto out; + } - /* If a process stops on the 1st tracepoint with SYSCALL_TRACE - * and then is resumed with SYSEMU_SINGLESTEP, it will come in - * here. We have to check this and return */ - if (is_sysemu && entryexit) - return 0; + if (unlikely(test_thread_flag(TIF_SYSCALL_EMU))) + ret = -1L; - /* Fake a debug trap */ - if (is_singlestep) - send_sigtrap(current, regs, 0); + if ((ret || test_thread_flag(TIF_SYSCALL_TRACE)) && + tracehook_report_syscall_entry(regs)) + ret = -1L; - if (!test_thread_flag(TIF_SYSCALL_TRACE) && !is_sysemu) - goto out; + if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) + trace_sys_enter(regs, regs->orig_ax); - /* the 0x80 provides a way for the tracing parent to distinguish - between a syscall stop and SIGTRAP delivery */ - /* Note that the debugger could change the result of test_thread_flag!*/ - ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80:0)); + if (IS_IA32) + audit_syscall_entry(AUDIT_ARCH_I386, + regs->orig_ax, + regs->bx, regs->cx, + regs->dx, regs->si); +#ifdef CONFIG_X86_64 + else + audit_syscall_entry(AUDIT_ARCH_X86_64, + regs->orig_ax, + regs->di, regs->si, + regs->dx, regs->r10); +#endif - /* - * this isn't the same as continuing with a signal, but it will do - * for normal use. strace only continues with a signal if the - * stopping signal is not SIGTRAP. -brl - */ - if (current->exit_code) { - send_sig(current->exit_code, current, 1); - current->exit_code = 0; - } - ret = is_sysemu; out: - if (unlikely(current->audit_context) && !entryexit) - audit_syscall_entry(AUDIT_ARCH_I386, regs->orig_ax, - regs->bx, regs->cx, regs->dx, regs->si); - if (ret == 0) - return 0; - - regs->orig_ax = -1; /* force skip of syscall restarting */ - if (unlikely(current->audit_context)) - audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); - return 1; + return ret ?: regs->orig_ax; } -#else /* CONFIG_X86_64 */ - -static void syscall_trace(struct pt_regs *regs) +void syscall_trace_leave(struct pt_regs *regs) { + bool step; -#if 0 - printk("trace %s ip %lx sp %lx ax %d origrax %d caller %lx tiflags %x ptrace %x\n", - current->comm, - regs->ip, regs->sp, regs->ax, regs->orig_ax, __builtin_return_address(0), - current_thread_info()->flags, current->ptrace); -#endif - - ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) - ? 0x80 : 0)); /* - * this isn't the same as continuing with a signal, but it will do - * for normal use. strace only continues with a signal if the - * stopping signal is not SIGTRAP. -brl + * We may come here right after calling schedule_user() + * or do_notify_resume(), in which case we can be in RCU + * user mode. */ - if (current->exit_code) { - send_sig(current->exit_code, current, 1); - current->exit_code = 0; - } -} + user_exit(); -asmlinkage void syscall_trace_enter(struct pt_regs *regs) -{ - /* do the secure computing check first */ - secure_computing(regs->orig_ax); - - if (test_thread_flag(TIF_SYSCALL_TRACE) - && (current->ptrace & PT_PTRACED)) - syscall_trace(regs); - - if (unlikely(current->audit_context)) { - if (test_thread_flag(TIF_IA32)) { - audit_syscall_entry(AUDIT_ARCH_I386, - regs->orig_ax, - regs->bx, regs->cx, - regs->dx, regs->si); - } else { - audit_syscall_entry(AUDIT_ARCH_X86_64, - regs->orig_ax, - regs->di, regs->si, - regs->dx, regs->r10); - } - } -} + audit_syscall_exit(regs); -asmlinkage void syscall_trace_leave(struct pt_regs *regs) -{ - if (unlikely(current->audit_context)) - audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); + if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) + trace_sys_exit(regs, regs->ax); - if ((test_thread_flag(TIF_SYSCALL_TRACE) - || test_thread_flag(TIF_SINGLESTEP)) - && (current->ptrace & PT_PTRACED)) - syscall_trace(regs); -} + /* + * If TIF_SYSCALL_EMU is set, we only get here because of + * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP). + * We already reported this syscall instruction in + * syscall_trace_enter(). + */ + step = unlikely(test_thread_flag(TIF_SINGLESTEP)) && + !test_thread_flag(TIF_SYSCALL_EMU); + if (step || test_thread_flag(TIF_SYSCALL_TRACE)) + tracehook_report_syscall_exit(regs, step); -#endif /* CONFIG_X86_32 */ + user_enter(); +} |
