diff options
Diffstat (limited to 'arch/x86/kernel/ptrace.c')
| -rw-r--r-- | arch/x86/kernel/ptrace.c | 382 |
1 files changed, 251 insertions, 131 deletions
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 45892dc4b72..678c0ada3b3 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -21,18 +21,22 @@ #include <linux/signal.h> #include <linux/perf_event.h> #include <linux/hw_breakpoint.h> +#include <linux/rcupdate.h> +#include <linux/export.h> +#include <linux/context_tracking.h> #include <asm/uaccess.h> #include <asm/pgtable.h> -#include <asm/system.h> #include <asm/processor.h> #include <asm/i387.h> +#include <asm/fpu-internal.h> #include <asm/debugreg.h> #include <asm/ldt.h> #include <asm/desc.h> #include <asm/prctl.h> #include <asm/proto.h> #include <asm/hw_breakpoint.h> +#include <asm/traps.h> #include "tls.h" @@ -164,6 +168,35 @@ static inline bool invalid_selector(u16 value) #define FLAG_MASK FLAG_MASK_32 +/* + * X86_32 CPUs don't save ss and esp if the CPU is already in kernel mode + * when it traps. The previous stack will be directly underneath the saved + * registers, and 'sp/ss' won't even have been saved. Thus the '®s->sp'. + * + * Now, if the stack is empty, '®s->sp' is out of range. In this + * case we try to take the previous stack. To always return a non-null + * stack pointer we fall back to regs as stack if no previous stack + * exists. + * + * This is valid only for kernel mode traps. + */ +unsigned long kernel_stack_pointer(struct pt_regs *regs) +{ + unsigned long context = (unsigned long)regs & ~(THREAD_SIZE - 1); + unsigned long sp = (unsigned long)®s->sp; + u32 *prev_esp; + + if (context == (sp & ~(THREAD_SIZE - 1))) + return sp; + + prev_esp = (u32 *)(context); + if (prev_esp) + return (unsigned long)prev_esp; + + return (unsigned long)regs; +} +EXPORT_SYMBOL_GPL(kernel_stack_pointer); + static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno) { BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0); @@ -528,7 +561,7 @@ static int genregs_set(struct task_struct *target, return ret; } -static void ptrace_triggered(struct perf_event *bp, int nmi, +static void ptrace_triggered(struct perf_event *bp, struct perf_sample_data *data, struct pt_regs *regs) { @@ -568,30 +601,48 @@ static unsigned long ptrace_get_dr7(struct perf_event *bp[]) return dr7; } -static int -ptrace_modify_breakpoint(struct perf_event *bp, int len, int type, - struct task_struct *tsk, int disabled) +static int ptrace_fill_bp_fields(struct perf_event_attr *attr, + int len, int type, bool disabled) +{ + int err, bp_len, bp_type; + + err = arch_bp_generic_fields(len, type, &bp_len, &bp_type); + if (!err) { + attr->bp_len = bp_len; + attr->bp_type = bp_type; + attr->disabled = disabled; + } + + return err; +} + +static struct perf_event * +ptrace_register_breakpoint(struct task_struct *tsk, int len, int type, + unsigned long addr, bool disabled) { - int err; - int gen_len, gen_type; struct perf_event_attr attr; + int err; - /* - * We should have at least an inactive breakpoint at this - * slot. It means the user is writing dr7 without having - * written the address register first - */ - if (!bp) - return -EINVAL; + ptrace_breakpoint_init(&attr); + attr.bp_addr = addr; - err = arch_bp_generic_fields(len, type, &gen_len, &gen_type); + err = ptrace_fill_bp_fields(&attr, len, type, disabled); if (err) - return err; + return ERR_PTR(err); - attr = bp->attr; - attr.bp_len = gen_len; - attr.bp_type = gen_type; - attr.disabled = disabled; + return register_user_hw_breakpoint(&attr, ptrace_triggered, + NULL, tsk); +} + +static int ptrace_modify_breakpoint(struct perf_event *bp, int len, int type, + int disabled) +{ + struct perf_event_attr attr = bp->attr; + int err; + + err = ptrace_fill_bp_fields(&attr, len, type, disabled); + if (err) + return err; return modify_user_hw_breakpoint(bp, &attr); } @@ -601,61 +652,50 @@ ptrace_modify_breakpoint(struct perf_event *bp, int len, int type, */ static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data) { - struct thread_struct *thread = &(tsk->thread); + struct thread_struct *thread = &tsk->thread; unsigned long old_dr7; - int i, orig_ret = 0, rc = 0; - int enabled, second_pass = 0; - unsigned len, type; - struct perf_event *bp; + bool second_pass = false; + int i, rc, ret = 0; data &= ~DR_CONTROL_RESERVED; old_dr7 = ptrace_get_dr7(thread->ptrace_bps); + restore: - /* - * Loop through all the hardware breakpoints, making the - * appropriate changes to each. - */ + rc = 0; for (i = 0; i < HBP_NUM; i++) { - enabled = decode_dr7(data, i, &len, &type); - bp = thread->ptrace_bps[i]; - - if (!enabled) { - if (bp) { - /* - * Don't unregister the breakpoints right-away, - * unless all register_user_hw_breakpoint() - * requests have succeeded. This prevents - * any window of opportunity for debug - * register grabbing by other users. - */ - if (!second_pass) - continue; - - rc = ptrace_modify_breakpoint(bp, len, type, - tsk, 1); - if (rc) - break; + unsigned len, type; + bool disabled = !decode_dr7(data, i, &len, &type); + struct perf_event *bp = thread->ptrace_bps[i]; + + if (!bp) { + if (disabled) + continue; + + bp = ptrace_register_breakpoint(tsk, + len, type, 0, disabled); + if (IS_ERR(bp)) { + rc = PTR_ERR(bp); + break; } + + thread->ptrace_bps[i] = bp; continue; } - rc = ptrace_modify_breakpoint(bp, len, type, tsk, 0); + rc = ptrace_modify_breakpoint(bp, len, type, disabled); if (rc) break; } - /* - * Make a second pass to free the remaining unused breakpoints - * or to restore the original breakpoints if an error occurred. - */ - if (!second_pass) { - second_pass = 1; - if (rc < 0) { - orig_ret = rc; - data = old_dr7; - } + + /* Restore if the first pass failed, second_pass shouldn't fail. */ + if (rc && !WARN_ON(second_pass)) { + ret = rc; + data = old_dr7; + second_pass = true; goto restore; } - return ((orig_ret < 0) ? orig_ret : rc); + + return ret; } /* @@ -663,18 +703,17 @@ restore: */ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) { - struct thread_struct *thread = &(tsk->thread); + struct thread_struct *thread = &tsk->thread; unsigned long val = 0; if (n < HBP_NUM) { - struct perf_event *bp; - bp = thread->ptrace_bps[n]; - if (!bp) - return 0; - val = bp->hw.info.address; + struct perf_event *bp = thread->ptrace_bps[n]; + + if (bp) + val = bp->hw.info.address; } else if (n == 6) { val = thread->debugreg6; - } else if (n == 7) { + } else if (n == 7) { val = thread->ptrace_dr7; } return val; @@ -683,24 +722,14 @@ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, unsigned long addr) { - struct perf_event *bp; struct thread_struct *t = &tsk->thread; - struct perf_event_attr attr; - - if (!t->ptrace_bps[nr]) { - ptrace_breakpoint_init(&attr); - /* - * Put stub len and type to register (reserve) an inactive but - * correct bp - */ - attr.bp_addr = addr; - attr.bp_len = HW_BREAKPOINT_LEN_1; - attr.bp_type = HW_BREAKPOINT_W; - attr.disabled = 1; - - bp = register_user_hw_breakpoint(&attr, ptrace_triggered, tsk); + struct perf_event *bp = t->ptrace_bps[nr]; + int err = 0; + if (!bp) { /* + * Put stub len and type to create an inactive but correct bp. + * * CHECKME: the previous code returned -EIO if the addr wasn't * a valid task virtual addr. The new one will return -EINVAL in * this case. @@ -709,55 +738,43 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, * writing for the user. And anyway this is the previous * behaviour. */ + bp = ptrace_register_breakpoint(tsk, + X86_BREAKPOINT_LEN_1, X86_BREAKPOINT_WRITE, + addr, true); if (IS_ERR(bp)) - return PTR_ERR(bp); - - t->ptrace_bps[nr] = bp; + err = PTR_ERR(bp); + else + t->ptrace_bps[nr] = bp; } else { - int err; + struct perf_event_attr attr = bp->attr; - bp = t->ptrace_bps[nr]; - - attr = bp->attr; attr.bp_addr = addr; err = modify_user_hw_breakpoint(bp, &attr); - if (err) - return err; } - - return 0; + return err; } /* * Handle PTRACE_POKEUSR calls for the debug register area. */ -int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val) +static int ptrace_set_debugreg(struct task_struct *tsk, int n, + unsigned long val) { - struct thread_struct *thread = &(tsk->thread); - int rc = 0; - + struct thread_struct *thread = &tsk->thread; /* There are no DR4 or DR5 registers */ - if (n == 4 || n == 5) - return -EIO; + int rc = -EIO; - if (n == 6) { - thread->debugreg6 = val; - goto ret_path; - } if (n < HBP_NUM) { rc = ptrace_set_breakpoint_addr(tsk, n, val); - if (rc) - return rc; - } - /* All that's left is DR7 */ - if (n == 7) { + } else if (n == 6) { + thread->debugreg6 = val; + rc = 0; + } else if (n == 7) { rc = ptrace_write_dr7(tsk, val); if (!rc) thread->ptrace_dr7 = val; } - -ret_path: return rc; } @@ -1112,6 +1129,94 @@ static int genregs32_set(struct task_struct *target, return ret; } +#ifdef CONFIG_X86_X32_ABI +static long x32_arch_ptrace(struct task_struct *child, + compat_long_t request, compat_ulong_t caddr, + compat_ulong_t cdata) +{ + unsigned long addr = caddr; + unsigned long data = cdata; + void __user *datap = compat_ptr(data); + int ret; + + switch (request) { + /* Read 32bits at location addr in the USER area. Only allow + to return the lower 32bits of segment and debug registers. */ + case PTRACE_PEEKUSR: { + u32 tmp; + + ret = -EIO; + if ((addr & (sizeof(data) - 1)) || addr >= sizeof(struct user) || + addr < offsetof(struct user_regs_struct, cs)) + break; + + tmp = 0; /* Default return condition */ + if (addr < sizeof(struct user_regs_struct)) + tmp = getreg(child, addr); + else if (addr >= offsetof(struct user, u_debugreg[0]) && + addr <= offsetof(struct user, u_debugreg[7])) { + addr -= offsetof(struct user, u_debugreg[0]); + tmp = ptrace_get_debugreg(child, addr / sizeof(data)); + } + ret = put_user(tmp, (__u32 __user *)datap); + break; + } + + /* Write the word at location addr in the USER area. Only allow + to update segment and debug registers with the upper 32bits + zero-extended. */ + case PTRACE_POKEUSR: + ret = -EIO; + if ((addr & (sizeof(data) - 1)) || addr >= sizeof(struct user) || + addr < offsetof(struct user_regs_struct, cs)) + break; + + if (addr < sizeof(struct user_regs_struct)) + ret = putreg(child, addr, data); + else if (addr >= offsetof(struct user, u_debugreg[0]) && + addr <= offsetof(struct user, u_debugreg[7])) { + addr -= offsetof(struct user, u_debugreg[0]); + ret = ptrace_set_debugreg(child, + addr / sizeof(data), data); + } + break; + + case PTRACE_GETREGS: /* Get all gp regs from the child. */ + return copy_regset_to_user(child, + task_user_regset_view(current), + REGSET_GENERAL, + 0, sizeof(struct user_regs_struct), + datap); + + case PTRACE_SETREGS: /* Set all gp regs in the child. */ + return copy_regset_from_user(child, + task_user_regset_view(current), + REGSET_GENERAL, + 0, sizeof(struct user_regs_struct), + datap); + + case PTRACE_GETFPREGS: /* Get the child FPU state. */ + return copy_regset_to_user(child, + task_user_regset_view(current), + REGSET_FP, + 0, sizeof(struct user_i387_struct), + datap); + + case PTRACE_SETFPREGS: /* Set the child FPU state. */ + return copy_regset_from_user(child, + task_user_regset_view(current), + REGSET_FP, + 0, sizeof(struct user_i387_struct), + datap); + + default: + return compat_ptrace_request(child, request, addr, data); + } + + return ret; +} +#endif + long compat_arch_ptrace(struct task_struct *child, compat_long_t request, compat_ulong_t caddr, compat_ulong_t cdata) { @@ -1121,6 +1226,11 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, int ret; __u32 val; +#ifdef CONFIG_X86_X32_ABI + if (!is_ia32_task()) + return x32_arch_ptrace(child, request, caddr, cdata); +#endif + switch (request) { case PTRACE_PEEKUSR: ret = getreg32(child, addr, &val); @@ -1220,9 +1330,6 @@ static const struct user_regset_view user_x86_64_view = { #define genregs32_get genregs_get #define genregs32_set genregs_set -#define user_i387_ia32_struct user_i387_struct -#define user32_fxsr_struct user_fxsr_struct - #endif /* CONFIG_X86_64 */ #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION @@ -1308,7 +1415,7 @@ static void fill_sigtrap_info(struct task_struct *tsk, int error_code, int si_code, struct siginfo *info) { - tsk->thread.trap_no = 1; + tsk->thread.trap_nr = X86_TRAP_DB; tsk->thread.error_code = error_code; memset(info, 0, sizeof(*info)); @@ -1347,10 +1454,12 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, * We must return the syscall number to actually look up in the table. * This can be -1L to skip running any syscall at all. */ -asmregparm long syscall_trace_enter(struct pt_regs *regs) +long syscall_trace_enter(struct pt_regs *regs) { long ret = 0; + user_exit(); + /* * If we stepped into a sysenter/syscall insn, it trapped in * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP. @@ -1362,7 +1471,11 @@ asmregparm long syscall_trace_enter(struct pt_regs *regs) regs->flags |= X86_EFLAGS_TF; /* do the secure computing check first */ - secure_computing(regs->orig_ax); + if (secure_computing(regs->orig_ax)) { + /* seccomp failures shouldn't expose any additional code. */ + ret = -1L; + goto out; + } if (unlikely(test_thread_flag(TIF_SYSCALL_EMU))) ret = -1L; @@ -1374,30 +1487,35 @@ asmregparm long syscall_trace_enter(struct pt_regs *regs) if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) trace_sys_enter(regs, regs->orig_ax); - if (unlikely(current->audit_context)) { - if (IS_IA32) - audit_syscall_entry(AUDIT_ARCH_I386, - regs->orig_ax, - regs->bx, regs->cx, - regs->dx, regs->si); + if (IS_IA32) + audit_syscall_entry(AUDIT_ARCH_I386, + regs->orig_ax, + regs->bx, regs->cx, + regs->dx, regs->si); #ifdef CONFIG_X86_64 - else - audit_syscall_entry(AUDIT_ARCH_X86_64, - regs->orig_ax, - regs->di, regs->si, - regs->dx, regs->r10); + else + audit_syscall_entry(AUDIT_ARCH_X86_64, + regs->orig_ax, + regs->di, regs->si, + regs->dx, regs->r10); #endif - } +out: return ret ?: regs->orig_ax; } -asmregparm void syscall_trace_leave(struct pt_regs *regs) +void syscall_trace_leave(struct pt_regs *regs) { bool step; - if (unlikely(current->audit_context)) - audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); + /* + * We may come here right after calling schedule_user() + * or do_notify_resume(), in which case we can be in RCU + * user mode. + */ + user_exit(); + + audit_syscall_exit(regs); if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) trace_sys_exit(regs, regs->ax); @@ -1412,4 +1530,6 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs) !test_thread_flag(TIF_SYSCALL_EMU); if (step || test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall_exit(regs, step); + + user_enter(); } |
