diff options
Diffstat (limited to 'arch/x86/kernel/process_64.c')
| -rw-r--r-- | arch/x86/kernel/process_64.c | 278 |
1 files changed, 94 insertions, 184 deletions
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index b3d7a3a04f3..ca5b02d405c 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -14,7 +14,6 @@ * This file handles the architecture-dependent parts of process handling.. */ -#include <linux/stackprotector.h> #include <linux/cpu.h> #include <linux/errno.h> #include <linux/sched.h> @@ -32,16 +31,15 @@ #include <linux/notifier.h> #include <linux/kprobes.h> #include <linux/kdebug.h> -#include <linux/tick.h> #include <linux/prctl.h> #include <linux/uaccess.h> #include <linux/io.h> #include <linux/ftrace.h> #include <asm/pgtable.h> -#include <asm/system.h> #include <asm/processor.h> #include <asm/i387.h> +#include <asm/fpu-internal.h> #include <asm/mmu_context.h> #include <asm/prctl.h> #include <asm/desc.h> @@ -50,111 +48,11 @@ #include <asm/idle.h> #include <asm/syscalls.h> #include <asm/debugreg.h> - -#include <trace/events/power.h> +#include <asm/switch_to.h> asmlinkage extern void ret_from_fork(void); -DEFINE_PER_CPU(unsigned long, old_rsp); -static DEFINE_PER_CPU(unsigned char, is_idle); - -static ATOMIC_NOTIFIER_HEAD(idle_notifier); - -void idle_notifier_register(struct notifier_block *n) -{ - atomic_notifier_chain_register(&idle_notifier, n); -} -EXPORT_SYMBOL_GPL(idle_notifier_register); - -void idle_notifier_unregister(struct notifier_block *n) -{ - atomic_notifier_chain_unregister(&idle_notifier, n); -} -EXPORT_SYMBOL_GPL(idle_notifier_unregister); - -void enter_idle(void) -{ - percpu_write(is_idle, 1); - atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL); -} - -static void __exit_idle(void) -{ - if (x86_test_and_clear_bit_percpu(0, is_idle) == 0) - return; - atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL); -} - -/* Called from interrupts to signify idle end */ -void exit_idle(void) -{ - /* idle loop has pid 0 */ - if (current->pid) - return; - __exit_idle(); -} - -#ifndef CONFIG_SMP -static inline void play_dead(void) -{ - BUG(); -} -#endif - -/* - * The idle thread. There's no useful work to be - * done, so just try to conserve power and have a - * low exit latency (ie sit in a loop waiting for - * somebody to say that they'd like to reschedule) - */ -void cpu_idle(void) -{ - current_thread_info()->status |= TS_POLLING; - - /* - * If we're the non-boot CPU, nothing set the stack canary up - * for us. CPU0 already has it initialized but no harm in - * doing it again. This is a good place for updating it, as - * we wont ever return from this function (so the invalid - * canaries already on the stack wont ever trigger). - */ - boot_init_stack_canary(); - - /* endless idle loop with no priority at all */ - while (1) { - tick_nohz_stop_sched_tick(1); - while (!need_resched()) { - - rmb(); - - if (cpu_is_offline(smp_processor_id())) - play_dead(); - /* - * Idle routines should keep interrupts disabled - * from here on, until they go to idle. - * Otherwise, idle callbacks can misfire. - */ - local_irq_disable(); - enter_idle(); - /* Don't trace irqs off for idle */ - stop_critical_timings(); - pm_idle(); - start_critical_timings(); - - trace_power_end(smp_processor_id()); - - /* In many cases the interrupt that ended idle - has already called exit_idle. But some idle - loops can be woken up without interrupt. */ - __exit_idle(); - } - - tick_nohz_restart_sched_tick(); - preempt_enable_no_resched(); - schedule(); - preempt_disable(); - } -} +__visible DEFINE_PER_CPU(unsigned long, old_rsp); /* Prints also some state that isn't saved in the pt_regs */ void __show_regs(struct pt_regs *regs, int all) @@ -164,9 +62,8 @@ void __show_regs(struct pt_regs *regs, int all) unsigned int fsindex, gsindex; unsigned int ds, cs, es; - show_regs_common(); printk(KERN_DEFAULT "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); - printk_address(regs->ip, 1); + printk_address(regs->ip); printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->sp, regs->flags); printk(KERN_DEFAULT "RAX: %016lx RBX: %016lx RCX: %016lx\n", @@ -208,21 +105,28 @@ void __show_regs(struct pt_regs *regs, int all) get_debugreg(d0, 0); get_debugreg(d1, 1); get_debugreg(d2, 2); - printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2); get_debugreg(d3, 3); get_debugreg(d6, 6); get_debugreg(d7, 7); + + /* Only print out debug registers if they are in their non-default state. */ + if ((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) && + (d6 == DR6_RESERVED) && (d7 == 0x400)) + return; + + printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2); printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7); + } void release_thread(struct task_struct *dead_task) { if (dead_task->mm) { if (dead_task->mm->context.size) { - printk("WARNING: dead process %8s still has LDT? <%p/%d>\n", - dead_task->comm, - dead_task->mm->context.ldt, - dead_task->mm->context.size); + pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n", + dead_task->comm, + dead_task->mm->context.ldt, + dead_task->mm->context.size); BUG(); } } @@ -247,39 +151,19 @@ static inline u32 read_32bit_tls(struct task_struct *t, int tls) return get_desc_base(&t->thread.tls_array[tls]); } -/* - * This gets called before we allocate a new thread and copy - * the current task into it. - */ -void prepare_to_copy(struct task_struct *tsk) -{ - unlazy_fpu(tsk); -} - int copy_thread(unsigned long clone_flags, unsigned long sp, - unsigned long unused, - struct task_struct *p, struct pt_regs *regs) + unsigned long arg, struct task_struct *p) { int err; struct pt_regs *childregs; struct task_struct *me = current; - childregs = ((struct pt_regs *) - (THREAD_SIZE + task_stack_page(p))) - 1; - *childregs = *regs; - - childregs->ax = 0; - if (user_mode(regs)) - childregs->sp = sp; - else - childregs->sp = (unsigned long)childregs; - + p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE; + childregs = task_pt_regs(p); p->thread.sp = (unsigned long) childregs; - p->thread.sp0 = (unsigned long) (childregs+1); p->thread.usersp = me->thread.usersp; - set_tsk_thread_flag(p, TIF_FORK); - + p->thread.fpu_counter = 0; p->thread.io_bitmap_ptr = NULL; savesegment(gs, p->thread.gsindex); @@ -288,18 +172,36 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, p->thread.fs = p->thread.fsindex ? 0 : me->thread.fs; savesegment(es, p->thread.es); savesegment(ds, p->thread.ds); + memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); + + if (unlikely(p->flags & PF_KTHREAD)) { + /* kernel thread */ + memset(childregs, 0, sizeof(struct pt_regs)); + childregs->sp = (unsigned long)childregs; + childregs->ss = __KERNEL_DS; + childregs->bx = sp; /* function */ + childregs->bp = arg; + childregs->orig_ax = -1; + childregs->cs = __KERNEL_CS | get_kernel_rpl(); + childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED; + return 0; + } + *childregs = *current_pt_regs(); + + childregs->ax = 0; + if (sp) + childregs->sp = sp; err = -ENOMEM; memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { - p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); + p->thread.io_bitmap_ptr = kmemdup(me->thread.io_bitmap_ptr, + IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) { p->thread.io_bitmap_max = 0; return -ENOMEM; } - memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr, - IO_BITMAP_BYTES); set_tsk_thread_flag(p, TIF_IO_BITMAP); } @@ -336,17 +238,13 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip, loadsegment(es, _ds); loadsegment(ds, _ds); load_gs_index(0); + current->thread.usersp = new_sp; regs->ip = new_ip; regs->sp = new_sp; - percpu_write(old_rsp, new_sp); + this_cpu_write(old_rsp, new_sp); regs->cs = _cs; regs->ss = _ss; regs->flags = X86_EFLAGS_IF; - set_fs(USER_DS); - /* - * Free the old FP and other extended state - */ - free_thread_xstate(current); } void @@ -360,7 +258,9 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp) { start_thread_common(regs, new_ip, new_sp, - __USER32_CS, __USER32_DS, __USER32_DS); + test_thread_flag(TIF_X32) + ? __USER_CS : __USER32_CS, + __USER_DS, __USER_DS); } #endif @@ -374,7 +274,7 @@ void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp) * Kprobes not supported here. Set the probe on schedule instead. * Function graph tracer not supported too. */ -__notrace_funcgraph struct task_struct * +__visible __notrace_funcgraph struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct *next_p) { struct thread_struct *prev = &prev_p->thread; @@ -382,18 +282,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) int cpu = smp_processor_id(); struct tss_struct *tss = &per_cpu(init_tss, cpu); unsigned fsindex, gsindex; - bool preload_fpu; - - /* - * If the task has used fpu the last 5 timeslices, just do a full - * restore of the math state immediately to avoid the trap; the - * chances of needing FPU soon are obviously high now - */ - preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5; + fpu_switch_t fpu; - /* we're going to use this soon, after a few expensive things */ - if (preload_fpu) - prefetch(next->fpu.state); + fpu = switch_fpu_prepare(prev_p, next_p, cpu); /* * Reload esp0, LDT and the page table pointer: @@ -423,13 +314,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) load_TLS(next, cpu); - /* Must be after DS reload */ - __unlazy_fpu(prev_p); - - /* Make sure cpu is ready for new context */ - if (preload_fpu) - clts(); - /* * Leave lazy mode, flushing any hypercalls made here. * This must be done before restoring TLS segments so @@ -470,14 +354,24 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) wrmsrl(MSR_KERNEL_GS_BASE, next->gs); prev->gsindex = gsindex; + switch_fpu_finish(next_p, fpu); + /* * Switch the PDA and FPU contexts. */ - prev->usersp = percpu_read(old_rsp); - percpu_write(old_rsp, next->usersp); - percpu_write(current_task, next_p); + prev->usersp = this_cpu_read(old_rsp); + this_cpu_write(old_rsp, next->usersp); + this_cpu_write(current_task, next_p); - percpu_write(kernel_stack, + /* + * If it were not for PREEMPT_ACTIVE we could guarantee that the + * preempt_count of all tasks was equal here and this would not be + * needed. + */ + task_thread_info(prev_p)->saved_preempt_count = this_cpu_read(__preempt_count); + this_cpu_write(__preempt_count, task_thread_info(next_p)->saved_preempt_count); + + this_cpu_write(kernel_stack, (unsigned long)task_stack_page(next_p) + THREAD_SIZE - KERNEL_STACK_OFFSET); @@ -488,13 +382,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV)) __switch_to_xtra(prev_p, next_p, tss); - /* - * Preload the FPU context, now that we've determined that the - * task is likely to be using it. - */ - if (preload_fpu) - __math_state_restore(); - return prev_p; } @@ -504,6 +391,12 @@ void set_personality_64bit(void) /* Make sure to be in 64bit mode */ clear_thread_flag(TIF_IA32); + clear_thread_flag(TIF_ADDR32); + clear_thread_flag(TIF_X32); + + /* Ensure the corresponding mm is not marked. */ + if (current->mm) + current->mm->context.ia32_compat = 0; /* TBD: overwrites user setup. Should have two bits. But 64bit processes have always behaved this way, @@ -512,17 +405,34 @@ void set_personality_64bit(void) current->personality &= ~READ_IMPLIES_EXEC; } -void set_personality_ia32(void) +void set_personality_ia32(bool x32) { /* inherit personality from parent */ /* Make sure to be in 32bit mode */ - set_thread_flag(TIF_IA32); - current->personality |= force_personality32; - - /* Prepare the first "return" to user space */ - current_thread_info()->status |= TS_COMPAT; + set_thread_flag(TIF_ADDR32); + + /* Mark the associated mm as containing 32-bit tasks. */ + if (x32) { + clear_thread_flag(TIF_IA32); + set_thread_flag(TIF_X32); + if (current->mm) + current->mm->context.ia32_compat = TIF_X32; + current->personality &= ~READ_IMPLIES_EXEC; + /* is_compat_task() uses the presence of the x32 + syscall bit flag to determine compat status */ + current_thread_info()->status &= ~TS_COMPAT; + } else { + set_thread_flag(TIF_IA32); + clear_thread_flag(TIF_X32); + if (current->mm) + current->mm->context.ia32_compat = TIF_IA32; + current->personality |= force_personality32; + /* Prepare the first "return" to user space */ + current_thread_info()->status |= TS_COMPAT; + } } +EXPORT_SYMBOL_GPL(set_personality_ia32); unsigned long get_wchan(struct task_struct *p) { @@ -574,7 +484,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) task->thread.gs = addr; if (doit) { load_gs_index(0); - ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr); + ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr); } } put_cpu(); @@ -602,7 +512,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) /* set the selector to 0 to not confuse __switch_to */ loadsegment(fs, 0); - ret = checking_wrmsrl(MSR_FS_BASE, addr); + ret = wrmsrl_safe(MSR_FS_BASE, addr); } } put_cpu(); |
