aboutsummaryrefslogtreecommitdiff
path: root/arch/x86/kernel/entry_64.S
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/entry_64.S')
-rw-r--r--arch/x86/kernel/entry_64.S826
1 files changed, 422 insertions, 404 deletions
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 3fe8239fd8f..c844f0816ab 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -36,7 +36,7 @@
* - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
* frame that is otherwise undefined after a SYSCALL
* - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
- * - errorentry/paranoidentry/zeroentry - Define exception entry points.
+ * - idtentry - Define exception entry points.
*/
#include <linux/linkage.h>
@@ -53,8 +53,11 @@
#include <asm/page_types.h>
#include <asm/irqflags.h>
#include <asm/paravirt.h>
-#include <asm/ftrace.h>
#include <asm/percpu.h>
+#include <asm/asm.h>
+#include <asm/context_tracking.h>
+#include <asm/smap.h>
+#include <asm/pgtable_types.h>
#include <linux/err.h>
/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
@@ -66,107 +69,6 @@
.code64
.section .entry.text, "ax"
-#ifdef CONFIG_FUNCTION_TRACER
-#ifdef CONFIG_DYNAMIC_FTRACE
-ENTRY(mcount)
- retq
-END(mcount)
-
-ENTRY(ftrace_caller)
- cmpl $0, function_trace_stop
- jne ftrace_stub
-
- MCOUNT_SAVE_FRAME
-
- movq 0x38(%rsp), %rdi
- movq 8(%rbp), %rsi
- subq $MCOUNT_INSN_SIZE, %rdi
-
-GLOBAL(ftrace_call)
- call ftrace_stub
-
- MCOUNT_RESTORE_FRAME
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-GLOBAL(ftrace_graph_call)
- jmp ftrace_stub
-#endif
-
-GLOBAL(ftrace_stub)
- retq
-END(ftrace_caller)
-
-#else /* ! CONFIG_DYNAMIC_FTRACE */
-ENTRY(mcount)
- cmpl $0, function_trace_stop
- jne ftrace_stub
-
- cmpq $ftrace_stub, ftrace_trace_function
- jnz trace
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- cmpq $ftrace_stub, ftrace_graph_return
- jnz ftrace_graph_caller
-
- cmpq $ftrace_graph_entry_stub, ftrace_graph_entry
- jnz ftrace_graph_caller
-#endif
-
-GLOBAL(ftrace_stub)
- retq
-
-trace:
- MCOUNT_SAVE_FRAME
-
- movq 0x38(%rsp), %rdi
- movq 8(%rbp), %rsi
- subq $MCOUNT_INSN_SIZE, %rdi
-
- call *ftrace_trace_function
-
- MCOUNT_RESTORE_FRAME
-
- jmp ftrace_stub
-END(mcount)
-#endif /* CONFIG_DYNAMIC_FTRACE */
-#endif /* CONFIG_FUNCTION_TRACER */
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-ENTRY(ftrace_graph_caller)
- cmpl $0, function_trace_stop
- jne ftrace_stub
-
- MCOUNT_SAVE_FRAME
-
- leaq 8(%rbp), %rdi
- movq 0x38(%rsp), %rsi
- movq (%rbp), %rdx
- subq $MCOUNT_INSN_SIZE, %rsi
-
- call prepare_ftrace_return
-
- MCOUNT_RESTORE_FRAME
-
- retq
-END(ftrace_graph_caller)
-
-GLOBAL(return_to_handler)
- subq $24, %rsp
-
- /* Save the return values */
- movq %rax, (%rsp)
- movq %rdx, 8(%rsp)
- movq %rbp, %rdi
-
- call ftrace_return_to_handler
-
- movq %rax, %rdi
- movq 8(%rsp), %rdx
- movq (%rsp), %rax
- addq $24, %rsp
- jmp *%rdi
-#endif
-
#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
@@ -190,6 +92,44 @@ ENDPROC(native_usergs_sysret64)
.endm
/*
+ * When dynamic function tracer is enabled it will add a breakpoint
+ * to all locations that it is about to modify, sync CPUs, update
+ * all the code, sync CPUs, then remove the breakpoints. In this time
+ * if lockdep is enabled, it might jump back into the debug handler
+ * outside the updating of the IST protection. (TRACE_IRQS_ON/OFF).
+ *
+ * We need to change the IDT table before calling TRACE_IRQS_ON/OFF to
+ * make sure the stack pointer does not get reset back to the top
+ * of the debug stack, and instead just reuses the current stack.
+ */
+#if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_TRACE_IRQFLAGS)
+
+.macro TRACE_IRQS_OFF_DEBUG
+ call debug_stack_set_zero
+ TRACE_IRQS_OFF
+ call debug_stack_reset
+.endm
+
+.macro TRACE_IRQS_ON_DEBUG
+ call debug_stack_set_zero
+ TRACE_IRQS_ON
+ call debug_stack_reset
+.endm
+
+.macro TRACE_IRQS_IRETQ_DEBUG offset=ARGOFFSET
+ bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */
+ jnc 1f
+ TRACE_IRQS_ON_DEBUG
+1:
+.endm
+
+#else
+# define TRACE_IRQS_OFF_DEBUG TRACE_IRQS_OFF
+# define TRACE_IRQS_ON_DEBUG TRACE_IRQS_ON
+# define TRACE_IRQS_IRETQ_DEBUG TRACE_IRQS_IRETQ
+#endif
+
+/*
* C code is not supposed to know about undefined top of stack. Every time
* a C function with an pt_regs argument is called from the SYSCALL based
* fast path FIXUP_TOP_OF_STACK is needed.
@@ -222,7 +162,7 @@ ENDPROC(native_usergs_sysret64)
/*CFI_REL_OFFSET ss,0*/
pushq_cfi %rax /* rsp */
CFI_REL_OFFSET rsp,0
- pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_BIT1) /* eflags - interrupts on */
+ pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_FIXED) /* eflags - interrupts on */
/*CFI_REL_OFFSET rflags,0*/
pushq_cfi $__KERNEL_CS /* cs */
/*CFI_REL_OFFSET cs,0*/
@@ -303,15 +243,15 @@ ENDPROC(native_usergs_sysret64)
.macro SAVE_ARGS_IRQ
cld
/* start from rbp in pt_regs and jump over */
- movq_cfi rdi, RDI-RBP
- movq_cfi rsi, RSI-RBP
- movq_cfi rdx, RDX-RBP
- movq_cfi rcx, RCX-RBP
- movq_cfi rax, RAX-RBP
- movq_cfi r8, R8-RBP
- movq_cfi r9, R9-RBP
- movq_cfi r10, R10-RBP
- movq_cfi r11, R11-RBP
+ movq_cfi rdi, (RDI-RBP)
+ movq_cfi rsi, (RSI-RBP)
+ movq_cfi rdx, (RDX-RBP)
+ movq_cfi rcx, (RCX-RBP)
+ movq_cfi rax, (RAX-RBP)
+ movq_cfi r8, (R8-RBP)
+ movq_cfi r9, (R9-RBP)
+ movq_cfi r10, (R10-RBP)
+ movq_cfi r11, (R11-RBP)
/* Save rbp so that we can unwind from get_irq_regs() */
movq_cfi rbp, 0
@@ -320,7 +260,7 @@ ENDPROC(native_usergs_sysret64)
movq %rsp, %rsi
leaq -RBP(%rsp),%rdi /* arg1 for handler */
- testl $3, CS(%rdi)
+ testl $3, CS-RBP(%rsi)
je 1f
SWAPGS
/*
@@ -330,11 +270,10 @@ ENDPROC(native_usergs_sysret64)
* moving irq_enter into assembly, which would be too much work)
*/
1: incl PER_CPU_VAR(irq_count)
- jne 2f
- mov PER_CPU_VAR(irq_stack_ptr),%rsp
+ cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
CFI_DEF_CFA_REGISTER rsi
-2: /* Store previous stack value */
+ /* Store previous stack value */
pushq %rsi
CFI_ESCAPE 0x0f /* DW_CFA_def_cfa_expression */, 6, \
0x77 /* DW_OP_breg7 */, 0, \
@@ -345,23 +284,6 @@ ENDPROC(native_usergs_sysret64)
TRACE_IRQS_OFF
.endm
-ENTRY(save_rest)
- PARTIAL_FRAME 1 REST_SKIP+8
- movq 5*8+16(%rsp), %r11 /* save return address */
- movq_cfi rbx, RBX+16
- movq_cfi rbp, RBP+16
- movq_cfi r12, R12+16
- movq_cfi r13, R13+16
- movq_cfi r14, R14+16
- movq_cfi r15, R15+16
- movq %r11, 8(%rsp) /* return address */
- FIXUP_TOP_OF_STACK %r11, 16
- ret
- CFI_ENDPROC
-END(save_rest)
-
-/* save complete stack frame */
- .pushsection .kprobes.text, "ax"
ENTRY(save_paranoid)
XCPT_FRAME 1 RDI+8
cld
@@ -390,7 +312,6 @@ ENTRY(save_paranoid)
1: ret
CFI_ENDPROC
END(save_paranoid)
- .popsection
/*
* A newly forked process directly context switches into this address.
@@ -402,7 +323,7 @@ ENTRY(ret_from_fork)
LOCK ; btr $TIF_FORK,TI_flags(%r8)
- pushq_cfi kernel_eflags(%rip)
+ pushq_cfi $0x0002
popfq_cfi # reset kernel eflags
call schedule_tail # rdi: 'prev' task parameter
@@ -412,7 +333,7 @@ ENTRY(ret_from_fork)
RESTORE_REST
testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread?
- jz retint_restore_args
+ jz 1f
testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET
jnz int_ret_from_sys_call
@@ -420,6 +341,14 @@ ENTRY(ret_from_fork)
RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET
jmp ret_from_sys_call # go to the SYSRET fastpath
+1:
+ subq $REST_SKIP, %rsp # leave space for volatiles
+ CFI_ADJUST_CFA_OFFSET REST_SKIP
+ movq %rbp, %rdi
+ call *%rbx
+ movl $0, RAX(%rsp)
+ RESTORE_REST
+ jmp int_ret_from_sys_call
CFI_ENDPROC
END(ret_from_fork)
@@ -427,7 +356,8 @@ END(ret_from_fork)
* System call entry. Up to 6 arguments in registers are supported.
*
* SYSCALL does not save anything on the stack and does not change the
- * stack pointer.
+ * stack pointer. However, it does mask the flags register for us, so
+ * CLD and CLAC are not needed.
*/
/*
@@ -482,7 +412,12 @@ GLOBAL(system_call_after_swapgs)
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
jnz tracesys
system_call_fastpath:
+#if __SYSCALL_MASK == ~0
cmpq $__NR_syscall_max,%rax
+#else
+ andl $__SYSCALL_MASK,%eax
+ cmpl $__NR_syscall_max,%eax
+#endif
ja badsys
movq %r10,%rcx
call *sys_call_table(,%rax,8) # XXX: rip relative
@@ -522,7 +457,7 @@ sysret_careful:
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
pushq_cfi %rdi
- call schedule
+ SCHEDULE_USER
popq_cfi %rdi
jmp sysret_check
@@ -596,7 +531,12 @@ tracesys:
*/
LOAD_ARGS ARGOFFSET, 1
RESTORE_REST
+#if __SYSCALL_MASK == ~0
cmpq $__NR_syscall_max,%rax
+#else
+ andl $__SYSCALL_MASK,%eax
+ cmpl $__NR_syscall_max,%eax
+#endif
ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */
movq %r10,%rcx /* fixup for C */
call *sys_call_table(,%rax,8)
@@ -630,7 +570,7 @@ int_careful:
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
pushq_cfi %rdi
- call schedule
+ SCHEDULE_USER
popq_cfi %rdi
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
@@ -667,28 +607,38 @@ int_restore_rest:
CFI_ENDPROC
END(system_call)
-/*
- * Certain special system calls that need to save a complete full stack frame.
- */
- .macro PTREGSCALL label,func,arg
-ENTRY(\label)
- PARTIAL_FRAME 1 8 /* offset 8: return address */
- subq $REST_SKIP, %rsp
- CFI_ADJUST_CFA_OFFSET REST_SKIP
- call save_rest
+ .macro FORK_LIKE func
+ENTRY(stub_\func)
+ CFI_STARTPROC
+ popq %r11 /* save return address */
+ PARTIAL_FRAME 0
+ SAVE_REST
+ pushq %r11 /* put it back on stack */
+ FIXUP_TOP_OF_STACK %r11, 8
DEFAULT_FRAME 0 8 /* offset 8: return address */
- leaq 8(%rsp), \arg /* pt_regs pointer */
+ call sys_\func
+ RESTORE_TOP_OF_STACK %r11, 8
+ ret $REST_SKIP /* pop extended registers */
+ CFI_ENDPROC
+END(stub_\func)
+ .endm
+
+ .macro FIXED_FRAME label,func
+ENTRY(\label)
+ CFI_STARTPROC
+ PARTIAL_FRAME 0 8 /* offset 8: return address */
+ FIXUP_TOP_OF_STACK %r11, 8-ARGOFFSET
call \func
- jmp ptregscall_common
+ RESTORE_TOP_OF_STACK %r11, 8-ARGOFFSET
+ ret
CFI_ENDPROC
END(\label)
.endm
- PTREGSCALL stub_clone, sys_clone, %r8
- PTREGSCALL stub_fork, sys_fork, %rdi
- PTREGSCALL stub_vfork, sys_vfork, %rdi
- PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
- PTREGSCALL stub_iopl, sys_iopl, %rsi
+ FORK_LIKE clone
+ FORK_LIKE fork
+ FORK_LIKE vfork
+ FIXED_FRAME stub_iopl, sys_iopl
ENTRY(ptregscall_common)
DEFAULT_FRAME 1 8 /* offset 8: return address */
@@ -709,9 +659,7 @@ ENTRY(stub_execve)
PARTIAL_FRAME 0
SAVE_REST
FIXUP_TOP_OF_STACK %r11
- movq %rsp, %rcx
call sys_execve
- RESTORE_TOP_OF_STACK %r11
movq %rax,RAX(%rsp)
RESTORE_REST
jmp int_ret_from_sys_call
@@ -727,7 +675,6 @@ ENTRY(stub_rt_sigreturn)
addq $8, %rsp
PARTIAL_FRAME 0
SAVE_REST
- movq %rsp,%rdi
FIXUP_TOP_OF_STACK %r11
call sys_rt_sigreturn
movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
@@ -736,6 +683,36 @@ ENTRY(stub_rt_sigreturn)
CFI_ENDPROC
END(stub_rt_sigreturn)
+#ifdef CONFIG_X86_X32_ABI
+ENTRY(stub_x32_rt_sigreturn)
+ CFI_STARTPROC
+ addq $8, %rsp
+ PARTIAL_FRAME 0
+ SAVE_REST
+ FIXUP_TOP_OF_STACK %r11
+ call sys32_x32_rt_sigreturn
+ movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
+ RESTORE_REST
+ jmp int_ret_from_sys_call
+ CFI_ENDPROC
+END(stub_x32_rt_sigreturn)
+
+ENTRY(stub_x32_execve)
+ CFI_STARTPROC
+ addq $8, %rsp
+ PARTIAL_FRAME 0
+ SAVE_REST
+ FIXUP_TOP_OF_STACK %r11
+ call compat_sys_execve
+ RESTORE_TOP_OF_STACK %r11
+ movq %rax,RAX(%rsp)
+ RESTORE_REST
+ jmp int_ret_from_sys_call
+ CFI_ENDPROC
+END(stub_x32_execve)
+
+#endif
+
/*
* Build the entry stubs and pointer table with some assembler magic.
* We pack 7 stubs into a single 32-byte chunk, which will fit in a
@@ -792,10 +769,6 @@ END(interrupt)
call \func
.endm
-/*
- * Interrupt entry/exit should be protected against kprobes
- */
- .pushsection .kprobes.text, "ax"
/*
* The interrupt stubs push (~vector+0x80) onto the stack and
* then jump to common_interrupt.
@@ -803,6 +776,7 @@ END(interrupt)
.p2align CONFIG_X86_L1_CACHE_SHIFT
common_interrupt:
XCPT_FRAME
+ ASM_CLAC
addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */
interrupt do_IRQ
/* 0(%rsp): old_rsp-ARGOFFSET */
@@ -813,7 +787,7 @@ ret_from_intr:
/* Restore saved previous stack */
popq %rsi
- CFI_DEF_CFA_REGISTER rsi
+ CFI_DEF_CFA rsi,SS+8-RBP /* reg/off reset after def_cfa_expr */
leaq ARGOFFSET-RBP(%rsi), %rsp
CFI_DEF_CFA_REGISTER rsp
CFI_ADJUST_CFA_OFFSET RBP-ARGOFFSET
@@ -858,17 +832,44 @@ restore_args:
irq_return:
INTERRUPT_RETURN
- .section __ex_table, "a"
- .quad irq_return, bad_iret
- .previous
-
-#ifdef CONFIG_PARAVIRT
ENTRY(native_iret)
+ /*
+ * Are we returning to a stack segment from the LDT? Note: in
+ * 64-bit mode SS:RSP on the exception stack is always valid.
+ */
+#ifdef CONFIG_X86_ESPFIX64
+ testb $4,(SS-RIP)(%rsp)
+ jnz native_irq_return_ldt
+#endif
+
+native_irq_return_iret:
iretq
+ _ASM_EXTABLE(native_irq_return_iret, bad_iret)
- .section __ex_table,"a"
- .quad native_iret, bad_iret
- .previous
+#ifdef CONFIG_X86_ESPFIX64
+native_irq_return_ldt:
+ pushq_cfi %rax
+ pushq_cfi %rdi
+ SWAPGS
+ movq PER_CPU_VAR(espfix_waddr),%rdi
+ movq %rax,(0*8)(%rdi) /* RAX */
+ movq (2*8)(%rsp),%rax /* RIP */
+ movq %rax,(1*8)(%rdi)
+ movq (3*8)(%rsp),%rax /* CS */
+ movq %rax,(2*8)(%rdi)
+ movq (4*8)(%rsp),%rax /* RFLAGS */
+ movq %rax,(3*8)(%rdi)
+ movq (6*8)(%rsp),%rax /* SS */
+ movq %rax,(5*8)(%rdi)
+ movq (5*8)(%rsp),%rax /* RSP */
+ movq %rax,(4*8)(%rdi)
+ andl $0xffff0000,%eax
+ popq_cfi %rdi
+ orq PER_CPU_VAR(espfix_stack),%rax
+ SWAPGS
+ movq %rax,%rsp
+ popq_cfi %rax
+ jmp native_irq_return_iret
#endif
.section .fixup,"ax"
@@ -898,7 +899,7 @@ retint_careful:
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
pushq_cfi %rdi
- call schedule
+ SCHEDULE_USER
popq_cfi %rdi
GET_THREAD_INFO(%rcx)
DISABLE_INTERRUPTS(CLBR_NONE)
@@ -925,29 +926,54 @@ retint_signal:
/* Returning to kernel space. Check if we need preemption */
/* rcx: threadinfo. interrupts off. */
ENTRY(retint_kernel)
- cmpl $0,TI_preempt_count(%rcx)
+ cmpl $0,PER_CPU_VAR(__preempt_count)
jnz retint_restore_args
- bt $TIF_NEED_RESCHED,TI_flags(%rcx)
- jnc retint_restore_args
bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
jnc retint_restore_args
call preempt_schedule_irq
jmp exit_intr
#endif
-
CFI_ENDPROC
END(common_interrupt)
-/*
- * End of kprobes section
- */
- .popsection
+
+ /*
+ * If IRET takes a fault on the espfix stack, then we
+ * end up promoting it to a doublefault. In that case,
+ * modify the stack to make it look like we just entered
+ * the #GP handler from user space, similar to bad_iret.
+ */
+#ifdef CONFIG_X86_ESPFIX64
+ ALIGN
+__do_double_fault:
+ XCPT_FRAME 1 RDI+8
+ movq RSP(%rdi),%rax /* Trap on the espfix stack? */
+ sarq $PGDIR_SHIFT,%rax
+ cmpl $ESPFIX_PGD_ENTRY,%eax
+ jne do_double_fault /* No, just deliver the fault */
+ cmpl $__KERNEL_CS,CS(%rdi)
+ jne do_double_fault
+ movq RIP(%rdi),%rax
+ cmpq $native_irq_return_iret,%rax
+ jne do_double_fault /* This shouldn't happen... */
+ movq PER_CPU_VAR(kernel_stack),%rax
+ subq $(6*8-KERNEL_STACK_OFFSET),%rax /* Reset to original stack */
+ movq %rax,RSP(%rdi)
+ movq $0,(%rax) /* Missing (lost) #GP error code */
+ movq $general_protection,RIP(%rdi)
+ retq
+ CFI_ENDPROC
+END(__do_double_fault)
+#else
+# define __do_double_fault do_double_fault
+#endif
/*
* APIC interrupts.
*/
-.macro apicinterrupt num sym do_sym
+.macro apicinterrupt3 num sym do_sym
ENTRY(\sym)
INTR_FRAME
+ ASM_CLAC
pushq_cfi $~(\num)
.Lcommon_\sym:
interrupt \do_sym
@@ -956,15 +982,32 @@ ENTRY(\sym)
END(\sym)
.endm
+#ifdef CONFIG_TRACING
+#define trace(sym) trace_##sym
+#define smp_trace(sym) smp_trace_##sym
+
+.macro trace_apicinterrupt num sym
+apicinterrupt3 \num trace(\sym) smp_trace(\sym)
+.endm
+#else
+.macro trace_apicinterrupt num sym do_sym
+.endm
+#endif
+
+.macro apicinterrupt num sym do_sym
+apicinterrupt3 \num \sym \do_sym
+trace_apicinterrupt \num \sym
+.endm
+
#ifdef CONFIG_SMP
-apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \
+apicinterrupt3 IRQ_MOVE_CLEANUP_VECTOR \
irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
-apicinterrupt REBOOT_VECTOR \
+apicinterrupt3 REBOOT_VECTOR \
reboot_interrupt smp_reboot_interrupt
#endif
#ifdef CONFIG_X86_UV
-apicinterrupt UV_BAU_MESSAGE \
+apicinterrupt3 UV_BAU_MESSAGE \
uv_bau_message_intr1 uv_bau_message_interrupt
#endif
apicinterrupt LOCAL_TIMER_VECTOR \
@@ -972,28 +1015,20 @@ apicinterrupt LOCAL_TIMER_VECTOR \
apicinterrupt X86_PLATFORM_IPI_VECTOR \
x86_platform_ipi smp_x86_platform_ipi
-#ifdef CONFIG_SMP
- ALIGN
- INTR_FRAME
-.irp idx,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \
- 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
-.if NUM_INVALIDATE_TLB_VECTORS > \idx
-ENTRY(invalidate_interrupt\idx)
- pushq_cfi $~(INVALIDATE_TLB_VECTOR_START+\idx)
- jmp .Lcommon_invalidate_interrupt0
- CFI_ADJUST_CFA_OFFSET -8
-END(invalidate_interrupt\idx)
-.endif
-.endr
- CFI_ENDPROC
-apicinterrupt INVALIDATE_TLB_VECTOR_START, \
- invalidate_interrupt0, smp_invalidate_interrupt
+#ifdef CONFIG_HAVE_KVM
+apicinterrupt3 POSTED_INTR_VECTOR \
+ kvm_posted_intr_ipi smp_kvm_posted_intr_ipi
#endif
+#ifdef CONFIG_X86_MCE_THRESHOLD
apicinterrupt THRESHOLD_APIC_VECTOR \
threshold_interrupt smp_threshold_interrupt
+#endif
+
+#ifdef CONFIG_X86_THERMAL_VECTOR
apicinterrupt THERMAL_APIC_VECTOR \
thermal_interrupt smp_thermal_interrupt
+#endif
#ifdef CONFIG_SMP
apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \
@@ -1017,109 +1052,100 @@ apicinterrupt IRQ_WORK_VECTOR \
/*
* Exception entry points.
*/
-.macro zeroentry sym do_sym
-ENTRY(\sym)
- INTR_FRAME
- PARAVIRT_ADJUST_EXCEPTION_FRAME
- pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
- subq $ORIG_RAX-R15, %rsp
- CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
- call error_entry
- DEFAULT_FRAME 0
- movq %rsp,%rdi /* pt_regs pointer */
- xorl %esi,%esi /* no error code */
- call \do_sym
- jmp error_exit /* %ebx: no swapgs flag */
- CFI_ENDPROC
-END(\sym)
-.endm
+#define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8)
-.macro paranoidzeroentry sym do_sym
+.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
ENTRY(\sym)
- INTR_FRAME
- PARAVIRT_ADJUST_EXCEPTION_FRAME
- pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
- subq $ORIG_RAX-R15, %rsp
- CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
- call save_paranoid
- TRACE_IRQS_OFF
- movq %rsp,%rdi /* pt_regs pointer */
- xorl %esi,%esi /* no error code */
- call \do_sym
- jmp paranoid_exit /* %ebx: no swapgs flag */
- CFI_ENDPROC
-END(\sym)
-.endm
+ /* Sanity check */
+ .if \shift_ist != -1 && \paranoid == 0
+ .error "using shift_ist requires paranoid=1"
+ .endif
-#define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8)
-.macro paranoidzeroentry_ist sym do_sym ist
-ENTRY(\sym)
+ .if \has_error_code
+ XCPT_FRAME
+ .else
INTR_FRAME
- PARAVIRT_ADJUST_EXCEPTION_FRAME
- pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
- subq $ORIG_RAX-R15, %rsp
- CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
- call save_paranoid
- TRACE_IRQS_OFF
- movq %rsp,%rdi /* pt_regs pointer */
- xorl %esi,%esi /* no error code */
- subq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist)
- call \do_sym
- addq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist)
- jmp paranoid_exit /* %ebx: no swapgs flag */
- CFI_ENDPROC
-END(\sym)
-.endm
+ .endif
-.macro errorentry sym do_sym
-ENTRY(\sym)
- XCPT_FRAME
+ ASM_CLAC
PARAVIRT_ADJUST_EXCEPTION_FRAME
- subq $ORIG_RAX-R15, %rsp
- CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
- call error_entry
- DEFAULT_FRAME 0
- movq %rsp,%rdi /* pt_regs pointer */
- movq ORIG_RAX(%rsp),%rsi /* get error code */
- movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */
- call \do_sym
- jmp error_exit /* %ebx: no swapgs flag */
- CFI_ENDPROC
-END(\sym)
-.endm
- /* error code is on the stack already */
-.macro paranoiderrorentry sym do_sym
-ENTRY(\sym)
- XCPT_FRAME
- PARAVIRT_ADJUST_EXCEPTION_FRAME
+ .ifeq \has_error_code
+ pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
+ .endif
+
subq $ORIG_RAX-R15, %rsp
CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
+
+ .if \paranoid
call save_paranoid
+ .else
+ call error_entry
+ .endif
+
DEFAULT_FRAME 0
+
+ .if \paranoid
+ .if \shift_ist != -1
+ TRACE_IRQS_OFF_DEBUG /* reload IDT in case of recursion */
+ .else
TRACE_IRQS_OFF
+ .endif
+ .endif
+
movq %rsp,%rdi /* pt_regs pointer */
+
+ .if \has_error_code
movq ORIG_RAX(%rsp),%rsi /* get error code */
movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */
+ .else
+ xorl %esi,%esi /* no error code */
+ .endif
+
+ .if \shift_ist != -1
+ subq $EXCEPTION_STKSZ, INIT_TSS_IST(\shift_ist)
+ .endif
+
call \do_sym
+
+ .if \shift_ist != -1
+ addq $EXCEPTION_STKSZ, INIT_TSS_IST(\shift_ist)
+ .endif
+
+ .if \paranoid
jmp paranoid_exit /* %ebx: no swapgs flag */
+ .else
+ jmp error_exit /* %ebx: no swapgs flag */
+ .endif
+
CFI_ENDPROC
END(\sym)
.endm
-zeroentry divide_error do_divide_error
-zeroentry overflow do_overflow
-zeroentry bounds do_bounds
-zeroentry invalid_op do_invalid_op
-zeroentry device_not_available do_device_not_available
-paranoiderrorentry double_fault do_double_fault
-zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun
-errorentry invalid_TSS do_invalid_TSS
-errorentry segment_not_present do_segment_not_present
-zeroentry spurious_interrupt_bug do_spurious_interrupt_bug
-zeroentry coprocessor_error do_coprocessor_error
-errorentry alignment_check do_alignment_check
-zeroentry simd_coprocessor_error do_simd_coprocessor_error
+#ifdef CONFIG_TRACING
+.macro trace_idtentry sym do_sym has_error_code:req
+idtentry trace(\sym) trace(\do_sym) has_error_code=\has_error_code
+idtentry \sym \do_sym has_error_code=\has_error_code
+.endm
+#else
+.macro trace_idtentry sym do_sym has_error_code:req
+idtentry \sym \do_sym has_error_code=\has_error_code
+.endm
+#endif
+
+idtentry divide_error do_divide_error has_error_code=0
+idtentry overflow do_overflow has_error_code=0
+idtentry bounds do_bounds has_error_code=0
+idtentry invalid_op do_invalid_op has_error_code=0
+idtentry device_not_available do_device_not_available has_error_code=0
+idtentry double_fault __do_double_fault has_error_code=1 paranoid=1
+idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0
+idtentry invalid_TSS do_invalid_TSS has_error_code=1
+idtentry segment_not_present do_segment_not_present has_error_code=1
+idtentry spurious_interrupt_bug do_spurious_interrupt_bug has_error_code=0
+idtentry coprocessor_error do_coprocessor_error has_error_code=0
+idtentry alignment_check do_alignment_check has_error_code=1
+idtentry simd_coprocessor_error do_simd_coprocessor_error has_error_code=0
/* Reload gs selector with exception handling */
@@ -1138,10 +1164,7 @@ gs_change:
CFI_ENDPROC
END(native_load_gs_index)
- .section __ex_table,"a"
- .align 8
- .quad gs_change,bad_gs
- .previous
+ _ASM_EXTABLE(gs_change,bad_gs)
.section .fixup,"ax"
/* running with kernelgs */
bad_gs:
@@ -1151,54 +1174,8 @@ bad_gs:
jmp 2b
.previous
-ENTRY(kernel_thread_helper)
- pushq $0 # fake return address
- CFI_STARTPROC
- /*
- * Here we are in the child and the registers are set as they were
- * at kernel_thread() invocation in the parent.
- */
- call *%rsi
- # exit
- mov %eax, %edi
- call do_exit
- ud2 # padding for call trace
- CFI_ENDPROC
-END(kernel_thread_helper)
-
-/*
- * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
- *
- * C extern interface:
- * extern long execve(const char *name, char **argv, char **envp)
- *
- * asm input arguments:
- * rdi: name, rsi: argv, rdx: envp
- *
- * We want to fallback into:
- * extern long sys_execve(const char *name, char **argv,char **envp, struct pt_regs *regs)
- *
- * do_sys_execve asm fallback arguments:
- * rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack
- */
-ENTRY(kernel_execve)
- CFI_STARTPROC
- FAKE_STACK_FRAME $0
- SAVE_ALL
- movq %rsp,%rcx
- call sys_execve
- movq %rax, RAX(%rsp)
- RESTORE_REST
- testq %rax,%rax
- je int_ret_from_sys_call
- RESTORE_ARGS
- UNFAKE_STACK_FRAME
- ret
- CFI_ENDPROC
-END(kernel_execve)
-
/* Call softirq on interrupt stack. Interrupts are off. */
-ENTRY(call_softirq)
+ENTRY(do_softirq_own_stack)
CFI_STARTPROC
pushq_cfi %rbp
CFI_REL_OFFSET rbp,0
@@ -1215,10 +1192,10 @@ ENTRY(call_softirq)
decl PER_CPU_VAR(irq_count)
ret
CFI_ENDPROC
-END(call_softirq)
+END(do_softirq_own_stack)
#ifdef CONFIG_XEN
-zeroentry xen_hypervisor_callback xen_do_hypervisor_callback
+idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0
/*
* A note on the "critical region" in our callback handler.
@@ -1308,37 +1285,37 @@ ENTRY(xen_failsafe_callback)
CFI_RESTORE r11
addq $0x30,%rsp
CFI_ADJUST_CFA_OFFSET -0x30
- pushq_cfi $0
+ pushq_cfi $-1 /* orig_ax = -1 => not a system call */
SAVE_ALL
jmp error_exit
CFI_ENDPROC
END(xen_failsafe_callback)
-apicinterrupt XEN_HVM_EVTCHN_CALLBACK \
+apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
xen_hvm_callback_vector xen_evtchn_do_upcall
#endif /* CONFIG_XEN */
-/*
- * Some functions should be protected against kprobes
- */
- .pushsection .kprobes.text, "ax"
+#if IS_ENABLED(CONFIG_HYPERV)
+apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
+ hyperv_callback_vector hyperv_vector_handler
+#endif /* CONFIG_HYPERV */
-paranoidzeroentry_ist debug do_debug DEBUG_STACK
-paranoidzeroentry_ist int3 do_int3 DEBUG_STACK
-paranoiderrorentry stack_segment do_stack_segment
+idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
+idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
+idtentry stack_segment do_stack_segment has_error_code=1 paranoid=1
#ifdef CONFIG_XEN
-zeroentry xen_debug do_debug
-zeroentry xen_int3 do_int3
-errorentry xen_stack_segment do_stack_segment
+idtentry xen_debug do_debug has_error_code=0
+idtentry xen_int3 do_int3 has_error_code=0
+idtentry xen_stack_segment do_stack_segment has_error_code=1
#endif
-errorentry general_protection do_general_protection
-errorentry page_fault do_page_fault
+idtentry general_protection do_general_protection has_error_code=1
+trace_idtentry page_fault do_page_fault has_error_code=1
#ifdef CONFIG_KVM_GUEST
-errorentry async_page_fault do_async_page_fault
+idtentry async_page_fault do_async_page_fault has_error_code=1
#endif
#ifdef CONFIG_X86_MCE
-paranoidzeroentry machine_check *machine_check_vector(%rip)
+idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vector(%rip)
#endif
/*
@@ -1358,7 +1335,7 @@ paranoidzeroentry machine_check *machine_check_vector(%rip)
ENTRY(paranoid_exit)
DEFAULT_FRAME
DISABLE_INTERRUPTS(CLBR_NONE)
- TRACE_IRQS_OFF
+ TRACE_IRQS_OFF_DEBUG
testl %ebx,%ebx /* swapgs needed? */
jnz paranoid_restore
testl $3,CS(%rsp)
@@ -1369,7 +1346,7 @@ paranoid_swapgs:
RESTORE_ALL 8
jmp irq_return
paranoid_restore:
- TRACE_IRQS_IRETQ 0
+ TRACE_IRQS_IRETQ_DEBUG 0
RESTORE_ALL 8
jmp irq_return
paranoid_userspace:
@@ -1394,7 +1371,7 @@ paranoid_userspace:
paranoid_schedule:
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_ANY)
- call schedule
+ SCHEDULE_USER
DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_OFF
jmp paranoid_userspace
@@ -1443,7 +1420,7 @@ error_sti:
*/
error_kernelspace:
incl %ebx
- leaq irq_return(%rip),%rcx
+ leaq native_irq_return_iret(%rip),%rcx
cmpq %rcx,RIP+8(%rsp)
je error_swapgs
movl %ecx,%eax /* zero extend */
@@ -1530,12 +1507,20 @@ ENTRY(nmi)
/* Use %rdx as out temp variable throughout */
pushq_cfi %rdx
+ CFI_REL_OFFSET rdx, 0
+
+ /*
+ * If %cs was not the kernel segment, then the NMI triggered in user
+ * space, which means it is definitely not nested.
+ */
+ cmpl $__KERNEL_CS, 16(%rsp)
+ jne first_nmi
/*
* Check the special variable on the stack to see if NMIs are
* executing.
*/
- cmp $1, -8(%rsp)
+ cmpl $1, -8(%rsp)
je nested_nmi
/*
@@ -1547,6 +1532,7 @@ ENTRY(nmi)
*/
lea 6*8(%rsp), %rdx
test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi
+ CFI_REMEMBER_STATE
nested_nmi:
/*
@@ -1563,9 +1549,10 @@ nested_nmi:
1:
/* Set up the interrupted NMIs stack to jump to repeat_nmi */
- leaq -6*8(%rsp), %rdx
+ leaq -1*8(%rsp), %rdx
movq %rdx, %rsp
- CFI_ADJUST_CFA_OFFSET 6*8
+ CFI_ADJUST_CFA_OFFSET 1*8
+ leaq -10*8(%rsp), %rdx
pushq_cfi $__KERNEL_DS
pushq_cfi %rdx
pushfq_cfi
@@ -1573,15 +1560,17 @@ nested_nmi:
pushq_cfi $repeat_nmi
/* Put stack back */
- addq $(11*8), %rsp
- CFI_ADJUST_CFA_OFFSET -11*8
+ addq $(6*8), %rsp
+ CFI_ADJUST_CFA_OFFSET -6*8
nested_nmi_out:
popq_cfi %rdx
+ CFI_RESTORE rdx
/* No need to check faults here */
INTERRUPT_RETURN
+ CFI_RESTORE_STATE
first_nmi:
/*
* Because nested NMIs will use the pushed location that we
@@ -1598,48 +1587,79 @@ first_nmi:
* +-------------------------+
* | NMI executing variable |
* +-------------------------+
- * | Saved SS |
- * | Saved Return RSP |
- * | Saved RFLAGS |
- * | Saved CS |
- * | Saved RIP |
- * +-------------------------+
* | copied SS |
* | copied Return RSP |
* | copied RFLAGS |
* | copied CS |
* | copied RIP |
* +-------------------------+
+ * | Saved SS |
+ * | Saved Return RSP |
+ * | Saved RFLAGS |
+ * | Saved CS |
+ * | Saved RIP |
+ * +-------------------------+
* | pt_regs |
* +-------------------------+
*
- * The saved RIP is used to fix up the copied RIP that a nested
- * NMI may zero out. The original stack frame and the temp storage
+ * The saved stack frame is used to fix up the copied stack frame
+ * that a nested NMI may change to make the interrupted NMI iret jump
+ * to the repeat_nmi. The original stack frame and the temp storage
* is also used by nested NMIs and can not be trusted on exit.
*/
+ /* Do not pop rdx, nested NMIs will corrupt that part of the stack */
+ movq (%rsp), %rdx
+ CFI_RESTORE rdx
+
/* Set the NMI executing variable on the stack. */
pushq_cfi $1
+ /*
+ * Leave room for the "copied" frame
+ */
+ subq $(5*8), %rsp
+ CFI_ADJUST_CFA_OFFSET 5*8
+
/* Copy the stack frame to the Saved frame */
.rept 5
- pushq_cfi 6*8(%rsp)
+ pushq_cfi 11*8(%rsp)
.endr
+ CFI_DEF_CFA_OFFSET SS+8-RIP
+
+ /* Everything up to here is safe from nested NMIs */
+
+ /*
+ * If there was a nested NMI, the first NMI's iret will return
+ * here. But NMIs are still enabled and we can take another
+ * nested NMI. The nested NMI checks the interrupted RIP to see
+ * if it is between repeat_nmi and end_repeat_nmi, and if so
+ * it will just return, as we are about to repeat an NMI anyway.
+ * This makes it safe to copy to the stack frame that a nested
+ * NMI will update.
+ */
+repeat_nmi:
+ /*
+ * Update the stack variable to say we are still in NMI (the update
+ * is benign for the non-repeat case, where 1 was pushed just above
+ * to this very stack slot).
+ */
+ movq $1, 10*8(%rsp)
/* Make another copy, this one may be modified by nested NMIs */
+ addq $(10*8), %rsp
+ CFI_ADJUST_CFA_OFFSET -10*8
.rept 5
- pushq_cfi 4*8(%rsp)
+ pushq_cfi -6*8(%rsp)
.endr
-
- /* Do not pop rdx, nested NMIs will corrupt it */
- movq 11*8(%rsp), %rdx
+ subq $(5*8), %rsp
+ CFI_DEF_CFA_OFFSET SS+8-RIP
+end_repeat_nmi:
/*
* Everything below this point can be preempted by a nested
- * NMI if the first NMI took an exception. Repeated NMIs
- * caused by an exception and nested NMI will start here, and
- * can still be preempted by another NMI.
+ * NMI if the first NMI took an exception and reset our iret stack
+ * so that we repeat another NMI.
*/
-restart_nmi:
pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
subq $ORIG_RAX-R15, %rsp
CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
@@ -1652,42 +1672,44 @@ restart_nmi:
*/
call save_paranoid
DEFAULT_FRAME 0
+
+ /*
+ * Save off the CR2 register. If we take a page fault in the NMI then
+ * it could corrupt the CR2 value. If the NMI preempts a page fault
+ * handler before it was able to read the CR2 register, and then the
+ * NMI itself takes a page fault, the page fault that was preempted
+ * will read the information from the NMI page fault and not the
+ * origin fault. Save it off and restore it if it changes.
+ * Use the r12 callee-saved register.
+ */
+ movq %cr2, %r12
+
/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
movq %rsp,%rdi
movq $-1,%rsi
call do_nmi
+
+ /* Did the NMI take a page fault? Restore cr2 if it did */
+ movq %cr2, %rcx
+ cmpq %rcx, %r12
+ je 1f
+ movq %r12, %cr2
+1:
+
testl %ebx,%ebx /* swapgs needed? */
jnz nmi_restore
nmi_swapgs:
SWAPGS_UNSAFE_STACK
nmi_restore:
- RESTORE_ALL 8
+ /* Pop the extra iret frame at once */
+ RESTORE_ALL 6*8
+
/* Clear the NMI executing stack variable */
- movq $0, 10*8(%rsp)
+ movq $0, 5*8(%rsp)
jmp irq_return
CFI_ENDPROC
END(nmi)
- /*
- * If an NMI hit an iret because of an exception or breakpoint,
- * it can lose its NMI context, and a nested NMI may come in.
- * In that case, the nested NMI will change the preempted NMI's
- * stack to jump to here when it does the final iret.
- */
-repeat_nmi:
- INTR_FRAME
- /* Update the stack variable to say we are still in NMI */
- movq $1, 5*8(%rsp)
-
- /* copy the saved stack back to copy stack */
- .rept 5
- pushq_cfi 4*8(%rsp)
- .endr
-
- jmp restart_nmi
- CFI_ENDPROC
-end_repeat_nmi:
-
ENTRY(ignore_sysret)
CFI_STARTPROC
mov $-ENOSYS,%eax
@@ -1695,7 +1717,3 @@ ENTRY(ignore_sysret)
CFI_ENDPROC
END(ignore_sysret)
-/*
- * End of kprobes section
- */
- .popsection