diff options
author | Ingo Molnar <mingo@elte.hu> | 2008-12-23 16:30:31 +0100 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-12-23 16:30:31 +0100 |
commit | bed4f13065b520e564adffbfcd1c1a764a9c887e (patch) | |
tree | d847ddd3bab64cb126ee6679dc4a949386554d55 | |
parent | 3e5621edb3392b28efb260ac99b2d26fb8b44e73 (diff) | |
parent | bf8bd66d0580f296f257d371ee41a0a137b541c7 (diff) |
Merge branch 'x86/irq' into x86/core
-rw-r--r-- | arch/x86/include/asm/dwarf2.h | 97 | ||||
-rw-r--r-- | arch/x86/include/asm/hardirq_32.h | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/hardirq_64.h | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/hw_irq.h | 4 | ||||
-rw-r--r-- | arch/x86/include/asm/linkage.h | 60 | ||||
-rw-r--r-- | arch/x86/kernel/apic.c | 13 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce_amd_64.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce_intel_64.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/entry_32.S | 476 | ||||
-rw-r--r-- | arch/x86/kernel/entry_64.S | 1231 | ||||
-rw-r--r-- | arch/x86/kernel/irqinit_32.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/irqinit_64.c | 66 | ||||
-rw-r--r-- | arch/x86/kernel/smp.c | 18 | ||||
-rw-r--r-- | arch/x86/kernel/time_32.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/time_64.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/tlb_32.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/tlb_64.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/traps.c | 6 | ||||
-rw-r--r-- | arch/x86/lguest/boot.c | 3 | ||||
-rw-r--r-- | include/linux/linkage.h | 8 |
20 files changed, 1054 insertions, 946 deletions
diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h index 804b6e6be92..3afc5e87cfd 100644 --- a/arch/x86/include/asm/dwarf2.h +++ b/arch/x86/include/asm/dwarf2.h @@ -6,56 +6,91 @@ #endif /* - Macros for dwarf2 CFI unwind table entries. - See "as.info" for details on these pseudo ops. Unfortunately - they are only supported in very new binutils, so define them - away for older version. + * Macros for dwarf2 CFI unwind table entries. + * See "as.info" for details on these pseudo ops. Unfortunately + * they are only supported in very new binutils, so define them + * away for older version. */ #ifdef CONFIG_AS_CFI -#define CFI_STARTPROC .cfi_startproc -#define CFI_ENDPROC .cfi_endproc -#define CFI_DEF_CFA .cfi_def_cfa -#define CFI_DEF_CFA_REGISTER .cfi_def_cfa_register -#define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset -#define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset -#define CFI_OFFSET .cfi_offset -#define CFI_REL_OFFSET .cfi_rel_offset -#define CFI_REGISTER .cfi_register -#define CFI_RESTORE .cfi_restore -#define CFI_REMEMBER_STATE .cfi_remember_state -#define CFI_RESTORE_STATE .cfi_restore_state -#define CFI_UNDEFINED .cfi_undefined +#define CFI_STARTPROC .cfi_startproc +#define CFI_ENDPROC .cfi_endproc +#define CFI_DEF_CFA .cfi_def_cfa +#define CFI_DEF_CFA_REGISTER .cfi_def_cfa_register +#define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset +#define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset +#define CFI_OFFSET .cfi_offset +#define CFI_REL_OFFSET .cfi_rel_offset +#define CFI_REGISTER .cfi_register +#define CFI_RESTORE .cfi_restore +#define CFI_REMEMBER_STATE .cfi_remember_state +#define CFI_RESTORE_STATE .cfi_restore_state +#define CFI_UNDEFINED .cfi_undefined #ifdef CONFIG_AS_CFI_SIGNAL_FRAME -#define CFI_SIGNAL_FRAME .cfi_signal_frame +#define CFI_SIGNAL_FRAME .cfi_signal_frame #else #define CFI_SIGNAL_FRAME #endif #else -/* Due to the structure of pre-exisiting code, don't use assembler line - comment character # to ignore the arguments. Instead, use a dummy macro. */ +/* + * Due to the structure of pre-exisiting code, don't use assembler line + * comment character # to ignore the arguments. Instead, use a dummy macro. + */ .macro cfi_ignore a=0, b=0, c=0, d=0 .endm -#define CFI_STARTPROC cfi_ignore -#define CFI_ENDPROC cfi_ignore -#define CFI_DEF_CFA cfi_ignore +#define CFI_STARTPROC cfi_ignore +#define CFI_ENDPROC cfi_ignore +#define CFI_DEF_CFA cfi_ignore #define CFI_DEF_CFA_REGISTER cfi_ignore #define CFI_DEF_CFA_OFFSET cfi_ignore #define CFI_ADJUST_CFA_OFFSET cfi_ignore -#define CFI_OFFSET cfi_ignore -#define CFI_REL_OFFSET cfi_ignore -#define CFI_REGISTER cfi_ignore -#define CFI_RESTORE cfi_ignore -#define CFI_REMEMBER_STATE cfi_ignore -#define CFI_RESTORE_STATE cfi_ignore -#define CFI_UNDEFINED cfi_ignore -#define CFI_SIGNAL_FRAME cfi_ignore +#define CFI_OFFSET cfi_ignore +#define CFI_REL_OFFSET cfi_ignore +#define CFI_REGISTER cfi_ignore +#define CFI_RESTORE cfi_ignore +#define CFI_REMEMBER_STATE cfi_ignore +#define CFI_RESTORE_STATE cfi_ignore +#define CFI_UNDEFINED cfi_ignore +#define CFI_SIGNAL_FRAME cfi_ignore #endif +/* + * An attempt to make CFI annotations more or less + * correct and shorter. It is implied that you know + * what you're doing if you use them. + */ +#ifdef __ASSEMBLY__ +#ifdef CONFIG_X86_64 + .macro pushq_cfi reg + pushq \reg + CFI_ADJUST_CFA_OFFSET 8 + .endm + + .macro popq_cfi reg + popq \reg + CFI_ADJUST_CFA_OFFSET -8 + .endm + + .macro movq_cfi reg offset=0 + movq %\reg, \offset(%rsp) + CFI_REL_OFFSET \reg, \offset + .endm + + .macro movq_cfi_restore offset reg + movq \offset(%rsp), %\reg + CFI_RESTORE \reg + .endm +#else /*!CONFIG_X86_64*/ + + /* 32bit defenitions are missed yet */ + +#endif /*!CONFIG_X86_64*/ +#endif /*__ASSEMBLY__*/ + #endif /* _ASM_X86_DWARF2_H */ diff --git a/arch/x86/include/asm/hardirq_32.h b/arch/x86/include/asm/hardirq_32.h index 5ca135e72f2..cf7954d1405 100644 --- a/arch/x86/include/asm/hardirq_32.h +++ b/arch/x86/include/asm/hardirq_32.h @@ -22,6 +22,8 @@ DECLARE_PER_CPU(irq_cpustat_t, irq_stat); #define __ARCH_IRQ_STAT #define __IRQ_STAT(cpu, member) (per_cpu(irq_stat, cpu).member) +#define inc_irq_stat(member) (__get_cpu_var(irq_stat).member++) + void ack_bad_irq(unsigned int irq); #include <linux/irq_cpustat.h> diff --git a/arch/x86/include/asm/hardirq_64.h b/arch/x86/include/asm/hardirq_64.h index 1ba381fc51d..b5a6b5d5670 100644 --- a/arch/x86/include/asm/hardirq_64.h +++ b/arch/x86/include/asm/hardirq_64.h @@ -11,6 +11,8 @@ #define __ARCH_IRQ_STAT 1 +#define inc_irq_stat(member) add_pda(member, 1) + #define local_softirq_pending() read_pda(__softirq_pending) #define __ARCH_SET_SOFTIRQ_PENDING 1 diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index b97aecb0b61..8de644b6b95 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -109,9 +109,7 @@ extern asmlinkage void smp_invalidate_interrupt(struct pt_regs *); #endif #endif -#ifdef CONFIG_X86_32 -extern void (*const interrupt[NR_VECTORS])(void); -#endif +extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void); typedef int vector_irq_t[NR_VECTORS]; DECLARE_PER_CPU(vector_irq_t, vector_irq); diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index f61ee8f937e..5d98d0b68ff 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -57,5 +57,65 @@ #define __ALIGN_STR ".align 16,0x90" #endif +/* + * to check ENTRY_X86/END_X86 and + * KPROBE_ENTRY_X86/KPROBE_END_X86 + * unbalanced-missed-mixed appearance + */ +#define __set_entry_x86 .set ENTRY_X86_IN, 0 +#define __unset_entry_x86 .set ENTRY_X86_IN, 1 +#define __set_kprobe_x86 .set KPROBE_X86_IN, 0 +#define __unset_kprobe_x86 .set KPROBE_X86_IN, 1 + +#define __macro_err_x86 .error "ENTRY_X86/KPROBE_X86 unbalanced,missed,mixed" + +#define __check_entry_x86 \ + .ifdef ENTRY_X86_IN; \ + .ifeq ENTRY_X86_IN; \ + __macro_err_x86; \ + .abort; \ + .endif; \ + .endif + +#define __check_kprobe_x86 \ + .ifdef KPROBE_X86_IN; \ + .ifeq KPROBE_X86_IN; \ + __macro_err_x86; \ + .abort; \ + .endif; \ + .endif + +#define __check_entry_kprobe_x86 \ + __check_entry_x86; \ + __check_kprobe_x86 + +#define ENTRY_KPROBE_FINAL_X86 __check_entry_kprobe_x86 + +#define ENTRY_X86(name) \ + __check_entry_kprobe_x86; \ + __set_entry_x86; \ + .globl name; \ + __ALIGN; \ + name: + +#define END_X86(name) \ + __unset_entry_x86; \ + __check_entry_kprobe_x86; \ + .size name, .-name + +#define KPROBE_ENTRY_X86(name) \ + __check_entry_kprobe_x86; \ + __set_kprobe_x86; \ + .pushsection .kprobes.text, "ax"; \ + .globl name; \ + __ALIGN; \ + name: + +#define KPROBE_END_X86(name) \ + __unset_kprobe_x86; \ + __check_entry_kprobe_x86; \ + .size name, .-name; \ + .popsection + #endif /* _ASM_X86_LINKAGE_H */ diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 20c6e12c047..7397911f847 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -777,11 +777,7 @@ static void local_apic_timer_interrupt(void) /* * the NMI deadlock-detector uses this. */ -#ifdef CONFIG_X86_64 - add_pda(apic_timer_irqs, 1); -#else - per_cpu(irq_stat, cpu).apic_timer_irqs++; -#endif + inc_irq_stat(apic_timer_irqs); evt->event_handler(evt); } @@ -1677,14 +1673,11 @@ void smp_spurious_interrupt(struct pt_regs *regs) if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) ack_APIC_irq(); -#ifdef CONFIG_X86_64 - add_pda(irq_spurious_count, 1); -#else + inc_irq_stat(irq_spurious_count); + /* see sw-dev-man vol 3, chapter 7.4.13.5 */ pr_info("spurious APIC interrupt on CPU#%d, " "should never happen.\n", smp_processor_id()); - __get_cpu_var(irq_stat).irq_spurious_count++; -#endif irq_exit(); } diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index 5eb390a4b2e..748c8f9e7a0 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c @@ -237,7 +237,7 @@ asmlinkage void mce_threshold_interrupt(void) } } out: - add_pda(irq_threshold_count, 1); + inc_irq_stat(irq_threshold_count); irq_exit(); } diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c index c17eaf5dd6d..4b48f251fd3 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c @@ -26,7 +26,7 @@ asmlinkage void smp_thermal_interrupt(void) if (therm_throt_process(msr_val & 1)) mce_log_therm_throt_event(smp_processor_id(), msr_val); - add_pda(irq_thermal_count, 1); + inc_irq_stat(irq_thermal_count); irq_exit(); } diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index f6402c4ba10..fe7014176eb 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -619,28 +619,37 @@ END(syscall_badsys) 27:; /* - * Build the entry stubs and pointer table with - * some assembler magic. + * Build the entry stubs and pointer table with some assembler magic. + * We pack 7 stubs into a single 32-byte chunk, which will fit in a + * single cache line on all modern x86 implementations. */ -.section .rodata,"a" +.section .init.rodata,"a" ENTRY(interrupt) .text - + .p2align 5 + .p2align CONFIG_X86_L1_CACHE_SHIFT ENTRY(irq_entries_start) RING0_INT_FRAME -vector=0 -.rept NR_VECTORS - ALIGN - .if vector +vector=FIRST_EXTERNAL_VECTOR +.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7 + .balign 32 + .rept 7 + .if vector < NR_VECTORS + .if vector <> FIRST_EXTERNAL_VECTOR CFI_ADJUST_CFA_OFFSET -4 - .endif -1: pushl $~(vector) + .endif +1: pushl $(~vector+0x80) /* Note: always in signed byte range */ CFI_ADJUST_CFA_OFFSET 4 - jmp common_interrupt - .previous + .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6 + jmp 2f + .endif + .previous .long 1b - .text + .text vector=vector+1 + .endif + .endr +2: jmp common_interrupt .endr END(irq_entries_start) @@ -652,8 +661,9 @@ END(interrupt) * the CPU automatically disables interrupts when executing an IRQ vector, * so IRQ-flags tracing has to follow that: */ - ALIGN + .p2align CONFIG_X86_L1_CACHE_SHIFT common_interrupt: + addl $-0x80,(%esp) /* Adjust vector into the [-256,-1] range */ SAVE_ALL TRACE_IRQS_OFF movl %esp,%eax @@ -678,65 +688,6 @@ ENDPROC(name) /* The include is where all of the SMP etc. interrupts come from */ #include "entry_arch.h" -KPROBE_ENTRY(page_fault) - RING0_EC_FRAME - pushl $do_page_fault - CFI_ADJUST_CFA_OFFSET 4 - ALIGN -error_code: - /* the function address is in %fs's slot on the stack */ - pushl %es - CFI_ADJUST_CFA_OFFSET 4 - /*CFI_REL_OFFSET es, 0*/ - pushl %ds - CFI_ADJUST_CFA_OFFSET 4 - /*CFI_REL_OFFSET ds, 0*/ - pushl %eax - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET eax, 0 - pushl %ebp - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET ebp, 0 - pushl %edi - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET edi, 0 - pushl %esi - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET esi, 0 - pushl %edx - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET edx, 0 - pushl %ecx - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET ecx, 0 - pushl %ebx - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET ebx, 0 - cld - pushl %fs - CFI_ADJUST_CFA_OFFSET 4 - /*CFI_REL_OFFSET fs, 0*/ - movl $(__KERNEL_PERCPU), %ecx - movl %ecx, %fs - UNWIND_ESPFIX_STACK - popl %ecx - CFI_ADJUST_CFA_OFFSET -4 - /*CFI_REGISTER es, ecx*/ - movl PT_FS(%esp), %edi # get the function address - movl PT_ORIG_EAX(%esp), %edx # get the error code - movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart - mov %ecx, PT_FS(%esp) - /*CFI_REL_OFFSET fs, ES*/ - movl $(__USER_DS), %ecx - movl %ecx, %ds - movl %ecx, %es - TRACE_IRQS_OFF - movl %esp,%eax # pt_regs pointer - call *%edi - jmp ret_from_exception - CFI_ENDPROC -KPROBE_END(page_fault) - ENTRY(coprocessor_error) RING0_INT_FRAME pushl $0 @@ -767,140 +718,6 @@ ENTRY(device_not_available) CFI_ENDPROC END(device_not_available) -/* - * Debug traps and NMI can happen at the one SYSENTER instruction - * that sets up the real kernel stack. Check here, since we can't - * allow the wrong stack to be used. - * - * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have - * already pushed 3 words if it hits on the sysenter instruction: - * eflags, cs and eip. - * - * We just load the right stack, and push the three (known) values - * by hand onto the new stack - while updating the return eip past - * the instruction that would have done it for sysenter. - */ -#define FIX_STACK(offset, ok, label) \ - cmpw $__KERNEL_CS,4(%esp); \ - jne ok; \ -label: \ - movl TSS_sysenter_sp0+offset(%esp),%esp; \ - CFI_DEF_CFA esp, 0; \ - CFI_UNDEFINED eip; \ - pushfl; \ - CFI_ADJUST_CFA_OFFSET 4; \ - pushl $__KERNEL_CS; \ - CFI_ADJUST_CFA_OFFSET 4; \ - pushl $sysenter_past_esp; \ - CFI_ADJUST_CFA_OFFSET 4; \ - CFI_REL_OFFSET eip, 0 - -KPROBE_ENTRY(debug) - RING0_INT_FRAME - cmpl $ia32_sysenter_target,(%esp) - jne debug_stack_correct - FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) -debug_stack_correct: - pushl $-1 # mark this as an int - CFI_ADJUST_CFA_OFFSET 4 - SAVE_ALL - TRACE_IRQS_OFF - xorl %edx,%edx # error code 0 - movl %esp,%eax # pt_regs pointer - call do_debug - jmp ret_from_exception - CFI_ENDPROC -KPROBE_END(debug) - -/* - * NMI is doubly nasty. It can happen _while_ we're handling - * a debug fault, and the debug fault hasn't yet been able to - * clear up the stack. So we first check whether we got an - * NMI on the sysenter entry path, but after that we need to - * check whether we got an NMI on the debug path where the debug - * fault happened on the sysenter path. - */ -KPROBE_ENTRY(nmi) - RING0_INT_FRAME - pushl %eax - CFI_ADJUST_CFA_OFFSET 4 - movl %ss, %eax - cmpw $__ESPFIX_SS, %ax - popl %eax - CFI_ADJUST_CFA_OFFSET -4 - je nmi_espfix_stack - cmpl $ia32_sysenter_target,(%esp) - je nmi_stack_fixup - pushl %eax - CFI_ADJUST_CFA_OFFSET 4 - movl %esp,%eax - /* Do not access memory above the end of our stack page, - * it might not exist. - */ - andl $(THREAD_SIZE-1),%eax - cmpl $(THREAD_SIZE-20),%eax - popl %eax - CFI_ADJUST_CFA_OFFSET -4 - jae nmi_stack_correct - cmpl $ia32_sysenter_target,12(%esp) - je nmi_debug_stack_check -nmi_stack_correct: - /* We have a RING0_INT_FRAME here */ - pushl %eax - CFI_ADJUST_CFA_OFFSET 4 - SAVE_ALL - TRACE_IRQS_OFF - xorl %edx,%edx # zero error code - movl %esp,%eax # pt_regs pointer - call do_nmi - jmp restore_nocheck_notrace - CFI_ENDPROC - -nmi_stack_fixup: - RING0_INT_FRAME - FIX_STACK(12,nmi_stack_correct, 1) - jmp nmi_stack_correct - -nmi_debug_stack_check: - /* We have a RING0_INT_FRAME here */ - cmpw $__KERNEL_CS,16(%esp) - jne nmi_stack_correct - cmpl $debug,(%esp) - jb nmi_stack_correct - cmpl $debug_esp_fix_insn,(%esp) - ja nmi_stack_correct - FIX_STACK(24,nmi_stack_correct, 1) - jmp nmi_stack_correct - -nmi_espfix_stack: - /* We have a RING0_INT_FRAME here. - * - * create the pointer to lss back - */ - pushl %ss - CFI_ADJUST_CFA_OFFSET 4 - pushl %esp - CFI_ADJUST_CFA_OFFSET 4 - addw $4, (%esp) - /* copy the iret frame of 12 bytes */ - .rept 3 - pushl 16(%esp) - CFI_ADJUST_CFA_OFFSET 4 - .endr - pushl %eax - CFI_ADJUST_CFA_OFFSET 4 - SAVE_ALL - TRACE_IRQS_OFF - FIXUP_ESPFIX_STACK # %eax == %esp - xorl %edx,%edx # zero error code - call do_nmi - RESTORE_REGS - lss 12+4(%esp), %esp # back to espfix stack - CFI_ADJUST_CFA_OFFSET -24 - jmp irq_return - CFI_ENDPROC -KPROBE_END(nmi) - #ifdef CONFIG_PARAVIRT ENTRY(native_iret) iret @@ -916,19 +733,6 @@ ENTRY(native_irq_enable_sysexit) END(native_irq_enable_sysexit) #endif -KPROBE_ENTRY(int3) - RING0_INT_FRAME - pushl $-1 # mark this as an int - CFI_ADJUST_CFA_OFFSET 4 - SAVE_ALL - TRACE_IRQS_OFF - xorl %edx,%edx # zero error code - movl %esp,%eax # pt_regs pointer - call do_int3 - jmp ret_from_exception - CFI_ENDPROC -KPROBE_END(int3) - ENTRY(overflow) RING0_INT_FRAME pushl $0 @@ -993,14 +797,6 @@ ENTRY(stack_segment) CFI_ENDPROC END(stack_segment) -KPROBE_ENTRY(general_protection) - RING0_EC_FRAME - pushl $do_general_protection - CFI_ADJUST_CFA_OFFSET 4 - jmp error_code - CFI_ENDPROC -KPROBE_END(general_protection) - ENTRY(alignment_check) RING0_EC_FRAME pushl $do_alignment_check @@ -1211,3 +1007,227 @@ END(mcount) #include "syscall_table_32.S" syscall_table_size=(.-sys_call_table) + +/* + * Some functions should be protected against kprobes + */ + .pushsection .kprobes.text, "ax" + +ENTRY(page_fault) + RING0_EC_FRAME + pushl $do_page_fault + CFI_ADJUST_CFA_OFFSET 4 + ALIGN +error_code: + /* the function address is in %fs's slot on the stack */ + pushl %es + CFI_ADJUST_CFA_OFFSET 4 + /*CFI_REL_OFFSET es, 0*/ + pushl %ds + CFI_ADJUST_CFA_OFFSET 4 + /*CFI_REL_OFFSET ds, 0*/ + pushl %eax + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET eax, 0 + pushl %ebp + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ebp, 0 + pushl %edi + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET edi, 0 + pushl %esi + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET esi, 0 + pushl %edx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET edx, 0 + pushl %ecx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ecx, 0 + pushl %ebx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ebx, 0 + cld + pushl %fs + CFI_ADJUST_CFA_OFFSET 4 + /*CFI_REL_OFFSET fs, 0*/ + movl $(__KERNEL_PERCPU), %ecx + movl %ecx, %fs + UNWIND_ESPFIX_STACK + popl %ecx + CFI_ADJUST_CFA_OFFSET -4 + /*CFI_REGISTER es, ecx*/ + movl PT_FS(%esp), %edi # get the function address + movl PT_ORIG_EAX(%esp), %edx # get the error code + movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart + mov %ecx, PT_FS(%esp) + /*CFI_REL_OFFSET fs, ES*/ + movl $(__USER_DS), %ecx + movl %ecx, %ds + movl %ecx, %es + TRACE_IRQS_OFF + movl %esp,%eax # pt_regs pointer + call *%edi + jmp ret_from_exception + CFI_ENDPROC +END(page_fault) + +/* + * Debug traps and NMI can happen at the one SYSENTER instruction + * that sets up the real kernel stack. Check here, since we can't + * allow the wrong stack to be used. + * + * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have + * already pushed 3 words if it hits on the sysenter instruction: + * eflags, cs and eip. + * + * We just load the right stack, and push the three (known) values + * by hand onto the new stack - while updating the return eip past + * the instruction that would have done it for sysenter. + */ +#define FIX_STACK(offset, ok, label) \ + cmpw $__KERNEL_CS,4(%esp); \ + jne ok; \ +label: \ + movl TSS_sysenter_sp0+offset(%esp),%esp; \ + CFI_DEF_CFA esp, 0; \ + CFI_UNDEFINED eip; \ + pushfl; \ + CFI_ADJUST_CFA_OFFSET 4; \ + pushl $__KERNEL_CS; \ + CFI_ADJUST_CFA_OFFSET 4; \ + pushl $sysenter_past_esp; \ + CFI_ADJUST_CFA_OFFSET 4; \ + CFI_REL_OFFSET eip, 0 + +ENTRY(debug) + RING0_INT_FRAME + cmpl $ia32_sysenter_target,(%esp) + jne debug_stack_correct + FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) +debug_stack_correct: + pushl $-1 # mark this as an int + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + TRACE_IRQS_OFF + xorl %edx,%edx # error code 0 + movl %esp,%eax # pt_regs pointer + call do_debug + jmp ret_from_exception + CFI_ENDPROC +END(debug) + +/* + * NMI is doubly nasty. It can happen _while_ we're handling + * a debug fault, and the debug fault hasn't yet been able to + * clear up the stack. So we first check whether we got an + * NMI on the sysenter entry path, but after that we need to + * check whether we got an NMI on the debug path where the debug + * fault happened on the sysenter path. + */ +ENTRY(nmi) + RING0_INT_FRAME + pushl %eax + CFI_ADJUST_CFA_OFFSET 4 + movl %ss, %eax + cmpw $__ESPFIX_SS, %ax + popl %eax + CFI_ADJUST_CFA_OFFSET -4 + je nmi_espfix_stack + cmpl $ia32_sysenter_target,(%esp) + je nmi_stack_fixup + pushl %eax + CFI_ADJUST_CFA_OFFSET 4 + movl %esp,%eax + /* Do not access memory above the end of our stack page, + * it might not exist. + */ + andl $(THREAD_SIZE-1),%eax + cmpl $(THREAD_SIZE-20),%eax + popl %eax + CFI_ADJUST_CFA_OFFSET -4 + jae nmi_stack_correct + cmpl $ia32_sysenter_target,12(%esp) + je nmi_debug_stack_check +nmi_stack_correct: + /* We have a RING0_INT_FRAME here */ + pushl %eax + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + TRACE_IRQS_OFF + xorl %edx,%edx # zero error code + movl %esp,%eax # pt_regs pointer + call do_nmi + jmp restore_nocheck_notrace + CFI_ENDPROC + +nmi_stack_fixup: + RING0_INT_FRAME + FIX_STACK(12,nmi_stack_correct, 1) + jmp nmi_stack_correct + +nmi_debug_stack_check: + /* We have a RING0_INT_FRAME here */ + cmpw $__KERNEL_CS,16(%esp) + jne nmi_stack_correct + cmpl $debug,(%esp) + jb nmi_stack_correct + cmpl $debug_esp_fix_insn,(%esp) + ja nmi_stack_correct + FIX_STACK(24,nmi_stack_correct, 1) + jmp nmi_stack_correct + +nmi_espfix_stack: + /* We have a RING0_INT_FRAME here. + * + * create the pointer to lss back + */ + pushl %ss + CFI_ADJUST_CFA_OFFSET 4 + pushl %esp + CFI_ADJUST_CFA_OFFSET 4 + addw $4, (%esp) + /* copy the iret frame of 12 bytes */ + .rept 3 + pushl 16(%esp) + CFI_ADJUST_CFA_OFFSET 4 + .endr + pushl %eax + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + TRACE_IRQS_OFF + FIXUP_ESPFIX_STACK # %eax == %esp + xorl %edx,%edx # zero error code + call do_nmi + RESTORE_REGS + lss 12+4(%esp), %esp # back to espfix stack + CFI_ADJUST_CFA_OFFSET -24 + jmp irq_return + CFI_ENDPROC +END(nmi) + +ENTRY(int3) + RING0_INT_FRAME + pushl $-1 # mark this as an int + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + TRACE_IRQS_OFF + xorl %edx,%edx # zero error code + movl %esp,%eax # pt_regs pointer + call do_int3 + jmp ret_from_exception + CFI_ENDPROC +END(int3) + +ENTRY(general_protection) + RING0_EC_FRAME + pushl $do_general_protection + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC +END(general_protection) + +/* + * End of kprobes section + */ + .popsection diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 42571baaca3..3194636a429 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -60,7 +60,6 @@ #define __AUDIT_ARCH_LE 0x40000000 .code64 - #ifdef CONFIG_FUNCTION_TRACER #ifdef CONFIG_DYNAMIC_FTRACE ENTRY(mcount) @@ -169,21 +168,21 @@ ENTRY(native_usergs_sysret64) */ /* %rsp:at FRAMEEND */ - .macro FIXUP_TOP_OF_STACK tmp - movq %gs:pda_oldrsp,\tmp - movq \tmp,RSP(%rsp) - movq $__USER_DS,SS(%rsp) - movq $__USER_CS,CS(%rsp) - movq $-1,RCX(%rsp) - movq R11(%rsp),\tmp /* get eflags */ - movq \tmp,EFLAGS(%rsp) + .macro FIXUP_TOP_OF_STACK tmp offset=0 + movq %gs:pda_oldrsp,\tmp + movq \tmp,RSP+\offset(%rsp) + movq $__USER_DS,SS+\offset(%rsp) + movq $__USER_CS,CS+\offset(%rsp) + movq $-1,RCX+\offset(%rsp) + movq R11+\offset(%rsp),\tmp /* get eflags */ + movq \tmp,EFLAGS+\offset(%rsp) .endm - .macro RESTORE_TOP_OF_STACK tmp,offset=0 - movq RSP-\offset(%rsp),\tmp - movq \tmp,%gs:pda_oldrsp - movq EFLAGS-\offset(%rsp),\tmp - movq \tmp,R11-\offset(%rsp) + .macro RESTORE_TOP_OF_STACK tmp offset=0 + movq RSP+\offset(%rsp),\tmp + movq \tmp,%gs:pda_oldrsp + movq EFLAGS+\offset(%rsp),\tmp + movq \tmp,R11+\offset(%rsp) .endm .macro FAKE_STACK_FRAME child_rip @@ -195,7 +194,7 @@ ENTRY(native_usergs_sysret64) pushq %rax /* rsp */ CFI_ADJUST_CFA_OFFSET 8 CFI_REL_OFFSET rsp,0 - pushq $(1<<9) /* eflags - interrupts on */ + pushq $X86_EFLAGS_IF /* eflags - interrupts on */ CFI_ADJUST_CFA_OFFSET 8 /*CFI_REL_OFFSET rflags,0*/ pushq $__KERNEL_CS /* cs */ @@ -213,64 +212,184 @@ ENTRY(native_usergs_sysret64) CFI_ADJUST_CFA_OFFSET -(6*8) .endm - .macro CFI_DEFAULT_STACK start=1 +/* + * initial frame state for interrupts (and exceptions without error code) + */ + .macro EMPTY_FRAME start=1 offset=0 .if \start - CFI_STARTPROC simple + CFI_STARTPROC simple CFI_SIGNAL_FRAME - CFI_DEF_CFA rsp,SS+8 + CFI_DEF_CFA rsp,8+\offset .else - CFI_DEF_CFA_OFFSET SS+8 + CFI_DEF_CFA_OFFSET 8+\offset .endif - CFI_REL_OFFSET r15,R15 - CFI_REL_OFFSET r14,R14 - CFI_REL_OFFSET r13,R13 - CFI_REL_OFFSET r12,R12 - CFI_REL_OFFSET rbp,RBP - CFI_REL_OFFSET rbx,RBX - CFI_REL_OFFSET r11,R11 - CFI_REL_OFFSET r10,R10 - CFI_REL_OFFSET r9,R9 - CFI_REL_OFFSET r8,R8 - CFI_REL_OFFSET rax,RAX - CFI_REL_OFFSET rcx,RCX - CFI_REL_OFFSET rdx,RDX - CFI_REL_OFFSET rsi,RSI - CFI_REL_OFFSET rdi,RDI - CFI_REL_OFFSET rip,RIP - /*CFI_REL_OFFSET cs,CS*/ - /*CFI_REL_OFFSET rflags,EFLAGS*/ - CFI_REL_OFFSET rsp,RSP - /*CFI_REL_OFFSET ss,SS*/ .endm + +/* + * initial frame state for interrupts (and exceptions without error code) + */ + .macro INTR_FRAME start=1 offset=0 + EMPTY_FRAME \start, SS+8+\offset-RIP + /*CFI_REL_OFFSET ss, SS+\offset-RIP*/ + CFI_REL_OFFSET rsp, RSP+\offset-RIP + /*CFI_REL_OFFSET rflags, EFLAGS+\offset-RIP*/ + /*CFI_REL_OFFSET cs, CS+\offset-RIP*/ + CFI_REL_OFFSET rip, RIP+\offset-RIP + .endm + +/* + * initial frame state for exceptions with error code (and interrupts + * with vector already pushed) + */ + .macro XCPT_FRAME start=1 offset=0 + INTR_FRAME \start, RIP+\offset-ORIG_RAX + /*CFI_REL_OFFSET orig_rax, ORIG_RAX-ORIG_RAX*/ + .endm + +/* + * frame that enables calling into C. + */ + .macro PARTIAL_FRAME start=1 offset=0 + XCPT_FRAME \start, ORIG_RAX+\offset-ARGOFFSET + CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET + CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET + CFI_REL_OFFSET rdx, RDX+\offset-ARGOFFSET + CFI_REL_OFFSET rcx, RCX+\offset-ARGOFFSET + CFI_REL_OFFSET rax, RAX+\offset-ARGOFFSET + CFI_REL_OFFSET r8, R8+\offset-ARGOFFSET + CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET + CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET + CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET + .endm + +/* + * frame that enables passing a complete pt_regs to a C function. + */ + .macro DEFAULT_FRAME start=1 offset=0 + PARTIAL_FRAME \start, R11+\offset-R15 + CFI_REL_OFFSET rbx, RBX+\offset + CFI_REL_OFFSET rbp, RBP+\offset + CFI_REL_OFFSET r12, R12+\offset + CFI_REL_OFFSET r13, R13+\offset + CFI_REL_OFFSET r14, R14+\offset + CFI_REL_OFFSET r15, R15+\offset + .endm + +/* save partial stack frame */ +ENTRY(save_args) + XCPT_FRAME + cld + movq_cfi rdi, RDI+16-ARGOFFSET + movq_cfi rsi, RSI+16-ARGOFFSET + movq_cfi rdx, RDX+16-ARGOFFSET + movq_cfi rcx, RCX+16-ARGOFFSET + movq_cfi rax, RAX+16-ARGOFFSET + movq_cfi r8, R8+16-ARGOFFSET + movq_cfi r9, R9+16-ARGOFFSET + movq_cfi r10, R10+16-ARGOFFSET + movq_cfi r11, R11+16-ARGOFFSET + + leaq -ARGOFFSET+16(%rsp),%rdi /* arg1 for handler */ + movq_cfi rbp, 8 /* push %rbp */ + leaq 8(%rsp), %rbp /* mov %rsp, %ebp */ + testl $3, CS(%rdi) + je 1f + SWAPGS + /* + * irqcount is used to check if a CPU is already on an interrupt stack + * or not. While this is essentially redundant with preempt_count it is + * a little cheaper to use a separate counter in the PDA (short of + * moving irq_enter into assembly, which would be too much work) + */ +1: incl %gs:pda_irqcount + jne 2f + popq_cfi %rax /* move return address... */ + mov %gs:pda_irqstackptr,%rsp + EMPTY_FRAME 0 + pushq_cfi %rax /* ... to the new stack */ + /* + * We entered an interrupt context - irqs are off: + */ +2: TRACE_IRQS_OFF + ret + CFI_ENDPROC +END(save_args) + |