diff options
Diffstat (limited to 'arch/tile/kernel/intvec_32.S')
| -rw-r--r-- | arch/tile/kernel/intvec_32.S | 484 |
1 files changed, 193 insertions, 291 deletions
diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S index f5821626247..cdbda45a4e4 100644 --- a/arch/tile/kernel/intvec_32.S +++ b/arch/tile/kernel/intvec_32.S @@ -21,31 +21,17 @@ #include <asm/ptrace.h> #include <asm/thread_info.h> #include <asm/irqflags.h> -#include <asm/atomic.h> +#include <asm/atomic_32.h> #include <asm/asm-offsets.h> #include <hv/hypervisor.h> #include <arch/abi.h> #include <arch/interrupts.h> #include <arch/spr_def.h> -#ifdef CONFIG_PREEMPT -# error "No support for kernel preemption currently" -#endif - -#if INT_INTCTRL_K < 32 || INT_INTCTRL_K >= 48 -# error INT_INTCTRL_K coded to set high interrupt mask -#endif - #define PTREGS_PTR(reg, ptreg) addli reg, sp, C_ABI_SAVE_AREA_SIZE + (ptreg) #define PTREGS_OFFSET_SYSCALL PTREGS_OFFSET_REG(TREG_SYSCALL_NR) -#if !CHIP_HAS_WH64() - /* By making this an empty macro, we can use wh64 in the code. */ - .macro wh64 reg - .endm -#endif - .macro push_reg reg, ptr=sp, delta=-4 { sw \ptr, \reg @@ -193,7 +179,7 @@ intvec_\vecname: * point sp at the top aligned address on the actual stack page. */ mfspr r0, SPR_SYSTEM_SAVE_K_0 - mm r0, r0, zero, LOG2_THREAD_SIZE, 31 + mm r0, r0, zero, LOG2_NR_CPU_IDS, 31 0: /* @@ -211,6 +197,9 @@ intvec_\vecname: * cache line 1: r14...r29 * cache line 0: 2 x frame, r0..r13 */ +#if STACK_TOP_DELTA != 64 +#error STACK_TOP_DELTA must be 64 for assumptions here and in task_pt_regs() +#endif andi r0, r0, -64 /* @@ -324,24 +313,20 @@ intvec_\vecname: movei r3, 0 } .else - .ifc \c_routine, op_handle_perf_interrupt + .ifc \c_routine, handle_perf_interrupt { mfspr r2, PERF_COUNT_STS movei r3, -1 /* not used, but set for consistency */ } .else -#if CHIP_HAS_AUX_PERF_COUNTERS() - .ifc \c_routine, op_handle_aux_perf_interrupt + .ifc \c_routine, handle_perf_interrupt { mfspr r2, AUX_PERF_COUNT_STS movei r3, -1 /* not used, but set for consistency */ } .else -#endif movei r3, 0 -#if CHIP_HAS_AUX_PERF_COUNTERS() .endif -#endif .endif .endif .endif @@ -358,7 +343,7 @@ intvec_\vecname: #ifdef __COLLECT_LINKER_FEEDBACK__ .pushsection .text.intvec_feedback,"ax" .org (\vecnum << 5) - FEEDBACK_ENTER_EXPLICIT(intvec_\vecname, .intrpt1, 1 << 8) + FEEDBACK_ENTER_EXPLICIT(intvec_\vecname, .intrpt, 1 << 8) jrp lr .popsection #endif @@ -472,7 +457,7 @@ intvec_\vecname: } { auli r21, r21, ha16(__per_cpu_offset) - mm r20, r20, zero, 0, LOG2_THREAD_SIZE-1 + mm r20, r20, zero, 0, LOG2_NR_CPU_IDS-1 } s2a r20, r20, r21 lw tp, r20 @@ -566,7 +551,6 @@ intvec_\vecname: .endif mtspr INTERRUPT_CRITICAL_SECTION, zero -#if CHIP_HAS_WH64() /* * Prepare the first 256 stack bytes to be rapidly accessible * without having to fetch the background data. We don't really @@ -587,7 +571,6 @@ intvec_\vecname: addi r52, r52, -64 } wh64 r52 -#endif #ifdef CONFIG_TRACE_IRQFLAGS .ifnc \function,handle_nmi @@ -766,7 +749,7 @@ intvec_\vecname: .macro dc_dispatch vecnum, vecname .org (\vecnum << 8) intvec_\vecname: - j hv_downcall_dispatch + j _hv_downcall_dispatch ENDPROC(intvec_\vecname) .endm @@ -803,6 +786,10 @@ handle_interrupt: * This routine takes a boolean in r30 indicating if this is an NMI. * If so, we also expect a boolean in r31 indicating whether to * re-enable the oprofile interrupts. + * + * Note that .Lresume_userspace is jumped to directly in several + * places, and we need to make sure r30 is set correctly in those + * callers as well. */ STD_ENTRY(interrupt_return) /* If we're resuming to kernel space, don't check thread flags. */ @@ -812,17 +799,37 @@ STD_ENTRY(interrupt_return) } lw r29, r29 andi r29, r29, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */ + bzt r29, .Lresume_userspace + +#ifdef CONFIG_PREEMPT + /* Returning to kernel space. Check if we need preemption. */ + GET_THREAD_INFO(r29) + addli r28, r29, THREAD_INFO_FLAGS_OFFSET { - bzt r29, .Lresume_userspace - PTREGS_PTR(r29, PTREGS_OFFSET_PC) + lw r28, r28 + addli r29, r29, THREAD_INFO_PREEMPT_COUNT_OFFSET + } + { + andi r28, r28, _TIF_NEED_RESCHED + lw r29, r29 } + bzt r28, 1f + bnz r29, 1f + /* Disable interrupts explicitly for preemption. */ + IRQ_DISABLE(r20,r21) + TRACE_IRQS_OFF + jal preempt_schedule_irq + FEEDBACK_REENTER(interrupt_return) +1: +#endif /* If we're resuming to _cpu_idle_nap, bump PC forward by 8. */ { - lw r28, r29 + PTREGS_PTR(r29, PTREGS_OFFSET_PC) moveli r27, lo16(_cpu_idle_nap) } { + lw r28, r29 auli r27, r27, ha16(_cpu_idle_nap) } { @@ -839,6 +846,18 @@ STD_ENTRY(interrupt_return) FEEDBACK_REENTER(interrupt_return) /* + * Use r33 to hold whether we have already loaded the callee-saves + * into ptregs. We don't want to do it twice in this loop, since + * then we'd clobber whatever changes are made by ptrace, etc. + * Get base of stack in r32. + */ + { + GET_THREAD_INFO(r32) + movei r33, 0 + } + +.Lretry_work_pending: + /* * Disable interrupts so as to make sure we don't * miss an interrupt that sets any of the thread flags (like * need_resched or sigpending) between sampling and the iret. @@ -848,21 +867,33 @@ STD_ENTRY(interrupt_return) IRQ_DISABLE(r20, r21) TRACE_IRQS_OFF /* Note: clobbers registers r0-r29 */ - /* Get base of stack in r32; note r30/31 are used as arguments here. */ - GET_THREAD_INFO(r32) - /* Check to see if there is any work to do before returning to user. */ { addi r29, r32, THREAD_INFO_FLAGS_OFFSET - moveli r28, lo16(_TIF_ALLWORK_MASK) + moveli r1, lo16(_TIF_ALLWORK_MASK) } { lw r29, r29 - auli r28, r28, ha16(_TIF_ALLWORK_MASK) + auli r1, r1, ha16(_TIF_ALLWORK_MASK) } - and r28, r29, r28 - bnz r28, .Lwork_pending + and r1, r29, r1 + bzt r1, .Lrestore_all + + /* + * Make sure we have all the registers saved for signal + * handling, notify-resume, or single-step. Call out to C + * code to figure out exactly what we need to do for each flag bit, + * then if necessary, reload the flags and recheck. + */ + { + PTREGS_PTR(r0, PTREGS_OFFSET_BASE) + bnz r33, 1f + } + push_extra_callee_saves r0 + movei r33, 1 +1: jal do_work_pending + bnz r0, .Lretry_work_pending /* * In the NMI case we @@ -915,6 +946,13 @@ STD_ENTRY(interrupt_return) bzt r30, .Lrestore_regs 3: + /* We are relying on INT_PERF_COUNT at 33, and AUX_PERF_COUNT at 48 */ + { + moveli r0, lo16(1 << (INT_PERF_COUNT - 32)) + bz r31, .Lrestore_regs + } + auli r0, r0, ha16(1 << (INT_AUX_PERF_COUNT - 32)) + mtspr SPR_INTERRUPT_MASK_RESET_K_1, r0 /* * We now commit to returning from this interrupt, since we will be @@ -1103,142 +1141,9 @@ STD_ENTRY(interrupt_return) pop_reg r50 pop_reg r51, sp, PTREGS_OFFSET_REG(29) - PTREGS_OFFSET_REG(51) j .Lcontinue_restore_regs - -.Lwork_pending: - /* Mask the reschedule flag */ - andi r28, r29, _TIF_NEED_RESCHED - - { - /* - * If the NEED_RESCHED flag is called, we call schedule(), which - * may drop this context right here and go do something else. - * On return, jump back to .Lresume_userspace and recheck. - */ - bz r28, .Lasync_tlb - - /* Mask the async-tlb flag */ - andi r28, r29, _TIF_ASYNC_TLB - } - - jal schedule - FEEDBACK_REENTER(interrupt_return) - - /* Reload the flags and check again */ - j .Lresume_userspace - -.Lasync_tlb: - { - bz r28, .Lneed_sigpending - - /* Mask the sigpending flag */ - andi r28, r29, _TIF_SIGPENDING - } - - PTREGS_PTR(r0, PTREGS_OFFSET_BASE) - jal do_async_page_fault - FEEDBACK_REENTER(interrupt_return) - - /* - * Go restart the "resume userspace" process. We may have - * fired a signal, and we need to disable interrupts again. - */ - j .Lresume_userspace - -.Lneed_sigpending: - /* - * At this point we are either doing signal handling or single-step, - * so either way make sure we have all the registers saved. - */ - push_extra_callee_saves r0 - - { - /* If no signal pending, skip to singlestep check */ - bz r28, .Lneed_singlestep - - /* Mask the singlestep flag */ - andi r28, r29, _TIF_SINGLESTEP - } - - jal do_signal - FEEDBACK_REENTER(interrupt_return) - - /* Reload the flags and check again */ - j .Lresume_userspace - -.Lneed_singlestep: - { - /* Get a pointer to the EX1 field */ - PTREGS_PTR(r29, PTREGS_OFFSET_EX1) - - /* If we get here, our bit must be set. */ - bz r28, .Lwork_confusion - } - /* If we are in priv mode, don't single step */ - lw r28, r29 - andi r28, r28, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */ - bnz r28, .Lrestore_all - - /* Allow interrupts within the single step code */ - TRACE_IRQS_ON /* Note: clobbers registers r0-r29 */ - IRQ_ENABLE(r20, r21) - - /* try to single-step the current instruction */ - PTREGS_PTR(r0, PTREGS_OFFSET_BASE) - jal single_step_once - FEEDBACK_REENTER(interrupt_return) - - /* Re-disable interrupts. TRACE_IRQS_OFF in .Lrestore_all. */ - IRQ_DISABLE(r20,r21) - - j .Lrestore_all - -.Lwork_confusion: - move r0, r28 - panic "thread_info allwork flags unhandled on userspace resume: %#x" - STD_ENDPROC(interrupt_return) /* - * This interrupt variant clears the INT_INTCTRL_K interrupt mask bit - * before returning, so we can properly get more downcalls. - */ - .pushsection .text.handle_interrupt_downcall,"ax" -handle_interrupt_downcall: - finish_interrupt_save handle_interrupt_downcall - check_single_stepping normal, .Ldispatch_downcall -.Ldispatch_downcall: - - /* Clear INTCTRL_K from the set of interrupts we ever enable. */ - GET_INTERRUPTS_ENABLED_MASK_PTR(r30) - { - addi r30, r30, 4 - movei r31, INT_MASK(INT_INTCTRL_K) - } - { - lw r20, r30 - nor r21, r31, zero - } - and r20, r20, r21 - sw r30, r20 - - { - jalr r0 - PTREGS_PTR(r0, PTREGS_OFFSET_BASE) - } - FEEDBACK_REENTER(handle_interrupt_downcall) - - /* Allow INTCTRL_K to be enabled next time we enable interrupts. */ - lw r20, r30 - or r20, r20, r31 - sw r30, r20 - - { - movei r30, 0 /* not an NMI */ - j interrupt_return - } - STD_ENDPROC(handle_interrupt_downcall) - - /* * Some interrupts don't check for single stepping */ .pushsection .text.handle_interrupt_no_single_step,"ax" @@ -1273,6 +1178,10 @@ handle_nmi: PTREGS_PTR(r0, PTREGS_OFFSET_BASE) } FEEDBACK_REENTER(handle_nmi) + { + movei r30, 1 + seq r31, r0, zero + } j interrupt_return STD_ENDPROC(handle_nmi) @@ -1300,15 +1209,20 @@ handle_syscall: add r20, r20, tp lw r21, r20 addi r21, r21, 1 - sw r20, r21 + { + sw r20, r21 + GET_THREAD_INFO(r31) + } /* Trace syscalls, if requested. */ - GET_THREAD_INFO(r31) addi r31, r31, THREAD_INFO_FLAGS_OFFSET lw r30, r31 andi r30, r30, _TIF_SYSCALL_TRACE bzt r30, .Lrestore_syscall_regs - jal do_syscall_trace + { + PTREGS_PTR(r0, PTREGS_OFFSET_BASE) + jal do_syscall_trace_enter + } FEEDBACK_REENTER(handle_syscall) /* @@ -1342,8 +1256,8 @@ handle_syscall: lw r20, r20 /* Jump to syscall handler. */ - jalr r20; .Lhandle_syscall_link: - FEEDBACK_REENTER(handle_syscall) + jalr r20 +.Lhandle_syscall_link: /* value of "lr" after "jalr r20" above */ /* * Write our r0 onto the stack so it gets restored instead @@ -1352,13 +1266,22 @@ handle_syscall: PTREGS_PTR(r29, PTREGS_OFFSET_REG(0)) sw r29, r0 +.Lsyscall_sigreturn_skip: + FEEDBACK_REENTER(handle_syscall) + /* Do syscall trace again, if requested. */ lw r30, r31 andi r30, r30, _TIF_SYSCALL_TRACE bzt r30, 1f - jal do_syscall_trace + { + PTREGS_PTR(r0, PTREGS_OFFSET_BASE) + jal do_syscall_trace_exit + } FEEDBACK_REENTER(handle_syscall) -1: j .Lresume_userspace /* jump into middle of interrupt_return */ +1: { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } .Linvalid_syscall: /* Report an invalid syscall back to the user program */ @@ -1367,7 +1290,10 @@ handle_syscall: movei r28, -ENOSYS } sw r29, r28 - j .Lresume_userspace /* jump into middle of interrupt_return */ + { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } STD_ENDPROC(handle_syscall) /* Return the address for oprofile to suppress in backtraces. */ @@ -1383,9 +1309,27 @@ STD_ENTRY(ret_from_fork) jal sim_notify_fork jal schedule_tail FEEDBACK_REENTER(ret_from_fork) - j .Lresume_userspace /* jump into middle of interrupt_return */ + { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } STD_ENDPROC(ret_from_fork) +STD_ENTRY(ret_from_kernel_thread) + jal sim_notify_fork + jal schedule_tail + FEEDBACK_REENTER(ret_from_fork) + { + move r0, r31 + jalr r30 + } + FEEDBACK_REENTER(ret_from_kernel_thread) + { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } + STD_ENDPROC(ret_from_kernel_thread) + /* * Code for ill interrupt. */ @@ -1470,7 +1414,10 @@ handle_ill: 3: /* set PC and continue */ lw r26, r24 - sw r28, r26 + { + sw r28, r26 + GET_THREAD_INFO(r0) + } /* * Clear TIF_SINGLESTEP to prevent recursion if we execute an ill. @@ -1478,7 +1425,6 @@ handle_ill: * need to clear it here and can't really impose on all other arches. * So what's another write between friends? */ - GET_THREAD_INFO(r0) addi r1, r0, THREAD_INFO_FLAGS_OFFSET { @@ -1492,12 +1438,14 @@ handle_ill: { lw r0, r0 /* indirect thru thread_info to get task_info*/ addi r1, sp, C_ABI_SAVE_AREA_SIZE /* put ptregs pointer into r1 */ - move r2, zero /* load error code into r2 */ } jal send_sigtrap /* issue a SIGTRAP */ FEEDBACK_REENTER(handle_ill) - j .Lresume_userspace /* jump into middle of interrupt_return */ + { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } .Ldispatch_normal_ill: { @@ -1527,21 +1475,24 @@ STD_ENTRY_LOCAL(bad_intr) panic "Unhandled interrupt %#x: PC %#lx" STD_ENDPROC(bad_intr) -/* Put address of pt_regs in reg and jump. */ -#define PTREGS_SYSCALL(x, reg) \ +/* + * Special-case sigreturn to not write r0 to the stack on return. + * This is technically more efficient, but it also avoids difficulties + * in the 64-bit OS when handling 32-bit compat code, since we must not + * sign-extend r0 for the sigreturn return-value case. + */ +#define PTREGS_SYSCALL_SIGRETURN(x, reg) \ STD_ENTRY(_##x); \ + addli lr, lr, .Lsyscall_sigreturn_skip - .Lhandle_syscall_link; \ { \ PTREGS_PTR(reg, PTREGS_OFFSET_BASE); \ j x \ }; \ STD_ENDPROC(_##x) -PTREGS_SYSCALL(sys_execve, r3) -PTREGS_SYSCALL(sys_sigaltstack, r2) -PTREGS_SYSCALL(sys_rt_sigreturn, r0) -PTREGS_SYSCALL(sys_cmpxchg_badaddr, r1) +PTREGS_SYSCALL_SIGRETURN(sys_rt_sigreturn, r0) -/* Save additional callee-saves to pt_regs, put address in r4 and jump. */ +/* Save additional callee-saves to pt_regs and jump to standard function. */ STD_ENTRY(_sys_clone) push_extra_callee_saves r4 j sys_clone @@ -1576,13 +1527,17 @@ STD_ENTRY(_sys_clone) * We place it in the __HEAD section to ensure it is relatively * near to the intvec_SWINT_1 code (reachable by a conditional branch). * - * Must match register usage in do_page_fault(). + * Our use of ATOMIC_LOCK_REG here must match do_page_fault_ics(). + * + * As we do in lib/atomic_asm_32.S, we bypass a store if the value we + * would store is the same as the value we just loaded. */ __HEAD .align 64 /* Align much later jump on the start of a cache line. */ -#if !ATOMIC_LOCKS_FOUND_VIA_TABLE() - nop; nop + nop +#if PAGE_SIZE >= 0x10000 + nop #endif ENTRY(sys_cmpxchg) @@ -1610,67 +1565,13 @@ ENTRY(sys_cmpxchg) * about aliasing among multiple mappings of the same physical page, * and we ignore the low 3 bits so we have one lock that covers * both a cmpxchg64() and a cmpxchg() on either its low or high word. - * NOTE: this code must match __atomic_hashed_lock() in lib/atomic.c. + * NOTE: this must match __atomic_hashed_lock() in lib/atomic_32.c. */ -#if ATOMIC_LOCKS_FOUND_VIA_TABLE() - { - /* Check for unaligned input. */ - bnz sp, .Lcmpxchg_badaddr - mm r25, r0, zero, 3, PAGE_SHIFT-1 - } - { - crc32_32 r25, zero, r25 - moveli r21, lo16(atomic_lock_ptr) - } - { - auli r21, r21, ha16(atomic_lock_ptr) - auli r23, zero, hi16(PAGE_OFFSET) /* hugepage-aligned */ - } - { - shri r20, r25, 32 - ATOMIC_HASH_L1_SHIFT - slt_u r23, r0, r23 - - /* - * Ensure that the TLB is loaded before we take out the lock. - * On TILEPro, this will start fetching the value all the way - * into our L1 as well (and if it gets modified before we - * grab the lock, it will be invalidated from our cache - * before we reload it). On tile64, we'll start fetching it - * into our L1 if we're the home, and if we're not, we'll - * still at least start fetching it into the home's L2. - */ - lw r26, r0 - } - { - s2a r21, r20, r21 - bbns r23, .Lcmpxchg_badaddr - } - { - lw r21, r21 - seqi r23, TREG_SYSCALL_NR_NAME, __NR_FAST_cmpxchg64 - andi r25, r25, ATOMIC_HASH_L2_SIZE - 1 - } - { - /* Branch away at this point if we're doing a 64-bit cmpxchg. */ - bbs r23, .Lcmpxchg64 - andi r23, r0, 7 /* Precompute alignment for cmpxchg64. */ - } - - { - /* - * We very carefully align the code that actually runs with - * the lock held (nine bundles) so that we know it is all in - * the icache when we start. This instruction (the jump) is - * at the start of the first cache line, address zero mod 64; - * we jump to somewhere in the second cache line to issue the - * tns, then jump back to finish up. - */ - s2a ATOMIC_LOCK_REG_NAME, r25, r21 - j .Lcmpxchg32_tns - } +#if (PAGE_OFFSET & 0xffff) != 0 +# error Code here assumes PAGE_OFFSET can be loaded with just hi16() +#endif -#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ { /* Check for unaligned input. */ bnz sp, .Lcmpxchg_badaddr @@ -1684,7 +1585,7 @@ ENTRY(sys_cmpxchg) * Because of C pointer arithmetic, we want to compute this: * * ((char*)atomic_locks + - * (((r0 >> 3) & (1 << (ATOMIC_HASH_SIZE - 1))) << 2)) + * (((r0 >> 3) & ((1 << ATOMIC_HASH_SHIFT) - 1)) << 2)) * * Instead of two shifts we just ">> 1", and use 'mm' * to ignore the low and high bits we don't want. @@ -1695,21 +1596,21 @@ ENTRY(sys_cmpxchg) /* * Ensure that the TLB is loaded before we take out the lock. - * On tilepro, this will start fetching the value all the way - * into our L1 as well (and if it gets modified before we - * grab the lock, it will be invalidated from our cache - * before we reload it). On tile64, we'll start fetching it - * into our L1 if we're the home, and if we're not, we'll - * still at least start fetching it into the home's L2. + * This will start fetching the value all the way into our L1 + * as well (and if it gets modified before we grab the lock, + * it will be invalidated from our cache before we reload it). */ lw r26, r0 } { - /* atomic_locks is page aligned so this suffices to get its addr. */ - auli r21, zero, hi16(atomic_locks) + auli r21, zero, ha16(atomic_locks) bbns r23, .Lcmpxchg_badaddr } +#if PAGE_SIZE < 0x10000 + /* atomic_locks is page-aligned so for big pages we don't need this. */ + addli r21, r21, lo16(atomic_locks) +#endif { /* * Insert the hash bits into the page-aligned pointer. @@ -1729,24 +1630,23 @@ ENTRY(sys_cmpxchg) { /* * We very carefully align the code that actually runs with - * the lock held (nine bundles) so that we know it is all in + * the lock held (twelve bundles) so that we know it is all in * the icache when we start. This instruction (the jump) is * at the start of the first cache line, address zero mod 64; - * we jump to somewhere in the second cache line to issue the - * tns, then jump back to finish up. + * we jump to the very end of the second cache line to get that + * line loaded in the icache, then fall through to issue the tns + * in the third cache line, at which point it's all cached. + * Note that is for performance, not correctness. */ j .Lcmpxchg32_tns } -#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ - - ENTRY(__sys_cmpxchg_grab_lock) +/* Symbol for do_page_fault_ics() to use to compare against the PC. */ +.global __sys_cmpxchg_grab_lock +__sys_cmpxchg_grab_lock: /* * Perform the actual cmpxchg or atomic_update. - * Note that __futex_mark_unlocked() in uClibc relies on - * atomic_update() to always perform an "mf", so don't make - * it optional or conditional without modifying that code. */ .Ldo_cmpxchg32: { @@ -1764,10 +1664,13 @@ ENTRY(sys_cmpxchg) } { mvnz r24, r23, r25 /* Use atomic_update value if appropriate. */ - bbns r22, .Lcmpxchg32_mismatch + bbns r22, .Lcmpxchg32_nostore } + seq r22, r24, r21 /* Are we storing the value we loaded? */ + bbs r22, .Lcmpxchg32_nostore sw r0, r24 + /* The following instruction is the start of the second cache line. */ /* Do slow mtspr here so the following "mf" waits less. */ { move sp, r27 @@ -1775,7 +1678,6 @@ ENTRY(sys_cmpxchg) } mf - /* The following instruction is the start of the second cache line. */ { move r0, r21 sw ATOMIC_LOCK_REG_NAME, zero @@ -1783,7 +1685,7 @@ ENTRY(sys_cmpxchg) iret /* Duplicated code here in the case where we don't overlap "mf" */ -.Lcmpxchg32_mismatch: +.Lcmpxchg32_nostore: { move r0, r21 sw ATOMIC_LOCK_REG_NAME, zero @@ -1799,8 +1701,6 @@ ENTRY(sys_cmpxchg) * and for 64-bit cmpxchg. We provide it as a macro and put * it into both versions. We can't share the code literally * since it depends on having the right branch-back address. - * Note that the first few instructions should share the cache - * line with the second half of the actual locked code. */ .macro cmpxchg_lock, bitwidth @@ -1826,7 +1726,7 @@ ENTRY(sys_cmpxchg) } /* * The preceding instruction is the last thing that must be - * on the second cache line. + * hot in the icache before we do the "tns" above. */ #ifdef CONFIG_SMP @@ -1857,6 +1757,12 @@ ENTRY(sys_cmpxchg) .endm .Lcmpxchg32_tns: + /* + * This is the last instruction on the second cache line. + * The nop here loads the second line, then we fall through + * to the tns to load the third line before we take the lock. + */ + nop cmpxchg_lock 32 /* @@ -1872,9 +1778,6 @@ ENTRY(sys_cmpxchg) .align 64 .Lcmpxchg64: { -#if ATOMIC_LOCKS_FOUND_VIA_TABLE() - s2a ATOMIC_LOCK_REG_NAME, r25, r21 -#endif bzt r23, .Lcmpxchg64_tns } j .Lcmpxchg_badaddr @@ -1940,11 +1843,12 @@ int_unalign: push_extra_callee_saves r0 j do_trap -/* Include .intrpt1 array of interrupt vectors */ - .section ".intrpt1", "ax" +/* Include .intrpt array of interrupt vectors */ + .section ".intrpt", "ax" -#define op_handle_perf_interrupt bad_intr -#define op_handle_aux_perf_interrupt bad_intr +#ifndef CONFIG_USE_PMC +#define handle_perf_interrupt bad_intr +#endif #ifndef CONFIG_HARDWALL #define do_hardwall_trap bad_intr @@ -1985,7 +1889,7 @@ int_unalign: int_hand INT_IDN_AVAIL, IDN_AVAIL, bad_intr int_hand INT_UDN_AVAIL, UDN_AVAIL, bad_intr int_hand INT_PERF_COUNT, PERF_COUNT, \ - op_handle_perf_interrupt, handle_nmi + handle_perf_interrupt, handle_nmi int_hand INT_INTCTRL_3, INTCTRL_3, bad_intr #if CONFIG_KERNEL_PL == 2 dc_dispatch INT_INTCTRL_2, INTCTRL_2 @@ -1996,23 +1900,21 @@ int_unalign: #endif int_hand INT_INTCTRL_0, INTCTRL_0, bad_intr int_hand INT_MESSAGE_RCV_DWNCL, MESSAGE_RCV_DWNCL, \ - hv_message_intr, handle_interrupt_downcall + hv_message_intr int_hand INT_DEV_INTR_DWNCL, DEV_INTR_DWNCL, \ - tile_dev_intr, handle_interrupt_downcall + tile_dev_intr int_hand INT_I_ASID, I_ASID, bad_intr int_hand INT_D_ASID, D_ASID, bad_intr int_hand INT_DMATLB_MISS_DWNCL, DMATLB_MISS_DWNCL, \ - do_page_fault, handle_interrupt_downcall + do_page_fault int_hand INT_SNITLB_MISS_DWNCL, SNITLB_MISS_DWNCL, \ - do_page_fault, handle_interrupt_downcall + do_page_fault int_hand INT_DMATLB_ACCESS_DWNCL, DMATLB_ACCESS_DWNCL, \ - do_page_fault, handle_interrupt_downcall + do_page_fault int_hand INT_SN_CPL, SN_CPL, bad_intr int_hand INT_DOUBLE_FAULT, DOUBLE_FAULT, do_trap -#if CHIP_HAS_AUX_PERF_COUNTERS() int_hand INT_AUX_PERF_COUNT, AUX_PERF_COUNT, \ - op_handle_aux_perf_interrupt, handle_nmi -#endif + handle_perf_interrupt, handle_nmi /* Synthetic interrupt delivered only by the simulator */ int_hand INT_BREAKPOINT, BREAKPOINT, do_breakpoint |
