diff options
Diffstat (limited to 'arch/tile/kernel/intvec_32.S')
-rw-r--r-- | arch/tile/kernel/intvec_32.S | 175 |
1 files changed, 43 insertions, 132 deletions
diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S index fffcfa6b3a6..72ade79b621 100644 --- a/arch/tile/kernel/intvec_32.S +++ b/arch/tile/kernel/intvec_32.S @@ -851,14 +851,27 @@ STD_ENTRY(interrupt_return) /* Check to see if there is any work to do before returning to user. */ { addi r29, r32, THREAD_INFO_FLAGS_OFFSET - moveli r28, lo16(_TIF_ALLWORK_MASK) + moveli r1, lo16(_TIF_ALLWORK_MASK) } { lw r29, r29 - auli r28, r28, ha16(_TIF_ALLWORK_MASK) + auli r1, r1, ha16(_TIF_ALLWORK_MASK) } - and r28, r29, r28 - bnz r28, .Lwork_pending + and r1, r29, r1 + bzt r1, .Lrestore_all + + /* + * Make sure we have all the registers saved for signal + * handling or single-step. Call out to C code to figure out + * exactly what we need to do for each flag bit, then if + * necessary, reload the flags and recheck. + */ + push_extra_callee_saves r0 + { + PTREGS_PTR(r0, PTREGS_OFFSET_BASE) + jal do_work_pending + } + bnz r0, .Lresume_userspace /* * In the NMI case we @@ -1099,99 +1112,6 @@ STD_ENTRY(interrupt_return) pop_reg r50 pop_reg r51, sp, PTREGS_OFFSET_REG(29) - PTREGS_OFFSET_REG(51) j .Lcontinue_restore_regs - -.Lwork_pending: - /* Mask the reschedule flag */ - andi r28, r29, _TIF_NEED_RESCHED - - { - /* - * If the NEED_RESCHED flag is called, we call schedule(), which - * may drop this context right here and go do something else. - * On return, jump back to .Lresume_userspace and recheck. - */ - bz r28, .Lasync_tlb - - /* Mask the async-tlb flag */ - andi r28, r29, _TIF_ASYNC_TLB - } - - jal schedule - FEEDBACK_REENTER(interrupt_return) - - /* Reload the flags and check again */ - j .Lresume_userspace - -.Lasync_tlb: - { - bz r28, .Lneed_sigpending - - /* Mask the sigpending flag */ - andi r28, r29, _TIF_SIGPENDING - } - - PTREGS_PTR(r0, PTREGS_OFFSET_BASE) - jal do_async_page_fault - FEEDBACK_REENTER(interrupt_return) - - /* - * Go restart the "resume userspace" process. We may have - * fired a signal, and we need to disable interrupts again. - */ - j .Lresume_userspace - -.Lneed_sigpending: - /* - * At this point we are either doing signal handling or single-step, - * so either way make sure we have all the registers saved. - */ - push_extra_callee_saves r0 - - { - /* If no signal pending, skip to singlestep check */ - bz r28, .Lneed_singlestep - - /* Mask the singlestep flag */ - andi r28, r29, _TIF_SINGLESTEP - } - - jal do_signal - FEEDBACK_REENTER(interrupt_return) - - /* Reload the flags and check again */ - j .Lresume_userspace - -.Lneed_singlestep: - { - /* Get a pointer to the EX1 field */ - PTREGS_PTR(r29, PTREGS_OFFSET_EX1) - - /* If we get here, our bit must be set. */ - bz r28, .Lwork_confusion - } - /* If we are in priv mode, don't single step */ - lw r28, r29 - andi r28, r28, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */ - bnz r28, .Lrestore_all - - /* Allow interrupts within the single step code */ - TRACE_IRQS_ON /* Note: clobbers registers r0-r29 */ - IRQ_ENABLE(r20, r21) - - /* try to single-step the current instruction */ - PTREGS_PTR(r0, PTREGS_OFFSET_BASE) - jal single_step_once - FEEDBACK_REENTER(interrupt_return) - - /* Re-disable interrupts. TRACE_IRQS_OFF in .Lrestore_all. */ - IRQ_DISABLE(r20,r21) - - j .Lrestore_all - -.Lwork_confusion: - move r0, r28 - panic "thread_info allwork flags unhandled on userspace resume: %#x" - STD_ENDPROC(interrupt_return) /* @@ -1550,7 +1470,10 @@ STD_ENTRY(_sys_clone) * We place it in the __HEAD section to ensure it is relatively * near to the intvec_SWINT_1 code (reachable by a conditional branch). * - * Must match register usage in do_page_fault(). + * Our use of ATOMIC_LOCK_REG here must match do_page_fault_ics(). + * + * As we do in lib/atomic_asm_32.S, we bypass a store if the value we + * would store is the same as the value we just loaded. */ __HEAD .align 64 @@ -1611,17 +1534,7 @@ ENTRY(sys_cmpxchg) { shri r20, r25, 32 - ATOMIC_HASH_L1_SHIFT slt_u r23, r0, r23 - - /* - * Ensure that the TLB is loaded before we take out the lock. - * On TILEPro, this will start fetching the value all the way - * into our L1 as well (and if it gets modified before we - * grab the lock, it will be invalidated from our cache - * before we reload it). On tile64, we'll start fetching it - * into our L1 if we're the home, and if we're not, we'll - * still at least start fetching it into the home's L2. - */ - lw r26, r0 + lw r26, r0 /* see comment in the "#else" for the "lw r26". */ } { s2a r21, r20, r21 @@ -1637,18 +1550,9 @@ ENTRY(sys_cmpxchg) bbs r23, .Lcmpxchg64 andi r23, r0, 7 /* Precompute alignment for cmpxchg64. */ } - { - /* - * We very carefully align the code that actually runs with - * the lock held (nine bundles) so that we know it is all in - * the icache when we start. This instruction (the jump) is - * at the start of the first cache line, address zero mod 64; - * we jump to somewhere in the second cache line to issue the - * tns, then jump back to finish up. - */ s2a ATOMIC_LOCK_REG_NAME, r25, r21 - j .Lcmpxchg32_tns + j .Lcmpxchg32_tns /* see comment in the #else for the jump. */ } #else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ @@ -1713,24 +1617,25 @@ ENTRY(sys_cmpxchg) { /* * We very carefully align the code that actually runs with - * the lock held (nine bundles) so that we know it is all in + * the lock held (twelve bundles) so that we know it is all in * the icache when we start. This instruction (the jump) is * at the start of the first cache line, address zero mod 64; - * we jump to somewhere in the second cache line to issue the - * tns, then jump back to finish up. + * we jump to the very end of the second cache line to get that + * line loaded in the icache, then fall through to issue the tns + * in the third cache line, at which point it's all cached. + * Note that is for performance, not correctness. */ j .Lcmpxchg32_tns } #endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ - ENTRY(__sys_cmpxchg_grab_lock) +/* Symbol for do_page_fault_ics() to use to compare against the PC. */ +.global __sys_cmpxchg_grab_lock +__sys_cmpxchg_grab_lock: /* * Perform the actual cmpxchg or atomic_update. - * Note that the system <arch/atomic.h> header relies on - * atomic_update() to always perform an "mf", so don't make - * it optional or conditional without modifying that code. */ .Ldo_cmpxchg32: { @@ -1748,10 +1653,13 @@ ENTRY(sys_cmpxchg) } { mvnz r24, r23, r25 /* Use atomic_update value if appropriate. */ - bbns r22, .Lcmpxchg32_mismatch + bbns r22, .Lcmpxchg32_nostore } + seq r22, r24, r21 /* Are we storing the value we loaded? */ + bbs r22, .Lcmpxchg32_nostore sw r0, r24 + /* The following instruction is the start of the second cache line. */ /* Do slow mtspr here so the following "mf" waits less. */ { move sp, r27 @@ -1759,7 +1667,6 @@ ENTRY(sys_cmpxchg) } mf - /* The following instruction is the start of the second cache line. */ { move r0, r21 sw ATOMIC_LOCK_REG_NAME, zero @@ -1767,7 +1674,7 @@ ENTRY(sys_cmpxchg) iret /* Duplicated code here in the case where we don't overlap "mf" */ -.Lcmpxchg32_mismatch: +.Lcmpxchg32_nostore: { move r0, r21 sw ATOMIC_LOCK_REG_NAME, zero @@ -1783,8 +1690,6 @@ ENTRY(sys_cmpxchg) * and for 64-bit cmpxchg. We provide it as a macro and put * it into both versions. We can't share the code literally * since it depends on having the right branch-back address. - * Note that the first few instructions should share the cache - * line with the second half of the actual locked code. */ .macro cmpxchg_lock, bitwidth @@ -1810,7 +1715,7 @@ ENTRY(sys_cmpxchg) } /* * The preceding instruction is the last thing that must be - * on the second cache line. + * hot in the icache before we do the "tns" above. */ #ifdef CONFIG_SMP @@ -1841,6 +1746,12 @@ ENTRY(sys_cmpxchg) .endm .Lcmpxchg32_tns: + /* + * This is the last instruction on the second cache line. + * The nop here loads the second line, then we fall through + * to the tns to load the third line before we take the lock. + */ + nop cmpxchg_lock 32 /* |