Diffstat (limited to 'arch/tile/kernel/intvec_32.S')
-rw-r--r--  arch/tile/kernel/intvec_32.S   484
1 file changed, 193 insertions, 291 deletions
diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S
index f5821626247..cdbda45a4e4 100644
--- a/arch/tile/kernel/intvec_32.S
+++ b/arch/tile/kernel/intvec_32.S
@@ -21,31 +21,17 @@
#include <asm/ptrace.h>
#include <asm/thread_info.h>
#include <asm/irqflags.h>
-#include <asm/atomic.h>
+#include <asm/atomic_32.h>
#include <asm/asm-offsets.h>
#include <hv/hypervisor.h>
#include <arch/abi.h>
#include <arch/interrupts.h>
#include <arch/spr_def.h>
-#ifdef CONFIG_PREEMPT
-# error "No support for kernel preemption currently"
-#endif
-
-#if INT_INTCTRL_K < 32 || INT_INTCTRL_K >= 48
-# error INT_INTCTRL_K coded to set high interrupt mask
-#endif
-
#define PTREGS_PTR(reg, ptreg) addli reg, sp, C_ABI_SAVE_AREA_SIZE + (ptreg)
#define PTREGS_OFFSET_SYSCALL PTREGS_OFFSET_REG(TREG_SYSCALL_NR)
-#if !CHIP_HAS_WH64()
- /* By making this an empty macro, we can use wh64 in the code. */
- .macro wh64 reg
- .endm
-#endif
-
.macro push_reg reg, ptr=sp, delta=-4
{
sw \ptr, \reg
@@ -193,7 +179,7 @@ intvec_\vecname:
* point sp at the top aligned address on the actual stack page.
*/
mfspr r0, SPR_SYSTEM_SAVE_K_0
- mm r0, r0, zero, LOG2_THREAD_SIZE, 31
+ mm r0, r0, zero, LOG2_NR_CPU_IDS, 31
0:
/*
@@ -211,6 +197,9 @@ intvec_\vecname:
* cache line 1: r14...r29
* cache line 0: 2 x frame, r0..r13
*/
+#if STACK_TOP_DELTA != 64
+#error STACK_TOP_DELTA must be 64 for assumptions here and in task_pt_regs()
+#endif
andi r0, r0, -64
/*
@@ -324,24 +313,20 @@ intvec_\vecname:
movei r3, 0
}
.else
- .ifc \c_routine, op_handle_perf_interrupt
+ .ifc \c_routine, handle_perf_interrupt
{
mfspr r2, PERF_COUNT_STS
movei r3, -1 /* not used, but set for consistency */
}
.else
-#if CHIP_HAS_AUX_PERF_COUNTERS()
- .ifc \c_routine, op_handle_aux_perf_interrupt
+ .ifc \c_routine, handle_perf_interrupt
{
mfspr r2, AUX_PERF_COUNT_STS
movei r3, -1 /* not used, but set for consistency */
}
.else
-#endif
movei r3, 0
-#if CHIP_HAS_AUX_PERF_COUNTERS()
.endif
-#endif
.endif
.endif
.endif
@@ -358,7 +343,7 @@ intvec_\vecname:
#ifdef __COLLECT_LINKER_FEEDBACK__
.pushsection .text.intvec_feedback,"ax"
.org (\vecnum << 5)
- FEEDBACK_ENTER_EXPLICIT(intvec_\vecname, .intrpt1, 1 << 8)
+ FEEDBACK_ENTER_EXPLICIT(intvec_\vecname, .intrpt, 1 << 8)
jrp lr
.popsection
#endif
@@ -472,7 +457,7 @@ intvec_\vecname:
}
{
auli r21, r21, ha16(__per_cpu_offset)
- mm r20, r20, zero, 0, LOG2_THREAD_SIZE-1
+ mm r20, r20, zero, 0, LOG2_NR_CPU_IDS-1
}
s2a r20, r20, r21
lw tp, r20
@@ -566,7 +551,6 @@ intvec_\vecname:
.endif
mtspr INTERRUPT_CRITICAL_SECTION, zero
-#if CHIP_HAS_WH64()
/*
* Prepare the first 256 stack bytes to be rapidly accessible
* without having to fetch the background data. We don't really
@@ -587,7 +571,6 @@ intvec_\vecname:
addi r52, r52, -64
}
wh64 r52
-#endif
#ifdef CONFIG_TRACE_IRQFLAGS
.ifnc \function,handle_nmi
@@ -766,7 +749,7 @@ intvec_\vecname:
.macro dc_dispatch vecnum, vecname
.org (\vecnum << 8)
intvec_\vecname:
- j hv_downcall_dispatch
+ j _hv_downcall_dispatch
ENDPROC(intvec_\vecname)
.endm
@@ -803,6 +786,10 @@ handle_interrupt:
* This routine takes a boolean in r30 indicating if this is an NMI.
* If so, we also expect a boolean in r31 indicating whether to
* re-enable the oprofile interrupts.
+ *
+ * Note that .Lresume_userspace is jumped to directly in several
+ * places, and we need to make sure r30 is set correctly in those
+ * callers as well.
*/
STD_ENTRY(interrupt_return)
/* If we're resuming to kernel space, don't check thread flags. */
@@ -812,17 +799,37 @@ STD_ENTRY(interrupt_return)
}
lw r29, r29
andi r29, r29, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */
+ bzt r29, .Lresume_userspace
+
+#ifdef CONFIG_PREEMPT
+ /* Returning to kernel space. Check if we need preemption. */
+ GET_THREAD_INFO(r29)
+ addli r28, r29, THREAD_INFO_FLAGS_OFFSET
{
- bzt r29, .Lresume_userspace
- PTREGS_PTR(r29, PTREGS_OFFSET_PC)
+ lw r28, r28
+ addli r29, r29, THREAD_INFO_PREEMPT_COUNT_OFFSET
+ }
+ {
+ andi r28, r28, _TIF_NEED_RESCHED
+ lw r29, r29
}
+ bzt r28, 1f
+ bnz r29, 1f
+ /* Disable interrupts explicitly for preemption. */
+ IRQ_DISABLE(r20,r21)
+ TRACE_IRQS_OFF
+ jal preempt_schedule_irq
+ FEEDBACK_REENTER(interrupt_return)
+1:
+#endif
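
For reference, the CONFIG_PREEMPT path added above amounts to the following C-level sketch; the field names come from the asm-offsets used in the hunk, but the helper itself is illustrative, not kernel code:

    /* Only preempt when a reschedule is pending and preemption is
     * enabled; IRQs are disabled around preempt_schedule_irq().
     */
    static void kernel_return_preempt_check(struct thread_info *ti)
    {
            if (!(ti->flags & _TIF_NEED_RESCHED))   /* bzt r28, 1f */
                    return;
            if (ti->preempt_count != 0)             /* bnz r29, 1f */
                    return;
            local_irq_disable();    /* IRQ_DISABLE + TRACE_IRQS_OFF */
            preempt_schedule_irq(); /* jal preempt_schedule_irq */
    }
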
/* If we're resuming to _cpu_idle_nap, bump PC forward by 8. */
{
- lw r28, r29
+ PTREGS_PTR(r29, PTREGS_OFFSET_PC)
moveli r27, lo16(_cpu_idle_nap)
}
{
+ lw r28, r29
auli r27, r27, ha16(_cpu_idle_nap)
}
{
@@ -839,6 +846,18 @@ STD_ENTRY(interrupt_return)
FEEDBACK_REENTER(interrupt_return)
/*
+ * Use r33 to hold whether we have already loaded the callee-saves
+ * into ptregs. We don't want to do it twice in this loop, since
+ * then we'd clobber whatever changes are made by ptrace, etc.
+ * Get base of stack in r32.
+ */
+ {
+ GET_THREAD_INFO(r32)
+ movei r33, 0
+ }
+
+.Lretry_work_pending:
+ /*
* Disable interrupts so as to make sure we don't
* miss an interrupt that sets any of the thread flags (like
* need_resched or sigpending) between sampling and the iret.
@@ -848,21 +867,33 @@ STD_ENTRY(interrupt_return)
IRQ_DISABLE(r20, r21)
TRACE_IRQS_OFF /* Note: clobbers registers r0-r29 */
- /* Get base of stack in r32; note r30/31 are used as arguments here. */
- GET_THREAD_INFO(r32)
-
/* Check to see if there is any work to do before returning to user. */
{
addi r29, r32, THREAD_INFO_FLAGS_OFFSET
- moveli r28, lo16(_TIF_ALLWORK_MASK)
+ moveli r1, lo16(_TIF_ALLWORK_MASK)
}
{
lw r29, r29
- auli r28, r28, ha16(_TIF_ALLWORK_MASK)
+ auli r1, r1, ha16(_TIF_ALLWORK_MASK)
}
- and r28, r29, r28
- bnz r28, .Lwork_pending
+ and r1, r29, r1
+ bzt r1, .Lrestore_all
+
+ /*
+ * Make sure we have all the registers saved for signal
+ * handling, notify-resume, or single-step. Call out to C
+ * code to figure out exactly what we need to do for each flag bit,
+ * then if necessary, reload the flags and recheck.
+ */
+ {
+ PTREGS_PTR(r0, PTREGS_OFFSET_BASE)
+ bnz r33, 1f
+ }
+ push_extra_callee_saves r0
+ movei r33, 1
+1: jal do_work_pending
+ bnz r0, .Lretry_work_pending
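
The retry protocol described in the comment above, sketched in C (the loop shape is illustrative; the move of the masked flags into r1, the second argument register, suggests do_work_pending() takes the regs pointer plus the flags):

    /* Sketch of the .Lretry_work_pending loop: sample the flags with
     * IRQs off, push the extra callee-saves at most once (tracked in
     * r33), and loop whenever do_work_pending() reports more work.
     */
    static void resume_userspace_work(struct pt_regs *regs)
    {
            int callee_saves_pushed = 0;            /* r33 */

            for (;;) {
                    unsigned long flags;

                    local_irq_disable();            /* IRQ_DISABLE */
                    flags = current_thread_info()->flags & _TIF_ALLWORK_MASK;
                    if (!flags)
                            return;                 /* bzt r1, .Lrestore_all */
                    if (!callee_saves_pushed)       /* push_extra_callee_saves */
                            callee_saves_pushed = 1;
                    if (!do_work_pending(regs, flags))
                            return;                 /* fall through to restore */
            }
    }
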
/*
* In the NMI case we
@@ -915,6 +946,13 @@ STD_ENTRY(interrupt_return)
bzt r30, .Lrestore_regs
3:
+ /* We are relying on INT_PERF_COUNT at 33, and AUX_PERF_COUNT at 48 */
+ {
+ moveli r0, lo16(1 << (INT_PERF_COUNT - 32))
+ bz r31, .Lrestore_regs
+ }
+ auli r0, r0, ha16(1 << (INT_AUX_PERF_COUNT - 32))
+ mtspr SPR_INTERRUPT_MASK_RESET_K_1, r0
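
With the interrupt numbers the comment cites (INT_PERF_COUNT at 33, INT_AUX_PERF_COUNT at 48), the value assembled into r0 for the mask-reset write works out to:

    (1 << (33 - 32)) | (1 << (48 - 32))  =  0x2 | 0x10000  =  0x00010002

so the moveli/lo16 half supplies the PERF_COUNT bit and the auli/ha16 half supplies the AUX_PERF_COUNT bit of SPR_INTERRUPT_MASK_RESET_K_1.
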
/*
* We now commit to returning from this interrupt, since we will be
@@ -1103,142 +1141,9 @@ STD_ENTRY(interrupt_return)
pop_reg r50
pop_reg r51, sp, PTREGS_OFFSET_REG(29) - PTREGS_OFFSET_REG(51)
j .Lcontinue_restore_regs
-
-.Lwork_pending:
- /* Mask the reschedule flag */
- andi r28, r29, _TIF_NEED_RESCHED
-
- {
- /*
- * If the NEED_RESCHED flag is called, we call schedule(), which
- * may drop this context right here and go do something else.
- * On return, jump back to .Lresume_userspace and recheck.
- */
- bz r28, .Lasync_tlb
-
- /* Mask the async-tlb flag */
- andi r28, r29, _TIF_ASYNC_TLB
- }
-
- jal schedule
- FEEDBACK_REENTER(interrupt_return)
-
- /* Reload the flags and check again */
- j .Lresume_userspace
-
-.Lasync_tlb:
- {
- bz r28, .Lneed_sigpending
-
- /* Mask the sigpending flag */
- andi r28, r29, _TIF_SIGPENDING
- }
-
- PTREGS_PTR(r0, PTREGS_OFFSET_BASE)
- jal do_async_page_fault
- FEEDBACK_REENTER(interrupt_return)
-
- /*
- * Go restart the "resume userspace" process. We may have
- * fired a signal, and we need to disable interrupts again.
- */
- j .Lresume_userspace
-
-.Lneed_sigpending:
- /*
- * At this point we are either doing signal handling or single-step,
- * so either way make sure we have all the registers saved.
- */
- push_extra_callee_saves r0
-
- {
- /* If no signal pending, skip to singlestep check */
- bz r28, .Lneed_singlestep
-
- /* Mask the singlestep flag */
- andi r28, r29, _TIF_SINGLESTEP
- }
-
- jal do_signal
- FEEDBACK_REENTER(interrupt_return)
-
- /* Reload the flags and check again */
- j .Lresume_userspace
-
-.Lneed_singlestep:
- {
- /* Get a pointer to the EX1 field */
- PTREGS_PTR(r29, PTREGS_OFFSET_EX1)
-
- /* If we get here, our bit must be set. */
- bz r28, .Lwork_confusion
- }
- /* If we are in priv mode, don't single step */
- lw r28, r29
- andi r28, r28, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */
- bnz r28, .Lrestore_all
-
- /* Allow interrupts within the single step code */
- TRACE_IRQS_ON /* Note: clobbers registers r0-r29 */
- IRQ_ENABLE(r20, r21)
-
- /* try to single-step the current instruction */
- PTREGS_PTR(r0, PTREGS_OFFSET_BASE)
- jal single_step_once
- FEEDBACK_REENTER(interrupt_return)
-
- /* Re-disable interrupts. TRACE_IRQS_OFF in .Lrestore_all. */
- IRQ_DISABLE(r20,r21)
-
- j .Lrestore_all
-
-.Lwork_confusion:
- move r0, r28
- panic "thread_info allwork flags unhandled on userspace resume: %#x"
-
STD_ENDPROC(interrupt_return)
/*
- * This interrupt variant clears the INT_INTCTRL_K interrupt mask bit
- * before returning, so we can properly get more downcalls.
- */
- .pushsection .text.handle_interrupt_downcall,"ax"
-handle_interrupt_downcall:
- finish_interrupt_save handle_interrupt_downcall
- check_single_stepping normal, .Ldispatch_downcall
-.Ldispatch_downcall:
-
- /* Clear INTCTRL_K from the set of interrupts we ever enable. */
- GET_INTERRUPTS_ENABLED_MASK_PTR(r30)
- {
- addi r30, r30, 4
- movei r31, INT_MASK(INT_INTCTRL_K)
- }
- {
- lw r20, r30
- nor r21, r31, zero
- }
- and r20, r20, r21
- sw r30, r20
-
- {
- jalr r0
- PTREGS_PTR(r0, PTREGS_OFFSET_BASE)
- }
- FEEDBACK_REENTER(handle_interrupt_downcall)
-
- /* Allow INTCTRL_K to be enabled next time we enable interrupts. */
- lw r20, r30
- or r20, r20, r31
- sw r30, r20
-
- {
- movei r30, 0 /* not an NMI */
- j interrupt_return
- }
- STD_ENDPROC(handle_interrupt_downcall)
-
- /*
* Some interrupts don't check for single stepping
*/
.pushsection .text.handle_interrupt_no_single_step,"ax"
@@ -1273,6 +1178,10 @@ handle_nmi:
PTREGS_PTR(r0, PTREGS_OFFSET_BASE)
}
FEEDBACK_REENTER(handle_nmi)
+ {
+ movei r30, 1
+ seq r31, r0, zero
+ }
j interrupt_return
STD_ENDPROC(handle_nmi)
@@ -1300,15 +1209,20 @@ handle_syscall:
add r20, r20, tp
lw r21, r20
addi r21, r21, 1
- sw r20, r21
+ {
+ sw r20, r21
+ GET_THREAD_INFO(r31)
+ }
/* Trace syscalls, if requested. */
- GET_THREAD_INFO(r31)
addi r31, r31, THREAD_INFO_FLAGS_OFFSET
lw r30, r31
andi r30, r30, _TIF_SYSCALL_TRACE
bzt r30, .Lrestore_syscall_regs
- jal do_syscall_trace
+ {
+ PTREGS_PTR(r0, PTREGS_OFFSET_BASE)
+ jal do_syscall_trace_enter
+ }
FEEDBACK_REENTER(handle_syscall)
/*
@@ -1342,8 +1256,8 @@ handle_syscall:
lw r20, r20
/* Jump to syscall handler. */
- jalr r20; .Lhandle_syscall_link:
- FEEDBACK_REENTER(handle_syscall)
+ jalr r20
+.Lhandle_syscall_link: /* value of "lr" after "jalr r20" above */
/*
* Write our r0 onto the stack so it gets restored instead
@@ -1352,13 +1266,22 @@ handle_syscall:
PTREGS_PTR(r29, PTREGS_OFFSET_REG(0))
sw r29, r0
+.Lsyscall_sigreturn_skip:
+ FEEDBACK_REENTER(handle_syscall)
+
/* Do syscall trace again, if requested. */
lw r30, r31
andi r30, r30, _TIF_SYSCALL_TRACE
bzt r30, 1f
- jal do_syscall_trace
+ {
+ PTREGS_PTR(r0, PTREGS_OFFSET_BASE)
+ jal do_syscall_trace_exit
+ }
FEEDBACK_REENTER(handle_syscall)
-1: j .Lresume_userspace /* jump into middle of interrupt_return */
+1: {
+ movei r30, 0 /* not an NMI */
+ j .Lresume_userspace /* jump into middle of interrupt_return */
+ }
.Linvalid_syscall:
/* Report an invalid syscall back to the user program */
@@ -1367,7 +1290,10 @@ handle_syscall:
movei r28, -ENOSYS
}
sw r29, r28
- j .Lresume_userspace /* jump into middle of interrupt_return */
+ {
+ movei r30, 0 /* not an NMI */
+ j .Lresume_userspace /* jump into middle of interrupt_return */
+ }
STD_ENDPROC(handle_syscall)
/* Return the address for oprofile to suppress in backtraces. */
@@ -1383,9 +1309,27 @@ STD_ENTRY(ret_from_fork)
jal sim_notify_fork
jal schedule_tail
FEEDBACK_REENTER(ret_from_fork)
- j .Lresume_userspace /* jump into middle of interrupt_return */
+ {
+ movei r30, 0 /* not an NMI */
+ j .Lresume_userspace /* jump into middle of interrupt_return */
+ }
STD_ENDPROC(ret_from_fork)
+STD_ENTRY(ret_from_kernel_thread)
+ jal sim_notify_fork
+ jal schedule_tail
+ FEEDBACK_REENTER(ret_from_fork)
+ {
+ move r0, r31
+ jalr r30
+ }
+ FEEDBACK_REENTER(ret_from_kernel_thread)
+ {
+ movei r30, 0 /* not an NMI */
+ j .Lresume_userspace /* jump into middle of interrupt_return */
+ }
+ STD_ENDPROC(ret_from_kernel_thread)
+
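
A rough C-level picture of the new ret_from_kernel_thread (illustrative only): copy_thread() is assumed to have left the thread function in callee-saved r30 and its argument in r31, which is what the "move r0, r31 ; jalr r30" bundle consumes; prev, fn and arg below stand in for state the real code gets from the scheduler and copy_thread().

    static void ret_from_kernel_thread_sketch(struct task_struct *prev,
                                              int (*fn)(void *), void *arg)
    {
            /* the asm also does "jal sim_notify_fork" first */
            schedule_tail(prev);    /* jal schedule_tail */
            fn(arg);                /* move r0, r31 ; jalr r30 */
            /* ...then fall into .Lresume_userspace with r30 = 0. */
    }
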
/*
* Code for ill interrupt.
*/
@@ -1470,7 +1414,10 @@ handle_ill:
3:
/* set PC and continue */
lw r26, r24
- sw r28, r26
+ {
+ sw r28, r26
+ GET_THREAD_INFO(r0)
+ }
/*
* Clear TIF_SINGLESTEP to prevent recursion if we execute an ill.
@@ -1478,7 +1425,6 @@ handle_ill:
* need to clear it here and can't really impose on all other arches.
* So what's another write between friends?
*/
- GET_THREAD_INFO(r0)
addi r1, r0, THREAD_INFO_FLAGS_OFFSET
{
@@ -1492,12 +1438,14 @@ handle_ill:
{
lw r0, r0 /* indirect thru thread_info to get task_info*/
addi r1, sp, C_ABI_SAVE_AREA_SIZE /* put ptregs pointer into r1 */
- move r2, zero /* load error code into r2 */
}
jal send_sigtrap /* issue a SIGTRAP */
FEEDBACK_REENTER(handle_ill)
- j .Lresume_userspace /* jump into middle of interrupt_return */
+ {
+ movei r30, 0 /* not an NMI */
+ j .Lresume_userspace /* jump into middle of interrupt_return */
+ }
.Ldispatch_normal_ill:
{
@@ -1527,21 +1475,24 @@ STD_ENTRY_LOCAL(bad_intr)
panic "Unhandled interrupt %#x: PC %#lx"
STD_ENDPROC(bad_intr)
-/* Put address of pt_regs in reg and jump. */
-#define PTREGS_SYSCALL(x, reg) \
+/*
+ * Special-case sigreturn to not write r0 to the stack on return.
+ * This is technically more efficient, but it also avoids difficulties
+ * in the 64-bit OS when handling 32-bit compat code, since we must not
+ * sign-extend r0 for the sigreturn return-value case.
+ */
+#define PTREGS_SYSCALL_SIGRETURN(x, reg) \
STD_ENTRY(_##x); \
+ addli lr, lr, .Lsyscall_sigreturn_skip - .Lhandle_syscall_link; \
{ \
PTREGS_PTR(reg, PTREGS_OFFSET_BASE); \
j x \
}; \
STD_ENDPROC(_##x)
-PTREGS_SYSCALL(sys_execve, r3)
-PTREGS_SYSCALL(sys_sigaltstack, r2)
-PTREGS_SYSCALL(sys_rt_sigreturn, r0)
-PTREGS_SYSCALL(sys_cmpxchg_badaddr, r1)
+PTREGS_SYSCALL_SIGRETURN(sys_rt_sigreturn, r0)
-/* Save additional callee-saves to pt_regs, put address in r4 and jump. */
+/* Save additional callee-saves to pt_regs and jump to standard function. */
STD_ENTRY(_sys_clone)
push_extra_callee_saves r4
j sys_clone
@@ -1576,13 +1527,17 @@ STD_ENTRY(_sys_clone)
* We place it in the __HEAD section to ensure it is relatively
* near to the intvec_SWINT_1 code (reachable by a conditional branch).
*
- * Must match register usage in do_page_fault().
+ * Our use of ATOMIC_LOCK_REG here must match do_page_fault_ics().
+ *
+ * As we do in lib/atomic_asm_32.S, we bypass a store if the value we
+ * would store is the same as the value we just loaded.
*/
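
In C terms, the store-bypass mentioned above makes the 32-bit locked fast path behave roughly like this (a sketch of the cmpxchg case only, not the kernel's code):

    /* cur is the value loaded from the user word under the lock (kept
     * in r21); the store is skipped both on a cmpxchg miscompare and
     * when the new value equals what is already in memory.
     */
    static u32 cmpxchg32_under_lock(u32 *uaddr, u32 oldval, u32 newval)
    {
            u32 cur = *uaddr;                       /* value in r21 */

            if (cur == oldval && cur != newval)     /* seq/bbs nostore */
                    *uaddr = newval;                /* sw r0, r24 */
            return cur;                             /* returned in r0 */
    }
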
__HEAD
.align 64
/* Align much later jump on the start of a cache line. */
-#if !ATOMIC_LOCKS_FOUND_VIA_TABLE()
- nop; nop
+ nop
+#if PAGE_SIZE >= 0x10000
+ nop
#endif
ENTRY(sys_cmpxchg)
@@ -1610,67 +1565,13 @@ ENTRY(sys_cmpxchg)
* about aliasing among multiple mappings of the same physical page,
* and we ignore the low 3 bits so we have one lock that covers
* both a cmpxchg64() and a cmpxchg() on either its low or high word.
- * NOTE: this code must match __atomic_hashed_lock() in lib/atomic.c.
+ * NOTE: this must match __atomic_hashed_lock() in lib/atomic_32.c.
*/
-#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
- {
- /* Check for unaligned input. */
- bnz sp, .Lcmpxchg_badaddr
- mm r25, r0, zero, 3, PAGE_SHIFT-1
- }
- {
- crc32_32 r25, zero, r25
- moveli r21, lo16(atomic_lock_ptr)
- }
- {
- auli r21, r21, ha16(atomic_lock_ptr)
- auli r23, zero, hi16(PAGE_OFFSET) /* hugepage-aligned */
- }
- {
- shri r20, r25, 32 - ATOMIC_HASH_L1_SHIFT
- slt_u r23, r0, r23
-
- /*
- * Ensure that the TLB is loaded before we take out the lock.
- * On TILEPro, this will start fetching the value all the way
- * into our L1 as well (and if it gets modified before we
- * grab the lock, it will be invalidated from our cache
- * before we reload it). On tile64, we'll start fetching it
- * into our L1 if we're the home, and if we're not, we'll
- * still at least start fetching it into the home's L2.
- */
- lw r26, r0
- }
- {
- s2a r21, r20, r21
- bbns r23, .Lcmpxchg_badaddr
- }
- {
- lw r21, r21
- seqi r23, TREG_SYSCALL_NR_NAME, __NR_FAST_cmpxchg64
- andi r25, r25, ATOMIC_HASH_L2_SIZE - 1
- }
- {
- /* Branch away at this point if we're doing a 64-bit cmpxchg. */
- bbs r23, .Lcmpxchg64
- andi r23, r0, 7 /* Precompute alignment for cmpxchg64. */
- }
-
- {
- /*
- * We very carefully align the code that actually runs with
- * the lock held (nine bundles) so that we know it is all in
- * the icache when we start. This instruction (the jump) is
- * at the start of the first cache line, address zero mod 64;
- * we jump to somewhere in the second cache line to issue the
- * tns, then jump back to finish up.
- */
- s2a ATOMIC_LOCK_REG_NAME, r25, r21
- j .Lcmpxchg32_tns
- }
+#if (PAGE_OFFSET & 0xffff) != 0
+# error Code here assumes PAGE_OFFSET can be loaded with just hi16()
+#endif
-#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
{
/* Check for unaligned input. */
bnz sp, .Lcmpxchg_badaddr
@@ -1684,7 +1585,7 @@ ENTRY(sys_cmpxchg)
* Because of C pointer arithmetic, we want to compute this:
*
* ((char*)atomic_locks +
- * (((r0 >> 3) & (1 << (ATOMIC_HASH_SIZE - 1))) << 2))
+ * (((r0 >> 3) & ((1 << ATOMIC_HASH_SHIFT) - 1)) << 2))
*
* Instead of two shifts we just ">> 1", and use 'mm'
* to ignore the low and high bits we don't want.
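
The pointer arithmetic in that comment, written out as a C helper; per the NOTE earlier in this routine it should mirror __atomic_hashed_lock() in lib/atomic_32.c, but treat the helper itself as a sketch:

    /* Ignore the low 3 bits of the user address (one lock covers a
     * cmpxchg64 and both 32-bit halves), keep the next
     * ATOMIC_HASH_SHIFT bits, and index the word-sized
     * atomic_locks[] array.
     */
    static int *hashed_lock(unsigned long uaddr)
    {
            unsigned long idx = (uaddr >> 3) & ((1UL << ATOMIC_HASH_SHIFT) - 1);

            return (int *)((char *)atomic_locks + (idx << 2));
    }
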
@@ -1695,21 +1596,21 @@ ENTRY(sys_cmpxchg)
/*
* Ensure that the TLB is loaded before we take out the lock.
- * On tilepro, this will start fetching the value all the way
- * into our L1 as well (and if it gets modified before we
- * grab the lock, it will be invalidated from our cache
- * before we reload it). On tile64, we'll start fetching it
- * into our L1 if we're the home, and if we're not, we'll
- * still at least start fetching it into the home's L2.
+ * This will start fetching the value all the way into our L1
+ * as well (and if it gets modified before we grab the lock,
+ * it will be invalidated from our cache before we reload it).
*/
lw r26, r0
}
{
- /* atomic_locks is page aligned so this suffices to get its addr. */
- auli r21, zero, hi16(atomic_locks)
+ auli r21, zero, ha16(atomic_locks)
bbns r23, .Lcmpxchg_badaddr
}
+#if PAGE_SIZE < 0x10000
+ /* atomic_locks is page-aligned so for big pages we don't need this. */
+ addli r21, r21, lo16(atomic_locks)
+#endif
{
/*
* Insert the hash bits into the page-aligned pointer.
@@ -1729,24 +1630,23 @@ ENTRY(sys_cmpxchg)
{
/*
* We very carefully align the code that actually runs with
- * the lock held (nine bundles) so that we know it is all in
+ * the lock held (twelve bundles) so that we know it is all in
* the icache when we start. This instruction (the jump) is
* at the start of the first cache line, address zero mod 64;
- * we jump to somewhere in the second cache line to issue the
- * tns, then jump back to finish up.
+ * we jump to the very end of the second cache line to get that
+ * line loaded in the icache, then fall through to issue the tns
+ * in the third cache line, at which point it's all cached.
+ * Note that this is for performance, not correctness.
*/
j .Lcmpxchg32_tns
}
-#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
-
- ENTRY(__sys_cmpxchg_grab_lock)
+/* Symbol for do_page_fault_ics() to use to compare against the PC. */
+.global __sys_cmpxchg_grab_lock
+__sys_cmpxchg_grab_lock:
/*
* Perform the actual cmpxchg or atomic_update.
- * Note that __futex_mark_unlocked() in uClibc relies on
- * atomic_update() to always perform an "mf", so don't make
- * it optional or conditional without modifying that code.
*/
.Ldo_cmpxchg32:
{
@@ -1764,10 +1664,13 @@ ENTRY(sys_cmpxchg)
}
{
mvnz r24, r23, r25 /* Use atomic_update value if appropriate. */
- bbns r22, .Lcmpxchg32_mismatch
+ bbns r22, .Lcmpxchg32_nostore
}
+ seq r22, r24, r21 /* Are we storing the value we loaded? */
+ bbs r22, .Lcmpxchg32_nostore
sw r0, r24
+ /* The following instruction is the start of the second cache line. */
/* Do slow mtspr here so the following "mf" waits less. */
{
move sp, r27
@@ -1775,7 +1678,6 @@ ENTRY(sys_cmpxchg)
}
mf
- /* The following instruction is the start of the second cache line. */
{
move r0, r21
sw ATOMIC_LOCK_REG_NAME, zero
@@ -1783,7 +1685,7 @@ ENTRY(sys_cmpxchg)
iret
/* Duplicated code here in the case where we don't overlap "mf" */
-.Lcmpxchg32_mismatch:
+.Lcmpxchg32_nostore:
{
move r0, r21
sw ATOMIC_LOCK_REG_NAME, zero
@@ -1799,8 +1701,6 @@ ENTRY(sys_cmpxchg)
* and for 64-bit cmpxchg. We provide it as a macro and put
* it into both versions. We can't share the code literally
* since it depends on having the right branch-back address.
- * Note that the first few instructions should share the cache
- * line with the second half of the actual locked code.
*/
.macro cmpxchg_lock, bitwidth
@@ -1826,7 +1726,7 @@ ENTRY(sys_cmpxchg)
}
/*
* The preceding instruction is the last thing that must be
- * on the second cache line.
+ * hot in the icache before we do the "tns" above.
*/
#ifdef CONFIG_SMP
@@ -1857,6 +1757,12 @@ ENTRY(sys_cmpxchg)
.endm
.Lcmpxchg32_tns:
+ /*
+ * This is the last instruction on the second cache line.
+ * The nop here loads the second line, then we fall through
+ * to the tns to load the third line before we take the lock.
+ */
+ nop
cmpxchg_lock 32
/*
@@ -1872,9 +1778,6 @@ ENTRY(sys_cmpxchg)
.align 64
.Lcmpxchg64:
{
-#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
- s2a ATOMIC_LOCK_REG_NAME, r25, r21
-#endif
bzt r23, .Lcmpxchg64_tns
}
j .Lcmpxchg_badaddr
@@ -1940,11 +1843,12 @@ int_unalign:
push_extra_callee_saves r0
j do_trap
-/* Include .intrpt1 array of interrupt vectors */
- .section ".intrpt1", "ax"
+/* Include .intrpt array of interrupt vectors */
+ .section ".intrpt", "ax"
-#define op_handle_perf_interrupt bad_intr
-#define op_handle_aux_perf_interrupt bad_intr
+#ifndef CONFIG_USE_PMC
+#define handle_perf_interrupt bad_intr
+#endif
#ifndef CONFIG_HARDWALL
#define do_hardwall_trap bad_intr
@@ -1985,7 +1889,7 @@ int_unalign:
int_hand INT_IDN_AVAIL, IDN_AVAIL, bad_intr
int_hand INT_UDN_AVAIL, UDN_AVAIL, bad_intr
int_hand INT_PERF_COUNT, PERF_COUNT, \
- op_handle_perf_interrupt, handle_nmi
+ handle_perf_interrupt, handle_nmi
int_hand INT_INTCTRL_3, INTCTRL_3, bad_intr
#if CONFIG_KERNEL_PL == 2
dc_dispatch INT_INTCTRL_2, INTCTRL_2
@@ -1996,23 +1900,21 @@ int_unalign:
#endif
int_hand INT_INTCTRL_0, INTCTRL_0, bad_intr
int_hand INT_MESSAGE_RCV_DWNCL, MESSAGE_RCV_DWNCL, \
- hv_message_intr, handle_interrupt_downcall
+ hv_message_intr
int_hand INT_DEV_INTR_DWNCL, DEV_INTR_DWNCL, \
- tile_dev_intr, handle_interrupt_downcall
+ tile_dev_intr
int_hand INT_I_ASID, I_ASID, bad_intr
int_hand INT_D_ASID, D_ASID, bad_intr
int_hand INT_DMATLB_MISS_DWNCL, DMATLB_MISS_DWNCL, \
- do_page_fault, handle_interrupt_downcall
+ do_page_fault
int_hand INT_SNITLB_MISS_DWNCL, SNITLB_MISS_DWNCL, \
- do_page_fault, handle_interrupt_downcall
+ do_page_fault
int_hand INT_DMATLB_ACCESS_DWNCL, DMATLB_ACCESS_DWNCL, \
- do_page_fault, handle_interrupt_downcall
+ do_page_fault
int_hand INT_SN_CPL, SN_CPL, bad_intr
int_hand INT_DOUBLE_FAULT, DOUBLE_FAULT, do_trap
-#if CHIP_HAS_AUX_PERF_COUNTERS()
int_hand INT_AUX_PERF_COUNT, AUX_PERF_COUNT, \
- op_handle_aux_perf_interrupt, handle_nmi
-#endif
+ handle_perf_interrupt, handle_nmi
/* Synthetic interrupt delivered only by the simulator */
int_hand INT_BREAKPOINT, BREAKPOINT, do_breakpoint