diff options
Diffstat (limited to 'arch/parisc/kernel/pacache.S')
| -rw-r--r-- | arch/parisc/kernel/pacache.S | 890 |
1 files changed, 549 insertions, 341 deletions
diff --git a/arch/parisc/kernel/pacache.S b/arch/parisc/kernel/pacache.S index 77e03bc0f93..b743a80eaba 100644 --- a/arch/parisc/kernel/pacache.S +++ b/arch/parisc/kernel/pacache.S @@ -26,31 +26,22 @@ * can be used. */ -#ifdef __LP64__ -#define ADDIB addib,* -#define CMPB cmpb,* -#define ANDCM andcm,* - +#ifdef CONFIG_64BIT .level 2.0w #else -#define ADDIB addib, -#define CMPB cmpb, -#define ANDCM andcm - .level 2.0 #endif -#include <asm/assembly.h> #include <asm/psw.h> +#include <asm/assembly.h> #include <asm/pgtable.h> #include <asm/cache.h> +#include <linux/linkage.h> .text .align 128 - .export flush_tlb_all_local,code - -flush_tlb_all_local: +ENTRY(flush_tlb_all_local) .proc .callinfo NO_CALLS .entry @@ -62,37 +53,27 @@ flush_tlb_all_local: * to happen in real mode with all interruptions disabled. */ - /* - * Once again, we do the rfi dance ... some day we need examine - * all of our uses of this type of code and see what can be - * consolidated. - */ - - rsm PSW_SM_I, %r19 /* relied upon translation! PA 2.0 Arch. F-5 */ - nop + /* pcxt_ssm_bug - relied upon translation! PA 2.0 Arch. F-4 and F-5 */ + rsm PSW_SM_I, %r19 /* save I-bit state */ + load32 PA(1f), %r1 nop nop nop nop nop - nop - - rsm PSW_SM_Q, %r0 /* Turn off Q bit to load iia queue */ - ldil L%REAL_MODE_PSW, %r1 - ldo R%REAL_MODE_PSW(%r1), %r1 - mtctl %r1, %cr22 + + rsm PSW_SM_Q, %r0 /* prep to load iia queue */ mtctl %r0, %cr17 /* Clear IIASQ tail */ mtctl %r0, %cr17 /* Clear IIASQ head */ - ldil L%PA(1f), %r1 - ldo R%PA(1f)(%r1), %r1 mtctl %r1, %cr18 /* IIAOQ head */ ldo 4(%r1), %r1 mtctl %r1, %cr18 /* IIAOQ tail */ + load32 REAL_MODE_PSW, %r1 + mtctl %r1, %ipsw rfi nop -1: ldil L%PA(cache_info), %r1 - ldo R%PA(cache_info)(%r1), %r1 +1: load32 PA(cache_info), %r1 /* Flush Instruction Tlb */ @@ -104,7 +85,7 @@ flush_tlb_all_local: LDREG ITLB_OFF_COUNT(%r1), %arg2 LDREG ITLB_LOOP(%r1), %arg3 - ADDIB= -1, %arg3, fitoneloop /* Preadjust and test */ + addib,COND(=) -1, %arg3, fitoneloop /* Preadjust and test */ movb,<,n %arg3, %r31, fitdone /* If loop < 0, skip */ copy %arg0, %r28 /* Init base addr */ @@ -114,14 +95,14 @@ fitmanyloop: /* Loop if LOOP >= 2 */ copy %arg2, %r29 /* Init middle loop count */ fitmanymiddle: /* Loop if LOOP >= 2 */ - ADDIB> -1, %r31, fitmanymiddle /* Adjusted inner loop decr */ + addib,COND(>) -1, %r31, fitmanymiddle /* Adjusted inner loop decr */ pitlbe 0(%sr1, %r28) pitlbe,m %arg1(%sr1, %r28) /* Last pitlbe and addr adjust */ - ADDIB> -1, %r29, fitmanymiddle /* Middle loop decr */ + addib,COND(>) -1, %r29, fitmanymiddle /* Middle loop decr */ copy %arg3, %r31 /* Re-init inner loop count */ movb,tr %arg0, %r28, fitmanyloop /* Re-init base addr */ - ADDIB<=,n -1, %r22, fitdone /* Outer loop count decr */ + addib,COND(<=),n -1, %r22, fitdone /* Outer loop count decr */ fitoneloop: /* Loop if LOOP = 1 */ mtsp %r20, %sr1 @@ -129,10 +110,10 @@ fitoneloop: /* Loop if LOOP = 1 */ copy %arg2, %r29 /* init middle loop count */ fitonemiddle: /* Loop if LOOP = 1 */ - ADDIB> -1, %r29, fitonemiddle /* Middle loop count decr */ + addib,COND(>) -1, %r29, fitonemiddle /* Middle loop count decr */ pitlbe,m %arg1(%sr1, %r28) /* pitlbe for one loop */ - ADDIB> -1, %r22, fitoneloop /* Outer loop count decr */ + addib,COND(>) -1, %r22, fitoneloop /* Outer loop count decr */ add %r21, %r20, %r20 /* increment space */ fitdone: @@ -147,7 +128,7 @@ fitdone: LDREG DTLB_OFF_COUNT(%r1), %arg2 LDREG DTLB_LOOP(%r1), %arg3 - ADDIB= -1, %arg3, fdtoneloop /* Preadjust and test */ + addib,COND(=) -1, %arg3, fdtoneloop /* Preadjust and test */ movb,<,n %arg3, %r31, fdtdone /* If loop < 0, skip */ copy %arg0, %r28 /* Init base addr */ @@ -157,14 +138,14 @@ fdtmanyloop: /* Loop if LOOP >= 2 */ copy %arg2, %r29 /* Init middle loop count */ fdtmanymiddle: /* Loop if LOOP >= 2 */ - ADDIB> -1, %r31, fdtmanymiddle /* Adjusted inner loop decr */ + addib,COND(>) -1, %r31, fdtmanymiddle /* Adjusted inner loop decr */ pdtlbe 0(%sr1, %r28) pdtlbe,m %arg1(%sr1, %r28) /* Last pdtlbe and addr adjust */ - ADDIB> -1, %r29, fdtmanymiddle /* Middle loop decr */ + addib,COND(>) -1, %r29, fdtmanymiddle /* Middle loop decr */ copy %arg3, %r31 /* Re-init inner loop count */ movb,tr %arg0, %r28, fdtmanyloop /* Re-init base addr */ - ADDIB<=,n -1, %r22,fdtdone /* Outer loop count decr */ + addib,COND(<=),n -1, %r22,fdtdone /* Outer loop count decr */ fdtoneloop: /* Loop if LOOP = 1 */ mtsp %r20, %sr1 @@ -172,48 +153,53 @@ fdtoneloop: /* Loop if LOOP = 1 */ copy %arg2, %r29 /* init middle loop count */ fdtonemiddle: /* Loop if LOOP = 1 */ - ADDIB> -1, %r29, fdtonemiddle /* Middle loop count decr */ + addib,COND(>) -1, %r29, fdtonemiddle /* Middle loop count decr */ pdtlbe,m %arg1(%sr1, %r28) /* pdtlbe for one loop */ - ADDIB> -1, %r22, fdtoneloop /* Outer loop count decr */ + addib,COND(>) -1, %r22, fdtoneloop /* Outer loop count decr */ add %r21, %r20, %r20 /* increment space */ -fdtdone: - /* Switch back to virtual mode */ +fdtdone: + /* + * Switch back to virtual mode + */ + /* pcxt_ssm_bug */ + rsm PSW_SM_I, %r0 + load32 2f, %r1 + nop + nop + nop + nop + nop - rsm PSW_SM_Q, %r0 /* clear Q bit to load iia queue */ - ldil L%KERNEL_PSW, %r1 - ldo R%KERNEL_PSW(%r1), %r1 - or %r1, %r19, %r1 /* Set I bit if set on entry */ - mtctl %r1, %cr22 + rsm PSW_SM_Q, %r0 /* prep to load iia queue */ mtctl %r0, %cr17 /* Clear IIASQ tail */ mtctl %r0, %cr17 /* Clear IIASQ head */ - ldil L%(2f), %r1 - ldo R%(2f)(%r1), %r1 mtctl %r1, %cr18 /* IIAOQ head */ ldo 4(%r1), %r1 mtctl %r1, %cr18 /* IIAOQ tail */ + load32 KERNEL_PSW, %r1 + or %r1, %r19, %r1 /* I-bit to state on entry */ + mtctl %r1, %ipsw /* restore I-bit (entire PSW) */ rfi nop 2: bv %r0(%r2) nop - .exit + .exit .procend +ENDPROC(flush_tlb_all_local) - .export flush_instruction_cache_local,code .import cache_info,data -flush_instruction_cache_local: +ENTRY(flush_instruction_cache_local) .proc .callinfo NO_CALLS .entry - mtsp %r0, %sr1 - ldil L%cache_info, %r1 - ldo R%cache_info(%r1), %r1 + load32 cache_info, %r1 /* Flush Instruction Cache */ @@ -221,41 +207,66 @@ flush_instruction_cache_local: LDREG ICACHE_STRIDE(%r1), %arg1 LDREG ICACHE_COUNT(%r1), %arg2 LDREG ICACHE_LOOP(%r1), %arg3 - rsm PSW_SM_I, %r22 /* No mmgt ops during loop*/ - ADDIB= -1, %arg3, fioneloop /* Preadjust and test */ + rsm PSW_SM_I, %r22 /* No mmgt ops during loop*/ + mtsp %r0, %sr1 + addib,COND(=) -1, %arg3, fioneloop /* Preadjust and test */ movb,<,n %arg3, %r31, fisync /* If loop < 0, do sync */ fimanyloop: /* Loop if LOOP >= 2 */ - ADDIB> -1, %r31, fimanyloop /* Adjusted inner loop decr */ - fice 0(%sr1, %arg0) + addib,COND(>) -1, %r31, fimanyloop /* Adjusted inner loop decr */ + fice %r0(%sr1, %arg0) fice,m %arg1(%sr1, %arg0) /* Last fice and addr adjust */ movb,tr %arg3, %r31, fimanyloop /* Re-init inner loop count */ - ADDIB<=,n -1, %arg2, fisync /* Outer loop decr */ + addib,COND(<=),n -1, %arg2, fisync /* Outer loop decr */ fioneloop: /* Loop if LOOP = 1 */ - ADDIB> -1, %arg2, fioneloop /* Outer loop count decr */ + /* Some implementations may flush with a single fice instruction */ + cmpib,COND(>>=),n 15, %arg2, fioneloop2 + +fioneloop1: + fice,m %arg1(%sr1, %arg0) + fice,m %arg1(%sr1, %arg0) + fice,m %arg1(%sr1, %arg0) + fice,m %arg1(%sr1, %arg0) + fice,m %arg1(%sr1, %arg0) + fice,m %arg1(%sr1, %arg0) + fice,m %arg1(%sr1, %arg0) + fice,m %arg1(%sr1, %arg0) + fice,m %arg1(%sr1, %arg0) + fice,m %arg1(%sr1, %arg0) + fice,m %arg1(%sr1, %arg0) + fice,m %arg1(%sr1, %arg0) + fice,m %arg1(%sr1, %arg0) + fice,m %arg1(%sr1, %arg0) + fice,m %arg1(%sr1, %arg0) + addib,COND(>) -16, %arg2, fioneloop1 + fice,m %arg1(%sr1, %arg0) + + /* Check if done */ + cmpb,COND(=),n %arg2, %r0, fisync /* Predict branch taken */ + +fioneloop2: + addib,COND(>) -1, %arg2, fioneloop2 /* Outer loop count decr */ fice,m %arg1(%sr1, %arg0) /* Fice for one loop */ fisync: sync - mtsm %r22 + mtsm %r22 /* restore I-bit */ bv %r0(%r2) nop .exit .procend +ENDPROC(flush_instruction_cache_local) - .export flush_data_cache_local, code - .import cache_info, data -flush_data_cache_local: + .import cache_info, data +ENTRY(flush_data_cache_local) .proc .callinfo NO_CALLS .entry - mtsp %r0, %sr1 - ldil L%cache_info, %r1 - ldo R%cache_info(%r1), %r1 + load32 cache_info, %r1 /* Flush Data Cache */ @@ -263,53 +274,174 @@ flush_data_cache_local: LDREG DCACHE_STRIDE(%r1), %arg1 LDREG DCACHE_COUNT(%r1), %arg2 LDREG DCACHE_LOOP(%r1), %arg3 - rsm PSW_SM_I, %r22 - ADDIB= -1, %arg3, fdoneloop /* Preadjust and test */ + rsm PSW_SM_I, %r22 /* No mmgt ops during loop*/ + mtsp %r0, %sr1 + addib,COND(=) -1, %arg3, fdoneloop /* Preadjust and test */ movb,<,n %arg3, %r31, fdsync /* If loop < 0, do sync */ fdmanyloop: /* Loop if LOOP >= 2 */ - ADDIB> -1, %r31, fdmanyloop /* Adjusted inner loop decr */ - fdce 0(%sr1, %arg0) + addib,COND(>) -1, %r31, fdmanyloop /* Adjusted inner loop decr */ + fdce %r0(%sr1, %arg0) fdce,m %arg1(%sr1, %arg0) /* Last fdce and addr adjust */ movb,tr %arg3, %r31, fdmanyloop /* Re-init inner loop count */ - ADDIB<=,n -1, %arg2, fdsync /* Outer loop decr */ + addib,COND(<=),n -1, %arg2, fdsync /* Outer loop decr */ fdoneloop: /* Loop if LOOP = 1 */ - ADDIB> -1, %arg2, fdoneloop /* Outer loop count decr */ + /* Some implementations may flush with a single fdce instruction */ + cmpib,COND(>>=),n 15, %arg2, fdoneloop2 + +fdoneloop1: + fdce,m %arg1(%sr1, %arg0) + fdce,m %arg1(%sr1, %arg0) + fdce,m %arg1(%sr1, %arg0) + fdce,m %arg1(%sr1, %arg0) + fdce,m %arg1(%sr1, %arg0) + fdce,m %arg1(%sr1, %arg0) + fdce,m %arg1(%sr1, %arg0) + fdce,m %arg1(%sr1, %arg0) + fdce,m %arg1(%sr1, %arg0) + fdce,m %arg1(%sr1, %arg0) + fdce,m %arg1(%sr1, %arg0) + fdce,m %arg1(%sr1, %arg0) + fdce,m %arg1(%sr1, %arg0) + fdce,m %arg1(%sr1, %arg0) + fdce,m %arg1(%sr1, %arg0) + addib,COND(>) -16, %arg2, fdoneloop1 + fdce,m %arg1(%sr1, %arg0) + + /* Check if done */ + cmpb,COND(=),n %arg2, %r0, fdsync /* Predict branch taken */ + +fdoneloop2: + addib,COND(>) -1, %arg2, fdoneloop2 /* Outer loop count decr */ fdce,m %arg1(%sr1, %arg0) /* Fdce for one loop */ fdsync: syncdma sync - mtsm %r22 + mtsm %r22 /* restore I-bit */ bv %r0(%r2) nop .exit .procend +ENDPROC(flush_data_cache_local) - .export copy_user_page_asm,code .align 16 -copy_user_page_asm: +/* Macros to serialize TLB purge operations on SMP. */ + + .macro tlb_lock la,flags,tmp +#ifdef CONFIG_SMP + ldil L%pa_tlb_lock,%r1 + ldo R%pa_tlb_lock(%r1),\la + rsm PSW_SM_I,\flags +1: LDCW 0(\la),\tmp + cmpib,<>,n 0,\tmp,3f +2: ldw 0(\la),\tmp + cmpb,<> %r0,\tmp,1b + nop + b,n 2b +3: +#endif + .endm + + .macro tlb_unlock la,flags,tmp +#ifdef CONFIG_SMP + ldi 1,\tmp + stw \tmp,0(\la) + mtsm \flags +#endif + .endm + +/* Clear page using kernel mapping. */ + +ENTRY(clear_page_asm) .proc .callinfo NO_CALLS .entry -#ifdef __LP64__ +#ifdef CONFIG_64BIT + + /* Unroll the loop. */ + ldi (PAGE_SIZE / 128), %r1 + +1: + std %r0, 0(%r26) + std %r0, 8(%r26) + std %r0, 16(%r26) + std %r0, 24(%r26) + std %r0, 32(%r26) + std %r0, 40(%r26) + std %r0, 48(%r26) + std %r0, 56(%r26) + std %r0, 64(%r26) + std %r0, 72(%r26) + std %r0, 80(%r26) + std %r0, 88(%r26) + std %r0, 96(%r26) + std %r0, 104(%r26) + std %r0, 112(%r26) + std %r0, 120(%r26) + + /* Note reverse branch hint for addib is taken. */ + addib,COND(>),n -1, %r1, 1b + ldo 128(%r26), %r26 + +#else + + /* + * Note that until (if) we start saving the full 64-bit register + * values on interrupt, we can't use std on a 32 bit kernel. + */ + ldi (PAGE_SIZE / 64), %r1 + +1: + stw %r0, 0(%r26) + stw %r0, 4(%r26) + stw %r0, 8(%r26) + stw %r0, 12(%r26) + stw %r0, 16(%r26) + stw %r0, 20(%r26) + stw %r0, 24(%r26) + stw %r0, 28(%r26) + stw %r0, 32(%r26) + stw %r0, 36(%r26) + stw %r0, 40(%r26) + stw %r0, 44(%r26) + stw %r0, 48(%r26) + stw %r0, 52(%r26) + stw %r0, 56(%r26) + stw %r0, 60(%r26) + + addib,COND(>),n -1, %r1, 1b + ldo 64(%r26), %r26 +#endif + bv %r0(%r2) + nop + .exit + + .procend +ENDPROC(clear_page_asm) + +/* Copy page using kernel mapping. */ + +ENTRY(copy_page_asm) + .proc + .callinfo NO_CALLS + .entry + +#ifdef CONFIG_64BIT /* PA8x00 CPUs can consume 2 loads or 1 store per cycle. * Unroll the loop by hand and arrange insn appropriately. - * GCC probably can do this just as well. + * Prefetch doesn't improve performance on rp3440. + * GCC probably can do this just as well... */ - ldd 0(%r25), %r19 - ldi 32, %r1 /* PAGE_SIZE/128 == 32 */ - ldw 64(%r25), %r0 /* prefetch 1 cacheline ahead */ - ldw 128(%r25), %r0 /* prefetch 2 */ + ldi (PAGE_SIZE / 128), %r1 -1: ldd 8(%r25), %r20 - ldw 192(%r25), %r0 /* prefetch 3 */ - ldw 256(%r25), %r0 /* prefetch 4 */ +1: ldd 0(%r25), %r19 + ldd 8(%r25), %r20 ldd 16(%r25), %r21 ldd 24(%r25), %r22 @@ -343,16 +475,16 @@ copy_user_page_asm: ldd 112(%r25), %r21 ldd 120(%r25), %r22 + ldo 128(%r25), %r25 std %r19, 96(%r26) std %r20, 104(%r26) - ldo 128(%r25), %r25 std %r21, 112(%r26) std %r22, 120(%r26) - ldo 128(%r26), %r26 - ADDIB> -1, %r1, 1b /* bundle 10 */ - ldd 0(%r25), %r19 /* start next loads */ + /* Note reverse branch hint for addib is taken. */ + addib,COND(>),n -1, %r1, 1b + ldo 128(%r26), %r26 #else @@ -363,10 +495,10 @@ copy_user_page_asm: * the full 64 bit register values on interrupt, we can't * use ldd/std on a 32 bit kernel. */ - ldi 64, %r1 /* PAGE_SIZE/64 == 64 */ + ldw 0(%r25), %r19 + ldi (PAGE_SIZE / 64), %r1 1: - ldw 0(%r25), %r19 ldw 4(%r25), %r20 ldw 8(%r25), %r21 ldw 12(%r25), %r22 @@ -396,17 +528,19 @@ copy_user_page_asm: ldw 60(%r25), %r22 stw %r19, 48(%r26) stw %r20, 52(%r26) + ldo 64(%r25), %r25 stw %r21, 56(%r26) stw %r22, 60(%r26) ldo 64(%r26), %r26 - ADDIB> -1, %r1, 1b - ldo 64(%r25), %r25 + addib,COND(>),n -1, %r1, 1b + ldw 0(%r25), %r19 #endif bv %r0(%r2) nop .exit .procend +ENDPROC(copy_page_asm) /* * NOTE: Code in clear_user_page has a hard coded dependency on the @@ -429,7 +563,14 @@ copy_user_page_asm: * %r23 physical page (shifted for tlb insert) of "from" translation */ -#if 0 + /* Drop prot bits and convert to page addr for iitlbt and idtlbt */ + #define PAGE_ADD_SHIFT (PAGE_SHIFT-12) + .macro convert_phys_for_tlb_insert20 phys + extrd,u \phys, 56-PAGE_ADD_SHIFT, 32-PAGE_ADD_SHIFT, \phys +#if _PAGE_SIZE_ENCODING_DEFAULT + depdi _PAGE_SIZE_ENCODING_DEFAULT, 63, (63-58), \phys +#endif + .endm /* * We can't do this since copy_user_page is used to bring in @@ -442,42 +583,112 @@ copy_user_page_asm: * use it if more information is passed into copy_user_page(). * Have to do some measurements to see if it is worthwhile to * lobby for such a change. + * */ - .export copy_user_page_asm,code - -copy_user_page_asm: +ENTRY(copy_user_page_asm) .proc .callinfo NO_CALLS .entry + /* Convert virtual `to' and `from' addresses to physical addresses. + Move `from' physical address to non shadowed register. */ ldil L%(__PAGE_OFFSET), %r1 sub %r26, %r1, %r26 - sub %r25, %r1, %r23 /* move physical addr into non shadowed reg */ + sub %r25, %r1, %r23 ldil L%(TMPALIAS_MAP_START), %r28 -#ifdef __LP64__ - extrd,u %r26,56,32, %r26 /* convert phys addr to tlb insert format */ - extrd,u %r23,56,32, %r23 /* convert phys addr to tlb insert format */ - depd %r24,63,22, %r28 /* Form aliased virtual address 'to' */ - depdi 0, 63,12, %r28 /* Clear any offset bits */ +#ifdef CONFIG_64BIT +#if (TMPALIAS_MAP_START >= 0x80000000) + depdi 0, 31,32, %r28 /* clear any sign extension */ +#endif + convert_phys_for_tlb_insert20 %r26 /* convert phys addr to tlb insert format */ + convert_phys_for_tlb_insert20 %r23 /* convert phys addr to tlb insert format */ + depd %r24,63,22, %r28 /* Form aliased virtual address 'to' */ + depdi 0, 63,PAGE_SHIFT, %r28 /* Clear any offset bits */ copy %r28, %r29 depdi 1, 41,1, %r29 /* Form aliased virtual address 'from' */ #else extrw,u %r26, 24,25, %r26 /* convert phys addr to tlb insert format */ extrw,u %r23, 24,25, %r23 /* convert phys addr to tlb insert format */ depw %r24, 31,22, %r28 /* Form aliased virtual address 'to' */ - depwi 0, 31,12, %r28 /* Clear any offset bits */ + depwi 0, 31,PAGE_SHIFT, %r28 /* Clear any offset bits */ copy %r28, %r29 depwi 1, 9,1, %r29 /* Form aliased virtual address 'from' */ #endif /* Purge any old translations */ +#ifdef CONFIG_PA20 + pdtlb,l 0(%r28) + pdtlb,l 0(%r29) +#else + tlb_lock %r20,%r21,%r22 pdtlb 0(%r28) pdtlb 0(%r29) + tlb_unlock %r20,%r21,%r22 +#endif + +#ifdef CONFIG_64BIT + /* PA8x00 CPUs can consume 2 loads or 1 store per cycle. + * Unroll the loop by hand and arrange insn appropriately. + * GCC probably can do this just as well. + */ - ldi 64, %r1 + ldd 0(%r29), %r19 + ldi (PAGE_SIZE / 128), %r1 + +1: ldd 8(%r29), %r20 + + ldd 16(%r29), %r21 + ldd 24(%r29), %r22 + std %r19, 0(%r28) + std %r20, 8(%r28) + + ldd 32(%r29), %r19 + ldd 40(%r29), %r20 + std %r21, 16(%r28) + std %r22, 24(%r28) + + ldd 48(%r29), %r21 + ldd 56(%r29), %r22 + std %r19, 32(%r28) + std %r20, 40(%r28) + + ldd 64(%r29), %r19 + ldd 72(%r29), %r20 + std %r21, 48(%r28) + std %r22, 56(%r28) + + ldd 80(%r29), %r21 + ldd 88(%r29), %r22 + std %r19, 64(%r28) + std %r20, 72(%r28) + + ldd 96(%r29), %r19 + ldd 104(%r29), %r20 + std %r21, 80(%r28) + std %r22, 88(%r28) + + ldd 112(%r29), %r21 + ldd 120(%r29), %r22 + std %r19, 96(%r28) + std %r20, 104(%r28) + + ldo 128(%r29), %r29 + std %r21, 112(%r28) + std %r22, 120(%r28) + ldo 128(%r28), %r28 + + /* conditional branches nullify on forward taken branch, and on + * non-taken backward branch. Note that .+4 is a backwards branch. + * The ldd should only get executed if the branch is taken. + */ + addib,COND(>),n -1, %r1, 1b /* bundle 10 */ + ldd 0(%r29), %r19 /* start next loads */ + +#else + ldi (PAGE_SIZE / 64), %r1 /* * This loop is optimized for PCXL/PCXL2 ldw/ldw and stw/stw @@ -488,9 +699,7 @@ copy_user_page_asm: * use ldd/std on a 32 bit kernel. */ - -1: - ldw 0(%r29), %r19 +1: ldw 0(%r29), %r19 ldw 4(%r29), %r20 ldw 8(%r29), %r21 ldw 12(%r29), %r22 @@ -523,19 +732,19 @@ copy_user_page_asm: stw %r21, 56(%r28) stw %r22, 60(%r28) ldo 64(%r28), %r28 - ADDIB> -1, %r1,1b + + addib,COND(>) -1, %r1,1b ldo 64(%r29), %r29 +#endif bv %r0(%r2) nop .exit .procend -#endif +ENDPROC(copy_user_page_asm) - .export __clear_user_page_asm,code - -__clear_user_page_asm: +ENTRY(clear_user_page_asm) .proc .callinfo NO_CALLS .entry @@ -543,28 +752,34 @@ __clear_user_page_asm: tophys_r1 %r26 ldil L%(TMPALIAS_MAP_START), %r28 -#ifdef __LP64__ +#ifdef CONFIG_64BIT #if (TMPALIAS_MAP_START >= 0x80000000) depdi 0, 31,32, %r28 /* clear any sign extension */ #endif - extrd,u %r26, 56,32, %r26 /* convert phys addr to tlb insert format */ + convert_phys_for_tlb_insert20 %r26 /* convert phys addr to tlb insert format */ depd %r25, 63,22, %r28 /* Form aliased virtual address 'to' */ - depdi 0, 63,12, %r28 /* Clear any offset bits */ + depdi 0, 63,PAGE_SHIFT, %r28 /* Clear any offset bits */ #else extrw,u %r26, 24,25, %r26 /* convert phys addr to tlb insert format */ depw %r25, 31,22, %r28 /* Form aliased virtual address 'to' */ - depwi 0, 31,12, %r28 /* Clear any offset bits */ + depwi 0, 31,PAGE_SHIFT, %r28 /* Clear any offset bits */ #endif /* Purge any old translation */ +#ifdef CONFIG_PA20 + pdtlb,l 0(%r28) +#else + tlb_lock %r20,%r21,%r22 pdtlb 0(%r28) + tlb_unlock %r20,%r21,%r22 +#endif -#ifdef __LP64__ - ldi 32, %r1 /* PAGE_SIZE/128 == 32 */ +#ifdef CONFIG_64BIT + ldi (PAGE_SIZE / 128), %r1 /* PREFETCH (Write) has not (yet) been proven to help here */ -/* #define PREFETCHW_OP ldd 256(%0), %r0 */ + /* #define PREFETCHW_OP ldd 256(%0), %r0 */ 1: std %r0, 0(%r28) std %r0, 8(%r28) @@ -582,15 +797,13 @@ __clear_user_page_asm: std %r0, 104(%r28) std %r0, 112(%r28) std %r0, 120(%r28) - ADDIB> -1, %r1, 1b + addib,COND(>) -1, %r1, 1b ldo 128(%r28), %r28 -#else /* ! __LP64 */ - - ldi 64, %r1 /* PAGE_SIZE/64 == 64 */ +#else /* ! CONFIG_64BIT */ + ldi (PAGE_SIZE / 64), %r1 -1: - stw %r0, 0(%r28) +1: stw %r0, 0(%r28) stw %r0, 4(%r28) stw %r0, 8(%r28) stw %r0, 12(%r28) @@ -606,107 +819,172 @@ __clear_user_page_asm: stw %r0, 52(%r28) stw %r0, 56(%r28) stw %r0, 60(%r28) - ADDIB> -1, %r1, 1b + addib,COND(>) -1, %r1, 1b ldo 64(%r28), %r28 -#endif /* __LP64 */ +#endif /* CONFIG_64BIT */ bv %r0(%r2) nop .exit .procend +ENDPROC(clear_user_page_asm) - .export flush_kernel_dcache_page - -flush_kernel_dcache_page: +ENTRY(flush_dcache_page_asm) .proc .callinfo NO_CALLS .entry + ldil L%(TMPALIAS_MAP_START), %r28 +#ifdef CONFIG_64BIT +#if (TMPALIAS_MAP_START >= 0x80000000) + depdi 0, 31,32, %r28 /* clear any sign extension */ +#endif + convert_phys_for_tlb_insert20 %r26 /* convert phys addr to tlb insert format */ + depd %r25, 63,22, %r28 /* Form aliased virtual address 'to' */ + depdi 0, 63,PAGE_SHIFT, %r28 /* Clear any offset bits */ +#else + extrw,u %r26, 24,25, %r26 /* convert phys addr to tlb insert format */ + depw %r25, 31,22, %r28 /* Form aliased virtual address 'to' */ + depwi 0, 31,PAGE_SHIFT, %r28 /* Clear any offset bits */ +#endif + + /* Purge any old translation */ + +#ifdef CONFIG_PA20 + pdtlb,l 0(%r28) +#else + tlb_lock %r20,%r21,%r22 + pdtlb 0(%r28) + tlb_unlock %r20,%r21,%r22 +#endif + ldil L%dcache_stride, %r1 - ldw R%dcache_stride(%r1), %r23 + ldw R%dcache_stride(%r1), r31 -#ifdef __LP64__ +#ifdef CONFIG_64BIT depdi,z 1, 63-PAGE_SHIFT,1, %r25 #else depwi,z 1, 31-PAGE_SHIFT,1, %r25 #endif - add %r26, %r25, %r25 - sub %r25, %r23, %r25 + add %r28, %r25, %r25 + sub %r25, r31, %r25 + + +1: fdc,m r31(%r28) + fdc,m r31(%r28) + fdc,m r31(%r28) + fdc,m r31(%r28) + fdc,m r31(%r28) + fdc,m r31(%r28) + fdc,m r31(%r28) + fdc,m r31(%r28) + fdc,m r31(%r28) + fdc,m r31(%r28) + fdc,m r31(%r28) + fdc,m r31(%r28) + fdc,m r31(%r28) + fdc,m r31(%r28) + fdc,m r31(%r28) + cmpb,COND(<<) %r28, %r25,1b + fdc,m r31(%r28) + sync -1: fdc,m %r23(%r26) - fdc,m %r23(%r26) - fdc,m %r23(%r26) - fdc,m %r23(%r26) - fdc,m %r23(%r26) - fdc,m %r23(%r26) - fdc,m %r23(%r26) - fdc,m %r23(%r26) - fdc,m %r23(%r26) - fdc,m %r23(%r26) - fdc,m %r23(%r26) - fdc,m %r23(%r26) - fdc,m %r23(%r26) - fdc,m %r23(%r26) - fdc,m %r23(%r26) - CMPB<< %r26, %r25,1b - fdc,m %r23(%r26) +#ifdef CONFIG_PA20 + pdtlb,l 0(%r25) +#else + tlb_lock %r20,%r21,%r22 + pdtlb 0(%r25) + tlb_unlock %r20,%r21,%r22 +#endif - sync bv %r0(%r2) nop .exit .procend - - .export flush_user_dcache_page +ENDPROC(flush_dcache_page_asm) -flush_user_dcache_page: +ENTRY(flush_icache_page_asm) .proc .callinfo NO_CALLS .entry - ldil L%dcache_stride, %r1 - ldw R%dcache_stride(%r1), %r23 + ldil L%(TMPALIAS_MAP_START), %r28 +#ifdef CONFIG_64BIT +#if (TMPALIAS_MAP_START >= 0x80000000) + depdi 0, 31,32, %r28 /* clear any sign extension */ +#endif + convert_phys_for_tlb_insert20 %r26 /* convert phys addr to tlb insert format */ + depd %r25, 63,22, %r28 /* Form aliased virtual address 'to' */ + depdi 0, 63,PAGE_SHIFT, %r28 /* Clear any offset bits */ +#else + extrw,u %r26, 24,25, %r26 /* convert phys addr to tlb insert format */ + depw %r25, 31,22, %r28 /* Form aliased virtual address 'to' */ + depwi 0, 31,PAGE_SHIFT, %r28 /* Clear any offset bits */ +#endif + + /* Purge any old translation */ -#ifdef __LP64__ - depdi,z 1,63-PAGE_SHIFT,1, %r25 +#ifdef CONFIG_PA20 + pitlb,l %r0(%sr4,%r28) #else - depwi,z 1,31-PAGE_SHIFT,1, %r25 + tlb_lock %r20,%r21,%r22 + pitlb (%sr4,%r28) + tlb_unlock %r20,%r21,%r22 #endif - add %r26, %r25, %r25 - sub %r25, %r23, %r25 + ldil L%icache_stride, %r1 + ldw R%icache_stride(%r1), %r31 -1: fdc,m %r23(%sr3, %r26) - fdc,m %r23(%sr3, %r26) - fdc,m %r23(%sr3, %r26) - fdc,m %r23(%sr3, %r26) - fdc,m %r23(%sr3, %r26) - fdc,m %r23(%sr3, %r26) - fdc,m %r23(%sr3, %r26) - fdc,m %r23(%sr3, %r26) - fdc,m %r23(%sr3, %r26) - fdc,m %r23(%sr3, %r26) - fdc,m %r23(%sr3, %r26) - fdc,m %r23(%sr3, %r26) - fdc,m %r23(%sr3, %r26) - fdc,m %r23(%sr3, %r26) - fdc,m %r23(%sr3, %r26) - CMPB<< %r26, %r25,1b - fdc,m %r23(%sr3, %r26) +#ifdef CONFIG_64BIT + depdi,z 1, 63-PAGE_SHIFT,1, %r25 +#else + depwi,z 1, 31-PAGE_SHIFT,1, %r25 +#endif + add %r28, %r25, %r25 + sub %r25, %r31, %r25 + + + /* fic only has the type 26 form on PA1.1, requiring an + * explicit space specification, so use %sr4 */ +1: fic,m %r31(%sr4,%r28) + fic,m %r31(%sr4,%r28) + fic,m %r31(%sr4,%r28) + fic,m %r31(%sr4,%r28) + fic,m %r31(%sr4,%r28) + fic,m %r31(%sr4,%r28) + fic,m %r31(%sr4,%r28) + fic,m %r31(%sr4,%r28) + fic,m %r31(%sr4,%r28) + fic,m %r31(%sr4,%r28) + fic,m %r31(%sr4,%r28) + fic,m %r31(%sr4,%r28) + fic,m %r31(%sr4,%r28) + fic,m %r31(%sr4,%r28) + fic,m %r31(%sr4,%r28) + cmpb,COND(<<) %r28, %r25,1b + fic,m %r31(%sr4,%r28) sync + +#ifdef CONFIG_PA20 + pitlb,l %r0(%sr4,%r25) +#else + tlb_lock %r20,%r21,%r22 + pitlb (%sr4,%r25) + tlb_unlock %r20,%r21,%r22 +#endif + bv %r0(%r2) nop .exit .procend +ENDPROC(flush_icache_page_asm) - .export flush_user_icache_page - -flush_user_icache_page: +ENTRY(flush_kernel_dcache_page_asm) .proc .callinfo NO_CALLS .entry @@ -714,7 +992,7 @@ flush_user_icache_page: ldil L%dcache_stride, %r1 ldw R%dcache_stride(%r1), %r23 -#ifdef __LP64__ +#ifdef CONFIG_64BIT depdi,z 1, 63-PAGE_SHIFT,1, %r25 #else depwi,z 1, 31-PAGE_SHIFT,1, %r25 @@ -723,23 +1001,23 @@ flush_user_icache_page: sub %r25, %r23, %r25 -1: fic,m %r23(%sr3, %r26) - fic,m %r23(%sr3, %r26) - fic,m %r23(%sr3, %r26) - fic,m %r23(%sr3, %r26) - fic,m %r23(%sr3, %r26) - fic,m %r23(%sr3, %r26) - fic,m %r23(%sr3, %r26) - fic,m %r23(%sr3, %r26) - fic,m %r23(%sr3, %r26) - fic,m %r23(%sr3, %r26) - fic,m %r23(%sr3, %r26) - fic,m %r23(%sr3, %r26) - fic,m %r23(%sr3, %r26) - fic,m %r23(%sr3, %r26) - fic,m %r23(%sr3, %r26) - CMPB<< %r26, %r25,1b - fic,m %r23(%sr3, %r26) +1: fdc,m %r23(%r26) + fdc,m %r23(%r26) + fdc,m %r23(%r26) + fdc,m %r23(%r26) + fdc,m %r23(%r26) + fdc,m %r23(%r26) + fdc,m %r23(%r26) + fdc,m %r23(%r26) + fdc,m %r23(%r26) + fdc,m %r23(%r26) + fdc,m %r23(%r26) + fdc,m %r23(%r26) + fdc,m %r23(%r26) + fdc,m %r23(%r26) + fdc,m %r23(%r26) + cmpb,COND(<<) %r26, %r25,1b + fdc,m %r23(%r26) sync bv %r0(%r2) @@ -747,11 +1025,9 @@ flush_user_icache_page: .exit .procend +ENDPROC(flush_kernel_dcache_page_asm) - - .export purge_kernel_dcache_page - -purge_kernel_dcache_page: +ENTRY(purge_kernel_dcache_page_asm) .proc .callinfo NO_CALLS .entry @@ -759,7 +1035,7 @@ purge_kernel_dcache_page: ldil L%dcache_stride, %r1 ldw R%dcache_stride(%r1), %r23 -#ifdef __LP64__ +#ifdef CONFIG_64BIT depdi,z 1, 63-PAGE_SHIFT,1, %r25 #else depwi,z 1, 31-PAGE_SHIFT,1, %r25 @@ -782,7 +1058,7 @@ purge_kernel_dcache_page: pdc,m %r23(%r26) pdc,m %r23(%r26) pdc,m %r23(%r26) - CMPB<< %r26, %r25, 1b + cmpb,COND(<<) %r26, %r25, 1b pdc,m %r23(%r26) sync @@ -791,76 +1067,9 @@ purge_kernel_dcache_page: .exit .procend +ENDPROC(purge_kernel_dcache_page_asm) -#if 0 - /* Currently not used, but it still is a possible alternate - * solution. - */ - - .export flush_alias_page - -flush_alias_page: - .proc - .callinfo NO_CALLS - .entry - - tophys_r1 %r26 - - ldil L%(TMPALIAS_MAP_START), %r28 -#ifdef __LP64__ - extrd,u %r26, 56,32, %r26 /* convert phys addr to tlb insert format */ - depd %r25, 63,22, %r28 /* Form aliased virtual address 'to' */ - depdi 0, 63,12, %r28 /* Clear any offset bits */ -#else - extrw,u %r26, 24,25, %r26 /* convert phys addr to tlb insert format */ - depw %r25, 31,22, %r28 /* Form aliased virtual address 'to' */ - depwi 0, 31,12, %r28 /* Clear any offset bits */ -#endif - - /* Purge any old translation */ - - pdtlb 0(%r28) - - ldil L%dcache_stride, %r1 - ldw R%dcache_stride(%r1), %r23 - -#ifdef __LP64__ - depdi,z 1, 63-PAGE_SHIFT,1, %r29 -#else - depwi,z 1, 31-PAGE_SHIFT,1, %r29 -#endif - add %r28, %r29, %r29 - sub %r29, %r23, %r29 - -1: fdc,m %r23(%r28) - fdc,m %r23(%r28) - fdc,m %r23(%r28) - fdc,m %r23(%r28) - fdc,m %r23(%r28) - fdc,m %r23(%r28) - fdc,m %r23(%r28) - fdc,m %r23(%r28) - fdc,m %r23(%r28) - fdc,m %r23(%r28) - fdc,m %r23(%r28) - fdc,m %r23(%r28) - fdc,m %r23(%r28) - fdc,m %r23(%r28) - fdc,m %r23(%r28) - CMPB<< %r28, %r29, 1b - fdc,m %r23(%r28) - - sync - bv %r0(%r2) - nop - .exit - - .procend -#endif - - .export flush_user_dcache_range_asm - -flush_user_dcache_range_asm: +ENTRY(flush_user_dcache_range_asm) .proc .callinfo NO_CALLS .entry @@ -870,7 +1079,7 @@ flush_user_dcache_range_asm: ldo -1(%r23), %r21 ANDCM %r26, %r21, %r26 -1: CMPB<<,n %r26, %r25, 1b +1: cmpb,COND(<<),n %r26, %r25, 1b fdc,m %r23(%sr3, %r26) sync @@ -879,10 +1088,9 @@ flush_user_dcache_range_asm: .exit .procend +ENDPROC(flush_user_dcache_range_asm) - .export flush_kernel_dcache_range_asm - -flush_kernel_dcache_range_asm: +ENTRY(flush_kernel_dcache_range_asm) .proc .callinfo NO_CALLS .entry @@ -892,7 +1100,7 @@ flush_kernel_dcache_range_asm: ldo -1(%r23), %r21 ANDCM %r26, %r21, %r26 -1: CMPB<<,n %r26, %r25,1b +1: cmpb,COND(<<),n %r26, %r25,1b fdc,m %r23(%r26) sync @@ -902,10 +1110,9 @@ flush_kernel_dcache_range_asm: .exit .procend +ENDPROC(flush_kernel_dcache_range_asm) - .export flush_user_icache_range_asm - -flush_user_icache_range_asm: +ENTRY(flush_user_icache_range_asm) .proc .callinfo NO_CALLS .entry @@ -915,7 +1122,7 @@ flush_user_icache_range_asm: ldo -1(%r23), %r21 ANDCM %r26, %r21, %r26 -1: CMPB<<,n %r26, %r25,1b +1: cmpb,COND(<<),n %r26, %r25,1b fic,m %r23(%sr3, %r26) sync @@ -924,10 +1131,9 @@ flush_user_icache_range_asm: .exit .procend +ENDPROC(flush_user_icache_range_asm) - .export flush_kernel_icache_page - -flush_kernel_icache_page: +ENTRY(flush_kernel_icache_page) .proc .callinfo NO_CALLS .entry @@ -935,7 +1141,7 @@ flush_kernel_icache_page: ldil L%icache_stride, %r1 ldw R%icache_stride(%r1), %r23 -#ifdef __LP64__ +#ifdef CONFIG_64BIT depdi,z 1, 63-PAGE_SHIFT,1, %r25 #else depwi,z 1, 31-PAGE_SHIFT,1, %r25 @@ -944,23 +1150,23 @@ flush_kernel_icache_page: sub %r25, %r23, %r25 -1: fic,m %r23(%r26) - fic,m %r23(%r26) - fic,m %r23(%r26) - fic,m %r23(%r26) - fic,m %r23(%r26) - fic,m %r23(%r26) - fic,m %r23(%r26) - fic,m %r23(%r26) - fic,m %r23(%r26) - fic,m %r23(%r26) - fic,m %r23(%r26) - fic,m %r23(%r26) - fic,m %r23(%r26) - fic,m %r23(%r26) - fic,m %r23(%r26) - CMPB<< %r26, %r25, 1b - fic,m %r23(%r26) +1: fic,m %r23(%sr4, %r26) + fic,m %r23(%sr4, %r26) + fic,m %r23(%sr4, %r26) + fic,m %r23(%sr4, %r26) + fic,m %r23(%sr4, %r26) + fic,m %r23(%sr4, %r26) + fic,m %r23(%sr4, %r26) + fic,m %r23(%sr4, %r26) + fic,m %r23(%sr4, %r26) + fic,m %r23(%sr4, %r26) + fic,m %r23(%sr4, %r26) + fic,m %r23(%sr4, %r26) + fic,m %r23(%sr4, %r26) + fic,m %r23(%sr4, %r26) + fic,m %r23(%sr4, %r26) + cmpb,COND(<<) %r26, %r25, 1b + fic,m %r23(%sr4, %r26) sync bv %r0(%r2) @@ -968,10 +1174,9 @@ flush_kernel_icache_page: .exit .procend +ENDPROC(flush_kernel_icache_page) - .export flush_kernel_icache_range_asm - -flush_kernel_icache_range_asm: +ENTRY(flush_kernel_icache_range_asm) .proc .callinfo NO_CALLS .entry @@ -981,47 +1186,45 @@ flush_kernel_icache_range_asm: ldo -1(%r23), %r21 ANDCM %r26, %r21, %r26 -1: CMPB<<,n %r26, %r25, 1b - fic,m %r23(%r26) +1: cmpb,COND(<<),n %r26, %r25, 1b + fic,m %r23(%sr4, %r26) sync bv %r0(%r2) nop .exit - .procend +ENDPROC(flush_kernel_icache_range_asm) - .align 128 - - .export disable_sr_hashing_asm,code - -disable_sr_hashing_asm: + /* align should cover use of rfi in disable_sr_hashing_asm and + * srdis_done. + */ + .align 256 +ENTRY(disable_sr_hashing_asm) .proc .callinfo NO_CALLS .entry - /* Switch to real mode */ - - ssm 0, %r0 /* relied upon translation! */ - nop - nop + /* + * Switch to real mode + */ + /* pcxt_ssm_bug */ + rsm PSW_SM_I, %r0 + load32 PA(1f), %r1 nop nop nop nop nop - - rsm (PSW_SM_Q|PSW_SM_I), %r0 /* disable Q&I to load the iia queue */ - ldil L%REAL_MODE_PSW, %r1 - ldo R%REAL_MODE_PSW(%r1), %r1 - mtctl %r1, %cr22 + + rsm PSW_SM_Q, %r0 /* prep to load iia queue */ mtctl %r0, %cr17 /* Clear IIASQ tail */ mtctl %r0, %cr17 /* Clear IIASQ head */ - ldil L%PA(1f), %r1 - ldo R%PA(1f)(%r1), %r1 mtctl %r1, %cr18 /* IIAOQ head */ ldo 4(%r1), %r1 mtctl %r1, %cr18 /* IIAOQ tail */ + load32 REAL_MODE_PSW, %r1 + mtctl %r1, %ipsw rfi nop @@ -1053,27 +1256,31 @@ srdis_pcxl: srdis_pa20: - /* Disable Space Register Hashing for PCXU,PCXU+,PCXW,PCXW+ */ + /* Disable Space Register Hashing for PCXU,PCXU+,PCXW,PCXW+,PCXW2 */ .word 0x144008bc /* mfdiag %dr2, %r28 */ depdi 0, 54,1, %r28 /* clear DIAG_SPHASH_ENAB (bit 54) */ .word 0x145c1840 /* mtdiag %r28, %dr2 */ -srdis_done: +srdis_done: /* Switch back to virtual mode */ + rsm PSW_SM_I, %r0 /* prep to load iia queue */ + load32 2f, %r1 + nop + nop + nop + nop + nop - rsm PSW_SM_Q, %r0 /* clear Q bit to load iia queue */ - ldil L%KERNEL_PSW, %r1 - ldo R%KERNEL_PSW(%r1), %r1 - mtctl %r1, %cr22 + rsm PSW_SM_Q, %r0 /* prep to load iia queue */ mtctl %r0, %cr17 /* Clear IIASQ tail */ mtctl %r0, %cr17 /* Clear IIASQ head */ - ldil L%(2f), %r1 - ldo R%(2f)(%r1), %r1 mtctl %r1, %cr18 /* IIAOQ head */ ldo 4(%r1), %r1 mtctl %r1, %cr18 /* IIAOQ tail */ + load32 KERNEL_PSW, %r1 + mtctl %r1, %ipsw rfi nop @@ -1082,5 +1289,6 @@ srdis_done: .exit .procend +ENDPROC(disable_sr_hashing_asm) .end |
