diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-07-23 18:54:23 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-07-23 18:54:23 -0700 |
commit | 83c7f72259ea4bd0561e2f2762d97ee2888126ce (patch) | |
tree | c8b181e9f9a0bb061f5ab63fabb98197d7aee19a /arch/powerpc/kernel | |
parent | e05644e17e744315bce12b0948cdc36910b9a76e (diff) | |
parent | 574ce79cea9d3fda109ffcc82f81733de4740e5c (diff) |
Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc
Pull powerpc updates from Benjamin Herrenschmidt:
"Notable highlights:
- iommu improvements from Anton removing the per-iommu global lock in
favor of dividing the DMA space into pools, each with its own lock,
and hashed on the CPU number. Along with making the locking more
fine grained, this gives significant improvements in multiqueue
networking scalability.
- Still from Anton, we know provide a vdso based variant of getcpu
which makes sched_getcpu with the appropriate glibc patch something
like 18 times faster.
- More anton goodness (he's been busy !) in other areas such as a
faster __clear_user and copy_page on P7, various perf fixes to
improve sampling quality, etc...
- One more step toward removing legacy i2c interfaces by using new
device-tree based probing of platform devices for the AOA audio
drivers
- A nice series of patches from Michael Neuling that helps avoiding
confusion between register numbers and litterals in assembly code,
trying to enforce the use of "%rN" register names in gas rather
than plain numbers.
- A pile of FSL updates
- The usual bunch of small fixes, cleanups etc...
You may spot a change to drivers/char/mem. The patch got no comment
or ack from outside, it's a trivial patch to allow the architecture to
skip creating /dev/port, which we use to disable it on ppc64 that
don't have a legacy brige. On those, IO ports 0...64K are not mapped
in kernel space at all, so accesses to /dev/port cause oopses (and
yes, distros -still- ship userspace that bangs hard coded ports such
as kbdrate)."
* 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc: (106 commits)
powerpc/mpic: Create a revmap with enough entries for IPIs and timers
Remove stale .rej file
powerpc/iommu: Fix iommu pool initialization
powerpc/eeh: Check handle_eeh_events() return value
powerpc/85xx: Add phy nodes in SGMII mode for MPC8536/44/72DS & P2020DS
powerpc/e500: add paravirt QEMU platform
powerpc/mpc85xx_ds: convert to unified PCI init
powerpc/fsl-pci: get PCI init out of board files
powerpc/85xx: Update corenet64_smp_defconfig
powerpc/85xx: Update corenet32_smp_defconfig
powerpc/85xx: Rename P1021RDB-PC device trees to be consistent
powerpc/watchdog: move booke watchdog param related code to setup-common.c
sound/aoa: Adapt to new i2c probing scheme
i2c/powermac: Improve detection of devices from device-tree
powerpc: Disable /dev/port interface on systems without an ISA bridge
of: Improve prom_update_property() function
powerpc: Add "memory" attribute for mfmsr()
powerpc/ftrace: Fix assembly trampoline register usage
powerpc/hw_breakpoints: Fix incorrect pointer access
powerpc: Put the gpr save/restore functions in their own section
...
Diffstat (limited to 'arch/powerpc/kernel')
32 files changed, 555 insertions, 210 deletions
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 52c7ad78242..85b05c463fa 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -533,6 +533,7 @@ int main(void) HSTATE_FIELD(HSTATE_VMHANDLER, vmhandler); HSTATE_FIELD(HSTATE_SCRATCH0, scratch0); HSTATE_FIELD(HSTATE_SCRATCH1, scratch1); + HSTATE_FIELD(HSTATE_SPRG3, sprg3); HSTATE_FIELD(HSTATE_IN_GUEST, in_guest); HSTATE_FIELD(HSTATE_RESTORE_HID5, restore_hid5); HSTATE_FIELD(HSTATE_NAPPING, napping); diff --git a/arch/powerpc/kernel/cpu_setup_a2.S b/arch/powerpc/kernel/cpu_setup_a2.S index ebc62f42a23..61f079e05b6 100644 --- a/arch/powerpc/kernel/cpu_setup_a2.S +++ b/arch/powerpc/kernel/cpu_setup_a2.S @@ -100,19 +100,19 @@ _icswx_skip_guest: lis r4,(MMUCR0_TLBSEL_I|MMUCR0_ECL)@h mtspr SPRN_MMUCR0, r4 li r4,A2_IERAT_SIZE-1 - PPC_ERATWE(r4,r4,3) + PPC_ERATWE(R4,R4,3) /* Now set the D-ERAT watermark to 31 */ lis r4,(MMUCR0_TLBSEL_D|MMUCR0_ECL)@h mtspr SPRN_MMUCR0, r4 li r4,A2_DERAT_SIZE-1 - PPC_ERATWE(r4,r4,3) + PPC_ERATWE(R4,R4,3) /* And invalidate the beast just in case. That won't get rid of * a bolted entry though it will be in LRU and so will go away eventually * but let's not bother for now */ - PPC_ERATILX(0,0,0) + PPC_ERATILX(0,0,R0) 1: blr diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index b1ec983dcec..289be751cd7 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -11,6 +11,8 @@ #include <linux/gfp.h> #include <linux/memblock.h> #include <linux/export.h> +#include <linux/pci.h> +#include <asm/vio.h> #include <asm/bug.h> #include <asm/abs_addr.h> #include <asm/machdep.h> @@ -205,7 +207,13 @@ EXPORT_SYMBOL_GPL(dma_get_required_mask); static int __init dma_init(void) { - dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); + dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); +#ifdef CONFIG_PCI + dma_debug_add_bus(&pci_bus_type); +#endif +#ifdef CONFIG_IBMVIO + dma_debug_add_bus(&vio_bus_type); +#endif return 0; } diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index ba3aeb4bc06..5207d5a405e 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -92,7 +92,7 @@ crit_transfer_to_handler: mfspr r8,SPRN_SPRG_THREAD lwz r0,KSP_LIMIT(r8) stw r0,SAVED_KSP_LIMIT(r11) - rlwimi r0,r1,0,0,(31-THREAD_SHIFT) + CURRENT_THREAD_INFO(r0, r1) stw r0,KSP_LIMIT(r8) /* fall through */ #endif @@ -112,7 +112,7 @@ crit_transfer_to_handler: mfspr r8,SPRN_SPRG_THREAD lwz r0,KSP_LIMIT(r8) stw r0,saved_ksp_limit@l(0) - rlwimi r0,r1,0,0,(31-THREAD_SHIFT) + CURRENT_THREAD_INFO(r0, r1) stw r0,KSP_LIMIT(r8) /* fall through */ #endif @@ -158,7 +158,7 @@ transfer_to_handler: tophys(r11,r11) addi r11,r11,global_dbcr0@l #ifdef CONFIG_SMP - rlwinm r9,r1,0,0,(31-THREAD_SHIFT) + CURRENT_THREAD_INFO(r9, r1) lwz r9,TI_CPU(r9) slwi r9,r9,3 add r11,r11,r9 @@ -179,7 +179,7 @@ transfer_to_handler: ble- stack_ovf /* then the kernel stack overflowed */ 5: #if defined(CONFIG_6xx) || defined(CONFIG_E500) - rlwinm r9,r1,0,0,31-THREAD_SHIFT + CURRENT_THREAD_INFO(r9, r1) tophys(r9,r9) /* check local flags */ lwz r12,TI_LOCAL_FLAGS(r9) mtcrf 0x01,r12 @@ -226,13 +226,7 @@ reenable_mmu: /* re-enable mmu so we can */ stw r3,16(r1) stw r4,20(r1) stw r5,24(r1) - andi. r12,r12,MSR_PR - b 11f bl trace_hardirqs_off - b 12f -11: - bl trace_hardirqs_off -12: lwz r5,24(r1) lwz r4,20(r1) lwz r3,16(r1) @@ -333,7 +327,7 @@ _GLOBAL(DoSyscall) mtmsr r11 1: #endif /* CONFIG_TRACE_IRQFLAGS */ - rlwinm r10,r1,0,0,(31-THREAD_SHIFT) /* current_thread_info() */ + CURRENT_THREAD_INFO(r10, r1) lwz r11,TI_FLAGS(r10) andi. r11,r11,_TIF_SYSCALL_T_OR_A bne- syscall_dotrace @@ -354,7 +348,7 @@ ret_from_syscall: bl do_show_syscall_exit #endif mr r6,r3 - rlwinm r12,r1,0,0,(31-THREAD_SHIFT) /* current_thread_info() */ + CURRENT_THREAD_INFO(r12, r1) /* disable interrupts so current_thread_info()->flags can't change */ LOAD_MSR_KERNEL(r10,MSR_KERNEL) /* doesn't include MSR_EE */ /* Note: We don't bother telling lockdep about it */ @@ -815,7 +809,7 @@ ret_from_except: user_exc_return: /* r10 contains MSR_KERNEL here */ /* Check current_thread_info()->flags */ - rlwinm r9,r1,0,0,(31-THREAD_SHIFT) + CURRENT_THREAD_INFO(r9, r1) lwz r9,TI_FLAGS(r9) andi. r0,r9,_TIF_USER_WORK_MASK bne do_work @@ -835,7 +829,7 @@ restore_user: /* N.B. the only way to get here is from the beq following ret_from_except. */ resume_kernel: /* check current_thread_info->preempt_count */ - rlwinm r9,r1,0,0,(31-THREAD_SHIFT) + CURRENT_THREAD_INFO(r9, r1) lwz r0,TI_PREEMPT(r9) cmpwi 0,r0,0 /* if non-zero, just restore regs and return */ bne restore @@ -852,7 +846,7 @@ resume_kernel: bl trace_hardirqs_off #endif 1: bl preempt_schedule_irq - rlwinm r9,r1,0,0,(31-THREAD_SHIFT) + CURRENT_THREAD_INFO(r9, r1) lwz r3,TI_FLAGS(r9) andi. r0,r3,_TIF_NEED_RESCHED bne- 1b @@ -1122,7 +1116,7 @@ ret_from_debug_exc: lwz r10,SAVED_KSP_LIMIT(r1) stw r10,KSP_LIMIT(r9) lwz r9,THREAD_INFO-THREAD(r9) - rlwinm r10,r1,0,0,(31-THREAD_SHIFT) + CURRENT_THREAD_INFO(r10, r1) lwz r10,TI_PREEMPT(r10) stw r10,TI_PREEMPT(r9) RESTORE_xSRR(SRR0,SRR1); @@ -1156,7 +1150,7 @@ load_dbcr0: lis r11,global_dbcr0@ha addi r11,r11,global_dbcr0@l #ifdef CONFIG_SMP - rlwinm r9,r1,0,0,(31-THREAD_SHIFT) + CURRENT_THREAD_INFO(r9, r1) lwz r9,TI_CPU(r9) slwi r9,r9,3 add r11,r11,r9 @@ -1197,7 +1191,7 @@ recheck: LOAD_MSR_KERNEL(r10,MSR_KERNEL) SYNC MTMSRD(r10) /* disable interrupts */ - rlwinm r9,r1,0,0,(31-THREAD_SHIFT) + CURRENT_THREAD_INFO(r9, r1) lwz r9,TI_FLAGS(r9) andi. r0,r9,_TIF_NEED_RESCHED bne- do_resched diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 5971c85df13..4b01a25e29e 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -146,7 +146,7 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) REST_2GPRS(7,r1) addi r9,r1,STACK_FRAME_OVERHEAD #endif - clrrdi r11,r1,THREAD_SHIFT + CURRENT_THREAD_INFO(r11, r1) ld r10,TI_FLAGS(r11) andi. r11,r10,_TIF_SYSCALL_T_OR_A bne- syscall_dotrace @@ -181,7 +181,7 @@ syscall_exit: bl .do_show_syscall_exit ld r3,RESULT(r1) #endif - clrrdi r12,r1,THREAD_SHIFT + CURRENT_THREAD_INFO(r12, r1) ld r8,_MSR(r1) #ifdef CONFIG_PPC_BOOK3S @@ -197,7 +197,16 @@ syscall_exit: wrteei 0 #else ld r10,PACAKMSR(r13) - mtmsrd r10,1 + /* + * For performance reasons we clear RI the same time that we + * clear EE. We only need to clear RI just before we restore r13 + * below, but batching it with EE saves us one expensive mtmsrd call. + * We have to be careful to restore RI if we branch anywhere from + * here (eg syscall_exit_work). + */ + li r9,MSR_RI + andc r11,r10,r9 + mtmsrd r11,1 #endif /* CONFIG_PPC_BOOK3E */ ld r9,TI_FLAGS(r12) @@ -214,17 +223,6 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) andi. r6,r8,MSR_PR ld r4,_LINK(r1) - /* - * Clear RI before restoring r13. If we are returning to - * userspace and we take an exception after restoring r13, - * we end up corrupting the userspace r13 value. - */ -#ifdef CONFIG_PPC_BOOK3S - /* No MSR:RI on BookE */ - li r12,MSR_RI - andc r11,r10,r12 - mtmsrd r11,1 /* clear MSR.RI */ -#endif /* CONFIG_PPC_BOOK3S */ beq- 1f ACCOUNT_CPU_USER_EXIT(r11, r12) @@ -262,7 +260,7 @@ syscall_dotrace: ld r7,GPR7(r1) ld r8,GPR8(r1) addi r9,r1,STACK_FRAME_OVERHEAD - clrrdi r10,r1,THREAD_SHIFT + CURRENT_THREAD_INFO(r10, r1) ld r10,TI_FLAGS(r10) b .Lsyscall_dotrace_cont @@ -271,6 +269,9 @@ syscall_enosys: b syscall_exit syscall_exit_work: +#ifdef CONFIG_PPC_BOOK3S + mtmsrd r10,1 /* Restore RI */ +#endif /* If TIF_RESTOREALL is set, don't scribble on either r3 or ccr. If TIF_NOERROR is set, just save r3 as it is. */ @@ -499,7 +500,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) 2: #endif /* !CONFIG_PPC_BOOK3S */ - clrrdi r7,r8,THREAD_SHIFT /* base of new stack */ + CURRENT_THREAD_INFO(r7, r8) /* base of new stack */ /* Note: this uses SWITCH_FRAME_SIZE rather than INT_FRAME_SIZE because we don't need to leave the 288-byte ABI gap at the top of the kernel stack. */ @@ -558,7 +559,7 @@ _GLOBAL(ret_from_except_lite) mtmsrd r10,1 /* Update machine state */ #endif /* CONFIG_PPC_BOOK3E */ - clrrdi r9,r1,THREAD_SHIFT /* current_thread_info() */ + CURRENT_THREAD_INFO(r9, r1) ld r3,_MSR(r1) ld r4,TI_FLAGS(r9) andi. r3,r3,MSR_PR @@ -601,7 +602,7 @@ resume_kernel: 1: bl .preempt_schedule_irq /* Re-test flags and eventually loop */ - clrrdi r9,r1,THREAD_SHIFT + CURRENT_THREAD_INFO(r9, r1) ld r4,TI_FLAGS(r9) andi. r0,r4,_TIF_NEED_RESCHED bne 1b diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 7215cc2495d..98be7f0cd22 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -222,7 +222,7 @@ exc_##n##_bad_stack: \ * interrupts happen before the wait instruction. */ #define CHECK_NAPPING() \ - clrrdi r11,r1,THREAD_SHIFT; \ + CURRENT_THREAD_INFO(r11, r1); \ ld r10,TI_LOCAL_FLAGS(r11); \ andi. r9,r10,_TLF_NAPPING; \ beq+ 1f; \ @@ -903,7 +903,7 @@ skpinv: addi r6,r6,1 /* Increment */ bne 1b /* If not, repeat */ /* Invalidate all TLBs */ - PPC_TLBILX_ALL(0,0) + PPC_TLBILX_ALL(0,R0) sync isync @@ -961,7 +961,7 @@ skpinv: addi r6,r6,1 /* Increment */ tlbwe /* Invalidate TLB1 */ - PPC_TLBILX_ALL(0,0) + PPC_TLBILX_ALL(0,R0) sync isync @@ -1020,7 +1020,7 @@ skpinv: addi r6,r6,1 /* Increment */ tlbwe /* Invalidate TLB1 */ - PPC_TLBILX_ALL(0,0) + PPC_TLBILX_ALL(0,R0) sync isync @@ -1138,7 +1138,7 @@ a2_tlbinit_after_iprot_flush: tlbwe #endif /* CONFIG_PPC_EARLY_DEBUG_WSP */ - PPC_TLBILX(0,0,0) + PPC_TLBILX(0,0,R0) sync isync diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 1c06d297154..e894515e77b 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -239,6 +239,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) * out of line to handle them */ . = 0xe00 +hv_exception_trampoline: b h_data_storage_hv . = 0xe20 b h_instr_storage_hv @@ -851,7 +852,7 @@ BEGIN_FTR_SECTION bne- do_ste_alloc /* If so handle it */ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB) - clrrdi r11,r1,THREAD_SHIFT + CURRENT_THREAD_INFO(r11, r1) lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */ andis. r0,r0,NMI_MASK@h /* (i.e. an irq when soft-disabled) */ bne 77f /* then don't call hash_page now */ diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index de369558bf0..e0ada05f2df 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -26,7 +26,7 @@ #include <asm/ptrace.h> #ifdef CONFIG_VSX -#define REST_32FPVSRS(n,c,base) \ +#define __REST_32FPVSRS(n,c,base) \ BEGIN_FTR_SECTION \ b 2f; \ END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ @@ -35,7 +35,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ 2: REST_32VSRS(n,c,base); \ 3: -#define SAVE_32FPVSRS(n,c,base) \ +#define __SAVE_32FPVSRS(n,c,base) \ BEGIN_FTR_SECTION \ b 2f; \ END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ @@ -44,9 +44,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ 2: SAVE_32VSRS(n,c,base); \ 3: #else -#define REST_32FPVSRS(n,b,base) REST_32FPRS(n, base) -#define SAVE_32FPVSRS(n,b,base) SAVE_32FPRS(n, base) +#define __REST_32FPVSRS(n,b,base) REST_32FPRS(n, base) +#define __SAVE_32FPVSRS(n,b,base) SAVE_32FPRS(n, base) #endif +#define REST_32FPVSRS(n,c,base) __REST_32FPVSRS(n,__REG_##c,__REG_##base) +#define SAVE_32FPVSRS(n,c,base) __SAVE_32FPVSRS(n,__REG_##c,__REG_##base) /* * This task wants to use the FPU now. @@ -79,7 +81,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) beq 1f toreal(r4) addi r4,r4,THREAD /* want last_task_used_math->thread */ - SAVE_32FPVSRS(0, r5, r4) + SAVE_32FPVSRS(0, R5, R4) mffs fr0 stfd fr0,THREAD_FPSCR(r4) PPC_LL r5,PT_REGS(r4) @@ -106,7 +108,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) #endif lfd fr0,THREAD_FPSCR(r5) MTFSF_L(fr0) - REST_32FPVSRS(0, r4, r5) + REST_32FPVSRS(0, R4, R5) #ifndef CONFIG_SMP subi r4,r5,THREAD fromreal(r4) @@ -140,7 +142,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) addi r3,r3,THREAD /* want THREAD of task */ PPC_LL r5,PT_REGS(r3) PPC_LCMPI 0,r5,0 - SAVE_32FPVSRS(0, r4 ,r3) + SAVE_32FPVSRS(0, R4 ,R3) mffs fr0 stfd fr0,THREAD_FPSCR(r3) beq 1f diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index bf99cfa6bbf..91b46b7f6f0 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -63,11 +63,9 @@ ftrace_modify_code(unsigned long ip, unsigned int old, unsigned int new) return -EINVAL; /* replace the text with the new text */ - if (probe_kernel_write((void *)ip, &new, MCOUNT_INSN_SIZE)) + if (patch_instruction((unsigned int *)ip, new)) return -EPERM; - flush_icache_range(ip, ip + 8); - return 0; } @@ -212,12 +210,9 @@ __ftrace_make_nop(struct module *mod, */ op = 0x48000008; /* b +8 */ - if (probe_kernel_write((void *)ip, &op, MCOUNT_INSN_SIZE)) + if (patch_instruction((unsigned int *)ip, op)) return -EPERM; - - flush_icache_range(ip, ip + 8); - return 0; } @@ -245,9 +240,9 @@ __ftrace_make_nop(struct module *mod, /* * On PPC32 the trampoline looks like: - * 0x3d, 0x60, 0x00, 0x00 lis r11,sym@ha - * 0x39, 0x6b, 0x00, 0x00 addi r11,r11,sym@l - * 0x7d, 0x69, 0x03, 0xa6 mtctr r11 + * 0x3d, 0x80, 0x00, 0x00 lis r12,sym@ha + * 0x39, 0x8c, 0x00, 0x00 addi r12,r12,sym@l + * 0x7d, 0x89, 0x03, 0xa6 mtctr r12 * 0x4e, 0x80, 0x04, 0x20 bctr */ @@ -262,9 +257,9 @@ __ftrace_make_nop(struct module *mod, pr_devel(" %08x %08x ", jmp[0], jmp[1]); /* verify that this is what we expect it to be */ - if (((jmp[0] & 0xffff0000) != 0x3d600000) || - ((jmp[1] & 0xffff0000) != 0x396b0000) || - (jmp[2] != 0x7d6903a6) || + if (((jmp[0] & 0xffff0000) != 0x3d800000) || + ((jmp[1] & 0xffff0000) != 0x398c0000) || + (jmp[2] != 0x7d8903a6) || (jmp[3] != 0x4e800420)) { printk(KERN_ERR "Not a trampoline\n"); return -EINVAL; @@ -286,11 +281,9 @@ __ftrace_make_nop(struct module *mod, op = PPC_INST_NOP; - if (probe_kernel_write((void *)ip, &op, MCOUNT_INSN_SIZE)) + if (patch_instruction((unsigned int *)ip, op)) return -EPERM; - flush_icache_range(ip, ip + 8); - return 0; } #endif /* PPC64 */ @@ -426,11 +419,9 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) pr_devel("write to %lx\n", rec->ip); - if (probe_kernel_write((void *)ip, &op, MCOUNT_INSN_SIZE)) + if (patch_instruction((unsigned int *)ip, op)) return -EPERM; - flush_icache_range(ip, ip + 8); - return 0; } #endif /* CONFIG_PPC64 */ @@ -484,6 +475,58 @@ int ftrace_update_ftrace_func(ftrace_func_t func) return ret; } +static int __ftrace_replace_code(struct dyn_ftrace *rec, int enable) +{ + unsigned long ftrace_addr = (unsigned long)FTRACE_ADDR; + int ret; + + ret = ftrace_update_record(rec, enable); + + switch (ret) { + case FTRACE_UPDATE_IGNORE: + return 0; + case FTRACE_UPDATE_MAKE_CALL: + return ftrace_make_call(rec, ftrace_addr); + case FTRACE_UPDATE_MAKE_NOP: + return ftrace_make_nop(NULL, rec, ftrace_addr); + } + + return 0; +} + +void ftrace_replace_code(int enable) +{ + struct ftrace_rec_iter *iter; + struct dyn_ftrace *rec; + int ret; + + for (iter = ftrace_rec_iter_start(); iter; + iter = ftrace_rec_iter_next(iter)) { + rec = ftrace_rec_iter_record(iter); + ret = __ftrace_replace_code(rec, enable); + if (ret) { + ftrace_bug(ret, rec->ip); + return; + } + } +} + +void arch_ftrace_update_code(int command) +{ + if (command & FTRACE_UPDATE_CALLS) + ftrace_replace_code(1); + else if (command & FTRACE_DISABLE_CALLS) + ftrace_replace_code(0); + + if (command & FTRACE_UPDATE_TRACE_FUNC) + ftrace_update_ftrace_func(ftrace_trace_function); + + if (command & FTRACE_START_FUNC_RET) + ftrace_enable_ftrace_graph_caller(); + else if (command & FTRACE_STOP_FUNC_RET) + ftrace_disable_ftrace_graph_caller(); +} + int __init ftrace_dyn_arch_init(void *data) { /* caller expects data to be zero */ diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 1f4434a3860..0f59863c3ad 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -192,7 +192,7 @@ _ENTRY(__early_start) li r0,0 stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1) - rlwinm r22,r1,0,0,31-THREAD_SHIFT /* current thread_info */ + CURRENT_THREAD_INFO(r22, r1) stw r24, TI_CPU(r22) bl early_init @@ -556,8 +556,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) /* SPE Unavailable */ START_EXCEPTION(SPEUnavailable) NORMAL_EXCEPTION_PROLOG(SPE_UNAVAIL) - bne load_up_spe - addi r3,r1,STACK_FRAME_OVERHEAD + beq 1f + bl load_up_spe + b fast_exception_return +1: addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_EE_LITE(0x2010, KernelSPE) #else EXCEPTION(0x2020, SPE_UNAVAIL, SPEUnavailable, \ @@ -778,7 +780,7 @@ tlb_write_entry: /* Note that the SPE support is closely modeled after the AltiVec * support. Changes to one are likely to be applicable to the * other! */ -load_up_spe: +_GLOBAL(load_up_spe) /* * Disable SPE for the task which had SPE previously, * and save its SPE registers in its thread_struct. @@ -826,20 +828,7 @@ load_up_spe: subi r4,r5,THREAD stw r4,last_task_used_spe@l(r3) #endif /* !CONFIG_SMP */ - /* restore registers and return */ -2: REST_4GPRS(3, r11) - lwz r10,_CCR(r11) - REST_GPR(1, r11) - mtcr r10 - lwz r10,_LINK(r11) - mtlr r10 - REST_GPR(10, r11) - mtspr SPRN_SRR1,r9 - mtspr SPRN_SRR0,r12 - REST_GPR(9, r11) - REST_GPR(12, r11) - lwz r11,GPR11(r11) - rfi + blr /* * SPE unavailable trap from kernel - print a message, but let diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 2bc0584be81..f3a82dde61d 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -111,7 +111,7 @@ void arch_unregister_hw_breakpoint(struct perf_event *bp) * and the single_step_dabr_instruction(), then cleanup the breakpoint * restoration variables to prevent dangling pointers. */ - if (bp->ctx->task) + if (bp->ctx && bp->ctx->task) bp->ctx->task->thread.last_hit_ubp = NULL; } diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S index 15c611de1ee..1686916cc7f 100644 --- a/arch/powerpc/kernel/idle_6xx.S +++ b/arch/powerpc/kernel/idle_6xx.S @@ -135,7 +135,7 @@ BEGIN_FTR_SECTION DSSALL sync END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) - rlwinm r9,r1,0,0,31-THREAD_SHIFT /* current thread_info */ + CURRENT_THREAD_INFO(r9, r1) lwz r8,TI_LOCAL_FLAGS(r9) /* set napping bit */ ori r8,r8,_TLF_NAPPING /* so when we take an exception */ stw r8,TI_LOCAL_FLAGS(r9) /* it will return to our caller */ @@ -158,7 +158,7 @@ _GLOBAL(power_save_ppc32_restore) stw r9,_NIP(r11) /* make it do a blr */ #ifdef CONFIG_SMP - rlwinm r12,r11,0,0,31-THREAD_SHIFT + CURRENT_THREAD_INFO(r12, r11) lwz r11,TI_CPU(r12) /* get cpu number * 4 */ slwi r11,r11,2 #else diff --git a/arch/powerpc/kernel/idle_book3e.S b/arch/powerpc/kernel/idle_book3e.S index ff007b59448..4c7cb400858 100644 --- a/arch/powerpc/kernel/idle_book3e.S +++ b/arch/powerpc/kernel/idle_book3e.S @@ -60,7 +60,7 @@ _GLOBAL(book3e_idle) 1: /* Let's set the _TLF_NAPPING flag so interrupts make us return * to the right spot */ - clrrdi r11,r1,THREAD_SHIFT + CURRENT_THREAD_INFO(r11, r1) ld r10,TI_LOCAL_FLAGS(r11) ori r10,r10,_TLF_NAPPING std r10,TI_LOCAL_FLAGS(r11) diff --git a/arch/powerpc/kernel/idle_e500.S b/arch/powerpc/kernel/idle_e500.S index 4f0ab85f378..15448668988 100644 --- a/arch/powerpc/kernel/idle_e500.S +++ b/arch/powerpc/kernel/idle_e500.S @@ -21,7 +21,7 @@ .text _GLOBAL(e500_idle) - rlwinm r3,r1,0,0,31-THREAD_SHIFT /* current thread_info */ + CURRENT_THREAD_INFO(r3, r1) lwz r4,TI_LOCAL_FLAGS(r3) /* set napping bit */ ori r4,r4,_TLF_NAPPING /* so when we take an exception */ stw r4,TI_LOCAL_FLAGS(r3) /* it will return to our caller */ @@ -96,7 +96,7 @@ _GLOBAL(power_save_ppc32_restore) stw r9,_NIP(r11) /* make it do a blr */ #ifdef CONFIG_SMP - rlwinm r12,r1,0,0,31-THREAD_SHIFT + CURRENT_THREAD_INFO(r12, r1) lwz r11,TI_CPU(r12) /* get cpu number * 4 */ slwi r11,r11,2 #else diff --git a/arch/powerpc/kernel/idle_power4.S b/arch/powerpc/kernel/idle_power4.S index 2c71b0fc9f9..e3edaa18991 100644 --- a/arch/powerpc/kernel/idle_power4.S +++ b/arch/powerpc/kernel/idle_power4.S @@ -59,7 +59,7 @@ BEGIN_FTR_SECTION DSSALL sync END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) - clrrdi r9,r1,THREAD_SHIFT /* current thread_info */ + CURRENT_THREAD_INFO(r9, r1) ld r8,TI_LOCAL_FLAGS(r9) /* set napping bit */ ori r8,r8,_TLF_NAPPING /* so when we take an exception */ std r8,TI_LOCAL_FLAGS(r9) /* it will return to our caller */ diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 359f078571c..ff5a6ce027b 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -33,6 +33,9 @@ #include <linux/bitmap.h> #include <linux/iommu-helper.h> #include <linux/crash_dump.h> +#include <linux/hash.h> +#include <linux/fault-inject.h> +#include <linux/pci.h> #include <asm/io.h> #include <asm/prom.h> #include <asm/iommu.h> @@ -40,6 +43,7 @@ #include <asm/machdep.h> #include <asm/kdump.h> #include <asm/fadump.h> +#include <asm/vio.h> #define DBG(...) @@ -58,6 +62,114 @@ static int __init setup_iommu(char *str) __setup("iommu=", setup_iommu); +static DEFINE_PER_CPU(unsigned int, iommu_pool_hash); + +/* + * We precalculate the hash to avoid doing it on every allocation. + * + * The hash is important to spread CPUs across all the pools. For example, + * on a POWER7 with 4 way SMT we want interrupts on the primary threads and + * with 4 pools all primary threads would map to the same pool. + */ +static int __init setup_iommu_pool_hash(void) +{ + unsigned int i; + + for_each_possible_cpu(i) + per_cpu(iommu_pool_hash, i) = hash_32(i, IOMMU_POOL_HASHBITS); + + return 0; +} +subsys_initcall(setup_iommu_pool_hash); + +#ifdef CONFIG_FAIL_IOMMU + +static DECLARE_FAULT_ATTR(fail_iommu); + +static int __init setup_fail_iommu(char *str) +{ + return setup_fault_attr(&fail_iommu, str); +} +__setup("fail_iommu=", setup_fail_iommu); + +static bool should_fail_iommu(struct device *dev) +{ + return dev->archdata.fail_iommu && should_fail(&fail_iommu, 1); +} + +static int __init fail_iommu_debugfs(void) +{ + struct dentry *dir = fault_create_debugfs_attr("fail_iommu", + NULL, &fail_iommu); + + return IS_ERR(dir) ? PTR_ERR(dir) : 0; +} +late_initcall(fail_iommu_debugfs); + +static ssize_t fail_iommu_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", dev->archdata.fail_iommu); +} + +static ssize_t fail_iommu_store(struct device *dev, + struct device_attribute *attr, const char *buf, + size_t count) +{ + int i; + + if (count > 0 && sscanf(buf, "%d", &i) > 0) + dev->archdata.fail_iommu = (i == 0) ? 0 : 1; + + return count; +} + +static DEVICE_ATTR(fail_iommu, S_IRUGO|S_IWUSR, fail_iommu_show, + fail_iommu_store); + +static int fail_iommu_bus_notify(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct device *dev = data; + + if (action == BUS_NOTIFY_ADD_DEVICE) { + if (device_create_file(dev, &dev_attr_fail_iommu)) + pr_warn("Unable to create IOMMU fault injection sysfs " + "entries\n"); + } else if (action == BUS_NOTIFY_DEL_DEVICE) { + device_remove_file(dev, &dev_attr_fail_iommu); + } + + return 0; +} + +static struct notifier_block fail_iommu_bus_notifier = { + .notifier_call = fail_iommu_bus_notify +}; + +static int __init fail_iommu_setup(void) +{ +#ifdef CONFIG_PCI + bus_register_notifier(&pci_bus_type, &fail_iommu_bus_notifier); +#endif +#ifdef CONFIG_IBMVIO + bus_register_notifier(&vio_bus_type, &fail_iommu_bus_notifier); +#endif + + return 0; +} +/* + * Must execute after PCI and VIO subsystem have initialised but before + * devices are probed. + */ +arch_initcall(fail_iommu_setup); +#else +static inline bool should_fail_iommu(struct device *dev) +{ + return false; +} +#endif + static unsigned long iommu_range_alloc(struct device *dev, struct iommu_table *tbl, unsigned long npages, @@ -71,6 +183,9 @@ static unsigned long iommu_range_alloc(struct device *dev, int pass = 0; unsigned long align_mask; unsigned long boundary_size; + unsigned long flags; + unsigned int pool_nr; + struct iommu_pool *pool; align_mask = 0xffffffffffffffffl >> (64 - align_order); @@ -83,36 +198,49 @@ static unsigned long iommu_range_alloc(struct device *dev, return DMA_ERROR_CODE; } - if (handle && *handle) - start = *handle; + if (should_fail_iommu(dev)) + return DMA_ERROR_CODE; + + /* + * We don't need to disable preemption here because any CPU can + * safely use any IOMMU pool. + */ + pool_nr = __raw_get_cpu_var(iommu_pool_hash) & (tbl->nr_pools - 1); + + if (largealloc) + pool = &(tbl->large_pool); else - start = largealloc ? tbl->it_largehint : tbl->it_hint; + pool = &(tbl->pools[pool_nr]); - /* Use only half of the table for small allocs (15 pages or less) */ - limit = largealloc ? tbl->it_size : tbl->it_halfpoint; + spin_lock_irqsave(&(pool->lock), flags); - if (largealloc && start < tbl->it_halfpoint) - start = tbl->it_halfpoint; +again: + if ((pass == 0) && handle && *handle) + start = *handle; + else + start = pool->hint; + + limit = pool->end; /* The case below can happen if we have a small segment appended * to a large, or when the previous alloc was at the very end of * the available space. If so, go back to the initial start. */ if (start >= limit) - start = largealloc ? tbl->it_largehint : tbl->it_hint; - - again: + start = pool->start; if (limit + tbl->it_offset > mask) { limit = mask - tbl->it_offset + 1; /* If we're constrained on address range, first try * at the masked hint to avoid O(n) search complexity, - * but on second pass, start at 0. + * but on second pass, start at 0 in pool 0. */ - if ((start & mask) >= limit || pass > 0) - start = 0; - else + if ((start & mask) >= limit || pass > 0) { + pool = &(tbl->pools[0]); + start = pool->start; + } else { start &= mask; + } } if (dev) @@ -126,16 +254,25 @@ static unsigned long iommu_range_alloc(struct device *dev, tbl->it_offset, boundary_size >> IOMMU_PAGE_SHIFT, align_mask); if (n == -1) { - if (likely(pass < 2)) { - /* First failure, just rescan the half of the table. - * Second failure, rescan the other half of the table. - */ - start = (largealloc ^ pass) ? tbl->it_halfpoint : 0; - limit = pass ? tbl->it_size : limit; + if (likely(pass == 0)) { + /* First try the pool from the start */ + pool->hint = pool->start; pass++; goto again; + + } else if (pass <= tbl->nr_pools) { + /* Now try scanning all the other pools */ + spin_unlock(&(pool->lock)); + pool_nr = (pool_nr + 1) & (tbl->nr_pools - 1); + pool = &tbl->pools[pool_nr]; + spin_lock(&(pool->lock)); + pool->hint = pool->start; + pass++; + goto again; + } else { - /* Third failure, give up */ + /* Give up */ + spin_unlock_irqrestore(&(pool->lock), flags); return DMA_ERROR_CODE; } } @@ -145,10 +282,10 @@ static unsigned long iommu_range_alloc(struct device *dev, /* Bump the hint to a new block for small allocs. */ if (largealloc) { /* Don't bump to new block to avoid fragmentation */ - tbl->it_largehint = end; + pool->hint = end; } else { /* Overflow will be taken care of at the next allocation */ - tbl->it_hint = (end + tbl->it_blocksize - 1) & + pool->hint = (end + tbl->it_blocksize - 1) & ~(tbl->it_blocksize - 1); } @@ -156,6 +293,8 @@ static unsigned long iommu_range_alloc(struct device *dev, if (handle) *handle = end; + spin_unlock_irqrestore(&(pool->lock), flags); + return n; } @@ -165,18 +304,14 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, unsigned long mask, unsigned int align_order, struct dma_attrs *attrs) { - unsigned long entry, flags; + unsigned long entry; dma_addr_t ret = DMA_ERROR_CODE; int build_fail; - spin_lock_irqsave(&(tbl->it_lock), flags); - entry = iommu_range_alloc(dev, tbl, npages, NULL, mask, align_order); - if (unlikely(entry == DMA_ERROR_CODE)) { - spin_unlock_irqrestore(&(tbl->it_lock), flags); + if (unlikely(entry == DMA_ERROR_CODE)) return DMA_ERROR_CODE; - } entry += tbl->it_offset; /* Offset into real TCE table */ ret = entry << IOMMU_PAGE_SHIFT; /* Set the return dma address */ @@ -193,8 +328,6 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, */ if (unlikely(build_fail)) { __iommu_free(tbl, ret, npages); - - spin_unlock_irqrestore(&(tbl->it_lock), flags); return DMA_ERROR_CODE; } @@ -202,16 +335,14 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, if (ppc_md.tce_flush) ppc_md.tce_flush(tbl); - spin_unlock_irqrestore(&(tbl->it_lock), flags); - /* Make sure updates are seen by hardware */ mb(); return ret; } -static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, - unsigned int npages) +static bool iommu_free_check(struct iommu_table *tbl, dma_addr_t dma_addr, + unsigned int npages) { unsigned long entry, free_entry; @@ -231,20 +362,57 @@ static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, printk(KERN_INFO "\tindex = 0x%llx\n", (u64)tbl->it_index); WARN_ON(1); } - return; + + return false; + } + + return true; +} + +static struct iommu_pool *get_pool(struct iommu_table *tbl, + unsigned long entry) +{ + struct iommu_pool *p; + unsigned long largepool_start = tbl->large_pool.start; + + /* The large pool is the last pool at the top of the table */ + if (entry >= largepool_start) { + p = &tbl->large_pool; + } else { + unsigned int pool_nr = entry / tbl->poolsize; + + BUG_ON(pool_nr > tbl->nr_pools); + p = &tbl->pools[pool_nr]; } + return p; +} + +static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, + unsigned int npages) +{ + unsigned long entry, free_entry; + unsigned long flags; + struct iommu_pool *pool; + + entry = dma_addr >> IOMMU_PAGE_SHIFT; + free_entry = entry - tbl->it_offset; + + pool = get_pool(tbl, free_entry); + + if (!iommu_free_check(tbl, dma_addr, npages)) + return; + ppc_md.tce_free(tbl, entry, npages); + + spin_lock_irqsave(&(pool->lock), flags); bitmap_clear(tbl->it_map, free_entry, npages); + spin_unlock_irqrestore(&(pool->lock), flags); } static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, unsigned int npages) { - unsigned long flags; - - spin_lock_irqsave(&(tbl->it_lock), flags); - __iommu_free(tbl, dma_addr, npages); /* Make sure TLB cache is flushed if the HW needs it. We do @@ -253,8 +421,6 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, */ if (ppc_md.tce_flush) ppc_md.tce_flush(tbl); - - spin_unlock_irqrestore(&(tbl->it_lock), flags); } int iommu_map_sg(struct device *dev, struct iommu_table *tbl, @@ -263,7 +429,6 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl, struct dma_attrs *attrs) { dma_addr_t dma_next = 0, dma_addr; - unsigned long flags; struct scatterlist *s, *outs, *segstart; int outcount, incount, i, build_fail = 0; unsigned int align; @@ -285,8 +450,6 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl, DBG("sg mapping %d elements:\n", nelems); - spin_lock_irqsave(&(tbl->it_lock), flags); - max_seg_size = dma_get_max_seg_size(dev); for_each_sg(sglist, s, nelems, i) { unsigned long vaddr, npages, entry, slen; @@ -369,8 +532,6 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl, if (ppc_md.tce_flush) ppc_md.tce_flush(tbl); - spin_unlock_irqrestore(&(tbl->it_lock), flags); - DBG("mapped %d elements:\n", outcount); /* For the sake of iommu_unmap_sg, we clear out the length in the @@ -402,7 +563,6 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl, if (s == outs) break; } - spin_unlock_irqrestore(&(tbl->it_lock), flags); return 0; } @@ -412,15 +572,12 @@ void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist, struct dma_attrs *attrs) { struct scatterlist *sg; - unsigned long flags; BUG_ON(direction == DMA_NONE); if (!tbl) return; - spin_lock_irqsave(&(tbl->it_lock), flags); - sg = sglist; while (nelems--) { unsigned int npages; @@ -440,8 +597,6 @@ void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist, */ if (ppc_md.tce_flush) ppc_md.tce_flush(tbl); - - spin_unlock_irqrestore(&(tbl->it_lock), flags); } static void iommu_table_clear(struct iommu_table *tbl) @@ -494,9 +649,8 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid) unsigned long sz; static int welcomed = 0; struct page *page; - - /* Set aside 1/4 of the table for large allocations. */ - tbl->it_halfpoint = tbl->it_size * 3 / 4; + unsigned int i; + struct iommu_pool *p; /* number of bytes needed for the bitmap */ sz = (tbl->it_size + 7) >> 3; @@ -515,9 +669,28 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid) if (tbl->it_offset == 0) set_bit(0, tbl->it_map); - tbl->it_hint = 0; - tbl->it_largehint = tbl->it_halfpoint; - spin_lock_init(&tbl->it_lock); + /* We only split the IOMMU table if we have 1GB or more of space */ + if ((tbl->it_size << IOMMU_PAGE_SHIFT) >= (1UL * 1024 * 1024 * 1024)) + tbl->nr_pools = IOMMU_NR_POOLS; + else + tbl->nr_pools = 1; + + /* We reserve the top 1/4 of the table for large allocations */ + tbl->poolsize = (tbl->it_size * 3 / 4) / tbl->nr_pools; + + for (i = 0; i < tbl->nr_pools; i++) { + p = &tbl->pools[i]; + spin_lock_init(&(p->lock)); + p->start = tbl->poolsize * i; + p->hint = p->start; + p->end = p->start + tbl->poolsize; + } + + p = &tbl->large_pool; + spin_lock_init(&(p->lock)); + p->start = tbl->poolsize * i; + p->hint = p->start; + p->end = tbl->it_size; iommu_table_clear(tbl); diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 62bdf238966..02c167db6ba 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -302,7 +302,7 @@ static void kvm_patch_ins_wrtee(u32 *inst, u32 rt, int imm_one) if (imm_one) { p[kvm_emulate_wrtee_reg_offs] = - KVM_INST_LI | __PPC_RT(30) | MSR_EE; + KVM_INST_LI | __PPC_RT(R30) | MSR_EE; } else { /* Make clobbered registers work too */ switch (get_rt(rt)) { diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 386d57f66f2..407e293aad2 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -179,7 +179,7 @@ _GLOBAL(low_choose_750fx_pll) mtspr SPRN_HID1,r4 /* Store new HID1 image */ - rlwinm r6,r1,0,0,(31-THREAD_SHIFT) + CURRENT_THREAD_INFO(r6, r1) lwz r6,TI_CPU(r6) slwi r6,r6,2 addis r6,r6,nap_save_hid1@ha @@ -699,7 +699,7 @@ _GLOBAL(kernel_thread) #ifdef CONFIG_SMP _GLOBAL(start_secondary_resume) /* Reset stack */ - rlwinm r1,r1,0,0,(31-THREAD_SHIFT) /* current_thread_info() */ + CURRENT_THREAD_INFO(r1, r1) addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD li r3,0 stw r3,0(r1) /* Zero the stack frame pointer */ diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 616921ef143..565b78625a3 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -301,11 +301,6 @@ _GLOBAL(real_writeb) #ifdef CONFIG_PPC_PASEMI -/* No support in all binutils for these yet, so use defines */ -#define LBZCIX(RT,RA,RB) .long (0x7c0006aa|(RT<<21)|(RA<<16)|(RB << 11)) -#define STBCIX(RS,RA,RB) .long (0x7c0007aa|(RS<<21)|(RA<<16)|(RB << 11)) - - _GLOBAL(real_205_readb) mfmsr r7 ori r0,r7,MSR_DR @@ -314,7 +309,7 @@ _GLOBAL(real_205_readb) mtmsrd r0 sync isync - LBZCIX(r3,0,r3) + LBZCIX(R3,R0,R3) isync mtmsrd r7 sync @@ -329,7 +324,7 @@ _GLOBAL(real_205_writeb) mtmsrd r0 sync isync - STBCIX(r3,0,r4) + STBCIX(R3,R0,R4) isync mtmsrd r7 sync diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 8e78e93c818..0f75bd50040 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1646,7 +1646,6 @@ void __devinit pcibios_scan_phb(struct pci_controller *hose) pci_free_resource_list(&resources); return; } - bus->secondary = hose->first_busno; hose->bus = bus; /* Get probe mode and perform scan */ diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c index 89dde171a6f..d7dd42bd145 100644 --- a/arch/powerpc/kernel/pci_of_scan.c +++ b/arch/powerpc/kernel/pci_of_scan.c @@ -198,7 +198,6 @@ EXPORT_SYMBOL(of_create_pci_dev); /** * of_scan_pci_bridge - Set up a PCI bridge and scan for child nodes - * @node: device tree node of bridge * @dev: pci_dev structure for the bridge * * of_scan_bus() calls this routine for each PCI bridge that it finds, and diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index afd4f051f3f..bdc499c1787 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -720,6 +720,33 @@ static int powerpc_debugfs_init(void) arch_initcall(powerpc_debugfs_init); #endif +#ifdef CONFIG_BOOKE_WDT +extern u32 booke_wdt_enabled; +extern u32 booke_wdt_period; + +/* Checks wdt=x and wdt_period=xx command-line option */ +notrace int __init early_parse_wdt(char *p) +{ + if (p && strncmp(p, "0", 1) != 0) + booke_wdt_enabled = 1; + + return 0; +} +early_param("wdt", early_parse_wdt); + +int __init early_parse_wdt_period(char *p) +{ + unsigned long ret; + if (p) { + if (!kstrtol(p, 0, &ret)) + booke_wdt_period = ret; + } + + return 0; +} +early_param("wdt_period", early_parse_wdt_period); +#endif /* CONFIG_BOOKE_WDT */ + void ppc_printk_progress(char *s, unsigned short hex) { pr_info("%s\n", s); diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index ec8a53fa9e8..a8f54ecb091 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -149,30 +149,6 @@ notrace void __init machine_init(u64 dt_ptr) ppc_md.progress("id mach(): done", 0x200); } -#ifdef CONFIG_BOOKE_WDT -extern u32 booke_wdt_enabled; -extern u32 booke_wdt_period; - -/* Checks wdt=x and wdt_period=xx command-line option */ -notrace int __init early_parse_wdt(char *p) -{ - if (p && strncmp(p, "0", 1) != 0) - booke_wdt_enabled = 1; - - return 0; -} -early_param("wdt", early_parse_wdt); - -int __init early_parse_wdt_period (char *p) -{ - if (p) - booke_wdt_period = simple_strtoul(p, NULL, 0); - - return 0; -} -early_param("wdt_period", early_parse_wdt_period); -#endif /* CONFIG_BOOKE_WDT */ - /* Checks "l2cr=xxxx" command-line option */ int __init ppc_setup_l2cr(char *str) { diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index e1417c42155..0321007086f 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -48,6 +48,7 @@ #ifdef CONFIG_PPC64 #include <asm/paca.h> #endif +#include <asm/vdso.h> #include <asm/debug.h> #ifdef DEBUG @@ -570,6 +571,8 @@ void __devinit start_secondary(void *unused) #ifdef CONFIG_PPC64 if (system_state == SYSTEM_RUNNING) vdso_data->processorCount++; + + vdso_getcpu_init(); #endif notify_cpu_starting(cpu); set_cpu_online(cpu, true); diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 9eb5b9b536a..b67db22e102 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -706,6 +706,34 @@ static void __init vdso_setup_syscall_map(void) } } +#ifdef CONFIG_PPC64 +int __cpuinit vdso_getcpu_init(void) +{ + unsigned long cpu, node, val; + + /* + * SPRG3 contains the CPU in the bottom 16 bits and the NUMA node in + * the next 16 bits. The VDSO uses this to implement getcpu(). + */ + cpu = get_cpu(); + WARN_ON_ONCE(cpu > 0xffff); + + node = cpu_to_node(cpu); + WARN_ON_ONCE(node > 0xffff); + + val = (cpu & 0xfff) | ((node & 0xffff) << 16); + mtspr(SPRN_SPRG3, val); +#ifdef CONFIG_KVM_BOOK3S_HANDLER + get_paca()->kvm_hstate.sprg3 = val; +#endif + + put_cpu(); + + return 0; +} +/* We need to call this before SMP init */ +early_initcall(vdso_getcpu_init); +#endif static int __init vdso_init(void) { diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile index 9a7946c4173..53e6c9b979e 100644 --- a/arch/powerpc/kernel/vdso32/Makefile +++ b/arch/powerpc/kernel/vdso32/Makefile @@ -1,7 +1,9 @@ # List of files in the vdso, has to be asm only for now -obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o +obj-vdso32-$(CONFIG_PPC64) = getcpu.o +obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o \ + $(obj-vdso32-y) # Build rules diff --git a/arch/powerpc/kernel/vdso32/getcpu.S b/arch/powerpc/kernel/vdso32/getcpu.S new file mode 100644 index 00000000000..47afd08c90f --- /dev/null +++ b/arch/powerpc/kernel/vdso32/getcpu.S @@ -0,0 +1,45 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2012 + * + * Author: Anton Blanchard <anton@au.ibm.com> + */ +#include <asm/ppc_asm.h> +#include <asm/vdso.h> + + .text +/* + * Exact prototype of getcpu + * + * int __kernel_getcpu(unsigned *cpu, unsigned *node); + * + */ +V_FUNCTION_BEGIN(__kernel_getcpu) + .cfi_startproc + mfspr r5,SPRN_USPRG3 + cmpdi cr0,r3,0 + cmpdi cr1,r4,0 + clrlwi r6,r5,16 + rlwinm r7,r5,16,31-15,31-0 + beq cr0,1f + stw r6,0(r3) +1: beq cr1,2f + stw r7,0(r4) +2: crclr cr0*4+so + li r3,0 /* always success */ + blr + .cfi_endproc +V_FUNCTION_END(__kernel_getcpu) diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S index 0546bcd49cd..43200ba2e57 100644 --- a/arch/powerpc/kernel/vdso32/vdso32.lds.S +++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S @@ -147,6 +147,9 @@ VERSION __kernel_sync_dicache_p5; __kernel_sigtramp32; __kernel_sigtramp_rt32; +#ifdef CONFIG_PPC64 + __kernel_getcpu; +#endif local: *; }; diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile index 8c500d8622e..effca9404b1 100644 --- a/arch/powerpc/kernel/vdso64/Makefile +++ b/arch/powerpc/kernel/vdso64/Makefile @@ -1,6 +1,6 @@ # List of files in the vdso, has to be asm only for now -obj-vdso64 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o +obj-vdso64 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o getcpu.o # Build rules diff --git a/arch/powerpc/kernel/vdso64/getcpu.S b/arch/powerpc/kernel/vdso64/getcpu.S new file mode 100644 index 00000000000..47afd08c90f --- /dev/null +++ b/arch/powerpc/kernel/vdso64/getcpu.S @@ -0,0 +1,45 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2012 + * + * Author: Anton Blanchard <anton@au.ibm.com> + */ +#include <asm/ppc_asm.h> +#include <asm/vdso.h> + + .text +/* + * Exact prototype of getcpu + * + * int __kernel_getcpu(unsigned *cpu, unsigned *node); + * + */ +V_FUNCTION_BEGIN(__kernel_getcpu) + .cfi_startproc + mfspr r5,SPRN_USPRG3 + cmpdi cr0,r3,0 + cmpdi cr1,r4,0 + clrlwi r6,r5,16 + rlwinm r7,r5,16,31-15,31-0 + beq cr0,1f + stw r6,0(r3) +1: beq cr1,2f + stw r7,0(r4) +2: crclr cr0*4+so + li r3,0 /* always success */ + blr + .cfi_endproc +V_FUNCTION_END(__kernel_getcpu) diff --git a/arch/powerpc/kernel/vdso64/vdso64.lds.S b/arch/powerpc/kernel/vdso64/vdso64.lds.S index 0e615404e24..e6c1758f358 100644 --- a/arch/powerpc/kernel/vdso64/vdso64.lds.S +++ b/arch/powerpc/kernel/vdso64/vdso64.lds.S @@ -146,6 +146,7 @@ VERSION __kernel_sync_dicache; __kernel_sync_dicache_p5; __kernel_sigtramp_rt64; + __kernel_getcpu; local: *; }; diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index cb87301ccd5..7a421e8fe7c 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -37,8 +37,6 @@ #include <asm/page.h> #include <asm/hvcall.h> -static struct bus_type vio_bus_type; - static struct vio_dev vio_bus_device = { /* fake "parent" device */ .name = "vio", .type = "", @@ -625,7 +623,7 @@ struct dma_map_ops vio_dma_mapping_ops = { * vio_cmo_set_dev_desired - Set desired entitlement for a device * * @viodev: struct vio_dev for device to alter - * @new_desired: new desired entitlement level in bytes + * @desired: new desired entitlement level in bytes * * For use by devices to request a change to their entitlement at runtime or * through sysfs. The desired entitlement level is changed and a balancing @@ -1262,7 +1260,7 @@ static int vio_bus_remove(struct device *dev) /** * vio_register_driver: - Register a new vio driver - * @drv: The vio_driver structure to be registered. + * @viodrv: The vio_driver structure to be registered. */ int __vio_register_driver(struct vio_driver *viodrv, struct module *owner, const char *mod_name) @@ -1282,7 +1280,7 @@ EXPORT_SYMBOL(__vio_register_driver); /** * vio_unregister_driver - Remove registration of vio driver. - * @driver: The vio_driver struct to be removed form registration + * @viodrv: The vio_driver struct to be removed form registration */ void vio_unregister_driver(struct vio_driver *viodrv) { @@ -1397,21 +1395,27 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node) viodev->name = of_node->name; viodev->dev.of_node = of_node_get(of_node); - if (firmware_has_feature(FW_FEATURE_CMO)) - vio_cmo_set_dma_ops(viodev); - else - set_dma_ops(&viodev->dev, &dma_iommu_ops); - set_iommu_table_base(&viodev->dev, vio_build_iommu_table(viodev)); set_dev_node(&viodev->dev, of_node_to_nid(of_node)); /* init generic 'struct device' fields: */ viodev->dev.parent = &vio_bus_device.dev; viodev->dev.bus = &vio_bus_type; viodev->dev.release = vio_dev_release; - /* needed to ensure proper operation of coherent allocations - * later, in case driver doesn't set it explicitly */ - dma_set_mask(&viodev->dev, DMA_BIT_MASK(64)); - dma_set_coherent_mask(&viodev->dev, DMA_BIT_MASK(64)); + + if (of_get_property(viodev->dev.of_node, "ibm,my-dma-window", NULL)) { + if (firmware_has_feature(FW_FEATURE_CMO)) + vio_cmo_set_dma_ops(viodev); + else + set_dma_ops(&viodev->dev, &dma_iommu_ops); + + set_iommu_table_base(&viodev->dev, + vio_build_iommu_table(viodev)); + + /* needed to ensure proper operation of coherent allocations + * later, in case driver doesn't set it explicitly */ + dma_set_mask(&viodev->dev, DMA_BIT_MASK(64)); + dma_set_coherent_mask(&viodev->dev, DMA_BIT_MASK(64)); + } /* register with generic device framework */ if (device_register(&viodev->dev)) { @@ -1491,12 +1495,18 @@ static int __init vio_bus_init(void) if (firmware_has_feature(FW_FEATURE_CMO)) vio_cmo_bus_init(); + return 0; +} +postcore_initcall(vio_bus_init); + +static int __init vio_device_init(void) +{ vio_bus_scan_register_devices("vdevice"); vio_bus_scan_register_devices("ibm,platform-facilities"); return 0; } -__initcall(vio_bus_init); +device_initcall(vio_device_init); static ssize_t name_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -1568,7 +1578,7 @@ static int vio_hotplug(struct device *dev, struct kobj_uevent_env *env) return 0; } -static struct bus_type vio_bus_type = { +struct bus_type vio_bus_type = { .name = "vio", .dev_attrs = vio_dev_attrs, .uevent = vio_hotplug, |