diff options
Diffstat (limited to 'arch/powerpc/kernel')
101 files changed, 5648 insertions, 3016 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 445cb6e39d5..670c312d914 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -2,6 +2,7 @@ # Makefile for the linux kernel. # +CFLAGS_prom.o = -I$(src)/../../../scripts/dtc/libfdt CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror @@ -39,15 +40,14 @@ obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \ obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o +obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o obj64-$(CONFIG_RELOCATABLE) += reloc_64.o obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o -obj-$(CONFIG_PPC_A2) += cpu_setup_a2.o obj-$(CONFIG_PPC64) += vdso64/ obj-$(CONFIG_ALTIVEC) += vecemu.o obj-$(CONFIG_PPC_970_NAP) += idle_power4.o obj-$(CONFIG_PPC_P7_NAP) += idle_power7.o obj-$(CONFIG_PPC_OF) += of_platform.o prom_parse.o -obj-$(CONFIG_PPC_CLOCK) += clock.o procfs-y := proc_powerpc.o obj-$(CONFIG_PROC_FS) += $(procfs-y) rtaspci-$(CONFIG_PPC64)-$(CONFIG_PCI) := rtas_pci.o diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index a27ccd5dc6b..34f55524d45 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -25,14 +25,13 @@ #include <asm/cputable.h> #include <asm/emulated_ops.h> #include <asm/switch_to.h> +#include <asm/disassemble.h> struct aligninfo { unsigned char len; unsigned char flags; }; -#define IS_XFORM(inst) (((inst) >> 26) == 31) -#define IS_DSFORM(inst) (((inst) >> 26) >= 56) #define INVALID { 0, 0 } @@ -54,8 +53,6 @@ struct aligninfo { /* DSISR bits reported for a DCBZ instruction: */ #define DCBZ 0x5f /* 8xx/82xx dcbz faults when cache not enabled */ -#define SWAP(a, b) (t = (a), (a) = (b), (b) = t) - /* * The PowerPC stores certain bits of the instruction that caused the * alignment exception in the DSISR register. This array maps those @@ -75,7 +72,7 @@ static struct aligninfo aligninfo[128] = { { 8, LD+F }, /* 00 0 1001: lfd */ { 4, ST+F+S }, /* 00 0 1010: stfs */ { 8, ST+F }, /* 00 0 1011: stfd */ - INVALID, /* 00 0 1100 */ + { 16, LD }, /* 00 0 1100: lq */ { 8, LD }, /* 00 0 1101: ld/ldu/lwa */ INVALID, /* 00 0 1110 */ { 8, ST }, /* 00 0 1111: std/stdu */ @@ -142,7 +139,7 @@ static struct aligninfo aligninfo[128] = { { 2, LD+SW }, /* 10 0 1100: lhbrx */ { 4, LD+SE }, /* 10 0 1101 lwa */ { 2, ST+SW }, /* 10 0 1110: sthbrx */ - INVALID, /* 10 0 1111 */ + { 16, ST }, /* 10 0 1111: stq */ INVALID, /* 10 1 0000 */ INVALID, /* 10 1 0001 */ INVALID, /* 10 1 0010 */ @@ -194,37 +191,6 @@ static struct aligninfo aligninfo[128] = { }; /* - * Create a DSISR value from the instruction - */ -static inline unsigned make_dsisr(unsigned instr) -{ - unsigned dsisr; - - - /* bits 6:15 --> 22:31 */ - dsisr = (instr & 0x03ff0000) >> 16; - - if (IS_XFORM(instr)) { - /* bits 29:30 --> 15:16 */ - dsisr |= (instr & 0x00000006) << 14; - /* bit 25 --> 17 */ - dsisr |= (instr & 0x00000040) << 8; - /* bits 21:24 --> 18:21 */ - dsisr |= (instr & 0x00000780) << 3; - } else { - /* bit 5 --> 17 */ - dsisr |= (instr & 0x04000000) >> 12; - /* bits 1: 4 --> 18:21 */ - dsisr |= (instr & 0x78000000) >> 17; - /* bits 30:31 --> 12:13 */ - if (IS_DSFORM(instr)) - dsisr |= (instr & 0x00000003) << 18; - } - - return dsisr; -} - -/* * The dcbz (data cache block zero) instruction * gives an alignment fault if used on non-cacheable * memory. We handle the fault mainly for the @@ -256,11 +222,17 @@ static int emulate_dcbz(struct pt_regs *regs, unsigned char __user *addr) * bottom 4 bytes of each register, and the loads clear the * top 4 bytes of the affected register. */ +#ifdef __BIG_ENDIAN__ #ifdef CONFIG_PPC64 #define REG_BYTE(rp, i) *((u8 *)((rp) + ((i) >> 2)) + ((i) & 3) + 4) #else #define REG_BYTE(rp, i) *((u8 *)(rp) + (i)) #endif +#endif + +#ifdef __LITTLE_ENDIAN__ +#define REG_BYTE(rp, i) (*(((u8 *)((rp) + ((i)>>2)) + ((i)&3)))) +#endif #define SWIZ_PTR(p) ((unsigned char __user *)((p) ^ swiz)) @@ -305,6 +277,15 @@ static int emulate_multiple(struct pt_regs *regs, unsigned char __user *addr, nb0 = nb + reg * 4 - 128; nb = 128 - reg * 4; } +#ifdef __LITTLE_ENDIAN__ + /* + * String instructions are endian neutral but the code + * below is not. Force byte swapping on so that the + * effects of swizzling are undone in the load/store + * loops below. + */ + flags ^= SW; +#endif } else { /* lwm, stmw */ nb = (32 - reg) * 4; @@ -372,8 +353,6 @@ static int emulate_fp_pair(unsigned char __user *addr, unsigned int reg, char *ptr1 = (char *) ¤t->thread.TS_FPR(reg+1); int i, ret, sw = 0; - if (!(flags & F)) - return 0; if (reg & 1) return 0; /* invalid form: FRS/FRT must be even */ if (flags & SW) @@ -393,6 +372,34 @@ static int emulate_fp_pair(unsigned char __user *addr, unsigned int reg, return 1; /* exception handled and fixed up */ } +#ifdef CONFIG_PPC64 +static int emulate_lq_stq(struct pt_regs *regs, unsigned char __user *addr, + unsigned int reg, unsigned int flags) +{ + char *ptr0 = (char *)®s->gpr[reg]; + char *ptr1 = (char *)®s->gpr[reg+1]; + int i, ret, sw = 0; + + if (reg & 1) + return 0; /* invalid form: GPR must be even */ + if (flags & SW) + sw = 7; + ret = 0; + for (i = 0; i < 8; ++i) { + if (!(flags & ST)) { + ret |= __get_user(ptr0[i^sw], addr + i); + ret |= __get_user(ptr1[i^sw], addr + i + 8); + } else { + ret |= __put_user(ptr0[i^sw], addr + i); + ret |= __put_user(ptr1[i^sw], addr + i + 8); + } + } + if (ret) + return -EFAULT; + return 1; /* exception handled and fixed up */ +} +#endif /* CONFIG_PPC64 */ + #ifdef CONFIG_SPE static struct aligninfo spe_aligninfo[32] = { @@ -458,7 +465,7 @@ static struct aligninfo spe_aligninfo[32] = { static int emulate_spe(struct pt_regs *regs, unsigned int reg, unsigned int instr) { - int t, ret; + int ret; union { u64 ll; u32 w[2]; @@ -581,24 +588,18 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg, if (flags & SW) { switch (flags & 0xf0) { case E8: - SWAP(data.v[0], data.v[7]); - SWAP(data.v[1], data.v[6]); - SWAP(data.v[2], data.v[5]); - SWAP(data.v[3], data.v[4]); + data.ll = swab64(data.ll); break; case E4: - - SWAP(data.v[0], data.v[3]); - SWAP(data.v[1], data.v[2]); - SWAP(data.v[4], data.v[7]); - SWAP(data.v[5], data.v[6]); + data.w[0] = swab32(data.w[0]); + data.w[1] = swab32(data.w[1]); break; /* Its half word endian */ default: - SWAP(data.v[0], data.v[1]); - SWAP(data.v[2], data.v[3]); - SWAP(data.v[4], data.v[5]); - SWAP(data.v[6], data.v[7]); + data.h[0] = swab16(data.h[0]); + data.h[1] = swab16(data.h[1]); + data.h[2] = swab16(data.h[2]); + data.h[3] = swab16(data.h[3]); break; } } @@ -658,14 +659,31 @@ static int emulate_vsx(unsigned char __user *addr, unsigned int reg, flush_vsx_to_thread(current); if (reg < 32) - ptr = (char *) ¤t->thread.TS_FPR(reg); + ptr = (char *) ¤t->thread.fp_state.fpr[reg][0]; else - ptr = (char *) ¤t->thread.vr[reg - 32]; + ptr = (char *) ¤t->thread.vr_state.vr[reg - 32]; lptr = (unsigned long *) ptr; +#ifdef __LITTLE_ENDIAN__ + if (flags & SW) { + elsize = length; + sw = length-1; + } else { + /* + * The elements are BE ordered, even in LE mode, so process + * them in reverse order. + */ + addr += length - elsize; + + /* 8 byte memory accesses go in the top 8 bytes of the VR */ + if (length == 8) + ptr += 8; + } +#else if (flags & SW) sw = elsize-1; +#endif for (j = 0; j < length; j += elsize) { for (i = 0; i < elsize; ++i) { @@ -675,19 +693,31 @@ static int emulate_vsx(unsigned char __user *addr, unsigned int reg, ret |= __get_user(ptr[i^sw], addr + i); } ptr += elsize; +#ifdef __LITTLE_ENDIAN__ + addr -= elsize; +#else addr += elsize; +#endif } +#ifdef __BIG_ENDIAN__ +#define VSX_HI 0 +#define VSX_LO 1 +#else +#define VSX_HI 1 +#define VSX_LO 0 +#endif + if (!ret) { if (flags & U) regs->gpr[areg] = regs->dar; /* Splat load copies the same data to top and bottom 8 bytes */ if (flags & SPLT) - lptr[1] = lptr[0]; - /* For 8 byte loads, zero the top 8 bytes */ + lptr[VSX_LO] = lptr[VSX_HI]; + /* For 8 byte loads, zero the low 8 bytes */ else if (!(flags & ST) && (8 == length)) - lptr[1] = 0; + lptr[VSX_LO] = 0; } else return -EFAULT; @@ -710,18 +740,28 @@ int fix_alignment(struct pt_regs *regs) unsigned int dsisr; unsigned char __user *addr; unsigned long p, swiz; - int ret, t; - union { + int ret, i; + union data { u64 ll; double dd; unsigned char v[8]; struct { +#ifdef __LITTLE_ENDIAN__ + int low32; + unsigned hi32; +#else unsigned hi32; int low32; +#endif } x32; struct { +#ifdef __LITTLE_ENDIAN__ + short low16; + unsigned char hi48[6]; +#else unsigned char hi48[6]; short low16; +#endif } x16; } data; @@ -780,8 +820,9 @@ int fix_alignment(struct pt_regs *regs) /* Byteswap little endian loads and stores */ swiz = 0; - if (regs->msr & MSR_LE) { + if ((regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE)) { flags ^= SW; +#ifdef __BIG_ENDIAN__ /* * So-called "PowerPC little endian" mode works by * swizzling addresses rather than by actually doing @@ -794,6 +835,7 @@ int fix_alignment(struct pt_regs *regs) */ if (cpu_has_feature(CPU_FTR_PPC_LE)) swiz = 7; +#endif } /* DAR has the operand effective address */ @@ -818,7 +860,7 @@ int fix_alignment(struct pt_regs *regs) elsize = 8; flags = 0; - if (regs->msr & MSR_LE) + if ((regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE)) flags |= SW; if (instruction & 0x100) flags |= ST; @@ -866,10 +908,20 @@ int fix_alignment(struct pt_regs *regs) flush_fp_to_thread(current); } - /* Special case for 16-byte FP loads and stores */ - if (nb == 16) { - PPC_WARN_ALIGNMENT(fp_pair, regs); - return emulate_fp_pair(addr, reg, flags); + if ((nb == 16)) { + if (flags & F) { + /* Special case for 16-byte FP loads and stores */ + PPC_WARN_ALIGNMENT(fp_pair, regs); + return emulate_fp_pair(addr, reg, flags); + } else { +#ifdef CONFIG_PPC64 + /* Special case for 16-byte loads and stores */ + PPC_WARN_ALIGNMENT(lq_stq, regs); + return emulate_lq_stq(regs, addr, reg, flags); +#else + return 0; +#endif + } } PPC_WARN_ALIGNMENT(unaligned, regs); @@ -878,32 +930,36 @@ int fix_alignment(struct pt_regs *regs) * get it from register values */ if (!(flags & ST)) { - data.ll = 0; - ret = 0; - p = (unsigned long) addr; + unsigned int start = 0; + switch (nb) { - case 8: - ret |= __get_user_inatomic(data.v[0], SWIZ_PTR(p++)); - ret |= __get_user_inatomic(data.v[1], SWIZ_PTR(p++)); - ret |= __get_user_inatomic(data.v[2], SWIZ_PTR(p++)); - ret |= __get_user_inatomic(data.v[3], SWIZ_PTR(p++)); case 4: - ret |= __get_user_inatomic(data.v[4], SWIZ_PTR(p++)); - ret |= __get_user_inatomic(data.v[5], SWIZ_PTR(p++)); + start = offsetof(union data, x32.low32); + break; case 2: - ret |= __get_user_inatomic(data.v[6], SWIZ_PTR(p++)); - ret |= __get_user_inatomic(data.v[7], SWIZ_PTR(p++)); - if (unlikely(ret)) - return -EFAULT; + start = offsetof(union data, x16.low16); + break; } + + data.ll = 0; + ret = 0; + p = (unsigned long)addr; + + for (i = 0; i < nb; i++) + ret |= __get_user_inatomic(data.v[start + i], + SWIZ_PTR(p++)); + + if (unlikely(ret)) + return -EFAULT; + } else if (flags & F) { - data.dd = current->thread.TS_FPR(reg); + data.ll = current->thread.TS_FPR(reg); if (flags & S) { /* Single-precision FP store requires conversion... */ #ifdef CONFIG_PPC_FPU preempt_disable(); enable_kernel_fp(); - cvt_df(&data.dd, (float *)&data.v[4]); + cvt_df(&data.dd, (float *)&data.x32.low32); preempt_enable(); #else return 0; @@ -915,17 +971,13 @@ int fix_alignment(struct pt_regs *regs) if (flags & SW) { switch (nb) { case 8: - SWAP(data.v[0], data.v[7]); - SWAP(data.v[1], data.v[6]); - SWAP(data.v[2], data.v[5]); - SWAP(data.v[3], data.v[4]); + data.ll = swab64(data.ll); break; case 4: - SWAP(data.v[4], data.v[7]); - SWAP(data.v[5], data.v[6]); + data.x32.low32 = swab32(data.x32.low32); break; case 2: - SWAP(data.v[6], data.v[7]); + data.x16.low16 = swab16(data.x16.low16); break; } } @@ -947,7 +999,7 @@ int fix_alignment(struct pt_regs *regs) #ifdef CONFIG_PPC_FPU preempt_disable(); enable_kernel_fp(); - cvt_fd((float *)&data.v[4], &data.dd); + cvt_fd((float *)&data.x32.low32, &data.dd); preempt_enable(); #else return 0; @@ -957,25 +1009,28 @@ int fix_alignment(struct pt_regs *regs) /* Store result to memory or update registers */ if (flags & ST) { - ret = 0; - p = (unsigned long) addr; + unsigned int start = 0; + switch (nb) { - case 8: - ret |= __put_user_inatomic(data.v[0], SWIZ_PTR(p++)); - ret |= __put_user_inatomic(data.v[1], SWIZ_PTR(p++)); - ret |= __put_user_inatomic(data.v[2], SWIZ_PTR(p++)); - ret |= __put_user_inatomic(data.v[3], SWIZ_PTR(p++)); case 4: - ret |= __put_user_inatomic(data.v[4], SWIZ_PTR(p++)); - ret |= __put_user_inatomic(data.v[5], SWIZ_PTR(p++)); + start = offsetof(union data, x32.low32); + break; case 2: - ret |= __put_user_inatomic(data.v[6], SWIZ_PTR(p++)); - ret |= __put_user_inatomic(data.v[7], SWIZ_PTR(p++)); + start = offsetof(union data, x16.low16); + break; } + + ret = 0; + p = (unsigned long)addr; + + for (i = 0; i < nb; i++) + ret |= __put_user_inatomic(data.v[start + i], + SWIZ_PTR(p++)); + if (unlikely(ret)) return -EFAULT; } else if (flags & F) - current->thread.TS_FPR(reg) = data.dd; + current->thread.TS_FPR(reg) = data.ll; else regs->gpr[reg] = data.ll; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index d8958be5f31..f5995a91221 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -54,6 +54,7 @@ #endif #if defined(CONFIG_KVM) && defined(CONFIG_PPC_BOOK3S) #include <asm/kvm_book3s.h> +#include <asm/kvm_ppc.h> #endif #ifdef CONFIG_PPC32 @@ -80,25 +81,27 @@ int main(void) DEFINE(TASKTHREADPPR, offsetof(struct task_struct, thread.ppr)); #else DEFINE(THREAD_INFO, offsetof(struct task_struct, stack)); + DEFINE(THREAD_INFO_GAP, _ALIGN_UP(sizeof(struct thread_info), 16)); + DEFINE(KSP_LIMIT, offsetof(struct thread_struct, ksp_limit)); #endif /* CONFIG_PPC64 */ DEFINE(KSP, offsetof(struct thread_struct, ksp)); - DEFINE(KSP_LIMIT, offsetof(struct thread_struct, ksp_limit)); DEFINE(PT_REGS, offsetof(struct thread_struct, regs)); #ifdef CONFIG_BOOKE DEFINE(THREAD_NORMSAVES, offsetof(struct thread_struct, normsave[0])); #endif DEFINE(THREAD_FPEXC_MODE, offsetof(struct thread_struct, fpexc_mode)); - DEFINE(THREAD_FPR0, offsetof(struct thread_struct, fpr[0])); - DEFINE(THREAD_FPSCR, offsetof(struct thread_struct, fpscr)); + DEFINE(THREAD_FPSTATE, offsetof(struct thread_struct, fp_state)); + DEFINE(THREAD_FPSAVEAREA, offsetof(struct thread_struct, fp_save_area)); + DEFINE(FPSTATE_FPSCR, offsetof(struct thread_fp_state, fpscr)); #ifdef CONFIG_ALTIVEC - DEFINE(THREAD_VR0, offsetof(struct thread_struct, vr[0])); + DEFINE(THREAD_VRSTATE, offsetof(struct thread_struct, vr_state)); + DEFINE(THREAD_VRSAVEAREA, offsetof(struct thread_struct, vr_save_area)); DEFINE(THREAD_VRSAVE, offsetof(struct thread_struct, vrsave)); - DEFINE(THREAD_VSCR, offsetof(struct thread_struct, vscr)); DEFINE(THREAD_USED_VR, offsetof(struct thread_struct, used_vr)); + DEFINE(VRSTATE_VSCR, offsetof(struct thread_vr_state, vscr)); #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_VSX - DEFINE(THREAD_VSR0, offsetof(struct thread_struct, fpr)); DEFINE(THREAD_USED_VSR, offsetof(struct thread_struct, used_vsr)); #endif /* CONFIG_VSX */ #ifdef CONFIG_PPC64 @@ -113,7 +116,7 @@ int main(void) #endif /* CONFIG_SPE */ #endif /* CONFIG_PPC64 */ #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) - DEFINE(THREAD_DBCR0, offsetof(struct thread_struct, dbcr0)); + DEFINE(THREAD_DBCR0, offsetof(struct thread_struct, debug.dbcr0)); #endif #ifdef CONFIG_KVM_BOOK3S_32_HANDLER DEFINE(THREAD_KVM_SVCPU, offsetof(struct thread_struct, kvm_shadow_vcpu)); @@ -142,20 +145,12 @@ int main(void) DEFINE(THREAD_TM_PPR, offsetof(struct thread_struct, tm_ppr)); DEFINE(THREAD_TM_DSCR, offsetof(struct thread_struct, tm_dscr)); DEFINE(PT_CKPT_REGS, offsetof(struct thread_struct, ckpt_regs)); - DEFINE(THREAD_TRANSACT_VR0, offsetof(struct thread_struct, - transact_vr[0])); - DEFINE(THREAD_TRANSACT_VSCR, offsetof(struct thread_struct, - transact_vscr)); + DEFINE(THREAD_TRANSACT_VRSTATE, offsetof(struct thread_struct, + transact_vr)); DEFINE(THREAD_TRANSACT_VRSAVE, offsetof(struct thread_struct, transact_vrsave)); - DEFINE(THREAD_TRANSACT_FPR0, offsetof(struct thread_struct, - transact_fpr[0])); - DEFINE(THREAD_TRANSACT_FPSCR, offsetof(struct thread_struct, - transact_fpscr)); -#ifdef CONFIG_VSX - DEFINE(THREAD_TRANSACT_VSR0, offsetof(struct thread_struct, - transact_fpr[0])); -#endif + DEFINE(THREAD_TRANSACT_FPSTATE, offsetof(struct thread_struct, + transact_fp)); /* Local pt_regs on stack for Transactional Memory funcs. */ DEFINE(TM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16); @@ -209,6 +204,15 @@ int main(void) DEFINE(PACA_MC_STACK, offsetof(struct paca_struct, mc_kstack)); DEFINE(PACA_CRIT_STACK, offsetof(struct paca_struct, crit_kstack)); DEFINE(PACA_DBG_STACK, offsetof(struct paca_struct, dbg_kstack)); + DEFINE(PACA_TCD_PTR, offsetof(struct paca_struct, tcd_ptr)); + + DEFINE(TCD_ESEL_NEXT, + offsetof(struct tlb_core_data, esel_next)); + DEFINE(TCD_ESEL_MAX, + offsetof(struct tlb_core_data, esel_max)); + DEFINE(TCD_ESEL_FIRST, + offsetof(struct tlb_core_data, esel_first)); + DEFINE(TCD_LOCK, offsetof(struct tlb_core_data, lock)); #endif /* CONFIG_PPC_BOOK3E */ #ifdef CONFIG_PPC_STD_MMU_64 @@ -238,15 +242,20 @@ int main(void) DEFINE(PACA_DTL_RIDX, offsetof(struct paca_struct, dtl_ridx)); #endif /* CONFIG_PPC_STD_MMU_64 */ DEFINE(PACAEMERGSP, offsetof(struct paca_struct, emergency_sp)); +#ifdef CONFIG_PPC_BOOK3S_64 + DEFINE(PACAMCEMERGSP, offsetof(struct paca_struct, mc_emergency_sp)); + DEFINE(PACA_IN_MCE, offsetof(struct paca_struct, in_mce)); +#endif DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id)); DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state)); + DEFINE(PACA_DSCR, offsetof(struct paca_struct, dscr_default)); DEFINE(PACA_STARTTIME, offsetof(struct paca_struct, starttime)); DEFINE(PACA_STARTTIME_USER, offsetof(struct paca_struct, starttime_user)); DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time)); DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time)); DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save)); DEFINE(PACA_NAPSTATELOST, offsetof(struct paca_struct, nap_state_lost)); - DEFINE(PACA_SPRG3, offsetof(struct paca_struct, sprg3)); + DEFINE(PACA_SPRG_VDSO, offsetof(struct paca_struct, sprg_vdso)); #endif /* CONFIG_PPC64 */ /* RTAS */ @@ -431,21 +440,19 @@ int main(void) DEFINE(VCPU_GUEST_PID, offsetof(struct kvm_vcpu, arch.pid)); DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); DEFINE(VCPU_VRSAVE, offsetof(struct kvm_vcpu, arch.vrsave)); - DEFINE(VCPU_FPRS, offsetof(struct kvm_vcpu, arch.fpr)); - DEFINE(VCPU_FPSCR, offsetof(struct kvm_vcpu, arch.fpscr)); + DEFINE(VCPU_FPRS, offsetof(struct kvm_vcpu, arch.fp.fpr)); #ifdef CONFIG_ALTIVEC - DEFINE(VCPU_VRS, offsetof(struct kvm_vcpu, arch.vr)); - DEFINE(VCPU_VSCR, offsetof(struct kvm_vcpu, arch.vscr)); -#endif -#ifdef CONFIG_VSX - DEFINE(VCPU_VSRS, offsetof(struct kvm_vcpu, arch.vsr)); + DEFINE(VCPU_VRS, offsetof(struct kvm_vcpu, arch.vr.vr)); #endif DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer)); DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr)); DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); +#ifdef CONFIG_PPC_BOOK3S + DEFINE(VCPU_TAR, offsetof(struct kvm_vcpu, arch.tar)); +#endif DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc)); -#ifdef CONFIG_KVM_BOOK3S_64_HV +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.shregs.msr)); DEFINE(VCPU_SRR0, offsetof(struct kvm_vcpu, arch.shregs.srr0)); DEFINE(VCPU_SRR1, offsetof(struct kvm_vcpu, arch.shregs.srr1)); @@ -464,6 +471,9 @@ int main(void) DEFINE(VCPU_SHARED, offsetof(struct kvm_vcpu, arch.shared)); DEFINE(VCPU_SHARED_MSR, offsetof(struct kvm_vcpu_arch_shared, msr)); DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr)); +#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_KVM_BOOK3S_PR_POSSIBLE) + DEFINE(VCPU_SHAREDBE, offsetof(struct kvm_vcpu, arch.shared_big_endian)); +#endif DEFINE(VCPU_SHARED_MAS0, offsetof(struct kvm_vcpu_arch_shared, mas0)); DEFINE(VCPU_SHARED_MAS1, offsetof(struct kvm_vcpu_arch_shared, mas1)); @@ -476,7 +486,7 @@ int main(void) DEFINE(KVM_LPID, offsetof(struct kvm, arch.lpid)); /* book3s */ -#ifdef CONFIG_KVM_BOOK3S_64_HV +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE DEFINE(KVM_SDR1, offsetof(struct kvm, arch.sdr1)); DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid)); DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr)); @@ -495,11 +505,18 @@ int main(void) DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id)); DEFINE(VCPU_PURR, offsetof(struct kvm_vcpu, arch.purr)); DEFINE(VCPU_SPURR, offsetof(struct kvm_vcpu, arch.spurr)); + DEFINE(VCPU_IC, offsetof(struct kvm_vcpu, arch.ic)); + DEFINE(VCPU_VTB, offsetof(struct kvm_vcpu, arch.vtb)); DEFINE(VCPU_DSCR, offsetof(struct kvm_vcpu, arch.dscr)); DEFINE(VCPU_AMR, offsetof(struct kvm_vcpu, arch.amr)); DEFINE(VCPU_UAMOR, offsetof(struct kvm_vcpu, arch.uamor)); + DEFINE(VCPU_IAMR, offsetof(struct kvm_vcpu, arch.iamr)); DEFINE(VCPU_CTRL, offsetof(struct kvm_vcpu, arch.ctrl)); DEFINE(VCPU_DABR, offsetof(struct kvm_vcpu, arch.dabr)); + DEFINE(VCPU_DABRX, offsetof(struct kvm_vcpu, arch.dabrx)); + DEFINE(VCPU_DAWR, offsetof(struct kvm_vcpu, arch.dawr)); + DEFINE(VCPU_DAWRX, offsetof(struct kvm_vcpu, arch.dawrx)); + DEFINE(VCPU_CIABR, offsetof(struct kvm_vcpu, arch.ciabr)); DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags)); DEFINE(VCPU_DEC, offsetof(struct kvm_vcpu, arch.dec)); DEFINE(VCPU_DEC_EXPIRES, offsetof(struct kvm_vcpu, arch.dec_expires)); @@ -508,27 +525,64 @@ int main(void) DEFINE(VCPU_PRODDED, offsetof(struct kvm_vcpu, arch.prodded)); DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr)); DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc)); + DEFINE(VCPU_SPMC, offsetof(struct kvm_vcpu, arch.spmc)); + DEFINE(VCPU_SIAR, offsetof(struct kvm_vcpu, arch.siar)); + DEFINE(VCPU_SDAR, offsetof(struct kvm_vcpu, arch.sdar)); + DEFINE(VCPU_SIER, offsetof(struct kvm_vcpu, arch.sier)); DEFINE(VCPU_SLB, offsetof(struct kvm_vcpu, arch.slb)); DEFINE(VCPU_SLB_MAX, offsetof(struct kvm_vcpu, arch.slb_max)); DEFINE(VCPU_SLB_NR, offsetof(struct kvm_vcpu, arch.slb_nr)); DEFINE(VCPU_FAULT_DSISR, offsetof(struct kvm_vcpu, arch.fault_dsisr)); DEFINE(VCPU_FAULT_DAR, offsetof(struct kvm_vcpu, arch.fault_dar)); + DEFINE(VCPU_INTR_MSR, offsetof(struct kvm_vcpu, arch.intr_msr)); DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); DEFINE(VCPU_TRAP, offsetof(struct kvm_vcpu, arch.trap)); - DEFINE(VCPU_PTID, offsetof(struct kvm_vcpu, arch.ptid)); DEFINE(VCPU_CFAR, offsetof(struct kvm_vcpu, arch.cfar)); + DEFINE(VCPU_PPR, offsetof(struct kvm_vcpu, arch.ppr)); + DEFINE(VCPU_FSCR, offsetof(struct kvm_vcpu, arch.fscr)); + DEFINE(VCPU_SHADOW_FSCR, offsetof(struct kvm_vcpu, arch.shadow_fscr)); + DEFINE(VCPU_PSPB, offsetof(struct kvm_vcpu, arch.pspb)); + DEFINE(VCPU_EBBHR, offsetof(struct kvm_vcpu, arch.ebbhr)); + DEFINE(VCPU_EBBRR, offsetof(struct kvm_vcpu, arch.ebbrr)); + DEFINE(VCPU_BESCR, offsetof(struct kvm_vcpu, arch.bescr)); + DEFINE(VCPU_CSIGR, offsetof(struct kvm_vcpu, arch.csigr)); + DEFINE(VCPU_TACR, offsetof(struct kvm_vcpu, arch.tacr)); + DEFINE(VCPU_TCSCR, offsetof(struct kvm_vcpu, arch.tcscr)); + DEFINE(VCPU_ACOP, offsetof(struct kvm_vcpu, arch.acop)); + DEFINE(VCPU_WORT, offsetof(struct kvm_vcpu, arch.wort)); + DEFINE(VCPU_SHADOW_SRR1, offsetof(struct kvm_vcpu, arch.shadow_srr1)); DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_count)); DEFINE(VCORE_NAP_COUNT, offsetof(struct kvmppc_vcore, nap_count)); DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest)); DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads)); - DEFINE(VCPU_SVCPU, offsetof(struct kvmppc_vcpu_book3s, shadow_vcpu) - - offsetof(struct kvmppc_vcpu_book3s, vcpu)); + DEFINE(VCORE_KVM, offsetof(struct kvmppc_vcore, kvm)); + DEFINE(VCORE_TB_OFFSET, offsetof(struct kvmppc_vcore, tb_offset)); + DEFINE(VCORE_LPCR, offsetof(struct kvmppc_vcore, lpcr)); + DEFINE(VCORE_PCR, offsetof(struct kvmppc_vcore, pcr)); + DEFINE(VCORE_DPDES, offsetof(struct kvmppc_vcore, dpdes)); DEFINE(VCPU_SLB_E, offsetof(struct kvmppc_slb, orige)); DEFINE(VCPU_SLB_V, offsetof(struct kvmppc_slb, origv)); DEFINE(VCPU_SLB_SIZE, sizeof(struct kvmppc_slb)); +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + DEFINE(VCPU_TFHAR, offsetof(struct kvm_vcpu, arch.tfhar)); + DEFINE(VCPU_TFIAR, offsetof(struct kvm_vcpu, arch.tfiar)); + DEFINE(VCPU_TEXASR, offsetof(struct kvm_vcpu, arch.texasr)); + DEFINE(VCPU_GPR_TM, offsetof(struct kvm_vcpu, arch.gpr_tm)); + DEFINE(VCPU_FPRS_TM, offsetof(struct kvm_vcpu, arch.fp_tm.fpr)); + DEFINE(VCPU_VRS_TM, offsetof(struct kvm_vcpu, arch.vr_tm.vr)); + DEFINE(VCPU_VRSAVE_TM, offsetof(struct kvm_vcpu, arch.vrsave_tm)); + DEFINE(VCPU_CR_TM, offsetof(struct kvm_vcpu, arch.cr_tm)); + DEFINE(VCPU_LR_TM, offsetof(struct kvm_vcpu, arch.lr_tm)); + DEFINE(VCPU_CTR_TM, offsetof(struct kvm_vcpu, arch.ctr_tm)); + DEFINE(VCPU_AMR_TM, offsetof(struct kvm_vcpu, arch.amr_tm)); + DEFINE(VCPU_PPR_TM, offsetof(struct kvm_vcpu, arch.ppr_tm)); + DEFINE(VCPU_DSCR_TM, offsetof(struct kvm_vcpu, arch.dscr_tm)); + DEFINE(VCPU_TAR_TM, offsetof(struct kvm_vcpu, arch.tar_tm)); +#endif #ifdef CONFIG_PPC_BOOK3S_64 -#ifdef CONFIG_KVM_BOOK3S_PR +#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE + DEFINE(PACA_SVCPU, offsetof(struct paca_struct, shadow_vcpu)); # define SVCPU_FIELD(x, f) DEFINE(x, offsetof(struct paca_struct, shadow_vcpu.f)) #else # define SVCPU_FIELD(x, f) @@ -568,6 +622,7 @@ int main(void) #ifdef CONFIG_PPC64 SVCPU_FIELD(SVCPU_SLB, slb); SVCPU_FIELD(SVCPU_SLB_MAX, slb_max); + SVCPU_FIELD(SVCPU_SHADOW_FSCR, shadow_fscr); #endif HSTATE_FIELD(HSTATE_HOST_R1, host_r1); @@ -576,11 +631,12 @@ int main(void) HSTATE_FIELD(HSTATE_VMHANDLER, vmhandler); HSTATE_FIELD(HSTATE_SCRATCH0, scratch0); HSTATE_FIELD(HSTATE_SCRATCH1, scratch1); + HSTATE_FIELD(HSTATE_SCRATCH2, scratch2); HSTATE_FIELD(HSTATE_IN_GUEST, in_guest); HSTATE_FIELD(HSTATE_RESTORE_HID5, restore_hid5); HSTATE_FIELD(HSTATE_NAPPING, napping); -#ifdef CONFIG_KVM_BOOK3S_64_HV +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE HSTATE_FIELD(HSTATE_HWTHREAD_REQ, hwthread_req); HSTATE_FIELD(HSTATE_HWTHREAD_STATE, hwthread_state); HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu); @@ -588,6 +644,7 @@ int main(void) HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys); HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr); HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi); + HSTATE_FIELD(HSTATE_PTID, ptid); HSTATE_FIELD(HSTATE_MMCR, host_mmcr); HSTATE_FIELD(HSTATE_PMC, host_pmc); HSTATE_FIELD(HSTATE_PURR, host_purr); @@ -596,10 +653,12 @@ int main(void) HSTATE_FIELD(HSTATE_DABR, dabr); HSTATE_FIELD(HSTATE_DECEXP, dec_expires); DEFINE(IPI_PRIORITY, IPI_PRIORITY); -#endif /* CONFIG_KVM_BOOK3S_64_HV */ +#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ #ifdef CONFIG_PPC_BOOK3S_64 HSTATE_FIELD(HSTATE_CFAR, cfar); + HSTATE_FIELD(HSTATE_PPR, ppr); + HSTATE_FIELD(HSTATE_HOST_FSCR, host_fscr); #endif /* CONFIG_PPC_BOOK3S_64 */ #else /* CONFIG_PPC_BOOK3S */ diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index 65493272787..40198d50b4c 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -12,7 +12,6 @@ #include <linux/cpu.h> #include <linux/cpumask.h> -#include <linux/init.h> #include <linux/kernel.h> #include <linux/kobject.h> #include <linux/list.h> @@ -757,7 +756,10 @@ void cacheinfo_cpu_online(unsigned int cpu_id) cacheinfo_sysfs_populate(cpu_id, cache); } -#ifdef CONFIG_HOTPLUG_CPU /* functions needed for cpu offline */ +/* functions needed to remove cache entry for cpu offline or suspend/resume */ + +#if (defined(CONFIG_PPC_PSERIES) && defined(CONFIG_SUSPEND)) || \ + defined(CONFIG_HOTPLUG_CPU) static struct cache *cache_lookup_by_cpu(unsigned int cpu_id) { @@ -794,6 +796,9 @@ static void remove_cache_dir(struct cache_dir *cache_dir) { remove_index_dirs(cache_dir); + /* Remove cache dir from sysfs */ + kobject_del(cache_dir->kobj); + kobject_put(cache_dir->kobj); kfree(cache_dir); @@ -841,4 +846,4 @@ void cacheinfo_cpu_offline(unsigned int cpu_id) if (cache) cache_cpu_clear(cache, cpu_id); } -#endif /* CONFIG_HOTPLUG_CPU */ +#endif /* (CONFIG_PPC_PSERIES && CONFIG_SUSPEND) || CONFIG_HOTPLUG_CPU */ diff --git a/arch/powerpc/kernel/clock.c b/arch/powerpc/kernel/clock.c deleted file mode 100644 index a764b47791e..00000000000 --- a/arch/powerpc/kernel/clock.c +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Dummy clk implementations for powerpc. - * These need to be overridden in platform code. - */ - -#include <linux/clk.h> -#include <linux/err.h> -#include <linux/errno.h> -#include <linux/export.h> -#include <asm/clk_interface.h> - -struct clk_interface clk_functions; - -struct clk *clk_get(struct device *dev, const char *id) -{ - if (clk_functions.clk_get) - return clk_functions.clk_get(dev, id); - return ERR_PTR(-ENOSYS); -} -EXPORT_SYMBOL(clk_get); - -void clk_put(struct clk *clk) -{ - if (clk_functions.clk_put) - clk_functions.clk_put(clk); -} -EXPORT_SYMBOL(clk_put); - -int clk_enable(struct clk *clk) -{ - if (clk_functions.clk_enable) - return clk_functions.clk_enable(clk); - return -ENOSYS; -} -EXPORT_SYMBOL(clk_enable); - -void clk_disable(struct clk *clk) -{ - if (clk_functions.clk_disable) - clk_functions.clk_disable(clk); -} -EXPORT_SYMBOL(clk_disable); - -unsigned long clk_get_rate(struct clk *clk) -{ - if (clk_functions.clk_get_rate) - return clk_functions.clk_get_rate(clk); - return 0; -} -EXPORT_SYMBOL(clk_get_rate); - -long clk_round_rate(struct clk *clk, unsigned long rate) -{ - if (clk_functions.clk_round_rate) - return clk_functions.clk_round_rate(clk, rate); - return -ENOSYS; -} -EXPORT_SYMBOL(clk_round_rate); - -int clk_set_rate(struct clk *clk, unsigned long rate) -{ - if (clk_functions.clk_set_rate) - return clk_functions.clk_set_rate(clk, rate); - return -ENOSYS; -} -EXPORT_SYMBOL(clk_set_rate); - -struct clk *clk_get_parent(struct clk *clk) -{ - if (clk_functions.clk_get_parent) - return clk_functions.clk_get_parent(clk); - return ERR_PTR(-ENOSYS); -} -EXPORT_SYMBOL(clk_get_parent); - -int clk_set_parent(struct clk *clk, struct clk *parent) -{ - if (clk_functions.clk_set_parent) - return clk_functions.clk_set_parent(clk, parent); - return -ENOSYS; -} -EXPORT_SYMBOL(clk_set_parent); diff --git a/arch/powerpc/kernel/cpu_setup_a2.S b/arch/powerpc/kernel/cpu_setup_a2.S deleted file mode 100644 index 61f079e05b6..00000000000 --- a/arch/powerpc/kernel/cpu_setup_a2.S +++ /dev/null @@ -1,120 +0,0 @@ -/* - * A2 specific assembly support code - * - * Copyright 2009 Ben Herrenschmidt, IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <asm/asm-offsets.h> -#include <asm/ppc_asm.h> -#include <asm/ppc-opcode.h> -#include <asm/processor.h> -#include <asm/reg_a2.h> -#include <asm/reg.h> -#include <asm/thread_info.h> - -/* - * Disable thdid and class fields in ERATs to bump PID to full 14 bits capacity. - * This also prevents external LPID accesses but that isn't a problem when not a - * guest. Under PV, this setting will be ignored and MMUCR will return the right - * number of PID bits we can use. - */ -#define MMUCR1_EXTEND_PID \ - (MMUCR1_ICTID | MMUCR1_ITTID | MMUCR1_DCTID | \ - MMUCR1_DTTID | MMUCR1_DCCD) - -/* - * Use extended PIDs if enabled. - * Don't clear the ERATs on context sync events and enable I & D LRU. - * Enable ERAT back invalidate when tlbwe overwrites an entry. - */ -#define INITIAL_MMUCR1 \ - (MMUCR1_EXTEND_PID | MMUCR1_CSINV_NEVER | MMUCR1_IRRE | \ - MMUCR1_DRRE | MMUCR1_TLBWE_BINV) - -_GLOBAL(__setup_cpu_a2) - /* Some of these are actually thread local and some are - * core local but doing it always won't hurt - */ - -#ifdef CONFIG_PPC_ICSWX - /* Make sure ACOP starts out as zero */ - li r3,0 - mtspr SPRN_ACOP,r3 - - /* Skip the following if we are in Guest mode */ - mfmsr r3 - andis. r0,r3,MSR_GS@h - bne _icswx_skip_guest - - /* Enable icswx instruction */ - mfspr r3,SPRN_A2_CCR2 - ori r3,r3,A2_CCR2_ENABLE_ICSWX - mtspr SPRN_A2_CCR2,r3 - - /* Unmask all CTs in HACOP */ - li r3,-1 - mtspr SPRN_HACOP,r3 -_icswx_skip_guest: -#endif /* CONFIG_PPC_ICSWX */ - - /* Enable doorbell */ - mfspr r3,SPRN_A2_CCR2 - oris r3,r3,A2_CCR2_ENABLE_PC@h - mtspr SPRN_A2_CCR2,r3 - isync - - /* Setup CCR0 to disable power saving for now as it's busted - * in the current implementations. Setup CCR1 to wake on - * interrupts normally (we write the default value but who - * knows what FW may have clobbered...) - */ - li r3,0 - mtspr SPRN_A2_CCR0, r3 - LOAD_REG_IMMEDIATE(r3,0x0f0f0f0f) - mtspr SPRN_A2_CCR1, r3 - - /* Initialise MMUCR1 */ - lis r3,INITIAL_MMUCR1@h - ori r3,r3,INITIAL_MMUCR1@l - mtspr SPRN_MMUCR1,r3 - - /* Set MMUCR2 to enable 4K, 64K, 1M, 16M and 1G pages */ - LOAD_REG_IMMEDIATE(r3, 0x000a7531) - mtspr SPRN_MMUCR2,r3 - - /* Set MMUCR3 to write all thids bit to the TLB */ - LOAD_REG_IMMEDIATE(r3, 0x0000000f) - mtspr SPRN_MMUCR3,r3 - - /* Don't do ERAT stuff if running guest mode */ - mfmsr r3 - andis. r0,r3,MSR_GS@h - bne 1f - - /* Now set the I-ERAT watermark to 15 */ - lis r4,(MMUCR0_TLBSEL_I|MMUCR0_ECL)@h - mtspr SPRN_MMUCR0, r4 - li r4,A2_IERAT_SIZE-1 - PPC_ERATWE(R4,R4,3) - - /* Now set the D-ERAT watermark to 31 */ - lis r4,(MMUCR0_TLBSEL_D|MMUCR0_ECL)@h - mtspr SPRN_MMUCR0, r4 - li r4,A2_DERAT_SIZE-1 - PPC_ERATWE(R4,R4,3) - - /* And invalidate the beast just in case. That won't get rid of - * a bolted entry though it will be in LRU and so will go away eventually - * but let's not bother for now - */ - PPC_ERATILX(0,0,R0) -1: - blr - -_GLOBAL(__restore_cpu_a2) - b __setup_cpu_a2 diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S index bfb18c7290b..4f1393d2007 100644 --- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S +++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S @@ -53,11 +53,57 @@ _GLOBAL(__e500_dcache_setup) isync blr +/* + * FIXME - we haven't yet done testing to determine a reasonable default + * value for PW20_WAIT_IDLE_BIT. + */ +#define PW20_WAIT_IDLE_BIT 50 /* 1ms, TB frequency is 41.66MHZ */ +_GLOBAL(setup_pw20_idle) + mfspr r3, SPRN_PWRMGTCR0 + + /* Set PW20_WAIT bit, enable pw20 state*/ + ori r3, r3, PWRMGTCR0_PW20_WAIT + li r11, PW20_WAIT_IDLE_BIT + + /* Set Automatic PW20 Core Idle Count */ + rlwimi r3, r11, PWRMGTCR0_PW20_ENT_SHIFT, PWRMGTCR0_PW20_ENT + + mtspr SPRN_PWRMGTCR0, r3 + + blr + +/* + * FIXME - we haven't yet done testing to determine a reasonable default + * value for AV_WAIT_IDLE_BIT. + */ +#define AV_WAIT_IDLE_BIT 50 /* 1ms, TB frequency is 41.66MHZ */ +_GLOBAL(setup_altivec_idle) + mfspr r3, SPRN_PWRMGTCR0 + + /* Enable Altivec Idle */ + oris r3, r3, PWRMGTCR0_AV_IDLE_PD_EN@h + li r11, AV_WAIT_IDLE_BIT + + /* Set Automatic AltiVec Idle Count */ + rlwimi r3, r11, PWRMGTCR0_AV_IDLE_CNT_SHIFT, PWRMGTCR0_AV_IDLE_CNT + + mtspr SPRN_PWRMGTCR0, r3 + + blr + _GLOBAL(__setup_cpu_e6500) mflr r6 #ifdef CONFIG_PPC64 - bl .setup_altivec_ivors + bl setup_altivec_ivors + /* Touch IVOR42 only if the CPU supports E.HV category */ + mfspr r10,SPRN_MMUCFG + rlwinm. r10,r10,0,MMUCFG_LPIDSIZE + beq 1f + bl setup_lrat_ivor +1: #endif + bl setup_pw20_idle + bl setup_altivec_idle bl __setup_cpu_e5500 mtlr r6 blr @@ -118,7 +164,15 @@ _GLOBAL(__setup_cpu_e5500) #ifdef CONFIG_PPC_BOOK3E_64 _GLOBAL(__restore_cpu_e6500) mflr r5 - bl .setup_altivec_ivors + bl setup_altivec_ivors + /* Touch IVOR42 only if the CPU supports E.HV category */ + mfspr r10,SPRN_MMUCFG + rlwinm. r10,r10,0,MMUCFG_LPIDSIZE + beq 1f + bl setup_lrat_ivor +1: + bl setup_pw20_idle + bl setup_altivec_idle bl __restore_cpu_e5500 mtlr r5 blr @@ -127,9 +181,9 @@ _GLOBAL(__restore_cpu_e5500) mflr r4 bl __e500_icache_setup bl __e500_dcache_setup - bl .__setup_base_ivors - bl .setup_perfmon_ivor - bl .setup_doorbell_ivors + bl __setup_base_ivors + bl setup_perfmon_ivor + bl setup_doorbell_ivors /* * We only want to touch IVOR38-41 if we're running on hardware * that supports category E.HV. The architectural way to determine @@ -138,7 +192,7 @@ _GLOBAL(__restore_cpu_e5500) mfspr r10,SPRN_MMUCFG rlwinm. r10,r10,0,MMUCFG_LPIDSIZE beq 1f - bl .setup_ehv_ivors + bl setup_ehv_ivors 1: mtlr r4 blr @@ -147,9 +201,9 @@ _GLOBAL(__setup_cpu_e5500) mflr r5 bl __e500_icache_setup bl __e500_dcache_setup - bl .__setup_base_ivors - bl .setup_perfmon_ivor - bl .setup_doorbell_ivors + bl __setup_base_ivors + bl setup_perfmon_ivor + bl setup_doorbell_ivors /* * We only want to touch IVOR38-41 if we're running on hardware * that supports category E.HV. The architectural way to determine @@ -158,7 +212,7 @@ _GLOBAL(__setup_cpu_e5500) mfspr r10,SPRN_MMUCFG rlwinm. r10,r10,0,MMUCFG_LPIDSIZE beq 1f - bl .setup_ehv_ivors + bl setup_ehv_ivors b 2f 1: ld r10,CPU_SPEC_FEATURES(r4) diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index 18b5b9cf8e3..46733535cc0 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -29,7 +29,7 @@ _GLOBAL(__setup_cpu_power7) mtspr SPRN_LPID,r0 mfspr r3,SPRN_LPCR bl __init_LPCR - bl __init_TLB + bl __init_tlb_power7 mtlr r11 blr @@ -42,7 +42,7 @@ _GLOBAL(__restore_cpu_power7) mtspr SPRN_LPID,r0 mfspr r3,SPRN_LPCR bl __init_LPCR - bl __init_TLB + bl __init_tlb_power7 mtlr r11 blr @@ -56,10 +56,10 @@ _GLOBAL(__setup_cpu_power8) li r0,0 mtspr SPRN_LPID,r0 mfspr r3,SPRN_LPCR - oris r3, r3, LPCR_AIL_3@h + ori r3, r3, LPCR_PECEDH bl __init_LPCR bl __init_HFSCR - bl __init_TLB + bl __init_tlb_power8 bl __init_PMU_HV mtlr r11 blr @@ -75,10 +75,10 @@ _GLOBAL(__restore_cpu_power8) li r0,0 mtspr SPRN_LPID,r0 mfspr r3,SPRN_LPCR - oris r3, r3, LPCR_AIL_3@h + ori r3, r3, LPCR_PECEDH bl __init_LPCR bl __init_HFSCR - bl __init_TLB + bl __init_tlb_power8 bl __init_PMU_HV mtlr r11 blr @@ -134,15 +134,31 @@ __init_HFSCR: mtspr SPRN_HFSCR,r3 blr -__init_TLB: - /* - * Clear the TLB using the "IS 3" form of tlbiel instruction - * (invalidate by congruence class). P7 has 128 CCs, P8 has 512 - * so we just always do 512 - */ +/* + * Clear the TLB using the specified IS form of tlbiel instruction + * (invalidate by congruence class). P7 has 128 CCs., P8 has 512. + * + * r3 = IS field + */ +__init_tlb_power7: + li r3,0xc00 /* IS field = 0b11 */ +_GLOBAL(__flush_tlb_power7) + li r6,128 + mtctr r6 + mr r7,r3 /* IS field */ + ptesync +2: tlbiel r7 + addi r7,r7,0x1000 + bdnz 2b + ptesync +1: blr + +__init_tlb_power8: + li r3,0xc00 /* IS field = 0b11 */ +_GLOBAL(__flush_tlb_power8) li r6,512 mtctr r6 - li r7,0xc00 /* IS field = 0b11 */ + mr r7,r3 /* IS field */ ptesync 2: tlbiel r7 addi r7,r7,0x1000 diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 597d954e586..0c157642c2a 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -71,6 +71,10 @@ extern void __restore_cpu_power7(void); extern void __setup_cpu_power8(unsigned long offset, struct cpu_spec* spec); extern void __restore_cpu_power8(void); extern void __restore_cpu_a2(void); +extern void __flush_tlb_power7(unsigned long inval_selector); +extern void __flush_tlb_power8(unsigned long inval_selector); +extern long __machine_check_early_realmode_p7(struct pt_regs *regs); +extern long __machine_check_early_realmode_p8(struct pt_regs *regs); #endif /* CONFIG_PPC64 */ #if defined(CONFIG_E500) extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec); @@ -105,7 +109,8 @@ extern void __restore_cpu_e6500(void); PPC_FEATURE_PSERIES_PERFMON_COMPAT) #define COMMON_USER2_POWER8 (PPC_FEATURE2_ARCH_2_07 | \ PPC_FEATURE2_HTM_COMP | PPC_FEATURE2_DSCR | \ - PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR) + PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR | \ + PPC_FEATURE2_VEC_CRYPTO) #define COMMON_USER_PA6T (COMMON_USER_PPC64 | PPC_FEATURE_PA6T |\ PPC_FEATURE_TRUE_LE | \ PPC_FEATURE_HAS_ALTIVEC_COMP) @@ -440,6 +445,8 @@ static struct cpu_spec __initdata cpu_specs[] = { .oprofile_cpu_type = "ppc64/ibm-compat-v1", .cpu_setup = __setup_cpu_power7, .cpu_restore = __restore_cpu_power7, + .flush_tlb = __flush_tlb_power7, + .machine_check_early = __machine_check_early_realmode_p7, .platform = "power7", }, { /* 2.07-compliant processor, i.e. Power8 "architected" mode */ @@ -456,6 +463,8 @@ static struct cpu_spec __initdata cpu_specs[] = { .oprofile_cpu_type = "ppc64/ibm-compat-v1", .cpu_setup = __setup_cpu_power8, .cpu_restore = __restore_cpu_power8, + .flush_tlb = __flush_tlb_power8, + .machine_check_early = __machine_check_early_realmode_p8, .platform = "power8", }, { /* Power7 */ @@ -474,6 +483,8 @@ static struct cpu_spec __initdata cpu_specs[] = { .oprofile_type = PPC_OPROFILE_POWER4, .cpu_setup = __setup_cpu_power7, .cpu_restore = __restore_cpu_power7, + .flush_tlb = __flush_tlb_power7, + .machine_check_early = __machine_check_early_realmode_p7, .platform = "power7", }, { /* Power7+ */ @@ -492,13 +503,35 @@ static struct cpu_spec __initdata cpu_specs[] = { .oprofile_type = PPC_OPROFILE_POWER4, .cpu_setup = __setup_cpu_power7, .cpu_restore = __restore_cpu_power7, + .flush_tlb = __flush_tlb_power7, + .machine_check_early = __machine_check_early_realmode_p7, .platform = "power7+", }, { /* Power8E */ .pvr_mask = 0xffff0000, .pvr_value = 0x004b0000, .cpu_name = "POWER8E (raw)", - .cpu_features = CPU_FTRS_POWER8, + .cpu_features = CPU_FTRS_POWER8E, + .cpu_user_features = COMMON_USER_POWER8, + .cpu_user_features2 = COMMON_USER2_POWER8, + .mmu_features = MMU_FTRS_POWER8, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .oprofile_cpu_type = "ppc64/power8", + .oprofile_type = PPC_OPROFILE_INVALID, + .cpu_setup = __setup_cpu_power8, + .cpu_restore = __restore_cpu_power8, + .flush_tlb = __flush_tlb_power8, + .machine_check_early = __machine_check_early_realmode_p8, + .platform = "power8", + }, + { /* Power8 DD1: Does not support doorbell IPIs */ + .pvr_mask = 0xffffff00, + .pvr_value = 0x004d0100, + .cpu_name = "POWER8 (raw)", + .cpu_features = CPU_FTRS_POWER8_DD1, .cpu_user_features = COMMON_USER_POWER8, .cpu_user_features2 = COMMON_USER2_POWER8, .mmu_features = MMU_FTRS_POWER8, @@ -510,6 +543,8 @@ static struct cpu_spec __initdata cpu_specs[] = { .oprofile_type = PPC_OPROFILE_INVALID, .cpu_setup = __setup_cpu_power8, .cpu_restore = __restore_cpu_power8, + .flush_tlb = __flush_tlb_power8, + .machine_check_early = __machine_check_early_realmode_p8, .platform = "power8", }, { /* Power8 */ @@ -528,6 +563,8 @@ static struct cpu_spec __initdata cpu_specs[] = { .oprofile_type = PPC_OPROFILE_INVALID, .cpu_setup = __setup_cpu_power8, .cpu_restore = __restore_cpu_power8, + .flush_tlb = __flush_tlb_power8, + .machine_check_early = __machine_check_early_realmode_p8, .platform = "power8", }, { /* Cell Broadband Engine */ @@ -2132,44 +2169,6 @@ static struct cpu_spec __initdata cpu_specs[] = { } #endif /* CONFIG_PPC32 */ #endif /* CONFIG_E500 */ - -#ifdef CONFIG_PPC_A2 - { /* Standard A2 (>= DD2) + FPU core */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x00480000, - .cpu_name = "A2 (>= DD2)", - .cpu_features = CPU_FTRS_A2, - .cpu_user_features = COMMON_USER_PPC64, - .mmu_features = MMU_FTRS_A2, - .icache_bsize = 64, - .dcache_bsize = 64, - .num_pmcs = 0, - .cpu_setup = __setup_cpu_a2, - .cpu_restore = __restore_cpu_a2, - .machine_check = machine_check_generic, - .platform = "ppca2", - }, - { /* This is a default entry to get going, to be replaced by - * a real one at some stage - */ -#define CPU_FTRS_BASE_BOOK3E (CPU_FTR_USE_TB | \ - CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_SMT | \ - CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE) - .pvr_mask = 0x00000000, - .pvr_value = 0x00000000, - .cpu_name = "Book3E", - .cpu_features = CPU_FTRS_BASE_BOOK3E, - .cpu_user_features = COMMON_USER_PPC64, - .mmu_features = MMU_FTR_TYPE_3E | MMU_FTR_USE_TLBILX | - MMU_FTR_USE_TLBIVAX_BCAST | - MMU_FTR_LOCK_BCAST_INVAL, - .icache_bsize = 64, - .dcache_bsize = 64, - .num_pmcs = 0, - .machine_check = machine_check_generic, - .platform = "power6", - }, -#endif /* CONFIG_PPC_A2 */ }; static struct cpu_spec the_cpu_spec; diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index fdcd8f551af..51dbace3269 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -17,7 +17,6 @@ #include <linux/export.h> #include <linux/crash_dump.h> #include <linux/delay.h> -#include <linux/init.h> #include <linux/irq.h> #include <linux/types.h> @@ -82,7 +81,7 @@ void crash_ipi_callback(struct pt_regs *regs) } atomic_inc(&cpus_in_crash); - smp_mb__after_atomic_inc(); + smp_mb__after_atomic(); /* * Starting the kdump boot. diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index 779a78c2643..7a13f378ca2 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -98,17 +98,19 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, size_t csize, unsigned long offset, int userbuf) { void *vaddr; + phys_addr_t paddr; if (!csize) return 0; csize = min_t(size_t, csize, PAGE_SIZE); + paddr = pfn << PAGE_SHIFT; - if ((min_low_pfn < pfn) && (pfn < max_pfn)) { - vaddr = __va(pfn << PAGE_SHIFT); + if (memblock_is_region_memory(paddr, csize)) { + vaddr = __va(paddr); csize = copy_oldmem_vaddr(vaddr, buf, csize, offset, userbuf); } else { - vaddr = __ioremap(pfn << PAGE_SHIFT, PAGE_SIZE, 0); + vaddr = __ioremap(paddr, PAGE_SIZE, 0); csize = copy_oldmem_vaddr(vaddr, buf, csize, offset, userbuf); iounmap(vaddr); } @@ -124,15 +126,15 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, void crash_free_reserved_phys_range(unsigned long begin, unsigned long end) { unsigned long addr; - const u32 *basep, *sizep; + const __be32 *basep, *sizep; unsigned int rtas_start = 0, rtas_end = 0; basep = of_get_property(rtas.dev, "linux,rtas-base", NULL); sizep = of_get_property(rtas.dev, "rtas-size", NULL); if (basep && sizep) { - rtas_start = *basep; - rtas_end = *basep + *sizep; + rtas_start = be32_to_cpup(basep); + rtas_end = rtas_start + be32_to_cpup(sizep); } for (addr = begin; addr < end; addr += PAGE_SIZE) { diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index e4897523de4..54d0116256f 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -83,10 +83,10 @@ static int dma_iommu_dma_supported(struct device *dev, u64 mask) return 0; } - if (tbl->it_offset > (mask >> IOMMU_PAGE_SHIFT)) { + if (tbl->it_offset > (mask >> tbl->it_page_shift)) { dev_info(dev, "Warning: IOMMU offset too big for device mask\n"); dev_info(dev, "mask: 0x%08llx, table offset: 0x%08lx\n", - mask, tbl->it_offset << IOMMU_PAGE_SHIFT); + mask, tbl->it_offset << tbl->it_page_shift); return 0; } else return 1; diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index 8032b97ccdc..ee78f6e49d6 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -191,12 +191,10 @@ EXPORT_SYMBOL(dma_direct_ops); #define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16) -int dma_set_mask(struct device *dev, u64 dma_mask) +int __dma_set_mask(struct device *dev, u64 dma_mask) { struct dma_map_ops *dma_ops = get_dma_ops(dev); - if (ppc_md.dma_set_mask) - return ppc_md.dma_set_mask(dev, dma_mask); if ((dma_ops != NULL) && (dma_ops->set_dma_mask != NULL)) return dma_ops->set_dma_mask(dev, dma_mask); if (!dev->dma_mask || !dma_supported(dev, dma_mask)) @@ -204,6 +202,12 @@ int dma_set_mask(struct device *dev, u64 dma_mask) *dev->dma_mask = dma_mask; return 0; } +int dma_set_mask(struct device *dev, u64 dma_mask) +{ + if (ppc_md.dma_set_mask) + return ppc_md.dma_set_mask(dev, dma_mask); + return __dma_set_mask(dev, dma_mask); +} EXPORT_SYMBOL(dma_set_mask); u64 dma_get_required_mask(struct device *dev) diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 55593ee2d5a..86e25702aac 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -22,18 +22,21 @@ */ #include <linux/delay.h> +#include <linux/debugfs.h> #include <linux/sched.h> #include <linux/init.h> #include <linux/list.h> #include <linux/pci.h> #include <linux/proc_fs.h> #include <linux/rbtree.h> +#include <linux/reboot.h> #include <linux/seq_file.h> #include <linux/spinlock.h> #include <linux/export.h> #include <linux/of.h> #include <linux/atomic.h> +#include <asm/debug.h> #include <asm/eeh.h> #include <asm/eeh_event.h> #include <asm/io.h> @@ -84,24 +87,23 @@ #define EEH_MAX_FAILS 2100000 /* Time to wait for a PCI slot to report status, in milliseconds */ -#define PCI_BUS_RESET_WAIT_MSEC (60*1000) - -/* Platform dependent EEH operations */ -struct eeh_ops *eeh_ops = NULL; - -int eeh_subsystem_enabled; -EXPORT_SYMBOL(eeh_subsystem_enabled); +#define PCI_BUS_RESET_WAIT_MSEC (5*60*1000) /* - * EEH probe mode support. The intention is to support multiple - * platforms for EEH. Some platforms like pSeries do PCI emunation - * based on device tree. However, other platforms like powernv probe - * PCI devices from hardware. The flag is used to distinguish that. - * In addition, struct eeh_ops::probe would be invoked for particular - * OF node or PCI device so that the corresponding PE would be created - * there. + * EEH probe mode support, which is part of the flags, + * is to support multiple platforms for EEH. Some platforms + * like pSeries do PCI emunation based on device tree. + * However, other platforms like powernv probe PCI devices + * from hardware. The flag is used to distinguish that. + * In addition, struct eeh_ops::probe would be invoked for + * particular OF node or PCI device so that the corresponding + * PE would be created there. */ -int eeh_probe_mode; +int eeh_subsystem_flags; +EXPORT_SYMBOL(eeh_subsystem_flags); + +/* Platform dependent EEH operations */ +struct eeh_ops *eeh_ops = NULL; /* Lock to avoid races due to multiple reports of an error */ DEFINE_RAW_SPINLOCK(confirm_error_lock); @@ -132,6 +134,15 @@ static struct eeh_stats eeh_stats; #define IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE) +static int __init eeh_setup(char *str) +{ + if (!strcmp(str, "off")) + eeh_subsystem_flags |= EEH_FORCE_DISABLED; + + return 1; +} +__setup("eeh=", eeh_setup); + /** * eeh_gather_pci_data - Copy assorted PCI config space registers to buff * @edev: device to report data for @@ -144,74 +155,67 @@ static struct eeh_stats eeh_stats; static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len) { struct device_node *dn = eeh_dev_to_of_node(edev); - struct pci_dev *dev = eeh_dev_to_pci_dev(edev); u32 cfg; int cap, i; int n = 0; n += scnprintf(buf+n, len-n, "%s\n", dn->full_name); - printk(KERN_WARNING "EEH: of node=%s\n", dn->full_name); + pr_warn("EEH: of node=%s\n", dn->full_name); eeh_ops->read_config(dn, PCI_VENDOR_ID, 4, &cfg); n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg); - printk(KERN_WARNING "EEH: PCI device/vendor: %08x\n", cfg); + pr_warn("EEH: PCI device/vendor: %08x\n", cfg); eeh_ops->read_config(dn, PCI_COMMAND, 4, &cfg); n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg); - printk(KERN_WARNING "EEH: PCI cmd/status register: %08x\n", cfg); - - if (!dev) { - printk(KERN_WARNING "EEH: no PCI device for this of node\n"); - return n; - } + pr_warn("EEH: PCI cmd/status register: %08x\n", cfg); /* Gather bridge-specific registers */ - if (dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) { + if (edev->mode & EEH_DEV_BRIDGE) { eeh_ops->read_config(dn, PCI_SEC_STATUS, 2, &cfg); n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg); - printk(KERN_WARNING "EEH: Bridge secondary status: %04x\n", cfg); + pr_warn("EEH: Bridge secondary status: %04x\n", cfg); eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &cfg); n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg); - printk(KERN_WARNING "EEH: Bridge control: %04x\n", cfg); + pr_warn("EEH: Bridge control: %04x\n", cfg); } /* Dump out the PCI-X command and status regs */ - cap = pci_find_capability(dev, PCI_CAP_ID_PCIX); + cap = edev->pcix_cap; if (cap) { eeh_ops->read_config(dn, cap, 4, &cfg); n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg); - printk(KERN_WARNING "EEH: PCI-X cmd: %08x\n", cfg); + pr_warn("EEH: PCI-X cmd: %08x\n", cfg); eeh_ops->read_config(dn, cap+4, 4, &cfg); n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg); - printk(KERN_WARNING "EEH: PCI-X status: %08x\n", cfg); + pr_warn("EEH: PCI-X status: %08x\n", cfg); } - /* If PCI-E capable, dump PCI-E cap 10, and the AER */ - cap = pci_find_capability(dev, PCI_CAP_ID_EXP); + /* If PCI-E capable, dump PCI-E cap 10 */ + cap = edev->pcie_cap; if (cap) { n += scnprintf(buf+n, len-n, "pci-e cap10:\n"); - printk(KERN_WARNING - "EEH: PCI-E capabilities and status follow:\n"); + pr_warn("EEH: PCI-E capabilities and status follow:\n"); for (i=0; i<=8; i++) { eeh_ops->read_config(dn, cap+4*i, 4, &cfg); n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); - printk(KERN_WARNING "EEH: PCI-E %02x: %08x\n", i, cfg); + pr_warn("EEH: PCI-E %02x: %08x\n", i, cfg); } + } - cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); - if (cap) { - n += scnprintf(buf+n, len-n, "pci-e AER:\n"); - printk(KERN_WARNING - "EEH: PCI-E AER capability register set follows:\n"); - - for (i=0; i<14; i++) { - eeh_ops->read_config(dn, cap+4*i, 4, &cfg); - n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); - printk(KERN_WARNING "EEH: PCI-E AER %02x: %08x\n", i, cfg); - } + /* If AER capable, dump it */ + cap = edev->aer_cap; + if (cap) { + n += scnprintf(buf+n, len-n, "pci-e AER:\n"); + pr_warn("EEH: PCI-E AER capability register set follows:\n"); + + for (i=0; i<14; i++) { + eeh_ops->read_config(dn, cap+4*i, 4, &cfg); + n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); + pr_warn("EEH: PCI-E AER %02x: %08x\n", i, cfg); } } @@ -232,21 +236,19 @@ void eeh_slot_error_detail(struct eeh_pe *pe, int severity) { size_t loglen = 0; struct eeh_dev *edev, *tmp; - bool valid_cfg_log = true; /* * When the PHB is fenced or dead, it's pointless to collect * the data from PCI config space because it should return * 0xFF's. For ER, we still retrieve the data from the PCI * config space. + * + * For pHyp, we have to enable IO for log retrieval. Otherwise, + * 0xFF's is always returned from PCI config space. */ - if (eeh_probe_mode_dev() && - (pe->type & EEH_PE_PHB) && - (pe->state & (EEH_PE_ISOLATED | EEH_PE_PHB_DEAD))) - valid_cfg_log = false; - - if (valid_cfg_log) { - eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); + if (!(pe->type & EEH_PE_PHB)) { + if (eeh_probe_mode_devtree()) + eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); eeh_ops->configure_bridge(pe); eeh_pe_restore_bars(pe); @@ -309,7 +311,7 @@ static int eeh_phb_check_failure(struct eeh_pe *pe) /* If the PHB has been in problematic state */ eeh_serialize_lock(&flags); - if (phb_pe->state & (EEH_PE_ISOLATED | EEH_PE_PHB_DEAD)) { + if (phb_pe->state & EEH_PE_ISOLATED) { ret = 0; goto out; } @@ -327,11 +329,11 @@ static int eeh_phb_check_failure(struct eeh_pe *pe) /* Isolate the PHB and send event */ eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED); eeh_serialize_unlock(flags); - eeh_send_failure_event(phb_pe); - pr_err("EEH: PHB#%x failure detected\n", - phb_pe->phb->global_number); + pr_err("EEH: PHB#%x failure detected, location: %s\n", + phb_pe->phb->global_number, eeh_pe_loc_get(phb_pe)); dump_stack(); + eeh_send_failure_event(phb_pe); return 1; out: @@ -356,16 +358,17 @@ out: int eeh_dev_check_failure(struct eeh_dev *edev) { int ret; + int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); unsigned long flags; struct device_node *dn; struct pci_dev *dev; - struct eeh_pe *pe; + struct eeh_pe *pe, *parent_pe, *phb_pe; int rc = 0; const char *location; eeh_stats.total_mmio_ffs++; - if (!eeh_subsystem_enabled) + if (!eeh_enabled()) return 0; if (!edev) { @@ -437,14 +440,34 @@ int eeh_dev_check_failure(struct eeh_dev *edev) */ if ((ret < 0) || (ret == EEH_STATE_NOT_SUPPORT) || - (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) == - (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) { + ((ret & active_flags) == active_flags)) { eeh_stats.false_positives++; pe->false_positives++; rc = 0; goto dn_unlock; } + /* + * It should be corner case that the parent PE has been + * put into frozen state as well. We should take care + * that at first. + */ + parent_pe = pe->parent; + while (parent_pe) { + /* Hit the ceiling ? */ + if (parent_pe->type & EEH_PE_PHB) + break; + + /* Frozen parent PE ? */ + ret = eeh_ops->get_state(parent_pe, NULL); + if (ret > 0 && + (ret & active_flags) != active_flags) + pe = parent_pe; + + /* Next parent level */ + parent_pe = parent_pe->parent; + } + eeh_stats.slot_resets++; /* Avoid repeated reports of this failure, including problems @@ -454,16 +477,19 @@ int eeh_dev_check_failure(struct eeh_dev *edev) eeh_pe_state_mark(pe, EEH_PE_ISOLATED); eeh_serialize_unlock(flags); - eeh_send_failure_event(pe); - /* Most EEH events are due to device driver bugs. Having * a stack trace will help the device-driver authors figure * out what happened. So print that out. */ - pr_err("EEH: Frozen PE#%x detected on PHB#%x\n", - pe->addr, pe->phb->global_number); + phb_pe = eeh_phb_pe_get(pe->phb); + pr_err("EEH: Frozen PHB#%x-PE#%x detected\n", + pe->phb->global_number, pe->addr); + pr_err("EEH: PE location: %s, PHB location: %s\n", + eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe)); dump_stack(); + eeh_send_failure_event(pe); + return 1; dn_unlock: @@ -515,16 +541,42 @@ EXPORT_SYMBOL(eeh_check_failure); */ int eeh_pci_enable(struct eeh_pe *pe, int function) { - int rc; + int rc, flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); + + /* + * pHyp doesn't allow to enable IO or DMA on unfrozen PE. + * Also, it's pointless to enable them on unfrozen PE. So + * we have the check here. + */ + if (function == EEH_OPT_THAW_MMIO || + function == EEH_OPT_THAW_DMA) { + rc = eeh_ops->get_state(pe, NULL); + if (rc < 0) + return rc; + + /* Needn't to enable or already enabled */ + if ((rc == EEH_STATE_NOT_SUPPORT) || + ((rc & flags) == flags)) + return 0; + } rc = eeh_ops->set_option(pe, function); if (rc) - pr_warning("%s: Unexpected state change %d on PHB#%d-PE#%x, err=%d\n", - __func__, function, pe->phb->global_number, pe->addr, rc); + pr_warn("%s: Unexpected state change %d on " + "PHB#%d-PE#%x, err=%d\n", + __func__, function, pe->phb->global_number, + pe->addr, rc); rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); - if (rc > 0 && (rc & EEH_STATE_MMIO_ENABLED) && - (function == EEH_OPT_THAW_MMIO)) + if (rc <= 0) + return rc; + + if ((function == EEH_OPT_THAW_MMIO) && + (rc & EEH_STATE_MMIO_ENABLED)) + return 0; + + if ((function == EEH_OPT_THAW_DMA) && + (rc & EEH_STATE_DMA_ENABLED)) return 0; return rc; @@ -612,26 +664,7 @@ static void eeh_reset_pe_once(struct eeh_pe *pe) else eeh_ops->reset(pe, EEH_RESET_HOT); - /* The PCI bus requires that the reset be held high for at least - * a 100 milliseconds. We wait a bit longer 'just in case'. - */ -#define PCI_BUS_RST_HOLD_TIME_MSEC 250 - msleep(PCI_BUS_RST_HOLD_TIME_MSEC); - - /* We might get hit with another EEH freeze as soon as the - * pci slot reset line is dropped. Make sure we don't miss - * these, and clear the flag now. - */ - eeh_pe_state_clear(pe, EEH_PE_ISOLATED); - eeh_ops->reset(pe, EEH_RESET_DEACTIVATE); - - /* After a PCI slot has been reset, the PCI Express spec requires - * a 1.5 second idle time for the bus to stabilize, before starting - * up traffic. - */ -#define PCI_BUS_SETTLE_TIME_MSEC 1800 - msleep(PCI_BUS_SETTLE_TIME_MSEC); } /** @@ -651,6 +684,10 @@ int eeh_reset_pe(struct eeh_pe *pe) for (i=0; i<3; i++) { eeh_reset_pe_once(pe); + /* + * EEH_PE_ISOLATED is expected to be removed after + * BAR restore. + */ rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); if ((rc & flags) == flags) return 0; @@ -687,6 +724,15 @@ void eeh_save_bars(struct eeh_dev *edev) for (i = 0; i < 16; i++) eeh_ops->read_config(dn, i * 4, 4, &edev->config_space[i]); + + /* + * For PCI bridges including root port, we need enable bus + * master explicitly. Otherwise, it can't fetch IODA table + * entries correctly. So we cache the bit in advance so that + * we can restore it after reset, either PHB range or PE range. + */ + if (edev->mode & EEH_DEV_BRIDGE) + edev->config_space[1] |= PCI_COMMAND_MASTER; } /** @@ -739,6 +785,17 @@ int __exit eeh_ops_unregister(const char *name) return -EEXIST; } +static int eeh_reboot_notifier(struct notifier_block *nb, + unsigned long action, void *unused) +{ + eeh_set_enable(false); + return NOTIFY_DONE; +} + +static struct notifier_block eeh_reboot_nb = { + .notifier_call = eeh_reboot_notifier, +}; + /** * eeh_init - EEH initialization * @@ -770,6 +827,14 @@ int eeh_init(void) if (machine_is(powernv) && cnt++ <= 0) return ret; + /* Register reboot notifier */ + ret = register_reboot_notifier(&eeh_reboot_nb); + if (ret) { + pr_warn("%s: Failed to register notifier (%d)\n", + __func__, ret); + return ret; + } + /* call platform initialization function */ if (!eeh_ops) { pr_warning("%s: Platform EEH operation not found\n", @@ -798,8 +863,8 @@ int eeh_init(void) &hose_list, list_node) pci_walk_bus(hose->bus, eeh_ops->dev_probe, NULL); } else { - pr_warning("%s: Invalid probe mode %d\n", - __func__, eeh_probe_mode); + pr_warn("%s: Invalid probe mode %x", + __func__, eeh_subsystem_flags); return -EINVAL; } @@ -814,7 +879,7 @@ int eeh_init(void) return ret; } - if (eeh_subsystem_enabled) + if (eeh_enabled()) pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n"); else pr_warning("EEH: No capable adapters found\n"); @@ -889,7 +954,7 @@ void eeh_add_device_late(struct pci_dev *dev) struct device_node *dn; struct eeh_dev *edev; - if (!dev || !eeh_subsystem_enabled) + if (!dev || !eeh_enabled()) return; pr_debug("EEH: Adding device %s\n", pci_name(dev)); @@ -913,6 +978,13 @@ void eeh_add_device_late(struct pci_dev *dev) eeh_sysfs_remove_device(edev->pdev); edev->mode &= ~EEH_DEV_SYSFS; + /* + * We definitely should have the PCI device removed + * though it wasn't correctly. So we needn't call + * into error handler afterwards. + */ + edev->mode |= EEH_DEV_NO_HANDLER; + edev->pdev = NULL; dev->dev.archdata.edev = NULL; } @@ -990,7 +1062,7 @@ void eeh_remove_device(struct pci_dev *dev) { struct eeh_dev *edev; - if (!dev || !eeh_subsystem_enabled) + if (!dev || !eeh_enabled()) return; edev = pci_dev_to_eeh_dev(dev); @@ -1015,6 +1087,14 @@ void eeh_remove_device(struct pci_dev *dev) else edev->mode |= EEH_DEV_DISCONNECTED; + /* + * We're removing from the PCI subsystem, that means + * the PCI device driver can't support EEH or not + * well. So we rely on hotplug completely to do recovery + * for the specific PCI device. + */ + edev->mode |= EEH_DEV_NO_HANDLER; + eeh_addr_cache_rmv_dev(dev); eeh_sysfs_remove_device(dev); edev->mode &= ~EEH_DEV_SYSFS; @@ -1022,7 +1102,7 @@ void eeh_remove_device(struct pci_dev *dev) static int proc_eeh_show(struct seq_file *m, void *v) { - if (0 == eeh_subsystem_enabled) { + if (!eeh_enabled()) { seq_printf(m, "EEH Subsystem is globally disabled\n"); seq_printf(m, "eeh_total_mmio_ffs=%llu\n", eeh_stats.total_mmio_ffs); } else { @@ -1059,10 +1139,45 @@ static const struct file_operations proc_eeh_operations = { .release = single_release, }; +#ifdef CONFIG_DEBUG_FS +static int eeh_enable_dbgfs_set(void *data, u64 val) +{ + if (val) + eeh_subsystem_flags &= ~EEH_FORCE_DISABLED; + else + eeh_subsystem_flags |= EEH_FORCE_DISABLED; + + /* Notify the backend */ + if (eeh_ops->post_init) + eeh_ops->post_init(); + + return 0; +} + +static int eeh_enable_dbgfs_get(void *data, u64 *val) +{ + if (eeh_enabled()) + *val = 0x1ul; + else + *val = 0x0ul; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(eeh_enable_dbgfs_ops, eeh_enable_dbgfs_get, + eeh_enable_dbgfs_set, "0x%llx\n"); +#endif + static int __init eeh_init_proc(void) { - if (machine_is(pseries) || machine_is(powernv)) + if (machine_is(pseries) || machine_is(powernv)) { proc_create("powerpc/eeh", 0, NULL, &proc_eeh_operations); +#ifdef CONFIG_DEBUG_FS + debugfs_create_file("eeh_enable", 0600, + powerpc_debugfs_root, NULL, + &eeh_enable_dbgfs_ops); +#endif + } + return 0; } __initcall(eeh_init_proc); diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 36bed5a1275..420da61d4ce 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -143,17 +143,43 @@ static void eeh_disable_irq(struct pci_dev *dev) static void eeh_enable_irq(struct pci_dev *dev) { struct eeh_dev *edev = pci_dev_to_eeh_dev(dev); - struct irq_desc *desc; if ((edev->mode) & EEH_DEV_IRQ_DISABLED) { edev->mode &= ~EEH_DEV_IRQ_DISABLED; - - desc = irq_to_desc(dev->irq); - if (desc && desc->depth > 0) + /* + * FIXME !!!!! + * + * This is just ass backwards. This maze has + * unbalanced irq_enable/disable calls. So instead of + * finding the root cause it works around the warning + * in the irq_enable code by conditionally calling + * into it. + * + * That's just wrong.The warning in the core code is + * there to tell people to fix their assymetries in + * their own code, not by abusing the core information + * to avoid it. + * + * I so wish that the assymetry would be the other way + * round and a few more irq_disable calls render that + * shit unusable forever. + * + * tglx + */ + if (irqd_irq_disabled(irq_get_irq_data(dev->irq))) enable_irq(dev->irq); } } +static bool eeh_dev_removed(struct eeh_dev *edev) +{ + /* EEH device removed ? */ + if (!edev || (edev->mode & EEH_DEV_REMOVED)) + return true; + + return false; +} + /** * eeh_report_error - Report pci error to each device driver * @data: eeh device @@ -170,10 +196,8 @@ static void *eeh_report_error(void *data, void *userdata) enum pci_ers_result rc, *res = userdata; struct pci_driver *driver; - /* We might not have the associated PCI device, - * then we should continue for next one. - */ - if (!dev) return NULL; + if (!dev || eeh_dev_removed(edev)) + return NULL; dev->error_state = pci_channel_io_frozen; driver = eeh_pcid_get(dev); @@ -213,11 +237,15 @@ static void *eeh_report_mmio_enabled(void *data, void *userdata) enum pci_ers_result rc, *res = userdata; struct pci_driver *driver; + if (!dev || eeh_dev_removed(edev)) + return NULL; + driver = eeh_pcid_get(dev); if (!driver) return NULL; if (!driver->err_handler || - !driver->err_handler->mmio_enabled) { + !driver->err_handler->mmio_enabled || + (edev->mode & EEH_DEV_NO_HANDLER)) { eeh_pcid_put(dev); return NULL; } @@ -249,7 +277,8 @@ static void *eeh_report_reset(void *data, void *userdata) enum pci_ers_result rc, *res = userdata; struct pci_driver *driver; - if (!dev) return NULL; + if (!dev || eeh_dev_removed(edev)) + return NULL; dev->error_state = pci_channel_io_normal; driver = eeh_pcid_get(dev); @@ -258,7 +287,8 @@ static void *eeh_report_reset(void *data, void *userdata) eeh_enable_irq(dev); if (!driver->err_handler || - !driver->err_handler->slot_reset) { + !driver->err_handler->slot_reset || + (edev->mode & EEH_DEV_NO_HANDLER)) { eeh_pcid_put(dev); return NULL; } @@ -288,7 +318,8 @@ static void *eeh_report_resume(void *data, void *userdata) struct pci_dev *dev = eeh_dev_to_pci_dev(edev); struct pci_driver *driver; - if (!dev) return NULL; + if (!dev || eeh_dev_removed(edev)) + return NULL; dev->error_state = pci_channel_io_normal; driver = eeh_pcid_get(dev); @@ -297,7 +328,9 @@ static void *eeh_report_resume(void *data, void *userdata) eeh_enable_irq(dev); if (!driver->err_handler || - !driver->err_handler->resume) { + !driver->err_handler->resume || + (edev->mode & EEH_DEV_NO_HANDLER)) { + edev->mode &= ~EEH_DEV_NO_HANDLER; eeh_pcid_put(dev); return NULL; } @@ -322,7 +355,8 @@ static void *eeh_report_failure(void *data, void *userdata) struct pci_dev *dev = eeh_dev_to_pci_dev(edev); struct pci_driver *driver; - if (!dev) return NULL; + if (!dev || eeh_dev_removed(edev)) + return NULL; dev->error_state = pci_channel_io_perm_failure; driver = eeh_pcid_get(dev); @@ -358,10 +392,24 @@ static void *eeh_rmv_device(void *data, void *userdata) */ if (!dev || (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE)) return NULL; - driver = eeh_pcid_get(dev); - if (driver && driver->err_handler) + + /* + * We rely on count-based pcibios_release_device() to + * detach permanently offlined PEs. Unfortunately, that's + * not reliable enough. We might have the permanently + * offlined PEs attached, but we needn't take care of + * them and their child devices. + */ + if (eeh_dev_removed(edev)) return NULL; + driver = eeh_pcid_get(dev); + if (driver) { + eeh_pcid_put(dev); + if (driver->err_handler) + return NULL; + } + /* Remove it from PCI subsystem */ pr_debug("EEH: Removing %s without EEH sensitive driver\n", pci_name(dev)); @@ -369,7 +417,9 @@ static void *eeh_rmv_device(void *data, void *userdata) edev->mode |= EEH_DEV_DISCONNECTED; (*removed)++; + pci_lock_rescan_remove(); pci_stop_and_remove_bus_device(dev); + pci_unlock_rescan_remove(); return NULL; } @@ -390,6 +440,48 @@ static void *eeh_pe_detach_dev(void *data, void *userdata) return NULL; } +/* + * Explicitly clear PE's frozen state for PowerNV where + * we have frozen PE until BAR restore is completed. It's + * harmless to clear it for pSeries. To be consistent with + * PE reset (for 3 times), we try to clear the frozen state + * for 3 times as well. + */ +static void *__eeh_clear_pe_frozen_state(void *data, void *flag) +{ + struct eeh_pe *pe = (struct eeh_pe *)data; + int i, rc; + + for (i = 0; i < 3; i++) { + rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); + if (rc) + continue; + rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA); + if (!rc) + break; + } + + /* The PE has been isolated, clear it */ + if (rc) { + pr_warn("%s: Can't clear frozen PHB#%x-PE#%x (%d)\n", + __func__, pe->phb->global_number, pe->addr, rc); + return (void *)pe; + } + + return NULL; +} + +static int eeh_clear_pe_frozen_state(struct eeh_pe *pe) +{ + void *rc; + + rc = eeh_pe_traverse(pe, __eeh_clear_pe_frozen_state, NULL); + if (!rc) + eeh_pe_state_clear(pe, EEH_PE_ISOLATED); + + return rc ? -EIO : 0; +} + /** * eeh_reset_device - Perform actual reset of a pci slot * @pe: EEH PE @@ -416,22 +508,41 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) * into pcibios_add_pci_devices(). */ eeh_pe_state_mark(pe, EEH_PE_KEEP); - if (bus) + if (bus) { + pci_lock_rescan_remove(); pcibios_remove_pci_devices(bus); - else if (frozen_bus) + pci_unlock_rescan_remove(); + } else if (frozen_bus) { eeh_pe_dev_traverse(pe, eeh_rmv_device, &removed); + } - /* Reset the pci controller. (Asserts RST#; resets config space). + /* + * Reset the pci controller. (Asserts RST#; resets config space). * Reconfigure bridges and devices. Don't try to bring the system * up if the reset failed for some reason. + * + * During the reset, it's very dangerous to have uncontrolled PCI + * config accesses. So we prefer to block them. However, controlled + * PCI config accesses initiated from EEH itself are allowed. */ + eeh_pe_state_mark(pe, EEH_PE_RESET); rc = eeh_reset_pe(pe); - if (rc) + if (rc) { + eeh_pe_state_clear(pe, EEH_PE_RESET); return rc; + } + + pci_lock_rescan_remove(); /* Restore PE */ eeh_ops->configure_bridge(pe); eeh_pe_restore_bars(pe); + eeh_pe_state_clear(pe, EEH_PE_RESET); + + /* Clear frozen state */ + rc = eeh_clear_pe_frozen_state(pe); + if (rc) + return rc; /* Give the system 5 seconds to finish running the user-space * hotplug shutdown scripts, e.g. ifdown for ethernet. Yes, @@ -462,13 +573,14 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) pe->tstamp = tstamp; pe->freeze_count = cnt; + pci_unlock_rescan_remove(); return 0; } /* The longest amount of time to wait for a pci device * to come back on line, in seconds. */ -#define MAX_WAIT_FOR_RECOVERY 150 +#define MAX_WAIT_FOR_RECOVERY 300 static void eeh_handle_normal_event(struct eeh_pe *pe) { @@ -540,7 +652,6 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) result = PCI_ERS_RESULT_NEED_RESET; } else { pr_info("EEH: Notify device drivers to resume I/O\n"); - result = PCI_ERS_RESULT_NONE; eeh_pe_dev_traverse(pe, eeh_report_mmio_enabled, &result); } } @@ -552,10 +663,17 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) if (rc < 0) goto hard_fail; - if (rc) + if (rc) { result = PCI_ERS_RESULT_NEED_RESET; - else + } else { + /* + * We didn't do PE reset for the case. The PE + * is still in frozen state. Clear it before + * resuming the PE. + */ + eeh_pe_state_clear(pe, EEH_PE_ISOLATED); result = PCI_ERS_RESULT_RECOVERED; + } } /* If any device has a hard failure, then shut off everything. */ @@ -617,93 +735,113 @@ perm_error: /* Notify all devices that they're about to go down. */ eeh_pe_dev_traverse(pe, eeh_report_failure, NULL); - /* Shut down the device drivers for good. */ - if (frozen_bus) + /* Mark the PE to be removed permanently */ + pe->freeze_count = EEH_MAX_ALLOWED_FREEZES + 1; + + /* + * Shut down the device drivers for good. We mark + * all removed devices correctly to avoid access + * the their PCI config any more. + */ + if (frozen_bus) { + eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); + + pci_lock_rescan_remove(); pcibios_remove_pci_devices(frozen_bus); + pci_unlock_rescan_remove(); + } } static void eeh_handle_special_event(void) { struct eeh_pe *pe, *phb_pe; struct pci_bus *bus; - struct pci_controller *hose, *tmp; + struct pci_controller *hose; unsigned long flags; - int rc = 0; + int rc; - /* - * The return value from next_error() has been classified as follows. - * It might be good to enumerate them. However, next_error() is only - * supported by PowerNV platform for now. So it would be fine to use - * integer directly: - * - * 4 - Dead IOC 3 - Dead PHB - * 2 - Fenced PHB 1 - Frozen PE - * 0 - No error found - * - */ - rc = eeh_ops->next_error(&pe); - if (rc <= 0) - return; - switch (rc) { - case 4: - /* Mark all PHBs in dead state */ - eeh_serialize_lock(&flags); - list_for_each_entry_safe(hose, tmp, - &hose_list, list_node) { - phb_pe = eeh_phb_pe_get(hose); - if (!phb_pe) continue; - - eeh_pe_state_mark(phb_pe, - EEH_PE_ISOLATED | EEH_PE_PHB_DEAD); + do { + rc = eeh_ops->next_error(&pe); + + switch (rc) { + case EEH_NEXT_ERR_DEAD_IOC: + /* Mark all PHBs in dead state */ + eeh_serialize_lock(&flags); + + /* Purge all events */ + eeh_remove_event(NULL, true); + + list_for_each_entry(hose, &hose_list, list_node) { + phb_pe = eeh_phb_pe_get(hose); + if (!phb_pe) continue; + + eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED); + } + + eeh_serialize_unlock(flags); + + break; + case EEH_NEXT_ERR_FROZEN_PE: + case EEH_NEXT_ERR_FENCED_PHB: + case EEH_NEXT_ERR_DEAD_PHB: + /* Mark the PE in fenced state */ + eeh_serialize_lock(&flags); + + /* Purge all events of the PHB */ + eeh_remove_event(pe, true); + + if (rc == EEH_NEXT_ERR_DEAD_PHB) + eeh_pe_state_mark(pe, EEH_PE_ISOLATED); + else + eeh_pe_state_mark(pe, + EEH_PE_ISOLATED | EEH_PE_RECOVERING); + + eeh_serialize_unlock(flags); + + break; + case EEH_NEXT_ERR_NONE: + return; + default: + pr_warn("%s: Invalid value %d from next_error()\n", + __func__, rc); + return; } - eeh_serialize_unlock(flags); - - /* Purge all events */ - eeh_remove_event(NULL); - break; - case 3: - case 2: - case 1: - /* Mark the PE in fenced state */ - eeh_serialize_lock(&flags); - if (rc == 3) - eeh_pe_state_mark(pe, - EEH_PE_ISOLATED | EEH_PE_PHB_DEAD); - else - eeh_pe_state_mark(pe, - EEH_PE_ISOLATED | EEH_PE_RECOVERING); - eeh_serialize_unlock(flags); - - /* Purge all events of the PHB */ - eeh_remove_event(pe); - break; - default: - pr_err("%s: Invalid value %d from next_error()\n", - __func__, rc); - return; - } - /* - * For fenced PHB and frozen PE, it's handled as normal - * event. We have to remove the affected PHBs for dead - * PHB and IOC - */ - if (rc == 2 || rc == 1) - eeh_handle_normal_event(pe); - else { - list_for_each_entry_safe(hose, tmp, - &hose_list, list_node) { - phb_pe = eeh_phb_pe_get(hose); - if (!phb_pe || !(phb_pe->state & EEH_PE_PHB_DEAD)) - continue; - - bus = eeh_pe_bus_get(phb_pe); - /* Notify all devices that they're about to go down. */ - eeh_pe_dev_traverse(pe, eeh_report_failure, NULL); - pcibios_remove_pci_devices(bus); + /* + * For fenced PHB and frozen PE, it's handled as normal + * event. We have to remove the affected PHBs for dead + * PHB and IOC + */ + if (rc == EEH_NEXT_ERR_FROZEN_PE || + rc == EEH_NEXT_ERR_FENCED_PHB) { + eeh_handle_normal_event(pe); + eeh_pe_state_clear(pe, EEH_PE_RECOVERING); + } else { + pci_lock_rescan_remove(); + list_for_each_entry(hose, &hose_list, list_node) { + phb_pe = eeh_phb_pe_get(hose); + if (!phb_pe || + !(phb_pe->state & EEH_PE_ISOLATED) || + (phb_pe->state & EEH_PE_RECOVERING)) + continue; + + /* Notify all devices to be down */ + bus = eeh_pe_bus_get(phb_pe); + eeh_pe_dev_traverse(pe, + eeh_report_failure, NULL); + pcibios_remove_pci_devices(bus); + } + pci_unlock_rescan_remove(); } - } + + /* + * If we have detected dead IOC, we needn't proceed + * any more since all PHBs would have been removed + */ + if (rc == EEH_NEXT_ERR_DEAD_IOC) + break; + } while (rc != EEH_NEXT_ERR_NONE); } /** diff --git a/arch/powerpc/kernel/eeh_event.c b/arch/powerpc/kernel/eeh_event.c index d27c5afc90a..4eefb6e34db 100644 --- a/arch/powerpc/kernel/eeh_event.c +++ b/arch/powerpc/kernel/eeh_event.c @@ -74,8 +74,13 @@ static int eeh_event_handler(void * dummy) pe = event->pe; if (pe) { eeh_pe_state_mark(pe, EEH_PE_RECOVERING); - pr_info("EEH: Detected PCI bus error on PHB#%d-PE#%x\n", - pe->phb->global_number, pe->addr); + if (pe->type & EEH_PE_PHB) + pr_info("EEH: Detected error on PHB#%d\n", + pe->phb->global_number); + else + pr_info("EEH: Detected PCI bus error on " + "PHB#%d-PE#%x\n", + pe->phb->global_number, pe->addr); eeh_handle_event(pe); eeh_pe_state_clear(pe, EEH_PE_RECOVERING); } else { @@ -147,24 +152,33 @@ int eeh_send_failure_event(struct eeh_pe *pe) /** * eeh_remove_event - Remove EEH event from the queue * @pe: Event binding to the PE + * @force: Event will be removed unconditionally * * On PowerNV platform, we might have subsequent coming events * is part of the former one. For that case, those subsequent * coming events are totally duplicated and unnecessary, thus * they should be removed. */ -void eeh_remove_event(struct eeh_pe *pe) +void eeh_remove_event(struct eeh_pe *pe, bool force) { unsigned long flags; struct eeh_event *event, *tmp; + /* + * If we have NULL PE passed in, we have dead IOC + * or we're sure we can report all existing errors + * by the caller. + * + * With "force", the event with associated PE that + * have been isolated, the event won't be removed + * to avoid event lost. + */ spin_lock_irqsave(&eeh_eventlist_lock, flags); list_for_each_entry_safe(event, tmp, &eeh_eventlist, list) { - /* - * If we don't have valid PE passed in, that means - * we already have event corresponding to dead IOC - * and all events should be purged. - */ + if (!force && event->pe && + (event->pe->state & EEH_PE_ISOLATED)) + continue; + if (!pe) { list_del(&event->list); kfree(event); diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index f9450537e33..fbd01eba447 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -25,7 +25,6 @@ #include <linux/delay.h> #include <linux/export.h> #include <linux/gfp.h> -#include <linux/init.h> #include <linux/kernel.h> #include <linux/pci.h> #include <linux/string.h> @@ -504,13 +503,17 @@ static void *__eeh_pe_state_mark(void *data, void *flag) struct eeh_dev *edev, *tmp; struct pci_dev *pdev; - /* - * Mark the PE with the indicated state. Also, - * the associated PCI device will be put into - * I/O frozen state to avoid I/O accesses from - * the PCI device driver. - */ + /* Keep the state of permanently removed PE intact */ + if ((pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) && + (state & (EEH_PE_ISOLATED | EEH_PE_RECOVERING))) + return NULL; + pe->state |= state; + + /* Offline PCI devices if applicable */ + if (state != EEH_PE_ISOLATED) + return NULL; + eeh_pe_for_each_dev(pe, edev, tmp) { pdev = eeh_dev_to_pci_dev(edev); if (pdev) @@ -533,6 +536,27 @@ void eeh_pe_state_mark(struct eeh_pe *pe, int state) eeh_pe_traverse(pe, __eeh_pe_state_mark, &state); } +static void *__eeh_pe_dev_mode_mark(void *data, void *flag) +{ + struct eeh_dev *edev = data; + int mode = *((int *)flag); + + edev->mode |= mode; + + return NULL; +} + +/** + * eeh_pe_dev_state_mark - Mark state for all device under the PE + * @pe: EEH PE + * + * Mark specific state for all child devices of the PE. + */ +void eeh_pe_dev_mode_mark(struct eeh_pe *pe, int mode) +{ + eeh_pe_dev_traverse(pe, __eeh_pe_dev_mode_mark, &mode); +} + /** * __eeh_pe_state_clear - Clear state for the PE * @data: EEH PE @@ -547,8 +571,16 @@ static void *__eeh_pe_state_clear(void *data, void *flag) struct eeh_pe *pe = (struct eeh_pe *)data; int state = *((int *)flag); + /* Keep the state of permanently removed PE intact */ + if ((pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) && + (state & EEH_PE_ISOLATED)) + return NULL; + pe->state &= ~state; - pe->check_count = 0; + + /* Clear check count since last isolation */ + if (state & EEH_PE_ISOLATED) + pe->check_count = 0; return NULL; } @@ -737,6 +769,9 @@ static void *eeh_restore_one_device_bars(void *data, void *flag) else eeh_restore_device_bars(edev, dn); + if (eeh_ops->restore_config) + eeh_ops->restore_config(dn); + return NULL; } @@ -757,6 +792,66 @@ void eeh_pe_restore_bars(struct eeh_pe *pe) } /** + * eeh_pe_loc_get - Retrieve location code binding to the given PE + * @pe: EEH PE + * + * Retrieve the location code of the given PE. If the primary PE bus + * is root bus, we will grab location code from PHB device tree node + * or root port. Otherwise, the upstream bridge's device tree node + * of the primary PE bus will be checked for the location code. + */ +const char *eeh_pe_loc_get(struct eeh_pe *pe) +{ + struct pci_controller *hose; + struct pci_bus *bus = eeh_pe_bus_get(pe); + struct pci_dev *pdev; + struct device_node *dn; + const char *loc; + + if (!bus) + return "N/A"; + + /* PHB PE or root PE ? */ + if (pci_is_root_bus(bus)) { + hose = pci_bus_to_host(bus); + loc = of_get_property(hose->dn, + "ibm,loc-code", NULL); + if (loc) + return loc; + loc = of_get_property(hose->dn, + "ibm,io-base-loc-code", NULL); + if (loc) + return loc; + + pdev = pci_get_slot(bus, 0x0); + } else { + pdev = bus->self; + } + + if (!pdev) { + loc = "N/A"; + goto out; + } + + dn = pci_device_to_OF_node(pdev); + if (!dn) { + loc = "N/A"; + goto out; + } + + loc = of_get_property(dn, "ibm,loc-code", NULL); + if (!loc) + loc = of_get_property(dn, "ibm,slot-location-code", NULL); + if (!loc) + loc = "N/A"; + +out: + if (pci_is_root_bus(bus) && pdev) + pci_dev_put(pdev); + return loc; +} + +/** * eeh_pe_bus_get - Retrieve PCI bus according to the given PE * @pe: EEH PE * diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c index 5d753d4f2c7..e2595ba4b72 100644 --- a/arch/powerpc/kernel/eeh_sysfs.c +++ b/arch/powerpc/kernel/eeh_sysfs.c @@ -59,6 +59,9 @@ void eeh_sysfs_add_device(struct pci_dev *pdev) struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev); int rc=0; + if (!eeh_enabled()) + return; + if (edev && (edev->mode & EEH_DEV_SYSFS)) return; diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index c04cdf70d48..6528c5e2cc4 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -39,8 +39,8 @@ * System calls. */ .section ".toc","aw" -.SYS_CALL_TABLE: - .tc .sys_call_table[TC],.sys_call_table +SYS_CALL_TABLE: + .tc sys_call_table[TC],sys_call_table /* This value is used to mark exception frames on the stack. */ exception_marker: @@ -106,7 +106,7 @@ BEGIN_FW_FTR_SECTION LDX_BE r10,0,r10 /* get log write index */ cmpd cr1,r11,r10 beq+ cr1,33f - bl .accumulate_stolen_time + bl accumulate_stolen_time REST_GPR(0,r1) REST_4GPRS(3,r1) REST_2GPRS(7,r1) @@ -143,7 +143,7 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) std r10,SOFTE(r1) #ifdef SHOW_SYSCALLS - bl .do_show_syscall + bl do_show_syscall REST_GPR(0,r1) REST_4GPRS(3,r1) REST_2GPRS(7,r1) @@ -162,7 +162,7 @@ system_call: /* label this so stack traces look sane */ * Need to vector to 32 Bit or default sys_call_table here, * based on caller's run-mode / personality. */ - ld r11,.SYS_CALL_TABLE@toc(2) + ld r11,SYS_CALL_TABLE@toc(2) andi. r10,r10,_TIF_32BIT beq 15f addi r11,r11,8 /* use 32-bit syscall entries */ @@ -174,14 +174,14 @@ system_call: /* label this so stack traces look sane */ clrldi r8,r8,32 15: slwi r0,r0,4 - ldx r10,r11,r0 /* Fetch system call handler [ptr] */ - mtctr r10 + ldx r12,r11,r0 /* Fetch system call handler [ptr] */ + mtctr r12 bctrl /* Call handler */ syscall_exit: std r3,RESULT(r1) #ifdef SHOW_SYSCALLS - bl .do_show_syscall_exit + bl do_show_syscall_exit ld r3,RESULT(r1) #endif CURRENT_THREAD_INFO(r12, r1) @@ -248,9 +248,9 @@ syscall_error: /* Traced system call support */ syscall_dotrace: - bl .save_nvgprs + bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD - bl .do_syscall_trace_enter + bl do_syscall_trace_enter /* * Restore argument registers possibly just changed. * We use the return value of do_syscall_trace_enter @@ -308,7 +308,7 @@ syscall_exit_work: 4: /* Anything else left to do? */ SET_DEFAULT_THREAD_PPR(r3, r10) /* Set thread.ppr = 3 */ andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP) - beq .ret_from_except_lite + beq ret_from_except_lite /* Re-enable interrupts */ #ifdef CONFIG_PPC_BOOK3E @@ -319,10 +319,10 @@ syscall_exit_work: mtmsrd r10,1 #endif /* CONFIG_PPC_BOOK3E */ - bl .save_nvgprs + bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD - bl .do_syscall_trace_leave - b .ret_from_except + bl do_syscall_trace_leave + b ret_from_except /* Save non-volatile GPRs, if not already saved. */ _GLOBAL(save_nvgprs) @@ -345,52 +345,48 @@ _GLOBAL(save_nvgprs) */ _GLOBAL(ppc_fork) - bl .save_nvgprs - bl .sys_fork + bl save_nvgprs + bl sys_fork b syscall_exit _GLOBAL(ppc_vfork) - bl .save_nvgprs - bl .sys_vfork + bl save_nvgprs + bl sys_vfork b syscall_exit _GLOBAL(ppc_clone) - bl .save_nvgprs - bl .sys_clone + bl save_nvgprs + bl sys_clone b syscall_exit _GLOBAL(ppc32_swapcontext) - bl .save_nvgprs - bl .compat_sys_swapcontext + bl save_nvgprs + bl compat_sys_swapcontext b syscall_exit _GLOBAL(ppc64_swapcontext) - bl .save_nvgprs - bl .sys_swapcontext + bl save_nvgprs + bl sys_swapcontext b syscall_exit _GLOBAL(ret_from_fork) - bl .schedule_tail + bl schedule_tail REST_NVGPRS(r1) li r3,0 b syscall_exit _GLOBAL(ret_from_kernel_thread) - bl .schedule_tail + bl schedule_tail REST_NVGPRS(r1) - ld r14, 0(r14) mtlr r14 mr r3,r15 +#if defined(_CALL_ELF) && _CALL_ELF == 2 + mr r12,r14 +#endif blrl li r3,0 b syscall_exit - .section ".toc","aw" -DSCR_DEFAULT: - .tc dscr_default[TC],dscr_default - - .section ".text" - /* * This routine switches between two different tasks. The process * state of one is saved on its kernel stack. Then the state @@ -432,12 +428,6 @@ BEGIN_FTR_SECTION std r24,THREAD_VRSAVE(r3) END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif /* CONFIG_ALTIVEC */ -#ifdef CONFIG_PPC64 -BEGIN_FTR_SECTION - mfspr r25,SPRN_DSCR - std r25,THREAD_DSCR(r3) -END_FTR_SECTION_IFSET(CPU_FTR_DSCR) -#endif and. r0,r0,r22 beq+ 1f andc r22,r22,r0 @@ -575,11 +565,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #ifdef CONFIG_PPC64 BEGIN_FTR_SECTION lwz r6,THREAD_DSCR_INHERIT(r4) - ld r7,DSCR_DEFAULT@toc(2) ld r0,THREAD_DSCR(r4) cmpwi r6,0 bne 1f - ld r0,0(r7) + ld r0,PACA_DSCR(r13) 1: BEGIN_FTR_SECTION_NESTED(70) mfspr r8, SPRN_FSCR @@ -611,7 +600,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_DSCR) _GLOBAL(ret_from_except) ld r11,_TRAP(r1) andi. r0,r11,1 - bne .ret_from_except_lite + bne ret_from_except_lite REST_NVGPRS(r1) _GLOBAL(ret_from_except_lite) @@ -661,21 +650,27 @@ _GLOBAL(ret_from_except_lite) #endif 1: andi. r0,r4,_TIF_NEED_RESCHED beq 2f - bl .restore_interrupts + bl restore_interrupts SCHEDULE_USER - b .ret_from_except_lite - -2: bl .save_nvgprs - bl .restore_interrupts + b ret_from_except_lite +2: +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + andi. r0,r4,_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM + bne 3f /* only restore TM if nothing else to do */ addi r3,r1,STACK_FRAME_OVERHEAD - bl .do_notify_resume - b .ret_from_except + bl restore_tm_state + b restore +3: +#endif + bl save_nvgprs + bl restore_interrupts + addi r3,r1,STACK_FRAME_OVERHEAD + bl do_notify_resume + b ret_from_except resume_kernel: /* check current_thread_info, _TIF_EMULATE_STACK_STORE */ - CURRENT_THREAD_INFO(r9, r1) - ld r8,TI_FLAGS(r9) - andis. r8,r8,_TIF_EMULATE_STACK_STORE@h + andis. r8,r4,_TIF_EMULATE_STACK_STORE@h beq+ 1f addi r8,r1,INT_FRAME_SIZE /* Get the kprobed function entry */ @@ -724,7 +719,7 @@ resume_kernel: * sure we are soft-disabled first and reconcile irq state. */ RECONCILE_IRQ_STATE(r3,r4) -1: bl .preempt_schedule_irq +1: bl preempt_schedule_irq /* Re-test flags and eventually loop */ CURRENT_THREAD_INFO(r9, r1) @@ -786,7 +781,7 @@ restore_no_replay: */ do_restore: #ifdef CONFIG_PPC_BOOK3E - b .exception_return_book3e + b exception_return_book3e #else /* * Clear the reservation. If we know the CPU tracks the address of @@ -820,6 +815,12 @@ fast_exception_return: andi. r0,r3,MSR_RI beq- unrecov_restore + /* Load PPR from thread struct before we clear MSR:RI */ +BEGIN_FTR_SECTION + ld r2,PACACURRENT(r13) + ld r2,TASKTHREADPPR(r2) +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) + /* * Clear RI before restoring r13. If we are returning to * userspace and we take an exception after restoring r13, @@ -840,8 +841,10 @@ fast_exception_return: */ andi. r0,r3,MSR_PR beq 1f +BEGIN_FTR_SECTION + mtspr SPRN_PPR,r2 /* Restore PPR */ +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) ACCOUNT_CPU_USER_EXIT(r2, r4) - RESTORE_PPR(r2, r4) REST_GPR(13, r1) 1: mtspr SPRN_SRR1,r3 @@ -893,7 +896,7 @@ restore_check_irq_replay: * * Still, this might be useful for things like hash_page */ - bl .__check_irq_replay + bl __check_irq_replay cmpwi cr0,r3,0 beq restore_no_replay @@ -914,13 +917,13 @@ restore_check_irq_replay: cmpwi cr0,r3,0x500 bne 1f addi r3,r1,STACK_FRAME_OVERHEAD; - bl .do_IRQ - b .ret_from_except + bl do_IRQ + b ret_from_except 1: cmpwi cr0,r3,0x900 bne 1f addi r3,r1,STACK_FRAME_OVERHEAD; - bl .timer_interrupt - b .ret_from_except + bl timer_interrupt + b ret_from_except #ifdef CONFIG_PPC_DOORBELL 1: #ifdef CONFIG_PPC_BOOK3E @@ -934,14 +937,14 @@ restore_check_irq_replay: #endif /* CONFIG_PPC_BOOK3E */ bne 1f addi r3,r1,STACK_FRAME_OVERHEAD; - bl .doorbell_exception - b .ret_from_except + bl doorbell_exception + b ret_from_except #endif /* CONFIG_PPC_DOORBELL */ -1: b .ret_from_except /* What else to do here ? */ +1: b ret_from_except /* What else to do here ? */ unrecov_restore: addi r3,r1,STACK_FRAME_OVERHEAD - bl .unrecoverable_exception + bl unrecoverable_exception b unrecov_restore #ifdef CONFIG_PPC_RTAS @@ -1007,7 +1010,7 @@ _GLOBAL(enter_rtas) std r6,PACASAVEDMSR(r13) /* Setup our real return addr */ - LOAD_REG_ADDR(r4,.rtas_return_loc) + LOAD_REG_ADDR(r4,rtas_return_loc) clrldi r4,r4,2 /* convert to realmode address */ mtlr r4 @@ -1017,7 +1020,7 @@ _GLOBAL(enter_rtas) li r9,1 rldicr r9,r9,MSR_SF_LG,(63-MSR_SF_LG) - ori r9,r9,MSR_IR|MSR_DR|MSR_FE0|MSR_FE1|MSR_FP|MSR_RI + ori r9,r9,MSR_IR|MSR_DR|MSR_FE0|MSR_FE1|MSR_FP|MSR_RI|MSR_LE andc r6,r0,r9 sync /* disable interrupts so SRR0/1 */ mtmsrd r0 /* don't get trashed */ @@ -1031,14 +1034,16 @@ _GLOBAL(enter_rtas) rfid b . /* prevent speculative execution */ -_STATIC(rtas_return_loc) +rtas_return_loc: + FIXUP_ENDIAN + /* relocation is off at this point */ GET_PACA(r4) clrldi r4,r4,2 /* convert to realmode address */ bcl 20,31,$+4 0: mflr r3 - ld r3,(1f-0b)(r3) /* get &.rtas_restore_regs */ + ld r3,(1f-0b)(r3) /* get &rtas_restore_regs */ mfmsr r6 li r0,MSR_RI @@ -1055,9 +1060,9 @@ _STATIC(rtas_return_loc) b . /* prevent speculative execution */ .align 3 -1: .llong .rtas_restore_regs +1: .llong rtas_restore_regs -_STATIC(rtas_restore_regs) +rtas_restore_regs: /* relocation is on at this point */ REST_GPR(2, r1) /* Restore the TOC */ REST_GPR(13, r1) /* Restore paca */ @@ -1103,28 +1108,30 @@ _GLOBAL(enter_prom) std r10,_CCR(r1) std r11,_MSR(r1) - /* Get the PROM entrypoint */ - mtlr r4 + /* Put PROM address in SRR0 */ + mtsrr0 r4 + + /* Setup our trampoline return addr in LR */ + bcl 20,31,$+4 +0: mflr r4 + addi r4,r4,(1f - 0b) + mtlr r4 - /* Switch MSR to 32 bits mode + /* Prepare a 32-bit mode big endian MSR */ #ifdef CONFIG_PPC_BOOK3E rlwinm r11,r11,0,1,31 - mtmsr r11 + mtsrr1 r11 + rfi #else /* CONFIG_PPC_BOOK3E */ - mfmsr r11 - li r12,1 - rldicr r12,r12,MSR_SF_LG,(63-MSR_SF_LG) - andc r11,r11,r12 - li r12,1 - rldicr r12,r12,MSR_ISF_LG,(63-MSR_ISF_LG) - andc r11,r11,r12 - mtmsrd r11 + LOAD_REG_IMMEDIATE(r12, MSR_SF | MSR_ISF | MSR_LE) + andc r11,r11,r12 + mtsrr1 r11 + rfid #endif /* CONFIG_PPC_BOOK3E */ - isync - /* Enter PROM here... */ - blrl +1: /* Return from OF */ + FIXUP_ENDIAN /* Just make sure that r1 top 32 bits didn't get * corrupt by OF @@ -1155,7 +1162,7 @@ _GLOBAL(mcount) _GLOBAL(_mcount) blr -_GLOBAL(ftrace_caller) +_GLOBAL_TOC(ftrace_caller) /* Taken from output of objdump from lib64/glibc */ mflr r3 ld r11, 0(r1) @@ -1179,10 +1186,7 @@ _GLOBAL(ftrace_graph_stub) _GLOBAL(ftrace_stub) blr #else -_GLOBAL(mcount) - blr - -_GLOBAL(_mcount) +_GLOBAL_TOC(_mcount) /* Taken from output of objdump from lib64/glibc */ mflr r3 ld r11, 0(r1) @@ -1220,7 +1224,7 @@ _GLOBAL(ftrace_graph_caller) ld r11, 112(r1) addi r3, r11, 16 - bl .prepare_ftrace_return + bl prepare_ftrace_return nop ld r0, 128(r1) @@ -1236,7 +1240,7 @@ _GLOBAL(return_to_handler) mr r31, r1 stdu r1, -112(r1) - bl .ftrace_return_to_handler + bl ftrace_return_to_handler nop /* return value has real return address */ @@ -1266,7 +1270,7 @@ _GLOBAL(mod_return_to_handler) */ ld r2, PACATOC(r13) - bl .ftrace_return_to_handler + bl ftrace_return_to_handler nop /* return value has real return address */ diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c index 6300c13bbde..59e4ba74975 100644 --- a/arch/powerpc/kernel/epapr_paravirt.c +++ b/arch/powerpc/kernel/epapr_paravirt.c @@ -18,6 +18,7 @@ */ #include <linux/of.h> +#include <linux/of_fdt.h> #include <asm/epapr_hcalls.h> #include <asm/cacheflush.h> #include <asm/code-patching.h> @@ -29,13 +30,14 @@ extern u32 epapr_ev_idle_start[]; #endif bool epapr_paravirt_enabled; +static bool __maybe_unused epapr_has_idle; static int __init early_init_dt_scan_epapr(unsigned long node, const char *uname, int depth, void *data) { const u32 *insts; - unsigned long len; + int len; int i; insts = of_get_flat_dt_prop(node, "hcall-instructions", &len); @@ -46,15 +48,16 @@ static int __init early_init_dt_scan_epapr(unsigned long node, return -1; for (i = 0; i < (len / 4); i++) { - patch_instruction(epapr_hypercall_start + i, insts[i]); + u32 inst = be32_to_cpu(insts[i]); + patch_instruction(epapr_hypercall_start + i, inst); #if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64) - patch_instruction(epapr_ev_idle_start + i, insts[i]); + patch_instruction(epapr_ev_idle_start + i, inst); #endif } #if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64) if (of_get_flat_dt_prop(node, "has-idle", NULL)) - ppc_md.power_save = epapr_ev_idle; + epapr_has_idle = true; #endif epapr_paravirt_enabled = true; @@ -69,3 +72,14 @@ int __init epapr_paravirt_early_init(void) return 0; } +static int __init epapr_idle_init(void) +{ +#if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64) + if (epapr_has_idle) + ppc_md.power_save = epapr_ev_idle; +#endif + + return 0; +} + +postcore_initcall(epapr_idle_init); diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 2d067049db2..bb9cac6c805 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -34,7 +34,250 @@ * special interrupts from within a non-standard level will probably * blow you up */ -#define SPECIAL_EXC_FRAME_SIZE INT_FRAME_SIZE +#define SPECIAL_EXC_SRR0 0 +#define SPECIAL_EXC_SRR1 1 +#define SPECIAL_EXC_SPRG_GEN 2 +#define SPECIAL_EXC_SPRG_TLB 3 +#define SPECIAL_EXC_MAS0 4 +#define SPECIAL_EXC_MAS1 5 +#define SPECIAL_EXC_MAS2 6 +#define SPECIAL_EXC_MAS3 7 +#define SPECIAL_EXC_MAS6 8 +#define SPECIAL_EXC_MAS7 9 +#define SPECIAL_EXC_MAS5 10 /* E.HV only */ +#define SPECIAL_EXC_MAS8 11 /* E.HV only */ +#define SPECIAL_EXC_IRQHAPPENED 12 +#define SPECIAL_EXC_DEAR 13 +#define SPECIAL_EXC_ESR 14 +#define SPECIAL_EXC_SOFTE 15 +#define SPECIAL_EXC_CSRR0 16 +#define SPECIAL_EXC_CSRR1 17 +/* must be even to keep 16-byte stack alignment */ +#define SPECIAL_EXC_END 18 + +#define SPECIAL_EXC_FRAME_SIZE (INT_FRAME_SIZE + SPECIAL_EXC_END * 8) +#define SPECIAL_EXC_FRAME_OFFS (INT_FRAME_SIZE - 288) + +#define SPECIAL_EXC_STORE(reg, name) \ + std reg, (SPECIAL_EXC_##name * 8 + SPECIAL_EXC_FRAME_OFFS)(r1) + +#define SPECIAL_EXC_LOAD(reg, name) \ + ld reg, (SPECIAL_EXC_##name * 8 + SPECIAL_EXC_FRAME_OFFS)(r1) + +special_reg_save: + lbz r9,PACAIRQHAPPENED(r13) + RECONCILE_IRQ_STATE(r3,r4) + + /* + * We only need (or have stack space) to save this stuff if + * we interrupted the kernel. + */ + ld r3,_MSR(r1) + andi. r3,r3,MSR_PR + bnelr + + /* Copy info into temporary exception thread info */ + ld r11,PACAKSAVE(r13) + CURRENT_THREAD_INFO(r11, r11) + CURRENT_THREAD_INFO(r12, r1) + ld r10,TI_FLAGS(r11) + std r10,TI_FLAGS(r12) + ld r10,TI_PREEMPT(r11) + std r10,TI_PREEMPT(r12) + ld r10,TI_TASK(r11) + std r10,TI_TASK(r12) + + /* + * Advance to the next TLB exception frame for handler + * types that don't do it automatically. + */ + LOAD_REG_ADDR(r11,extlb_level_exc) + lwz r12,0(r11) + mfspr r10,SPRN_SPRG_TLB_EXFRAME + add r10,r10,r12 + mtspr SPRN_SPRG_TLB_EXFRAME,r10 + + /* + * Save registers needed to allow nesting of certain exceptions + * (such as TLB misses) inside special exception levels + */ + mfspr r10,SPRN_SRR0 + SPECIAL_EXC_STORE(r10,SRR0) + mfspr r10,SPRN_SRR1 + SPECIAL_EXC_STORE(r10,SRR1) + mfspr r10,SPRN_SPRG_GEN_SCRATCH + SPECIAL_EXC_STORE(r10,SPRG_GEN) + mfspr r10,SPRN_SPRG_TLB_SCRATCH + SPECIAL_EXC_STORE(r10,SPRG_TLB) + mfspr r10,SPRN_MAS0 + SPECIAL_EXC_STORE(r10,MAS0) + mfspr r10,SPRN_MAS1 + SPECIAL_EXC_STORE(r10,MAS1) + mfspr r10,SPRN_MAS2 + SPECIAL_EXC_STORE(r10,MAS2) + mfspr r10,SPRN_MAS3 + SPECIAL_EXC_STORE(r10,MAS3) + mfspr r10,SPRN_MAS6 + SPECIAL_EXC_STORE(r10,MAS6) + mfspr r10,SPRN_MAS7 + SPECIAL_EXC_STORE(r10,MAS7) +BEGIN_FTR_SECTION + mfspr r10,SPRN_MAS5 + SPECIAL_EXC_STORE(r10,MAS5) + mfspr r10,SPRN_MAS8 + SPECIAL_EXC_STORE(r10,MAS8) + + /* MAS5/8 could have inappropriate values if we interrupted KVM code */ + li r10,0 + mtspr SPRN_MAS5,r10 + mtspr SPRN_MAS8,r10 +END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) + SPECIAL_EXC_STORE(r9,IRQHAPPENED) + + mfspr r10,SPRN_DEAR + SPECIAL_EXC_STORE(r10,DEAR) + mfspr r10,SPRN_ESR + SPECIAL_EXC_STORE(r10,ESR) + + lbz r10,PACASOFTIRQEN(r13) + SPECIAL_EXC_STORE(r10,SOFTE) + ld r10,_NIP(r1) + SPECIAL_EXC_STORE(r10,CSRR0) + ld r10,_MSR(r1) + SPECIAL_EXC_STORE(r10,CSRR1) + + blr + +ret_from_level_except: + ld r3,_MSR(r1) + andi. r3,r3,MSR_PR + beq 1f + b ret_from_except +1: + + LOAD_REG_ADDR(r11,extlb_level_exc) + lwz r12,0(r11) + mfspr r10,SPRN_SPRG_TLB_EXFRAME + sub r10,r10,r12 + mtspr SPRN_SPRG_TLB_EXFRAME,r10 + + /* + * It's possible that the special level exception interrupted a + * TLB miss handler, and inserted the same entry that the + * interrupted handler was about to insert. On CPUs without TLB + * write conditional, this can result in a duplicate TLB entry. + * Wipe all non-bolted entries to be safe. + * + * Note that this doesn't protect against any TLB misses + * we may take accessing the stack from here to the end of + * the special level exception. It's not clear how we can + * reasonably protect against that, but only CPUs with + * neither TLB write conditional nor bolted kernel memory + * are affected. Do any such CPUs even exist? + */ + PPC_TLBILX_ALL(0,R0) + + REST_NVGPRS(r1) + + SPECIAL_EXC_LOAD(r10,SRR0) + mtspr SPRN_SRR0,r10 + SPECIAL_EXC_LOAD(r10,SRR1) + mtspr SPRN_SRR1,r10 + SPECIAL_EXC_LOAD(r10,SPRG_GEN) + mtspr SPRN_SPRG_GEN_SCRATCH,r10 + SPECIAL_EXC_LOAD(r10,SPRG_TLB) + mtspr SPRN_SPRG_TLB_SCRATCH,r10 + SPECIAL_EXC_LOAD(r10,MAS0) + mtspr SPRN_MAS0,r10 + SPECIAL_EXC_LOAD(r10,MAS1) + mtspr SPRN_MAS1,r10 + SPECIAL_EXC_LOAD(r10,MAS2) + mtspr SPRN_MAS2,r10 + SPECIAL_EXC_LOAD(r10,MAS3) + mtspr SPRN_MAS3,r10 + SPECIAL_EXC_LOAD(r10,MAS6) + mtspr SPRN_MAS6,r10 + SPECIAL_EXC_LOAD(r10,MAS7) + mtspr SPRN_MAS7,r10 +BEGIN_FTR_SECTION + SPECIAL_EXC_LOAD(r10,MAS5) + mtspr SPRN_MAS5,r10 + SPECIAL_EXC_LOAD(r10,MAS8) + mtspr SPRN_MAS8,r10 +END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) + + lbz r6,PACASOFTIRQEN(r13) + ld r5,SOFTE(r1) + + /* Interrupts had better not already be enabled... */ + twnei r6,0 + + cmpwi cr0,r5,0 + beq 1f + + TRACE_ENABLE_INTS + stb r5,PACASOFTIRQEN(r13) +1: + /* + * Restore PACAIRQHAPPENED rather than setting it based on + * the return MSR[EE], since we could have interrupted + * __check_irq_replay() or other inconsistent transitory + * states that must remain that way. + */ + SPECIAL_EXC_LOAD(r10,IRQHAPPENED) + stb r10,PACAIRQHAPPENED(r13) + + SPECIAL_EXC_LOAD(r10,DEAR) + mtspr SPRN_DEAR,r10 + SPECIAL_EXC_LOAD(r10,ESR) + mtspr SPRN_ESR,r10 + + stdcx. r0,0,r1 /* to clear the reservation */ + + REST_4GPRS(2, r1) + REST_4GPRS(6, r1) + + ld r10,_CTR(r1) + ld r11,_XER(r1) + mtctr r10 + mtxer r11 + + blr + +.macro ret_from_level srr0 srr1 paca_ex scratch + bl ret_from_level_except + + ld r10,_LINK(r1) + ld r11,_CCR(r1) + ld r0,GPR13(r1) + mtlr r10 + mtcr r11 + + ld r10,GPR10(r1) + ld r11,GPR11(r1) + ld r12,GPR12(r1) + mtspr \scratch,r0 + + std r10,\paca_ex+EX_R10(r13); + std r11,\paca_ex+EX_R11(r13); + ld r10,_NIP(r1) + ld r11,_MSR(r1) + ld r0,GPR0(r1) + ld r1,GPR1(r1) + mtspr \srr0,r10 + mtspr \srr1,r11 + ld r10,\paca_ex+EX_R10(r13) + ld r11,\paca_ex+EX_R11(r13) + mfspr r13,\scratch +.endm + +ret_from_crit_except: + ret_from_level SPRN_CSRR0 SPRN_CSRR1 PACA_EXCRIT SPRN_SPRG_CRIT_SCRATCH + rfci + +ret_from_mc_except: + ret_from_level SPRN_MCSRR0 SPRN_MCSRR1 PACA_EXMC SPRN_SPRG_MC_SCRATCH + rfmci /* Exception prolog code for all exceptions */ #define EXCEPTION_PROLOG(n, intnum, type, addition) \ @@ -42,7 +285,6 @@ mfspr r13,SPRN_SPRG_PACA; /* get PACA */ \ std r10,PACA_EX##type+EX_R10(r13); \ std r11,PACA_EX##type+EX_R11(r13); \ - PROLOG_STORE_RESTORE_SCRATCH_##type; \ mfcr r10; /* save CR */ \ mfspr r11,SPRN_##type##_SRR1;/* what are we coming from */ \ DO_KVM intnum,SPRN_##type##_SRR1; /* KVM hook */ \ @@ -69,19 +311,19 @@ #define CRIT_SET_KSTACK \ ld r1,PACA_CRIT_STACK(r13); \ - subi r1,r1,SPECIAL_EXC_FRAME_SIZE; + subi r1,r1,SPECIAL_EXC_FRAME_SIZE #define SPRN_CRIT_SRR0 SPRN_CSRR0 #define SPRN_CRIT_SRR1 SPRN_CSRR1 #define DBG_SET_KSTACK \ ld r1,PACA_DBG_STACK(r13); \ - subi r1,r1,SPECIAL_EXC_FRAME_SIZE; + subi r1,r1,SPECIAL_EXC_FRAME_SIZE #define SPRN_DBG_SRR0 SPRN_DSRR0 #define SPRN_DBG_SRR1 SPRN_DSRR1 #define MC_SET_KSTACK \ ld r1,PACA_MC_STACK(r13); \ - subi r1,r1,SPECIAL_EXC_FRAME_SIZE; + subi r1,r1,SPECIAL_EXC_FRAME_SIZE #define SPRN_MC_SRR0 SPRN_MCSRR0 #define SPRN_MC_SRR1 SPRN_MCSRR1 @@ -100,20 +342,6 @@ #define GDBELL_EXCEPTION_PROLOG(n, intnum, addition) \ EXCEPTION_PROLOG(n, intnum, GDBELL, addition##_GDBELL(n)) -/* - * Store user-visible scratch in PACA exception slots and restore proper value - */ -#define PROLOG_STORE_RESTORE_SCRATCH_GEN -#define PROLOG_STORE_RESTORE_SCRATCH_GDBELL -#define PROLOG_STORE_RESTORE_SCRATCH_DBG -#define PROLOG_STORE_RESTORE_SCRATCH_MC - -#define PROLOG_STORE_RESTORE_SCRATCH_CRIT \ - mfspr r10,SPRN_SPRG_CRIT_SCRATCH; /* get r13 */ \ - std r10,PACA_EXCRIT+EX_R13(r13); \ - ld r11,PACA_SPRG3(r13); \ - mtspr SPRN_SPRG_CRIT_SCRATCH,r11; - /* Variants of the "addition" argument for the prolog */ #define PROLOG_ADDITION_NONE_GEN(n) @@ -147,10 +375,8 @@ std r15,PACA_EXMC+EX_R15(r13) -/* Core exception code for all exceptions except TLB misses. - * XXX: Needs to make SPRN_SPRG_GEN depend on exception type - */ -#define EXCEPTION_COMMON(n, excf, ints) \ +/* Core exception code for all exceptions except TLB misses. */ +#define EXCEPTION_COMMON_LVL(n, scratch, excf) \ exc_##n##_common: \ std r0,GPR0(r1); /* save r0 in stackframe */ \ std r2,GPR2(r1); /* save r2 in stackframe */ \ @@ -163,7 +389,7 @@ exc_##n##_common: \ ACCOUNT_CPU_USER_ENTRY(r10,r11);/* accounting (uses cr0+eq) */ \ 2: ld r3,excf+EX_R10(r13); /* get back r10 */ \ ld r4,excf+EX_R11(r13); /* get back r11 */ \ - mfspr r5,SPRN_SPRG_GEN_SCRATCH;/* get back r13 */ \ + mfspr r5,scratch; /* get back r13 */ \ std r12,GPR12(r1); /* save r12 in stackframe */ \ ld r2,PACATOC(r13); /* get kernel TOC into r2 */ \ mflr r6; /* save LR in stackframe */ \ @@ -187,24 +413,29 @@ exc_##n##_common: \ std r11,SOFTE(r1); /* and save it to stackframe */ \ std r12,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */ \ std r3,_TRAP(r1); /* set trap number */ \ - std r0,RESULT(r1); /* clear regs->result */ \ - ints; + std r0,RESULT(r1); /* clear regs->result */ -/* Variants for the "ints" argument. This one does nothing when we want - * to keep interrupts in their original state - */ -#define INTS_KEEP +#define EXCEPTION_COMMON(n) \ + EXCEPTION_COMMON_LVL(n, SPRN_SPRG_GEN_SCRATCH, PACA_EXGEN) +#define EXCEPTION_COMMON_CRIT(n) \ + EXCEPTION_COMMON_LVL(n, SPRN_SPRG_CRIT_SCRATCH, PACA_EXCRIT) +#define EXCEPTION_COMMON_MC(n) \ + EXCEPTION_COMMON_LVL(n, SPRN_SPRG_MC_SCRATCH, PACA_EXMC) +#define EXCEPTION_COMMON_DBG(n) \ + EXCEPTION_COMMON_LVL(n, SPRN_SPRG_DBG_SCRATCH, PACA_EXDBG) -/* This second version is meant for exceptions that don't immediately - * hard-enable. We set a bit in paca->irq_happened to ensure that - * a subsequent call to arch_local_irq_restore() will properly - * hard-enable and avoid the fast-path, and then reconcile irq state. +/* + * This is meant for exceptions that don't immediately hard-enable. We + * set a bit in paca->irq_happened to ensure that a subsequent call to + * arch_local_irq_restore() will properly hard-enable and avoid the + * fast-path, and then reconcile irq state. */ #define INTS_DISABLE RECONCILE_IRQ_STATE(r3,r4) -/* This is called by exceptions that used INTS_KEEP (that did not touch - * irq indicators in the PACA). This will restore MSR:EE to it's previous - * value +/* + * This is called by exceptions that don't use INTS_DISABLE (that did not + * touch irq indicators in the PACA). This will restore MSR:EE to it's + * previous value * * XXX In the long run, we may want to open-code it in order to separate the * load from the wrtee, thus limiting the latency caused by the dependency @@ -262,12 +493,13 @@ exc_##n##_bad_stack: \ #define MASKABLE_EXCEPTION(trapnum, intnum, label, hdlr, ack) \ START_EXCEPTION(label); \ NORMAL_EXCEPTION_PROLOG(trapnum, intnum, PROLOG_ADDITION_MASKABLE)\ - EXCEPTION_COMMON(trapnum, PACA_EXGEN, INTS_DISABLE) \ + EXCEPTION_COMMON(trapnum) \ + INTS_DISABLE; \ ack(r8); \ CHECK_NAPPING(); \ addi r3,r1,STACK_FRAME_OVERHEAD; \ bl hdlr; \ - b .ret_from_except_lite; + b ret_from_except_lite; /* This value is used to mark exception frames on the stack. */ .section ".toc","aw" @@ -283,8 +515,8 @@ exception_marker: .balign 0x1000 .globl interrupt_base_book3e interrupt_base_book3e: /* fake trap */ - EXCEPTION_STUB(0x000, machine_check) /* 0x0200 */ - EXCEPTION_STUB(0x020, critical_input) /* 0x0580 */ + EXCEPTION_STUB(0x000, machine_check) + EXCEPTION_STUB(0x020, critical_input) /* 0x0100 */ EXCEPTION_STUB(0x040, debug_crit) /* 0x0d00 */ EXCEPTION_STUB(0x060, data_storage) /* 0x0300 */ EXCEPTION_STUB(0x080, instruction_storage) /* 0x0400 */ @@ -299,8 +531,8 @@ interrupt_base_book3e: /* fake trap */ EXCEPTION_STUB(0x1a0, watchdog) /* 0x09f0 */ EXCEPTION_STUB(0x1c0, data_tlb_miss) EXCEPTION_STUB(0x1e0, instruction_tlb_miss) - EXCEPTION_STUB(0x200, altivec_unavailable) /* 0x0f20 */ - EXCEPTION_STUB(0x220, altivec_assist) /* 0x1700 */ + EXCEPTION_STUB(0x200, altivec_unavailable) + EXCEPTION_STUB(0x220, altivec_assist) EXCEPTION_STUB(0x260, perfmon) EXCEPTION_STUB(0x280, doorbell) EXCEPTION_STUB(0x2a0, doorbell_crit) @@ -308,6 +540,7 @@ interrupt_base_book3e: /* fake trap */ EXCEPTION_STUB(0x2e0, guest_doorbell_crit) EXCEPTION_STUB(0x300, hypercall) EXCEPTION_STUB(0x320, ehpriv) + EXCEPTION_STUB(0x340, lrat_error) .globl interrupt_end_book3e interrupt_end_book3e: @@ -316,25 +549,25 @@ interrupt_end_book3e: START_EXCEPTION(critical_input); CRIT_EXCEPTION_PROLOG(0x100, BOOKE_INTERRUPT_CRITICAL, PROLOG_ADDITION_NONE) -// EXCEPTION_COMMON(0x100, PACA_EXCRIT, INTS_DISABLE) -// bl special_reg_save_crit -// CHECK_NAPPING(); -// addi r3,r1,STACK_FRAME_OVERHEAD -// bl .critical_exception -// b ret_from_crit_except - b . + EXCEPTION_COMMON_CRIT(0x100) + bl save_nvgprs + bl special_reg_save + CHECK_NAPPING(); + addi r3,r1,STACK_FRAME_OVERHEAD + bl unknown_exception + b ret_from_crit_except /* Machine Check Interrupt */ START_EXCEPTION(machine_check); - MC_EXCEPTION_PROLOG(0x200, BOOKE_INTERRUPT_MACHINE_CHECK, + MC_EXCEPTION_PROLOG(0x000, BOOKE_INTERRUPT_MACHINE_CHECK, PROLOG_ADDITION_NONE) -// EXCEPTION_COMMON(0x200, PACA_EXMC, INTS_DISABLE) -// bl special_reg_save_mc -// addi r3,r1,STACK_FRAME_OVERHEAD -// CHECK_NAPPING(); -// bl .machine_check_exception -// b ret_from_mc_except - b . + EXCEPTION_COMMON_MC(0x000) + bl save_nvgprs + bl special_reg_save + CHECK_NAPPING(); + addi r3,r1,STACK_FRAME_OVERHEAD + bl machine_check_exception + b ret_from_mc_except /* Data Storage Interrupt */ START_EXCEPTION(data_storage) @@ -342,7 +575,8 @@ interrupt_end_book3e: PROLOG_ADDITION_2REGS) mfspr r14,SPRN_DEAR mfspr r15,SPRN_ESR - EXCEPTION_COMMON(0x300, PACA_EXGEN, INTS_DISABLE) + EXCEPTION_COMMON(0x300) + INTS_DISABLE b storage_fault_common /* Instruction Storage Interrupt */ @@ -351,12 +585,13 @@ interrupt_end_book3e: PROLOG_ADDITION_2REGS) li r15,0 mr r14,r10 - EXCEPTION_COMMON(0x400, PACA_EXGEN, INTS_DISABLE) + EXCEPTION_COMMON(0x400) + INTS_DISABLE b storage_fault_common /* External Input Interrupt */ MASKABLE_EXCEPTION(0x500, BOOKE_INTERRUPT_EXTERNAL, - external_input, .do_IRQ, ACK_NONE) + external_input, do_IRQ, ACK_NONE) /* Alignment */ START_EXCEPTION(alignment); @@ -364,7 +599,7 @@ interrupt_end_book3e: PROLOG_ADDITION_2REGS) mfspr r14,SPRN_DEAR mfspr r15,SPRN_ESR - EXCEPTION_COMMON(0x600, PACA_EXGEN, INTS_KEEP) + EXCEPTION_COMMON(0x600) b alignment_more /* no room, go out of line */ /* Program Interrupt */ @@ -372,89 +607,96 @@ interrupt_end_book3e: NORMAL_EXCEPTION_PROLOG(0x700, BOOKE_INTERRUPT_PROGRAM, PROLOG_ADDITION_1REG) mfspr r14,SPRN_ESR - EXCEPTION_COMMON(0x700, PACA_EXGEN, INTS_DISABLE) + EXCEPTION_COMMON(0x700) + INTS_DISABLE std r14,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD ld r14,PACA_EXGEN+EX_R14(r13) - bl .save_nvgprs - bl .program_check_exception - b .ret_from_except + bl save_nvgprs + bl program_check_exception + b ret_from_except /* Floating Point Unavailable Interrupt */ START_EXCEPTION(fp_unavailable); NORMAL_EXCEPTION_PROLOG(0x800, BOOKE_INTERRUPT_FP_UNAVAIL, PROLOG_ADDITION_NONE) /* we can probably do a shorter exception entry for that one... */ - EXCEPTION_COMMON(0x800, PACA_EXGEN, INTS_KEEP) + EXCEPTION_COMMON(0x800) ld r12,_MSR(r1) andi. r0,r12,MSR_PR; beq- 1f - bl .load_up_fpu + bl load_up_fpu b fast_exception_return 1: INTS_DISABLE - bl .save_nvgprs + bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD - bl .kernel_fp_unavailable_exception - b .ret_from_except + bl kernel_fp_unavailable_exception + b ret_from_except /* Altivec Unavailable Interrupt */ START_EXCEPTION(altivec_unavailable); - NORMAL_EXCEPTION_PROLOG(0x200, BOOKE_INTERRUPT_ALTIVEC_UNAVAIL, + NORMAL_EXCEPTION_PROLOG(0x200, BOOKE_INTERRUPT_SPE_ALTIVEC_UNAVAIL, PROLOG_ADDITION_NONE) /* we can probably do a shorter exception entry for that one... */ - EXCEPTION_COMMON(0x200, PACA_EXGEN, INTS_KEEP) + EXCEPTION_COMMON(0x200) #ifdef CONFIG_ALTIVEC BEGIN_FTR_SECTION ld r12,_MSR(r1) andi. r0,r12,MSR_PR; beq- 1f - bl .load_up_altivec + bl load_up_altivec b fast_exception_return 1: END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif INTS_DISABLE - bl .save_nvgprs + bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD - bl .altivec_unavailable_exception - b .ret_from_except + bl altivec_unavailable_exception + b ret_from_except /* AltiVec Assist */ START_EXCEPTION(altivec_assist); - NORMAL_EXCEPTION_PROLOG(0x220, BOOKE_INTERRUPT_ALTIVEC_ASSIST, + NORMAL_EXCEPTION_PROLOG(0x220, + BOOKE_INTERRUPT_SPE_FP_DATA_ALTIVEC_ASSIST, PROLOG_ADDITION_NONE) - EXCEPTION_COMMON(0x220, PACA_EXGEN, INTS_DISABLE) - bl .save_nvgprs + EXCEPTION_COMMON(0x220) + INTS_DISABLE + bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD #ifdef CONFIG_ALTIVEC BEGIN_FTR_SECTION - bl .altivec_assist_exception + bl altivec_assist_exception END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #else - bl .unknown_exception + bl unknown_exception #endif - b .ret_from_except + b ret_from_except /* Decrementer Interrupt */ MASKABLE_EXCEPTION(0x900, BOOKE_INTERRUPT_DECREMENTER, - decrementer, .timer_interrupt, ACK_DEC) + decrementer, timer_interrupt, ACK_DEC) /* Fixed Interval Timer Interrupt */ MASKABLE_EXCEPTION(0x980, BOOKE_INTERRUPT_FIT, - fixed_interval, .unknown_exception, ACK_FIT) + fixed_interval, unknown_exception, ACK_FIT) /* Watchdog Timer Interrupt */ START_EXCEPTION(watchdog); CRIT_EXCEPTION_PROLOG(0x9f0, BOOKE_INTERRUPT_WATCHDOG, PROLOG_ADDITION_NONE) -// EXCEPTION_COMMON(0x9f0, PACA_EXCRIT, INTS_DISABLE) -// bl special_reg_save_crit -// CHECK_NAPPING(); -// addi r3,r1,STACK_FRAME_OVERHEAD -// bl .unknown_exception -// b ret_from_crit_except - b . + EXCEPTION_COMMON_CRIT(0x9f0) + bl save_nvgprs + bl special_reg_save + CHECK_NAPPING(); + addi r3,r1,STACK_FRAME_OVERHEAD +#ifdef CONFIG_BOOKE_WDT + bl WatchdogException +#else + bl unknown_exception +#endif + b ret_from_crit_except /* System Call Interrupt */ START_EXCEPTION(system_call) @@ -468,11 +710,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) START_EXCEPTION(ap_unavailable); NORMAL_EXCEPTION_PROLOG(0xf20, BOOKE_INTERRUPT_AP_UNAVAIL, PROLOG_ADDITION_NONE) - EXCEPTION_COMMON(0xf20, PACA_EXGEN, INTS_DISABLE) - bl .save_nvgprs + EXCEPTION_COMMON(0xf20) + INTS_DISABLE + bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD - bl .unknown_exception - b .ret_from_except + bl unknown_exception + b ret_from_except /* Debug exception as a critical interrupt*/ START_EXCEPTION(debug_crit); @@ -511,7 +754,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) mtcr r10 ld r10,PACA_EXCRIT+EX_R10(r13) /* restore registers */ ld r11,PACA_EXCRIT+EX_R11(r13) - ld r13,PACA_EXCRIT+EX_R13(r13) + mfspr r13,SPRN_SPRG_CRIT_SCRATCH rfci /* Normal debug exception */ @@ -524,18 +767,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) /* Now we mash up things to make it look like we are coming on a * normal exception */ - ld r15,PACA_EXCRIT+EX_R13(r13) - mtspr SPRN_SPRG_GEN_SCRATCH,r15 mfspr r14,SPRN_DBSR - EXCEPTION_COMMON(0xd00, PACA_EXCRIT, INTS_DISABLE) + EXCEPTION_COMMON_CRIT(0xd00) std r14,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD mr r4,r14 ld r14,PACA_EXCRIT+EX_R14(r13) ld r15,PACA_EXCRIT+EX_R15(r13) - bl .save_nvgprs - bl .DebugException - b .ret_from_except + bl save_nvgprs + bl DebugException + b ret_from_except kernel_dbg_exc: b . /* NYI */ @@ -590,42 +831,43 @@ kernel_dbg_exc: /* Now we mash up things to make it look like we are coming on a * normal exception */ - mfspr r15,SPRN_SPRG_DBG_SCRATCH - mtspr SPRN_SPRG_GEN_SCRATCH,r15 mfspr r14,SPRN_DBSR - EXCEPTION_COMMON(0xd08, PACA_EXDBG, INTS_DISABLE) + EXCEPTION_COMMON_DBG(0xd08) + INTS_DISABLE std r14,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD mr r4,r14 ld r14,PACA_EXDBG+EX_R14(r13) ld r15,PACA_EXDBG+EX_R15(r13) - bl .save_nvgprs - bl .DebugException - b .ret_from_except + bl save_nvgprs + bl DebugException + b ret_from_except START_EXCEPTION(perfmon); NORMAL_EXCEPTION_PROLOG(0x260, BOOKE_INTERRUPT_PERFORMANCE_MONITOR, PROLOG_ADDITION_NONE) - EXCEPTION_COMMON(0x260, PACA_EXGEN, INTS_DISABLE) + EXCEPTION_COMMON(0x260) + INTS_DISABLE + CHECK_NAPPING() addi r3,r1,STACK_FRAME_OVERHEAD - bl .performance_monitor_exception - b .ret_from_except_lite + bl performance_monitor_exception + b ret_from_except_lite /* Doorbell interrupt */ MASKABLE_EXCEPTION(0x280, BOOKE_INTERRUPT_DOORBELL, - doorbell, .doorbell_exception, ACK_NONE) + doorbell, doorbell_exception, ACK_NONE) /* Doorbell critical Interrupt */ START_EXCEPTION(doorbell_crit); CRIT_EXCEPTION_PROLOG(0x2a0, BOOKE_INTERRUPT_DOORBELL_CRITICAL, PROLOG_ADDITION_NONE) -// EXCEPTION_COMMON(0x2a0, PACA_EXCRIT, INTS_DISABLE) -// bl special_reg_save_crit -// CHECK_NAPPING(); -// addi r3,r1,STACK_FRAME_OVERHEAD -// bl .doorbell_critical_exception -// b ret_from_crit_except - b . + EXCEPTION_COMMON_CRIT(0x2a0) + bl save_nvgprs + bl special_reg_save + CHECK_NAPPING(); + addi r3,r1,STACK_FRAME_OVERHEAD + bl unknown_exception + b ret_from_crit_except /* * Guest doorbell interrupt @@ -634,41 +876,52 @@ kernel_dbg_exc: START_EXCEPTION(guest_doorbell); GDBELL_EXCEPTION_PROLOG(0x2c0, BOOKE_INTERRUPT_GUEST_DBELL, PROLOG_ADDITION_NONE) - EXCEPTION_COMMON(0x2c0, PACA_EXGEN, INTS_KEEP) + EXCEPTION_COMMON(0x2c0) addi r3,r1,STACK_FRAME_OVERHEAD - bl .save_nvgprs + bl save_nvgprs INTS_RESTORE_HARD - bl .unknown_exception - b .ret_from_except + bl unknown_exception + b ret_from_except /* Guest Doorbell critical Interrupt */ START_EXCEPTION(guest_doorbell_crit); CRIT_EXCEPTION_PROLOG(0x2e0, BOOKE_INTERRUPT_GUEST_DBELL_CRIT, PROLOG_ADDITION_NONE) -// EXCEPTION_COMMON(0x2e0, PACA_EXCRIT, INTS_DISABLE) -// bl special_reg_save_crit -// CHECK_NAPPING(); -// addi r3,r1,STACK_FRAME_OVERHEAD -// bl .guest_doorbell_critical_exception -// b ret_from_crit_except - b . + EXCEPTION_COMMON_CRIT(0x2e0) + bl save_nvgprs + bl special_reg_save + CHECK_NAPPING(); + addi r3,r1,STACK_FRAME_OVERHEAD + bl unknown_exception + b ret_from_crit_except /* Hypervisor call */ START_EXCEPTION(hypercall); NORMAL_EXCEPTION_PROLOG(0x310, BOOKE_INTERRUPT_HV_SYSCALL, PROLOG_ADDITION_NONE) - EXCEPTION_COMMON(0x310, PACA_EXGEN, INTS_KEEP) + EXCEPTION_COMMON(0x310) addi r3,r1,STACK_FRAME_OVERHEAD - bl .save_nvgprs + bl save_nvgprs INTS_RESTORE_HARD - bl .unknown_exception - b .ret_from_except + bl unknown_exception + b ret_from_except /* Embedded Hypervisor priviledged */ START_EXCEPTION(ehpriv); NORMAL_EXCEPTION_PROLOG(0x320, BOOKE_INTERRUPT_HV_PRIV, PROLOG_ADDITION_NONE) - EXCEPTION_COMMON(0x320, PACA_EXGEN, INTS_KEEP) + EXCEPTION_COMMON(0x320) + addi r3,r1,STACK_FRAME_OVERHEAD + bl save_nvgprs + INTS_RESTORE_HARD + bl unknown_exception + b ret_from_except + +/* LRAT Error interrupt */ + START_EXCEPTION(lrat_error); + NORMAL_EXCEPTION_PROLOG(0x340, BOOKE_INTERRUPT_LRAT_ERROR, + PROLOG_ADDITION_NONE) + EXCEPTION_COMMON(0x340) addi r3,r1,STACK_FRAME_OVERHEAD bl .save_nvgprs INTS_RESTORE_HARD @@ -761,16 +1014,16 @@ storage_fault_common: mr r5,r15 ld r14,PACA_EXGEN+EX_R14(r13) ld r15,PACA_EXGEN+EX_R15(r13) - bl .do_page_fault + bl do_page_fault cmpdi r3,0 bne- 1f - b .ret_from_except_lite -1: bl .save_nvgprs + b ret_from_except_lite +1: bl save_nvgprs mr r5,r3 addi r3,r1,STACK_FRAME_OVERHEAD ld r4,_DAR(r1) - bl .bad_page_fault - b .ret_from_except + bl bad_page_fault + b ret_from_except /* * Alignment exception doesn't fit entirely in the 0x100 bytes so it @@ -782,10 +1035,10 @@ alignment_more: addi r3,r1,STACK_FRAME_OVERHEAD ld r14,PACA_EXGEN+EX_R14(r13) ld r15,PACA_EXGEN+EX_R15(r13) - bl .save_nvgprs + bl save_nvgprs INTS_RESTORE_HARD - bl .alignment_exception - b .ret_from_except + bl alignment_exception + b ret_from_except /* * We branch here from entry_64.S for the last stage of the exception @@ -857,6 +1110,7 @@ BAD_STACK_TRAMPOLINE(0x2e0) BAD_STACK_TRAMPOLINE(0x300) BAD_STACK_TRAMPOLINE(0x310) BAD_STACK_TRAMPOLINE(0x320) +BAD_STACK_TRAMPOLINE(0x340) BAD_STACK_TRAMPOLINE(0x400) BAD_STACK_TRAMPOLINE(0x500) BAD_STACK_TRAMPOLINE(0x600) @@ -918,7 +1172,7 @@ bad_stack_book3e: std r12,0(r11) ld r2,PACATOC(r13) 1: addi r3,r1,STACK_FRAME_OVERHEAD - bl .kernel_bad_stack + bl kernel_bad_stack b 1b /* @@ -1053,12 +1307,9 @@ skpinv: addi r6,r6,1 /* Increment */ mtspr SPRN_MAS0,r3 tlbre mfspr r6,SPRN_MAS1 - rlwinm r6,r6,0,2,0 /* clear IPROT */ + rlwinm r6,r6,0,2,31 /* clear IPROT and VALID */ mtspr SPRN_MAS1,r6 tlbwe - - /* Invalidate TLB1 */ - PPC_TLBILX_ALL(0,R0) sync isync @@ -1112,12 +1363,9 @@ skpinv: addi r6,r6,1 /* Increment */ mtspr SPRN_MAS0,r4 tlbre mfspr r5,SPRN_MAS1 - rlwinm r5,r5,0,2,0 /* clear IPROT */ + rlwinm r5,r5,0,2,31 /* clear IPROT and VALID */ mtspr SPRN_MAS1,r5 tlbwe - - /* Invalidate TLB1 */ - PPC_TLBILX_ALL(0,R0) sync isync @@ -1219,22 +1467,6 @@ a2_tlbinit_after_linear_map: .globl a2_tlbinit_after_iprot_flush a2_tlbinit_after_iprot_flush: -#ifdef CONFIG_PPC_EARLY_DEBUG_WSP - /* Now establish early debug mappings if applicable */ - /* Restore the MAS0 we used for linear mapping load */ - mtspr SPRN_MAS0,r11 - - lis r3,(MAS1_VALID | MAS1_IPROT)@h - ori r3,r3,(BOOK3E_PAGESZ_4K << MAS1_TSIZE_SHIFT) - mtspr SPRN_MAS1,r3 - LOAD_REG_IMMEDIATE(r3, WSP_UART_VIRT | MAS2_I | MAS2_G) - mtspr SPRN_MAS2,r3 - LOAD_REG_IMMEDIATE(r3, WSP_UART_PHYS | MAS3_SR | MAS3_SW) - mtspr SPRN_MAS7_MAS3,r3 - /* re-use the MAS8 value from the linear mapping */ - tlbwe -#endif /* CONFIG_PPC_EARLY_DEBUG_WSP */ - PPC_TLBILX(0,0,R0) sync isync @@ -1273,13 +1505,13 @@ _GLOBAL(start_initialization_book3e) * and always use AS 0, so we just set it up to match our link * address and never use 0 based addresses. */ - bl .initial_tlb_book3e + bl initial_tlb_book3e /* Init global core bits */ - bl .init_core_book3e + bl init_core_book3e /* Init per-thread bits */ - bl .init_thread_book3e + bl init_thread_book3e /* Return to common init code */ tovirt(r28,r28) @@ -1300,7 +1532,7 @@ _GLOBAL(start_initialization_book3e) */ _GLOBAL(book3e_secondary_core_init_tlb_set) li r4,1 - b .generic_secondary_smp_init + b generic_secondary_smp_init _GLOBAL(book3e_secondary_core_init) mflr r28 @@ -1310,18 +1542,18 @@ _GLOBAL(book3e_secondary_core_init) bne 2f /* Setup TLB for this core */ - bl .initial_tlb_book3e + bl initial_tlb_book3e /* We can return from the above running at a different * address, so recalculate r2 (TOC) */ - bl .relative_toc + bl relative_toc /* Init global core bits */ -2: bl .init_core_book3e +2: bl init_core_book3e /* Init per-thread bits */ -3: bl .init_thread_book3e +3: bl init_thread_book3e /* Return to common init code at proper virtual address. * @@ -1348,14 +1580,14 @@ _GLOBAL(book3e_secondary_thread_init) mflr r28 b 3b -_STATIC(init_core_book3e) +init_core_book3e: /* Establish the interrupt vector base */ LOAD_REG_IMMEDIATE(r3, interrupt_base_book3e) mtspr SPRN_IVPR,r3 sync blr -_STATIC(init_thread_book3e) +init_thread_book3e: lis r3,(SPRN_EPCR_ICM | SPRN_EPCR_GICM)@h mtspr SPRN_EPCR,r3 @@ -1412,3 +1644,7 @@ _GLOBAL(setup_ehv_ivors) SET_IVOR(38, 0x2c0) /* Guest Processor Doorbell */ SET_IVOR(39, 0x2e0) /* Guest Processor Doorbell Crit/MC */ blr + +_GLOBAL(setup_lrat_ivor) + SET_IVOR(42, 0x340) /* LRAT Error */ + blr diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 3a9ed6ac224..a7d36b19221 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -54,14 +54,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \ xori r12,r12,MSR_LE ; \ mtspr SPRN_SRR1,r12 ; \ rfid ; /* return to userspace */ \ - b . ; \ -2: mfspr r12,SPRN_SRR1 ; \ - andi. r12,r12,MSR_PR ; \ - bne 0b ; \ - mtspr SPRN_SRR0,r3 ; \ - mtspr SPRN_SRR1,r4 ; \ - mtspr SPRN_SDR1,r5 ; \ - rfid ; \ b . ; /* prevent speculative execution */ #if defined(CONFIG_RELOCATABLE) @@ -121,12 +113,13 @@ BEGIN_FTR_SECTION cmpwi cr1,r13,2 /* Total loss of HV state is fatal, we could try to use the * PIR to locate a PACA, then use an emergency stack etc... - * but for now, let's just stay stuck here + * OPAL v3 based powernv platforms have new idle states + * which fall in this catagory. */ - bgt cr1,. + bgt cr1,8f GET_PACA(r13) -#ifdef CONFIG_KVM_BOOK3S_64_HV +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE li r0,KVM_HWTHREAD_IN_KERNEL stb r0,HSTATE_HWTHREAD_STATE(r13) /* Order setting hwthread_state vs. testing hwthread_req */ @@ -139,8 +132,13 @@ BEGIN_FTR_SECTION #endif beq cr1,2f - b .power7_wakeup_noloss -2: b .power7_wakeup_loss + b power7_wakeup_noloss +2: b power7_wakeup_loss + + /* Fast Sleep wakeup on PowerNV */ +8: GET_PACA(r13) + b power7_wakeup_tb_loss + 9: END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) #endif /* CONFIG_PPC_P7_NAP */ @@ -155,8 +153,35 @@ machine_check_pSeries_1: */ HMT_MEDIUM_PPR_DISCARD SET_SCRATCH0(r13) /* save r13 */ +#ifdef CONFIG_PPC_P7_NAP +BEGIN_FTR_SECTION + /* Running native on arch 2.06 or later, check if we are + * waking up from nap. We only handle no state loss and + * supervisor state loss. We do -not- handle hypervisor + * state loss at this time. + */ + mfspr r13,SPRN_SRR1 + rlwinm. r13,r13,47-31,30,31 + OPT_GET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR) + beq 9f + + mfspr r13,SPRN_SRR1 + rlwinm. r13,r13,47-31,30,31 + /* waking up from powersave (nap) state */ + cmpwi cr1,r13,2 + /* Total loss of HV state is fatal. let's just stay stuck here */ + OPT_GET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR) + bgt cr1,. +9: + OPT_SET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR) +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) +#endif /* CONFIG_PPC_P7_NAP */ EXCEPTION_PROLOG_0(PACA_EXMC) +BEGIN_FTR_SECTION + b machine_check_pSeries_early +FTR_SECTION_ELSE b machine_check_pSeries_0 +ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) . = 0x300 .globl data_access_pSeries @@ -186,16 +211,16 @@ data_access_slb_pSeries: #endif /* __DISABLED__ */ mfspr r12,SPRN_SRR1 #ifndef CONFIG_RELOCATABLE - b .slb_miss_realmode + b slb_miss_realmode #else /* - * We can't just use a direct branch to .slb_miss_realmode + * We can't just use a direct branch to slb_miss_realmode * because the distance from here to there depends on where * the kernel ends up being put. */ mfctr r11 ld r10,PACAKBASE(r13) - LOAD_HANDLER(r10, .slb_miss_realmode) + LOAD_HANDLER(r10, slb_miss_realmode) mtctr r10 bctr #endif @@ -218,11 +243,11 @@ instruction_access_slb_pSeries: #endif /* __DISABLED__ */ mfspr r12,SPRN_SRR1 #ifndef CONFIG_RELOCATABLE - b .slb_miss_realmode + b slb_miss_realmode #else mfctr r11 ld r10,PACAKBASE(r13) - LOAD_HANDLER(r10, .slb_miss_realmode) + LOAD_HANDLER(r10, slb_miss_realmode) mtctr r10 bctr #endif @@ -405,6 +430,80 @@ denorm_exception_hv: .align 7 /* moved from 0x200 */ +machine_check_pSeries_early: +BEGIN_FTR_SECTION + EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200) + /* + * Register contents: + * R13 = PACA + * R9 = CR + * Original R9 to R13 is saved on PACA_EXMC + * + * Switch to mc_emergency stack and handle re-entrancy (we limit + * the nested MCE upto level 4 to avoid stack overflow). + * Save MCE registers srr1, srr0, dar and dsisr and then set ME=1 + * + * We use paca->in_mce to check whether this is the first entry or + * nested machine check. We increment paca->in_mce to track nested + * machine checks. + * + * If this is the first entry then set stack pointer to + * paca->mc_emergency_sp, otherwise r1 is already pointing to + * stack frame on mc_emergency stack. + * + * NOTE: We are here with MSR_ME=0 (off), which means we risk a + * checkstop if we get another machine check exception before we do + * rfid with MSR_ME=1. + */ + mr r11,r1 /* Save r1 */ + lhz r10,PACA_IN_MCE(r13) + cmpwi r10,0 /* Are we in nested machine check */ + bne 0f /* Yes, we are. */ + /* First machine check entry */ + ld r1,PACAMCEMERGSP(r13) /* Use MC emergency stack */ +0: subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */ + addi r10,r10,1 /* increment paca->in_mce */ + sth r10,PACA_IN_MCE(r13) + /* Limit nested MCE to level 4 to avoid stack overflow */ + cmpwi r10,4 + bgt 2f /* Check if we hit limit of 4 */ + std r11,GPR1(r1) /* Save r1 on the stack. */ + std r11,0(r1) /* make stack chain pointer */ + mfspr r11,SPRN_SRR0 /* Save SRR0 */ + std r11,_NIP(r1) + mfspr r11,SPRN_SRR1 /* Save SRR1 */ + std r11,_MSR(r1) + mfspr r11,SPRN_DAR /* Save DAR */ + std r11,_DAR(r1) + mfspr r11,SPRN_DSISR /* Save DSISR */ + std r11,_DSISR(r1) + std r9,_CCR(r1) /* Save CR in stackframe */ + /* Save r9 through r13 from EXMC save area to stack frame. */ + EXCEPTION_PROLOG_COMMON_2(PACA_EXMC) + mfmsr r11 /* get MSR value */ + ori r11,r11,MSR_ME /* turn on ME bit */ + ori r11,r11,MSR_RI /* turn on RI bit */ + ld r12,PACAKBASE(r13) /* get high part of &label */ + LOAD_HANDLER(r12, machine_check_handle_early) +1: mtspr SPRN_SRR0,r12 + mtspr SPRN_SRR1,r11 + rfid + b . /* prevent speculative execution */ +2: + /* Stack overflow. Stay on emergency stack and panic. + * Keep the ME bit off while panic-ing, so that if we hit + * another machine check we checkstop. + */ + addi r1,r1,INT_FRAME_SIZE /* go back to previous stack frame */ + ld r11,PACAKMSR(r13) + ld r12,PACAKBASE(r13) + LOAD_HANDLER(r12, unrecover_mce) + li r10,MSR_ME + andc r11,r11,r10 /* Turn off MSR_ME */ + b 1b + b . /* prevent speculative execution */ +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) + machine_check_pSeries: .globl machine_check_fwnmi machine_check_fwnmi: @@ -425,7 +524,7 @@ data_access_check_stab: mfspr r9,SPRN_DSISR srdi r10,r10,60 rlwimi r10,r9,16,0x20 -#ifdef CONFIG_KVM_BOOK3S_PR +#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE lbz r9,HSTATE_IN_GUEST(r13) rlwimi r10,r9,8,0x300 #endif @@ -441,7 +540,7 @@ do_stab_bolted_pSeries: std r12,PACA_EXSLB+EX_R12(r13) GET_SCRATCH0(r10) std r10,PACA_EXSLB+EX_R13(r13) - EXCEPTION_PROLOG_PSERIES_1(.do_stab_bolted, EXC_STD) + EXCEPTION_PROLOG_PSERIES_1(do_stab_bolted, EXC_STD) KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x300) KVM_HANDLER_SKIP(PACA_EXSLB, EXC_STD, 0x380) @@ -650,6 +749,32 @@ slb_miss_user_pseries: b . /* prevent spec. execution */ #endif /* __DISABLED__ */ +#ifdef CONFIG_KVM_BOOK3S_64_HANDLER +kvmppc_skip_interrupt: + /* + * Here all GPRs are unchanged from when the interrupt happened + * except for r13, which is saved in SPRG_SCRATCH0. + */ + mfspr r13, SPRN_SRR0 + addi r13, r13, 4 + mtspr SPRN_SRR0, r13 + GET_SCRATCH0(r13) + rfid + b . + +kvmppc_skip_Hinterrupt: + /* + * Here all GPRs are unchanged from when the interrupt happened + * except for r13, which is saved in SPRG_SCRATCH0. + */ + mfspr r13, SPRN_HSRR0 + addi r13, r13, 4 + mtspr SPRN_HSRR0, r13 + GET_SCRATCH0(r13) + hrfid + b . +#endif + /* * Code from here down to __end_handlers is invoked from the * exception prologs above. Because the prologs assemble the @@ -660,62 +785,38 @@ slb_miss_user_pseries: /*** Common interrupt handlers ***/ - STD_EXCEPTION_COMMON(0x100, system_reset, .system_reset_exception) - - /* - * Machine check is different because we use a different - * save area: PACA_EXMC instead of PACA_EXGEN. - */ - .align 7 - .globl machine_check_common -machine_check_common: - - mfspr r10,SPRN_DAR - std r10,PACA_EXGEN+EX_DAR(r13) - mfspr r10,SPRN_DSISR - stw r10,PACA_EXGEN+EX_DSISR(r13) - EXCEPTION_PROLOG_COMMON(0x200, PACA_EXMC) - FINISH_NAP - DISABLE_INTS - ld r3,PACA_EXGEN+EX_DAR(r13) - lwz r4,PACA_EXGEN+EX_DSISR(r13) - std r3,_DAR(r1) - std r4,_DSISR(r1) - bl .save_nvgprs - addi r3,r1,STACK_FRAME_OVERHEAD - bl .machine_check_exception - b .ret_from_except + STD_EXCEPTION_COMMON(0x100, system_reset, system_reset_exception) STD_EXCEPTION_COMMON_ASYNC(0x500, hardware_interrupt, do_IRQ) - STD_EXCEPTION_COMMON_ASYNC(0x900, decrementer, .timer_interrupt) - STD_EXCEPTION_COMMON(0x980, hdecrementer, .hdec_interrupt) + STD_EXCEPTION_COMMON_ASYNC(0x900, decrementer, timer_interrupt) + STD_EXCEPTION_COMMON(0x980, hdecrementer, hdec_interrupt) #ifdef CONFIG_PPC_DOORBELL - STD_EXCEPTION_COMMON_ASYNC(0xa00, doorbell_super, .doorbell_exception) + STD_EXCEPTION_COMMON_ASYNC(0xa00, doorbell_super, doorbell_exception) #else - STD_EXCEPTION_COMMON_ASYNC(0xa00, doorbell_super, .unknown_exception) + STD_EXCEPTION_COMMON_ASYNC(0xa00, doorbell_super, unknown_exception) #endif - STD_EXCEPTION_COMMON(0xb00, trap_0b, .unknown_exception) - STD_EXCEPTION_COMMON(0xd00, single_step, .single_step_exception) - STD_EXCEPTION_COMMON(0xe00, trap_0e, .unknown_exception) - STD_EXCEPTION_COMMON(0xe40, emulation_assist, .emulation_assist_interrupt) - STD_EXCEPTION_COMMON(0xe60, hmi_exception, .unknown_exception) + STD_EXCEPTION_COMMON(0xb00, trap_0b, unknown_exception) + STD_EXCEPTION_COMMON(0xd00, single_step, single_step_exception) + STD_EXCEPTION_COMMON(0xe00, trap_0e, unknown_exception) + STD_EXCEPTION_COMMON(0xe40, emulation_assist, emulation_assist_interrupt) + STD_EXCEPTION_COMMON(0xe60, hmi_exception, unknown_exception) #ifdef CONFIG_PPC_DOORBELL - STD_EXCEPTION_COMMON_ASYNC(0xe80, h_doorbell, .doorbell_exception) + STD_EXCEPTION_COMMON_ASYNC(0xe80, h_doorbell, doorbell_exception) #else - STD_EXCEPTION_COMMON_ASYNC(0xe80, h_doorbell, .unknown_exception) + STD_EXCEPTION_COMMON_ASYNC(0xe80, h_doorbell, unknown_exception) #endif - STD_EXCEPTION_COMMON_ASYNC(0xf00, performance_monitor, .performance_monitor_exception) - STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, .instruction_breakpoint_exception) - STD_EXCEPTION_COMMON(0x1502, denorm, .unknown_exception) + STD_EXCEPTION_COMMON_ASYNC(0xf00, performance_monitor, performance_monitor_exception) + STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, instruction_breakpoint_exception) + STD_EXCEPTION_COMMON(0x1502, denorm, unknown_exception) #ifdef CONFIG_ALTIVEC - STD_EXCEPTION_COMMON(0x1700, altivec_assist, .altivec_assist_exception) + STD_EXCEPTION_COMMON(0x1700, altivec_assist, altivec_assist_exception) #else - STD_EXCEPTION_COMMON(0x1700, altivec_assist, .unknown_exception) + STD_EXCEPTION_COMMON(0x1700, altivec_assist, unknown_exception) #endif #ifdef CONFIG_CBE_RAS - STD_EXCEPTION_COMMON(0x1200, cbe_system_error, .cbe_system_error_exception) - STD_EXCEPTION_COMMON(0x1600, cbe_maintenance, .cbe_maintenance_exception) - STD_EXCEPTION_COMMON(0x1800, cbe_thermal, .cbe_thermal_exception) + STD_EXCEPTION_COMMON(0x1200, cbe_system_error, cbe_system_error_exception) + STD_EXCEPTION_COMMON(0x1600, cbe_maintenance, cbe_maintenance_exception) + STD_EXCEPTION_COMMON(0x1800, cbe_thermal, cbe_thermal_exception) #endif /* CONFIG_CBE_RAS */ /* @@ -744,16 +845,16 @@ data_access_slb_relon_pSeries: mfspr r3,SPRN_DAR mfspr r12,SPRN_SRR1 #ifndef CONFIG_RELOCATABLE - b .slb_miss_realmode + b slb_miss_realmode #else /* - * We can't just use a direct branch to .slb_miss_realmode + * We can't just use a direct branch to slb_miss_realmode * because the distance from here to there depends on where * the kernel ends up being put. */ mfctr r11 ld r10,PACAKBASE(r13) - LOAD_HANDLER(r10, .slb_miss_realmode) + LOAD_HANDLER(r10, slb_miss_realmode) mtctr r10 bctr #endif @@ -769,11 +870,11 @@ instruction_access_slb_relon_pSeries: mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */ mfspr r12,SPRN_SRR1 #ifndef CONFIG_RELOCATABLE - b .slb_miss_realmode + b slb_miss_realmode #else mfctr r11 ld r10,PACAKBASE(r13) - LOAD_HANDLER(r10, .slb_miss_realmode) + LOAD_HANDLER(r10, slb_miss_realmode) mtctr r10 bctr #endif @@ -881,7 +982,7 @@ system_call_entry: b system_call_common ppc64_runlatch_on_trampoline: - b .__ppc64_runlatch_on + b __ppc64_runlatch_on /* * Here we have detected that the kernel stack pointer is bad. @@ -940,7 +1041,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) std r12,RESULT(r1) std r11,STACK_FRAME_OVERHEAD-16(r1) 1: addi r3,r1,STACK_FRAME_OVERHEAD - bl .kernel_bad_stack + bl kernel_bad_stack b 1b /* @@ -961,7 +1062,7 @@ data_access_common: ld r3,PACA_EXGEN+EX_DAR(r13) lwz r4,PACA_EXGEN+EX_DSISR(r13) li r5,0x300 - b .do_hash_page /* Try to handle as hpte fault */ + b do_hash_page /* Try to handle as hpte fault */ .align 7 .globl h_data_storage_common @@ -971,11 +1072,11 @@ h_data_storage_common: mfspr r10,SPRN_HDSISR stw r10,PACA_EXGEN+EX_DSISR(r13) EXCEPTION_PROLOG_COMMON(0xe00, PACA_EXGEN) - bl .save_nvgprs + bl save_nvgprs DISABLE_INTS addi r3,r1,STACK_FRAME_OVERHEAD - bl .unknown_exception - b .ret_from_except + bl unknown_exception + b ret_from_except .align 7 .globl instruction_access_common @@ -986,9 +1087,9 @@ instruction_access_common: ld r3,_NIP(r1) andis. r4,r12,0x5820 li r5,0x400 - b .do_hash_page /* Try to handle as hpte fault */ + b do_hash_page /* Try to handle as hpte fault */ - STD_EXCEPTION_COMMON(0xe20, h_instr_storage, .unknown_exception) + STD_EXCEPTION_COMMON(0xe20, h_instr_storage, unknown_exception) /* * Here is the common SLB miss user that is used when going to virtual @@ -1003,7 +1104,7 @@ slb_miss_user_common: stw r9,PACA_EXGEN+EX_CCR(r13) std r10,PACA_EXGEN+EX_LR(r13) std r11,PACA_EXGEN+EX_SRR0(r13) - bl .slb_allocate_user + bl slb_allocate_user ld r10,PACA_EXGEN+EX_LR(r13) ld r3,PACA_EXGEN+EX_R3(r13) @@ -1046,14 +1147,38 @@ slb_miss_fault: unrecov_user_slb: EXCEPTION_PROLOG_COMMON(0x4200, PACA_EXGEN) DISABLE_INTS - bl .save_nvgprs + bl save_nvgprs 1: addi r3,r1,STACK_FRAME_OVERHEAD - bl .unrecoverable_exception + bl unrecoverable_exception b 1b #endif /* __DISABLED__ */ + /* + * Machine check is different because we use a different + * save area: PACA_EXMC instead of PACA_EXGEN. + */ + .align 7 + .globl machine_check_common +machine_check_common: + + mfspr r10,SPRN_DAR + std r10,PACA_EXGEN+EX_DAR(r13) + mfspr r10,SPRN_DSISR + stw r10,PACA_EXGEN+EX_DSISR(r13) + EXCEPTION_PROLOG_COMMON(0x200, PACA_EXMC) + FINISH_NAP + DISABLE_INTS + ld r3,PACA_EXGEN+EX_DAR(r13) + lwz r4,PACA_EXGEN+EX_DSISR(r13) + std r3,_DAR(r1) + std r4,_DSISR(r1) + bl save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + bl machine_check_exception + b ret_from_except + .align 7 .globl alignment_common alignment_common: @@ -1066,31 +1191,31 @@ alignment_common: lwz r4,PACA_EXGEN+EX_DSISR(r13) std r3,_DAR(r1) std r4,_DSISR(r1) - bl .save_nvgprs + bl save_nvgprs DISABLE_INTS addi r3,r1,STACK_FRAME_OVERHEAD - bl .alignment_exception - b .ret_from_except + bl alignment_exception + b ret_from_except .align 7 .globl program_check_common program_check_common: EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN) - bl .save_nvgprs + bl save_nvgprs DISABLE_INTS addi r3,r1,STACK_FRAME_OVERHEAD - bl .program_check_exception - b .ret_from_except + bl program_check_exception + b ret_from_except .align 7 .globl fp_unavailable_common fp_unavailable_common: EXCEPTION_PROLOG_COMMON(0x800, PACA_EXGEN) bne 1f /* if from user, just load it up */ - bl .save_nvgprs + bl save_nvgprs DISABLE_INTS addi r3,r1,STACK_FRAME_OVERHEAD - bl .kernel_fp_unavailable_exception + bl kernel_fp_unavailable_exception BUG_OPCODE 1: #ifdef CONFIG_PPC_TRANSACTIONAL_MEM @@ -1102,15 +1227,15 @@ BEGIN_FTR_SECTION bne- 2f END_FTR_SECTION_IFSET(CPU_FTR_TM) #endif - bl .load_up_fpu + bl load_up_fpu b fast_exception_return #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2: /* User process was in a transaction */ - bl .save_nvgprs + bl save_nvgprs DISABLE_INTS addi r3,r1,STACK_FRAME_OVERHEAD - bl .fp_unavailable_tm - b .ret_from_except + bl fp_unavailable_tm + b ret_from_except #endif .align 7 .globl altivec_unavailable_common @@ -1128,24 +1253,24 @@ BEGIN_FTR_SECTION bne- 2f END_FTR_SECTION_NESTED(CPU_FTR_TM, CPU_FTR_TM, 69) #endif - bl .load_up_altivec + bl load_up_altivec b fast_exception_return #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2: /* User process was in a transaction */ - bl .save_nvgprs + bl save_nvgprs DISABLE_INTS addi r3,r1,STACK_FRAME_OVERHEAD - bl .altivec_unavailable_tm - b .ret_from_except + bl altivec_unavailable_tm + b ret_from_except #endif 1: END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif - bl .save_nvgprs + bl save_nvgprs DISABLE_INTS addi r3,r1,STACK_FRAME_OVERHEAD - bl .altivec_unavailable_exception - b .ret_from_except + bl altivec_unavailable_exception + b ret_from_except .align 7 .globl vsx_unavailable_common @@ -1163,26 +1288,26 @@ BEGIN_FTR_SECTION bne- 2f END_FTR_SECTION_NESTED(CPU_FTR_TM, CPU_FTR_TM, 69) #endif - b .load_up_vsx + b load_up_vsx #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2: /* User process was in a transaction */ - bl .save_nvgprs + bl save_nvgprs DISABLE_INTS addi r3,r1,STACK_FRAME_OVERHEAD - bl .vsx_unavailable_tm - b .ret_from_except + bl vsx_unavailable_tm + b ret_from_except #endif 1: END_FTR_SECTION_IFSET(CPU_FTR_VSX) #endif - bl .save_nvgprs + bl save_nvgprs DISABLE_INTS addi r3,r1,STACK_FRAME_OVERHEAD - bl .vsx_unavailable_exception - b .ret_from_except + bl vsx_unavailable_exception + b ret_from_except - STD_EXCEPTION_COMMON(0xf60, facility_unavailable, .facility_unavailable_exception) - STD_EXCEPTION_COMMON(0xf80, hv_facility_unavailable, .facility_unavailable_exception) + STD_EXCEPTION_COMMON(0xf60, facility_unavailable, facility_unavailable_exception) + STD_EXCEPTION_COMMON(0xf80, hv_facility_unavailable, facility_unavailable_exception) .align 7 .globl __end_handlers @@ -1237,6 +1362,154 @@ _GLOBAL(opal_mc_secondary_handler) #endif /* CONFIG_PPC_POWERNV */ +#define MACHINE_CHECK_HANDLER_WINDUP \ + /* Clear MSR_RI before setting SRR0 and SRR1. */\ + li r0,MSR_RI; \ + mfmsr r9; /* get MSR value */ \ + andc r9,r9,r0; \ + mtmsrd r9,1; /* Clear MSR_RI */ \ + /* Move original SRR0 and SRR1 into the respective regs */ \ + ld r9,_MSR(r1); \ + mtspr SPRN_SRR1,r9; \ + ld r3,_NIP(r1); \ + mtspr SPRN_SRR0,r3; \ + ld r9,_CTR(r1); \ + mtctr r9; \ + ld r9,_XER(r1); \ + mtxer r9; \ + ld r9,_LINK(r1); \ + mtlr r9; \ + REST_GPR(0, r1); \ + REST_8GPRS(2, r1); \ + REST_GPR(10, r1); \ + ld r11,_CCR(r1); \ + mtcr r11; \ + /* Decrement paca->in_mce. */ \ + lhz r12,PACA_IN_MCE(r13); \ + subi r12,r12,1; \ + sth r12,PACA_IN_MCE(r13); \ + REST_GPR(11, r1); \ + REST_2GPRS(12, r1); \ + /* restore original r1. */ \ + ld r1,GPR1(r1) + + /* + * Handle machine check early in real mode. We come here with + * ME=1, MMU (IR=0 and DR=0) off and using MC emergency stack. + */ + .align 7 + .globl machine_check_handle_early +machine_check_handle_early: + std r0,GPR0(r1) /* Save r0 */ + EXCEPTION_PROLOG_COMMON_3(0x200) + bl save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + bl machine_check_early + std r3,RESULT(r1) /* Save result */ + ld r12,_MSR(r1) +#ifdef CONFIG_PPC_P7_NAP + /* + * Check if thread was in power saving mode. We come here when any + * of the following is true: + * a. thread wasn't in power saving mode + * b. thread was in power saving mode with no state loss or + * supervisor state loss + * + * Go back to nap again if (b) is true. + */ + rlwinm. r11,r12,47-31,30,31 /* Was it in power saving mode? */ + beq 4f /* No, it wasn;t */ + /* Thread was in power saving mode. Go back to nap again. */ + cmpwi r11,2 + bne 3f + /* Supervisor state loss */ + li r0,1 + stb r0,PACA_NAPSTATELOST(r13) +3: bl machine_check_queue_event + MACHINE_CHECK_HANDLER_WINDUP + GET_PACA(r13) + ld r1,PACAR1(r13) + b power7_enter_nap_mode +4: +#endif + /* + * Check if we are coming from hypervisor userspace. If yes then we + * continue in host kernel in V mode to deliver the MC event. + */ + rldicl. r11,r12,4,63 /* See if MC hit while in HV mode. */ + beq 5f + andi. r11,r12,MSR_PR /* See if coming from user. */ + bne 9f /* continue in V mode if we are. */ + +5: +#ifdef CONFIG_KVM_BOOK3S_64_HV + /* + * We are coming from kernel context. Check if we are coming from + * guest. if yes, then we can continue. We will fall through + * do_kvm_200->kvmppc_interrupt to deliver the MC event to guest. + */ + lbz r11,HSTATE_IN_GUEST(r13) + cmpwi r11,0 /* Check if coming from guest */ + bne 9f /* continue if we are. */ +#endif + /* + * At this point we are not sure about what context we come from. + * Queue up the MCE event and return from the interrupt. + * But before that, check if this is an un-recoverable exception. + * If yes, then stay on emergency stack and panic. + */ + andi. r11,r12,MSR_RI + bne 2f +1: mfspr r11,SPRN_SRR0 + ld r10,PACAKBASE(r13) + LOAD_HANDLER(r10,unrecover_mce) + mtspr SPRN_SRR0,r10 + ld r10,PACAKMSR(r13) + /* + * We are going down. But there are chances that we might get hit by + * another MCE during panic path and we may run into unstable state + * with no way out. Hence, turn ME bit off while going down, so that + * when another MCE is hit during panic path, system will checkstop + * and hypervisor will get restarted cleanly by SP. + */ + li r3,MSR_ME + andc r10,r10,r3 /* Turn off MSR_ME */ + mtspr SPRN_SRR1,r10 + rfid + b . +2: + /* + * Check if we have successfully handled/recovered from error, if not + * then stay on emergency stack and panic. + */ + ld r3,RESULT(r1) /* Load result */ + cmpdi r3,0 /* see if we handled MCE successfully */ + + beq 1b /* if !handled then panic */ + /* + * Return from MC interrupt. + * Queue up the MCE event so that we can log it later, while + * returning from kernel or opal call. + */ + bl machine_check_queue_event + MACHINE_CHECK_HANDLER_WINDUP + rfid +9: + /* Deliver the machine check to host kernel in V mode. */ + MACHINE_CHECK_HANDLER_WINDUP + b machine_check_pSeries + +unrecover_mce: + /* Invoke machine_check_exception to print MCE event and panic. */ + addi r3,r1,STACK_FRAME_OVERHEAD + bl machine_check_exception + /* + * We will not reach here. Even if we did, there is no way out. Call + * unrecoverable_exception and die. + */ +1: addi r3,r1,STACK_FRAME_OVERHEAD + bl unrecoverable_exception + b 1b /* * r13 points to the PACA, r9 contains the saved CR, * r12 contain the saved SRR1, SRR0 is still ready for return @@ -1245,7 +1518,7 @@ _GLOBAL(opal_mc_secondary_handler) * r3 is saved in paca->slb_r3 * We assume we aren't going to take any exceptions during this procedure. */ -_GLOBAL(slb_miss_realmode) +slb_miss_realmode: mflr r10 #ifdef CONFIG_RELOCATABLE mtctr r11 @@ -1254,7 +1527,7 @@ _GLOBAL(slb_miss_realmode) stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ std r10,PACA_EXSLB+EX_LR(r13) /* save LR */ - bl .slb_allocate_realmode + bl slb_allocate_realmode /* All done -- return from exception. */ @@ -1294,9 +1567,9 @@ _GLOBAL(slb_miss_realmode) unrecov_slb: EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB) DISABLE_INTS - bl .save_nvgprs + bl save_nvgprs 1: addi r3,r1,STACK_FRAME_OVERHEAD - bl .unrecoverable_exception + bl unrecoverable_exception b 1b @@ -1313,7 +1586,7 @@ power4_fixup_nap: * Hash table stuff */ .align 7 -_STATIC(do_hash_page) +do_hash_page: std r3,_DAR(r1) std r4,_DSISR(r1) @@ -1350,7 +1623,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB) * * at return r3 = 0 for success, 1 for page fault, negative for error */ - bl .hash_page /* build HPTE if possible */ + bl hash_page /* build HPTE if possible */ cmpdi r3,0 /* see if hash_page succeeded */ /* Success */ @@ -1364,35 +1637,35 @@ handle_page_fault: 11: ld r4,_DAR(r1) ld r5,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD - bl .do_page_fault + bl do_page_fault cmpdi r3,0 beq+ 12f - bl .save_nvgprs + bl save_nvgprs mr r5,r3 addi r3,r1,STACK_FRAME_OVERHEAD lwz r4,_DAR(r1) - bl .bad_page_fault - b .ret_from_except + bl bad_page_fault + b ret_from_except /* We have a data breakpoint exception - handle it */ handle_dabr_fault: - bl .save_nvgprs + bl save_nvgprs ld r4,_DAR(r1) ld r5,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD - bl .do_break -12: b .ret_from_except_lite + bl do_break +12: b ret_from_except_lite /* We have a page fault that hash_page could handle but HV refused * the PTE insertion */ -13: bl .save_nvgprs +13: bl save_nvgprs mr r5,r3 addi r3,r1,STACK_FRAME_OVERHEAD ld r4,_DAR(r1) - bl .low_hash_fault - b .ret_from_except + bl low_hash_fault + b ret_from_except /* * We come here as a result of a DSI at a point where we don't want @@ -1401,16 +1674,16 @@ handle_dabr_fault: * were soft-disabled. We want to invoke the exception handler for * the access, or panic if there isn't a handler. */ -77: bl .save_nvgprs +77: bl save_nvgprs mr r4,r3 addi r3,r1,STACK_FRAME_OVERHEAD li r5,SIGSEGV - bl .bad_page_fault - b .ret_from_except + bl bad_page_fault + b ret_from_except /* here we have a segment miss */ do_ste_alloc: - bl .ste_allocate /* try to insert stab entry */ + bl ste_allocate /* try to insert stab entry */ cmpdi r3,0 bne- handle_page_fault b fast_exception_return @@ -1423,7 +1696,7 @@ do_ste_alloc: * We assume (DAR >> 60) == 0xc. */ .align 7 -_GLOBAL(do_stab_bolted) +do_stab_bolted: stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ std r11,PACA_EXSLB+EX_SRR0(r13) /* save SRR0 in exc. frame */ mfspr r11,SPRN_DAR /* ea */ diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 2230fd0ca3e..742694c1d85 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -55,9 +55,9 @@ int crash_mem_ranges; int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname, int depth, void *data) { - __be32 *sections; + const __be32 *sections; int i, num_sections; - unsigned long size; + int size; const int *token; if (depth != 1 || strcmp(uname, "rtas") != 0) @@ -69,7 +69,7 @@ int __init early_init_dt_scan_fw_dump(unsigned long node, */ token = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL); if (!token) - return 0; + return 1; fw_dump.fadump_supported = 1; fw_dump.ibm_configure_kernel_dump = *token; @@ -92,7 +92,7 @@ int __init early_init_dt_scan_fw_dump(unsigned long node, &size); if (!sections) - return 0; + return 1; num_sections = size / (3 * sizeof(u32)); @@ -110,6 +110,7 @@ int __init early_init_dt_scan_fw_dump(unsigned long node, break; } } + return 1; } @@ -645,7 +646,7 @@ static int __init fadump_build_cpu_notes(const struct fadump_mem_struct *fdm) } /* Lower 4 bytes of reg_value contains logical cpu id */ cpu = reg_entry->reg_value & FADUMP_CPU_ID_MASK; - if (!cpumask_test_cpu(cpu, &fdh->cpu_online_mask)) { + if (fdh && !cpumask_test_cpu(cpu, &fdh->cpu_online_mask)) { SKIP_TO_NEXT_CPU(reg_entry); continue; } @@ -662,9 +663,11 @@ static int __init fadump_build_cpu_notes(const struct fadump_mem_struct *fdm) } fadump_final_note(note_buf); - pr_debug("Updating elfcore header (%llx) with cpu notes\n", + if (fdh) { + pr_debug("Updating elfcore header (%llx) with cpu notes\n", fdh->elfcorehdr_addr); - fadump_update_elfcore_header((char *)__va(fdh->elfcorehdr_addr)); + fadump_update_elfcore_header((char *)__va(fdh->elfcorehdr_addr)); + } return 0; error_out: diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index caeaabf11a2..9ad236e5d2c 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -35,15 +35,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ 2: REST_32VSRS(n,c,base); \ 3: -#define __REST_32FPVSRS_TRANSACT(n,c,base) \ -BEGIN_FTR_SECTION \ - b 2f; \ -END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ - REST_32FPRS_TRANSACT(n,base); \ - b 3f; \ -2: REST_32VSRS_TRANSACT(n,c,base); \ -3: - #define __SAVE_32FPVSRS(n,c,base) \ BEGIN_FTR_SECTION \ b 2f; \ @@ -54,40 +45,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ 3: #else #define __REST_32FPVSRS(n,b,base) REST_32FPRS(n, base) -#define __REST_32FPVSRS_TRANSACT(n,b,base) REST_32FPRS(n, base) #define __SAVE_32FPVSRS(n,b,base) SAVE_32FPRS(n, base) #endif #define REST_32FPVSRS(n,c,base) __REST_32FPVSRS(n,__REG_##c,__REG_##base) -#define REST_32FPVSRS_TRANSACT(n,c,base) \ - __REST_32FPVSRS_TRANSACT(n,__REG_##c,__REG_##base) #define SAVE_32FPVSRS(n,c,base) __SAVE_32FPVSRS(n,__REG_##c,__REG_##base) #ifdef CONFIG_PPC_TRANSACTIONAL_MEM -/* - * Wrapper to call load_up_fpu from C. - * void do_load_up_fpu(struct pt_regs *regs); - */ -_GLOBAL(do_load_up_fpu) - mflr r0 - std r0, 16(r1) - stdu r1, -112(r1) - - subi r6, r3, STACK_FRAME_OVERHEAD - /* load_up_fpu expects r12=MSR, r13=PACA, and returns - * with r12 = new MSR. - */ - ld r12,_MSR(r6) - GET_PACA(r13) - - bl load_up_fpu - std r12,_MSR(r6) - - ld r0, 112+16(r1) - addi r1, r1, 112 - mtlr r0 - blr - - /* void do_load_up_transact_fpu(struct thread_struct *thread) * * This is similar to load_up_fpu but for the transactional version of the FP @@ -105,9 +68,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) SYNC MTMSRD(r5) - lfd fr0,THREAD_TRANSACT_FPSCR(r3) + addi r7,r3,THREAD_TRANSACT_FPSTATE + lfd fr0,FPSTATE_FPSCR(r7) MTFSF_L(fr0) - REST_32FPVSRS_TRANSACT(0, R4, R3) + REST_32FPVSRS(0, R4, R7) /* FP/VSX off again */ MTMSRD(r6) @@ -117,11 +81,49 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ /* + * Enable use of the FPU, and VSX if possible, for the caller. + */ +_GLOBAL(fp_enable) + mfmsr r3 + ori r3,r3,MSR_FP +#ifdef CONFIG_VSX +BEGIN_FTR_SECTION + oris r3,r3,MSR_VSX@h +END_FTR_SECTION_IFSET(CPU_FTR_VSX) +#endif + SYNC + MTMSRD(r3) + isync /* (not necessary for arch 2.02 and later) */ + blr + +/* + * Load state from memory into FP registers including FPSCR. + * Assumes the caller has enabled FP in the MSR. + */ +_GLOBAL(load_fp_state) + lfd fr0,FPSTATE_FPSCR(r3) + MTFSF_L(fr0) + REST_32FPVSRS(0, R4, R3) + blr + +/* + * Store FP state into memory, including FPSCR + * Assumes the caller has enabled FP in the MSR. + */ +_GLOBAL(store_fp_state) + SAVE_32FPVSRS(0, R4, R3) + mffs fr0 + stfd fr0,FPSTATE_FPSCR(r3) + blr + +/* * This task wants to use the FPU now. * On UP, disable FP for the task which had the FPU previously, * and save its floating-point registers in its thread_struct. * Load up this task's FP registers from its thread_struct, * enable the FPU for the current task and return to the task. + * Note that on 32-bit this can only use registers that will be + * restored by fast_exception_return, i.e. r3 - r6, r10 and r11. */ _GLOBAL(load_up_fpu) mfmsr r5 @@ -147,9 +149,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) beq 1f toreal(r4) addi r4,r4,THREAD /* want last_task_used_math->thread */ - SAVE_32FPVSRS(0, R5, R4) + addi r10,r4,THREAD_FPSTATE + SAVE_32FPVSRS(0, R5, R10) mffs fr0 - stfd fr0,THREAD_FPSCR(r4) + stfd fr0,FPSTATE_FPSCR(r10) PPC_LL r5,PT_REGS(r4) toreal(r5) PPC_LL r4,_MSR-STACK_FRAME_OVERHEAD(r5) @@ -160,7 +163,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) #endif /* CONFIG_SMP */ /* enable use of FP after return */ #ifdef CONFIG_PPC32 - mfspr r5,SPRN_SPRG_THREAD /* current task's THREAD (phys) */ + mfspr r5,SPRN_SPRG_THREAD /* current task's THREAD (phys) */ lwz r4,THREAD_FPEXC_MODE(r5) ori r9,r9,MSR_FP /* enable FP for current */ or r9,r9,r4 @@ -172,9 +175,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) or r12,r12,r4 std r12,_MSR(r1) #endif - lfd fr0,THREAD_FPSCR(r5) + addi r10,r5,THREAD_FPSTATE + lfd fr0,FPSTATE_FPSCR(r10) MTFSF_L(fr0) - REST_32FPVSRS(0, R4, R5) + REST_32FPVSRS(0, R4, R10) #ifndef CONFIG_SMP subi r4,r5,THREAD fromreal(r4) @@ -206,11 +210,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) PPC_LCMPI 0,r3,0 beqlr- /* if no previous owner, done */ addi r3,r3,THREAD /* want THREAD of task */ + PPC_LL r6,THREAD_FPSAVEAREA(r3) PPC_LL r5,PT_REGS(r3) - PPC_LCMPI 0,r5,0 - SAVE_32FPVSRS(0, R4 ,R3) + PPC_LCMPI 0,r6,0 + bne 2f + addi r6,r3,THREAD_FPSTATE +2: PPC_LCMPI 0,r5,0 + SAVE_32FPVSRS(0, R4, R6) mffs fr0 - stfd fr0,THREAD_FPSCR(r3) + stfd fr0,FPSTATE_FPSCR(r6) beq 1f PPC_LL r4,_MSR-STACK_FRAME_OVERHEAD(r5) li r3,MSR_FP|MSR_FE0|MSR_FE1 diff --git a/arch/powerpc/kernel/fsl_booke_entry_mapping.S b/arch/powerpc/kernel/fsl_booke_entry_mapping.S index a92c79be272..f22e7e44fbf 100644 --- a/arch/powerpc/kernel/fsl_booke_entry_mapping.S +++ b/arch/powerpc/kernel/fsl_booke_entry_mapping.S @@ -176,6 +176,8 @@ skpinv: addi r6,r6,1 /* Increment */ /* 7. Jump to KERNELBASE mapping */ lis r6,(KERNELBASE & ~0xfff)@h ori r6,r6,(KERNELBASE & ~0xfff)@l + rlwinm r7,r25,0,0x03ffffff + add r6,r7,r6 #elif defined(ENTRY_MAPPING_KEXEC_SETUP) /* diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index 1fb78561096..d178834fe50 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -10,6 +10,8 @@ * */ +#define pr_fmt(fmt) "ftrace-powerpc: " fmt + #include <linux/spinlock.h> #include <linux/hardirq.h> #include <linux/uaccess.h> @@ -74,6 +76,7 @@ ftrace_modify_code(unsigned long ip, unsigned int old, unsigned int new) */ static int test_24bit_addr(unsigned long ip, unsigned long addr) { + addr = ppc_function_entry((void *)addr); /* use the create_branch to verify that this offset can be branched */ return create_branch((unsigned int *)ip, addr, 0); @@ -104,11 +107,9 @@ __ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { unsigned int op; - unsigned int jmp[5]; - unsigned long ptr; + unsigned long entry, ptr; unsigned long ip = rec->ip; - unsigned long tramp; - int offset; + void *tramp; /* read where this goes */ if (probe_kernel_read(&op, (void *)ip, sizeof(int))) @@ -116,97 +117,46 @@ __ftrace_make_nop(struct module *mod, /* Make sure that that this is still a 24bit jump */ if (!is_bl_op(op)) { - printk(KERN_ERR "Not expected bl: opcode is %x\n", op); + pr_err("Not expected bl: opcode is %x\n", op); return -EINVAL; } /* lets find where the pointer goes */ - tramp = find_bl_target(ip, op); - - /* - * On PPC64 the trampoline looks like: - * 0x3d, 0x82, 0x00, 0x00, addis r12,r2, <high> - * 0x39, 0x8c, 0x00, 0x00, addi r12,r12, <low> - * Where the bytes 2,3,6 and 7 make up the 32bit offset - * to the TOC that holds the pointer. - * to jump to. - * 0xf8, 0x41, 0x00, 0x28, std r2,40(r1) - * 0xe9, 0x6c, 0x00, 0x20, ld r11,32(r12) - * The actually address is 32 bytes from the offset - * into the TOC. - * 0xe8, 0x4c, 0x00, 0x28, ld r2,40(r12) - */ - - pr_devel("ip:%lx jumps to %lx r2: %lx", ip, tramp, mod->arch.toc); + tramp = (void *)find_bl_target(ip, op); - /* Find where the trampoline jumps to */ - if (probe_kernel_read(jmp, (void *)tramp, sizeof(jmp))) { - printk(KERN_ERR "Failed to read %lx\n", tramp); - return -EFAULT; - } + pr_devel("ip:%lx jumps to %p", ip, tramp); - pr_devel(" %08x %08x", jmp[0], jmp[1]); - - /* verify that this is what we expect it to be */ - if (((jmp[0] & 0xffff0000) != 0x3d820000) || - ((jmp[1] & 0xffff0000) != 0x398c0000) || - (jmp[2] != 0xf8410028) || - (jmp[3] != 0xe96c0020) || - (jmp[4] != 0xe84c0028)) { - printk(KERN_ERR "Not a trampoline\n"); + if (!is_module_trampoline(tramp)) { + pr_err("Not a trampoline\n"); return -EINVAL; } - /* The bottom half is signed extended */ - offset = ((unsigned)((unsigned short)jmp[0]) << 16) + - (int)((short)jmp[1]); - - pr_devel(" %x ", offset); - - /* get the address this jumps too */ - tramp = mod->arch.toc + offset + 32; - pr_devel("toc: %lx", tramp); - - if (probe_kernel_read(jmp, (void *)tramp, 8)) { - printk(KERN_ERR "Failed to read %lx\n", tramp); + if (module_trampoline_target(mod, tramp, &ptr)) { + pr_err("Failed to get trampoline target\n"); return -EFAULT; } - pr_devel(" %08x %08x\n", jmp[0], jmp[1]); - - ptr = ((unsigned long)jmp[0] << 32) + jmp[1]; + pr_devel("trampoline target %lx", ptr); + entry = ppc_global_function_entry((void *)addr); /* This should match what was called */ - if (ptr != ppc_function_entry((void *)addr)) { - printk(KERN_ERR "addr does not match %lx\n", ptr); + if (ptr != entry) { + pr_err("addr %lx does not match expected %lx\n", ptr, entry); return -EINVAL; } /* - * We want to nop the line, but the next line is - * 0xe8, 0x41, 0x00, 0x28 ld r2,40(r1) - * This needs to be turned to a nop too. - */ - if (probe_kernel_read(&op, (void *)(ip+4), MCOUNT_INSN_SIZE)) - return -EFAULT; - - if (op != 0xe8410028) { - printk(KERN_ERR "Next line is not ld! (%08x)\n", op); - return -EINVAL; - } - - /* - * Milton Miller pointed out that we can not blindly do nops. - * If a task was preempted when calling a trace function, - * the nops will remove the way to restore the TOC in r2 - * and the r2 TOC will get corrupted. - */ - - /* - * Replace: - * bl <tramp> <==== will be replaced with "b 1f" - * ld r2,40(r1) - * 1: + * Our original call site looks like: + * + * bl <tramp> + * ld r2,XX(r1) + * + * Milton Miller pointed out that we can not simply nop the branch. + * If a task was preempted when calling a trace function, the nops + * will remove the way to restore the TOC in r2 and the r2 TOC will + * get corrupted. + * + * Use a b +8 to jump over the load. */ op = 0x48000008; /* b +8 */ @@ -231,7 +181,7 @@ __ftrace_make_nop(struct module *mod, /* Make sure that that this is still a 24bit jump */ if (!is_bl_op(op)) { - printk(KERN_ERR "Not expected bl: opcode is %x\n", op); + pr_err("Not expected bl: opcode is %x\n", op); return -EINVAL; } @@ -250,7 +200,7 @@ __ftrace_make_nop(struct module *mod, /* Find where the trampoline jumps to */ if (probe_kernel_read(jmp, (void *)tramp, sizeof(jmp))) { - printk(KERN_ERR "Failed to read %lx\n", tramp); + pr_err("Failed to read %lx\n", tramp); return -EFAULT; } @@ -261,7 +211,7 @@ __ftrace_make_nop(struct module *mod, ((jmp[1] & 0xffff0000) != 0x398c0000) || (jmp[2] != 0x7d8903a6) || (jmp[3] != 0x4e800420)) { - printk(KERN_ERR "Not a trampoline\n"); + pr_err("Not a trampoline\n"); return -EINVAL; } @@ -273,8 +223,7 @@ __ftrace_make_nop(struct module *mod, pr_devel(" %lx ", tramp); if (tramp != addr) { - printk(KERN_ERR - "Trampoline location %08lx does not match addr\n", + pr_err("Trampoline location %08lx does not match addr\n", tramp); return -EINVAL; } @@ -315,15 +264,13 @@ int ftrace_make_nop(struct module *mod, */ if (!rec->arch.mod) { if (!mod) { - printk(KERN_ERR "No module loaded addr=%lx\n", - addr); + pr_err("No module loaded addr=%lx\n", addr); return -EFAULT; } rec->arch.mod = mod; } else if (mod) { if (mod != rec->arch.mod) { - printk(KERN_ERR - "Record mod %p not equal to passed in mod %p\n", + pr_err("Record mod %p not equal to passed in mod %p\n", rec->arch.mod, mod); return -EINVAL; } @@ -344,45 +291,42 @@ static int __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { unsigned int op[2]; - unsigned long ip = rec->ip; + void *ip = (void *)rec->ip; /* read where this goes */ - if (probe_kernel_read(op, (void *)ip, MCOUNT_INSN_SIZE * 2)) + if (probe_kernel_read(op, ip, sizeof(op))) return -EFAULT; /* - * It should be pointing to two nops or - * b +8; ld r2,40(r1) + * We expect to see: + * + * b +8 + * ld r2,XX(r1) + * + * The load offset is different depending on the ABI. For simplicity + * just mask it out when doing the compare. */ - if (((op[0] != 0x48000008) || (op[1] != 0xe8410028)) && - ((op[0] != PPC_INST_NOP) || (op[1] != PPC_INST_NOP))) { - printk(KERN_ERR "Expected NOPs but have %x %x\n", op[0], op[1]); + if ((op[0] != 0x48000008) || ((op[1] & 0xffff0000) != 0xe8410000)) { + pr_err("Unexpected call sequence: %x %x\n", op[0], op[1]); return -EINVAL; } /* If we never set up a trampoline to ftrace_caller, then bail */ if (!rec->arch.mod->arch.tramp) { - printk(KERN_ERR "No ftrace trampoline\n"); + pr_err("No ftrace trampoline\n"); return -EINVAL; } - /* create the branch to the trampoline */ - op[0] = create_branch((unsigned int *)ip, - rec->arch.mod->arch.tramp, BRANCH_SET_LINK); - if (!op[0]) { - printk(KERN_ERR "REL24 out of range!\n"); + /* Ensure branch is within 24 bits */ + if (!create_branch(ip, rec->arch.mod->arch.tramp, BRANCH_SET_LINK)) { + pr_err("Branch out of range\n"); return -EINVAL; } - /* ld r2,40(r1) */ - op[1] = 0xe8410028; - - pr_devel("write to %lx\n", rec->ip); - - if (probe_kernel_write((void *)ip, op, MCOUNT_INSN_SIZE * 2)) - return -EPERM; - - flush_icache_range(ip, ip + 8); + if (patch_branch(ip, rec->arch.mod->arch.tramp, BRANCH_SET_LINK)) { + pr_err("REL24 out of range!\n"); + return -EINVAL; + } return 0; } @@ -399,13 +343,13 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) /* It should be pointing to a nop */ if (op != PPC_INST_NOP) { - printk(KERN_ERR "Expected NOP but have %x\n", op); + pr_err("Expected NOP but have %x\n", op); return -EINVAL; } /* If we never set up a trampoline to ftrace_caller, then bail */ if (!rec->arch.mod->arch.tramp) { - printk(KERN_ERR "No ftrace trampoline\n"); + pr_err("No ftrace trampoline\n"); return -EINVAL; } @@ -413,7 +357,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) op = create_branch((unsigned int *)ip, rec->arch.mod->arch.tramp, BRANCH_SET_LINK); if (!op) { - printk(KERN_ERR "REL24 out of range!\n"); + pr_err("REL24 out of range!\n"); return -EINVAL; } @@ -451,7 +395,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) * already have a module defined. */ if (!rec->arch.mod) { - printk(KERN_ERR "No module loaded\n"); + pr_err("No module loaded\n"); return -EINVAL; } @@ -527,13 +471,8 @@ void arch_ftrace_update_code(int command) ftrace_disable_ftrace_graph_caller(); } -int __init ftrace_dyn_arch_init(void *data) +int __init ftrace_dyn_arch_init(void) { - /* caller expects data to be zero */ - unsigned long *p = data; - - *p = 0; - return 0; } #endif /* CONFIG_DYNAMIC_FTRACE */ diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 67ee0d6c107..7d7d8635227 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -930,25 +930,6 @@ initial_mmu: tlbwe r4,r0,TLB_DATA /* Load the data portion of the entry */ tlbwe r3,r0,TLB_TAG /* Load the tag portion of the entry */ -#if defined(CONFIG_SERIAL_TEXT_DEBUG) && defined(SERIAL_DEBUG_IO_BASE) - - /* Load a TLB entry for the UART, so that ppc4xx_progress() can use - * the UARTs nice and early. We use a 4k real==virtual mapping. */ - - lis r3,SERIAL_DEBUG_IO_BASE@h - ori r3,r3,SERIAL_DEBUG_IO_BASE@l - mr r4,r3 - clrrwi r4,r4,12 - ori r4,r4,(TLB_WR|TLB_I|TLB_M|TLB_G) - - clrrwi r3,r3,12 - ori r3,r3,(TLB_VALID | TLB_PAGESZ(PAGESZ_4K)) - - li r0,0 /* TLB slot 0 */ - tlbwe r4,r0,TLB_DATA - tlbwe r3,r0,TLB_TAG -#endif /* CONFIG_SERIAL_DEBUG_TEXT && SERIAL_DEBUG_IO_BASE */ - isync /* Establish the exception vector base diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 3d11d8038de..a95145d7f61 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -23,6 +23,7 @@ */ #include <linux/threads.h> +#include <linux/init.h> #include <asm/reg.h> #include <asm/page.h> #include <asm/mmu.h> @@ -68,17 +69,18 @@ _stext: _GLOBAL(__start) /* NOP this out unconditionally */ BEGIN_FTR_SECTION - b .__start_initialization_multiplatform + FIXUP_ENDIAN + b __start_initialization_multiplatform END_FTR_SECTION(0, 1) /* Catch branch to 0 in real mode */ trap - /* Secondary processors spin on this value until it becomes nonzero. - * When it does it contains the real address of the descriptor - * of the function that the cpu should jump to to continue - * initialization. + /* Secondary processors spin on this value until it becomes non-zero. + * When non-zero, it contains the real address of the function the cpu + * should jump to. */ + .balign 8 .globl __secondary_hold_spinloop __secondary_hold_spinloop: .llong 0x0 @@ -115,6 +117,7 @@ __run_at_load: */ .globl __secondary_hold __secondary_hold: + FIXUP_ENDIAN #ifndef CONFIG_PPC_BOOK3E mfmsr r24 ori r24,r24,MSR_RI @@ -136,16 +139,15 @@ __secondary_hold: tovirt(r26,r26) #endif /* All secondary cpus wait here until told to start. */ -100: ld r4,__secondary_hold_spinloop-_stext(r26) - cmpdi 0,r4,0 +100: ld r12,__secondary_hold_spinloop-_stext(r26) + cmpdi 0,r12,0 beq 100b #if defined(CONFIG_SMP) || defined(CONFIG_KEXEC) #ifdef CONFIG_PPC_BOOK3E - tovirt(r4,r4) + tovirt(r12,r12) #endif - ld r4,0(r4) /* deref function descriptor */ - mtctr r4 + mtctr r12 mr r3,r24 /* * it may be the case that other platforms have r4 right to @@ -182,16 +184,16 @@ _GLOBAL(generic_secondary_thread_init) mr r24,r3 /* turn on 64-bit mode */ - bl .enable_64b_mode + bl enable_64b_mode /* get a valid TOC pointer, wherever we're mapped at */ - bl .relative_toc + bl relative_toc tovirt(r2,r2) #ifdef CONFIG_PPC_BOOK3E /* Book3E initialization */ mr r3,r24 - bl .book3e_secondary_thread_init + bl book3e_secondary_thread_init #endif b generic_secondary_common_init @@ -205,21 +207,22 @@ _GLOBAL(generic_secondary_thread_init) * as SCOM before entry). */ _GLOBAL(generic_secondary_smp_init) + FIXUP_ENDIAN mr r24,r3 mr r25,r4 /* turn on 64-bit mode */ - bl .enable_64b_mode + bl enable_64b_mode /* get a valid TOC pointer, wherever we're mapped at */ - bl .relative_toc + bl relative_toc tovirt(r2,r2) #ifdef CONFIG_PPC_BOOK3E /* Book3E initialization */ mr r3,r24 mr r4,r25 - bl .book3e_secondary_core_init + bl book3e_secondary_core_init #endif generic_secondary_common_init: @@ -231,7 +234,7 @@ generic_secondary_common_init: ld r13,0(r13) /* Get base vaddr of paca array */ #ifndef CONFIG_SMP addi r13,r13,PACA_SIZE /* know r13 if used accidentally */ - b .kexec_wait /* wait for next kernel if !SMP */ + b kexec_wait /* wait for next kernel if !SMP */ #else LOAD_REG_ADDR(r7, nr_cpu_ids) /* Load nr_cpu_ids address */ lwz r7,0(r7) /* also the max paca allocated */ @@ -245,7 +248,7 @@ generic_secondary_common_init: blt 1b mr r3,r24 /* not found, copy phys to r3 */ - b .kexec_wait /* next kernel might do better */ + b kexec_wait /* next kernel might do better */ 2: SET_PACA(r13) #ifdef CONFIG_PPC_BOOK3E @@ -259,11 +262,13 @@ generic_secondary_common_init: /* See if we need to call a cpu state restore handler */ LOAD_REG_ADDR(r23, cur_cpu_spec) ld r23,0(r23) - ld r23,CPU_SPEC_RESTORE(r23) - cmpdi 0,r23,0 + ld r12,CPU_SPEC_RESTORE(r23) + cmpdi 0,r12,0 beq 3f - ld r23,0(r23) - mtctr r23 +#if !defined(_CALL_ELF) || _CALL_ELF != 2 + ld r12,0(r12) +#endif + mtctr r12 bctrl 3: LOAD_REG_ADDR(r3, spinning_secondaries) /* Decrement spinning_secondaries */ @@ -294,7 +299,7 @@ generic_secondary_common_init: * Assumes we're mapped EA == RA if the MMU is on. */ #ifdef CONFIG_PPC_BOOK3S -_STATIC(__mmu_off) +__mmu_off: mfmsr r3 andi. r0,r3,MSR_IR|MSR_DR beqlr @@ -319,12 +324,12 @@ _STATIC(__mmu_off) * DT block, r4 is a physical pointer to the kernel itself * */ -_GLOBAL(__start_initialization_multiplatform) +__start_initialization_multiplatform: /* Make sure we are running in 64 bits mode */ - bl .enable_64b_mode + bl enable_64b_mode /* Get TOC pointer (current runtime address) */ - bl .relative_toc + bl relative_toc /* find out where we are now */ bcl 20,31,$+4 @@ -337,7 +342,7 @@ _GLOBAL(__start_initialization_multiplatform) */ cmpldi cr0,r5,0 beq 1f - b .__boot_from_prom /* yes -> prom */ + b __boot_from_prom /* yes -> prom */ 1: /* Save parameters */ mr r31,r3 @@ -349,8 +354,8 @@ _GLOBAL(__start_initialization_multiplatform) #endif #ifdef CONFIG_PPC_BOOK3E - bl .start_initialization_book3e - b .__after_prom_start + bl start_initialization_book3e + b __after_prom_start #else /* Setup some critical 970 SPRs before switching MMU off */ mfspr r0,SPRN_PVR @@ -363,15 +368,15 @@ _GLOBAL(__start_initialization_multiplatform) beq 1f cmpwi r0,0x45 /* 970GX */ bne 2f -1: bl .__cpu_preinit_ppc970 +1: bl __cpu_preinit_ppc970 2: /* Switch off MMU if not already off */ - bl .__mmu_off - b .__after_prom_start + bl __mmu_off + b __after_prom_start #endif /* CONFIG_PPC_BOOK3E */ -_INIT_STATIC(__boot_from_prom) +__boot_from_prom: #ifdef CONFIG_PPC_OF_BOOT_TRAMPOLINE /* Save parameters */ mr r31,r3 @@ -390,7 +395,7 @@ _INIT_STATIC(__boot_from_prom) #ifdef CONFIG_RELOCATABLE /* Relocate code for where we are now */ mr r3,r26 - bl .relocate + bl relocate #endif /* Restore parameters */ @@ -402,14 +407,14 @@ _INIT_STATIC(__boot_from_prom) /* Do all of the interaction with OF client interface */ mr r8,r26 - bl .prom_init + bl prom_init #endif /* #CONFIG_PPC_OF_BOOT_TRAMPOLINE */ /* We never return. We also hit that trap if trying to boot * from OF while CONFIG_PPC_OF_BOOT_TRAMPOLINE isn't selected */ trap -_STATIC(__after_prom_start) +__after_prom_start: #ifdef CONFIG_RELOCATABLE /* process relocations for the final address of the kernel */ lis r25,PAGE_OFFSET@highest /* compute virtual base of kernel */ @@ -419,7 +424,7 @@ _STATIC(__after_prom_start) bne 1f add r25,r25,r26 1: mr r3,r25 - bl .relocate + bl relocate #endif /* @@ -459,22 +464,23 @@ _STATIC(__after_prom_start) lis r5,(copy_to_here - _stext)@ha addi r5,r5,(copy_to_here - _stext)@l /* # bytes of memory to copy */ - bl .copy_and_flush /* copy the first n bytes */ + bl copy_and_flush /* copy the first n bytes */ /* this includes the code being */ /* executed here. */ addis r8,r3,(4f - _stext)@ha /* Jump to the copy of this code */ - addi r8,r8,(4f - _stext)@l /* that we just made */ - mtctr r8 + addi r12,r8,(4f - _stext)@l /* that we just made */ + mtctr r12 bctr +.balign 8 p_end: .llong _end - _stext 4: /* Now copy the rest of the kernel up to _end */ addis r5,r26,(p_end - _stext)@ha ld r5,(p_end - _stext)@l(r5) /* get _end */ -5: bl .copy_and_flush /* copy the rest */ +5: bl copy_and_flush /* copy the rest */ -9: b .start_here_multiplatform +9: b start_here_multiplatform /* * Copy routine used to copy the kernel to start at physical address 0 @@ -538,7 +544,7 @@ __secondary_start_pmac_0: _GLOBAL(pmac_secondary_start) /* turn on 64-bit mode */ - bl .enable_64b_mode + bl enable_64b_mode li r0,0 mfspr r3,SPRN_HID4 @@ -550,11 +556,11 @@ _GLOBAL(pmac_secondary_start) slbia /* get TOC pointer (real address) */ - bl .relative_toc + bl relative_toc tovirt(r2,r2) /* Copy some CPU settings from CPU 0 */ - bl .__restore_cpu_ppc970 + bl __restore_cpu_ppc970 /* pSeries do that early though I don't think we really need it */ mfmsr r3 @@ -613,7 +619,7 @@ __secondary_start: std r14,PACAKSAVE(r13) /* Do early setup for that CPU (stab, slb, hash table pointer) */ - bl .early_setup_secondary + bl early_setup_secondary /* * setup the new stack pointer, but *don't* use this until @@ -633,7 +639,7 @@ __secondary_start: stb r0,PACAIRQHAPPENED(r13) /* enable MMU and jump to start_secondary */ - LOAD_REG_ADDR(r3, .start_secondary_prolog) + LOAD_REG_ADDR(r3, start_secondary_prolog) LOAD_REG_IMMEDIATE(r4, MSR_KERNEL) mtspr SPRN_SRR0,r3 @@ -646,11 +652,11 @@ __secondary_start: * zero the stack back-chain pointer and get the TOC virtual address * before going into C code. */ -_GLOBAL(start_secondary_prolog) +start_secondary_prolog: ld r2,PACATOC(r13) li r3,0 std r3,0(r1) /* Zero the stack frame pointer */ - bl .start_secondary + bl start_secondary b . /* * Reset stack pointer and call start_secondary @@ -661,14 +667,14 @@ _GLOBAL(start_secondary_resume) ld r1,PACAKSAVE(r13) /* Reload kernel stack pointer */ li r3,0 std r3,0(r1) /* Zero the stack frame pointer */ - bl .start_secondary + bl start_secondary b . #endif /* * This subroutine clobbers r11 and r12 */ -_GLOBAL(enable_64b_mode) +enable_64b_mode: mfmsr r11 /* grab the current MSR */ #ifdef CONFIG_PPC_BOOK3E oris r11,r11,0x8000 /* CM bit set, we'll set ICM later */ @@ -709,9 +715,9 @@ p_toc: .llong __toc_start + 0x8000 - 0b /* * This is where the main kernel code starts. */ -_INIT_STATIC(start_here_multiplatform) +start_here_multiplatform: /* set up the TOC */ - bl .relative_toc + bl relative_toc tovirt(r2,r2) /* Clear out the BSS. It may have been done in prom_init, @@ -770,9 +776,9 @@ _INIT_STATIC(start_here_multiplatform) /* Restore parameters passed from prom_init/kexec */ mr r3,r31 - bl .early_setup /* also sets r13 and SPRG_PACA */ + bl early_setup /* also sets r13 and SPRG_PACA */ - LOAD_REG_ADDR(r3, .start_here_common) + LOAD_REG_ADDR(r3, start_here_common) ld r4,PACAKMSR(r13) mtspr SPRN_SRR0,r3 mtspr SPRN_SRR1,r4 @@ -780,7 +786,8 @@ _INIT_STATIC(start_here_multiplatform) b . /* prevent speculative execution */ /* This is where all platforms converge execution */ -_INIT_GLOBAL(start_here_common) + +start_here_common: /* relocation is on at this point */ std r1,PACAKSAVE(r13) @@ -788,7 +795,7 @@ _INIT_GLOBAL(start_here_common) ld r2,PACATOC(r13) /* Do more system initializations in virtual mode */ - bl .setup_system + bl setup_system /* Mark interrupts soft and hard disabled (they might be enabled * in the PACA when doing hotplug) @@ -799,7 +806,7 @@ _INIT_GLOBAL(start_here_common) stb r0,PACAIRQHAPPENED(r13) /* Generic kernel entry */ - bl .start_kernel + bl start_kernel /* Not reached */ BUG_OPCODE diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 1b92a97b1b0..7ee876d2adb 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -858,6 +858,9 @@ initial_mmu: addis r11, r11, 0x0080 /* Add 8M */ mtspr SPRN_MD_RPN, r11 + addi r10, r10, 0x0100 + mtspr SPRN_MD_CTR, r10 + addis r8, r8, 0x0080 /* Add 8M */ mtspr SPRN_MD_EPN, r8 mtspr SPRN_MD_TWC, r9 diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 289afaffbbb..b497188a94a 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -65,29 +65,78 @@ _ENTRY(_start); nop /* Translate device tree address to physical, save in r30/r31 */ - mfmsr r16 - mfspr r17,SPRN_PID - rlwinm r17,r17,16,0x3fff0000 /* turn PID into MAS6[SPID] */ - rlwimi r17,r16,28,0x00000001 /* turn MSR[DS] into MAS6[SAS] */ - mtspr SPRN_MAS6,r17 - - tlbsx 0,r3 /* must succeed */ - - mfspr r16,SPRN_MAS1 - mfspr r20,SPRN_MAS3 - rlwinm r17,r16,25,0x1f /* r17 = log2(page size) */ - li r18,1024 - slw r18,r18,r17 /* r18 = page size */ - addi r18,r18,-1 - and r19,r3,r18 /* r19 = page offset */ - andc r31,r20,r18 /* r31 = page base */ - or r31,r31,r19 /* r31 = devtree phys addr */ - mfspr r30,SPRN_MAS7 + bl get_phys_addr + mr r30,r3 + mr r31,r4 li r25,0 /* phys kernel start (low) */ li r24,0 /* CPU number */ li r23,0 /* phys kernel start (high) */ +#ifdef CONFIG_RELOCATABLE + LOAD_REG_ADDR_PIC(r3, _stext) /* Get our current runtime base */ + + /* Translate _stext address to physical, save in r23/r25 */ + bl get_phys_addr + mr r23,r3 + mr r25,r4 + + bl 0f +0: mflr r8 + addis r3,r8,(is_second_reloc - 0b)@ha + lwz r19,(is_second_reloc - 0b)@l(r3) + + /* Check if this is the second relocation. */ + cmpwi r19,1 + bne 1f + + /* + * For the second relocation, we already get the real memstart_addr + * from device tree. So we will map PAGE_OFFSET to memstart_addr, + * then the virtual address of start kernel should be: + * PAGE_OFFSET + (kernstart_addr - memstart_addr) + * Since the offset between kernstart_addr and memstart_addr should + * never be beyond 1G, so we can just use the lower 32bit of them + * for the calculation. + */ + lis r3,PAGE_OFFSET@h + + addis r4,r8,(kernstart_addr - 0b)@ha + addi r4,r4,(kernstart_addr - 0b)@l + lwz r5,4(r4) + + addis r6,r8,(memstart_addr - 0b)@ha + addi r6,r6,(memstart_addr - 0b)@l + lwz r7,4(r6) + + subf r5,r7,r5 + add r3,r3,r5 + b 2f + +1: + /* + * We have the runtime (virutal) address of our base. + * We calculate our shift of offset from a 64M page. + * We could map the 64M page we belong to at PAGE_OFFSET and + * get going from there. + */ + lis r4,KERNELBASE@h + ori r4,r4,KERNELBASE@l + rlwinm r6,r25,0,0x3ffffff /* r6 = PHYS_START % 64M */ + rlwinm r5,r4,0,0x3ffffff /* r5 = KERNELBASE % 64M */ + subf r3,r5,r6 /* r3 = r6 - r5 */ + add r3,r4,r3 /* Required Virtual Address */ + +2: bl relocate + + /* + * For the second relocation, we already set the right tlb entries + * for the kernel space, so skip the code in fsl_booke_entry_mapping.S + */ + cmpwi r19,1 + beq set_ivor +#endif + /* We try to not make any assumptions about how the boot loader * setup or used the TLBs. We invalidate all mappings from the * boot loader and load a single entry in TLB1[0] to map the @@ -113,6 +162,7 @@ _ENTRY(__early_start) #include "fsl_booke_entry_mapping.S" #undef ENTRY_MAPPING_BOOT_SETUP +set_ivor: /* Establish the interrupt vector offsets */ SET_IVOR(0, CriticalInput); SET_IVOR(1, MachineCheck); @@ -166,8 +216,7 @@ _ENTRY(__early_start) /* Check to see if we're the second processor, and jump * to the secondary_start code if so */ - lis r24, boot_cpuid@h - ori r24, r24, boot_cpuid@l + LOAD_REG_ADDR_PIC(r24, boot_cpuid) lwz r24, 0(r24) cmpwi r24, -1 mfspr r24,SPRN_PIR @@ -197,6 +246,18 @@ _ENTRY(__early_start) bl early_init +#ifdef CONFIG_RELOCATABLE + mr r3,r30 + mr r4,r31 +#ifdef CONFIG_PHYS_64BIT + mr r5,r23 + mr r6,r25 +#else + mr r5,r25 +#endif + bl relocate_init +#endif + #ifdef CONFIG_DYNAMIC_MEMSTART lis r3,kernstart_addr@ha la r3,kernstart_addr@l(r3) @@ -555,27 +616,27 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) #ifdef CONFIG_SPE /* SPE Unavailable */ START_EXCEPTION(SPEUnavailable) - NORMAL_EXCEPTION_PROLOG(SPE_UNAVAIL) + NORMAL_EXCEPTION_PROLOG(SPE_ALTIVEC_UNAVAIL) beq 1f bl load_up_spe b fast_exception_return 1: addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_EE_LITE(0x2010, KernelSPE) #else - EXCEPTION(0x2020, SPE_UNAVAIL, SPEUnavailable, \ + EXCEPTION(0x2020, SPE_ALTIVEC_UNAVAIL, SPEUnavailable, \ unknown_exception, EXC_XFER_EE) #endif /* CONFIG_SPE */ /* SPE Floating Point Data */ #ifdef CONFIG_SPE - EXCEPTION(0x2030, SPE_FP_DATA, SPEFloatingPointData, \ - SPEFloatingPointException, EXC_XFER_EE); + EXCEPTION(0x2030, SPE_FP_DATA_ALTIVEC_ASSIST, SPEFloatingPointData, + SPEFloatingPointException, EXC_XFER_EE) /* SPE Floating Point Round */ EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \ SPEFloatingPointRoundException, EXC_XFER_EE) #else - EXCEPTION(0x2040, SPE_FP_DATA, SPEFloatingPointData, \ + EXCEPTION(0x2040, SPE_FP_DATA_ALTIVEC_ASSIST, SPEFloatingPointData, unknown_exception, EXC_XFER_EE) EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \ unknown_exception, EXC_XFER_EE) @@ -856,6 +917,33 @@ KernelSPE: #endif /* CONFIG_SPE */ /* + * Translate the effec addr in r3 to phys addr. The phys addr will be put + * into r3(higher 32bit) and r4(lower 32bit) + */ +get_phys_addr: + mfmsr r8 + mfspr r9,SPRN_PID + rlwinm r9,r9,16,0x3fff0000 /* turn PID into MAS6[SPID] */ + rlwimi r9,r8,28,0x00000001 /* turn MSR[DS] into MAS6[SAS] */ + mtspr SPRN_MAS6,r9 + + tlbsx 0,r3 /* must succeed */ + + mfspr r8,SPRN_MAS1 + mfspr r12,SPRN_MAS3 + rlwinm r9,r8,25,0x1f /* r9 = log2(page size) */ + li r10,1024 + slw r10,r10,r9 /* r10 = page size */ + addi r10,r10,-1 + and r11,r3,r10 /* r11 = page offset */ + andc r4,r12,r10 /* r4 = page base */ + or r4,r4,r11 /* r4 = devtree phys addr */ +#ifdef CONFIG_PHYS_64BIT + mfspr r3,SPRN_MAS7 +#endif + blr + +/* * Global functions */ @@ -1057,24 +1145,36 @@ _GLOBAL(__flush_disable_L1) /* When we get here, r24 needs to hold the CPU # */ .globl __secondary_start __secondary_start: - lis r3,__secondary_hold_acknowledge@h - ori r3,r3,__secondary_hold_acknowledge@l - stw r24,0(r3) - - li r3,0 - mr r4,r24 /* Why? */ - bl call_setup_cpu - - lis r3,tlbcam_index@ha - lwz r3,tlbcam_index@l(r3) + LOAD_REG_ADDR_PIC(r3, tlbcam_index) + lwz r3,0(r3) mtctr r3 li r26,0 /* r26 safe? */ + bl switch_to_as1 + mr r27,r3 /* tlb entry */ /* Load each CAM entry */ 1: mr r3,r26 bl loadcam_entry addi r26,r26,1 bdnz 1b + mr r3,r27 /* tlb entry */ + LOAD_REG_ADDR_PIC(r4, memstart_addr) + lwz r4,0(r4) + mr r5,r25 /* phys kernel start */ + rlwinm r5,r5,0,~0x3ffffff /* aligned 64M */ + subf r4,r5,r4 /* memstart_addr - phys kernel start */ + li r5,0 /* no device tree */ + li r6,0 /* not boot cpu */ + bl restore_to_as0 + + + lis r3,__secondary_hold_acknowledge@h + ori r3,r3,__secondary_hold_acknowledge@l + stw r24,0(r3) + + li r3,0 + mr r4,r24 /* Why? */ + bl call_setup_cpu /* get current_thread_info and current */ lis r1,secondary_ti@ha @@ -1111,6 +1211,112 @@ __secondary_hold_acknowledge: #endif /* + * Create a tlb entry with the same effective and physical address as + * the tlb entry used by the current running code. But set the TS to 1. + * Then switch to the address space 1. It will return with the r3 set to + * the ESEL of the new created tlb. + */ +_GLOBAL(switch_to_as1) + mflr r5 + + /* Find a entry not used */ + mfspr r3,SPRN_TLB1CFG + andi. r3,r3,0xfff + mfspr r4,SPRN_PID + rlwinm r4,r4,16,0x3fff0000 /* turn PID into MAS6[SPID] */ + mtspr SPRN_MAS6,r4 +1: lis r4,0x1000 /* Set MAS0(TLBSEL) = 1 */ + addi r3,r3,-1 + rlwimi r4,r3,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r3) */ + mtspr SPRN_MAS0,r4 + tlbre + mfspr r4,SPRN_MAS1 + andis. r4,r4,MAS1_VALID@h + bne 1b + + /* Get the tlb entry used by the current running code */ + bl 0f +0: mflr r4 + tlbsx 0,r4 + + mfspr r4,SPRN_MAS1 + ori r4,r4,MAS1_TS /* Set the TS = 1 */ + mtspr SPRN_MAS1,r4 + + mfspr r4,SPRN_MAS0 + rlwinm r4,r4,0,~MAS0_ESEL_MASK + rlwimi r4,r3,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r3) */ + mtspr SPRN_MAS0,r4 + tlbwe + isync + sync + + mfmsr r4 + ori r4,r4,MSR_IS | MSR_DS + mtspr SPRN_SRR0,r5 + mtspr SPRN_SRR1,r4 + sync + rfi + +/* + * Restore to the address space 0 and also invalidate the tlb entry created + * by switch_to_as1. + * r3 - the tlb entry which should be invalidated + * r4 - __pa(PAGE_OFFSET in AS1) - __pa(PAGE_OFFSET in AS0) + * r5 - device tree virtual address. If r4 is 0, r5 is ignored. + * r6 - boot cpu +*/ +_GLOBAL(restore_to_as0) + mflr r0 + + bl 0f +0: mflr r9 + addi r9,r9,1f - 0b + + /* + * We may map the PAGE_OFFSET in AS0 to a different physical address, + * so we need calculate the right jump and device tree address based + * on the offset passed by r4. + */ + add r9,r9,r4 + add r5,r5,r4 + add r0,r0,r4 + +2: mfmsr r7 + li r8,(MSR_IS | MSR_DS) + andc r7,r7,r8 + + mtspr SPRN_SRR0,r9 + mtspr SPRN_SRR1,r7 + sync + rfi + + /* Invalidate the temporary tlb entry for AS1 */ +1: lis r9,0x1000 /* Set MAS0(TLBSEL) = 1 */ + rlwimi r9,r3,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r3) */ + mtspr SPRN_MAS0,r9 + tlbre + mfspr r9,SPRN_MAS1 + rlwinm r9,r9,0,2,31 /* Clear MAS1 Valid and IPPROT */ + mtspr SPRN_MAS1,r9 + tlbwe + isync + + cmpwi r4,0 + cmpwi cr1,r6,0 + cror eq,4*cr1+eq,eq + bne 3f /* offset != 0 && is_boot_cpu */ + mtlr r0 + blr + + /* + * The PAGE_OFFSET will map to a different physical address, + * jump to _start to do another relocation again. + */ +3: mr r3,r5 + bl _start + +/* * We put a few things here that have to be page-aligned. This stuff * goes at the beginning of the data segment, which is page-aligned. */ diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index f0b47d1a6b0..0bb5918faaa 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -28,7 +28,6 @@ #include <linux/percpu.h> #include <linux/kernel.h> #include <linux/sched.h> -#include <linux/init.h> #include <linux/smp.h> #include <asm/hw_breakpoint.h> @@ -73,7 +72,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp) * If so, DABR will be populated in single_step_dabr_instruction(). */ if (current->thread.last_hit_ubp != bp) - set_breakpoint(info); + __set_breakpoint(info); return 0; } @@ -199,7 +198,7 @@ void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs) info = counter_arch_bp(tsk->thread.last_hit_ubp); regs->msr &= ~MSR_SE; - set_breakpoint(info); + __set_breakpoint(info); tsk->thread.last_hit_ubp = NULL; } @@ -285,7 +284,7 @@ int __kprobes hw_breakpoint_handler(struct die_args *args) if (!(info->type & HW_BRK_TYPE_EXTRANEOUS_IRQ)) perf_bp_event(bp, regs); - set_breakpoint(info); + __set_breakpoint(info); out: rcu_read_unlock(); return rc; @@ -317,7 +316,7 @@ int __kprobes single_step_dabr_instruction(struct die_args *args) if (!(info->type & HW_BRK_TYPE_EXTRANEOUS_IRQ)) perf_bp_event(bp, regs); - set_breakpoint(info); + __set_breakpoint(info); current->thread.last_hit_ubp = NULL; /* diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c index 16a7c2326d4..1114d13ac19 100644 --- a/arch/powerpc/kernel/ibmebus.c +++ b/arch/powerpc/kernel/ibmebus.c @@ -292,6 +292,7 @@ out: return rc; return count; } +static BUS_ATTR(probe, S_IWUSR, NULL, ibmebus_store_probe); static ssize_t ibmebus_store_remove(struct bus_type *bus, const char *buf, size_t count) @@ -317,13 +318,14 @@ static ssize_t ibmebus_store_remove(struct bus_type *bus, return -ENODEV; } } +static BUS_ATTR(remove, S_IWUSR, NULL, ibmebus_store_remove); - -static struct bus_attribute ibmebus_bus_attrs[] = { - __ATTR(probe, S_IWUSR, NULL, ibmebus_store_probe), - __ATTR(remove, S_IWUSR, NULL, ibmebus_store_remove), - __ATTR_NULL +static struct attribute *ibmbus_bus_attrs[] = { + &bus_attr_probe.attr, + &bus_attr_remove.attr, + NULL, }; +ATTRIBUTE_GROUPS(ibmbus_bus); static int ibmebus_bus_bus_match(struct device *dev, struct device_driver *drv) { @@ -713,7 +715,7 @@ static struct dev_pm_ops ibmebus_bus_dev_pm_ops = { struct bus_type ibmebus_bus_type = { .name = "ibmebus", .uevent = of_device_uevent_modalias, - .bus_attrs = ibmebus_bus_attrs, + .bus_groups = ibmbus_bus_groups, .match = ibmebus_bus_bus_match, .probe = ibmebus_bus_device_probe, .remove = ibmebus_bus_device_remove, diff --git a/arch/powerpc/kernel/idle_book3e.S b/arch/powerpc/kernel/idle_book3e.S index bfb73cc209c..48c21acef91 100644 --- a/arch/powerpc/kernel/idle_book3e.S +++ b/arch/powerpc/kernel/idle_book3e.S @@ -43,7 +43,7 @@ _GLOBAL(\name) */ #ifdef CONFIG_TRACE_IRQFLAGS stdu r1,-128(r1) - bl .trace_hardirqs_on + bl trace_hardirqs_on addi r1,r1,128 #endif li r0,1 diff --git a/arch/powerpc/kernel/idle_power4.S b/arch/powerpc/kernel/idle_power4.S index e3edaa18991..f57a19348bd 100644 --- a/arch/powerpc/kernel/idle_power4.S +++ b/arch/powerpc/kernel/idle_power4.S @@ -46,7 +46,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_CAN_NAP) mflr r0 std r0,16(r1) stdu r1,-128(r1) - bl .trace_hardirqs_on + bl trace_hardirqs_on addi r1,r1,128 ld r0,16(r1) mtlr r0 diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S index e11863f4e59..5cf3d367190 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_power7.S @@ -17,20 +17,35 @@ #include <asm/ppc-opcode.h> #include <asm/hw_irq.h> #include <asm/kvm_book3s_asm.h> +#include <asm/opal.h> #undef DEBUG - .text +/* Idle state entry routines */ -_GLOBAL(power7_idle) - /* Now check if user or arch enabled NAP mode */ - LOAD_REG_ADDRBASE(r3,powersave_nap) - lwz r4,ADDROFF(powersave_nap)(r3) - cmpwi 0,r4,0 - beqlr - /* fall through */ +#define IDLE_STATE_ENTER_SEQ(IDLE_INST) \ + /* Magic NAP/SLEEP/WINKLE mode enter sequence */ \ + std r0,0(r1); \ + ptesync; \ + ld r0,0(r1); \ +1: cmp cr0,r0,r0; \ + bne 1b; \ + IDLE_INST; \ + b . -_GLOBAL(power7_nap) + .text + +/* + * Pass requested state in r3: + * 0 - nap + * 1 - sleep + * + * To check IRQ_HAPPENED in r4 + * 0 - don't check + * 1 - check + */ +_GLOBAL(power7_powersave_common) + /* Use r3 to pass state nap/sleep/winkle */ /* NAP is a state loss, we create a regs frame on the * stack, fill it up with the state we care about and * stick a pointer to it in PACAR1. We really only @@ -47,7 +62,7 @@ _GLOBAL(power7_nap) /* Make sure FPU, VSX etc... are flushed as we may lose * state when going to nap mode */ - bl .discard_lazy_cpu_state + bl discard_lazy_cpu_state #endif /* CONFIG_SMP */ /* Hard disable interrupts */ @@ -60,6 +75,8 @@ _GLOBAL(power7_nap) lbz r0,PACAIRQHAPPENED(r13) cmpwi cr0,r0,0 beq 1f + cmpwi cr0,r4,0 + beq 1f addi r1,r1,INT_FRAME_SIZE ld r0,16(r1) mtlr r0 @@ -79,25 +96,70 @@ _GLOBAL(power7_nap) /* Continue saving state */ SAVE_GPR(2, r1) SAVE_NVGPRS(r1) - mfcr r3 - std r3,_CCR(r1) + mfcr r4 + std r4,_CCR(r1) std r9,_MSR(r1) std r1,PACAR1(r13) -#ifdef CONFIG_KVM_BOOK3S_64_HV +_GLOBAL(power7_enter_nap_mode) +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE /* Tell KVM we're napping */ li r4,KVM_HWTHREAD_IN_NAP stb r4,HSTATE_HWTHREAD_STATE(r13) #endif + cmpwi cr0,r3,1 + beq 2f + IDLE_STATE_ENTER_SEQ(PPC_NAP) + /* No return */ +2: IDLE_STATE_ENTER_SEQ(PPC_SLEEP) + /* No return */ - /* Magic NAP mode enter sequence */ - std r0,0(r1) - ptesync - ld r0,0(r1) -1: cmp cr0,r0,r0 - bne 1b - PPC_NAP - b . +_GLOBAL(power7_idle) + /* Now check if user or arch enabled NAP mode */ + LOAD_REG_ADDRBASE(r3,powersave_nap) + lwz r4,ADDROFF(powersave_nap)(r3) + cmpwi 0,r4,0 + beqlr + li r3, 1 + /* fall through */ + +_GLOBAL(power7_nap) + mr r4,r3 + li r3,0 + b power7_powersave_common + /* No return */ + +_GLOBAL(power7_sleep) + li r3,1 + li r4,1 + b power7_powersave_common + /* No return */ + +_GLOBAL(power7_wakeup_tb_loss) + ld r2,PACATOC(r13); + ld r1,PACAR1(r13) + + /* Time base re-sync */ + li r0,OPAL_RESYNC_TIMEBASE + LOAD_REG_ADDR(r11,opal); + ld r12,8(r11); + ld r2,0(r11); + mtctr r12 + bctrl + + /* TODO: Check r3 for failure */ + + REST_NVGPRS(r1) + REST_GPR(2, r1) + ld r3,_CCR(r1) + ld r4,_MSR(r1) + ld r5,_NIP(r1) + addi r1,r1,INT_FRAME_SIZE + mtcr r3 + mfspr r3,SPRN_SRR1 /* Return SRR1 */ + mtspr SPRN_SRR1,r4 + mtspr SPRN_SRR0,r5 + rfid _GLOBAL(power7_wakeup_loss) ld r1,PACAR1(r13) @@ -115,7 +177,7 @@ _GLOBAL(power7_wakeup_loss) _GLOBAL(power7_wakeup_noloss) lbz r0,PACA_NAPSTATELOST(r13) cmpwi r0,0 - bne .power7_wakeup_loss + bne power7_wakeup_loss ld r1,PACAR1(r13) ld r4,_MSR(r1) ld r5,_NIP(r1) diff --git a/arch/powerpc/kernel/iomap.c b/arch/powerpc/kernel/iomap.c index 97a3715ac8b..12e48d56f77 100644 --- a/arch/powerpc/kernel/iomap.c +++ b/arch/powerpc/kernel/iomap.c @@ -3,7 +3,6 @@ * * (C) Copyright 2004 Linus Torvalds */ -#include <linux/init.h> #include <linux/pci.h> #include <linux/mm.h> #include <linux/export.h> @@ -24,7 +23,7 @@ unsigned int ioread16(void __iomem *addr) } unsigned int ioread16be(void __iomem *addr) { - return in_be16(addr); + return readw_be(addr); } unsigned int ioread32(void __iomem *addr) { @@ -32,7 +31,7 @@ unsigned int ioread32(void __iomem *addr) } unsigned int ioread32be(void __iomem *addr) { - return in_be32(addr); + return readl_be(addr); } EXPORT_SYMBOL(ioread8); EXPORT_SYMBOL(ioread16); @@ -50,7 +49,7 @@ void iowrite16(u16 val, void __iomem *addr) } void iowrite16be(u16 val, void __iomem *addr) { - out_be16(addr, val); + writew_be(val, addr); } void iowrite32(u32 val, void __iomem *addr) { @@ -58,7 +57,7 @@ void iowrite32(u32 val, void __iomem *addr) } void iowrite32be(u32 val, void __iomem *addr) { - out_be32(addr, val); + writel_be(val, addr); } EXPORT_SYMBOL(iowrite8); EXPORT_SYMBOL(iowrite16); @@ -76,15 +75,15 @@ EXPORT_SYMBOL(iowrite32be); */ void ioread8_rep(void __iomem *addr, void *dst, unsigned long count) { - _insb((u8 __iomem *) addr, dst, count); + readsb(addr, dst, count); } void ioread16_rep(void __iomem *addr, void *dst, unsigned long count) { - _insw_ns((u16 __iomem *) addr, dst, count); + readsw(addr, dst, count); } void ioread32_rep(void __iomem *addr, void *dst, unsigned long count) { - _insl_ns((u32 __iomem *) addr, dst, count); + readsl(addr, dst, count); } EXPORT_SYMBOL(ioread8_rep); EXPORT_SYMBOL(ioread16_rep); @@ -92,15 +91,15 @@ EXPORT_SYMBOL(ioread32_rep); void iowrite8_rep(void __iomem *addr, const void *src, unsigned long count) { - _outsb((u8 __iomem *) addr, src, count); + writesb(addr, src, count); } void iowrite16_rep(void __iomem *addr, const void *src, unsigned long count) { - _outsw_ns((u16 __iomem *) addr, src, count); + writesw(addr, src, count); } void iowrite32_rep(void __iomem *addr, const void *src, unsigned long count) { - _outsl_ns((u32 __iomem *) addr, src, count); + writesl(addr, src, count); } EXPORT_SYMBOL(iowrite8_rep); EXPORT_SYMBOL(iowrite16_rep); diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 0adab06ce5c..88e3ec6e1d9 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -251,14 +251,13 @@ again: if (dev) boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, - 1 << IOMMU_PAGE_SHIFT); + 1 << tbl->it_page_shift); else - boundary_size = ALIGN(1UL << 32, 1 << IOMMU_PAGE_SHIFT); + boundary_size = ALIGN(1UL << 32, 1 << tbl->it_page_shift); /* 4GB boundary for iseries_hv_alloc and iseries_hv_map */ - n = iommu_area_alloc(tbl->it_map, limit, start, npages, - tbl->it_offset, boundary_size >> IOMMU_PAGE_SHIFT, - align_mask); + n = iommu_area_alloc(tbl->it_map, limit, start, npages, tbl->it_offset, + boundary_size >> tbl->it_page_shift, align_mask); if (n == -1) { if (likely(pass == 0)) { /* First try the pool from the start */ @@ -320,12 +319,12 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, return DMA_ERROR_CODE; entry += tbl->it_offset; /* Offset into real TCE table */ - ret = entry << IOMMU_PAGE_SHIFT; /* Set the return dma address */ + ret = entry << tbl->it_page_shift; /* Set the return dma address */ /* Put the TCEs in the HW table */ build_fail = ppc_md.tce_build(tbl, entry, npages, - (unsigned long)page & IOMMU_PAGE_MASK, - direction, attrs); + (unsigned long)page & + IOMMU_PAGE_MASK(tbl), direction, attrs); /* ppc_md.tce_build() only returns non-zero for transient errors. * Clean up the table bitmap in this case and return @@ -352,7 +351,7 @@ static bool iommu_free_check(struct iommu_table *tbl, dma_addr_t dma_addr, { unsigned long entry, free_entry; - entry = dma_addr >> IOMMU_PAGE_SHIFT; + entry = dma_addr >> tbl->it_page_shift; free_entry = entry - tbl->it_offset; if (((free_entry + npages) > tbl->it_size) || @@ -401,7 +400,7 @@ static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, unsigned long flags; struct iommu_pool *pool; - entry = dma_addr >> IOMMU_PAGE_SHIFT; + entry = dma_addr >> tbl->it_page_shift; free_entry = entry - tbl->it_offset; pool = get_pool(tbl, free_entry); @@ -468,13 +467,13 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl, } /* Allocate iommu entries for that segment */ vaddr = (unsigned long) sg_virt(s); - npages = iommu_num_pages(vaddr, slen, IOMMU_PAGE_SIZE); + npages = iommu_num_pages(vaddr, slen, IOMMU_PAGE_SIZE(tbl)); align = 0; - if (IOMMU_PAGE_SHIFT < PAGE_SHIFT && slen >= PAGE_SIZE && + if (tbl->it_page_shift < PAGE_SHIFT && slen >= PAGE_SIZE && (vaddr & ~PAGE_MASK) == 0) - align = PAGE_SHIFT - IOMMU_PAGE_SHIFT; + align = PAGE_SHIFT - tbl->it_page_shift; entry = iommu_range_alloc(dev, tbl, npages, &handle, - mask >> IOMMU_PAGE_SHIFT, align); + mask >> tbl->it_page_shift, align); DBG(" - vaddr: %lx, size: %lx\n", vaddr, slen); @@ -489,16 +488,16 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl, /* Convert entry to a dma_addr_t */ entry += tbl->it_offset; - dma_addr = entry << IOMMU_PAGE_SHIFT; - dma_addr |= (s->offset & ~IOMMU_PAGE_MASK); + dma_addr = entry << tbl->it_page_shift; + dma_addr |= (s->offset & ~IOMMU_PAGE_MASK(tbl)); DBG(" - %lu pages, entry: %lx, dma_addr: %lx\n", npages, entry, dma_addr); /* Insert into HW table */ build_fail = ppc_md.tce_build(tbl, entry, npages, - vaddr & IOMMU_PAGE_MASK, - direction, attrs); + vaddr & IOMMU_PAGE_MASK(tbl), + direction, attrs); if(unlikely(build_fail)) goto failure; @@ -559,9 +558,9 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl, if (s->dma_length != 0) { unsigned long vaddr, npages; - vaddr = s->dma_address & IOMMU_PAGE_MASK; + vaddr = s->dma_address & IOMMU_PAGE_MASK(tbl); npages = iommu_num_pages(s->dma_address, s->dma_length, - IOMMU_PAGE_SIZE); + IOMMU_PAGE_SIZE(tbl)); __iommu_free(tbl, vaddr, npages); s->dma_address = DMA_ERROR_CODE; s->dma_length = 0; @@ -592,7 +591,7 @@ void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist, if (sg->dma_length == 0) break; npages = iommu_num_pages(dma_handle, sg->dma_length, - IOMMU_PAGE_SIZE); + IOMMU_PAGE_SIZE(tbl)); __iommu_free(tbl, dma_handle, npages); sg = sg_next(sg); } @@ -661,7 +660,7 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid) /* number of bytes needed for the bitmap */ sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long); - page = alloc_pages_node(nid, GFP_ATOMIC, get_order(sz)); + page = alloc_pages_node(nid, GFP_KERNEL, get_order(sz)); if (!page) panic("iommu_init_table: Can't allocate %ld bytes\n", sz); tbl->it_map = page_address(page); @@ -676,7 +675,7 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid) set_bit(0, tbl->it_map); /* We only split the IOMMU table if we have 1GB or more of space */ - if ((tbl->it_size << IOMMU_PAGE_SHIFT) >= (1UL * 1024 * 1024 * 1024)) + if ((tbl->it_size << tbl->it_page_shift) >= (1UL * 1024 * 1024 * 1024)) tbl->nr_pools = IOMMU_NR_POOLS; else tbl->nr_pools = 1; @@ -768,16 +767,16 @@ dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl, vaddr = page_address(page) + offset; uaddr = (unsigned long)vaddr; - npages = iommu_num_pages(uaddr, size, IOMMU_PAGE_SIZE); + npages = iommu_num_pages(uaddr, size, IOMMU_PAGE_SIZE(tbl)); if (tbl) { align = 0; - if (IOMMU_PAGE_SHIFT < PAGE_SHIFT && size >= PAGE_SIZE && + if (tbl->it_page_shift < PAGE_SHIFT && size >= PAGE_SIZE && ((unsigned long)vaddr & ~PAGE_MASK) == 0) - align = PAGE_SHIFT - IOMMU_PAGE_SHIFT; + align = PAGE_SHIFT - tbl->it_page_shift; dma_handle = iommu_alloc(dev, tbl, vaddr, npages, direction, - mask >> IOMMU_PAGE_SHIFT, align, + mask >> tbl->it_page_shift, align, attrs); if (dma_handle == DMA_ERROR_CODE) { if (printk_ratelimit()) { @@ -786,7 +785,7 @@ dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl, npages); } } else - dma_handle |= (uaddr & ~IOMMU_PAGE_MASK); + dma_handle |= (uaddr & ~IOMMU_PAGE_MASK(tbl)); } return dma_handle; @@ -801,7 +800,8 @@ void iommu_unmap_page(struct iommu_table *tbl, dma_addr_t dma_handle, BUG_ON(direction == DMA_NONE); if (tbl) { - npages = iommu_num_pages(dma_handle, size, IOMMU_PAGE_SIZE); + npages = iommu_num_pages(dma_handle, size, + IOMMU_PAGE_SIZE(tbl)); iommu_free(tbl, dma_handle, npages); } } @@ -845,10 +845,10 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl, memset(ret, 0, size); /* Set up tces to cover the allocated range */ - nio_pages = size >> IOMMU_PAGE_SHIFT; - io_order = get_iommu_order(size); + nio_pages = size >> tbl->it_page_shift; + io_order = get_iommu_order(size, tbl); mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL, - mask >> IOMMU_PAGE_SHIFT, io_order, NULL); + mask >> tbl->it_page_shift, io_order, NULL); if (mapping == DMA_ERROR_CODE) { free_pages((unsigned long)ret, order); return NULL; @@ -864,7 +864,7 @@ void iommu_free_coherent(struct iommu_table *tbl, size_t size, unsigned int nio_pages; size = PAGE_ALIGN(size); - nio_pages = size >> IOMMU_PAGE_SHIFT; + nio_pages = size >> tbl->it_page_shift; iommu_free(tbl, dma_handle, nio_pages); size = PAGE_ALIGN(size); free_pages((unsigned long)vaddr, get_order(size)); @@ -935,10 +935,10 @@ int iommu_tce_clear_param_check(struct iommu_table *tbl, if (tce_value) return -EINVAL; - if (ioba & ~IOMMU_PAGE_MASK) + if (ioba & ~IOMMU_PAGE_MASK(tbl)) return -EINVAL; - ioba >>= IOMMU_PAGE_SHIFT; + ioba >>= tbl->it_page_shift; if (ioba < tbl->it_offset) return -EINVAL; @@ -955,13 +955,13 @@ int iommu_tce_put_param_check(struct iommu_table *tbl, if (!(tce & (TCE_PCI_WRITE | TCE_PCI_READ))) return -EINVAL; - if (tce & ~(IOMMU_PAGE_MASK | TCE_PCI_WRITE | TCE_PCI_READ)) + if (tce & ~(IOMMU_PAGE_MASK(tbl) | TCE_PCI_WRITE | TCE_PCI_READ)) return -EINVAL; - if (ioba & ~IOMMU_PAGE_MASK) + if (ioba & ~IOMMU_PAGE_MASK(tbl)) return -EINVAL; - ioba >>= IOMMU_PAGE_SHIFT; + ioba >>= tbl->it_page_shift; if (ioba < tbl->it_offset) return -EINVAL; @@ -1037,7 +1037,7 @@ int iommu_tce_build(struct iommu_table *tbl, unsigned long entry, /* if (unlikely(ret)) pr_err("iommu_tce: %s failed on hwaddr=%lx ioba=%lx kva=%lx ret=%d\n", - __func__, hwaddr, entry << IOMMU_PAGE_SHIFT, + __func__, hwaddr, entry << IOMMU_PAGE_SHIFT(tbl), hwaddr, ret); */ return ret; @@ -1049,14 +1049,14 @@ int iommu_put_tce_user_mode(struct iommu_table *tbl, unsigned long entry, { int ret; struct page *page = NULL; - unsigned long hwaddr, offset = tce & IOMMU_PAGE_MASK & ~PAGE_MASK; + unsigned long hwaddr, offset = tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK; enum dma_data_direction direction = iommu_tce_direction(tce); ret = get_user_pages_fast(tce & PAGE_MASK, 1, direction != DMA_TO_DEVICE, &page); if (unlikely(ret != 1)) { /* pr_err("iommu_tce: get_user_pages_fast failed tce=%lx ioba=%lx ret=%d\n", - tce, entry << IOMMU_PAGE_SHIFT, ret); */ + tce, entry << IOMMU_PAGE_SHIFT(tbl), ret); */ return -EFAULT; } hwaddr = (unsigned long) page_address(page) + offset; @@ -1067,7 +1067,7 @@ int iommu_put_tce_user_mode(struct iommu_table *tbl, unsigned long entry, if (ret < 0) pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%d\n", - __func__, entry << IOMMU_PAGE_SHIFT, tce, ret); + __func__, entry << tbl->it_page_shift, tce, ret); return ret; } @@ -1088,6 +1088,14 @@ int iommu_take_ownership(struct iommu_table *tbl) memset(tbl->it_map, 0xff, sz); iommu_clear_tces_and_put_pages(tbl, tbl->it_offset, tbl->it_size); + /* + * Disable iommu bypass, otherwise the user can DMA to all of + * our physical memory via the bypass window instead of just + * the pages that has been explicitly mapped into the iommu + */ + if (tbl->set_bypass) + tbl->set_bypass(tbl, false); + return 0; } EXPORT_SYMBOL_GPL(iommu_take_ownership); @@ -1102,10 +1110,14 @@ void iommu_release_ownership(struct iommu_table *tbl) /* Restore bit#0 set by iommu_init_table() */ if (tbl->it_offset == 0) set_bit(0, tbl->it_map); + + /* The kernel owns the device now, we can restore the iommu bypass */ + if (tbl->set_bypass) + tbl->set_bypass(tbl, true); } EXPORT_SYMBOL_GPL(iommu_release_ownership); -static int iommu_add_device(struct device *dev) +int iommu_add_device(struct device *dev) { struct iommu_table *tbl; int ret = 0; @@ -1127,6 +1139,12 @@ static int iommu_add_device(struct device *dev) pr_debug("iommu_tce: adding %s to iommu group %d\n", dev_name(dev), iommu_group_id(tbl->it_group)); + if (PAGE_SIZE < IOMMU_PAGE_SIZE(tbl)) { + pr_err("iommu_tce: unsupported iommu page size."); + pr_err("%s has not been added\n", dev_name(dev)); + return -EINVAL; + } + ret = iommu_group_add_device(tbl->it_group, dev); if (ret < 0) pr_err("iommu_tce: %s has not been added, ret=%d\n", @@ -1134,52 +1152,23 @@ static int iommu_add_device(struct device *dev) return ret; } +EXPORT_SYMBOL_GPL(iommu_add_device); -static void iommu_del_device(struct device *dev) -{ - iommu_group_remove_device(dev); -} - -static int iommu_bus_notifier(struct notifier_block *nb, - unsigned long action, void *data) +void iommu_del_device(struct device *dev) { - struct device *dev = data; - - switch (action) { - case BUS_NOTIFY_ADD_DEVICE: - return iommu_add_device(dev); - case BUS_NOTIFY_DEL_DEVICE: - iommu_del_device(dev); - return 0; - default: - return 0; + /* + * Some devices might not have IOMMU table and group + * and we needn't detach them from the associated + * IOMMU groups + */ + if (!dev->iommu_group) { + pr_debug("iommu_tce: skipping device %s with no tbl\n", + dev_name(dev)); + return; } -} - -static struct notifier_block tce_iommu_bus_nb = { - .notifier_call = iommu_bus_notifier, -}; - -static int __init tce_iommu_init(void) -{ - struct pci_dev *pdev = NULL; - - BUILD_BUG_ON(PAGE_SIZE < IOMMU_PAGE_SIZE); - - for_each_pci_dev(pdev) - iommu_add_device(&pdev->dev); - - bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb); - return 0; -} - -subsys_initcall_sync(tce_iommu_init); - -#else -void iommu_register_group(struct iommu_table *tbl, - int pci_domain_number, unsigned long pe_num) -{ + iommu_group_remove_device(dev); } +EXPORT_SYMBOL_GPL(iommu_del_device); #endif /* CONFIG_IOMMU_API */ diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index c69440cef7a..248ee7e5beb 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -304,7 +304,7 @@ void notrace restore_interrupts(void) * being re-enabled and generally sanitized the lazy irq state, * and in the latter case it will leave with interrupts hard * disabled and marked as such, so the local_irq_enable() call - * in cpu_idle() will properly re-enable everything. + * in arch_cpu_idle() will properly re-enable everything. */ bool prep_irq_for_idle(void) { @@ -354,8 +354,13 @@ int arch_show_interrupts(struct seq_file *p, int prec) seq_printf(p, "%*s: ", prec, "LOC"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).timer_irqs); - seq_printf(p, " Local timer interrupts\n"); + seq_printf(p, "%10u ", per_cpu(irq_stat, j).timer_irqs_event); + seq_printf(p, " Local timer interrupts for timer event device\n"); + + seq_printf(p, "%*s: ", prec, "LOC"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", per_cpu(irq_stat, j).timer_irqs_others); + seq_printf(p, " Local timer interrupts for others\n"); seq_printf(p, "%*s: ", prec, "SPU"); for_each_online_cpu(j) @@ -389,11 +394,12 @@ int arch_show_interrupts(struct seq_file *p, int prec) */ u64 arch_irq_stat_cpu(unsigned int cpu) { - u64 sum = per_cpu(irq_stat, cpu).timer_irqs; + u64 sum = per_cpu(irq_stat, cpu).timer_irqs_event; sum += per_cpu(irq_stat, cpu).pmu_irqs; sum += per_cpu(irq_stat, cpu).mce_exceptions; sum += per_cpu(irq_stat, cpu).spurious_irqs; + sum += per_cpu(irq_stat, cpu).timer_irqs_others; #ifdef CONFIG_PPC_DOORBELL sum += per_cpu(irq_stat, cpu).doorbell_irqs; #endif @@ -441,50 +447,6 @@ void migrate_irqs(void) } #endif -static inline void handle_one_irq(unsigned int irq) -{ - struct thread_info *curtp, *irqtp; - unsigned long saved_sp_limit; - struct irq_desc *desc; - - desc = irq_to_desc(irq); - if (!desc) - return; - - /* Switch to the irq stack to handle this */ - curtp = current_thread_info(); - irqtp = hardirq_ctx[smp_processor_id()]; - - if (curtp == irqtp) { - /* We're already on the irq stack, just handle it */ - desc->handle_irq(irq, desc); - return; - } - - saved_sp_limit = current->thread.ksp_limit; - - irqtp->task = curtp->task; - irqtp->flags = 0; - - /* Copy the softirq bits in preempt_count so that the - * softirq checks work in the hardirq context. */ - irqtp->preempt_count = (irqtp->preempt_count & ~SOFTIRQ_MASK) | - (curtp->preempt_count & SOFTIRQ_MASK); - - current->thread.ksp_limit = (unsigned long)irqtp + - _ALIGN_UP(sizeof(struct thread_info), 16); - - call_handle_irq(irq, desc, irqtp, desc->handle_irq); - current->thread.ksp_limit = saved_sp_limit; - irqtp->task = NULL; - - /* Set any flag that may have been set on the - * alternate stack - */ - if (irqtp->flags) - set_bits(irqtp->flags, &curtp->flags); -} - static inline void check_stack_overflow(void) { #ifdef CONFIG_DEBUG_STACKOVERFLOW @@ -501,9 +463,8 @@ static inline void check_stack_overflow(void) #endif } -void do_IRQ(struct pt_regs *regs) +void __do_irq(struct pt_regs *regs) { - struct pt_regs *old_regs = set_irq_regs(regs); unsigned int irq; irq_enter(); @@ -519,18 +480,54 @@ void do_IRQ(struct pt_regs *regs) */ irq = ppc_md.get_irq(); - /* We can hard enable interrupts now */ + /* We can hard enable interrupts now to allow perf interrupts */ may_hard_irq_enable(); /* And finally process it */ - if (irq != NO_IRQ) - handle_one_irq(irq); - else + if (unlikely(irq == NO_IRQ)) __get_cpu_var(irq_stat).spurious_irqs++; + else + generic_handle_irq(irq); trace_irq_exit(regs); irq_exit(); +} + +void do_IRQ(struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + struct thread_info *curtp, *irqtp, *sirqtp; + + /* Switch to the irq stack to handle this */ + curtp = current_thread_info(); + irqtp = hardirq_ctx[raw_smp_processor_id()]; + sirqtp = softirq_ctx[raw_smp_processor_id()]; + + /* Already there ? */ + if (unlikely(curtp == irqtp || curtp == sirqtp)) { + __do_irq(regs); + set_irq_regs(old_regs); + return; + } + + /* Prepare the thread_info in the irq stack */ + irqtp->task = curtp->task; + irqtp->flags = 0; + + /* Copy the preempt_count so that the [soft]irq checks work. */ + irqtp->preempt_count = curtp->preempt_count; + + /* Switch stack and call */ + call_do_irq(regs, irqtp); + + /* Restore stack limit */ + irqtp->task = NULL; + + /* Copy back updates to the thread_info */ + if (irqtp->flags) + set_bits(irqtp->flags, &curtp->flags); + set_irq_regs(old_regs); } @@ -558,8 +555,13 @@ void exc_lvl_ctx_init(void) #ifdef CONFIG_PPC64 cpu_nr = i; #else +#ifdef CONFIG_SMP cpu_nr = get_hard_smp_processor_id(i); +#else + cpu_nr = 0; #endif +#endif + memset((void *)critirq_ctx[cpu_nr], 0, THREAD_SIZE); tp = critirq_ctx[cpu_nr]; tp->cpu = cpu_nr; @@ -592,28 +594,22 @@ void irq_ctx_init(void) memset((void *)softirq_ctx[i], 0, THREAD_SIZE); tp = softirq_ctx[i]; tp->cpu = i; - tp->preempt_count = 0; memset((void *)hardirq_ctx[i], 0, THREAD_SIZE); tp = hardirq_ctx[i]; tp->cpu = i; - tp->preempt_count = HARDIRQ_OFFSET; } } -static inline void do_softirq_onstack(void) +void do_softirq_own_stack(void) { struct thread_info *curtp, *irqtp; - unsigned long saved_sp_limit = current->thread.ksp_limit; curtp = current_thread_info(); irqtp = softirq_ctx[smp_processor_id()]; irqtp->task = curtp->task; irqtp->flags = 0; - current->thread.ksp_limit = (unsigned long)irqtp + - _ALIGN_UP(sizeof(struct thread_info), 16); call_do_softirq(irqtp); - current->thread.ksp_limit = saved_sp_limit; irqtp->task = NULL; /* Set any flag that may have been set on the @@ -623,21 +619,6 @@ static inline void do_softirq_onstack(void) set_bits(irqtp->flags, &curtp->flags); } -void do_softirq(void) -{ - unsigned long flags; - - if (in_interrupt()) - return; - - local_irq_save(flags); - - if (local_softirq_pending()) - do_softirq_onstack(); - - local_irq_restore(flags); -} - irq_hw_number_t virq_to_hw(unsigned int virq) { struct irq_data *irq_data = irq_get_irq_data(virq); diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index c1eef241017..8504657379f 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -15,7 +15,6 @@ */ #include <linux/kernel.h> -#include <linux/init.h> #include <linux/kgdb.h> #include <linux/smp.h> #include <linux/signal.h> @@ -151,15 +150,16 @@ static int kgdb_handle_breakpoint(struct pt_regs *regs) return 1; } +static DEFINE_PER_CPU(struct thread_info, kgdb_thread_info); static int kgdb_singlestep(struct pt_regs *regs) { struct thread_info *thread_info, *exception_thread_info; - struct thread_info *backup_current_thread_info; + struct thread_info *backup_current_thread_info = + &__get_cpu_var(kgdb_thread_info); if (user_mode(regs)) return 0; - backup_current_thread_info = kmalloc(sizeof(struct thread_info), GFP_KERNEL); /* * On Book E and perhaps other processors, singlestep is handled on * the critical exception stack. This causes current_thread_info() @@ -185,7 +185,6 @@ static int kgdb_singlestep(struct pt_regs *regs) /* Restore current_thread_info lastly. */ memcpy(exception_thread_info, backup_current_thread_info, sizeof *thread_info); - kfree(backup_current_thread_info); return 1; } diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 2156ea90eb5..2f72af82513 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -32,6 +32,7 @@ #include <linux/module.h> #include <linux/kdebug.h> #include <linux/slab.h> +#include <asm/code-patching.h> #include <asm/cacheflush.h> #include <asm/sstep.h> #include <asm/uaccess.h> @@ -429,7 +430,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) case KPROBE_HIT_SSDONE: /* * We increment the nmissed count for accounting, - * we can also use npre/npostfault count for accouting + * we can also use npre/npostfault count for accounting * these specific fault cases. */ kprobes_inc_nmissed_count(cur); @@ -491,12 +492,10 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, return ret; } -#ifdef CONFIG_PPC64 unsigned long arch_deref_entry_point(void *entry) { - return ((func_descr_t *)entry)->entry; + return ppc_global_function_entry(entry); } -#endif int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) { @@ -508,8 +507,12 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) /* setup return addr to the jprobe handler routine */ regs->nip = arch_deref_entry_point(jp->entry); #ifdef CONFIG_PPC64 +#if defined(_CALL_ELF) && _CALL_ELF == 2 + regs->gpr[12] = (unsigned long)jp->entry; +#else regs->gpr[2] = (unsigned long)(((func_descr_t *)jp->entry)->toc); #endif +#endif return 1; } diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index db28032e320..33aa4ddf597 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -74,7 +74,7 @@ #define KVM_INST_MTSRIN 0x7c0001e4 static bool kvm_patching_worked = true; -static char kvm_tmp[1024 * 1024]; +char kvm_tmp[1024 * 1024]; static int kvm_tmp_index; static inline void kvm_patch_ins(u32 *inst, u32 new_inst) @@ -413,13 +413,13 @@ static void kvm_map_magic_page(void *data) { u32 *features = data; - ulong in[8]; + ulong in[8] = {0}; ulong out[8]; in[0] = KVM_MAGIC_PAGE; - in[1] = KVM_MAGIC_PAGE; + in[1] = KVM_MAGIC_PAGE | MAGIC_PAGE_FLAG_NOT_MAPPED_NX; - kvm_hypercall(in, out, KVM_HCALL_TOKEN(KVM_HC_PPC_MAP_MAGIC_PAGE)); + epapr_hypercall(in, out, KVM_HCALL_TOKEN(KVM_HC_PPC_MAP_MAGIC_PAGE)); *features = out[0]; } @@ -711,43 +711,6 @@ static void kvm_use_magic_page(void) kvm_patching_worked ? "worked" : "failed"); } -unsigned long kvm_hypercall(unsigned long *in, - unsigned long *out, - unsigned long nr) -{ - unsigned long register r0 asm("r0"); - unsigned long register r3 asm("r3") = in[0]; - unsigned long register r4 asm("r4") = in[1]; - unsigned long register r5 asm("r5") = in[2]; - unsigned long register r6 asm("r6") = in[3]; - unsigned long register r7 asm("r7") = in[4]; - unsigned long register r8 asm("r8") = in[5]; - unsigned long register r9 asm("r9") = in[6]; - unsigned long register r10 asm("r10") = in[7]; - unsigned long register r11 asm("r11") = nr; - unsigned long register r12 asm("r12"); - - asm volatile("bl epapr_hypercall_start" - : "=r"(r0), "=r"(r3), "=r"(r4), "=r"(r5), "=r"(r6), - "=r"(r7), "=r"(r8), "=r"(r9), "=r"(r10), "=r"(r11), - "=r"(r12) - : "r"(r3), "r"(r4), "r"(r5), "r"(r6), "r"(r7), "r"(r8), - "r"(r9), "r"(r10), "r"(r11) - : "memory", "cc", "xer", "ctr", "lr"); - - out[0] = r4; - out[1] = r5; - out[2] = r6; - out[3] = r7; - out[4] = r8; - out[5] = r9; - out[6] = r10; - out[7] = r11; - - return r3; -} -EXPORT_SYMBOL_GPL(kvm_hypercall); - static __init void kvm_free_tmp(void) { free_reserved_area(&kvm_tmp[kvm_tmp_index], diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index 22e88dd2f34..936258881c9 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -35,7 +35,7 @@ static struct legacy_serial_info { phys_addr_t taddr; } legacy_serial_infos[MAX_LEGACY_SERIAL_PORTS]; -static struct __initdata of_device_id legacy_serial_parents[] = { +static struct of_device_id legacy_serial_parents[] __initdata = { {.type = "soc",}, {.type = "tsi-bridge",}, {.type = "opb", }, @@ -48,6 +48,9 @@ static struct __initdata of_device_id legacy_serial_parents[] = { static unsigned int legacy_serial_count; static int legacy_serial_console = -1; +static const upf_t legacy_port_flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | + UPF_SHARE_IRQ | UPF_FIXED_PORT; + static unsigned int tsi_serial_in(struct uart_port *p, int offset) { unsigned int tmp; @@ -71,8 +74,9 @@ static int __init add_legacy_port(struct device_node *np, int want_index, phys_addr_t taddr, unsigned long irq, upf_t flags, int irq_check_parent) { - const __be32 *clk, *spd; + const __be32 *clk, *spd, *rs; u32 clock = BASE_BAUD * 16; + u32 shift = 0; int index; /* get clock freq. if present */ @@ -83,6 +87,11 @@ static int __init add_legacy_port(struct device_node *np, int want_index, /* get default speed if present */ spd = of_get_property(np, "current-speed", NULL); + /* get register shift if present */ + rs = of_get_property(np, "reg-shift", NULL); + if (rs && *rs) + shift = be32_to_cpup(rs); + /* If we have a location index, then try to use it */ if (want_index >= 0 && want_index < MAX_LEGACY_SERIAL_PORTS) index = want_index; @@ -126,6 +135,7 @@ static int __init add_legacy_port(struct device_node *np, int want_index, legacy_serial_ports[index].uartclk = clock; legacy_serial_ports[index].irq = irq; legacy_serial_ports[index].flags = flags; + legacy_serial_ports[index].regshift = shift; legacy_serial_infos[index].taddr = taddr; legacy_serial_infos[index].np = of_node_get(np); legacy_serial_infos[index].clock = clock; @@ -153,8 +163,6 @@ static int __init add_legacy_soc_port(struct device_node *np, { u64 addr; const __be32 *addrp; - upf_t flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_SHARE_IRQ - | UPF_FIXED_PORT; struct device_node *tsi = of_get_parent(np); /* We only support ports that have a clock frequency properly @@ -163,9 +171,8 @@ static int __init add_legacy_soc_port(struct device_node *np, if (of_get_property(np, "clock-frequency", NULL) == NULL) return -1; - /* if reg-shift or offset, don't try to use it */ - if ((of_get_property(np, "reg-shift", NULL) != NULL) || - (of_get_property(np, "reg-offset", NULL) != NULL)) + /* if reg-offset don't try to use it */ + if ((of_get_property(np, "reg-offset", NULL) != NULL)) return -1; /* if rtas uses this device, don't try to use it as well */ @@ -185,9 +192,11 @@ static int __init add_legacy_soc_port(struct device_node *np, * IO port value. It will be fixed up later along with the irq */ if (tsi && !strcmp(tsi->type, "tsi-bridge")) - return add_legacy_port(np, -1, UPIO_TSI, addr, addr, NO_IRQ, flags, 0); + return add_legacy_port(np, -1, UPIO_TSI, addr, addr, + NO_IRQ, legacy_port_flags, 0); else - return add_legacy_port(np, -1, UPIO_MEM, addr, addr, NO_IRQ, flags, 0); + return add_legacy_port(np, -1, UPIO_MEM, addr, addr, + NO_IRQ, legacy_port_flags, 0); } static int __init add_legacy_isa_port(struct device_node *np, @@ -233,7 +242,7 @@ static int __init add_legacy_isa_port(struct device_node *np, /* Add port, irq will be dealt with later */ return add_legacy_port(np, index, UPIO_PORT, be32_to_cpu(reg[1]), - taddr, NO_IRQ, UPF_BOOT_AUTOCONF, 0); + taddr, NO_IRQ, legacy_port_flags, 0); } @@ -306,7 +315,7 @@ static int __init add_legacy_pci_port(struct device_node *np, * IO port value. It will be fixed up later along with the irq */ return add_legacy_port(np, index, iotype, base, addr, NO_IRQ, - UPF_BOOT_AUTOCONF, np != pci_dev); + legacy_port_flags, np != pci_dev); } #endif @@ -315,17 +324,20 @@ static void __init setup_legacy_serial_console(int console) struct legacy_serial_info *info = &legacy_serial_infos[console]; struct plat_serial8250_port *port = &legacy_serial_ports[console]; void __iomem *addr; + unsigned int stride; + + stride = 1 << port->regshift; /* Check if a translated MMIO address has been found */ if (info->taddr) { addr = ioremap(info->taddr, 0x1000); if (addr == NULL) return; - udbg_uart_init_mmio(addr, 1); + udbg_uart_init_mmio(addr, stride); } else { /* Check if it's PIO and we support untranslated PIO */ if (port->iotype == UPIO_PORT && isa_io_special) - udbg_uart_init_pio(port->iobase, 1); + udbg_uart_init_pio(port->iobase, stride); else return; } diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c index e1ec57e87b3..015ae55c186 100644 --- a/arch/powerpc/kernel/machine_kexec.c +++ b/arch/powerpc/kernel/machine_kexec.c @@ -18,6 +18,7 @@ #include <linux/ftrace.h> #include <asm/machdep.h> +#include <asm/pgalloc.h> #include <asm/prom.h> #include <asm/sections.h> @@ -75,6 +76,17 @@ void arch_crash_save_vmcoreinfo(void) #ifndef CONFIG_NEED_MULTIPLE_NODES VMCOREINFO_SYMBOL(contig_page_data); #endif +#if defined(CONFIG_PPC64) && defined(CONFIG_SPARSEMEM_VMEMMAP) + VMCOREINFO_SYMBOL(vmemmap_list); + VMCOREINFO_SYMBOL(mmu_vmemmap_psize); + VMCOREINFO_SYMBOL(mmu_psize_defs); + VMCOREINFO_STRUCT_SIZE(vmemmap_backing); + VMCOREINFO_OFFSET(vmemmap_backing, list); + VMCOREINFO_OFFSET(vmemmap_backing, phys); + VMCOREINFO_OFFSET(vmemmap_backing, virt_addr); + VMCOREINFO_STRUCT_SIZE(mmu_psize_def); + VMCOREINFO_OFFSET(mmu_psize_def, shift); +#endif } /* @@ -136,7 +148,7 @@ void __init reserve_crashkernel(void) * a small SLB (128MB) since the crash kernel needs to place * itself and some stacks to be in the first segment. */ - crashk_res.start = min(0x80000000ULL, (ppc64_rma_size / 2)); + crashk_res.start = min(0x8000000ULL, (ppc64_rma_size / 2)); #else crashk_res.start = KDUMP_KERNELBASE; #endif @@ -184,7 +196,9 @@ int overlaps_crashkernel(unsigned long start, unsigned long size) /* Values we need to export to the second kernel via the device tree. */ static phys_addr_t kernel_end; +static phys_addr_t crashk_base; static phys_addr_t crashk_size; +static unsigned long long mem_limit; static struct property kernel_end_prop = { .name = "linux,kernel-end", @@ -195,7 +209,7 @@ static struct property kernel_end_prop = { static struct property crashk_base_prop = { .name = "linux,crashkernel-base", .length = sizeof(phys_addr_t), - .value = &crashk_res.start, + .value = &crashk_base }; static struct property crashk_size_prop = { @@ -207,9 +221,11 @@ static struct property crashk_size_prop = { static struct property memory_limit_prop = { .name = "linux,memory-limit", .length = sizeof(unsigned long long), - .value = &memory_limit, + .value = &mem_limit, }; +#define cpu_to_be_ulong __PASTE(cpu_to_be, BITS_PER_LONG) + static void __init export_crashk_values(struct device_node *node) { struct property *prop; @@ -225,8 +241,9 @@ static void __init export_crashk_values(struct device_node *node) of_remove_property(node, prop); if (crashk_res.start != 0) { + crashk_base = cpu_to_be_ulong(crashk_res.start), of_add_property(node, &crashk_base_prop); - crashk_size = resource_size(&crashk_res); + crashk_size = cpu_to_be_ulong(resource_size(&crashk_res)); of_add_property(node, &crashk_size_prop); } @@ -234,6 +251,7 @@ static void __init export_crashk_values(struct device_node *node) * memory_limit is required by the kexec-tools to limit the * crash regions to the actual memory used. */ + mem_limit = cpu_to_be_ulong(memory_limit); of_update_property(node, &memory_limit_prop); } @@ -252,7 +270,7 @@ static int __init kexec_setup(void) of_remove_property(node, prop); /* information needed by userspace when using default_machine_kexec */ - kernel_end = __pa(_end); + kernel_end = cpu_to_be_ulong(__pa(_end)); of_add_property(node, &kernel_end_prop); export_crashk_values(node); diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c index 611acdf3009..879b3aacac3 100644 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -237,7 +237,7 @@ static void wake_offline_cpus(void) if (!cpu_online(cpu)) { printk(KERN_INFO "kexec: Waking offline cpu %d.\n", cpu); - cpu_up(cpu); + WARN_ON(cpu_up(cpu)); } } } @@ -312,7 +312,7 @@ static union thread_union kexec_stack __init_task_data = */ struct paca_struct kexec_paca; -/* Our assembly helper, in kexec_stub.S */ +/* Our assembly helper, in misc_64.S */ extern void kexec_sequence(void *newstack, unsigned long start, void *image, void *control, void (*clear_all)(void)) __noreturn; @@ -369,6 +369,7 @@ void default_machine_kexec(struct kimage *image) /* Values we need to export to the second kernel via the device tree. */ static unsigned long htab_base; +static unsigned long htab_size; static struct property htab_base_prop = { .name = "linux,htab-base", @@ -379,7 +380,7 @@ static struct property htab_base_prop = { static struct property htab_size_prop = { .name = "linux,htab-size", .length = sizeof(unsigned long), - .value = &htab_size_bytes, + .value = &htab_size, }; static int __init export_htab_values(void) @@ -403,8 +404,9 @@ static int __init export_htab_values(void) if (prop) of_remove_property(node, prop); - htab_base = __pa(htab_address); + htab_base = cpu_to_be64(__pa(htab_address)); of_add_property(node, &htab_base_prop); + htab_size = cpu_to_be64(htab_size_bytes); of_add_property(node, &htab_size_prop); of_node_put(node); diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c new file mode 100644 index 00000000000..a7fd4cb78b7 --- /dev/null +++ b/arch/powerpc/kernel/mce.c @@ -0,0 +1,352 @@ +/* + * Machine check exception handling. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright 2013 IBM Corporation + * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> + */ + +#undef DEBUG +#define pr_fmt(fmt) "mce: " fmt + +#include <linux/types.h> +#include <linux/ptrace.h> +#include <linux/percpu.h> +#include <linux/export.h> +#include <linux/irq_work.h> +#include <asm/mce.h> + +static DEFINE_PER_CPU(int, mce_nest_count); +static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event); + +/* Queue for delayed MCE events. */ +static DEFINE_PER_CPU(int, mce_queue_count); +static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue); + +static void machine_check_process_queued_event(struct irq_work *work); +struct irq_work mce_event_process_work = { + .func = machine_check_process_queued_event, +}; + +static void mce_set_error_info(struct machine_check_event *mce, + struct mce_error_info *mce_err) +{ + mce->error_type = mce_err->error_type; + switch (mce_err->error_type) { + case MCE_ERROR_TYPE_UE: + mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type; + break; + case MCE_ERROR_TYPE_SLB: + mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type; + break; + case MCE_ERROR_TYPE_ERAT: + mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type; + break; + case MCE_ERROR_TYPE_TLB: + mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type; + break; + case MCE_ERROR_TYPE_UNKNOWN: + default: + break; + } +} + +/* + * Decode and save high level MCE information into per cpu buffer which + * is an array of machine_check_event structure. + */ +void save_mce_event(struct pt_regs *regs, long handled, + struct mce_error_info *mce_err, + uint64_t nip, uint64_t addr) +{ + uint64_t srr1; + int index = __get_cpu_var(mce_nest_count)++; + struct machine_check_event *mce = &__get_cpu_var(mce_event[index]); + + /* + * Return if we don't have enough space to log mce event. + * mce_nest_count may go beyond MAX_MC_EVT but that's ok, + * the check below will stop buffer overrun. + */ + if (index >= MAX_MC_EVT) + return; + + /* Populate generic machine check info */ + mce->version = MCE_V1; + mce->srr0 = nip; + mce->srr1 = regs->msr; + mce->gpr3 = regs->gpr[3]; + mce->in_use = 1; + + mce->initiator = MCE_INITIATOR_CPU; + if (handled) + mce->disposition = MCE_DISPOSITION_RECOVERED; + else + mce->disposition = MCE_DISPOSITION_NOT_RECOVERED; + mce->severity = MCE_SEV_ERROR_SYNC; + + srr1 = regs->msr; + + /* + * Populate the mce error_type and type-specific error_type. + */ + mce_set_error_info(mce, mce_err); + + if (!addr) + return; + + if (mce->error_type == MCE_ERROR_TYPE_TLB) { + mce->u.tlb_error.effective_address_provided = true; + mce->u.tlb_error.effective_address = addr; + } else if (mce->error_type == MCE_ERROR_TYPE_SLB) { + mce->u.slb_error.effective_address_provided = true; + mce->u.slb_error.effective_address = addr; + } else if (mce->error_type == MCE_ERROR_TYPE_ERAT) { + mce->u.erat_error.effective_address_provided = true; + mce->u.erat_error.effective_address = addr; + } else if (mce->error_type == MCE_ERROR_TYPE_UE) { + mce->u.ue_error.effective_address_provided = true; + mce->u.ue_error.effective_address = addr; + } + return; +} + +/* + * get_mce_event: + * mce Pointer to machine_check_event structure to be filled. + * release Flag to indicate whether to free the event slot or not. + * 0 <= do not release the mce event. Caller will invoke + * release_mce_event() once event has been consumed. + * 1 <= release the slot. + * + * return 1 = success + * 0 = failure + * + * get_mce_event() will be called by platform specific machine check + * handle routine and in KVM. + * When we call get_mce_event(), we are still in interrupt context and + * preemption will not be scheduled until ret_from_expect() routine + * is called. + */ +int get_mce_event(struct machine_check_event *mce, bool release) +{ + int index = __get_cpu_var(mce_nest_count) - 1; + struct machine_check_event *mc_evt; + int ret = 0; + + /* Sanity check */ + if (index < 0) + return ret; + + /* Check if we have MCE info to process. */ + if (index < MAX_MC_EVT) { + mc_evt = &__get_cpu_var(mce_event[index]); + /* Copy the event structure and release the original */ + if (mce) + *mce = *mc_evt; + if (release) + mc_evt->in_use = 0; + ret = 1; + } + /* Decrement the count to free the slot. */ + if (release) + __get_cpu_var(mce_nest_count)--; + + return ret; +} + +void release_mce_event(void) +{ + get_mce_event(NULL, true); +} + +/* + * Queue up the MCE event which then can be handled later. + */ +void machine_check_queue_event(void) +{ + int index; + struct machine_check_event evt; + + if (!get_mce_event(&evt, MCE_EVENT_RELEASE)) + return; + + index = __get_cpu_var(mce_queue_count)++; + /* If queue is full, just return for now. */ + if (index >= MAX_MC_EVT) { + __get_cpu_var(mce_queue_count)--; + return; + } + __get_cpu_var(mce_event_queue[index]) = evt; + + /* Queue irq work to process this event later. */ + irq_work_queue(&mce_event_process_work); +} + +/* + * process pending MCE event from the mce event queue. This function will be + * called during syscall exit. + */ +static void machine_check_process_queued_event(struct irq_work *work) +{ + int index; + + /* + * For now just print it to console. + * TODO: log this error event to FSP or nvram. + */ + while (__get_cpu_var(mce_queue_count) > 0) { + index = __get_cpu_var(mce_queue_count) - 1; + machine_check_print_event_info( + &__get_cpu_var(mce_event_queue[index])); + __get_cpu_var(mce_queue_count)--; + } +} + +void machine_check_print_event_info(struct machine_check_event *evt) +{ + const char *level, *sevstr, *subtype; + static const char *mc_ue_types[] = { + "Indeterminate", + "Instruction fetch", + "Page table walk ifetch", + "Load/Store", + "Page table walk Load/Store", + }; + static const char *mc_slb_types[] = { + "Indeterminate", + "Parity", + "Multihit", + }; + static const char *mc_erat_types[] = { + "Indeterminate", + "Parity", + "Multihit", + }; + static const char *mc_tlb_types[] = { + "Indeterminate", + "Parity", + "Multihit", + }; + + /* Print things out */ + if (evt->version != MCE_V1) { + pr_err("Machine Check Exception, Unknown event version %d !\n", + evt->version); + return; + } + switch (evt->severity) { + case MCE_SEV_NO_ERROR: + level = KERN_INFO; + sevstr = "Harmless"; + break; + case MCE_SEV_WARNING: + level = KERN_WARNING; + sevstr = ""; + break; + case MCE_SEV_ERROR_SYNC: + level = KERN_ERR; + sevstr = "Severe"; + break; + case MCE_SEV_FATAL: + default: + level = KERN_ERR; + sevstr = "Fatal"; + break; + } + + printk("%s%s Machine check interrupt [%s]\n", level, sevstr, + evt->disposition == MCE_DISPOSITION_RECOVERED ? + "Recovered" : "[Not recovered"); + printk("%s Initiator: %s\n", level, + evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown"); + switch (evt->error_type) { + case MCE_ERROR_TYPE_UE: + subtype = evt->u.ue_error.ue_error_type < + ARRAY_SIZE(mc_ue_types) ? + mc_ue_types[evt->u.ue_error.ue_error_type] + : "Unknown"; + printk("%s Error type: UE [%s]\n", level, subtype); + if (evt->u.ue_error.effective_address_provided) + printk("%s Effective address: %016llx\n", + level, evt->u.ue_error.effective_address); + if (evt->u.ue_error.physical_address_provided) + printk("%s Physial address: %016llx\n", + level, evt->u.ue_error.physical_address); + break; + case MCE_ERROR_TYPE_SLB: + subtype = evt->u.slb_error.slb_error_type < + ARRAY_SIZE(mc_slb_types) ? + mc_slb_types[evt->u.slb_error.slb_error_type] + : "Unknown"; + printk("%s Error type: SLB [%s]\n", level, subtype); + if (evt->u.slb_error.effective_address_provided) + printk("%s Effective address: %016llx\n", + level, evt->u.slb_error.effective_address); + break; + case MCE_ERROR_TYPE_ERAT: + subtype = evt->u.erat_error.erat_error_type < + ARRAY_SIZE(mc_erat_types) ? + mc_erat_types[evt->u.erat_error.erat_error_type] + : "Unknown"; + printk("%s Error type: ERAT [%s]\n", level, subtype); + if (evt->u.erat_error.effective_address_provided) + printk("%s Effective address: %016llx\n", + level, evt->u.erat_error.effective_address); + break; + case MCE_ERROR_TYPE_TLB: + subtype = evt->u.tlb_error.tlb_error_type < + ARRAY_SIZE(mc_tlb_types) ? + mc_tlb_types[evt->u.tlb_error.tlb_error_type] + : "Unknown"; + printk("%s Error type: TLB [%s]\n", level, subtype); + if (evt->u.tlb_error.effective_address_provided) + printk("%s Effective address: %016llx\n", + level, evt->u.tlb_error.effective_address); + break; + default: + case MCE_ERROR_TYPE_UNKNOWN: + printk("%s Error type: Unknown\n", level); + break; + } +} + +uint64_t get_mce_fault_addr(struct machine_check_event *evt) +{ + switch (evt->error_type) { + case MCE_ERROR_TYPE_UE: + if (evt->u.ue_error.effective_address_provided) + return evt->u.ue_error.effective_address; + break; + case MCE_ERROR_TYPE_SLB: + if (evt->u.slb_error.effective_address_provided) + return evt->u.slb_error.effective_address; + break; + case MCE_ERROR_TYPE_ERAT: + if (evt->u.erat_error.effective_address_provided) + return evt->u.erat_error.effective_address; + break; + case MCE_ERROR_TYPE_TLB: + if (evt->u.tlb_error.effective_address_provided) + return evt->u.tlb_error.effective_address; + break; + default: + case MCE_ERROR_TYPE_UNKNOWN: + break; + } + return 0; +} +EXPORT_SYMBOL(get_mce_fault_addr); diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c new file mode 100644 index 00000000000..aa9aff3d6ad --- /dev/null +++ b/arch/powerpc/kernel/mce_power.c @@ -0,0 +1,313 @@ +/* + * Machine check exception handling CPU-side for power7 and power8 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright 2013 IBM Corporation + * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> + */ + +#undef DEBUG +#define pr_fmt(fmt) "mce_power: " fmt + +#include <linux/types.h> +#include <linux/ptrace.h> +#include <asm/mmu.h> +#include <asm/mce.h> +#include <asm/machdep.h> + +/* flush SLBs and reload */ +static void flush_and_reload_slb(void) +{ + struct slb_shadow *slb; + unsigned long i, n; + + /* Invalidate all SLBs */ + asm volatile("slbmte %0,%0; slbia" : : "r" (0)); + +#ifdef CONFIG_KVM_BOOK3S_HANDLER + /* + * If machine check is hit when in guest or in transition, we will + * only flush the SLBs and continue. + */ + if (get_paca()->kvm_hstate.in_guest) + return; +#endif + + /* For host kernel, reload the SLBs from shadow SLB buffer. */ + slb = get_slb_shadow(); + if (!slb) + return; + + n = min_t(u32, be32_to_cpu(slb->persistent), SLB_MIN_SIZE); + + /* Load up the SLB entries from shadow SLB */ + for (i = 0; i < n; i++) { + unsigned long rb = be64_to_cpu(slb->save_area[i].esid); + unsigned long rs = be64_to_cpu(slb->save_area[i].vsid); + + rb = (rb & ~0xFFFul) | i; + asm volatile("slbmte %0,%1" : : "r" (rs), "r" (rb)); + } +} + +static long mce_handle_derror(uint64_t dsisr, uint64_t slb_error_bits) +{ + long handled = 1; + + /* + * flush and reload SLBs for SLB errors and flush TLBs for TLB errors. + * reset the error bits whenever we handle them so that at the end + * we can check whether we handled all of them or not. + * */ + if (dsisr & slb_error_bits) { + flush_and_reload_slb(); + /* reset error bits */ + dsisr &= ~(slb_error_bits); + } + if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) { + if (cur_cpu_spec && cur_cpu_spec->flush_tlb) + cur_cpu_spec->flush_tlb(TLBIEL_INVAL_PAGE); + /* reset error bits */ + dsisr &= ~P7_DSISR_MC_TLB_MULTIHIT_MFTLB; + } + /* Any other errors we don't understand? */ + if (dsisr & 0xffffffffUL) + handled = 0; + + return handled; +} + +static long mce_handle_derror_p7(uint64_t dsisr) +{ + return mce_handle_derror(dsisr, P7_DSISR_MC_SLB_ERRORS); +} + +static long mce_handle_common_ierror(uint64_t srr1) +{ + long handled = 0; + + switch (P7_SRR1_MC_IFETCH(srr1)) { + case 0: + break; + case P7_SRR1_MC_IFETCH_SLB_PARITY: + case P7_SRR1_MC_IFETCH_SLB_MULTIHIT: + /* flush and reload SLBs for SLB errors. */ + flush_and_reload_slb(); + handled = 1; + break; + case P7_SRR1_MC_IFETCH_TLB_MULTIHIT: + if (cur_cpu_spec && cur_cpu_spec->flush_tlb) { + cur_cpu_spec->flush_tlb(TLBIEL_INVAL_PAGE); + handled = 1; + } + break; + default: + break; + } + + return handled; +} + +static long mce_handle_ierror_p7(uint64_t srr1) +{ + long handled = 0; + + handled = mce_handle_common_ierror(srr1); + + if (P7_SRR1_MC_IFETCH(srr1) == P7_SRR1_MC_IFETCH_SLB_BOTH) { + flush_and_reload_slb(); + handled = 1; + } + return handled; +} + +static void mce_get_common_ierror(struct mce_error_info *mce_err, uint64_t srr1) +{ + switch (P7_SRR1_MC_IFETCH(srr1)) { + case P7_SRR1_MC_IFETCH_SLB_PARITY: + mce_err->error_type = MCE_ERROR_TYPE_SLB; + mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY; + break; + case P7_SRR1_MC_IFETCH_SLB_MULTIHIT: + mce_err->error_type = MCE_ERROR_TYPE_SLB; + mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT; + break; + case P7_SRR1_MC_IFETCH_TLB_MULTIHIT: + mce_err->error_type = MCE_ERROR_TYPE_TLB; + mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT; + break; + case P7_SRR1_MC_IFETCH_UE: + case P7_SRR1_MC_IFETCH_UE_IFU_INTERNAL: + mce_err->error_type = MCE_ERROR_TYPE_UE; + mce_err->u.ue_error_type = MCE_UE_ERROR_IFETCH; + break; + case P7_SRR1_MC_IFETCH_UE_TLB_RELOAD: + mce_err->error_type = MCE_ERROR_TYPE_UE; + mce_err->u.ue_error_type = + MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH; + break; + } +} + +static void mce_get_ierror_p7(struct mce_error_info *mce_err, uint64_t srr1) +{ + mce_get_common_ierror(mce_err, srr1); + if (P7_SRR1_MC_IFETCH(srr1) == P7_SRR1_MC_IFETCH_SLB_BOTH) { + mce_err->error_type = MCE_ERROR_TYPE_SLB; + mce_err->u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE; + } +} + +static void mce_get_derror_p7(struct mce_error_info *mce_err, uint64_t dsisr) +{ + if (dsisr & P7_DSISR_MC_UE) { + mce_err->error_type = MCE_ERROR_TYPE_UE; + mce_err->u.ue_error_type = MCE_UE_ERROR_LOAD_STORE; + } else if (dsisr & P7_DSISR_MC_UE_TABLEWALK) { + mce_err->error_type = MCE_ERROR_TYPE_UE; + mce_err->u.ue_error_type = + MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE; + } else if (dsisr & P7_DSISR_MC_ERAT_MULTIHIT) { + mce_err->error_type = MCE_ERROR_TYPE_ERAT; + mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT; + } else if (dsisr & P7_DSISR_MC_SLB_MULTIHIT) { + mce_err->error_type = MCE_ERROR_TYPE_SLB; + mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT; + } else if (dsisr & P7_DSISR_MC_SLB_PARITY_MFSLB) { + mce_err->error_type = MCE_ERROR_TYPE_SLB; + mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY; + } else if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) { + mce_err->error_type = MCE_ERROR_TYPE_TLB; + mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT; + } else if (dsisr & P7_DSISR_MC_SLB_MULTIHIT_PARITY) { + mce_err->error_type = MCE_ERROR_TYPE_SLB; + mce_err->u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE; + } +} + +static long mce_handle_ue_error(struct pt_regs *regs) +{ + long handled = 0; + + /* + * On specific SCOM read via MMIO we may get a machine check + * exception with SRR0 pointing inside opal. If that is the + * case OPAL may have recovery address to re-read SCOM data in + * different way and hence we can recover from this MC. + */ + + if (ppc_md.mce_check_early_recovery) { + if (ppc_md.mce_check_early_recovery(regs)) + handled = 1; + } + return handled; +} + +long __machine_check_early_realmode_p7(struct pt_regs *regs) +{ + uint64_t srr1, nip, addr; + long handled = 1; + struct mce_error_info mce_error_info = { 0 }; + + srr1 = regs->msr; + nip = regs->nip; + + /* + * Handle memory errors depending whether this was a load/store or + * ifetch exception. Also, populate the mce error_type and + * type-specific error_type from either SRR1 or DSISR, depending + * whether this was a load/store or ifetch exception + */ + if (P7_SRR1_MC_LOADSTORE(srr1)) { + handled = mce_handle_derror_p7(regs->dsisr); + mce_get_derror_p7(&mce_error_info, regs->dsisr); + addr = regs->dar; + } else { + handled = mce_handle_ierror_p7(srr1); + mce_get_ierror_p7(&mce_error_info, srr1); + addr = regs->nip; + } + + /* Handle UE error. */ + if (mce_error_info.error_type == MCE_ERROR_TYPE_UE) + handled = mce_handle_ue_error(regs); + + save_mce_event(regs, handled, &mce_error_info, nip, addr); + return handled; +} + +static void mce_get_ierror_p8(struct mce_error_info *mce_err, uint64_t srr1) +{ + mce_get_common_ierror(mce_err, srr1); + if (P7_SRR1_MC_IFETCH(srr1) == P8_SRR1_MC_IFETCH_ERAT_MULTIHIT) { + mce_err->error_type = MCE_ERROR_TYPE_ERAT; + mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT; + } +} + +static void mce_get_derror_p8(struct mce_error_info *mce_err, uint64_t dsisr) +{ + mce_get_derror_p7(mce_err, dsisr); + if (dsisr & P8_DSISR_MC_ERAT_MULTIHIT_SEC) { + mce_err->error_type = MCE_ERROR_TYPE_ERAT; + mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT; + } +} + +static long mce_handle_ierror_p8(uint64_t srr1) +{ + long handled = 0; + + handled = mce_handle_common_ierror(srr1); + + if (P7_SRR1_MC_IFETCH(srr1) == P8_SRR1_MC_IFETCH_ERAT_MULTIHIT) { + flush_and_reload_slb(); + handled = 1; + } + return handled; +} + +static long mce_handle_derror_p8(uint64_t dsisr) +{ + return mce_handle_derror(dsisr, P8_DSISR_MC_SLB_ERRORS); +} + +long __machine_check_early_realmode_p8(struct pt_regs *regs) +{ + uint64_t srr1, nip, addr; + long handled = 1; + struct mce_error_info mce_error_info = { 0 }; + + srr1 = regs->msr; + nip = regs->nip; + + if (P7_SRR1_MC_LOADSTORE(srr1)) { + handled = mce_handle_derror_p8(regs->dsisr); + mce_get_derror_p8(&mce_error_info, regs->dsisr); + addr = regs->dar; + } else { + handled = mce_handle_ierror_p8(srr1); + mce_get_ierror_p8(&mce_error_info, srr1); + addr = regs->nip; + } + + /* Handle UE error. */ + if (mce_error_info.error_type == MCE_ERROR_TYPE_UE) + handled = mce_handle_ue_error(regs); + + save_mce_event(regs, handled, &mce_error_info, nip, addr); + return handled; +} diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 777d999f563..7c6bb4b17b4 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -36,26 +36,44 @@ .text +/* + * We store the saved ksp_limit in the unused part + * of the STACK_FRAME_OVERHEAD + */ _GLOBAL(call_do_softirq) mflr r0 stw r0,4(r1) + lwz r10,THREAD+KSP_LIMIT(r2) + addi r11,r3,THREAD_INFO_GAP stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3) mr r1,r3 + stw r10,8(r1) + stw r11,THREAD+KSP_LIMIT(r2) bl __do_softirq + lwz r10,8(r1) lwz r1,0(r1) lwz r0,4(r1) + stw r10,THREAD+KSP_LIMIT(r2) mtlr r0 blr -_GLOBAL(call_handle_irq) +/* + * void call_do_irq(struct pt_regs *regs, struct thread_info *irqtp); + */ +_GLOBAL(call_do_irq) mflr r0 stw r0,4(r1) - mtctr r6 - stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r5) - mr r1,r5 - bctrl + lwz r10,THREAD+KSP_LIMIT(r2) + addi r11,r4,THREAD_INFO_GAP + stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4) + mr r1,r4 + stw r10,8(r1) + stw r11,THREAD+KSP_LIMIT(r2) + bl __do_irq + lwz r10,8(r1) lwz r1,0(r1) lwz r0,4(r1) + stw r10,THREAD+KSP_LIMIT(r2) mtlr r0 blr @@ -329,7 +347,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_UNIFIED_ID_CACHE) */ _KPROBE(flush_icache_range) BEGIN_FTR_SECTION - isync + PURGE_PREFETCHED_INS blr /* for 601, do nothing */ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) li r5,L1_CACHE_BYTES-1 @@ -433,6 +451,7 @@ _GLOBAL(invalidate_dcache_range) */ _GLOBAL(__flush_dcache_icache) BEGIN_FTR_SECTION + PURGE_PREFETCHED_INS blr END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) rlwinm r3,r3,0,0,31-PAGE_SHIFT /* Get page base address */ @@ -474,6 +493,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_44x) */ _GLOBAL(__flush_dcache_icache_phys) BEGIN_FTR_SECTION + PURGE_PREFETCHED_INS blr /* for 601, do nothing */ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) mfmsr r10 @@ -644,6 +664,20 @@ _GLOBAL(__lshrdi3) blr /* + * 64-bit comparison: __cmpdi2(s64 a, s64 b) + * Returns 0 if a < b, 1 if a == b, 2 if a > b. + */ +_GLOBAL(__cmpdi2) + cmpw r3,r5 + li r3,1 + bne 1f + cmplw r4,r6 + beqlr +1: li r3,0 + bltlr + li r3,2 + blr +/* * 64-bit comparison: __ucmpdi2(u64 a, u64 b) * Returns 0 if a < b, 1 if a == b, 2 if a > b. */ diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 971d7e78aff..4e314b90c75 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -34,20 +34,18 @@ _GLOBAL(call_do_softirq) std r0,16(r1) stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3) mr r1,r3 - bl .__do_softirq + bl __do_softirq ld r1,0(r1) ld r0,16(r1) mtlr r0 blr -_GLOBAL(call_handle_irq) - ld r8,0(r6) +_GLOBAL(call_do_irq) mflr r0 std r0,16(r1) - mtctr r8 - stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r5) - mr r1,r5 - bctrl + stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4) + mr r1,r4 + bl __do_irq ld r1,0(r1) ld r0,16(r1) mtlr r0 @@ -69,6 +67,7 @@ PPC64_CACHES: _KPROBE(flush_icache_range) BEGIN_FTR_SECTION + PURGE_PREFETCHED_INS blr END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) /* @@ -213,6 +212,11 @@ _GLOBAL(__flush_dcache_icache) * Different systems have different cache line sizes */ +BEGIN_FTR_SECTION + PURGE_PREFETCHED_INS + blr +END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) + /* Flush the dcache */ ld r7,PPC64_CACHES@toc(r2) clrrdi r3,r3,PAGE_SHIFT /* Page align */ @@ -248,8 +252,8 @@ _GLOBAL(__bswapdi2) or r3,r7,r9 blr -#if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE) +#ifdef CONFIG_PPC_EARLY_DEBUG_BOOTX _GLOBAL(rmci_on) sync isync @@ -279,6 +283,9 @@ _GLOBAL(rmci_off) isync sync blr +#endif /* CONFIG_PPC_EARLY_DEBUG_BOOTX */ + +#if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE) /* * Do an IO access in real mode @@ -499,7 +506,7 @@ _GLOBAL(kexec_smp_wait) stb r4,PACAKEXECSTATE(r13) SYNC - b .kexec_wait + b kexec_wait /* * switch to real mode (turn mmu off) @@ -569,7 +576,7 @@ _GLOBAL(kexec_sequence) /* copy dest pages, flush whole dest image */ mr r3,r29 - bl .kexec_copy_flush /* (image) */ + bl kexec_copy_flush /* (image) */ /* turn off mmu */ bl real_mode @@ -579,7 +586,7 @@ _GLOBAL(kexec_sequence) mr r4,r30 /* start, aka phys mem offset */ li r5,0x100 li r6,0 - bl .copy_and_flush /* (dest, src, copy limit, start offset) */ + bl copy_and_flush /* (dest, src, copy limit, start offset) */ 1: /* assume normal blr return */ /* release other cpus to the new kernel secondary start at 0x60 */ @@ -588,8 +595,12 @@ _GLOBAL(kexec_sequence) stw r6,kexec_flag-1b(5) /* clear out hardware hash page table and tlb */ - ld r5,0(r27) /* deref function descriptor */ - mtctr r5 +#if !defined(_CALL_ELF) || _CALL_ELF != 2 + ld r12,0(r27) /* deref function descriptor */ +#else + mr r12,r27 +#endif + mtctr r12 bctrl /* ppc_md.hpte_clear_all(void); */ /* @@ -623,3 +634,31 @@ _GLOBAL(kexec_sequence) li r5,0 blr /* image->start(physid, image->start, 0); */ #endif /* CONFIG_KEXEC */ + +#ifdef CONFIG_MODULES +#if defined(_CALL_ELF) && _CALL_ELF == 2 + +#ifdef CONFIG_MODVERSIONS +.weak __crc_TOC. +.section "___kcrctab+TOC.","a" +.globl __kcrctab_TOC. +__kcrctab_TOC.: + .llong __crc_TOC. +#endif + +/* + * Export a fake .TOC. since both modpost and depmod will complain otherwise. + * Both modpost and depmod strip the leading . so we do the same here. + */ +.section "__ksymtab_strings","a" +__kstrtab_TOC.: + .asciz "TOC." + +.section "___ksymtab+TOC.","a" +/* This symbol name is important: it's used by modpost to find exported syms */ +.globl __ksymtab_TOC. +__ksymtab_TOC.: + .llong 0 /* .value */ + .llong __kstrtab_TOC. +#endif /* ELFv2 */ +#endif /* MODULES */ diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index 2d275707f41..9547381b631 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -25,8 +25,7 @@ #include <asm/uaccess.h> #include <asm/firmware.h> #include <linux/sort.h> - -#include "setup.h" +#include <asm/setup.h> LIST_HEAD(module_bug_list); diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c index 2e3200ca485..6cff040bf45 100644 --- a/arch/powerpc/kernel/module_32.c +++ b/arch/powerpc/kernel/module_32.c @@ -26,8 +26,7 @@ #include <linux/cache.h> #include <linux/bug.h> #include <linux/sort.h> - -#include "setup.h" +#include <asm/setup.h> #if 0 #define DEBUGP printk diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 6ee59a0eb26..d807ee626af 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -22,12 +22,12 @@ #include <linux/vmalloc.h> #include <linux/ftrace.h> #include <linux/bug.h> +#include <linux/uaccess.h> #include <asm/module.h> #include <asm/firmware.h> #include <asm/code-patching.h> #include <linux/sort.h> - -#include "setup.h" +#include <asm/setup.h> /* FIXME: We don't do .init separately. To do this, we'd need to have a separate r2 value in the init and core section, and stub between @@ -42,35 +42,170 @@ #define DEBUGP(fmt , ...) #endif +#if defined(_CALL_ELF) && _CALL_ELF == 2 +#define R2_STACK_OFFSET 24 + +/* An address is simply the address of the function. */ +typedef unsigned long func_desc_t; + +static func_desc_t func_desc(unsigned long addr) +{ + return addr; +} +static unsigned long func_addr(unsigned long addr) +{ + return addr; +} +static unsigned long stub_func_addr(func_desc_t func) +{ + return func; +} + +/* PowerPC64 specific values for the Elf64_Sym st_other field. */ +#define STO_PPC64_LOCAL_BIT 5 +#define STO_PPC64_LOCAL_MASK (7 << STO_PPC64_LOCAL_BIT) +#define PPC64_LOCAL_ENTRY_OFFSET(other) \ + (((1 << (((other) & STO_PPC64_LOCAL_MASK) >> STO_PPC64_LOCAL_BIT)) >> 2) << 2) + +static unsigned int local_entry_offset(const Elf64_Sym *sym) +{ + /* sym->st_other indicates offset to local entry point + * (otherwise it will assume r12 is the address of the start + * of function and try to derive r2 from it). */ + return PPC64_LOCAL_ENTRY_OFFSET(sym->st_other); +} +#else +#define R2_STACK_OFFSET 40 + +/* An address is address of the OPD entry, which contains address of fn. */ +typedef struct ppc64_opd_entry func_desc_t; + +static func_desc_t func_desc(unsigned long addr) +{ + return *(struct ppc64_opd_entry *)addr; +} +static unsigned long func_addr(unsigned long addr) +{ + return func_desc(addr).funcaddr; +} +static unsigned long stub_func_addr(func_desc_t func) +{ + return func.funcaddr; +} +static unsigned int local_entry_offset(const Elf64_Sym *sym) +{ + return 0; +} +#endif + /* Like PPC32, we need little trampolines to do > 24-bit jumps (into the kernel itself). But on PPC64, these need to be used for every jump, actually, to reset r2 (TOC+0x8000). */ struct ppc64_stub_entry { - /* 28 byte jump instruction sequence (7 instructions) */ - unsigned char jump[28]; - unsigned char unused[4]; + /* 28 byte jump instruction sequence (7 instructions). We only + * need 6 instructions on ABIv2 but we always allocate 7 so + * so we don't have to modify the trampoline load instruction. */ + u32 jump[7]; + u32 unused; /* Data for the above code */ - struct ppc64_opd_entry opd; + func_desc_t funcdata; }; -/* We use a stub to fix up r2 (TOC ptr) and to jump to the (external) - function which may be more than 24-bits away. We could simply - patch the new r2 value and function pointer into the stub, but it's - significantly shorter to put these values at the end of the stub - code, and patch the stub address (32-bits relative to the TOC ptr, - r2) into the stub. */ -static struct ppc64_stub_entry ppc64_stub = -{ .jump = { - 0x3d, 0x82, 0x00, 0x00, /* addis r12,r2, <high> */ - 0x39, 0x8c, 0x00, 0x00, /* addi r12,r12, <low> */ +/* + * PPC64 uses 24 bit jumps, but we need to jump into other modules or + * the kernel which may be further. So we jump to a stub. + * + * For ELFv1 we need to use this to set up the new r2 value (aka TOC + * pointer). For ELFv2 it's the callee's responsibility to set up the + * new r2, but for both we need to save the old r2. + * + * We could simply patch the new r2 value and function pointer into + * the stub, but it's significantly shorter to put these values at the + * end of the stub code, and patch the stub address (32-bits relative + * to the TOC ptr, r2) into the stub. + */ + +static u32 ppc64_stub_insns[] = { + 0x3d620000, /* addis r11,r2, <high> */ + 0x396b0000, /* addi r11,r11, <low> */ /* Save current r2 value in magic place on the stack. */ - 0xf8, 0x41, 0x00, 0x28, /* std r2,40(r1) */ - 0xe9, 0x6c, 0x00, 0x20, /* ld r11,32(r12) */ - 0xe8, 0x4c, 0x00, 0x28, /* ld r2,40(r12) */ - 0x7d, 0x69, 0x03, 0xa6, /* mtctr r11 */ - 0x4e, 0x80, 0x04, 0x20 /* bctr */ -} }; + 0xf8410000|R2_STACK_OFFSET, /* std r2,R2_STACK_OFFSET(r1) */ + 0xe98b0020, /* ld r12,32(r11) */ +#if !defined(_CALL_ELF) || _CALL_ELF != 2 + /* Set up new r2 from function descriptor */ + 0xe84b0028, /* ld r2,40(r11) */ +#endif + 0x7d8903a6, /* mtctr r12 */ + 0x4e800420 /* bctr */ +}; + +#ifdef CONFIG_DYNAMIC_FTRACE + +static u32 ppc64_stub_mask[] = { + 0xffff0000, + 0xffff0000, + 0xffffffff, + 0xffffffff, +#if !defined(_CALL_ELF) || _CALL_ELF != 2 + 0xffffffff, +#endif + 0xffffffff, + 0xffffffff +}; + +bool is_module_trampoline(u32 *p) +{ + unsigned int i; + u32 insns[ARRAY_SIZE(ppc64_stub_insns)]; + + BUILD_BUG_ON(sizeof(ppc64_stub_insns) != sizeof(ppc64_stub_mask)); + + if (probe_kernel_read(insns, p, sizeof(insns))) + return -EFAULT; + + for (i = 0; i < ARRAY_SIZE(ppc64_stub_insns); i++) { + u32 insna = insns[i]; + u32 insnb = ppc64_stub_insns[i]; + u32 mask = ppc64_stub_mask[i]; + + if ((insna & mask) != (insnb & mask)) + return false; + } + + return true; +} + +int module_trampoline_target(struct module *mod, u32 *trampoline, + unsigned long *target) +{ + u32 buf[2]; + u16 upper, lower; + long offset; + void *toc_entry; + + if (probe_kernel_read(buf, trampoline, sizeof(buf))) + return -EFAULT; + + upper = buf[0] & 0xffff; + lower = buf[1] & 0xffff; + + /* perform the addis/addi, both signed */ + offset = ((short)upper << 16) + (short)lower; + + /* + * Now get the address this trampoline jumps to. This + * is always 32 bytes into our trampoline stub. + */ + toc_entry = (void *)mod->arch.toc + offset + 32; + + if (probe_kernel_read(target, toc_entry, sizeof(*target))) + return -EFAULT; + + return 0; +} + +#endif /* Count how many different 24-bit relocations (different symbol, different addend) */ @@ -173,17 +308,27 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr, return relocs * sizeof(struct ppc64_stub_entry); } +/* Still needed for ELFv2, for .TOC. */ static void dedotify_versions(struct modversion_info *vers, unsigned long size) { struct modversion_info *end; for (end = (void *)vers + size; vers < end; vers++) - if (vers->name[0] == '.') + if (vers->name[0] == '.') { memmove(vers->name, vers->name+1, strlen(vers->name)); +#ifdef ARCH_RELOCATES_KCRCTAB + /* The TOC symbol has no CRC computed. To avoid CRC + * check failing, we must force it to the expected + * value (see CRC check in module.c). + */ + if (!strcmp(vers->name, "TOC.")) + vers->crc = -(unsigned long)reloc_start; +#endif + } } -/* Undefined symbols which refer to .funcname, hack to funcname */ +/* Undefined symbols which refer to .funcname, hack to funcname (or .TOC.) */ static void dedotify(Elf64_Sym *syms, unsigned int numsyms, char *strtab) { unsigned int i; @@ -197,6 +342,24 @@ static void dedotify(Elf64_Sym *syms, unsigned int numsyms, char *strtab) } } +static Elf64_Sym *find_dot_toc(Elf64_Shdr *sechdrs, + const char *strtab, + unsigned int symindex) +{ + unsigned int i, numsyms; + Elf64_Sym *syms; + + syms = (Elf64_Sym *)sechdrs[symindex].sh_addr; + numsyms = sechdrs[symindex].sh_size / sizeof(Elf64_Sym); + + for (i = 1; i < numsyms; i++) { + if (syms[i].st_shndx == SHN_UNDEF + && strcmp(strtab + syms[i].st_name, "TOC.") == 0) + return &syms[i]; + } + return NULL; +} + int module_frob_arch_sections(Elf64_Ehdr *hdr, Elf64_Shdr *sechdrs, char *secstrings, @@ -261,16 +424,12 @@ static inline unsigned long my_r2(Elf64_Shdr *sechdrs, struct module *me) /* Patch stub to reference function and correct r2 value. */ static inline int create_stub(Elf64_Shdr *sechdrs, struct ppc64_stub_entry *entry, - struct ppc64_opd_entry *opd, + unsigned long addr, struct module *me) { - Elf64_Half *loc1, *loc2; long reladdr; - *entry = ppc64_stub; - - loc1 = (Elf64_Half *)&entry->jump[2]; - loc2 = (Elf64_Half *)&entry->jump[6]; + memcpy(entry->jump, ppc64_stub_insns, sizeof(ppc64_stub_insns)); /* Stub uses address relative to r2. */ reladdr = (unsigned long)entry - my_r2(sechdrs, me); @@ -281,35 +440,33 @@ static inline int create_stub(Elf64_Shdr *sechdrs, } DEBUGP("Stub %p get data from reladdr %li\n", entry, reladdr); - *loc1 = PPC_HA(reladdr); - *loc2 = PPC_LO(reladdr); - entry->opd.funcaddr = opd->funcaddr; - entry->opd.r2 = opd->r2; + entry->jump[0] |= PPC_HA(reladdr); + entry->jump[1] |= PPC_LO(reladdr); + entry->funcdata = func_desc(addr); return 1; } -/* Create stub to jump to function described in this OPD: we need the +/* Create stub to jump to function described in this OPD/ptr: we need the stub to set up the TOC ptr (r2) for the function. */ static unsigned long stub_for_addr(Elf64_Shdr *sechdrs, - unsigned long opdaddr, + unsigned long addr, struct module *me) { struct ppc64_stub_entry *stubs; - struct ppc64_opd_entry *opd = (void *)opdaddr; unsigned int i, num_stubs; num_stubs = sechdrs[me->arch.stubs_section].sh_size / sizeof(*stubs); /* Find this stub, or if that fails, the next avail. entry */ stubs = (void *)sechdrs[me->arch.stubs_section].sh_addr; - for (i = 0; stubs[i].opd.funcaddr; i++) { + for (i = 0; stub_func_addr(stubs[i].funcdata); i++) { BUG_ON(i >= num_stubs); - if (stubs[i].opd.funcaddr == opd->funcaddr) + if (stub_func_addr(stubs[i].funcdata) == func_addr(addr)) return (unsigned long)&stubs[i]; } - if (!create_stub(sechdrs, &stubs[i], opd, me)) + if (!create_stub(sechdrs, &stubs[i], addr, me)) return 0; return (unsigned long)&stubs[i]; @@ -324,7 +481,8 @@ static int restore_r2(u32 *instruction, struct module *me) me->name, *instruction); return 0; } - *instruction = 0xe8410028; /* ld r2,40(r1) */ + /* ld r2,R2_STACK_OFFSET(r1) */ + *instruction = 0xe8410000 | R2_STACK_OFFSET; return 1; } @@ -342,6 +500,17 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, DEBUGP("Applying ADD relocate section %u to %u\n", relsec, sechdrs[relsec].sh_info); + + /* First time we're called, we can fix up .TOC. */ + if (!me->arch.toc_fixed) { + sym = find_dot_toc(sechdrs, strtab, symindex); + /* It's theoretically possible that a module doesn't want a + * .TOC. so don't fail it just for that. */ + if (sym) + sym->st_value = my_r2(sechdrs, me); + me->arch.toc_fixed = true; + } + for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rela); i++) { /* This is where to make the change */ location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr @@ -438,7 +607,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, return -ENOENT; if (!restore_r2((u32 *)location + 1, me)) return -ENOEXEC; - } + } else + value += local_entry_offset(sym); /* Convert value to relative */ value -= (unsigned long)location; @@ -459,6 +629,31 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, *location = value - (unsigned long)location; break; + case R_PPC64_TOCSAVE: + /* + * Marker reloc indicates we don't have to save r2. + * That would only save us one instruction, so ignore + * it. + */ + break; + + case R_PPC64_REL16_HA: + /* Subtract location pointer */ + value -= (unsigned long)location; + value = ((value + 0x8000) >> 16); + *((uint16_t *) location) + = (*((uint16_t *) location) & ~0xffff) + | (value & 0xffff); + break; + + case R_PPC64_REL16_LO: + /* Subtract location pointer */ + value -= (unsigned long)location; + *((uint16_t *) location) + = (*((uint16_t *) location) & ~0xffff) + | (value & 0xffff); + break; + default: printk("%s: Unknown ADD relocation: %lu\n", me->name, diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index 8213ee1eb05..28b898e6818 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -210,7 +210,7 @@ static void __init nvram_print_partitions(char * label) printk(KERN_WARNING "--------%s---------\n", label); printk(KERN_WARNING "indx\t\tsig\tchks\tlen\tname\n"); list_for_each_entry(tmp_part, &nvram_partitions, partition) { - printk(KERN_WARNING "%4d \t%02x\t%02x\t%d\t%12s\n", + printk(KERN_WARNING "%4d \t%02x\t%02x\t%d\t%12.12s\n", tmp_part->index, tmp_part->header.signature, tmp_part->header.checksum, tmp_part->header.length, tmp_part->header.name); @@ -223,9 +223,13 @@ static int __init nvram_write_header(struct nvram_partition * part) { loff_t tmp_index; int rc; - + struct nvram_header phead; + + memcpy(&phead, &part->header, NVRAM_HEADER_LEN); + phead.length = cpu_to_be16(phead.length); + tmp_index = part->index; - rc = ppc_md.nvram_write((char *)&part->header, NVRAM_HEADER_LEN, &tmp_index); + rc = ppc_md.nvram_write((char *)&phead, NVRAM_HEADER_LEN, &tmp_index); return rc; } @@ -505,6 +509,8 @@ int __init nvram_scan_partitions(void) memcpy(&phead, header, NVRAM_HEADER_LEN); + phead.length = be16_to_cpu(phead.length); + err = 0; c_sum = nvram_checksum(&phead); if (c_sum != phead.checksum) { diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 3fc16e3beb9..d6e195e8cd4 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -46,7 +46,7 @@ struct lppaca lppaca[] = { static struct lppaca *extra_lppacas; static long __initdata lppaca_size; -static void allocate_lppacas(int nr_cpus, unsigned long limit) +static void __init allocate_lppacas(int nr_cpus, unsigned long limit) { if (nr_cpus <= NR_LPPACAS) return; @@ -57,7 +57,7 @@ static void allocate_lppacas(int nr_cpus, unsigned long limit) PAGE_SIZE, limit)); } -static struct lppaca *new_lppaca(int cpu) +static struct lppaca * __init new_lppaca(int cpu) { struct lppaca *lp; @@ -70,7 +70,7 @@ static struct lppaca *new_lppaca(int cpu) return lp; } -static void free_lppacas(void) +static void __init free_lppacas(void) { long new_size = 0, nr; @@ -98,13 +98,32 @@ static inline void free_lppacas(void) { } /* * 3 persistent SLBs are registered here. The buffer will be zero * initially, hence will all be invaild until we actually write them. + * + * If you make the number of persistent SLB entries dynamic, please also + * update PR KVM to flush and restore them accordingly. */ -struct slb_shadow slb_shadow[] __cacheline_aligned = { - [0 ... (NR_CPUS-1)] = { - .persistent = cpu_to_be32(SLB_NUM_BOLTED), - .buffer_length = cpu_to_be32(sizeof(struct slb_shadow)), - }, -}; +static struct slb_shadow *slb_shadow; + +static void __init allocate_slb_shadows(int nr_cpus, int limit) +{ + int size = PAGE_ALIGN(sizeof(struct slb_shadow) * nr_cpus); + slb_shadow = __va(memblock_alloc_base(size, PAGE_SIZE, limit)); + memset(slb_shadow, 0, size); +} + +static struct slb_shadow * __init init_slb_shadow(int cpu) +{ + struct slb_shadow *s = &slb_shadow[cpu]; + + s->persistent = cpu_to_be32(SLB_NUM_BOLTED); + s->buffer_length = cpu_to_be32(sizeof(*s)); + + return s; +} + +#else /* CONFIG_PPC_STD_MMU_64 */ + +static void __init allocate_slb_shadows(int nr_cpus, int limit) { } #endif /* CONFIG_PPC_STD_MMU_64 */ @@ -136,14 +155,20 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu) new_paca->paca_index = cpu; new_paca->kernel_toc = kernel_toc; new_paca->kernelbase = (unsigned long) _stext; - new_paca->kernel_msr = MSR_KERNEL; + /* Only set MSR:IR/DR when MMU is initialized */ + new_paca->kernel_msr = MSR_KERNEL & ~(MSR_IR | MSR_DR); new_paca->hw_cpu_id = 0xffff; new_paca->kexec_state = KEXEC_STATE_NONE; new_paca->__current = &init_task; new_paca->data_offset = 0xfeeeeeeeeeeeeeeeULL; #ifdef CONFIG_PPC_STD_MMU_64 - new_paca->slb_shadow_ptr = &slb_shadow[cpu]; + new_paca->slb_shadow_ptr = init_slb_shadow(cpu); #endif /* CONFIG_PPC_STD_MMU_64 */ + +#ifdef CONFIG_PPC_BOOK3E + /* For now -- if we have threads this will be adjusted later */ + new_paca->tcd_ptr = &new_paca->tcd; +#endif } /* Put the paca pointer into r13 and SPRG_PACA */ @@ -190,6 +215,8 @@ void __init allocate_pacas(void) allocate_lppacas(nr_cpu_ids, limit); + allocate_slb_shadows(nr_cpu_ids, limit); + /* Can't use for_each_*_cpu, as they aren't functional yet */ for (cpu = 0; cpu < nr_cpu_ids; cpu++) initialise_paca(&paca[cpu], cpu); diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 905a24bb7ac..b49c72fd7f1 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -21,6 +21,7 @@ #include <linux/string.h> #include <linux/init.h> #include <linux/bootmem.h> +#include <linux/delay.h> #include <linux/export.h> #include <linux/of_address.h> #include <linux/of_pci.h> @@ -120,6 +121,25 @@ resource_size_t pcibios_window_alignment(struct pci_bus *bus, return 1; } +void pcibios_reset_secondary_bus(struct pci_dev *dev) +{ + u16 ctrl; + + if (ppc_md.pcibios_reset_secondary_bus) { + ppc_md.pcibios_reset_secondary_bus(dev); + return; + } + + pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &ctrl); + ctrl |= PCI_BRIDGE_CTL_BUS_RESET; + pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl); + msleep(2); + + ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET; + pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl); + ssleep(1); +} + static resource_size_t pcibios_io_size(const struct pci_controller *hose) { #ifdef CONFIG_PPC64 @@ -201,26 +221,6 @@ struct pci_controller* pci_find_hose_for_OF_device(struct device_node* node) return NULL; } -static ssize_t pci_show_devspec(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct pci_dev *pdev; - struct device_node *np; - - pdev = to_pci_dev (dev); - np = pci_device_to_OF_node(pdev); - if (np == NULL || np->full_name == NULL) - return 0; - return sprintf(buf, "%s", np->full_name); -} -static DEVICE_ATTR(devspec, S_IRUGO, pci_show_devspec, NULL); - -/* Add sysfs properties */ -int pcibios_add_platform_entries(struct pci_dev *pdev) -{ - return device_create_file(&pdev->dev, &dev_attr_devspec); -} - /* * Reads the interrupt pin to determine if interrupt is use by card. * If the interrupt is used, then gets the interrupt line from the @@ -228,7 +228,7 @@ int pcibios_add_platform_entries(struct pci_dev *pdev) */ static int pci_read_irq_line(struct pci_dev *pci_dev) { - struct of_irq oirq; + struct of_phandle_args oirq; unsigned int virq; pr_debug("PCI: Try to map irq for %s...\n", pci_name(pci_dev)); @@ -237,7 +237,7 @@ static int pci_read_irq_line(struct pci_dev *pci_dev) memset(&oirq, 0xff, sizeof(oirq)); #endif /* Try to get a mapping from the device-tree */ - if (of_irq_map_pci(pci_dev, &oirq)) { + if (of_irq_parse_pci(pci_dev, &oirq)) { u8 line, pin; /* If that fails, lets fallback to what is in the config @@ -263,11 +263,10 @@ static int pci_read_irq_line(struct pci_dev *pci_dev) irq_set_irq_type(virq, IRQ_TYPE_LEVEL_LOW); } else { pr_debug(" Got one, spec %d cells (0x%08x 0x%08x...) on %s\n", - oirq.size, oirq.specifier[0], oirq.specifier[1], - of_node_full_name(oirq.controller)); + oirq.args_count, oirq.args[0], oirq.args[1], + of_node_full_name(oirq.np)); - virq = irq_create_of_mapping(oirq.controller, oirq.specifier, - oirq.size); + virq = irq_create_of_mapping(&oirq); } if(virq == NO_IRQ) { pr_debug(" Failed to map !\n"); @@ -667,60 +666,36 @@ void pci_resource_to_user(const struct pci_dev *dev, int bar, void pci_process_bridge_OF_ranges(struct pci_controller *hose, struct device_node *dev, int primary) { - const __be32 *ranges; - int rlen; - int pna = of_n_addr_cells(dev); - int np = pna + 5; int memno = 0; - u32 pci_space; - unsigned long long pci_addr, cpu_addr, pci_next, cpu_next, size; struct resource *res; + struct of_pci_range range; + struct of_pci_range_parser parser; printk(KERN_INFO "PCI host bridge %s %s ranges:\n", dev->full_name, primary ? "(primary)" : ""); - /* Get ranges property */ - ranges = of_get_property(dev, "ranges", &rlen); - if (ranges == NULL) + /* Check for ranges property */ + if (of_pci_range_parser_init(&parser, dev)) return; /* Parse it */ - while ((rlen -= np * 4) >= 0) { - /* Read next ranges element */ - pci_space = of_read_number(ranges, 1); - pci_addr = of_read_number(ranges + 1, 2); - cpu_addr = of_translate_address(dev, ranges + 3); - size = of_read_number(ranges + pna + 3, 2); - ranges += np; - + for_each_of_pci_range(&parser, &range) { /* If we failed translation or got a zero-sized region * (some FW try to feed us with non sensical zero sized regions * such as power3 which look like some kind of attempt at exposing * the VGA memory hole) */ - if (cpu_addr == OF_BAD_ADDR || size == 0) + if (range.cpu_addr == OF_BAD_ADDR || range.size == 0) continue; - /* Now consume following elements while they are contiguous */ - for (; rlen >= np * sizeof(u32); - ranges += np, rlen -= np * 4) { - if (of_read_number(ranges, 1) != pci_space) - break; - pci_next = of_read_number(ranges + 1, 2); - cpu_next = of_translate_address(dev, ranges + 3); - if (pci_next != pci_addr + size || - cpu_next != cpu_addr + size) - break; - size += of_read_number(ranges + pna + 3, 2); - } - /* Act based on address space type */ res = NULL; - switch ((pci_space >> 24) & 0x3) { - case 1: /* PCI IO space */ + switch (range.flags & IORESOURCE_TYPE_BITS) { + case IORESOURCE_IO: printk(KERN_INFO " IO 0x%016llx..0x%016llx -> 0x%016llx\n", - cpu_addr, cpu_addr + size - 1, pci_addr); + range.cpu_addr, range.cpu_addr + range.size - 1, + range.pci_addr); /* We support only one IO range */ if (hose->pci_io_size) { @@ -730,11 +705,12 @@ void pci_process_bridge_OF_ranges(struct pci_controller *hose, } #ifdef CONFIG_PPC32 /* On 32 bits, limit I/O space to 16MB */ - if (size > 0x01000000) - size = 0x01000000; + if (range.size > 0x01000000) + range.size = 0x01000000; /* 32 bits needs to map IOs here */ - hose->io_base_virt = ioremap(cpu_addr, size); + hose->io_base_virt = ioremap(range.cpu_addr, + range.size); /* Expect trouble if pci_addr is not 0 */ if (primary) @@ -744,20 +720,20 @@ void pci_process_bridge_OF_ranges(struct pci_controller *hose, /* pci_io_size and io_base_phys always represent IO * space starting at 0 so we factor in pci_addr */ - hose->pci_io_size = pci_addr + size; - hose->io_base_phys = cpu_addr - pci_addr; + hose->pci_io_size = range.pci_addr + range.size; + hose->io_base_phys = range.cpu_addr - range.pci_addr; /* Build resource */ res = &hose->io_resource; - res->flags = IORESOURCE_IO; - res->start = pci_addr; + range.cpu_addr = range.pci_addr; break; - case 2: /* PCI Memory space */ - case 3: /* PCI 64 bits Memory space */ + case IORESOURCE_MEM: printk(KERN_INFO " MEM 0x%016llx..0x%016llx -> 0x%016llx %s\n", - cpu_addr, cpu_addr + size - 1, pci_addr, - (pci_space & 0x40000000) ? "Prefetch" : ""); + range.cpu_addr, range.cpu_addr + range.size - 1, + range.pci_addr, + (range.pci_space & 0x40000000) ? + "Prefetch" : ""); /* We support only 3 memory ranges */ if (memno >= 3) { @@ -766,28 +742,21 @@ void pci_process_bridge_OF_ranges(struct pci_controller *hose, continue; } /* Handles ISA memory hole space here */ - if (pci_addr == 0) { + if (range.pci_addr == 0) { if (primary || isa_mem_base == 0) - isa_mem_base = cpu_addr; - hose->isa_mem_phys = cpu_addr; - hose->isa_mem_size = size; + isa_mem_base = range.cpu_addr; + hose->isa_mem_phys = range.cpu_addr; + hose->isa_mem_size = range.size; } /* Build resource */ - hose->mem_offset[memno] = cpu_addr - pci_addr; + hose->mem_offset[memno] = range.cpu_addr - + range.pci_addr; res = &hose->mem_resources[memno++]; - res->flags = IORESOURCE_MEM; - if (pci_space & 0x40000000) - res->flags |= IORESOURCE_PREFETCH; - res->start = cpu_addr; break; } if (res != NULL) { - res->name = dev->full_name; - res->end = res->start + size - 1; - res->parent = NULL; - res->sibling = NULL; - res->child = NULL; + of_pci_range_to_resource(&range, dev, res); } } } @@ -836,7 +805,7 @@ static void pcibios_fixup_resources(struct pci_dev *dev) * at 0 as unset as well, except if PCI_PROBE_ONLY is also set * since in that case, we don't want to re-assign anything */ - pcibios_resource_to_bus(dev, ®, res); + pcibios_resource_to_bus(dev->bus, ®, res); if (pci_has_flag(PCI_REASSIGN_ALL_RSRC) || (reg.start == 0 && !pci_has_flag(PCI_PROBE_ONLY))) { /* Only print message if not re-assigning */ @@ -887,7 +856,7 @@ static int pcibios_uninitialized_bridge_resource(struct pci_bus *bus, /* Job is a bit different between memory and IO */ if (res->flags & IORESOURCE_MEM) { - pcibios_resource_to_bus(dev, ®ion, res); + pcibios_resource_to_bus(dev->bus, ®ion, res); /* If the BAR is non-0 then it's probably been initialized */ if (region.start != 0) diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c index c1e17ae68a0..5b789177aa2 100644 --- a/arch/powerpc/kernel/pci-hotplug.c +++ b/arch/powerpc/kernel/pci-hotplug.c @@ -98,8 +98,7 @@ void pcibios_add_pci_devices(struct pci_bus * bus) max = bus->busn_res.start; for (pass = 0; pass < 2; pass++) { list_for_each_entry(dev, &bus->devices, bus_list) { - if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE || - dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) + if (pci_is_bridge(dev)) max = pci_scan_bridge(bus, dev, max, pass); } diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index a9e311f7a9d..155013da27e 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -208,8 +208,7 @@ long sys_pciconfig_iobase(long which, unsigned long in_bus, unsigned long in_devfn) { struct pci_controller* hose; - struct list_head *ln; - struct pci_bus *bus = NULL; + struct pci_bus *tmp_bus, *bus = NULL; struct device_node *hose_node; /* Argh ! Please forgive me for that hack, but that's the @@ -230,11 +229,12 @@ long sys_pciconfig_iobase(long which, unsigned long in_bus, * used on pre-domains setup. We return the first match */ - for (ln = pci_root_buses.next; ln != &pci_root_buses; ln = ln->next) { - bus = pci_bus_b(ln); - if (in_bus >= bus->number && in_bus <= bus->busn_res.end) + list_for_each_entry(tmp_bus, &pci_root_buses, node) { + if (in_bus >= tmp_bus->number && + in_bus <= tmp_bus->busn_res.end) { + bus = tmp_bus; break; - bus = NULL; + } } if (bus == NULL || bus->dev.of_node == NULL) return -ENODEV; diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c index 4368ec6fdc8..44562aa97f1 100644 --- a/arch/powerpc/kernel/pci_of_scan.c +++ b/arch/powerpc/kernel/pci_of_scan.c @@ -111,7 +111,7 @@ static void of_pci_parse_addrs(struct device_node *node, struct pci_dev *dev) res->name = pci_name(dev); region.start = base; region.end = base + size - 1; - pcibios_bus_to_resource(dev, res, ®ion); + pcibios_bus_to_resource(dev->bus, res, ®ion); } } @@ -280,7 +280,7 @@ void of_scan_pci_bridge(struct pci_dev *dev) res->flags = flags; region.start = of_read_number(&ranges[1], 2); region.end = region.start + size - 1; - pcibios_bus_to_resource(dev, res, ®ion); + pcibios_bus_to_resource(dev->bus, res, ®ion); } sprintf(bus->name, "PCI Bus %04x:%02x", pci_domain_nr(bus), bus->number); @@ -302,8 +302,11 @@ static struct pci_dev *of_scan_pci_dev(struct pci_bus *bus, struct device_node *dn) { struct pci_dev *dev = NULL; - const u32 *reg; + const __be32 *reg; int reglen, devfn; +#ifdef CONFIG_EEH + struct eeh_dev *edev = of_node_to_eeh_dev(dn); +#endif pr_debug(" * %s\n", dn->full_name); if (!of_device_is_available(dn)) @@ -312,7 +315,7 @@ static struct pci_dev *of_scan_pci_dev(struct pci_bus *bus, reg = of_get_property(dn, "reg", ®len); if (reg == NULL || reglen < 20) return NULL; - devfn = (reg[0] >> 8) & 0xff; + devfn = (of_read_number(reg, 1) >> 8) & 0xff; /* Check if the PCI device is already there */ dev = pci_get_slot(bus, devfn); @@ -321,6 +324,12 @@ static struct pci_dev *of_scan_pci_dev(struct pci_bus *bus, return dev; } + /* Device removed permanently ? */ +#ifdef CONFIG_EEH + if (edev && (edev->mode & EEH_DEV_REMOVED)) + return NULL; +#endif + /* create a new pci_dev for this device */ dev = of_create_pci_dev(dn, bus, devfn); if (!dev) @@ -362,8 +371,7 @@ static void __of_scan_bus(struct device_node *node, struct pci_bus *bus, /* Now scan child busses */ list_for_each_entry(dev, &bus->devices, bus_list) { - if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE || - dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) { + if (pci_is_bridge(dev)) { of_scan_pci_bridge(dev); } } diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index 21646dbe1bb..48d17d6fca5 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -79,10 +79,12 @@ EXPORT_SYMBOL(strlen); EXPORT_SYMBOL(strcmp); EXPORT_SYMBOL(strncmp); +#ifndef CONFIG_GENERIC_CSUM EXPORT_SYMBOL(csum_partial); EXPORT_SYMBOL(csum_partial_copy_generic); EXPORT_SYMBOL(ip_fast_csum); EXPORT_SYMBOL(csum_tcpudp_magic); +#endif EXPORT_SYMBOL(__copy_tofrom_user); EXPORT_SYMBOL(__clear_user); @@ -98,9 +100,13 @@ EXPORT_SYMBOL(start_thread); #ifdef CONFIG_PPC_FPU EXPORT_SYMBOL(giveup_fpu); +EXPORT_SYMBOL(load_fp_state); +EXPORT_SYMBOL(store_fp_state); #endif #ifdef CONFIG_ALTIVEC EXPORT_SYMBOL(giveup_altivec); +EXPORT_SYMBOL(load_vr_state); +EXPORT_SYMBOL(store_vr_state); #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_VSX EXPORT_SYMBOL(giveup_vsx); @@ -114,6 +120,7 @@ EXPORT_SYMBOL(giveup_spe); EXPORT_SYMBOL(flush_instruction_cache); #endif EXPORT_SYMBOL(flush_dcache_range); +EXPORT_SYMBOL(flush_icache_range); #ifdef CONFIG_SMP #ifdef CONFIG_PPC32 @@ -143,6 +150,8 @@ EXPORT_SYMBOL(__ashldi3); EXPORT_SYMBOL(__lshrdi3); int __ucmpdi2(unsigned long long, unsigned long long); EXPORT_SYMBOL(__ucmpdi2); +int __cmpdi2(long long, long long); +EXPORT_SYMBOL(__cmpdi2); #endif long long __bswapdi2(long long); EXPORT_SYMBOL(__bswapdi2); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 6f428da53e2..be99774d3f4 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -25,7 +25,6 @@ #include <linux/slab.h> #include <linux/user.h> #include <linux/elf.h> -#include <linux/init.h> #include <linux/prctl.h> #include <linux/init_task.h> #include <linux/export.h> @@ -55,6 +54,7 @@ #ifdef CONFIG_PPC64 #include <asm/firmware.h> #endif +#include <asm/code-patching.h> #include <linux/kprobes.h> #include <linux/kdebug.h> @@ -74,6 +74,48 @@ struct task_struct *last_task_used_vsx = NULL; struct task_struct *last_task_used_spe = NULL; #endif +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +void giveup_fpu_maybe_transactional(struct task_struct *tsk) +{ + /* + * If we are saving the current thread's registers, and the + * thread is in a transactional state, set the TIF_RESTORE_TM + * bit so that we know to restore the registers before + * returning to userspace. + */ + if (tsk == current && tsk->thread.regs && + MSR_TM_ACTIVE(tsk->thread.regs->msr) && + !test_thread_flag(TIF_RESTORE_TM)) { + tsk->thread.tm_orig_msr = tsk->thread.regs->msr; + set_thread_flag(TIF_RESTORE_TM); + } + + giveup_fpu(tsk); +} + +void giveup_altivec_maybe_transactional(struct task_struct *tsk) +{ + /* + * If we are saving the current thread's registers, and the + * thread is in a transactional state, set the TIF_RESTORE_TM + * bit so that we know to restore the registers before + * returning to userspace. + */ + if (tsk == current && tsk->thread.regs && + MSR_TM_ACTIVE(tsk->thread.regs->msr) && + !test_thread_flag(TIF_RESTORE_TM)) { + tsk->thread.tm_orig_msr = tsk->thread.regs->msr; + set_thread_flag(TIF_RESTORE_TM); + } + + giveup_altivec(tsk); +} + +#else +#define giveup_fpu_maybe_transactional(tsk) giveup_fpu(tsk) +#define giveup_altivec_maybe_transactional(tsk) giveup_altivec(tsk) +#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ + #ifdef CONFIG_PPC_FPU /* * Make sure the floating-point register state in the @@ -102,13 +144,13 @@ void flush_fp_to_thread(struct task_struct *tsk) */ BUG_ON(tsk != current); #endif - giveup_fpu(tsk); + giveup_fpu_maybe_transactional(tsk); } preempt_enable(); } } EXPORT_SYMBOL_GPL(flush_fp_to_thread); -#endif +#endif /* CONFIG_PPC_FPU */ void enable_kernel_fp(void) { @@ -116,11 +158,11 @@ void enable_kernel_fp(void) #ifdef CONFIG_SMP if (current->thread.regs && (current->thread.regs->msr & MSR_FP)) - giveup_fpu(current); + giveup_fpu_maybe_transactional(current); else giveup_fpu(NULL); /* just enables FP for kernel */ #else - giveup_fpu(last_task_used_math); + giveup_fpu_maybe_transactional(last_task_used_math); #endif /* CONFIG_SMP */ } EXPORT_SYMBOL(enable_kernel_fp); @@ -132,11 +174,11 @@ void enable_kernel_altivec(void) #ifdef CONFIG_SMP if (current->thread.regs && (current->thread.regs->msr & MSR_VEC)) - giveup_altivec(current); + giveup_altivec_maybe_transactional(current); else giveup_altivec_notask(); #else - giveup_altivec(last_task_used_altivec); + giveup_altivec_maybe_transactional(last_task_used_altivec); #endif /* CONFIG_SMP */ } EXPORT_SYMBOL(enable_kernel_altivec); @@ -153,7 +195,7 @@ void flush_altivec_to_thread(struct task_struct *tsk) #ifdef CONFIG_SMP BUG_ON(tsk != current); #endif - giveup_altivec(tsk); + giveup_altivec_maybe_transactional(tsk); } preempt_enable(); } @@ -182,8 +224,8 @@ EXPORT_SYMBOL(enable_kernel_vsx); void giveup_vsx(struct task_struct *tsk) { - giveup_fpu(tsk); - giveup_altivec(tsk); + giveup_fpu_maybe_transactional(tsk); + giveup_altivec_maybe_transactional(tsk); __giveup_vsx(tsk); } @@ -314,32 +356,32 @@ static DEFINE_PER_CPU(struct arch_hw_breakpoint, current_brk); */ static void set_debug_reg_defaults(struct thread_struct *thread) { - thread->iac1 = thread->iac2 = 0; + thread->debug.iac1 = thread->debug.iac2 = 0; #if CONFIG_PPC_ADV_DEBUG_IACS > 2 - thread->iac3 = thread->iac4 = 0; + thread->debug.iac3 = thread->debug.iac4 = 0; #endif - thread->dac1 = thread->dac2 = 0; + thread->debug.dac1 = thread->debug.dac2 = 0; #if CONFIG_PPC_ADV_DEBUG_DVCS > 0 - thread->dvc1 = thread->dvc2 = 0; + thread->debug.dvc1 = thread->debug.dvc2 = 0; #endif - thread->dbcr0 = 0; + thread->debug.dbcr0 = 0; #ifdef CONFIG_BOOKE /* * Force User/Supervisor bits to b11 (user-only MSR[PR]=1) */ - thread->dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US | \ + thread->debug.dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US | DBCR1_IAC3US | DBCR1_IAC4US; /* * Force Data Address Compare User/Supervisor bits to be User-only * (0b11 MSR[PR]=1) and set all other bits in DBCR2 register to be 0. */ - thread->dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US; + thread->debug.dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US; #else - thread->dbcr1 = 0; + thread->debug.dbcr1 = 0; #endif } -static void prime_debug_regs(struct thread_struct *thread) +static void prime_debug_regs(struct debug_reg *debug) { /* * We could have inherited MSR_DE from userspace, since @@ -348,22 +390,22 @@ static void prime_debug_regs(struct thread_struct *thread) */ mtmsr(mfmsr() & ~MSR_DE); - mtspr(SPRN_IAC1, thread->iac1); - mtspr(SPRN_IAC2, thread->iac2); + mtspr(SPRN_IAC1, debug->iac1); + mtspr(SPRN_IAC2, debug->iac2); #if CONFIG_PPC_ADV_DEBUG_IACS > 2 - mtspr(SPRN_IAC3, thread->iac3); - mtspr(SPRN_IAC4, thread->iac4); + mtspr(SPRN_IAC3, debug->iac3); + mtspr(SPRN_IAC4, debug->iac4); #endif - mtspr(SPRN_DAC1, thread->dac1); - mtspr(SPRN_DAC2, thread->dac2); + mtspr(SPRN_DAC1, debug->dac1); + mtspr(SPRN_DAC2, debug->dac2); #if CONFIG_PPC_ADV_DEBUG_DVCS > 0 - mtspr(SPRN_DVC1, thread->dvc1); - mtspr(SPRN_DVC2, thread->dvc2); + mtspr(SPRN_DVC1, debug->dvc1); + mtspr(SPRN_DVC2, debug->dvc2); #endif - mtspr(SPRN_DBCR0, thread->dbcr0); - mtspr(SPRN_DBCR1, thread->dbcr1); + mtspr(SPRN_DBCR0, debug->dbcr0); + mtspr(SPRN_DBCR1, debug->dbcr1); #ifdef CONFIG_BOOKE - mtspr(SPRN_DBCR2, thread->dbcr2); + mtspr(SPRN_DBCR2, debug->dbcr2); #endif } /* @@ -371,12 +413,13 @@ static void prime_debug_regs(struct thread_struct *thread) * debug registers, set the debug registers from the values * stored in the new thread. */ -static void switch_booke_debug_regs(struct thread_struct *new_thread) +void switch_booke_debug_regs(struct debug_reg *new_debug) { - if ((current->thread.dbcr0 & DBCR0_IDM) - || (new_thread->dbcr0 & DBCR0_IDM)) - prime_debug_regs(new_thread); + if ((current->thread.debug.dbcr0 & DBCR0_IDM) + || (new_debug->dbcr0 & DBCR0_IDM)) + prime_debug_regs(new_debug); } +EXPORT_SYMBOL_GPL(switch_booke_debug_regs); #else /* !CONFIG_PPC_ADV_DEBUG_REGS */ #ifndef CONFIG_HAVE_HW_BREAKPOINT static void set_debug_reg_defaults(struct thread_struct *thread) @@ -453,14 +496,21 @@ static inline int set_dawr(struct arch_hw_breakpoint *brk) return 0; } -int set_breakpoint(struct arch_hw_breakpoint *brk) +void __set_breakpoint(struct arch_hw_breakpoint *brk) { __get_cpu_var(current_brk) = *brk; if (cpu_has_feature(CPU_FTR_DAWR)) - return set_dawr(brk); + set_dawr(brk); + else + set_dabr(brk); +} - return set_dabr(brk); +void set_breakpoint(struct arch_hw_breakpoint *brk) +{ + preempt_disable(); + __set_breakpoint(brk); + preempt_enable(); } #ifdef CONFIG_PPC64 @@ -478,7 +528,48 @@ static inline bool hw_brk_match(struct arch_hw_breakpoint *a, return false; return true; } + #ifdef CONFIG_PPC_TRANSACTIONAL_MEM +static void tm_reclaim_thread(struct thread_struct *thr, + struct thread_info *ti, uint8_t cause) +{ + unsigned long msr_diff = 0; + + /* + * If FP/VSX registers have been already saved to the + * thread_struct, move them to the transact_fp array. + * We clear the TIF_RESTORE_TM bit since after the reclaim + * the thread will no longer be transactional. + */ + if (test_ti_thread_flag(ti, TIF_RESTORE_TM)) { + msr_diff = thr->tm_orig_msr & ~thr->regs->msr; + if (msr_diff & MSR_FP) + memcpy(&thr->transact_fp, &thr->fp_state, + sizeof(struct thread_fp_state)); + if (msr_diff & MSR_VEC) + memcpy(&thr->transact_vr, &thr->vr_state, + sizeof(struct thread_vr_state)); + clear_ti_thread_flag(ti, TIF_RESTORE_TM); + msr_diff &= MSR_FP | MSR_VEC | MSR_VSX | MSR_FE0 | MSR_FE1; + } + + tm_reclaim(thr, thr->regs->msr, cause); + + /* Having done the reclaim, we now have the checkpointed + * FP/VSX values in the registers. These might be valid + * even if we have previously called enable_kernel_fp() or + * flush_fp_to_thread(), so update thr->regs->msr to + * indicate their current validity. + */ + thr->regs->msr |= msr_diff; +} + +void tm_reclaim_current(uint8_t cause) +{ + tm_enable(); + tm_reclaim_thread(¤t->thread, current_thread_info(), cause); +} + static inline void tm_reclaim_task(struct task_struct *tsk) { /* We have to work out if we're switching from/to a task that's in the @@ -501,9 +592,11 @@ static inline void tm_reclaim_task(struct task_struct *tsk) /* Stash the original thread MSR, as giveup_fpu et al will * modify it. We hold onto it to see whether the task used - * FP & vector regs. + * FP & vector regs. If the TIF_RESTORE_TM flag is set, + * tm_orig_msr is already set. */ - thr->tm_orig_msr = thr->regs->msr; + if (!test_ti_thread_flag(task_thread_info(tsk), TIF_RESTORE_TM)) + thr->tm_orig_msr = thr->regs->msr; TM_DEBUG("--- tm_reclaim on pid %d (NIP=%lx, " "ccr=%lx, msr=%lx, trap=%lx)\n", @@ -511,7 +604,7 @@ static inline void tm_reclaim_task(struct task_struct *tsk) thr->regs->ccr, thr->regs->msr, thr->regs->trap); - tm_reclaim(thr, thr->regs->msr, TM_CAUSE_RESCHED); + tm_reclaim_thread(thr, task_thread_info(tsk), TM_CAUSE_RESCHED); TM_DEBUG("--- tm_reclaim on pid %d complete\n", tsk->pid); @@ -525,6 +618,31 @@ out_and_saveregs: tm_save_sprs(thr); } +extern void __tm_recheckpoint(struct thread_struct *thread, + unsigned long orig_msr); + +void tm_recheckpoint(struct thread_struct *thread, + unsigned long orig_msr) +{ + unsigned long flags; + + /* We really can't be interrupted here as the TEXASR registers can't + * change and later in the trecheckpoint code, we have a userspace R1. + * So let's hard disable over this region. + */ + local_irq_save(flags); + hard_irq_disable(); + + /* The TM SPRs are restored here, so that TEXASR.FS can be set + * before the trecheckpoint and no explosion occurs. + */ + tm_restore_sprs(thread); + + __tm_recheckpoint(thread, orig_msr); + + local_irq_restore(flags); +} + static inline void tm_recheckpoint_new_task(struct task_struct *new) { unsigned long msr; @@ -543,13 +661,10 @@ static inline void tm_recheckpoint_new_task(struct task_struct *new) if (!new->thread.regs) return; - /* The TM SPRs are restored here, so that TEXASR.FS can be set - * before the trecheckpoint and no explosion occurs. - */ - tm_restore_sprs(&new->thread); - - if (!MSR_TM_ACTIVE(new->thread.regs->msr)) + if (!MSR_TM_ACTIVE(new->thread.regs->msr)){ + tm_restore_sprs(&new->thread); return; + } msr = new->thread.tm_orig_msr; /* Recheckpoint to restore original checkpointed register state. */ TM_DEBUG("*** tm_recheckpoint of pid %d " @@ -587,6 +702,43 @@ static inline void __switch_to_tm(struct task_struct *prev) tm_reclaim_task(prev); } } + +/* + * This is called if we are on the way out to userspace and the + * TIF_RESTORE_TM flag is set. It checks if we need to reload + * FP and/or vector state and does so if necessary. + * If userspace is inside a transaction (whether active or + * suspended) and FP/VMX/VSX instructions have ever been enabled + * inside that transaction, then we have to keep them enabled + * and keep the FP/VMX/VSX state loaded while ever the transaction + * continues. The reason is that if we didn't, and subsequently + * got a FP/VMX/VSX unavailable interrupt inside a transaction, + * we don't know whether it's the same transaction, and thus we + * don't know which of the checkpointed state and the transactional + * state to use. + */ +void restore_tm_state(struct pt_regs *regs) +{ + unsigned long msr_diff; + + clear_thread_flag(TIF_RESTORE_TM); + if (!MSR_TM_ACTIVE(regs->msr)) + return; + + msr_diff = current->thread.tm_orig_msr & ~regs->msr; + msr_diff &= MSR_FP | MSR_VEC | MSR_VSX; + if (msr_diff & MSR_FP) { + fp_enable(); + load_fp_state(¤t->thread.fp_state); + regs->msr |= current->thread.fpexc_mode; + } + if (msr_diff & MSR_VEC) { + vec_enable(); + load_vr_state(¤t->thread.vr_state); + } + regs->msr |= msr_diff; +} + #else #define tm_recheckpoint_new_task(new) #define __switch_to_tm(prev) @@ -596,21 +748,22 @@ struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *new) { struct thread_struct *new_thread, *old_thread; - unsigned long flags; struct task_struct *last; #ifdef CONFIG_PPC_BOOK3S_64 struct ppc64_tlb_batch *batch; #endif - /* Back up the TAR across context switches. + WARN_ON(!irqs_disabled()); + + /* Back up the TAR and DSCR across context switches. * Note that the TAR is not available for use in the kernel. (To * provide this, the TAR should be backed up/restored on exception * entry/exit instead, and be in pt_regs. FIXME, this should be in * pt_regs anyway (for debug).) - * Save the TAR here before we do treclaim/trecheckpoint as these - * will change the TAR. + * Save the TAR and DSCR here before we do treclaim/trecheckpoint as + * these will change them. */ - save_tar(&prev->thread); + save_early_sprs(&prev->thread); __switch_to_tm(prev); @@ -681,15 +834,15 @@ struct task_struct *__switch_to(struct task_struct *prev, #endif /* CONFIG_SMP */ #ifdef CONFIG_PPC_ADV_DEBUG_REGS - switch_booke_debug_regs(&new->thread); + switch_booke_debug_regs(&new->thread.debug); #else /* * For PPC_BOOK3S_64, we use the hw-breakpoint interfaces that would * schedule DABR */ #ifndef CONFIG_HAVE_HW_BREAKPOINT - if (unlikely(hw_brk_match(&__get_cpu_var(current_brk), &new->thread.hw_brk))) - set_breakpoint(&new->thread.hw_brk); + if (unlikely(!hw_brk_match(&__get_cpu_var(current_brk), &new->thread.hw_brk))) + __set_breakpoint(&new->thread.hw_brk); #endif /* CONFIG_HAVE_HW_BREAKPOINT */ #endif @@ -721,8 +874,6 @@ struct task_struct *__switch_to(struct task_struct *prev, } #endif /* CONFIG_PPC_BOOK3S_64 */ - local_irq_save(flags); - /* * We can't take a PMU exception inside _switch() since there is a * window where the kernel stack SLB and the kernel stack are out @@ -742,8 +893,6 @@ struct task_struct *__switch_to(struct task_struct *prev, } #endif /* CONFIG_PPC_BOOK3S_64 */ - local_irq_restore(flags); - return last; } @@ -860,17 +1009,21 @@ void show_regs(struct pt_regs * regs) printk("MSR: "REG" ", regs->msr); printbits(regs->msr, msr_bits); printk(" CR: %08lx XER: %08lx\n", regs->ccr, regs->xer); -#ifdef CONFIG_PPC64 - printk("SOFTE: %ld\n", regs->softe); -#endif trap = TRAP(regs); if ((regs->trap != 0xc00) && cpu_has_feature(CPU_FTR_CFAR)) - printk("CFAR: "REG"\n", regs->orig_gpr3); - if (trap == 0x300 || trap == 0x600) + printk("CFAR: "REG" ", regs->orig_gpr3); + if (trap == 0x200 || trap == 0x300 || trap == 0x600) #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) - printk("DEAR: "REG", ESR: "REG"\n", regs->dar, regs->dsisr); + printk("DEAR: "REG" ESR: "REG" ", regs->dar, regs->dsisr); #else - printk("DAR: "REG", DSISR: %08lx\n", regs->dar, regs->dsisr); + printk("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr); +#endif +#ifdef CONFIG_PPC64 + printk("SOFTE: %ld ", regs->softe); +#endif +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (MSR_TM_ACTIVE(regs->msr)) + printk("\nPACATMSCRATCH: %016llx ", get_paca()->tm_scratch); #endif for (i = 0; i < 32; i++) { @@ -889,9 +1042,6 @@ void show_regs(struct pt_regs * regs) printk("NIP ["REG"] %pS\n", regs->nip, (void *)regs->nip); printk("LR ["REG"] %pS\n", regs->link, (void *)regs->link); #endif -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - printk("PACATMSCRATCH [%llx]\n", get_paca()->tm_scratch); -#endif show_stack(current, (unsigned long *) regs->gpr[1]); if (!user_mode(regs)) show_instructions(regs); @@ -928,6 +1078,15 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) flush_altivec_to_thread(src); flush_vsx_to_thread(src); flush_spe_to_thread(src); + /* + * Flush TM state out so we can copy it. __switch_to_tm() does this + * flush but it removes the checkpointed state from the current CPU and + * transitions the CPU out of TM mode. Hence we need to call + * tm_recheckpoint_new_task() (on the same task) to restore the + * checkpointed state back and the TM mode. + */ + __switch_to_tm(src); + tm_recheckpoint_new_task(src); *dst = *src; @@ -957,7 +1116,9 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, struct thread_info *ti = (void *)task_stack_page(p); memset(childregs, 0, sizeof(struct pt_regs)); childregs->gpr[1] = sp + sizeof(struct pt_regs); - childregs->gpr[14] = usp; /* function */ + /* function */ + if (usp) + childregs->gpr[14] = ppc_function_entry((void *)usp); #ifdef CONFIG_PPC64 clear_tsk_thread_flag(p, TIF_32BIT); childregs->softe = 1; @@ -1000,13 +1161,19 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, kregs = (struct pt_regs *) sp; sp -= STACK_FRAME_OVERHEAD; p->thread.ksp = sp; +#ifdef CONFIG_PPC32 p->thread.ksp_limit = (unsigned long)task_stack_page(p) + _ALIGN_UP(sizeof(struct thread_info), 16); - +#endif #ifdef CONFIG_HAVE_HW_BREAKPOINT p->thread.ptrace_bps[0] = NULL; #endif + p->thread.fp_save_area = NULL; +#ifdef CONFIG_ALTIVEC + p->thread.vr_save_area = NULL; +#endif + #ifdef CONFIG_PPC_STD_MMU_64 if (mmu_has_feature(MMU_FTR_SLB)) { unsigned long sp_vsid; @@ -1030,17 +1197,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, if (cpu_has_feature(CPU_FTR_HAS_PPR)) p->thread.ppr = INIT_PPR; #endif - /* - * The PPC64 ABI makes use of a TOC to contain function - * pointers. The function (ret_from_except) is actually a pointer - * to the TOC entry. The first entry is a pointer to the actual - * function. - */ -#ifdef CONFIG_PPC64 - kregs->nip = *((unsigned long *)f); -#else - kregs->nip = (unsigned long)f; -#endif + kregs->nip = ppc_function_entry(f); return 0; } @@ -1082,25 +1239,45 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) regs->msr = MSR_USER; #else if (!is_32bit_task()) { - unsigned long entry, toc; + unsigned long entry; - /* start is a relocated pointer to the function descriptor for - * the elf _start routine. The first entry in the function - * descriptor is the entry address of _start and the second - * entry is the TOC value we need to use. - */ - __get_user(entry, (unsigned long __user *)start); - __get_user(toc, (unsigned long __user *)start+1); + if (is_elf2_task()) { + /* Look ma, no function descriptors! */ + entry = start; - /* Check whether the e_entry function descriptor entries - * need to be relocated before we can use them. - */ - if (load_addr != 0) { - entry += load_addr; - toc += load_addr; + /* + * Ulrich says: + * The latest iteration of the ABI requires that when + * calling a function (at its global entry point), + * the caller must ensure r12 holds the entry point + * address (so that the function can quickly + * establish addressability). + */ + regs->gpr[12] = start; + /* Make sure that's restored on entry to userspace. */ + set_thread_flag(TIF_RESTOREALL); + } else { + unsigned long toc; + + /* start is a relocated pointer to the function + * descriptor for the elf _start routine. The first + * entry in the function descriptor is the entry + * address of _start and the second entry is the TOC + * value we need to use. + */ + __get_user(entry, (unsigned long __user *)start); + __get_user(toc, (unsigned long __user *)start+1); + + /* Check whether the e_entry function descriptor entries + * need to be relocated before we can use them. + */ + if (load_addr != 0) { + entry += load_addr; + toc += load_addr; + } + regs->gpr[2] = toc; } regs->nip = entry; - regs->gpr[2] = toc; regs->msr = MSR_USER64; } else { regs->nip = start; @@ -1112,12 +1289,12 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) #ifdef CONFIG_VSX current->thread.used_vsr = 0; #endif - memset(current->thread.fpr, 0, sizeof(current->thread.fpr)); - current->thread.fpscr.val = 0; + memset(¤t->thread.fp_state, 0, sizeof(current->thread.fp_state)); + current->thread.fp_save_area = NULL; #ifdef CONFIG_ALTIVEC - memset(current->thread.vr, 0, sizeof(current->thread.vr)); - memset(¤t->thread.vscr, 0, sizeof(current->thread.vscr)); - current->thread.vscr.u[3] = 0x00010000; /* Java mode disabled */ + memset(¤t->thread.vr_state, 0, sizeof(current->thread.vr_state)); + current->thread.vr_state.vscr.u[3] = 0x00010000; /* Java mode disabled */ + current->thread.vr_save_area = NULL; current->thread.vrsave = 0; current->thread.used_vr = 0; #endif /* CONFIG_ALTIVEC */ @@ -1150,6 +1327,19 @@ int set_fpexc_mode(struct task_struct *tsk, unsigned int val) if (val & PR_FP_EXC_SW_ENABLE) { #ifdef CONFIG_SPE if (cpu_has_feature(CPU_FTR_SPE)) { + /* + * When the sticky exception bits are set + * directly by userspace, it must call prctl + * with PR_GET_FPEXC (with PR_FP_EXC_SW_ENABLE + * in the existing prctl settings) or + * PR_SET_FPEXC (with PR_FP_EXC_SW_ENABLE in + * the bits being set). <fenv.h> functions + * saving and restoring the whole + * floating-point environment need to do so + * anyway to restore the prctl settings from + * the saved environment. + */ + tsk->thread.spefscr_last = mfspr(SPRN_SPEFSCR); tsk->thread.fpexc_mode = val & (PR_FP_EXC_SW_ENABLE | PR_FP_ALL_EXCEPT); return 0; @@ -1181,9 +1371,22 @@ int get_fpexc_mode(struct task_struct *tsk, unsigned long adr) if (tsk->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE) #ifdef CONFIG_SPE - if (cpu_has_feature(CPU_FTR_SPE)) + if (cpu_has_feature(CPU_FTR_SPE)) { + /* + * When the sticky exception bits are set + * directly by userspace, it must call prctl + * with PR_GET_FPEXC (with PR_FP_EXC_SW_ENABLE + * in the existing prctl settings) or + * PR_SET_FPEXC (with PR_FP_EXC_SW_ENABLE in + * the bits being set). <fenv.h> functions + * saving and restoring the whole + * floating-point environment need to do so + * anyway to restore the prctl settings from + * the saved environment. + */ + tsk->thread.spefscr_last = mfspr(SPRN_SPEFSCR); val = tsk->thread.fpexc_mode; - else + } else return -EINVAL; #else return -EINVAL; diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index b7634ce41db..b694b073097 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -29,10 +29,11 @@ #include <linux/bitops.h> #include <linux/export.h> #include <linux/kexec.h> -#include <linux/debugfs.h> #include <linux/irq.h> #include <linux/memblock.h> #include <linux/of.h> +#include <linux/of_fdt.h> +#include <linux/libfdt.h> #include <asm/prom.h> #include <asm/rtas.h> @@ -117,14 +118,14 @@ static void __init move_device_tree(void) DBG("-> move_device_tree\n"); start = __pa(initial_boot_params); - size = be32_to_cpu(initial_boot_params->totalsize); + size = fdt_totalsize(initial_boot_params); if ((memory_limit && (start + size) > PHYSICAL_START + memory_limit) || overlaps_crashkernel(start, size) || overlaps_initrd(start, size)) { p = __va(memblock_alloc(size, PAGE_SIZE)); memcpy(p, initial_boot_params, size); - initial_boot_params = (struct boot_param_header *)p; + initial_boot_params = p; DBG("Moved device tree to 0x%p\n", p); } @@ -162,7 +163,7 @@ static struct ibm_pa_feature { {CPU_FTR_REAL_LE, PPC_FEATURE_TRUE_LE, 5, 0, 0}, }; -static void __init scan_features(unsigned long node, unsigned char *ftrs, +static void __init scan_features(unsigned long node, const unsigned char *ftrs, unsigned long tablelen, struct ibm_pa_feature *fp, unsigned long ft_size) @@ -201,8 +202,8 @@ static void __init scan_features(unsigned long node, unsigned char *ftrs, static void __init check_cpu_pa_features(unsigned long node) { - unsigned char *pa_ftrs; - unsigned long tablelen; + const unsigned char *pa_ftrs; + int tablelen; pa_ftrs = of_get_flat_dt_prop(node, "ibm,pa-features", &tablelen); if (pa_ftrs == NULL) @@ -215,7 +216,7 @@ static void __init check_cpu_pa_features(unsigned long node) #ifdef CONFIG_PPC_STD_MMU_64 static void __init check_cpu_slb_size(unsigned long node) { - __be32 *slb_size_ptr; + const __be32 *slb_size_ptr; slb_size_ptr = of_get_flat_dt_prop(node, "slb-size", NULL); if (slb_size_ptr != NULL) { @@ -256,7 +257,7 @@ static struct feature_property { static inline void identical_pvr_fixup(unsigned long node) { unsigned int pvr; - char *model = of_get_flat_dt_prop(node, "model", NULL); + const char *model = of_get_flat_dt_prop(node, "model", NULL); /* * Since 440GR(x)/440EP(x) processors have the same pvr, @@ -294,11 +295,11 @@ static int __init early_init_dt_scan_cpus(unsigned long node, const char *uname, int depth, void *data) { - char *type = of_get_flat_dt_prop(node, "device_type", NULL); + const char *type = of_get_flat_dt_prop(node, "device_type", NULL); const __be32 *prop; const __be32 *intserv; int i, nthreads; - unsigned long len; + int len; int found = -1; int found_thread = 0; @@ -324,9 +325,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node, * version 2 of the kexec param format adds the phys cpuid of * booted proc. */ - if (be32_to_cpu(initial_boot_params->version) >= 2) { + if (fdt_version(initial_boot_params) >= 2) { if (be32_to_cpu(intserv[i]) == - be32_to_cpu(initial_boot_params->boot_cpuid_phys)) { + fdt_boot_cpuid_phys(initial_boot_params)) { found = boot_cpu_count; found_thread = i; } @@ -346,52 +347,52 @@ static int __init early_init_dt_scan_cpus(unsigned long node, #endif } - if (found >= 0) { - DBG("boot cpu: logical %d physical %d\n", found, - be32_to_cpu(intserv[found_thread])); - boot_cpuid = found; - set_hard_smp_processor_id(found, - be32_to_cpu(intserv[found_thread])); + /* Not the boot CPU */ + if (found < 0) + return 0; - /* - * PAPR defines "logical" PVR values for cpus that - * meet various levels of the architecture: - * 0x0f000001 Architecture version 2.04 - * 0x0f000002 Architecture version 2.05 - * If the cpu-version property in the cpu node contains - * such a value, we call identify_cpu again with the - * logical PVR value in order to use the cpu feature - * bits appropriate for the architecture level. - * - * A POWER6 partition in "POWER6 architected" mode - * uses the 0x0f000002 PVR value; in POWER5+ mode - * it uses 0x0f000001. - */ - prop = of_get_flat_dt_prop(node, "cpu-version", NULL); - if (prop && (be32_to_cpup(prop) & 0xff000000) == 0x0f000000) - identify_cpu(0, be32_to_cpup(prop)); + DBG("boot cpu: logical %d physical %d\n", found, + be32_to_cpu(intserv[found_thread])); + boot_cpuid = found; + set_hard_smp_processor_id(found, be32_to_cpu(intserv[found_thread])); - identical_pvr_fixup(node); - } + /* + * PAPR defines "logical" PVR values for cpus that + * meet various levels of the architecture: + * 0x0f000001 Architecture version 2.04 + * 0x0f000002 Architecture version 2.05 + * If the cpu-version property in the cpu node contains + * such a value, we call identify_cpu again with the + * logical PVR value in order to use the cpu feature + * bits appropriate for the architecture level. + * + * A POWER6 partition in "POWER6 architected" mode + * uses the 0x0f000002 PVR value; in POWER5+ mode + * it uses 0x0f000001. + */ + prop = of_get_flat_dt_prop(node, "cpu-version", NULL); + if (prop && (be32_to_cpup(prop) & 0xff000000) == 0x0f000000) + identify_cpu(0, be32_to_cpup(prop)); + + identical_pvr_fixup(node); check_cpu_feature_properties(node); check_cpu_pa_features(node); check_cpu_slb_size(node); -#ifdef CONFIG_PPC_PSERIES +#ifdef CONFIG_PPC64 if (nthreads > 1) cur_cpu_spec->cpu_features |= CPU_FTR_SMT; else cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT; #endif - return 0; } int __init early_init_dt_scan_chosen_ppc(unsigned long node, const char *uname, int depth, void *data) { - unsigned long *lprop; /* All these set by kernel, so no need to convert endian */ + const unsigned long *lprop; /* All these set by kernel, so no need to convert endian */ /* Use common scan routine to determine if this is the chosen node */ if (early_init_dt_scan_chosen(node, uname, depth, data) == 0) @@ -442,8 +443,9 @@ int __init early_init_dt_scan_chosen_ppc(unsigned long node, const char *uname, */ static int __init early_init_dt_scan_drconf_memory(unsigned long node) { - __be32 *dm, *ls, *usm; - unsigned long l, n, flags; + const __be32 *dm, *ls, *usm; + int l; + unsigned long n, flags; u64 base, size, memblock_size; unsigned int is_kexec_kdump = 0, rngs; @@ -523,6 +525,20 @@ static int __init early_init_dt_scan_memory_ppc(unsigned long node, return early_init_dt_scan_memory(node, uname, depth, data); } +/* + * For a relocatable kernel, we need to get the memstart_addr first, + * then use it to calculate the virtual kernel start address. This has + * to happen at a very early stage (before machine_init). In this case, + * we just want to get the memstart_address and would not like to mess the + * memblock at this stage. So introduce a variable to skip the memblock_add() + * for this reason. + */ +#ifdef CONFIG_RELOCATABLE +static int add_mem_to_memblock = 1; +#else +#define add_mem_to_memblock 1 +#endif + void __init early_init_dt_add_memory_arch(u64 base, u64 size) { #ifdef CONFIG_PPC64 @@ -543,23 +559,18 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size) } /* Add the chunk to the MEMBLOCK list */ - memblock_add(base, size); -} - -#ifdef CONFIG_BLK_DEV_INITRD -void __init early_init_dt_setup_initrd_arch(u64 start, u64 end) -{ - initrd_start = (unsigned long)__va(start); - initrd_end = (unsigned long)__va(end); - initrd_below_start_ok = 1; + if (add_mem_to_memblock) + memblock_add(base, size); } -#endif static void __init early_reserve_mem_dt(void) { - unsigned long i, len, dt_root; + unsigned long i, dt_root; + int len; const __be32 *prop; + early_init_fdt_scan_reserved_mem(); + dt_root = of_get_flat_dt_root(); prop = of_get_flat_dt_prop(dt_root, "reserved-ranges", &len); @@ -586,18 +597,10 @@ static void __init early_reserve_mem_dt(void) static void __init early_reserve_mem(void) { - u64 base, size; __be64 *reserve_map; - unsigned long self_base; - unsigned long self_size; reserve_map = (__be64 *)(((unsigned long)initial_boot_params) + - be32_to_cpu(initial_boot_params->off_mem_rsvmap)); - - /* before we do anything, lets reserve the dt blob */ - self_base = __pa((unsigned long)initial_boot_params); - self_size = be32_to_cpu(initial_boot_params->totalsize); - memblock_reserve(self_base, self_size); + fdt_off_mem_rsvmap(initial_boot_params)); /* Look for the new "reserved-regions" property in the DT */ early_reserve_mem_dt(); @@ -627,26 +630,12 @@ static void __init early_reserve_mem(void) size_32 = be32_to_cpup(reserve_map_32++); if (size_32 == 0) break; - /* skip if the reservation is for the blob */ - if (base_32 == self_base && size_32 == self_size) - continue; DBG("reserving: %x -> %x\n", base_32, size_32); memblock_reserve(base_32, size_32); } return; } #endif - DBG("Processing reserve map\n"); - - /* Handle the reserve map in the fdt blob if it exists */ - while (1) { - base = be64_to_cpup(reserve_map++); - size = be64_to_cpup(reserve_map++); - if (size == 0) - break; - DBG("reserving: %llx -> %llx\n", base, size); - memblock_reserve(base, size); - } } void __init early_init_devtree(void *params) @@ -673,13 +662,6 @@ void __init early_init_devtree(void *params) of_scan_flat_dt(early_init_dt_scan_fw_dump, NULL); #endif - /* Pre-initialize the cmd_line with the content of boot_commmand_line, - * which will be empty except when the content of the variable has - * been overriden by a bootloading mechanism. This happens typically - * with HAL takeover - */ - strlcpy(cmd_line, boot_command_line, COMMAND_LINE_SIZE); - /* Retrieve various informations from the /chosen node of the * device-tree, including the platform type, initrd location and * size, TCE reserve, and more ... @@ -738,6 +720,10 @@ void __init early_init_devtree(void *params) * (altivec support, boot CPU ID, ...) */ of_scan_flat_dt(early_init_dt_scan_cpus, NULL); + if (boot_cpuid < 0) { + printk("Failed to indentify boot CPU !\n"); + BUG(); + } #if defined(CONFIG_SMP) && defined(CONFIG_PPC64) /* We'll later wait for secondaries to check in; there are @@ -746,9 +732,38 @@ void __init early_init_devtree(void *params) spinning_secondaries = boot_cpu_count - 1; #endif +#ifdef CONFIG_PPC_POWERNV + /* Scan and build the list of machine check recoverable ranges */ + of_scan_flat_dt(early_init_dt_scan_recoverable_ranges, NULL); +#endif + DBG(" <- early_init_devtree()\n"); } +#ifdef CONFIG_RELOCATABLE +/* + * This function run before early_init_devtree, so we have to init + * initial_boot_params. + */ +void __init early_get_first_memblock_info(void *params, phys_addr_t *size) +{ + /* Setup flat device-tree pointer */ + initial_boot_params = params; + + /* + * Scan the memory nodes and set add_mem_to_memblock to 0 to avoid + * mess the memblock. + */ + add_mem_to_memblock = 0; + of_scan_flat_dt(early_init_dt_scan_root, NULL); + of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL); + add_mem_to_memblock = 1; + + if (size) + *size = first_memblock_size; +} +#endif + /******* * * New implementation of the OF "find" APIs, return a refcounted @@ -761,37 +776,6 @@ void __init early_init_devtree(void *params) *******/ /** - * of_find_next_cache_node - Find a node's subsidiary cache - * @np: node of type "cpu" or "cache" - * - * Returns a node pointer with refcount incremented, use - * of_node_put() on it when done. Caller should hold a reference - * to np. - */ -struct device_node *of_find_next_cache_node(struct device_node *np) -{ - struct device_node *child; - const phandle *handle; - - handle = of_get_property(np, "l2-cache", NULL); - if (!handle) - handle = of_get_property(np, "next-level-cache", NULL); - - if (handle) - return of_find_node_by_phandle(*handle); - - /* OF on pmac has nodes instead of properties named "l2-cache" - * beneath CPU nodes. - */ - if (!strcmp(np->type, "cpu")) - for_each_child_of_node(np, child) - if (!strcmp(child->type, "cache")) - return child; - - return NULL; -} - -/** * of_get_ibm_chip_id - Returns the IBM "chip-id" of a device * @np: device node of the device * @@ -817,6 +801,26 @@ int of_get_ibm_chip_id(struct device_node *np) return -1; } +/** + * cpu_to_chip_id - Return the cpus chip-id + * @cpu: The logical cpu number. + * + * Return the value of the ibm,chip-id property corresponding to the given + * logical cpu number. If the chip-id can not be found, returns -1. + */ +int cpu_to_chip_id(int cpu) +{ + struct device_node *np; + + np = of_get_cpu_node(cpu, NULL); + if (!np) + return -1; + + of_node_put(np); + return of_get_ibm_chip_id(np); +} +EXPORT_SYMBOL(cpu_to_chip_id); + #ifdef CONFIG_PPC_PSERIES /* * Fix up the uninitialized fields in a new device node: @@ -891,23 +895,3 @@ bool arch_match_cpu_phys_id(int cpu, u64 phys_id) { return (int)phys_id == get_hard_smp_processor_id(cpu); } - -#if defined(CONFIG_DEBUG_FS) && defined(DEBUG) -static struct debugfs_blob_wrapper flat_dt_blob; - -static int __init export_flat_device_tree(void) -{ - struct dentry *d; - - flat_dt_blob.data = initial_boot_params; - flat_dt_blob.size = be32_to_cpu(initial_boot_params->totalsize); - - d = debugfs_create_blob("flat-device-tree", S_IFREG | S_IRUSR, - powerpc_debugfs_root, &flat_dt_blob); - if (!d) - return 1; - - return 0; -} -__initcall(export_flat_device_tree); -#endif diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 12e656ffe60..1a85d8f9673 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -196,6 +196,8 @@ static int __initdata mem_reserve_cnt; static cell_t __initdata regbuf[1024]; +static bool rtas_has_query_cpu_stopped; + /* * Error results ... some OF calls will return "-1" on error, some @@ -856,7 +858,8 @@ static void __init prom_send_capabilities(void) { ihandle root; prom_arg_t ret; - __be32 *cores; + u32 cores; + unsigned char *ptcores; root = call_prom("open", 1, 1, ADDR("/")); if (root != 0) { @@ -866,15 +869,30 @@ static void __init prom_send_capabilities(void) * (we assume this is the same for all cores) and use it to * divide NR_CPUS. */ - cores = (__be32 *)&ibm_architecture_vec[IBM_ARCH_VEC_NRCORES_OFFSET]; - if (be32_to_cpup(cores) != NR_CPUS) { + + /* The core value may start at an odd address. If such a word + * access is made at a cache line boundary, this leads to an + * exception which may not be handled at this time. + * Forcing a per byte access to avoid exception. + */ + ptcores = &ibm_architecture_vec[IBM_ARCH_VEC_NRCORES_OFFSET]; + cores = 0; + cores |= ptcores[0] << 24; + cores |= ptcores[1] << 16; + cores |= ptcores[2] << 8; + cores |= ptcores[3]; + if (cores != NR_CPUS) { prom_printf("WARNING ! " "ibm_architecture_vec structure inconsistent: %lu!\n", - be32_to_cpup(cores)); + cores); } else { - *cores = cpu_to_be32(DIV_ROUND_UP(NR_CPUS, prom_count_smt_threads())); + cores = DIV_ROUND_UP(NR_CPUS, prom_count_smt_threads()); prom_printf("Max number of cores passed to firmware: %lu (NR_CPUS = %lu)\n", - be32_to_cpup(cores), NR_CPUS); + cores, NR_CPUS); + ptcores[0] = (cores >> 24) & 0xff; + ptcores[1] = (cores >> 16) & 0xff; + ptcores[2] = (cores >> 8) & 0xff; + ptcores[3] = cores & 0xff; } /* try calling the ibm,client-architecture-support method */ @@ -1250,201 +1268,6 @@ static u64 __initdata prom_opal_base; static u64 __initdata prom_opal_entry; #endif -#ifdef __BIG_ENDIAN__ -/* XXX Don't change this structure without updating opal-takeover.S */ -static struct opal_secondary_data { - s64 ack; /* 0 */ - u64 go; /* 8 */ - struct opal_takeover_args args; /* 16 */ -} opal_secondary_data; - -static u64 __initdata prom_opal_align; -static u64 __initdata prom_opal_size; -static int __initdata prom_rtas_start_cpu; -static u64 __initdata prom_rtas_data; -static u64 __initdata prom_rtas_entry; - -extern char opal_secondary_entry; - -static void __init prom_query_opal(void) -{ - long rc; - - /* We must not query for OPAL presence on a machine that - * supports TNK takeover (970 blades), as this uses the same - * h-call with different arguments and will crash - */ - if (PHANDLE_VALID(call_prom("finddevice", 1, 1, - ADDR("/tnk-memory-map")))) { - prom_printf("TNK takeover detected, skipping OPAL check\n"); - return; - } - - prom_printf("Querying for OPAL presence... "); - - rc = opal_query_takeover(&prom_opal_size, - &prom_opal_align); - prom_debug("(rc = %ld) ", rc); - if (rc != 0) { - prom_printf("not there.\n"); - return; - } - of_platform = PLATFORM_OPAL; - prom_printf(" there !\n"); - prom_debug(" opal_size = 0x%lx\n", prom_opal_size); - prom_debug(" opal_align = 0x%lx\n", prom_opal_align); - if (prom_opal_align < 0x10000) - prom_opal_align = 0x10000; -} - -static int __init prom_rtas_call(int token, int nargs, int nret, - int *outputs, ...) -{ - struct rtas_args rtas_args; - va_list list; - int i; - - rtas_args.token = token; - rtas_args.nargs = nargs; - rtas_args.nret = nret; - rtas_args.rets = (rtas_arg_t *)&(rtas_args.args[nargs]); - va_start(list, outputs); - for (i = 0; i < nargs; ++i) - rtas_args.args[i] = va_arg(list, rtas_arg_t); - va_end(list); - - for (i = 0; i < nret; ++i) - rtas_args.rets[i] = 0; - - opal_enter_rtas(&rtas_args, prom_rtas_data, - prom_rtas_entry); - - if (nret > 1 && outputs != NULL) - for (i = 0; i < nret-1; ++i) - outputs[i] = rtas_args.rets[i+1]; - return (nret > 0)? rtas_args.rets[0]: 0; -} - -static void __init prom_opal_hold_cpus(void) -{ - int i, cnt, cpu, rc; - long j; - phandle node; - char type[64]; - u32 servers[8]; - void *entry = (unsigned long *)&opal_secondary_entry; - struct opal_secondary_data *data = &opal_secondary_data; - - prom_debug("prom_opal_hold_cpus: start...\n"); - prom_debug(" - entry = 0x%x\n", entry); - prom_debug(" - data = 0x%x\n", data); - - data->ack = -1; - data->go = 0; - - /* look for cpus */ - for (node = 0; prom_next_node(&node); ) { - type[0] = 0; - prom_getprop(node, "device_type", type, sizeof(type)); - if (strcmp(type, "cpu") != 0) - continue; - - /* Skip non-configured cpus. */ - if (prom_getprop(node, "status", type, sizeof(type)) > 0) - if (strcmp(type, "okay") != 0) - continue; - - cnt = prom_getprop(node, "ibm,ppc-interrupt-server#s", servers, - sizeof(servers)); - if (cnt == PROM_ERROR) - break; - cnt >>= 2; - for (i = 0; i < cnt; i++) { - cpu = servers[i]; - prom_debug("CPU %d ... ", cpu); - if (cpu == prom.cpu) { - prom_debug("booted !\n"); - continue; - } - prom_debug("starting ... "); - - /* Init the acknowledge var which will be reset by - * the secondary cpu when it awakens from its OF - * spinloop. - */ - data->ack = -1; - rc = prom_rtas_call(prom_rtas_start_cpu, 3, 1, - NULL, cpu, entry, data); - prom_debug("rtas rc=%d ...", rc); - - for (j = 0; j < 100000000 && data->ack == -1; j++) { - HMT_low(); - mb(); - } - HMT_medium(); - if (data->ack != -1) - prom_debug("done, PIR=0x%x\n", data->ack); - else - prom_debug("timeout !\n"); - } - } - prom_debug("prom_opal_hold_cpus: end...\n"); -} - -static void __init prom_opal_takeover(void) -{ - struct opal_secondary_data *data = &opal_secondary_data; - struct opal_takeover_args *args = &data->args; - u64 align = prom_opal_align; - u64 top_addr, opal_addr; - - args->k_image = (u64)_stext; - args->k_size = _end - _stext; - args->k_entry = 0; - args->k_entry2 = 0x60; - - top_addr = _ALIGN_UP(args->k_size, align); - - if (prom_initrd_start != 0) { - args->rd_image = prom_initrd_start; - args->rd_size = prom_initrd_end - args->rd_image; - args->rd_loc = top_addr; - top_addr = _ALIGN_UP(args->rd_loc + args->rd_size, align); - } - - /* Pickup an address for the HAL. We want to go really high - * up to avoid problem with future kexecs. On the other hand - * we don't want to be all over the TCEs on P5IOC2 machines - * which are going to be up there too. We assume the machine - * has plenty of memory, and we ask for the HAL for now to - * be just below the 1G point, or above the initrd - */ - opal_addr = _ALIGN_DOWN(0x40000000 - prom_opal_size, align); - if (opal_addr < top_addr) - opal_addr = top_addr; - args->hal_addr = opal_addr; - - /* Copy the command line to the kernel image */ - strlcpy(boot_command_line, prom_cmd_line, - COMMAND_LINE_SIZE); - - prom_debug(" k_image = 0x%lx\n", args->k_image); - prom_debug(" k_size = 0x%lx\n", args->k_size); - prom_debug(" k_entry = 0x%lx\n", args->k_entry); - prom_debug(" k_entry2 = 0x%lx\n", args->k_entry2); - prom_debug(" hal_addr = 0x%lx\n", args->hal_addr); - prom_debug(" rd_image = 0x%lx\n", args->rd_image); - prom_debug(" rd_size = 0x%lx\n", args->rd_size); - prom_debug(" rd_loc = 0x%lx\n", args->rd_loc); - prom_printf("Performing OPAL takeover,this can take a few minutes..\n"); - prom_close_stdin(); - mb(); - data->go = 1; - for (;;) - opal_do_takeover(args); -} -#endif /* __BIG_ENDIAN__ */ - /* * Allocate room for and instantiate OPAL */ @@ -1574,12 +1397,11 @@ static void __init prom_instantiate_rtas(void) prom_setprop(rtas_node, "/rtas", "linux,rtas-entry", &val, sizeof(val)); -#if defined(CONFIG_PPC_POWERNV) && defined(__BIG_ENDIAN__) - /* PowerVN takeover hack */ - prom_rtas_data = base; - prom_rtas_entry = entry; - prom_getprop(rtas_node, "start-cpu", &prom_rtas_start_cpu, 4); -#endif + /* Check if it supports "query-cpu-stopped-state" */ + if (prom_getprop(rtas_node, "query-cpu-stopped-state", + &val, sizeof(val)) != PROM_ERROR) + rtas_has_query_cpu_stopped = true; + prom_debug("rtas base = 0x%x\n", base); prom_debug("rtas entry = 0x%x\n", entry); prom_debug("rtas size = 0x%x\n", (long)size); @@ -1815,6 +1637,18 @@ static void __init prom_hold_cpus(void) = (void *) LOW_ADDR(__secondary_hold_acknowledge); unsigned long secondary_hold = LOW_ADDR(__secondary_hold); + /* + * On pseries, if RTAS supports "query-cpu-stopped-state", + * we skip this stage, the CPUs will be started by the + * kernel using RTAS. + */ + if ((of_platform == PLATFORM_PSERIES || + of_platform == PLATFORM_PSERIES_LPAR) && + rtas_has_query_cpu_stopped) { + prom_printf("prom_hold_cpus: skipped\n"); + return; + } + prom_debug("prom_hold_cpus: start...\n"); prom_debug(" 1) spinloop = 0x%x\n", (unsigned long)spinloop); prom_debug(" 1) *spinloop = 0x%x\n", *spinloop); @@ -1951,19 +1785,23 @@ static void __init prom_init_stdout(void) /* Get the full OF pathname of the stdout device */ memset(path, 0, 256); call_prom("instance-to-path", 3, 1, prom.stdout, path, 255); - stdout_node = call_prom("instance-to-package", 1, 1, prom.stdout); - val = cpu_to_be32(stdout_node); - prom_setprop(prom.chosen, "/chosen", "linux,stdout-package", - &val, sizeof(val)); prom_printf("OF stdout device is: %s\n", of_stdout_device); prom_setprop(prom.chosen, "/chosen", "linux,stdout-path", path, strlen(path) + 1); - /* If it's a display, note it */ - memset(type, 0, sizeof(type)); - prom_getprop(stdout_node, "device_type", type, sizeof(type)); - if (strcmp(type, "display") == 0) - prom_setprop(stdout_node, path, "linux,boot-display", NULL, 0); + /* instance-to-package fails on PA-Semi */ + stdout_node = call_prom("instance-to-package", 1, 1, prom.stdout); + if (stdout_node != PROM_ERROR) { + val = cpu_to_be32(stdout_node); + prom_setprop(prom.chosen, "/chosen", "linux,stdout-package", + &val, sizeof(val)); + + /* If it's a display, note it */ + memset(type, 0, sizeof(type)); + prom_getprop(stdout_node, "device_type", type, sizeof(type)); + if (strcmp(type, "display") == 0) + prom_setprop(stdout_node, path, "linux,boot-display", NULL, 0); + } } static int __init prom_find_machine_type(void) @@ -2988,16 +2826,6 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, prom_instantiate_rtas(); #ifdef CONFIG_PPC_POWERNV -#ifdef __BIG_ENDIAN__ - /* Detect HAL and try instanciating it & doing takeover */ - if (of_platform == PLATFORM_PSERIES_LPAR) { - prom_query_opal(); - if (of_platform == PLATFORM_OPAL) { - prom_opal_hold_cpus(); - prom_opal_takeover(); - } - } else -#endif /* __BIG_ENDIAN__ */ if (of_platform == PLATFORM_OPAL) prom_instantiate_opal(); #endif /* CONFIG_PPC_POWERNV */ @@ -3011,6 +2839,8 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, * On non-powermacs, put all CPUs in spin-loops. * * PowerMacs use a different mechanism to spin CPUs + * + * (This must be done after instanciating RTAS) */ if (of_platform != PLATFORM_POWERMAC && of_platform != PLATFORM_OPAL) diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh index b0c263da219..fe8e54b9ef7 100644 --- a/arch/powerpc/kernel/prom_init_check.sh +++ b/arch/powerpc/kernel/prom_init_check.sh @@ -21,9 +21,7 @@ _end enter_prom memcpy memset reloc_offset __secondary_hold __secondary_hold_acknowledge __secondary_hold_spinloop __start strcmp strcpy strlcpy strlen strncmp strstr logo_linux_clut224 reloc_got2 kernstart_addr memstart_addr linux_banner _stext -opal_query_takeover opal_do_takeover opal_enter_rtas opal_secondary_entry -boot_command_line __prom_init_toc_start __prom_init_toc_end -btext_setup_display" +__prom_init_toc_start __prom_init_toc_end btext_setup_display TOC." NM="$1" OBJ="$2" diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 9a0d24c390a..2e3d2bf536c 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -362,7 +362,7 @@ static int fpr_get(struct task_struct *target, const struct user_regset *regset, void *kbuf, void __user *ubuf) { #ifdef CONFIG_VSX - double buf[33]; + u64 buf[33]; int i; #endif flush_fp_to_thread(target); @@ -371,15 +371,15 @@ static int fpr_get(struct task_struct *target, const struct user_regset *regset, /* copy to local buffer then write that out */ for (i = 0; i < 32 ; i++) buf[i] = target->thread.TS_FPR(i); - memcpy(&buf[32], &target->thread.fpscr, sizeof(double)); + buf[32] = target->thread.fp_state.fpscr; return user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, -1); #else - BUILD_BUG_ON(offsetof(struct thread_struct, fpscr) != - offsetof(struct thread_struct, TS_FPR(32))); + BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) != + offsetof(struct thread_fp_state, fpr[32][0])); return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.fpr, 0, -1); + &target->thread.fp_state, 0, -1); #endif } @@ -388,7 +388,7 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset, const void *kbuf, const void __user *ubuf) { #ifdef CONFIG_VSX - double buf[33]; + u64 buf[33]; int i; #endif flush_fp_to_thread(target); @@ -400,14 +400,14 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset, return i; for (i = 0; i < 32 ; i++) target->thread.TS_FPR(i) = buf[i]; - memcpy(&target->thread.fpscr, &buf[32], sizeof(double)); + target->thread.fp_state.fpscr = buf[32]; return 0; #else - BUILD_BUG_ON(offsetof(struct thread_struct, fpscr) != - offsetof(struct thread_struct, TS_FPR(32))); + BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) != + offsetof(struct thread_fp_state, fpr[32][0])); return user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.fpr, 0, -1); + &target->thread.fp_state, 0, -1); #endif } @@ -440,11 +440,11 @@ static int vr_get(struct task_struct *target, const struct user_regset *regset, flush_altivec_to_thread(target); - BUILD_BUG_ON(offsetof(struct thread_struct, vscr) != - offsetof(struct thread_struct, vr[32])); + BUILD_BUG_ON(offsetof(struct thread_vr_state, vscr) != + offsetof(struct thread_vr_state, vr[32])); ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.vr, 0, + &target->thread.vr_state, 0, 33 * sizeof(vector128)); if (!ret) { /* @@ -471,11 +471,12 @@ static int vr_set(struct task_struct *target, const struct user_regset *regset, flush_altivec_to_thread(target); - BUILD_BUG_ON(offsetof(struct thread_struct, vscr) != - offsetof(struct thread_struct, vr[32])); + BUILD_BUG_ON(offsetof(struct thread_vr_state, vscr) != + offsetof(struct thread_vr_state, vr[32])); ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.vr, 0, 33 * sizeof(vector128)); + &target->thread.vr_state, 0, + 33 * sizeof(vector128)); if (!ret && count > 0) { /* * We use only the first word of vrsave. @@ -514,13 +515,13 @@ static int vsr_get(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) { - double buf[32]; + u64 buf[32]; int ret, i; flush_vsx_to_thread(target); for (i = 0; i < 32 ; i++) - buf[i] = target->thread.fpr[i][TS_VSRLOWOFFSET]; + buf[i] = target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET]; ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, 32 * sizeof(double)); @@ -531,7 +532,7 @@ static int vsr_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) { - double buf[32]; + u64 buf[32]; int ret,i; flush_vsx_to_thread(target); @@ -539,7 +540,7 @@ static int vsr_set(struct task_struct *target, const struct user_regset *regset, ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, 32 * sizeof(double)); for (i = 0; i < 32 ; i++) - target->thread.fpr[i][TS_VSRLOWOFFSET] = buf[i]; + target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i]; return ret; @@ -657,7 +658,7 @@ static const struct user_regset native_regsets[] = { #endif #ifdef CONFIG_SPE [REGSET_SPE] = { - .n = 35, + .core_note_type = NT_PPC_SPE, .n = 35, .size = sizeof(u32), .align = sizeof(u32), .active = evr_active, .get = evr_get, .set = evr_set }, @@ -854,8 +855,8 @@ void user_enable_single_step(struct task_struct *task) if (regs != NULL) { #ifdef CONFIG_PPC_ADV_DEBUG_REGS - task->thread.dbcr0 &= ~DBCR0_BT; - task->thread.dbcr0 |= DBCR0_IDM | DBCR0_IC; + task->thread.debug.dbcr0 &= ~DBCR0_BT; + task->thread.debug.dbcr0 |= DBCR0_IDM | DBCR0_IC; regs->msr |= MSR_DE; #else regs->msr &= ~MSR_BE; @@ -871,8 +872,8 @@ void user_enable_block_step(struct task_struct *task) if (regs != NULL) { #ifdef CONFIG_PPC_ADV_DEBUG_REGS - task->thread.dbcr0 &= ~DBCR0_IC; - task->thread.dbcr0 = DBCR0_IDM | DBCR0_BT; + task->thread.debug.dbcr0 &= ~DBCR0_IC; + task->thread.debug.dbcr0 = DBCR0_IDM | DBCR0_BT; regs->msr |= MSR_DE; #else regs->msr &= ~MSR_SE; @@ -894,16 +895,16 @@ void user_disable_single_step(struct task_struct *task) * And, after doing so, if all debug flags are off, turn * off DBCR0(IDM) and MSR(DE) .... Torez */ - task->thread.dbcr0 &= ~DBCR0_IC; + task->thread.debug.dbcr0 &= ~(DBCR0_IC|DBCR0_BT); /* * Test to see if any of the DBCR_ACTIVE_EVENTS bits are set. */ - if (!DBCR_ACTIVE_EVENTS(task->thread.dbcr0, - task->thread.dbcr1)) { + if (!DBCR_ACTIVE_EVENTS(task->thread.debug.dbcr0, + task->thread.debug.dbcr1)) { /* * All debug events were off..... */ - task->thread.dbcr0 &= ~DBCR0_IDM; + task->thread.debug.dbcr0 &= ~DBCR0_IDM; regs->msr &= ~MSR_DE; } #else @@ -1022,14 +1023,14 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, */ /* DAC's hold the whole address without any mode flags */ - task->thread.dac1 = data & ~0x3UL; + task->thread.debug.dac1 = data & ~0x3UL; - if (task->thread.dac1 == 0) { + if (task->thread.debug.dac1 == 0) { dbcr_dac(task) &= ~(DBCR_DAC1R | DBCR_DAC1W); - if (!DBCR_ACTIVE_EVENTS(task->thread.dbcr0, - task->thread.dbcr1)) { + if (!DBCR_ACTIVE_EVENTS(task->thread.debug.dbcr0, + task->thread.debug.dbcr1)) { task->thread.regs->msr &= ~MSR_DE; - task->thread.dbcr0 &= ~DBCR0_IDM; + task->thread.debug.dbcr0 &= ~DBCR0_IDM; } return 0; } @@ -1041,7 +1042,7 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, /* Set the Internal Debugging flag (IDM bit 1) for the DBCR0 register */ - task->thread.dbcr0 |= DBCR0_IDM; + task->thread.debug.dbcr0 |= DBCR0_IDM; /* Check for write and read flags and set DBCR0 accordingly */ @@ -1071,10 +1072,10 @@ static long set_instruction_bp(struct task_struct *child, struct ppc_hw_breakpoint *bp_info) { int slot; - int slot1_in_use = ((child->thread.dbcr0 & DBCR0_IAC1) != 0); - int slot2_in_use = ((child->thread.dbcr0 & DBCR0_IAC2) != 0); - int slot3_in_use = ((child->thread.dbcr0 & DBCR0_IAC3) != 0); - int slot4_in_use = ((child->thread.dbcr0 & DBCR0_IAC4) != 0); + int slot1_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC1) != 0); + int slot2_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC2) != 0); + int slot3_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC3) != 0); + int slot4_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC4) != 0); if (dbcr_iac_range(child) & DBCR_IAC12MODE) slot2_in_use = 1; @@ -1093,9 +1094,9 @@ static long set_instruction_bp(struct task_struct *child, /* We need a pair of IAC regsisters */ if ((!slot1_in_use) && (!slot2_in_use)) { slot = 1; - child->thread.iac1 = bp_info->addr; - child->thread.iac2 = bp_info->addr2; - child->thread.dbcr0 |= DBCR0_IAC1; + child->thread.debug.iac1 = bp_info->addr; + child->thread.debug.iac2 = bp_info->addr2; + child->thread.debug.dbcr0 |= DBCR0_IAC1; if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE) dbcr_iac_range(child) |= DBCR_IAC12X; @@ -1104,9 +1105,9 @@ static long set_instruction_bp(struct task_struct *child, #if CONFIG_PPC_ADV_DEBUG_IACS > 2 } else if ((!slot3_in_use) && (!slot4_in_use)) { slot = 3; - child->thread.iac3 = bp_info->addr; - child->thread.iac4 = bp_info->addr2; - child->thread.dbcr0 |= DBCR0_IAC3; + child->thread.debug.iac3 = bp_info->addr; + child->thread.debug.iac4 = bp_info->addr2; + child->thread.debug.dbcr0 |= DBCR0_IAC3; if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE) dbcr_iac_range(child) |= DBCR_IAC34X; @@ -1126,30 +1127,30 @@ static long set_instruction_bp(struct task_struct *child, */ if (slot2_in_use || (slot3_in_use == slot4_in_use)) { slot = 1; - child->thread.iac1 = bp_info->addr; - child->thread.dbcr0 |= DBCR0_IAC1; + child->thread.debug.iac1 = bp_info->addr; + child->thread.debug.dbcr0 |= DBCR0_IAC1; goto out; } } if (!slot2_in_use) { slot = 2; - child->thread.iac2 = bp_info->addr; - child->thread.dbcr0 |= DBCR0_IAC2; + child->thread.debug.iac2 = bp_info->addr; + child->thread.debug.dbcr0 |= DBCR0_IAC2; #if CONFIG_PPC_ADV_DEBUG_IACS > 2 } else if (!slot3_in_use) { slot = 3; - child->thread.iac3 = bp_info->addr; - child->thread.dbcr0 |= DBCR0_IAC3; + child->thread.debug.iac3 = bp_info->addr; + child->thread.debug.dbcr0 |= DBCR0_IAC3; } else if (!slot4_in_use) { slot = 4; - child->thread.iac4 = bp_info->addr; - child->thread.dbcr0 |= DBCR0_IAC4; + child->thread.debug.iac4 = bp_info->addr; + child->thread.debug.dbcr0 |= DBCR0_IAC4; #endif } else return -ENOSPC; } out: - child->thread.dbcr0 |= DBCR0_IDM; + child->thread.debug.dbcr0 |= DBCR0_IDM; child->thread.regs->msr |= MSR_DE; return slot; @@ -1159,49 +1160,49 @@ static int del_instruction_bp(struct task_struct *child, int slot) { switch (slot) { case 1: - if ((child->thread.dbcr0 & DBCR0_IAC1) == 0) + if ((child->thread.debug.dbcr0 & DBCR0_IAC1) == 0) return -ENOENT; if (dbcr_iac_range(child) & DBCR_IAC12MODE) { /* address range - clear slots 1 & 2 */ - child->thread.iac2 = 0; + child->thread.debug.iac2 = 0; dbcr_iac_range(child) &= ~DBCR_IAC12MODE; } - child->thread.iac1 = 0; - child->thread.dbcr0 &= ~DBCR0_IAC1; + child->thread.debug.iac1 = 0; + child->thread.debug.dbcr0 &= ~DBCR0_IAC1; break; case 2: - if ((child->thread.dbcr0 & DBCR0_IAC2) == 0) + if ((child->thread.debug.dbcr0 & DBCR0_IAC2) == 0) return -ENOENT; if (dbcr_iac_range(child) & DBCR_IAC12MODE) /* used in a range */ return -EINVAL; - child->thread.iac2 = 0; - child->thread.dbcr0 &= ~DBCR0_IAC2; + child->thread.debug.iac2 = 0; + child->thread.debug.dbcr0 &= ~DBCR0_IAC2; break; #if CONFIG_PPC_ADV_DEBUG_IACS > 2 case 3: - if ((child->thread.dbcr0 & DBCR0_IAC3) == 0) + if ((child->thread.debug.dbcr0 & DBCR0_IAC3) == 0) return -ENOENT; if (dbcr_iac_range(child) & DBCR_IAC34MODE) { /* address range - clear slots 3 & 4 */ - child->thread.iac4 = 0; + child->thread.debug.iac4 = 0; dbcr_iac_range(child) &= ~DBCR_IAC34MODE; } - child->thread.iac3 = 0; - child->thread.dbcr0 &= ~DBCR0_IAC3; + child->thread.debug.iac3 = 0; + child->thread.debug.dbcr0 &= ~DBCR0_IAC3; break; case 4: - if ((child->thread.dbcr0 & DBCR0_IAC4) == 0) + if ((child->thread.debug.dbcr0 & DBCR0_IAC4) == 0) return -ENOENT; if (dbcr_iac_range(child) & DBCR_IAC34MODE) /* Used in a range */ return -EINVAL; - child->thread.iac4 = 0; - child->thread.dbcr0 &= ~DBCR0_IAC4; + child->thread.debug.iac4 = 0; + child->thread.debug.dbcr0 &= ~DBCR0_IAC4; break; #endif default: @@ -1231,18 +1232,18 @@ static int set_dac(struct task_struct *child, struct ppc_hw_breakpoint *bp_info) dbcr_dac(child) |= DBCR_DAC1R; if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) dbcr_dac(child) |= DBCR_DAC1W; - child->thread.dac1 = (unsigned long)bp_info->addr; + child->thread.debug.dac1 = (unsigned long)bp_info->addr; #if CONFIG_PPC_ADV_DEBUG_DVCS > 0 if (byte_enable) { - child->thread.dvc1 = + child->thread.debug.dvc1 = (unsigned long)bp_info->condition_value; - child->thread.dbcr2 |= + child->thread.debug.dbcr2 |= ((byte_enable << DBCR2_DVC1BE_SHIFT) | (condition_mode << DBCR2_DVC1M_SHIFT)); } #endif #ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE - } else if (child->thread.dbcr2 & DBCR2_DAC12MODE) { + } else if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE) { /* Both dac1 and dac2 are part of a range */ return -ENOSPC; #endif @@ -1252,19 +1253,19 @@ static int set_dac(struct task_struct *child, struct ppc_hw_breakpoint *bp_info) dbcr_dac(child) |= DBCR_DAC2R; if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) dbcr_dac(child) |= DBCR_DAC2W; - child->thread.dac2 = (unsigned long)bp_info->addr; + child->thread.debug.dac2 = (unsigned long)bp_info->addr; #if CONFIG_PPC_ADV_DEBUG_DVCS > 0 if (byte_enable) { - child->thread.dvc2 = + child->thread.debug.dvc2 = (unsigned long)bp_info->condition_value; - child->thread.dbcr2 |= + child->thread.debug.dbcr2 |= ((byte_enable << DBCR2_DVC2BE_SHIFT) | (condition_mode << DBCR2_DVC2M_SHIFT)); } #endif } else return -ENOSPC; - child->thread.dbcr0 |= DBCR0_IDM; + child->thread.debug.dbcr0 |= DBCR0_IDM; child->thread.regs->msr |= MSR_DE; return slot + 4; @@ -1276,32 +1277,32 @@ static int del_dac(struct task_struct *child, int slot) if ((dbcr_dac(child) & (DBCR_DAC1R | DBCR_DAC1W)) == 0) return -ENOENT; - child->thread.dac1 = 0; + child->thread.debug.dac1 = 0; dbcr_dac(child) &= ~(DBCR_DAC1R | DBCR_DAC1W); #ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE - if (child->thread.dbcr2 & DBCR2_DAC12MODE) { - child->thread.dac2 = 0; - child->thread.dbcr2 &= ~DBCR2_DAC12MODE; + if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE) { + child->thread.debug.dac2 = 0; + child->thread.debug.dbcr2 &= ~DBCR2_DAC12MODE; } - child->thread.dbcr2 &= ~(DBCR2_DVC1M | DBCR2_DVC1BE); + child->thread.debug.dbcr2 &= ~(DBCR2_DVC1M | DBCR2_DVC1BE); #endif #if CONFIG_PPC_ADV_DEBUG_DVCS > 0 - child->thread.dvc1 = 0; + child->thread.debug.dvc1 = 0; #endif } else if (slot == 2) { if ((dbcr_dac(child) & (DBCR_DAC2R | DBCR_DAC2W)) == 0) return -ENOENT; #ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE - if (child->thread.dbcr2 & DBCR2_DAC12MODE) + if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE) /* Part of a range */ return -EINVAL; - child->thread.dbcr2 &= ~(DBCR2_DVC2M | DBCR2_DVC2BE); + child->thread.debug.dbcr2 &= ~(DBCR2_DVC2M | DBCR2_DVC2BE); #endif #if CONFIG_PPC_ADV_DEBUG_DVCS > 0 - child->thread.dvc2 = 0; + child->thread.debug.dvc2 = 0; #endif - child->thread.dac2 = 0; + child->thread.debug.dac2 = 0; dbcr_dac(child) &= ~(DBCR_DAC2R | DBCR_DAC2W); } else return -EINVAL; @@ -1343,22 +1344,22 @@ static int set_dac_range(struct task_struct *child, return -EIO; } - if (child->thread.dbcr0 & + if (child->thread.debug.dbcr0 & (DBCR0_DAC1R | DBCR0_DAC1W | DBCR0_DAC2R | DBCR0_DAC2W)) return -ENOSPC; if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) - child->thread.dbcr0 |= (DBCR0_DAC1R | DBCR0_IDM); + child->thread.debug.dbcr0 |= (DBCR0_DAC1R | DBCR0_IDM); if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) - child->thread.dbcr0 |= (DBCR0_DAC1W | DBCR0_IDM); - child->thread.dac1 = bp_info->addr; - child->thread.dac2 = bp_info->addr2; + child->thread.debug.dbcr0 |= (DBCR0_DAC1W | DBCR0_IDM); + child->thread.debug.dac1 = bp_info->addr; + child->thread.debug.dac2 = bp_info->addr2; if (mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE) - child->thread.dbcr2 |= DBCR2_DAC12M; + child->thread.debug.dbcr2 |= DBCR2_DAC12M; else if (mode == PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE) - child->thread.dbcr2 |= DBCR2_DAC12MX; + child->thread.debug.dbcr2 |= DBCR2_DAC12MX; else /* PPC_BREAKPOINT_MODE_MASK */ - child->thread.dbcr2 |= DBCR2_DAC12MM; + child->thread.debug.dbcr2 |= DBCR2_DAC12MM; child->thread.regs->msr |= MSR_DE; return 5; @@ -1489,9 +1490,9 @@ static long ppc_del_hwdebug(struct task_struct *child, long data) rc = del_dac(child, (int)data - 4); if (!rc) { - if (!DBCR_ACTIVE_EVENTS(child->thread.dbcr0, - child->thread.dbcr1)) { - child->thread.dbcr0 &= ~DBCR0_IDM; + if (!DBCR_ACTIVE_EVENTS(child->thread.debug.dbcr0, + child->thread.debug.dbcr1)) { + child->thread.debug.dbcr0 &= ~DBCR0_IDM; child->thread.regs->msr &= ~MSR_DE; } } @@ -1554,10 +1555,10 @@ long arch_ptrace(struct task_struct *child, long request, flush_fp_to_thread(child); if (fpidx < (PT_FPSCR - PT_FPR0)) - tmp = ((unsigned long *)child->thread.fpr) - [fpidx * TS_FPRWIDTH]; + memcpy(&tmp, &child->thread.TS_FPR(fpidx), + sizeof(long)); else - tmp = child->thread.fpscr.val; + tmp = child->thread.fp_state.fpscr; } ret = put_user(tmp, datalp); break; @@ -1587,10 +1588,10 @@ long arch_ptrace(struct task_struct *child, long request, flush_fp_to_thread(child); if (fpidx < (PT_FPSCR - PT_FPR0)) - ((unsigned long *)child->thread.fpr) - [fpidx * TS_FPRWIDTH] = data; + memcpy(&child->thread.TS_FPR(fpidx), &data, + sizeof(long)); else - child->thread.fpscr.val = data; + child->thread.fp_state.fpscr = data; ret = 0; } break; @@ -1669,7 +1670,7 @@ long arch_ptrace(struct task_struct *child, long request, if (addr > 0) break; #ifdef CONFIG_PPC_ADV_DEBUG_REGS - ret = put_user(child->thread.dac1, datalp); + ret = put_user(child->thread.debug.dac1, datalp); #else dabr_fake = ((child->thread.hw_brk.address & (~HW_BRK_TYPE_DABR)) | (child->thread.hw_brk.type & HW_BRK_TYPE_DABR)); diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace32.c index f51599e941c..f52b7db327c 100644 --- a/arch/powerpc/kernel/ptrace32.c +++ b/arch/powerpc/kernel/ptrace32.c @@ -43,7 +43,6 @@ #define FPRNUMBER(i) (((i) - PT_FPR0) >> 1) #define FPRHALF(i) (((i) - PT_FPR0) & 1) #define FPRINDEX(i) TS_FPRWIDTH * FPRNUMBER(i) * 2 + FPRHALF(i) -#define FPRINDEX_3264(i) (TS_FPRWIDTH * ((i) - PT_FPR0)) long compat_arch_ptrace(struct task_struct *child, compat_long_t request, compat_ulong_t caddr, compat_ulong_t cdata) @@ -105,7 +104,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, * to be an array of unsigned int (32 bits) - the * index passed in is based on this assumption. */ - tmp = ((unsigned int *)child->thread.fpr) + tmp = ((unsigned int *)child->thread.fp_state.fpr) [FPRINDEX(index)]; } ret = put_user((unsigned int)tmp, (u32 __user *)data); @@ -147,8 +146,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, if (numReg >= PT_FPR0) { flush_fp_to_thread(child); /* get 64 bit FPR */ - tmp = ((u64 *)child->thread.fpr) - [FPRINDEX_3264(numReg)]; + tmp = child->thread.fp_state.fpr[numReg - PT_FPR0][0]; } else { /* register within PT_REGS struct */ unsigned long tmp2; ret = ptrace_get_reg(child, numReg, &tmp2); @@ -207,7 +205,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, * to be an array of unsigned int (32 bits) - the * index passed in is based on this assumption. */ - ((unsigned int *)child->thread.fpr) + ((unsigned int *)child->thread.fp_state.fpr) [FPRINDEX(index)] = data; ret = 0; } @@ -251,8 +249,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, u64 *tmp; flush_fp_to_thread(child); /* get 64 bit FPR ... */ - tmp = &(((u64 *)child->thread.fpr) - [FPRINDEX_3264(numReg)]); + tmp = &child->thread.fp_state.fpr[numReg - PT_FPR0][0]; /* ... write the 32 bit part we want */ ((u32 *)tmp)[index % 2] = data; ret = 0; @@ -269,7 +266,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, if (addr > 0) break; #ifdef CONFIG_PPC_ADV_DEBUG_REGS - ret = put_user(child->thread.dac1, (u32 __user *)data); + ret = put_user(child->thread.debug.dac1, (u32 __user *)data); #else dabr_fake = ( (child->thread.hw_brk.address & (~HW_BRK_TYPE_DABR)) | diff --git a/arch/powerpc/kernel/reloc_64.S b/arch/powerpc/kernel/reloc_64.S index b47a0e1ab00..d88736fbece 100644 --- a/arch/powerpc/kernel/reloc_64.S +++ b/arch/powerpc/kernel/reloc_64.S @@ -69,8 +69,8 @@ _GLOBAL(relocate) * R_PPC64_RELATIVE ones. */ mtctr r8 -5: lwz r0,12(9) /* ELF64_R_TYPE(reloc->r_info) */ - cmpwi r0,R_PPC64_RELATIVE +5: ld r0,8(9) /* ELF64_R_TYPE(reloc->r_info) */ + cmpdi r0,R_PPC64_RELATIVE bne 6f ld r6,0(r9) /* reloc->r_offset */ ld r0,16(r9) /* reloc->r_addend */ @@ -81,6 +81,7 @@ _GLOBAL(relocate) 6: blr +.balign 8 p_dyn: .llong __dynamic_start - 0b p_rela: .llong __rela_dyn_start - 0b p_st: .llong _stext - 0b diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 4cf674d7d5a..8b4c857c142 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -993,32 +993,36 @@ struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log, (struct rtas_ext_event_log_v6 *)log->buffer; struct pseries_errorlog *sect; unsigned char *p, *log_end; + uint32_t ext_log_length = rtas_error_extended_log_length(log); + uint8_t log_format = rtas_ext_event_log_format(ext_log); + uint32_t company_id = rtas_ext_event_company_id(ext_log); /* Check that we understand the format */ - if (log->extended_log_length < sizeof(struct rtas_ext_event_log_v6) || - ext_log->log_format != RTAS_V6EXT_LOG_FORMAT_EVENT_LOG || - ext_log->company_id != RTAS_V6EXT_COMPANY_ID_IBM) + if (ext_log_length < sizeof(struct rtas_ext_event_log_v6) || + log_format != RTAS_V6EXT_LOG_FORMAT_EVENT_LOG || + company_id != RTAS_V6EXT_COMPANY_ID_IBM) return NULL; - log_end = log->buffer + log->extended_log_length; + log_end = log->buffer + ext_log_length; p = ext_log->vendor_log; while (p < log_end) { sect = (struct pseries_errorlog *)p; - if (sect->id == section_id) + if (pseries_errorlog_id(sect) == section_id) return sect; - p += sect->length; + p += pseries_errorlog_length(sect); } return NULL; } +/* We assume to be passed big endian arguments */ asmlinkage int ppc_rtas(struct rtas_args __user *uargs) { struct rtas_args args; unsigned long flags; char *buff_copy, *errbuf = NULL; - int nargs; + int nargs, nret, token; int rc; if (!capable(CAP_SYS_ADMIN)) @@ -1027,10 +1031,13 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs) if (copy_from_user(&args, uargs, 3 * sizeof(u32)) != 0) return -EFAULT; - nargs = args.nargs; + nargs = be32_to_cpu(args.nargs); + nret = be32_to_cpu(args.nret); + token = be32_to_cpu(args.token); + if (nargs > ARRAY_SIZE(args.args) - || args.nret > ARRAY_SIZE(args.args) - || nargs + args.nret > ARRAY_SIZE(args.args)) + || nret > ARRAY_SIZE(args.args) + || nargs + nret > ARRAY_SIZE(args.args)) return -EINVAL; /* Copy in args. */ @@ -1038,14 +1045,14 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs) nargs * sizeof(rtas_arg_t)) != 0) return -EFAULT; - if (args.token == RTAS_UNKNOWN_SERVICE) + if (token == RTAS_UNKNOWN_SERVICE) return -EINVAL; args.rets = &args.args[nargs]; - memset(args.rets, 0, args.nret * sizeof(rtas_arg_t)); + memset(args.rets, 0, nret * sizeof(rtas_arg_t)); /* Need to handle ibm,suspend_me call specially */ - if (args.token == ibm_suspend_me_token) { + if (token == ibm_suspend_me_token) { rc = rtas_ibm_suspend_me(&args); if (rc) return rc; @@ -1062,7 +1069,7 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs) /* A -1 return code indicates that the last command couldn't be completed due to a hardware error. */ - if (args.rets[0] == -1) + if (be32_to_cpu(args.rets[0]) == -1) errbuf = __fetch_rtas_last_error(buff_copy); unlock_rtas(flags); @@ -1077,7 +1084,7 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs) /* Copy out args. */ if (copy_to_user(uargs->args + nargs, args.args + nargs, - args.nret * sizeof(rtas_arg_t)) != 0) + nret * sizeof(rtas_arg_t)) != 0) return -EFAULT; return 0; @@ -1135,7 +1142,7 @@ void __init rtas_initialize(void) int __init early_init_dt_scan_rtas(unsigned long node, const char *uname, int depth, void *data) { - u32 *basep, *entryp, *sizep; + const u32 *basep, *entryp, *sizep; if (depth != 1 || strcmp(uname, "rtas") != 0) return 0; diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c index 2f3cdb01506..db2b482af65 100644 --- a/arch/powerpc/kernel/rtas_flash.c +++ b/arch/powerpc/kernel/rtas_flash.c @@ -611,17 +611,19 @@ static void rtas_flash_firmware(int reboot_type) for (f = flist; f; f = next) { /* Translate data addrs to absolute */ for (i = 0; i < f->num_blocks; i++) { - f->blocks[i].data = (char *)__pa(f->blocks[i].data); + f->blocks[i].data = (char *)cpu_to_be64(__pa(f->blocks[i].data)); image_size += f->blocks[i].length; + f->blocks[i].length = cpu_to_be64(f->blocks[i].length); } next = f->next; /* Don't translate NULL pointer for last entry */ if (f->next) - f->next = (struct flash_block_list *)__pa(f->next); + f->next = (struct flash_block_list *)cpu_to_be64(__pa(f->next)); else f->next = NULL; /* make num_blocks into the version/length field */ f->num_blocks = (FLASH_BLOCK_LIST_VERSION << 56) | ((f->num_blocks+1)*16); + f->num_blocks = cpu_to_be64(f->num_blocks); } printk(KERN_ALERT "FLASH: flash image is %ld bytes\n", image_size); @@ -705,7 +707,7 @@ static int __init rtas_flash_init(void) if (rtas_token("ibm,update-flash-64-and-reboot") == RTAS_UNKNOWN_SERVICE) { pr_info("rtas_flash: no firmware flash support\n"); - return 1; + return -EINVAL; } rtas_validate_flash_data.buf = kzalloc(VALIDATE_BUF_SIZE, GFP_KERNEL); diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c index 6e7b7cdeec6..c168337aef9 100644 --- a/arch/powerpc/kernel/rtas_pci.c +++ b/arch/powerpc/kernel/rtas_pci.c @@ -80,10 +80,6 @@ int rtas_read_config(struct pci_dn *pdn, int where, int size, u32 *val) if (ret) return PCIBIOS_DEVICE_NOT_FOUND; - if (returnval == EEH_IO_ERROR_VALUE(size) && - eeh_dev_check_failure(of_node_to_eeh_dev(pdn->node))) - return PCIBIOS_DEVICE_NOT_FOUND; - return PCIBIOS_SUCCESSFUL; } @@ -92,18 +88,39 @@ static int rtas_pci_read_config(struct pci_bus *bus, int where, int size, u32 *val) { struct device_node *busdn, *dn; - - busdn = pci_bus_to_OF_node(bus); + struct pci_dn *pdn; + bool found = false; +#ifdef CONFIG_EEH + struct eeh_dev *edev; +#endif + int ret; /* Search only direct children of the bus */ + *val = 0xFFFFFFFF; + busdn = pci_bus_to_OF_node(bus); for (dn = busdn->child; dn; dn = dn->sibling) { - struct pci_dn *pdn = PCI_DN(dn); + pdn = PCI_DN(dn); if (pdn && pdn->devfn == devfn - && of_device_is_available(dn)) - return rtas_read_config(pdn, where, size, val); + && of_device_is_available(dn)) { + found = true; + break; + } } - return PCIBIOS_DEVICE_NOT_FOUND; + if (!found) + return PCIBIOS_DEVICE_NOT_FOUND; +#ifdef CONFIG_EEH + edev = of_node_to_eeh_dev(dn); + if (edev && edev->pe && edev->pe->state & EEH_PE_RESET) + return PCIBIOS_DEVICE_NOT_FOUND; +#endif + + ret = rtas_read_config(pdn, where, size, val); + if (*val == EEH_IO_ERROR_VALUE(size) && + eeh_dev_check_failure(of_node_to_eeh_dev(dn))) + return PCIBIOS_DEVICE_NOT_FOUND; + + return ret; } int rtas_write_config(struct pci_dn *pdn, int where, int size, u32 val) @@ -136,17 +153,34 @@ static int rtas_pci_write_config(struct pci_bus *bus, int where, int size, u32 val) { struct device_node *busdn, *dn; - - busdn = pci_bus_to_OF_node(bus); + struct pci_dn *pdn; + bool found = false; +#ifdef CONFIG_EEH + struct eeh_dev *edev; +#endif + int ret; /* Search only direct children of the bus */ + busdn = pci_bus_to_OF_node(bus); for (dn = busdn->child; dn; dn = dn->sibling) { - struct pci_dn *pdn = PCI_DN(dn); + pdn = PCI_DN(dn); if (pdn && pdn->devfn == devfn - && of_device_is_available(dn)) - return rtas_write_config(pdn, where, size, val); + && of_device_is_available(dn)) { + found = true; + break; + } } - return PCIBIOS_DEVICE_NOT_FOUND; + + if (!found) + return PCIBIOS_DEVICE_NOT_FOUND; +#ifdef CONFIG_EEH + edev = of_node_to_eeh_dev(dn); + if (edev && edev->pe && (edev->pe->state & EEH_PE_RESET)) + return PCIBIOS_DEVICE_NOT_FOUND; +#endif + ret = rtas_write_config(pdn, where, size, val); + + return ret; } static struct pci_ops rtas_pci_ops = { @@ -223,7 +257,7 @@ unsigned long get_phb_buid(struct device_node *phb) static int phb_set_bus_ranges(struct device_node *dev, struct pci_controller *phb) { - const int *bus_range; + const __be32 *bus_range; unsigned int len; bus_range = of_get_property(dev, "bus-range", &len); @@ -231,8 +265,8 @@ static int phb_set_bus_ranges(struct device_node *dev, return 1; } - phb->first_busno = bus_range[0]; - phb->last_busno = bus_range[1]; + phb->first_busno = be32_to_cpu(bus_range[0]); + phb->last_busno = be32_to_cpu(bus_range[1]); return 0; } diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index 1130c53ad65..e736387fee6 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -150,8 +150,8 @@ static void printk_log_rtas(char *buf, int len) struct rtas_error_log *errlog = (struct rtas_error_log *)buf; printk(RTAS_DEBUG "event: %d, Type: %s, Severity: %d\n", - error_log_cnt, rtas_event_type(errlog->type), - errlog->severity); + error_log_cnt, rtas_event_type(rtas_error_type(errlog)), + rtas_error_severity(errlog)); } } @@ -159,14 +159,16 @@ static int log_rtas_len(char * buf) { int len; struct rtas_error_log *err; + uint32_t extended_log_length; /* rtas fixed header */ len = 8; err = (struct rtas_error_log *)buf; - if (err->extended && err->extended_log_length) { + extended_log_length = rtas_error_extended_log_length(err); + if (rtas_error_extended(err) && extended_log_length) { /* extended header */ - len += err->extended_log_length; + len += extended_log_length; } if (rtas_error_log_max == 0) @@ -293,15 +295,13 @@ void prrn_schedule_update(u32 scope) static void handle_rtas_event(const struct rtas_error_log *log) { - if (log->type == RTAS_TYPE_PRRN) { - /* For PRRN Events the extended log length is used to denote - * the scope for calling rtas update-nodes. - */ - if (prrn_is_enabled()) - prrn_schedule_update(log->extended_log_length); - } + if (rtas_error_type(log) != RTAS_TYPE_PRRN || !prrn_is_enabled()) + return; - return; + /* For PRRN Events the extended log length is used to denote + * the scope for calling rtas update-nodes. + */ + prrn_schedule_update(rtas_error_extended_log_length(log)); } #else diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 3d261c071fc..e5b022c55cc 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -62,8 +62,6 @@ #include <mm/mmu_decl.h> #include <asm/fadump.h> -#include "setup.h" - #ifdef DEBUG #include <asm/udbg.h> #define DBG(fmt...) udbg_printf(fmt) @@ -78,6 +76,9 @@ EXPORT_SYMBOL(ppc_md); struct machdep_calls *machine_id; EXPORT_SYMBOL(machine_id); +int boot_cpuid = -1; +EXPORT_SYMBOL_GPL(boot_cpuid); + unsigned long klimit = (unsigned long) _end; char cmd_line[COMMAND_LINE_SIZE]; @@ -211,6 +212,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) { unsigned long cpu_id = (unsigned long)v - 1; unsigned int pvr; + unsigned long proc_freq; unsigned short maj; unsigned short min; @@ -262,12 +264,19 @@ static int show_cpuinfo(struct seq_file *m, void *v) #endif /* CONFIG_TAU */ /* - * Assume here that all clock rates are the same in a - * smp system. -- Cort + * Platforms that have variable clock rates, should implement + * the method ppc_md.get_proc_freq() that reports the clock + * rate of a given cpu. The rest can use ppc_proc_freq to + * report the clock rate that is same across all cpus. */ - if (ppc_proc_freq) + if (ppc_md.get_proc_freq) + proc_freq = ppc_md.get_proc_freq(cpu_id); + else + proc_freq = ppc_proc_freq; + + if (proc_freq) seq_printf(m, "clock\t\t: %lu.%06luMHz\n", - ppc_proc_freq / 1000000, ppc_proc_freq % 1000000); + proc_freq / 1000000, proc_freq % 1000000); if (ppc_md.show_percpuinfo != NULL) ppc_md.show_percpuinfo(m, cpu_id); @@ -381,9 +390,10 @@ void __init check_for_initrd(void) #ifdef CONFIG_SMP -int threads_per_core, threads_shift; +int threads_per_core, threads_per_subcore, threads_shift; cpumask_t threads_core_mask; EXPORT_SYMBOL_GPL(threads_per_core); +EXPORT_SYMBOL_GPL(threads_per_subcore); EXPORT_SYMBOL_GPL(threads_shift); EXPORT_SYMBOL_GPL(threads_core_mask); @@ -392,6 +402,7 @@ static void __init cpu_init_thread_core_maps(int tpc) int i; threads_per_core = tpc; + threads_per_subcore = tpc; cpumask_clear(&threads_core_mask); /* This implementation only supports power of 2 number of threads @@ -458,9 +469,17 @@ void __init smp_setup_cpu_maps(void) } for (j = 0; j < nthreads && cpu < nr_cpu_ids; j++) { + bool avail; + DBG(" thread %d -> cpu %d (hard id %d)\n", j, cpu, be32_to_cpu(intserv[j])); - set_cpu_present(cpu, true); + + avail = of_device_is_available(dn); + if (!avail) + avail = !of_property_match_string(dn, + "enable-method", "spin-table"); + + set_cpu_present(cpu, avail); set_hard_smp_processor_id(cpu, be32_to_cpu(intserv[j])); set_cpu_possible(cpu, true); cpu++; @@ -481,7 +500,7 @@ void __init smp_setup_cpu_maps(void) if (machine_is(pseries) && firmware_has_feature(FW_FEATURE_LPAR) && (dn = of_find_node_by_path("/rtas"))) { int num_addr_cell, num_size_cell, maxcpus; - const unsigned int *ireg; + const __be32 *ireg; num_addr_cell = of_n_addr_cells(dn); num_size_cell = of_n_size_cells(dn); @@ -491,7 +510,7 @@ void __init smp_setup_cpu_maps(void) if (!ireg) goto out; - maxcpus = ireg[num_addr_cell + num_size_cell]; + maxcpus = be32_to_cpup(ireg + num_addr_cell + num_size_cell); /* Double maxcpus for processors which have SMT capability */ if (cpu_has_feature(CPU_FTR_SMT)) @@ -717,33 +736,6 @@ static int powerpc_debugfs_init(void) arch_initcall(powerpc_debugfs_init); #endif -#ifdef CONFIG_BOOKE_WDT -extern u32 booke_wdt_enabled; -extern u32 booke_wdt_period; - -/* Checks wdt=x and wdt_period=xx command-line option */ -notrace int __init early_parse_wdt(char *p) -{ - if (p && strncmp(p, "0", 1) != 0) - booke_wdt_enabled = 1; - - return 0; -} -early_param("wdt", early_parse_wdt); - -int __init early_parse_wdt_period(char *p) -{ - unsigned long ret; - if (p) { - if (!kstrtol(p, 0, &ret)) - booke_wdt_period = ret; - } - - return 0; -} -early_param("wdt_period", early_parse_wdt_period); -#endif /* CONFIG_BOOKE_WDT */ - void ppc_printk_progress(char *s, unsigned short hex) { pr_info("%s\n", s); diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h deleted file mode 100644 index 4c67ad7fae0..00000000000 --- a/arch/powerpc/kernel/setup.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef _POWERPC_KERNEL_SETUP_H -#define _POWERPC_KERNEL_SETUP_H - -void check_for_initrd(void); -void do_init_bootmem(void); -void setup_panic(void); -extern int do_early_xmon; - -#endif /* _POWERPC_KERNEL_SETUP_H */ diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index a4bbcae7257..ea4fda60e57 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -40,14 +40,10 @@ #include <asm/mmu_context.h> #include <asm/epapr_hcalls.h> -#include "setup.h" - #define DBG(fmt...) extern void bootx_init(unsigned long r4, unsigned long phys); -int boot_cpuid = -1; -EXPORT_SYMBOL_GPL(boot_cpuid); int boot_cpuid_phys; EXPORT_SYMBOL_GPL(boot_cpuid_phys); @@ -249,7 +245,12 @@ static void __init exc_lvl_early_init(void) /* interrupt stacks must be in lowmem, we get that for free on ppc32 * as the memblock is limited to lowmem by MEMBLOCK_REAL_LIMIT */ for_each_possible_cpu(i) { +#ifdef CONFIG_SMP hw_cpu = get_hard_smp_processor_id(i); +#else + hw_cpu = 0; +#endif + critirq_ctx[hw_cpu] = (struct thread_info *) __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); #ifdef CONFIG_BOOKE @@ -298,9 +299,6 @@ void __init setup_arch(char **cmdline_p) if (cpu_has_feature(CPU_FTR_UNIFIED_ID_CACHE)) ucache_bsize = icache_bsize = dcache_bsize; - /* reboot on panic */ - panic_timeout = 180; - if (ppc_md.panic) setup_panic(); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 278ca93e1f2..ee082d77117 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -36,6 +36,7 @@ #include <linux/lockdep.h> #include <linux/memblock.h> #include <linux/hugetlb.h> +#include <linux/memory.h> #include <asm/io.h> #include <asm/kdump.h> @@ -68,15 +69,12 @@ #include <asm/hugetlb.h> #include <asm/epapr_hcalls.h> -#include "setup.h" - #ifdef DEBUG #define DBG(fmt...) udbg_printf(fmt) #else #define DBG(fmt...) #endif -int boot_cpuid = 0; int spinning_secondaries; u64 ppc64_pft_size; @@ -99,6 +97,38 @@ int dcache_bsize; int icache_bsize; int ucache_bsize; +#if defined(CONFIG_PPC_BOOK3E) && defined(CONFIG_SMP) +static void setup_tlb_core_data(void) +{ + int cpu; + + BUILD_BUG_ON(offsetof(struct tlb_core_data, lock) != 0); + + for_each_possible_cpu(cpu) { + int first = cpu_first_thread_sibling(cpu); + + paca[cpu].tcd_ptr = &paca[first].tcd; + + /* + * If we have threads, we need either tlbsrx. + * or e6500 tablewalk mode, or else TLB handlers + * will be racy and could produce duplicate entries. + */ + if (smt_enabled_at_boot >= 2 && + !mmu_has_feature(MMU_FTR_USE_TLBRSRV) && + book3e_htw_mode != PPC_HTW_E6500) { + /* Should we panic instead? */ + WARN_ONCE("%s: unsupported MMU configuration -- expect problems\n", + __func__); + } + } +} +#else +static void setup_tlb_core_data(void) +{ +} +#endif + #ifdef CONFIG_SMP static char *smt_enabled_cmdline; @@ -166,6 +196,19 @@ static void fixup_boot_paca(void) get_paca()->data_offset = 0; } +static void cpu_ready_for_interrupts(void) +{ + /* Set IR and DR in PACA MSR */ + get_paca()->kernel_msr = MSR_KERNEL; + + /* Enable AIL if supported */ + if (cpu_has_feature(CPU_FTR_HVMODE) && + cpu_has_feature(CPU_FTR_ARCH_207S)) { + unsigned long lpcr = mfspr(SPRN_LPCR); + mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3); + } +} + /* * Early initialization entry point. This is called by head.S * with MMU translation disabled. We rely on the "feature" of @@ -232,6 +275,14 @@ void __init early_setup(unsigned long dt_ptr) /* Initialize the hash table or TLB handling */ early_init_mmu(); + /* + * At this point, we can let interrupts switch to virtual mode + * (the MMU has been setup), so adjust the MSR in the PACA to + * have IR and DR set and enable AIL if it exists + */ + cpu_ready_for_interrupts(); + + /* Reserve large chunks of memory for use by CMA for KVM */ kvm_cma_reserve(); /* @@ -264,6 +315,13 @@ void early_setup_secondary(void) /* Initialize the hash table or TLB handling */ early_init_mmu_secondary(); + + /* + * At this point, we can let interrupts switch to virtual mode + * (the MMU has been setup), so adjust the MSR in the PACA to + * have IR and DR set. + */ + cpu_ready_for_interrupts(); } #endif /* CONFIG_SMP */ @@ -284,7 +342,7 @@ void smp_release_cpus(void) ptr = (unsigned long *)((unsigned long)&__secondary_hold_spinloop - PHYSICAL_START); - *ptr = __pa(generic_secondary_smp_init); + *ptr = ppc_function_entry(generic_secondary_smp_init); /* And wait a bit for them to catch up */ for (i = 0; i < 100000; i++) { @@ -447,6 +505,7 @@ void __init setup_system(void) smp_setup_cpu_maps(); check_smt_enabled(); + setup_tlb_core_data(); #ifdef CONFIG_SMP /* Release secondary cpus out of their spinloops at 0x60 now that @@ -522,23 +581,25 @@ static void __init irqstack_early_init(void) #ifdef CONFIG_PPC_BOOK3E static void __init exc_lvl_early_init(void) { - extern unsigned int interrupt_base_book3e; - extern unsigned int exc_debug_debug_book3e; - unsigned int i; + unsigned long sp; for_each_possible_cpu(i) { - critirq_ctx[i] = (struct thread_info *) - __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); - dbgirq_ctx[i] = (struct thread_info *) - __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); - mcheckirq_ctx[i] = (struct thread_info *) - __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); + sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE); + critirq_ctx[i] = (struct thread_info *)__va(sp); + paca[i].crit_kstack = __va(sp + THREAD_SIZE); + + sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE); + dbgirq_ctx[i] = (struct thread_info *)__va(sp); + paca[i].dbg_kstack = __va(sp + THREAD_SIZE); + + sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE); + mcheckirq_ctx[i] = (struct thread_info *)__va(sp); + paca[i].mc_kstack = __va(sp + THREAD_SIZE); } if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC)) - patch_branch(&interrupt_base_book3e + (0x040 / 4) + 1, - (unsigned long)&exc_debug_debug_book3e, 0); + patch_exception(0x040, exc_debug_debug_book3e); } #else #define exc_lvl_early_init() @@ -546,7 +607,8 @@ static void __init exc_lvl_early_init(void) /* * Stack space used when we detect a bad kernel stack pointer, and - * early in SMP boots before relocation is enabled. + * early in SMP boots before relocation is enabled. Exclusive emergency + * stack for machine checks. */ static void __init emergency_stack_init(void) { @@ -569,6 +631,13 @@ static void __init emergency_stack_init(void) sp = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit); sp += THREAD_SIZE; paca[i].emergency_sp = __va(sp); + +#ifdef CONFIG_PPC_BOOK3S_64 + /* emergency stack for machine check exception handling. */ + sp = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit); + sp += THREAD_SIZE; + paca[i].mc_emergency_sp = __va(sp); +#endif } } @@ -590,9 +659,6 @@ void __init setup_arch(char **cmdline_p) dcache_bsize = ppc64_caches.dline_size; icache_bsize = ppc64_caches.iline_size; - /* reboot on panic */ - panic_timeout = 180; - if (ppc_md.panic) setup_panic(); @@ -715,6 +781,15 @@ void __init setup_per_cpu_areas(void) } #endif +#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE +unsigned long memory_block_size_bytes(void) +{ + if (ppc_md.memory_block_size) + return ppc_md.memory_block_size(); + + return MIN_MEMORY_BLOCK_SIZE; +} +#endif #if defined(CONFIG_PPC_INDIRECT_PIO) || defined(CONFIG_PPC_INDIRECT_MMIO) struct ppc_pci_io ppc_pci_io; diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index 457e97aa294..1c794cef288 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -134,7 +134,7 @@ static int do_signal(struct pt_regs *regs) */ if (current->thread.hw_brk.address && current->thread.hw_brk.type) - set_breakpoint(¤t->thread.hw_brk); + __set_breakpoint(¤t->thread.hw_brk); #endif /* Re-enable the breakpoints for the signal stack */ thread_change_pc(current, regs); @@ -203,8 +203,7 @@ unsigned long get_tm_stackpointer(struct pt_regs *regs) #ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (MSR_TM_ACTIVE(regs->msr)) { - tm_enable(); - tm_reclaim(¤t->thread, regs->msr, TM_CAUSE_SIGNAL); + tm_reclaim_current(TM_CAUSE_SIGNAL); if (MSR_TM_TRANSACTIONAL(regs->msr)) return current->thread.ckpt_regs.gpr[1]; } diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index bebdf1a1a54..1bc5a1755ed 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -54,7 +54,6 @@ #include "signal.h" -#undef DEBUG_SIG #ifdef CONFIG_PPC64 #define sys_rt_sigreturn compat_sys_rt_sigreturn @@ -265,27 +264,27 @@ struct rt_sigframe { unsigned long copy_fpr_to_user(void __user *to, struct task_struct *task) { - double buf[ELF_NFPREG]; + u64 buf[ELF_NFPREG]; int i; /* save FPR copy to local buffer then write to the thread_struct */ for (i = 0; i < (ELF_NFPREG - 1) ; i++) buf[i] = task->thread.TS_FPR(i); - memcpy(&buf[i], &task->thread.fpscr, sizeof(double)); + buf[i] = task->thread.fp_state.fpscr; return __copy_to_user(to, buf, ELF_NFPREG * sizeof(double)); } unsigned long copy_fpr_from_user(struct task_struct *task, void __user *from) { - double buf[ELF_NFPREG]; + u64 buf[ELF_NFPREG]; int i; if (__copy_from_user(buf, from, ELF_NFPREG * sizeof(double))) return 1; for (i = 0; i < (ELF_NFPREG - 1) ; i++) task->thread.TS_FPR(i) = buf[i]; - memcpy(&task->thread.fpscr, &buf[i], sizeof(double)); + task->thread.fp_state.fpscr = buf[i]; return 0; } @@ -293,25 +292,25 @@ unsigned long copy_fpr_from_user(struct task_struct *task, unsigned long copy_vsx_to_user(void __user *to, struct task_struct *task) { - double buf[ELF_NVSRHALFREG]; + u64 buf[ELF_NVSRHALFREG]; int i; /* save FPR copy to local buffer then write to the thread_struct */ for (i = 0; i < ELF_NVSRHALFREG; i++) - buf[i] = task->thread.fpr[i][TS_VSRLOWOFFSET]; + buf[i] = task->thread.fp_state.fpr[i][TS_VSRLOWOFFSET]; return __copy_to_user(to, buf, ELF_NVSRHALFREG * sizeof(double)); } unsigned long copy_vsx_from_user(struct task_struct *task, void __user *from) { - double buf[ELF_NVSRHALFREG]; + u64 buf[ELF_NVSRHALFREG]; int i; if (__copy_from_user(buf, from, ELF_NVSRHALFREG * sizeof(double))) return 1; for (i = 0; i < ELF_NVSRHALFREG ; i++) - task->thread.fpr[i][TS_VSRLOWOFFSET] = buf[i]; + task->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i]; return 0; } @@ -319,27 +318,27 @@ unsigned long copy_vsx_from_user(struct task_struct *task, unsigned long copy_transact_fpr_to_user(void __user *to, struct task_struct *task) { - double buf[ELF_NFPREG]; + u64 buf[ELF_NFPREG]; int i; /* save FPR copy to local buffer then write to the thread_struct */ for (i = 0; i < (ELF_NFPREG - 1) ; i++) buf[i] = task->thread.TS_TRANS_FPR(i); - memcpy(&buf[i], &task->thread.transact_fpscr, sizeof(double)); + buf[i] = task->thread.transact_fp.fpscr; return __copy_to_user(to, buf, ELF_NFPREG * sizeof(double)); } unsigned long copy_transact_fpr_from_user(struct task_struct *task, void __user *from) { - double buf[ELF_NFPREG]; + u64 buf[ELF_NFPREG]; int i; if (__copy_from_user(buf, from, ELF_NFPREG * sizeof(double))) return 1; for (i = 0; i < (ELF_NFPREG - 1) ; i++) task->thread.TS_TRANS_FPR(i) = buf[i]; - memcpy(&task->thread.transact_fpscr, &buf[i], sizeof(double)); + task->thread.transact_fp.fpscr = buf[i]; return 0; } @@ -347,25 +346,25 @@ unsigned long copy_transact_fpr_from_user(struct task_struct *task, unsigned long copy_transact_vsx_to_user(void __user *to, struct task_struct *task) { - double buf[ELF_NVSRHALFREG]; + u64 buf[ELF_NVSRHALFREG]; int i; /* save FPR copy to local buffer then write to the thread_struct */ for (i = 0; i < ELF_NVSRHALFREG; i++) - buf[i] = task->thread.transact_fpr[i][TS_VSRLOWOFFSET]; + buf[i] = task->thread.transact_fp.fpr[i][TS_VSRLOWOFFSET]; return __copy_to_user(to, buf, ELF_NVSRHALFREG * sizeof(double)); } unsigned long copy_transact_vsx_from_user(struct task_struct *task, void __user *from) { - double buf[ELF_NVSRHALFREG]; + u64 buf[ELF_NVSRHALFREG]; int i; if (__copy_from_user(buf, from, ELF_NVSRHALFREG * sizeof(double))) return 1; for (i = 0; i < ELF_NVSRHALFREG ; i++) - task->thread.transact_fpr[i][TS_VSRLOWOFFSET] = buf[i]; + task->thread.transact_fp.fpr[i][TS_VSRLOWOFFSET] = buf[i]; return 0; } #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ @@ -373,14 +372,14 @@ unsigned long copy_transact_vsx_from_user(struct task_struct *task, inline unsigned long copy_fpr_to_user(void __user *to, struct task_struct *task) { - return __copy_to_user(to, task->thread.fpr, + return __copy_to_user(to, task->thread.fp_state.fpr, ELF_NFPREG * sizeof(double)); } inline unsigned long copy_fpr_from_user(struct task_struct *task, void __user *from) { - return __copy_from_user(task->thread.fpr, from, + return __copy_from_user(task->thread.fp_state.fpr, from, ELF_NFPREG * sizeof(double)); } @@ -388,14 +387,14 @@ inline unsigned long copy_fpr_from_user(struct task_struct *task, inline unsigned long copy_transact_fpr_to_user(void __user *to, struct task_struct *task) { - return __copy_to_user(to, task->thread.transact_fpr, + return __copy_to_user(to, task->thread.transact_fp.fpr, ELF_NFPREG * sizeof(double)); } inline unsigned long copy_transact_fpr_from_user(struct task_struct *task, void __user *from) { - return __copy_from_user(task->thread.transact_fpr, from, + return __copy_from_user(task->thread.transact_fp.fpr, from, ELF_NFPREG * sizeof(double)); } #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ @@ -423,7 +422,7 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, /* save altivec registers */ if (current->thread.used_vr) { flush_altivec_to_thread(current); - if (__copy_to_user(&frame->mc_vregs, current->thread.vr, + if (__copy_to_user(&frame->mc_vregs, ¤t->thread.vr_state, ELF_NVRREG * sizeof(vector128))) return 1; /* set MSR_VEC in the saved MSR value to indicate that @@ -445,6 +444,12 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, #endif /* CONFIG_ALTIVEC */ if (copy_fpr_to_user(&frame->mc_fregs, current)) return 1; + + /* + * Clear the MSR VSX bit to indicate there is no valid state attached + * to this context, except in the specific case below where we set it. + */ + msr &= ~MSR_VSX; #ifdef CONFIG_VSX /* * Copy VSR 0-31 upper half from thread_struct to local @@ -513,6 +518,13 @@ static int save_tm_user_regs(struct pt_regs *regs, { unsigned long msr = regs->msr; + /* Remove TM bits from thread's MSR. The MSR in the sigcontext + * just indicates to userland that we were doing a transaction, but we + * don't want to return in transactional state. This also ensures + * that flush_fp_to_thread won't set TIF_RESTORE_TM again. + */ + regs->msr &= ~MSR_TS_MASK; + /* Make sure floating point registers are stored in regs */ flush_fp_to_thread(current); @@ -534,17 +546,17 @@ static int save_tm_user_regs(struct pt_regs *regs, /* save altivec registers */ if (current->thread.used_vr) { flush_altivec_to_thread(current); - if (__copy_to_user(&frame->mc_vregs, current->thread.vr, + if (__copy_to_user(&frame->mc_vregs, ¤t->thread.vr_state, ELF_NVRREG * sizeof(vector128))) return 1; if (msr & MSR_VEC) { if (__copy_to_user(&tm_frame->mc_vregs, - current->thread.transact_vr, + ¤t->thread.transact_vr, ELF_NVRREG * sizeof(vector128))) return 1; } else { if (__copy_to_user(&tm_frame->mc_vregs, - current->thread.vr, + ¤t->thread.vr_state, ELF_NVRREG * sizeof(vector128))) return 1; } @@ -692,11 +704,12 @@ static long restore_user_regs(struct pt_regs *regs, regs->msr &= ~MSR_VEC; if (msr & MSR_VEC) { /* restore altivec registers from the stack */ - if (__copy_from_user(current->thread.vr, &sr->mc_vregs, + if (__copy_from_user(¤t->thread.vr_state, &sr->mc_vregs, sizeof(sr->mc_vregs))) return 1; } else if (current->thread.used_vr) - memset(current->thread.vr, 0, ELF_NVRREG * sizeof(vector128)); + memset(¤t->thread.vr_state, 0, + ELF_NVRREG * sizeof(vector128)); /* Always get VRSAVE back */ if (__get_user(current->thread.vrsave, (u32 __user *)&sr->mc_vregs[32])) @@ -722,7 +735,7 @@ static long restore_user_regs(struct pt_regs *regs, return 1; } else if (current->thread.used_vsr) for (i = 0; i < 32 ; i++) - current->thread.fpr[i][TS_VSRLOWOFFSET] = 0; + current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0; #endif /* CONFIG_VSX */ /* * force the process to reload the FP registers from @@ -798,15 +811,16 @@ static long restore_tm_user_regs(struct pt_regs *regs, regs->msr &= ~MSR_VEC; if (msr & MSR_VEC) { /* restore altivec registers from the stack */ - if (__copy_from_user(current->thread.vr, &sr->mc_vregs, + if (__copy_from_user(¤t->thread.vr_state, &sr->mc_vregs, sizeof(sr->mc_vregs)) || - __copy_from_user(current->thread.transact_vr, + __copy_from_user(¤t->thread.transact_vr, &tm_sr->mc_vregs, sizeof(sr->mc_vregs))) return 1; } else if (current->thread.used_vr) { - memset(current->thread.vr, 0, ELF_NVRREG * sizeof(vector128)); - memset(current->thread.transact_vr, 0, + memset(¤t->thread.vr_state, 0, + ELF_NVRREG * sizeof(vector128)); + memset(¤t->thread.transact_vr, 0, ELF_NVRREG * sizeof(vector128)); } @@ -838,8 +852,8 @@ static long restore_tm_user_regs(struct pt_regs *regs, return 1; } else if (current->thread.used_vsr) for (i = 0; i < 32 ; i++) { - current->thread.fpr[i][TS_VSRLOWOFFSET] = 0; - current->thread.transact_fpr[i][TS_VSRLOWOFFSET] = 0; + current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0; + current->thread.transact_fp.fpr[i][TS_VSRLOWOFFSET] = 0; } #endif /* CONFIG_VSX */ @@ -866,6 +880,8 @@ static long restore_tm_user_regs(struct pt_regs *regs, * transactional versions should be loaded. */ tm_enable(); + /* Make sure the transaction is marked as failed */ + current->thread.tm_texasr |= TEXASR_FS; /* This loads the checkpointed FP/VEC state, if used */ tm_recheckpoint(¤t->thread, msr); /* Get the top half of the MSR */ @@ -891,7 +907,7 @@ static long restore_tm_user_regs(struct pt_regs *regs, #endif #ifdef CONFIG_PPC64 -int copy_siginfo_to_user32(struct compat_siginfo __user *d, siginfo_t *s) +int copy_siginfo_to_user32(struct compat_siginfo __user *d, const siginfo_t *s) { int err; @@ -1007,30 +1023,25 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, #ifdef CONFIG_PPC_TRANSACTIONAL_MEM tm_frame = &rt_sf->uc_transact.uc_mcontext; if (MSR_TM_ACTIVE(regs->msr)) { + if (__put_user((unsigned long)&rt_sf->uc_transact, + &rt_sf->uc.uc_link) || + __put_user((unsigned long)tm_frame, + &rt_sf->uc_transact.uc_regs)) + goto badframe; if (save_tm_user_regs(regs, frame, tm_frame, sigret)) goto badframe; } else #endif { + if (__put_user(0, &rt_sf->uc.uc_link)) + goto badframe; if (save_user_regs(regs, frame, tm_frame, sigret, 1)) goto badframe; } regs->link = tramp; -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - if (MSR_TM_ACTIVE(regs->msr)) { - if (__put_user((unsigned long)&rt_sf->uc_transact, - &rt_sf->uc.uc_link) - || __put_user((unsigned long)tm_frame, &rt_sf->uc_transact.uc_regs)) - goto badframe; - } - else -#endif - if (__put_user(0, &rt_sf->uc.uc_link)) - goto badframe; - - current->thread.fpscr.val = 0; /* turn off all fp exceptions */ + current->thread.fp_state.fpscr = 0; /* turn off all fp exceptions */ /* create a stack frame for the caller of the handler */ newsp = ((unsigned long)rt_sf) - (__SIGNAL_FRAMESIZE + 16); @@ -1045,22 +1056,12 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, regs->gpr[5] = (unsigned long) &rt_sf->uc; regs->gpr[6] = (unsigned long) rt_sf; regs->nip = (unsigned long) ka->sa.sa_handler; - /* enter the signal handler in big-endian mode */ + /* enter the signal handler in native-endian mode */ regs->msr &= ~MSR_LE; -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - /* Remove TM bits from thread's MSR. The MSR in the sigcontext - * just indicates to userland that we were doing a transaction, but we - * don't want to return in transactional state: - */ - regs->msr &= ~MSR_TS_MASK; -#endif + regs->msr |= (MSR_KERNEL & MSR_LE); return 1; badframe: -#ifdef DEBUG_SIG - printk("badframe in handle_rt_signal, regs=%p frame=%p newsp=%lx\n", - regs, frame, newsp); -#endif if (show_unhandled_signals) printk_ratelimited(KERN_INFO "%s[%d]: bad frame in handle_rt_signal32: " @@ -1309,7 +1310,7 @@ int sys_debug_setcontext(struct ucontext __user *ctx, unsigned char tmp; unsigned long new_msr = regs->msr; #ifdef CONFIG_PPC_ADV_DEBUG_REGS - unsigned long new_dbcr0 = current->thread.dbcr0; + unsigned long new_dbcr0 = current->thread.debug.dbcr0; #endif for (i=0; i<ndbg; i++) { @@ -1324,7 +1325,7 @@ int sys_debug_setcontext(struct ucontext __user *ctx, } else { new_dbcr0 &= ~DBCR0_IC; if (!DBCR_ACTIVE_EVENTS(new_dbcr0, - current->thread.dbcr1)) { + current->thread.debug.dbcr1)) { new_msr &= ~MSR_DE; new_dbcr0 &= ~DBCR0_IDM; } @@ -1359,7 +1360,7 @@ int sys_debug_setcontext(struct ucontext __user *ctx, the user is really doing something wrong. */ regs->msr = new_msr; #ifdef CONFIG_PPC_ADV_DEBUG_REGS - current->thread.dbcr0 = new_dbcr0; + current->thread.debug.dbcr0 = new_dbcr0; #endif if (!access_ok(VERIFY_READ, ctx, sizeof(*ctx)) @@ -1462,7 +1463,7 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka, regs->link = tramp; - current->thread.fpscr.val = 0; /* turn off all fp exceptions */ + current->thread.fp_state.fpscr = 0; /* turn off all fp exceptions */ /* create a stack frame for the caller of the handler */ newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE; @@ -1475,20 +1476,9 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka, regs->nip = (unsigned long) ka->sa.sa_handler; /* enter the signal handler in big-endian mode */ regs->msr &= ~MSR_LE; -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - /* Remove TM bits from thread's MSR. The MSR in the sigcontext - * just indicates to userland that we were doing a transaction, but we - * don't want to return in transactional state: - */ - regs->msr &= ~MSR_TS_MASK; -#endif return 1; badframe: -#ifdef DEBUG_SIG - printk("badframe in handle_signal, regs=%p frame=%p newsp=%lx\n", - regs, frame, newsp); -#endif if (show_unhandled_signals) printk_ratelimited(KERN_INFO "%s[%d]: bad frame in handle_signal32: " diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index f93ec2835a1..97c1e4b683f 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -38,7 +38,6 @@ #include "signal.h" -#define DEBUG_SIG 0 #define GP_REGS_SIZE min(sizeof(elf_gregset_t), sizeof(struct pt_regs)) #define FP_REGS_SIZE sizeof(elf_fpregset_t) @@ -65,8 +64,8 @@ struct rt_sigframe { struct siginfo __user *pinfo; void __user *puc; struct siginfo info; - /* 64 bit ABI allows for 288 bytes below sp before decrementing it. */ - char abigap[288]; + /* New 64 bit little-endian ABI allows redzone of 512 bytes below sp */ + char abigap[USER_REDZONE_SIZE]; } __attribute__ ((aligned (16))); static const char fmt32[] = KERN_INFO \ @@ -103,7 +102,8 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, if (current->thread.used_vr) { flush_altivec_to_thread(current); /* Copy 33 vec registers (vr0..31 and vscr) to the stack */ - err |= __copy_to_user(v_regs, current->thread.vr, 33 * sizeof(vector128)); + err |= __copy_to_user(v_regs, ¤t->thread.vr_state, + 33 * sizeof(vector128)); /* set MSR_VEC in the MSR value in the frame to indicate that sc->v_reg) * contains valid data. */ @@ -121,6 +121,12 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, flush_fp_to_thread(current); /* copy fpr regs and fpscr */ err |= copy_fpr_to_user(&sc->fp_regs, current); + + /* + * Clear the MSR VSX bit to indicate there is no valid state attached + * to this context, except in the specific case below where we set it. + */ + msr &= ~MSR_VSX; #ifdef CONFIG_VSX /* * Copy VSX low doubleword to local buffer for formatting, @@ -185,6 +191,13 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc, BUG_ON(!MSR_TM_ACTIVE(regs->msr)); + /* Remove TM bits from thread's MSR. The MSR in the sigcontext + * just indicates to userland that we were doing a transaction, but we + * don't want to return in transactional state. This also ensures + * that flush_fp_to_thread won't set TIF_RESTORE_TM again. + */ + regs->msr &= ~MSR_TS_MASK; + flush_fp_to_thread(current); #ifdef CONFIG_ALTIVEC @@ -195,18 +208,18 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc, if (current->thread.used_vr) { flush_altivec_to_thread(current); /* Copy 33 vec registers (vr0..31 and vscr) to the stack */ - err |= __copy_to_user(v_regs, current->thread.vr, + err |= __copy_to_user(v_regs, ¤t->thread.vr_state, 33 * sizeof(vector128)); /* If VEC was enabled there are transactional VRs valid too, * else they're a copy of the checkpointed VRs. */ if (msr & MSR_VEC) err |= __copy_to_user(tm_v_regs, - current->thread.transact_vr, + ¤t->thread.transact_vr, 33 * sizeof(vector128)); else err |= __copy_to_user(tm_v_regs, - current->thread.vr, + ¤t->thread.vr_state, 33 * sizeof(vector128)); /* set MSR_VEC in the MSR value in the frame to indicate @@ -349,10 +362,10 @@ static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig, return -EFAULT; /* Copy 33 vec registers (vr0..31 and vscr) from the stack */ if (v_regs != NULL && (msr & MSR_VEC) != 0) - err |= __copy_from_user(current->thread.vr, v_regs, + err |= __copy_from_user(¤t->thread.vr_state, v_regs, 33 * sizeof(vector128)); else if (current->thread.used_vr) - memset(current->thread.vr, 0, 33 * sizeof(vector128)); + memset(¤t->thread.vr_state, 0, 33 * sizeof(vector128)); /* Always get VRSAVE back */ if (v_regs != NULL) err |= __get_user(current->thread.vrsave, (u32 __user *)&v_regs[33]); @@ -374,7 +387,7 @@ static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig, err |= copy_vsx_from_user(current, v_regs); else for (i = 0; i < 32 ; i++) - current->thread.fpr[i][TS_VSRLOWOFFSET] = 0; + current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0; #endif return err; } @@ -468,14 +481,14 @@ static long restore_tm_sigcontexts(struct pt_regs *regs, return -EFAULT; /* Copy 33 vec registers (vr0..31 and vscr) from the stack */ if (v_regs != NULL && tm_v_regs != NULL && (msr & MSR_VEC) != 0) { - err |= __copy_from_user(current->thread.vr, v_regs, + err |= __copy_from_user(¤t->thread.vr_state, v_regs, 33 * sizeof(vector128)); - err |= __copy_from_user(current->thread.transact_vr, tm_v_regs, + err |= __copy_from_user(¤t->thread.transact_vr, tm_v_regs, 33 * sizeof(vector128)); } else if (current->thread.used_vr) { - memset(current->thread.vr, 0, 33 * sizeof(vector128)); - memset(current->thread.transact_vr, 0, 33 * sizeof(vector128)); + memset(¤t->thread.vr_state, 0, 33 * sizeof(vector128)); + memset(¤t->thread.transact_vr, 0, 33 * sizeof(vector128)); } /* Always get VRSAVE back */ if (v_regs != NULL && tm_v_regs != NULL) { @@ -507,12 +520,14 @@ static long restore_tm_sigcontexts(struct pt_regs *regs, err |= copy_transact_vsx_from_user(current, tm_v_regs); } else { for (i = 0; i < 32 ; i++) { - current->thread.fpr[i][TS_VSRLOWOFFSET] = 0; - current->thread.transact_fpr[i][TS_VSRLOWOFFSET] = 0; + current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0; + current->thread.transact_fp.fpr[i][TS_VSRLOWOFFSET] = 0; } } #endif tm_enable(); + /* Make sure the transaction is marked as failed */ + current->thread.tm_texasr |= TEXASR_FS; /* This loads the checkpointed FP/VEC state, if used */ tm_recheckpoint(¤t->thread, msr); @@ -684,10 +699,6 @@ int sys_rt_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5, return 0; badframe: -#if DEBUG_SIG - printk("badframe in sys_rt_sigreturn, regs=%p uc=%p &uc->uc_mcontext=%p\n", - regs, uc, &uc->uc_mcontext); -#endif if (show_unhandled_signals) printk_ratelimited(regs->msr & MSR_64BIT ? fmt64 : fmt32, current->comm, current->pid, "rt_sigreturn", @@ -700,12 +711,6 @@ badframe: int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct pt_regs *regs) { - /* Handler is *really* a pointer to the function descriptor for - * the signal routine. The first entry in the function - * descriptor is the entry address of signal and the second - * entry is the TOC value we need to use. - */ - func_descr_t __user *funct_desc_ptr; struct rt_sigframe __user *frame; unsigned long newsp = 0; long err = 0; @@ -747,14 +752,7 @@ int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info, goto badframe; /* Make sure signal handler doesn't get spurious FP exceptions */ - current->thread.fpscr.val = 0; -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - /* Remove TM bits from thread's MSR. The MSR in the sigcontext - * just indicates to userland that we were doing a transaction, but we - * don't want to return in transactional state: - */ - regs->msr &= ~MSR_TS_MASK; -#endif + current->thread.fp_state.fpscr = 0; /* Set up to return from userspace. */ if (vdso64_rt_sigtramp && current->mm->context.vdso_base) { @@ -765,18 +763,32 @@ int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info, goto badframe; regs->link = (unsigned long) &frame->tramp[0]; } - funct_desc_ptr = (func_descr_t __user *) ka->sa.sa_handler; /* Allocate a dummy caller frame for the signal handler. */ newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE; err |= put_user(regs->gpr[1], (unsigned long __user *)newsp); /* Set up "regs" so we "return" to the signal handler. */ - err |= get_user(regs->nip, &funct_desc_ptr->entry); - /* enter the signal handler in big-endian mode */ + if (is_elf2_task()) { + regs->nip = (unsigned long) ka->sa.sa_handler; + regs->gpr[12] = regs->nip; + } else { + /* Handler is *really* a pointer to the function descriptor for + * the signal routine. The first entry in the function + * descriptor is the entry address of signal and the second + * entry is the TOC value we need to use. + */ + func_descr_t __user *funct_desc_ptr = + (func_descr_t __user *) ka->sa.sa_handler; + + err |= get_user(regs->nip, &funct_desc_ptr->entry); + err |= get_user(regs->gpr[2], &funct_desc_ptr->toc); + } + + /* enter the signal handler in native-endian mode */ regs->msr &= ~MSR_LE; + regs->msr |= (MSR_KERNEL & MSR_LE); regs->gpr[1] = newsp; - err |= get_user(regs->gpr[2], &funct_desc_ptr->toc); regs->gpr[3] = signr; regs->result = 0; if (ka->sa.sa_flags & SA_SIGINFO) { @@ -792,10 +804,6 @@ int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info, return 1; badframe: -#if DEBUG_SIG - printk("badframe in setup_rt_frame, regs=%p frame=%p newsp=%lx\n", - regs, frame, newsp); -#endif if (show_unhandled_signals) printk_ratelimited(regs->msr & MSR_64BIT ? fmt64 : fmt32, current->comm, current->pid, "setup_rt_frame", diff --git a/arch/powerpc/kernel/smp-tbsync.c b/arch/powerpc/kernel/smp-tbsync.c index e68fd1ae727..7a37ecd3afa 100644 --- a/arch/powerpc/kernel/smp-tbsync.c +++ b/arch/powerpc/kernel/smp-tbsync.c @@ -9,7 +9,6 @@ #include <linux/sched.h> #include <linux/smp.h> #include <linux/unistd.h> -#include <linux/init.h> #include <linux/slab.h> #include <linux/atomic.h> #include <asm/smp.h> diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 8e59abc237d..1007fb802e6 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -35,6 +35,8 @@ #include <asm/ptrace.h> #include <linux/atomic.h> #include <asm/irq.h> +#include <asm/hw_irq.h> +#include <asm/kvm_ppc.h> #include <asm/page.h> #include <asm/pgtable.h> #include <asm/prom.h> @@ -145,9 +147,9 @@ static irqreturn_t reschedule_action(int irq, void *data) return IRQ_HANDLED; } -static irqreturn_t call_function_single_action(int irq, void *data) +static irqreturn_t tick_broadcast_ipi_action(int irq, void *data) { - generic_smp_call_function_single_interrupt(); + tick_broadcast_ipi_handler(); return IRQ_HANDLED; } @@ -168,14 +170,14 @@ static irqreturn_t debug_ipi_action(int irq, void *data) static irq_handler_t smp_ipi_action[] = { [PPC_MSG_CALL_FUNCTION] = call_function_action, [PPC_MSG_RESCHEDULE] = reschedule_action, - [PPC_MSG_CALL_FUNC_SINGLE] = call_function_single_action, + [PPC_MSG_TICK_BROADCAST] = tick_broadcast_ipi_action, [PPC_MSG_DEBUGGER_BREAK] = debug_ipi_action, }; const char *smp_ipi_name[] = { [PPC_MSG_CALL_FUNCTION] = "ipi call function", [PPC_MSG_RESCHEDULE] = "ipi reschedule", - [PPC_MSG_CALL_FUNC_SINGLE] = "ipi call function single", + [PPC_MSG_TICK_BROADCAST] = "ipi tick-broadcast", [PPC_MSG_DEBUGGER_BREAK] = "ipi debugger", }; @@ -251,8 +253,8 @@ irqreturn_t smp_ipi_demux(void) generic_smp_call_function_interrupt(); if (all & IPI_MESSAGE(PPC_MSG_RESCHEDULE)) scheduler_ipi(); - if (all & IPI_MESSAGE(PPC_MSG_CALL_FUNC_SINGLE)) - generic_smp_call_function_single_interrupt(); + if (all & IPI_MESSAGE(PPC_MSG_TICK_BROADCAST)) + tick_broadcast_ipi_handler(); if (all & IPI_MESSAGE(PPC_MSG_DEBUGGER_BREAK)) debug_ipi_action(0, NULL); } while (info->messages); @@ -280,7 +282,7 @@ EXPORT_SYMBOL_GPL(smp_send_reschedule); void arch_send_call_function_single_ipi(int cpu) { - do_message_pass(cpu, PPC_MSG_CALL_FUNC_SINGLE); + do_message_pass(cpu, PPC_MSG_CALL_FUNCTION); } void arch_send_call_function_ipi_mask(const struct cpumask *mask) @@ -291,6 +293,16 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask) do_message_pass(cpu, PPC_MSG_CALL_FUNCTION); } +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST +void tick_broadcast(const struct cpumask *mask) +{ + unsigned int cpu; + + for_each_cpu(cpu, mask) + do_message_pass(cpu, PPC_MSG_TICK_BROADCAST); +} +#endif + #if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC) void smp_send_debugger_break(void) { @@ -369,13 +381,8 @@ void __init smp_prepare_cpus(unsigned int max_cpus) cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid)); cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid)); - if (smp_ops) - if (smp_ops->probe) - max_cpus = smp_ops->probe(); - else - max_cpus = NR_CPUS; - else - max_cpus = 1; + if (smp_ops && smp_ops->probe) + smp_ops->probe(); } void smp_prepare_boot_cpu(void) @@ -384,6 +391,7 @@ void smp_prepare_boot_cpu(void) #ifdef CONFIG_PPC64 paca[boot_cpuid].__current = current; #endif + set_numa_node(numa_cpu_lookup_table[boot_cpuid]); current_set[boot_cpuid] = task_thread_info(current); } @@ -451,38 +459,9 @@ int generic_check_cpu_restart(unsigned int cpu) return per_cpu(cpu_state, cpu) == CPU_UP_PREPARE; } -static atomic_t secondary_inhibit_count; - -/* - * Don't allow secondary CPU threads to come online - */ -void inhibit_secondary_onlining(void) -{ - /* - * This makes secondary_inhibit_count stable during cpu - * online/offline operations. - */ - get_online_cpus(); - - atomic_inc(&secondary_inhibit_count); - put_online_cpus(); -} -EXPORT_SYMBOL_GPL(inhibit_secondary_onlining); - -/* - * Allow secondary CPU threads to come online again - */ -void uninhibit_secondary_onlining(void) -{ - get_online_cpus(); - atomic_dec(&secondary_inhibit_count); - put_online_cpus(); -} -EXPORT_SYMBOL_GPL(uninhibit_secondary_onlining); - -static int secondaries_inhibited(void) +static bool secondaries_inhibited(void) { - return atomic_read(&secondary_inhibit_count); + return kvm_hv_mode_active(); } #else /* HOTPLUG_CPU */ @@ -511,7 +490,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle) * Don't allow secondary threads to come online if inhibited */ if (threads_per_core > 1 && secondaries_inhibited() && - cpu % threads_per_core != 0) + cpu_thread_in_subcore(cpu)) return -EBUSY; if (smp_ops == NULL || @@ -580,7 +559,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle) int cpu_to_core_id(int cpu) { struct device_node *np; - const int *reg; + const __be32 *reg; int id = -1; np = of_get_cpu_node(cpu, NULL); @@ -591,28 +570,12 @@ int cpu_to_core_id(int cpu) if (!reg) goto out; - id = *reg; + id = be32_to_cpup(reg); out: of_node_put(np); return id; } -/* Return the value of the chip-id property corresponding - * to the given logical cpu. - */ -int cpu_to_chip_id(int cpu) -{ - struct device_node *np; - - np = of_get_cpu_node(cpu, NULL); - if (!np) - return -1; - - of_node_put(np); - return of_get_ibm_chip_id(np); -} -EXPORT_SYMBOL(cpu_to_chip_id); - /* Helper routines for cpu to core mapping */ int cpu_core_index_of_thread(int cpu) { @@ -760,6 +723,12 @@ void start_secondary(void *unused) } traverse_core_siblings(cpu, true); + /* + * numa_node_id() works after this. + */ + set_numa_node(numa_cpu_lookup_table[cpu]); + set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu])); + smp_wmb(); notify_cpu_starting(cpu); set_cpu_online(cpu, true); @@ -776,6 +745,28 @@ int setup_profiling_timer(unsigned int multiplier) return 0; } +#ifdef CONFIG_SCHED_SMT +/* cpumask of CPUs with asymetric SMT dependancy */ +static int powerpc_smt_flags(void) +{ + int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES; + + if (cpu_has_feature(CPU_FTR_ASYM_SMT)) { + printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n"); + flags |= SD_ASYM_PACKING; + } + return flags; +} +#endif + +static struct sched_domain_topology_level powerpc_topology[] = { +#ifdef CONFIG_SCHED_SMT + { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) }, +#endif + { cpu_cpu_mask, SD_INIT_NAME(DIE) }, + { NULL, }, +}; + void __init smp_cpus_done(unsigned int max_cpus) { cpumask_var_t old_mask; @@ -800,15 +791,8 @@ void __init smp_cpus_done(unsigned int max_cpus) dump_numa_cpu_topology(); -} + set_sched_topology(powerpc_topology); -int arch_sd_sibling_asym_packing(void) -{ - if (cpu_has_feature(CPU_FTR_ASYM_SMT)) { - printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n"); - return SD_ASYM_PACKING; - } - return 0; } #ifdef CONFIG_HOTPLUG_CPU @@ -844,18 +828,6 @@ void __cpu_die(unsigned int cpu) smp_ops->cpu_die(cpu); } -static DEFINE_MUTEX(powerpc_cpu_hotplug_driver_mutex); - -void cpu_hotplug_driver_lock() -{ - mutex_lock(&powerpc_cpu_hotplug_driver_mutex); -} - -void cpu_hotplug_driver_unlock() -{ - mutex_unlock(&powerpc_cpu_hotplug_driver_mutex); -} - void cpu_die(void) { if (ppc_md.cpu_die) diff --git a/arch/powerpc/kernel/swsusp_asm64.S b/arch/powerpc/kernel/swsusp_asm64.S index 22045984835..988f38dced0 100644 --- a/arch/powerpc/kernel/swsusp_asm64.S +++ b/arch/powerpc/kernel/swsusp_asm64.S @@ -114,7 +114,9 @@ _GLOBAL(swsusp_arch_suspend) SAVE_SPECIAL(MSR) SAVE_SPECIAL(XER) #ifdef CONFIG_PPC_BOOK3S_64 +BEGIN_FW_FTR_SECTION SAVE_SPECIAL(SDR1) +END_FW_FTR_SECTION_IFCLR(FW_FEATURE_LPAR) #else SAVE_SPR(TCR) @@ -231,7 +233,9 @@ nothing_to_copy: /* can't use RESTORE_SPECIAL(MSR) */ ld r0, SL_MSR(r11) mtmsrd r0, 0 +BEGIN_FW_FTR_SECTION RESTORE_SPECIAL(SDR1) +END_FW_FTR_SECTION_IFCLR(FW_FEATURE_LPAR) #else /* Restore SPRG1, be used to save paca */ ld r0, SL_SPRG1(r11) diff --git a/arch/powerpc/kernel/swsusp_booke.S b/arch/powerpc/kernel/swsusp_booke.S index 0f204053e5b..553c1405ee0 100644 --- a/arch/powerpc/kernel/swsusp_booke.S +++ b/arch/powerpc/kernel/swsusp_booke.S @@ -74,21 +74,21 @@ _GLOBAL(swsusp_arch_suspend) bne 1b /* Save SPRGs */ - mfsprg r4,0 + mfspr r4,SPRN_SPRG0 stw r4,SL_SPRG0(r11) - mfsprg r4,1 + mfspr r4,SPRN_SPRG1 stw r4,SL_SPRG1(r11) - mfsprg r4,2 + mfspr r4,SPRN_SPRG2 stw r4,SL_SPRG2(r11) - mfsprg r4,3 + mfspr r4,SPRN_SPRG3 stw r4,SL_SPRG3(r11) - mfsprg r4,4 + mfspr r4,SPRN_SPRG4 stw r4,SL_SPRG4(r11) - mfsprg r4,5 + mfspr r4,SPRN_SPRG5 stw r4,SL_SPRG5(r11) - mfsprg r4,6 + mfspr r4,SPRN_SPRG6 stw r4,SL_SPRG6(r11) - mfsprg r4,7 + mfspr r4,SPRN_SPRG7 stw r4,SL_SPRG7(r11) /* Call the low level suspend stuff (we should probably have made @@ -150,21 +150,21 @@ _GLOBAL(swsusp_arch_resume) bl _tlbil_all lwz r4,SL_SPRG0(r11) - mtsprg 0,r4 + mtspr SPRN_SPRG0,r4 lwz r4,SL_SPRG1(r11) - mtsprg 1,r4 + mtspr SPRN_SPRG1,r4 lwz r4,SL_SPRG2(r11) - mtsprg 2,r4 + mtspr SPRN_SPRG2,r4 lwz r4,SL_SPRG3(r11) - mtsprg 3,r4 + mtspr SPRN_SPRG3,r4 lwz r4,SL_SPRG4(r11) - mtsprg 4,r4 + mtspr SPRN_SPRG4,r4 lwz r4,SL_SPRG5(r11) - mtsprg 5,r4 + mtspr SPRN_SPRG5,r4 lwz r4,SL_SPRG6(r11) - mtsprg 6,r4 + mtspr SPRN_SPRG6,r4 lwz r4,SL_SPRG7(r11) - mtsprg 7,r4 + mtspr SPRN_SPRG7,r4 /* restore the MSR */ lwz r3,SL_MSR(r11) diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c index 4e3cc47f26b..cd9be9aa016 100644 --- a/arch/powerpc/kernel/syscalls.c +++ b/arch/powerpc/kernel/syscalls.c @@ -34,7 +34,6 @@ #include <linux/ipc.h> #include <linux/utsname.h> #include <linux/file.h> -#include <linux/init.h> #include <linux/personality.h> #include <asm/uaccess.h> diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 27a90b99ef6..67fd2fd2620 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -17,6 +17,7 @@ #include <asm/machdep.h> #include <asm/smp.h> #include <asm/pmc.h> +#include <asm/firmware.h> #include "cacheinfo.h" @@ -50,8 +51,6 @@ static ssize_t store_smt_snooze_delay(struct device *dev, return -EINVAL; per_cpu(smt_snooze_delay, cpu->dev.id) = snooze; - update_smt_snooze_delay(cpu->dev.id, snooze); - return count; } @@ -85,6 +84,304 @@ __setup("smt-snooze-delay=", setup_smt_snooze_delay); #endif /* CONFIG_PPC64 */ +#ifdef CONFIG_PPC_FSL_BOOK3E +#define MAX_BIT 63 + +static u64 pw20_wt; +static u64 altivec_idle_wt; + +static unsigned int get_idle_ticks_bit(u64 ns) +{ + u64 cycle; + + if (ns >= 10000) + cycle = div_u64(ns + 500, 1000) * tb_ticks_per_usec; + else + cycle = div_u64(ns * tb_ticks_per_usec, 1000); + + if (!cycle) + return 0; + + return ilog2(cycle); +} + +static void do_show_pwrmgtcr0(void *val) +{ + u32 *value = val; + + *value = mfspr(SPRN_PWRMGTCR0); +} + +static ssize_t show_pw20_state(struct device *dev, + struct device_attribute *attr, char *buf) +{ + u32 value; + unsigned int cpu = dev->id; + + smp_call_function_single(cpu, do_show_pwrmgtcr0, &value, 1); + + value &= PWRMGTCR0_PW20_WAIT; + + return sprintf(buf, "%u\n", value ? 1 : 0); +} + +static void do_store_pw20_state(void *val) +{ + u32 *value = val; + u32 pw20_state; + + pw20_state = mfspr(SPRN_PWRMGTCR0); + + if (*value) + pw20_state |= PWRMGTCR0_PW20_WAIT; + else + pw20_state &= ~PWRMGTCR0_PW20_WAIT; + + mtspr(SPRN_PWRMGTCR0, pw20_state); +} + +static ssize_t store_pw20_state(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + u32 value; + unsigned int cpu = dev->id; + + if (kstrtou32(buf, 0, &value)) + return -EINVAL; + + if (value > 1) + return -EINVAL; + + smp_call_function_single(cpu, do_store_pw20_state, &value, 1); + + return count; +} + +static ssize_t show_pw20_wait_time(struct device *dev, + struct device_attribute *attr, char *buf) +{ + u32 value; + u64 tb_cycle = 1; + u64 time; + + unsigned int cpu = dev->id; + + if (!pw20_wt) { + smp_call_function_single(cpu, do_show_pwrmgtcr0, &value, 1); + value = (value & PWRMGTCR0_PW20_ENT) >> + PWRMGTCR0_PW20_ENT_SHIFT; + + tb_cycle = (tb_cycle << (MAX_BIT - value + 1)); + /* convert ms to ns */ + if (tb_ticks_per_usec > 1000) { + time = div_u64(tb_cycle, tb_ticks_per_usec / 1000); + } else { + u32 rem_us; + + time = div_u64_rem(tb_cycle, tb_ticks_per_usec, + &rem_us); + time = time * 1000 + rem_us * 1000 / tb_ticks_per_usec; + } + } else { + time = pw20_wt; + } + + return sprintf(buf, "%llu\n", time > 0 ? time : 0); +} + +static void set_pw20_wait_entry_bit(void *val) +{ + u32 *value = val; + u32 pw20_idle; + + pw20_idle = mfspr(SPRN_PWRMGTCR0); + + /* Set Automatic PW20 Core Idle Count */ + /* clear count */ + pw20_idle &= ~PWRMGTCR0_PW20_ENT; + + /* set count */ + pw20_idle |= ((MAX_BIT - *value) << PWRMGTCR0_PW20_ENT_SHIFT); + + mtspr(SPRN_PWRMGTCR0, pw20_idle); +} + +static ssize_t store_pw20_wait_time(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + u32 entry_bit; + u64 value; + + unsigned int cpu = dev->id; + + if (kstrtou64(buf, 0, &value)) + return -EINVAL; + + if (!value) + return -EINVAL; + + entry_bit = get_idle_ticks_bit(value); + if (entry_bit > MAX_BIT) + return -EINVAL; + + pw20_wt = value; + + smp_call_function_single(cpu, set_pw20_wait_entry_bit, + &entry_bit, 1); + + return count; +} + +static ssize_t show_altivec_idle(struct device *dev, + struct device_attribute *attr, char *buf) +{ + u32 value; + unsigned int cpu = dev->id; + + smp_call_function_single(cpu, do_show_pwrmgtcr0, &value, 1); + + value &= PWRMGTCR0_AV_IDLE_PD_EN; + + return sprintf(buf, "%u\n", value ? 1 : 0); +} + +static void do_store_altivec_idle(void *val) +{ + u32 *value = val; + u32 altivec_idle; + + altivec_idle = mfspr(SPRN_PWRMGTCR0); + + if (*value) + altivec_idle |= PWRMGTCR0_AV_IDLE_PD_EN; + else + altivec_idle &= ~PWRMGTCR0_AV_IDLE_PD_EN; + + mtspr(SPRN_PWRMGTCR0, altivec_idle); +} + +static ssize_t store_altivec_idle(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + u32 value; + unsigned int cpu = dev->id; + + if (kstrtou32(buf, 0, &value)) + return -EINVAL; + + if (value > 1) + return -EINVAL; + + smp_call_function_single(cpu, do_store_altivec_idle, &value, 1); + + return count; +} + +static ssize_t show_altivec_idle_wait_time(struct device *dev, + struct device_attribute *attr, char *buf) +{ + u32 value; + u64 tb_cycle = 1; + u64 time; + + unsigned int cpu = dev->id; + + if (!altivec_idle_wt) { + smp_call_function_single(cpu, do_show_pwrmgtcr0, &value, 1); + value = (value & PWRMGTCR0_AV_IDLE_CNT) >> + PWRMGTCR0_AV_IDLE_CNT_SHIFT; + + tb_cycle = (tb_cycle << (MAX_BIT - value + 1)); + /* convert ms to ns */ + if (tb_ticks_per_usec > 1000) { + time = div_u64(tb_cycle, tb_ticks_per_usec / 1000); + } else { + u32 rem_us; + + time = div_u64_rem(tb_cycle, tb_ticks_per_usec, + &rem_us); + time = time * 1000 + rem_us * 1000 / tb_ticks_per_usec; + } + } else { + time = altivec_idle_wt; + } + + return sprintf(buf, "%llu\n", time > 0 ? time : 0); +} + +static void set_altivec_idle_wait_entry_bit(void *val) +{ + u32 *value = val; + u32 altivec_idle; + + altivec_idle = mfspr(SPRN_PWRMGTCR0); + + /* Set Automatic AltiVec Idle Count */ + /* clear count */ + altivec_idle &= ~PWRMGTCR0_AV_IDLE_CNT; + + /* set count */ + altivec_idle |= ((MAX_BIT - *value) << PWRMGTCR0_AV_IDLE_CNT_SHIFT); + + mtspr(SPRN_PWRMGTCR0, altivec_idle); +} + +static ssize_t store_altivec_idle_wait_time(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + u32 entry_bit; + u64 value; + + unsigned int cpu = dev->id; + + if (kstrtou64(buf, 0, &value)) + return -EINVAL; + + if (!value) + return -EINVAL; + + entry_bit = get_idle_ticks_bit(value); + if (entry_bit > MAX_BIT) + return -EINVAL; + + altivec_idle_wt = value; + + smp_call_function_single(cpu, set_altivec_idle_wait_entry_bit, + &entry_bit, 1); + + return count; +} + +/* + * Enable/Disable interface: + * 0, disable. 1, enable. + */ +static DEVICE_ATTR(pw20_state, 0600, show_pw20_state, store_pw20_state); +static DEVICE_ATTR(altivec_idle, 0600, show_altivec_idle, store_altivec_idle); + +/* + * Set wait time interface:(Nanosecond) + * Example: Base on TBfreq is 41MHZ. + * 1~48(ns): TB[63] + * 49~97(ns): TB[62] + * 98~195(ns): TB[61] + * 196~390(ns): TB[60] + * 391~780(ns): TB[59] + * 781~1560(ns): TB[58] + * ... + */ +static DEVICE_ATTR(pw20_wait_time, 0600, + show_pw20_wait_time, + store_pw20_wait_time); +static DEVICE_ATTR(altivec_idle_wait_time, 0600, + show_altivec_idle_wait_time, + store_altivec_idle_wait_time); +#endif + /* * Enabling PMCs will slow partition context switch times so we only do * it the first time we write to the PMCs. @@ -107,16 +404,18 @@ void ppc_enable_pmcs(void) } EXPORT_SYMBOL(ppc_enable_pmcs); -#define SYSFS_PMCSETUP(NAME, ADDRESS) \ +#define __SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, EXTRA) \ static void read_##NAME(void *val) \ { \ *(unsigned long *)val = mfspr(ADDRESS); \ } \ static void write_##NAME(void *val) \ { \ - ppc_enable_pmcs(); \ + EXTRA; \ mtspr(ADDRESS, *(unsigned long *)val); \ -} \ +} + +#define __SYSFS_SPRSETUP_SHOW_STORE(NAME) \ static ssize_t show_##NAME(struct device *dev, \ struct device_attribute *attr, \ char *buf) \ @@ -139,6 +438,15 @@ static ssize_t __used \ return count; \ } +#define SYSFS_PMCSETUP(NAME, ADDRESS) \ + __SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, ppc_enable_pmcs()) \ + __SYSFS_SPRSETUP_SHOW_STORE(NAME) +#define SYSFS_SPRSETUP(NAME, ADDRESS) \ + __SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, ) \ + __SYSFS_SPRSETUP_SHOW_STORE(NAME) + +#define SYSFS_SPRSETUP_SHOW_STORE(NAME) \ + __SYSFS_SPRSETUP_SHOW_STORE(NAME) /* Let's define all possible registers, we'll only hook up the ones * that are implemented on the current processor @@ -174,34 +482,50 @@ SYSFS_PMCSETUP(pmc7, SPRN_PMC7); SYSFS_PMCSETUP(pmc8, SPRN_PMC8); SYSFS_PMCSETUP(mmcra, SPRN_MMCRA); -SYSFS_PMCSETUP(purr, SPRN_PURR); -SYSFS_PMCSETUP(spurr, SPRN_SPURR); -SYSFS_PMCSETUP(dscr, SPRN_DSCR); -SYSFS_PMCSETUP(pir, SPRN_PIR); +SYSFS_SPRSETUP(purr, SPRN_PURR); +SYSFS_SPRSETUP(spurr, SPRN_SPURR); +SYSFS_SPRSETUP(pir, SPRN_PIR); +/* + Lets only enable read for phyp resources and + enable write when needed with a separate function. + Lets be conservative and default to pseries. +*/ static DEVICE_ATTR(mmcra, 0600, show_mmcra, store_mmcra); static DEVICE_ATTR(spurr, 0400, show_spurr, NULL); -static DEVICE_ATTR(dscr, 0600, show_dscr, store_dscr); -static DEVICE_ATTR(purr, 0600, show_purr, store_purr); +static DEVICE_ATTR(purr, 0400, show_purr, store_purr); static DEVICE_ATTR(pir, 0400, show_pir, NULL); -unsigned long dscr_default = 0; -EXPORT_SYMBOL(dscr_default); +static unsigned long dscr_default; -static ssize_t show_dscr_default(struct device *dev, - struct device_attribute *attr, char *buf) +static void read_dscr(void *val) { - return sprintf(buf, "%lx\n", dscr_default); + *(unsigned long *)val = get_paca()->dscr_default; } -static void update_dscr(void *dummy) +static void write_dscr(void *val) { + get_paca()->dscr_default = *(unsigned long *)val; if (!current->thread.dscr_inherit) { - current->thread.dscr = dscr_default; - mtspr(SPRN_DSCR, dscr_default); + current->thread.dscr = *(unsigned long *)val; + mtspr(SPRN_DSCR, *(unsigned long *)val); } } +SYSFS_SPRSETUP_SHOW_STORE(dscr); +static DEVICE_ATTR(dscr, 0600, show_dscr, store_dscr); + +static void add_write_permission_dev_attr(struct device_attribute *attr) +{ + attr->attr.mode |= 0200; +} + +static ssize_t show_dscr_default(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%lx\n", dscr_default); +} + static ssize_t __used store_dscr_default(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) @@ -214,7 +538,7 @@ static ssize_t __used store_dscr_default(struct device *dev, return -EINVAL; dscr_default = val; - on_each_cpu(update_dscr, NULL, 1); + on_each_cpu(write_dscr, &val, 1); return count; } @@ -238,34 +562,34 @@ SYSFS_PMCSETUP(pa6t_pmc3, SPRN_PA6T_PMC3); SYSFS_PMCSETUP(pa6t_pmc4, SPRN_PA6T_PMC4); SYSFS_PMCSETUP(pa6t_pmc5, SPRN_PA6T_PMC5); #ifdef CONFIG_DEBUG_KERNEL -SYSFS_PMCSETUP(hid0, SPRN_HID0); -SYSFS_PMCSETUP(hid1, SPRN_HID1); -SYSFS_PMCSETUP(hid4, SPRN_HID4); -SYSFS_PMCSETUP(hid5, SPRN_HID5); -SYSFS_PMCSETUP(ima0, SPRN_PA6T_IMA0); -SYSFS_PMCSETUP(ima1, SPRN_PA6T_IMA1); -SYSFS_PMCSETUP(ima2, SPRN_PA6T_IMA2); -SYSFS_PMCSETUP(ima3, SPRN_PA6T_IMA3); -SYSFS_PMCSETUP(ima4, SPRN_PA6T_IMA4); -SYSFS_PMCSETUP(ima5, SPRN_PA6T_IMA5); -SYSFS_PMCSETUP(ima6, SPRN_PA6T_IMA6); -SYSFS_PMCSETUP(ima7, SPRN_PA6T_IMA7); -SYSFS_PMCSETUP(ima8, SPRN_PA6T_IMA8); -SYSFS_PMCSETUP(ima9, SPRN_PA6T_IMA9); -SYSFS_PMCSETUP(imaat, SPRN_PA6T_IMAAT); -SYSFS_PMCSETUP(btcr, SPRN_PA6T_BTCR); -SYSFS_PMCSETUP(pccr, SPRN_PA6T_PCCR); -SYSFS_PMCSETUP(rpccr, SPRN_PA6T_RPCCR); -SYSFS_PMCSETUP(der, SPRN_PA6T_DER); -SYSFS_PMCSETUP(mer, SPRN_PA6T_MER); -SYSFS_PMCSETUP(ber, SPRN_PA6T_BER); -SYSFS_PMCSETUP(ier, SPRN_PA6T_IER); -SYSFS_PMCSETUP(sier, SPRN_PA6T_SIER); -SYSFS_PMCSETUP(siar, SPRN_PA6T_SIAR); -SYSFS_PMCSETUP(tsr0, SPRN_PA6T_TSR0); -SYSFS_PMCSETUP(tsr1, SPRN_PA6T_TSR1); -SYSFS_PMCSETUP(tsr2, SPRN_PA6T_TSR2); -SYSFS_PMCSETUP(tsr3, SPRN_PA6T_TSR3); +SYSFS_SPRSETUP(hid0, SPRN_HID0); +SYSFS_SPRSETUP(hid1, SPRN_HID1); +SYSFS_SPRSETUP(hid4, SPRN_HID4); +SYSFS_SPRSETUP(hid5, SPRN_HID5); +SYSFS_SPRSETUP(ima0, SPRN_PA6T_IMA0); +SYSFS_SPRSETUP(ima1, SPRN_PA6T_IMA1); +SYSFS_SPRSETUP(ima2, SPRN_PA6T_IMA2); +SYSFS_SPRSETUP(ima3, SPRN_PA6T_IMA3); +SYSFS_SPRSETUP(ima4, SPRN_PA6T_IMA4); +SYSFS_SPRSETUP(ima5, SPRN_PA6T_IMA5); +SYSFS_SPRSETUP(ima6, SPRN_PA6T_IMA6); +SYSFS_SPRSETUP(ima7, SPRN_PA6T_IMA7); +SYSFS_SPRSETUP(ima8, SPRN_PA6T_IMA8); +SYSFS_SPRSETUP(ima9, SPRN_PA6T_IMA9); +SYSFS_SPRSETUP(imaat, SPRN_PA6T_IMAAT); +SYSFS_SPRSETUP(btcr, SPRN_PA6T_BTCR); +SYSFS_SPRSETUP(pccr, SPRN_PA6T_PCCR); +SYSFS_SPRSETUP(rpccr, SPRN_PA6T_RPCCR); +SYSFS_SPRSETUP(der, SPRN_PA6T_DER); +SYSFS_SPRSETUP(mer, SPRN_PA6T_MER); +SYSFS_SPRSETUP(ber, SPRN_PA6T_BER); +SYSFS_SPRSETUP(ier, SPRN_PA6T_IER); +SYSFS_SPRSETUP(sier, SPRN_PA6T_SIER); +SYSFS_SPRSETUP(siar, SPRN_PA6T_SIAR); +SYSFS_SPRSETUP(tsr0, SPRN_PA6T_TSR0); +SYSFS_SPRSETUP(tsr1, SPRN_PA6T_TSR1); +SYSFS_SPRSETUP(tsr2, SPRN_PA6T_TSR2); +SYSFS_SPRSETUP(tsr3, SPRN_PA6T_TSR3); #endif /* CONFIG_DEBUG_KERNEL */ #endif /* HAS_PPC_PMC_PA6T */ @@ -394,8 +718,11 @@ static void register_cpu_online(unsigned int cpu) if (cpu_has_feature(CPU_FTR_MMCRA)) device_create_file(s, &dev_attr_mmcra); - if (cpu_has_feature(CPU_FTR_PURR)) + if (cpu_has_feature(CPU_FTR_PURR)) { + if (!firmware_has_feature(FW_FEATURE_LPAR)) + add_write_permission_dev_attr(&dev_attr_purr); device_create_file(s, &dev_attr_purr); + } if (cpu_has_feature(CPU_FTR_SPURR)) device_create_file(s, &dev_attr_spurr); @@ -407,6 +734,15 @@ static void register_cpu_online(unsigned int cpu) device_create_file(s, &dev_attr_pir); #endif /* CONFIG_PPC64 */ +#ifdef CONFIG_PPC_FSL_BOOK3E + if (PVR_VER(cur_cpu_spec->pvr_value) == PVR_VER_E6500) { + device_create_file(s, &dev_attr_pw20_state); + device_create_file(s, &dev_attr_pw20_wait_time); + + device_create_file(s, &dev_attr_altivec_idle); + device_create_file(s, &dev_attr_altivec_idle_wait_time); + } +#endif cacheinfo_cpu_online(cpu); } @@ -479,6 +815,15 @@ static void unregister_cpu_online(unsigned int cpu) device_remove_file(s, &dev_attr_pir); #endif /* CONFIG_PPC64 */ +#ifdef CONFIG_PPC_FSL_BOOK3E + if (PVR_VER(cur_cpu_spec->pvr_value) == PVR_VER_E6500) { + device_remove_file(s, &dev_attr_pw20_state); + device_remove_file(s, &dev_attr_pw20_wait_time); + + device_remove_file(s, &dev_attr_altivec_idle); + device_remove_file(s, &dev_attr_altivec_idle_wait_time); + } +#endif cacheinfo_cpu_offline(cpu); } @@ -643,7 +988,8 @@ static int __init topology_init(void) int cpu; register_nodes(); - register_cpu_notifier(&sysfs_cpu_nb); + + cpu_notifier_register_begin(); for_each_possible_cpu(cpu) { struct cpu *c = &per_cpu(cpu_devices, cpu); @@ -667,6 +1013,11 @@ static int __init topology_init(void) if (cpu_online(cpu)) register_cpu_online(cpu); } + + __register_cpu_notifier(&sysfs_cpu_nb); + + cpu_notifier_register_done(); + #ifdef CONFIG_PPC64 sysfs_create_dscr_default(); #endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S index 93219c34af3..895c50ca943 100644 --- a/arch/powerpc/kernel/systbl.S +++ b/arch/powerpc/kernel/systbl.S @@ -17,12 +17,12 @@ #include <asm/ppc_asm.h> #ifdef CONFIG_PPC64 -#define SYSCALL(func) .llong .sys_##func,.sys_##func -#define COMPAT_SYS(func) .llong .sys_##func,.compat_sys_##func -#define PPC_SYS(func) .llong .ppc_##func,.ppc_##func -#define OLDSYS(func) .llong .sys_ni_syscall,.sys_ni_syscall -#define SYS32ONLY(func) .llong .sys_ni_syscall,.compat_sys_##func -#define SYSX(f, f3264, f32) .llong .f,.f3264 +#define SYSCALL(func) .llong DOTSYM(sys_##func),DOTSYM(sys_##func) +#define COMPAT_SYS(func) .llong DOTSYM(sys_##func),DOTSYM(compat_sys_##func) +#define PPC_SYS(func) .llong DOTSYM(ppc_##func),DOTSYM(ppc_##func) +#define OLDSYS(func) .llong DOTSYM(sys_ni_syscall),DOTSYM(sys_ni_syscall) +#define SYS32ONLY(func) .llong DOTSYM(sys_ni_syscall),DOTSYM(compat_sys_##func) +#define SYSX(f, f3264, f32) .llong DOTSYM(f),DOTSYM(f3264) #else #define SYSCALL(func) .long sys_##func #define COMPAT_SYS(func) .long sys_##func @@ -36,6 +36,8 @@ #define PPC_SYS_SPU(func) PPC_SYS(func) #define SYSX_SPU(f, f3264, f32) SYSX(f, f3264, f32) +.section .rodata,"a" + #ifdef CONFIG_PPC64 #define sys_sigpending sys_ni_syscall #define sys_old_getrlimit sys_ni_syscall @@ -43,5 +45,7 @@ .p2align 3 #endif -_GLOBAL(sys_call_table) +.globl sys_call_table +sys_call_table: + #include <asm/systbl.h> diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 192b051df97..9fff9cdcc51 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -42,6 +42,7 @@ #include <linux/timex.h> #include <linux/kernel_stat.h> #include <linux/time.h> +#include <linux/clockchips.h> #include <linux/init.h> #include <linux/profile.h> #include <linux/cpu.h> @@ -106,7 +107,7 @@ struct clock_event_device decrementer_clockevent = { .irq = 0, .set_next_event = decrementer_set_next_event, .set_mode = decrementer_set_mode, - .features = CLOCK_EVT_FEAT_ONESHOT, + .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP, }; EXPORT_SYMBOL(decrementer_clockevent); @@ -213,8 +214,6 @@ static u64 scan_dispatch_log(u64 stop_tb) if (i == be64_to_cpu(vpa->dtl_idx)) return 0; while (i < be64_to_cpu(vpa->dtl_idx)) { - if (dtl_consumer) - dtl_consumer(dtl, i); dtb = be64_to_cpu(dtl->timebase); tb_delta = be32_to_cpu(dtl->enqueue_to_dispatch_time) + be32_to_cpu(dtl->ready_to_enqueue_time); @@ -227,6 +226,8 @@ static u64 scan_dispatch_log(u64 stop_tb) } if (dtb > stop_tb) break; + if (dtl_consumer) + dtl_consumer(dtl, i); stolen += tb_delta; ++i; ++dtl; @@ -478,6 +479,47 @@ void arch_irq_work_raise(void) #endif /* CONFIG_IRQ_WORK */ +void __timer_interrupt(void) +{ + struct pt_regs *regs = get_irq_regs(); + u64 *next_tb = &__get_cpu_var(decrementers_next_tb); + struct clock_event_device *evt = &__get_cpu_var(decrementers); + u64 now; + + trace_timer_interrupt_entry(regs); + + if (test_irq_work_pending()) { + clear_irq_work_pending(); + irq_work_run(); + } + + now = get_tb_or_rtc(); + if (now >= *next_tb) { + *next_tb = ~(u64)0; + if (evt->event_handler) + evt->event_handler(evt); + __get_cpu_var(irq_stat).timer_irqs_event++; + } else { + now = *next_tb - now; + if (now <= DECREMENTER_MAX) + set_dec((int)now); + /* We may have raced with new irq work */ + if (test_irq_work_pending()) + set_dec(1); + __get_cpu_var(irq_stat).timer_irqs_others++; + } + +#ifdef CONFIG_PPC64 + /* collect purr register values often, for accurate calculations */ + if (firmware_has_feature(FW_FEATURE_SPLPAR)) { + struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array); + cu->current_tb = mfspr(SPRN_PURR); + } +#endif + + trace_timer_interrupt_exit(regs); +} + /* * timer_interrupt - gets called when the decrementer overflows, * with interrupts disabled. @@ -486,8 +528,6 @@ void timer_interrupt(struct pt_regs * regs) { struct pt_regs *old_regs; u64 *next_tb = &__get_cpu_var(decrementers_next_tb); - struct clock_event_device *evt = &__get_cpu_var(decrementers); - u64 now; /* Ensure a positive value is written to the decrementer, or else * some CPUs will continue to take decrementer exceptions. @@ -510,9 +550,8 @@ void timer_interrupt(struct pt_regs * regs) */ may_hard_irq_enable(); - __get_cpu_var(irq_stat).timer_irqs++; -#if defined(CONFIG_PPC32) && defined(CONFIG_PMAC) +#if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC) if (atomic_read(&ppc_n_lost_interrupts) != 0) do_IRQ(regs); #endif @@ -520,34 +559,7 @@ void timer_interrupt(struct pt_regs * regs) old_regs = set_irq_regs(regs); irq_enter(); - trace_timer_interrupt_entry(regs); - - if (test_irq_work_pending()) { - clear_irq_work_pending(); - irq_work_run(); - } - - now = get_tb_or_rtc(); - if (now >= *next_tb) { - *next_tb = ~(u64)0; - if (evt->event_handler) - evt->event_handler(evt); - } else { - now = *next_tb - now; - if (now <= DECREMENTER_MAX) - set_dec((int)now); - } - -#ifdef CONFIG_PPC64 - /* collect purr register values often, for accurate calculations */ - if (firmware_has_feature(FW_FEATURE_SPLPAR)) { - struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array); - cu->current_tb = mfspr(SPRN_PURR); - } -#endif - - trace_timer_interrupt_exit(regs); - + __timer_interrupt(); irq_exit(); set_irq_regs(old_regs); } @@ -803,6 +815,11 @@ static int decrementer_set_next_event(unsigned long evt, { __get_cpu_var(decrementers_next_tb) = get_tb_or_rtc() + evt; set_dec(evt); + + /* We may have raced with new irq work */ + if (test_irq_work_pending()) + set_dec(1); + return 0; } @@ -813,6 +830,15 @@ static void decrementer_set_mode(enum clock_event_mode mode, decrementer_set_next_event(DECREMENTER_MAX, dev); } +/* Interrupt handler for the timer broadcast IPI */ +void tick_broadcast_ipi_handler(void) +{ + u64 *next_tb = &__get_cpu_var(decrementers_next_tb); + + *next_tb = get_tb_or_rtc(); + __timer_interrupt(); +} + static void register_decrementer_clockevent(int cpu) { struct clock_event_device *dec = &per_cpu(decrementers, cpu); @@ -916,6 +942,7 @@ void __init time_init(void) clocksource_init(); init_decrementer_clockevent(); + tick_setup_hrtimer_broadcast(); } diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 7b60b985146..2a324f4cb1b 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -10,18 +10,18 @@ #include <asm/ppc-opcode.h> #include <asm/ptrace.h> #include <asm/reg.h> +#include <asm/bug.h> #ifdef CONFIG_VSX -/* See fpu.S, this is very similar but to save/restore checkpointed FPRs/VSRs */ -#define __SAVE_32FPRS_VSRS_TRANSACT(n,c,base) \ +/* See fpu.S, this is borrowed from there */ +#define __SAVE_32FPRS_VSRS(n,c,base) \ BEGIN_FTR_SECTION \ b 2f; \ END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ - SAVE_32FPRS_TRANSACT(n,base); \ + SAVE_32FPRS(n,base); \ b 3f; \ -2: SAVE_32VSRS_TRANSACT(n,c,base); \ +2: SAVE_32VSRS(n,c,base); \ 3: -/* ...and this is just plain borrowed from there. */ #define __REST_32FPRS_VSRS(n,c,base) \ BEGIN_FTR_SECTION \ b 2f; \ @@ -31,18 +31,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ 2: REST_32VSRS(n,c,base); \ 3: #else -#define __SAVE_32FPRS_VSRS_TRANSACT(n,c,base) SAVE_32FPRS_TRANSACT(n, base) -#define __REST_32FPRS_VSRS(n,c,base) REST_32FPRS(n, base) +#define __SAVE_32FPRS_VSRS(n,c,base) SAVE_32FPRS(n, base) +#define __REST_32FPRS_VSRS(n,c,base) REST_32FPRS(n, base) #endif -#define SAVE_32FPRS_VSRS_TRANSACT(n,c,base) \ - __SAVE_32FPRS_VSRS_TRANSACT(n,__REG_##c,__REG_##base) +#define SAVE_32FPRS_VSRS(n,c,base) \ + __SAVE_32FPRS_VSRS(n,__REG_##c,__REG_##base) #define REST_32FPRS_VSRS(n,c,base) \ __REST_32FPRS_VSRS(n,__REG_##c,__REG_##base) /* Stack frame offsets for local variables. */ #define TM_FRAME_L0 TM_FRAME_SIZE-16 #define TM_FRAME_L1 TM_FRAME_SIZE-8 -#define STACK_PARAM(x) (48+((x)*8)) /* In order to access the TM SPRs, TM must be enabled. So, do so: */ @@ -79,7 +78,6 @@ _GLOBAL(tm_abort) TABORT(R3) blr - /* void tm_reclaim(struct thread_struct *thread, * unsigned long orig_msr, * uint8_t cause) @@ -102,14 +100,14 @@ _GLOBAL(tm_abort) _GLOBAL(tm_reclaim) mfcr r6 mflr r0 - std r6, 8(r1) + stw r6, 8(r1) std r0, 16(r1) - std r2, 40(r1) + std r2, STK_GOT(r1) stdu r1, -TM_FRAME_SIZE(r1) /* We've a struct pt_regs at [r1+STACK_FRAME_OVERHEAD]. */ - std r3, STACK_PARAM(0)(r1) + std r3, STK_PARAM(R3)(r1) SAVE_NVGPRS(r1) /* We need to setup MSR for VSX register save instructions. Here we @@ -123,6 +121,7 @@ _GLOBAL(tm_reclaim) mr r15, r14 ori r15, r15, MSR_FP li r16, MSR_RI + ori r16, r16, MSR_EE /* IRQs hard off */ andc r15, r15, r16 oris r15, r15, MSR_VEC@h #ifdef CONFIG_VSX @@ -151,10 +150,11 @@ _GLOBAL(tm_reclaim) andis. r0, r4, MSR_VEC@h beq dont_backup_vec - SAVE_32VRS_TRANSACT(0, r6, r3) /* r6 scratch, r3 thread */ + addi r7, r3, THREAD_TRANSACT_VRSTATE + SAVE_32VRS(0, r6, r7) /* r6 scratch, r7 transact vr state */ mfvscr vr0 - li r6, THREAD_TRANSACT_VSCR - stvx vr0, r3, r6 + li r6, VRSTATE_VSCR + stvx vr0, r7, r6 dont_backup_vec: mfspr r0, SPRN_VRSAVE std r0, THREAD_TRANSACT_VRSAVE(r3) @@ -162,12 +162,20 @@ dont_backup_vec: andi. r0, r4, MSR_FP beq dont_backup_fp - SAVE_32FPRS_VSRS_TRANSACT(0, R6, R3) /* r6 scratch, r3 thread */ + addi r7, r3, THREAD_TRANSACT_FPSTATE + SAVE_32FPRS_VSRS(0, R6, R7) /* r6 scratch, r7 transact fp state */ mffs fr0 - stfd fr0,THREAD_TRANSACT_FPSCR(r3) + stfd fr0,FPSTATE_FPSCR(r7) dont_backup_fp: + /* Do sanity check on MSR to make sure we are suspended */ + li r7, (MSR_TS_S)@higher + srdi r6, r14, 32 + and r6, r6, r7 +1: tdeqi r6, 0 + EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0 + /* The moment we treclaim, ALL of our GPRs will switch * to user register state. (FPRs, CCR etc. also!) * Use an sprg and a tm_scratch in the PACA to shuffle. @@ -187,10 +195,17 @@ dont_backup_fp: std r1, PACATMSCRATCH(r13) ld r1, PACAR1(r13) + /* Store the PPR in r11 and reset to decent value */ + std r11, GPR11(r1) /* Temporary stash */ + mfspr r11, SPRN_PPR + HMT_MEDIUM + /* Now get some more GPRS free */ std r7, GPR7(r1) /* Temporary stash */ std r12, GPR12(r1) /* '' '' '' */ - ld r12, STACK_PARAM(0)(r1) /* Param 0, thread_struct * */ + ld r12, STK_PARAM(R3)(r1) /* Param 0, thread_struct * */ + + std r11, THREAD_TM_PPR(r12) /* Store PPR and free r11 */ addi r7, r12, PT_CKPT_REGS /* Thread's ckpt_regs */ @@ -203,15 +218,19 @@ dont_backup_fp: SAVE_GPR(0, r7) /* user r0 */ SAVE_GPR(2, r7) /* user r2 */ SAVE_4GPRS(3, r7) /* user r3-r6 */ - SAVE_4GPRS(8, r7) /* user r8-r11 */ + SAVE_GPR(8, r7) /* user r8 */ + SAVE_GPR(9, r7) /* user r9 */ + SAVE_GPR(10, r7) /* user r10 */ ld r3, PACATMSCRATCH(r13) /* user r1 */ ld r4, GPR7(r1) /* user r7 */ - ld r5, GPR12(r1) /* user r12 */ - GET_SCRATCH0(6) /* user r13 */ + ld r5, GPR11(r1) /* user r11 */ + ld r6, GPR12(r1) /* user r12 */ + GET_SCRATCH0(8) /* user r13 */ std r3, GPR1(r7) std r4, GPR7(r7) - std r5, GPR12(r7) - std r6, GPR13(r7) + std r5, GPR11(r7) + std r6, GPR12(r7) + std r8, GPR13(r7) SAVE_NVGPRS(r7) /* user r14-r31 */ @@ -234,14 +253,12 @@ dont_backup_fp: std r6, _XER(r7) - /* ******************** TAR, PPR, DSCR ********** */ + /* ******************** TAR, DSCR ********** */ mfspr r3, SPRN_TAR - mfspr r4, SPRN_PPR - mfspr r5, SPRN_DSCR + mfspr r4, SPRN_DSCR std r3, THREAD_TM_TAR(r12) - std r4, THREAD_TM_PPR(r12) - std r5, THREAD_TM_DSCR(r12) + std r4, THREAD_TM_DSCR(r12) /* MSR and flags: We don't change CRs, and we don't need to alter * MSR. @@ -258,7 +275,7 @@ dont_backup_fp: std r3, THREAD_TM_TFHAR(r12) std r4, THREAD_TM_TFIAR(r12) - /* AMR and PPR are checkpointed too, but are unsupported by Linux. */ + /* AMR is checkpointed too, but is unsupported by Linux. */ /* Restore original MSR/IRQ state & clear TM mode */ ld r14, TM_FRAME_L0(r1) /* Orig MSR */ @@ -269,11 +286,16 @@ dont_backup_fp: REST_NVGPRS(r1) addi r1, r1, TM_FRAME_SIZE - ld r4, 8(r1) + lwz r4, 8(r1) ld r0, 16(r1) mtcr r4 mtlr r0 - ld r2, 40(r1) + ld r2, STK_GOT(r1) + + /* Load CPU's default DSCR */ + ld r0, PACA_DSCR(r13) + mtspr SPRN_DSCR, r0 + blr @@ -285,12 +307,12 @@ dont_backup_fp: * Call with IRQs off, stacks get all out of sync for * some periods in here! */ -_GLOBAL(tm_recheckpoint) +_GLOBAL(__tm_recheckpoint) mfcr r5 mflr r0 - std r5, 8(r1) + stw r5, 8(r1) std r0, 16(r1) - std r2, 40(r1) + std r2, STK_GOT(r1) stdu r1, -TM_FRAME_SIZE(r1) /* We've a struct pt_regs at [r1+STACK_FRAME_OVERHEAD]. @@ -298,8 +320,6 @@ _GLOBAL(tm_recheckpoint) */ SAVE_NVGPRS(r1) - std r1, PACAR1(r13) - /* Load complete register state from ts_ckpt* registers */ addi r7, r3, PT_CKPT_REGS /* Thread's ckpt_regs */ @@ -337,10 +357,11 @@ _GLOBAL(tm_recheckpoint) andis. r0, r4, MSR_VEC@h beq dont_restore_vec - li r5, THREAD_VSCR - lvx vr0, r3, r5 + addi r8, r3, THREAD_VRSTATE + li r5, VRSTATE_VSCR + lvx vr0, r8, r5 mtvscr vr0 - REST_32VRS(0, r5, r3) /* r5 scratch, r3 THREAD ptr */ + REST_32VRS(0, r5, r8) /* r5 scratch, r8 ptr */ dont_restore_vec: ld r5, THREAD_VRSAVE(r3) mtspr SPRN_VRSAVE, r5 @@ -349,54 +370,85 @@ dont_restore_vec: andi. r0, r4, MSR_FP beq dont_restore_fp - lfd fr0, THREAD_FPSCR(r3) + addi r8, r3, THREAD_FPSTATE + lfd fr0, FPSTATE_FPSCR(r8) MTFSF_L(fr0) - REST_32FPRS_VSRS(0, R4, R3) + REST_32FPRS_VSRS(0, R4, R8) dont_restore_fp: mtmsr r6 /* FP/Vec off again! */ restore_gprs: - /* ******************** TAR, PPR, DSCR ********** */ - ld r4, THREAD_TM_TAR(r3) - ld r5, THREAD_TM_PPR(r3) - ld r6, THREAD_TM_DSCR(r3) + /* ******************** CR,LR,CCR,MSR ********** */ + ld r4, _CTR(r7) + ld r5, _LINK(r7) + ld r8, _XER(r7) - mtspr SPRN_TAR, r4 - mtspr SPRN_PPR, r5 - mtspr SPRN_DSCR, r6 + mtctr r4 + mtlr r5 + mtxer r8 - /* ******************** CR,LR,CCR,MSR ********** */ - ld r3, _CTR(r7) - ld r4, _LINK(r7) - ld r5, _CCR(r7) - ld r6, _XER(r7) + /* ******************** TAR ******************** */ + ld r4, THREAD_TM_TAR(r3) + mtspr SPRN_TAR, r4 - mtctr r3 - mtlr r4 - mtcr r5 - mtxer r6 + /* Load up the PPR and DSCR in GPRs only at this stage */ + ld r5, THREAD_TM_DSCR(r3) + ld r6, THREAD_TM_PPR(r3) /* Clear the MSR RI since we are about to change R1. EE is already off */ li r4, 0 mtmsrd r4, 1 - REST_4GPRS(0, r7) /* GPR0-3 */ - REST_GPR(4, r7) /* GPR4-6 */ - REST_GPR(5, r7) - REST_GPR(6, r7) + REST_GPR(0, r7) /* GPR0 */ + REST_2GPRS(2, r7) /* GPR2-3 */ + REST_GPR(4, r7) /* GPR4 */ REST_4GPRS(8, r7) /* GPR8-11 */ REST_2GPRS(12, r7) /* GPR12-13 */ REST_NVGPRS(r7) /* GPR14-31 */ - ld r7, GPR7(r7) /* GPR7 */ + /* Load up PPR and DSCR here so we don't run with user values for long + */ + mtspr SPRN_DSCR, r5 + mtspr SPRN_PPR, r6 + + /* Do final sanity check on TEXASR to make sure FS is set. Do this + * here before we load up the userspace r1 so any bugs we hit will get + * a call chain */ + mfspr r5, SPRN_TEXASR + srdi r5, r5, 16 + li r6, (TEXASR_FS)@h + and r6, r6, r5 +1: tdeqi r6, 0 + EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0 + + /* Do final sanity check on MSR to make sure we are not transactional + * or suspended + */ + mfmsr r6 + li r5, (MSR_TS_MASK)@higher + srdi r6, r6, 32 + and r6, r6, r5 +1: tdnei r6, 0 + EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0 + + /* Restore CR */ + ld r6, _CCR(r7) + mtcr r6 + + REST_GPR(1, r7) /* GPR1 */ + REST_GPR(5, r7) /* GPR5-7 */ + REST_GPR(6, r7) + ld r7, GPR7(r7) /* Commit register state as checkpointed state: */ TRECHKPT + HMT_MEDIUM + /* Our transactional state has now changed. * * Now just get out of here. Transactional (current) state will be @@ -414,11 +466,16 @@ restore_gprs: REST_NVGPRS(r1) addi r1, r1, TM_FRAME_SIZE - ld r4, 8(r1) + lwz r4, 8(r1) ld r0, 16(r1) mtcr r4 mtlr r0 - ld r2, 40(r1) + ld r2, STK_GOT(r1) + + /* Load CPU's default DSCR */ + ld r0, PACA_DSCR(r13) + mtspr SPRN_DSCR, r0 + blr /* ****************************************************************** */ diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index f783c932fae..239f1cde3ff 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -285,6 +285,23 @@ void system_reset_exception(struct pt_regs *regs) /* What should we do here? We could issue a shutdown or hard reset. */ } + +/* + * This function is called in real mode. Strictly no printk's please. + * + * regs->nip and regs->msr contains srr0 and ssr1. + */ +long machine_check_early(struct pt_regs *regs) +{ + long handled = 0; + + __get_cpu_var(irq_stat).mce_exceptions++; + + if (cur_cpu_spec && cur_cpu_spec->machine_check_early) + handled = cur_cpu_spec->machine_check_early(regs); + return handled; +} + #endif /* @@ -351,8 +368,8 @@ static inline int check_io_access(struct pt_regs *regs) #define REASON_TRAP ESR_PTR /* single-step stuff */ -#define single_stepping(regs) (current->thread.dbcr0 & DBCR0_IC) -#define clear_single_step(regs) (current->thread.dbcr0 &= ~DBCR0_IC) +#define single_stepping(regs) (current->thread.debug.dbcr0 & DBCR0_IC) +#define clear_single_step(regs) (current->thread.debug.dbcr0 &= ~DBCR0_IC) #else /* On non-4xx, the reason for the machine check or program @@ -816,7 +833,7 @@ static void parse_fpe(struct pt_regs *regs) flush_fp_to_thread(current); - code = __parse_fpscr(current->thread.fpscr.val); + code = __parse_fpscr(current->thread.fp_state.fpscr); _exception(SIGFPE, regs, code, regs->nip); } @@ -1018,6 +1035,13 @@ static int emulate_instruction(struct pt_regs *regs) return emulate_isel(regs, instword); } + /* Emulate sync instruction variants */ + if ((instword & PPC_INST_SYNC_MASK) == PPC_INST_SYNC) { + PPC_WARN_EMULATED(sync, regs); + asm volatile("sync"); + return 0; + } + #ifdef CONFIG_PPC64 /* Emulate the mfspr rD, DSCR. */ if ((((instword & PPC_INST_MFSPR_DSCR_USER_MASK) == @@ -1069,7 +1093,7 @@ static int emulate_math(struct pt_regs *regs) return 0; case 1: { int code = 0; - code = __parse_fpscr(current->thread.fpscr.val); + code = __parse_fpscr(current->thread.fp_state.fpscr); _exception(SIGFPE, regs, code, regs->nip); return 0; } @@ -1357,8 +1381,9 @@ void facility_unavailable_exception(struct pt_regs *regs) if (!arch_irq_disabled_regs(regs)) local_irq_enable(); - pr_err("%sFacility '%s' unavailable, exception at 0x%lx, MSR=%lx\n", - hv ? "Hypervisor " : "", facility, regs->nip, regs->msr); + pr_err_ratelimited( + "%sFacility '%s' unavailable, exception at 0x%lx, MSR=%lx\n", + hv ? "Hypervisor " : "", facility, regs->nip, regs->msr); if (user_mode(regs)) { _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); @@ -1371,15 +1396,12 @@ void facility_unavailable_exception(struct pt_regs *regs) #ifdef CONFIG_PPC_TRANSACTIONAL_MEM -extern void do_load_up_fpu(struct pt_regs *regs); - void fp_unavailable_tm(struct pt_regs *regs) { /* Note: This does not handle any kind of FP laziness. */ TM_DEBUG("FP Unavailable trap whilst transactional at 0x%lx, MSR=%lx\n", regs->nip, regs->msr); - tm_enable(); /* We can only have got here if the task started using FP after * beginning the transaction. So, the transactional regs are just a @@ -1388,8 +1410,7 @@ void fp_unavailable_tm(struct pt_regs *regs) * transaction, and probably retry but now with FP enabled. So the * checkpointed FP registers need to be loaded. */ - tm_reclaim(¤t->thread, current->thread.regs->msr, - TM_CAUSE_FAC_UNAV); + tm_reclaim_current(TM_CAUSE_FAC_UNAV); /* Reclaim didn't save out any FPRs to transact_fprs. */ /* Enable FP for the task: */ @@ -1398,12 +1419,18 @@ void fp_unavailable_tm(struct pt_regs *regs) /* This loads and recheckpoints the FP registers from * thread.fpr[]. They will remain in registers after the * checkpoint so we don't need to reload them after. + * If VMX is in use, the VRs now hold checkpointed values, + * so we don't want to load the VRs from the thread_struct. */ - tm_recheckpoint(¤t->thread, regs->msr); -} + tm_recheckpoint(¤t->thread, MSR_FP); -#ifdef CONFIG_ALTIVEC -extern void do_load_up_altivec(struct pt_regs *regs); + /* If VMX is in use, get the transactional values back */ + if (regs->msr & MSR_VEC) { + do_load_up_transact_altivec(¤t->thread); + /* At this point all the VSX state is loaded, so enable it */ + regs->msr |= MSR_VSX; + } +} void altivec_unavailable_tm(struct pt_regs *regs) { @@ -1414,18 +1441,21 @@ void altivec_unavailable_tm(struct pt_regs *regs) TM_DEBUG("Vector Unavailable trap whilst transactional at 0x%lx," "MSR=%lx\n", regs->nip, regs->msr); - tm_enable(); - tm_reclaim(¤t->thread, current->thread.regs->msr, - TM_CAUSE_FAC_UNAV); + tm_reclaim_current(TM_CAUSE_FAC_UNAV); regs->msr |= MSR_VEC; - tm_recheckpoint(¤t->thread, regs->msr); + tm_recheckpoint(¤t->thread, MSR_VEC); current->thread.used_vr = 1; + + if (regs->msr & MSR_FP) { + do_load_up_transact_fpu(¤t->thread); + regs->msr |= MSR_VSX; + } } -#endif -#ifdef CONFIG_VSX void vsx_unavailable_tm(struct pt_regs *regs) { + unsigned long orig_msr = regs->msr; + /* See the comments in fp_unavailable_tm(). This works similarly, * though we're loading both FP and VEC registers in here. * @@ -1437,18 +1467,30 @@ void vsx_unavailable_tm(struct pt_regs *regs) "MSR=%lx\n", regs->nip, regs->msr); - tm_enable(); + current->thread.used_vsr = 1; + + /* If FP and VMX are already loaded, we have all the state we need */ + if ((orig_msr & (MSR_FP | MSR_VEC)) == (MSR_FP | MSR_VEC)) { + regs->msr |= MSR_VSX; + return; + } + /* This reclaims FP and/or VR regs if they're already enabled */ - tm_reclaim(¤t->thread, current->thread.regs->msr, - TM_CAUSE_FAC_UNAV); + tm_reclaim_current(TM_CAUSE_FAC_UNAV); regs->msr |= MSR_VEC | MSR_FP | current->thread.fpexc_mode | MSR_VSX; - /* This loads & recheckpoints FP and VRs. */ - tm_recheckpoint(¤t->thread, regs->msr); - current->thread.used_vsr = 1; + + /* This loads & recheckpoints FP and VRs; but we have + * to be sure not to overwrite previously-valid state. + */ + tm_recheckpoint(¤t->thread, regs->msr & ~orig_msr); + + if (orig_msr & MSR_FP) + do_load_up_transact_fpu(¤t->thread); + if (orig_msr & MSR_VEC) + do_load_up_transact_altivec(¤t->thread); } -#endif #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ void performance_monitor_exception(struct pt_regs *regs) @@ -1465,7 +1507,8 @@ void SoftwareEmulation(struct pt_regs *regs) if (!user_mode(regs)) { debugger(regs); - die("Kernel Mode Software FPU Emulation", regs, SIGFPE); + die("Kernel Mode Unimplemented Instruction or SW FPU Emulation", + regs, SIGFPE); } if (!emulate_math(regs)) @@ -1486,7 +1529,7 @@ static void handle_debug(struct pt_regs *regs, unsigned long debug_status) if (debug_status & (DBSR_DAC1R | DBSR_DAC1W)) { dbcr_dac(current) &= ~(DBCR_DAC1R | DBCR_DAC1W); #ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE - current->thread.dbcr2 &= ~DBCR2_DAC12MODE; + current->thread.debug.dbcr2 &= ~DBCR2_DAC12MODE; #endif do_send_trap(regs, mfspr(SPRN_DAC1), debug_status, TRAP_HWBKPT, 5); @@ -1497,24 +1540,24 @@ static void handle_debug(struct pt_regs *regs, unsigned long debug_status) 6); changed |= 0x01; } else if (debug_status & DBSR_IAC1) { - current->thread.dbcr0 &= ~DBCR0_IAC1; + current->thread.debug.dbcr0 &= ~DBCR0_IAC1; dbcr_iac_range(current) &= ~DBCR_IAC12MODE; do_send_trap(regs, mfspr(SPRN_IAC1), debug_status, TRAP_HWBKPT, 1); changed |= 0x01; } else if (debug_status & DBSR_IAC2) { - current->thread.dbcr0 &= ~DBCR0_IAC2; + current->thread.debug.dbcr0 &= ~DBCR0_IAC2; do_send_trap(regs, mfspr(SPRN_IAC2), debug_status, TRAP_HWBKPT, 2); changed |= 0x01; } else if (debug_status & DBSR_IAC3) { - current->thread.dbcr0 &= ~DBCR0_IAC3; + current->thread.debug.dbcr0 &= ~DBCR0_IAC3; dbcr_iac_range(current) &= ~DBCR_IAC34MODE; do_send_trap(regs, mfspr(SPRN_IAC3), debug_status, TRAP_HWBKPT, 3); changed |= 0x01; } else if (debug_status & DBSR_IAC4) { - current->thread.dbcr0 &= ~DBCR0_IAC4; + current->thread.debug.dbcr0 &= ~DBCR0_IAC4; do_send_trap(regs, mfspr(SPRN_IAC4), debug_status, TRAP_HWBKPT, 4); changed |= 0x01; @@ -1524,19 +1567,20 @@ static void handle_debug(struct pt_regs *regs, unsigned long debug_status) * Check all other debug flags and see if that bit needs to be turned * back on or not. */ - if (DBCR_ACTIVE_EVENTS(current->thread.dbcr0, current->thread.dbcr1)) + if (DBCR_ACTIVE_EVENTS(current->thread.debug.dbcr0, + current->thread.debug.dbcr1)) regs->msr |= MSR_DE; else /* Make sure the IDM flag is off */ - current->thread.dbcr0 &= ~DBCR0_IDM; + current->thread.debug.dbcr0 &= ~DBCR0_IDM; if (changed & 0x01) - mtspr(SPRN_DBCR0, current->thread.dbcr0); + mtspr(SPRN_DBCR0, current->thread.debug.dbcr0); } void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status) { - current->thread.dbsr = debug_status; + current->thread.debug.dbsr = debug_status; /* Hack alert: On BookE, Branch Taken stops on the branch itself, while * on server, it stops on the target of the branch. In order to simulate @@ -1553,8 +1597,8 @@ void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status) /* Do the single step trick only when coming from userspace */ if (user_mode(regs)) { - current->thread.dbcr0 &= ~DBCR0_BT; - current->thread.dbcr0 |= DBCR0_IDM | DBCR0_IC; + current->thread.debug.dbcr0 &= ~DBCR0_BT; + current->thread.debug.dbcr0 |= DBCR0_IDM | DBCR0_IC; regs->msr |= MSR_DE; return; } @@ -1582,13 +1626,13 @@ void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status) return; if (user_mode(regs)) { - current->thread.dbcr0 &= ~DBCR0_IC; - if (DBCR_ACTIVE_EVENTS(current->thread.dbcr0, - current->thread.dbcr1)) + current->thread.debug.dbcr0 &= ~DBCR0_IC; + if (DBCR_ACTIVE_EVENTS(current->thread.debug.dbcr0, + current->thread.debug.dbcr1)) regs->msr |= MSR_DE; else /* Make sure the IDM bit is off */ - current->thread.dbcr0 &= ~DBCR0_IDM; + current->thread.debug.dbcr0 &= ~DBCR0_IDM; } _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip); @@ -1634,7 +1678,7 @@ void altivec_assist_exception(struct pt_regs *regs) /* XXX quick hack for now: set the non-Java bit in the VSCR */ printk_ratelimited(KERN_ERR "Unrecognized altivec instruction " "in %s at %lx\n", current->comm, regs->nip); - current->thread.vscr.u[3] |= 0x10000; + current->thread.vr_state.vscr.u[3] |= 0x10000; } } #endif /* CONFIG_ALTIVEC */ @@ -1815,6 +1859,7 @@ struct ppc_emulated ppc_emulated = { WARN_EMULATED_SETUP(popcntb), WARN_EMULATED_SETUP(spe), WARN_EMULATED_SETUP(string), + WARN_EMULATED_SETUP(sync), WARN_EMULATED_SETUP(unaligned), #ifdef CONFIG_MATH_EMULATION WARN_EMULATED_SETUP(math), @@ -1825,6 +1870,7 @@ struct ppc_emulated ppc_emulated = { #ifdef CONFIG_PPC64 WARN_EMULATED_SETUP(mfdscr), WARN_EMULATED_SETUP(mtdscr), + WARN_EMULATED_SETUP(lq_stq), #endif }; diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c index a15837519dc..b7aa07279a6 100644 --- a/arch/powerpc/kernel/udbg.c +++ b/arch/powerpc/kernel/udbg.c @@ -62,8 +62,6 @@ void __init udbg_early_init(void) udbg_init_cpm(); #elif defined(CONFIG_PPC_EARLY_DEBUG_USBGECKO) udbg_init_usbgecko(); -#elif defined(CONFIG_PPC_EARLY_DEBUG_WSP) - udbg_init_wsp(); #elif defined(CONFIG_PPC_EARLY_DEBUG_MEMCONS) /* In memory console */ udbg_init_memcons(); diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c index 75702e207b2..6e7c4923b5e 100644 --- a/arch/powerpc/kernel/udbg_16550.c +++ b/arch/powerpc/kernel/udbg_16550.c @@ -296,14 +296,3 @@ void __init udbg_init_40x_realmode(void) } #endif /* CONFIG_PPC_EARLY_DEBUG_40x */ - - -#ifdef CONFIG_PPC_EARLY_DEBUG_WSP - -void __init udbg_init_wsp(void) -{ - udbg_uart_init_mmio((void *)WSP_UART_VIRT, 1); - udbg_uart_setup(57600, 50000000); -} - -#endif /* CONFIG_PPC_EARLY_DEBUG_WSP */ diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c index 59f419b935f..003b20964ea 100644 --- a/arch/powerpc/kernel/uprobes.c +++ b/arch/powerpc/kernel/uprobes.c @@ -186,7 +186,7 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) * emulate_step() returns 1 if the insn was successfully emulated. * For all other cases, we need to single-step in hardware. */ - ret = emulate_step(regs, auprobe->ainsn); + ret = emulate_step(regs, auprobe->insn); if (ret > 0) return true; diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 1d9c92621b3..ce74c335a6a 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -34,8 +34,7 @@ #include <asm/firmware.h> #include <asm/vdso.h> #include <asm/vdso_datapage.h> - -#include "setup.h" +#include <asm/setup.h> #undef DEBUG @@ -716,8 +715,8 @@ int vdso_getcpu_init(void) unsigned long cpu, node, val; /* - * SPRG3 contains the CPU in the bottom 16 bits and the NUMA node in - * the next 16 bits. The VDSO uses this to implement getcpu(). + * SPRG_VDSO contains the CPU in the bottom 16 bits and the NUMA node + * in the next 16 bits. The VDSO uses this to implement getcpu(). */ cpu = get_cpu(); WARN_ON_ONCE(cpu > 0xffff); @@ -726,8 +725,8 @@ int vdso_getcpu_init(void) WARN_ON_ONCE(node > 0xffff); val = (cpu & 0xfff) | ((node & 0xffff) << 16); - mtspr(SPRN_SPRG3, val); - get_paca()->sprg3 = val; + mtspr(SPRN_SPRG_VDSO_WRITE, val); + get_paca()->sprg_vdso = val; put_cpu(); diff --git a/arch/powerpc/kernel/vdso32/getcpu.S b/arch/powerpc/kernel/vdso32/getcpu.S index 47afd08c90f..23eb9a9441b 100644 --- a/arch/powerpc/kernel/vdso32/getcpu.S +++ b/arch/powerpc/kernel/vdso32/getcpu.S @@ -29,7 +29,7 @@ */ V_FUNCTION_BEGIN(__kernel_getcpu) .cfi_startproc - mfspr r5,SPRN_USPRG3 + mfspr r5,SPRN_SPRG_VDSO_READ cmpdi cr0,r3,0 cmpdi cr1,r4,0 clrlwi r6,r5,16 diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S index 6b1f2a6d551..6b2b69616e7 100644 --- a/arch/powerpc/kernel/vdso32/gettimeofday.S +++ b/arch/powerpc/kernel/vdso32/gettimeofday.S @@ -232,9 +232,15 @@ __do_get_tspec: lwz r6,(CFG_TB_ORIG_STAMP+4)(r9) /* Get a stable TB value */ +#ifdef CONFIG_8xx +2: mftbu r3 + mftbl r4 + mftbu r0 +#else 2: mfspr r3, SPRN_TBRU mfspr r4, SPRN_TBRL mfspr r0, SPRN_TBRU +#endif cmplw cr0,r3,r0 bne- 2b diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S index f223409629b..e58ee10fa5c 100644 --- a/arch/powerpc/kernel/vdso32/vdso32.lds.S +++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S @@ -4,7 +4,11 @@ */ #include <asm/vdso.h> +#ifdef __LITTLE_ENDIAN__ +OUTPUT_FORMAT("elf32-powerpcle", "elf32-powerpcle", "elf32-powerpcle") +#else OUTPUT_FORMAT("elf32-powerpc", "elf32-powerpc", "elf32-powerpc") +#endif OUTPUT_ARCH(powerpc:common) ENTRY(_start) diff --git a/arch/powerpc/kernel/vdso32/vdso32_wrapper.S b/arch/powerpc/kernel/vdso32/vdso32_wrapper.S index 6e8f507ed32..6ac107ac402 100644 --- a/arch/powerpc/kernel/vdso32/vdso32_wrapper.S +++ b/arch/powerpc/kernel/vdso32/vdso32_wrapper.S @@ -1,4 +1,3 @@ -#include <linux/init.h> #include <linux/linkage.h> #include <asm/page.h> @@ -7,7 +6,7 @@ .globl vdso32_start, vdso32_end .balign PAGE_SIZE vdso32_start: - .incbin "arch/powerpc/kernel/vdso32/vdso32.so" + .incbin "arch/powerpc/kernel/vdso32/vdso32.so.dbg" .balign PAGE_SIZE vdso32_end: diff --git a/arch/powerpc/kernel/vdso64/getcpu.S b/arch/powerpc/kernel/vdso64/getcpu.S index 47afd08c90f..23eb9a9441b 100644 --- a/arch/powerpc/kernel/vdso64/getcpu.S +++ b/arch/powerpc/kernel/vdso64/getcpu.S @@ -29,7 +29,7 @@ */ V_FUNCTION_BEGIN(__kernel_getcpu) .cfi_startproc - mfspr r5,SPRN_USPRG3 + mfspr r5,SPRN_SPRG_VDSO_READ cmpdi cr0,r3,0 cmpdi cr1,r4,0 clrlwi r6,r5,16 diff --git a/arch/powerpc/kernel/vdso64/sigtramp.S b/arch/powerpc/kernel/vdso64/sigtramp.S index 45ea281e9a2..542c6f422e4 100644 --- a/arch/powerpc/kernel/vdso64/sigtramp.S +++ b/arch/powerpc/kernel/vdso64/sigtramp.S @@ -142,6 +142,13 @@ V_FUNCTION_END(__kernel_sigtramp_rt64) /* Size of CR reg in DWARF unwind info. */ #define CRSIZE 4 +/* Offset of CR reg within a full word. */ +#ifdef __LITTLE_ENDIAN__ +#define CROFF 0 +#else +#define CROFF (RSIZE - CRSIZE) +#endif + /* This is the offset of the VMX reg pointer. */ #define VREGS 48*RSIZE+33*8 @@ -181,7 +188,14 @@ V_FUNCTION_END(__kernel_sigtramp_rt64) rsave (31, 31*RSIZE); \ rsave (67, 32*RSIZE); /* ap, used as temp for nip */ \ rsave (65, 36*RSIZE); /* lr */ \ - rsave (70, 38*RSIZE + (RSIZE - CRSIZE)) /* cr */ + rsave (68, 38*RSIZE + CROFF); /* cr fields */ \ + rsave (69, 38*RSIZE + CROFF); \ + rsave (70, 38*RSIZE + CROFF); \ + rsave (71, 38*RSIZE + CROFF); \ + rsave (72, 38*RSIZE + CROFF); \ + rsave (73, 38*RSIZE + CROFF); \ + rsave (74, 38*RSIZE + CROFF); \ + rsave (75, 38*RSIZE + CROFF) /* Describe where the FP regs are saved. */ #define EH_FRAME_FP \ diff --git a/arch/powerpc/kernel/vdso64/vdso64.lds.S b/arch/powerpc/kernel/vdso64/vdso64.lds.S index e4863819663..64fb183a47c 100644 --- a/arch/powerpc/kernel/vdso64/vdso64.lds.S +++ b/arch/powerpc/kernel/vdso64/vdso64.lds.S @@ -4,7 +4,11 @@ */ #include <asm/vdso.h> +#ifdef __LITTLE_ENDIAN__ +OUTPUT_FORMAT("elf64-powerpcle", "elf64-powerpcle", "elf64-powerpcle") +#else OUTPUT_FORMAT("elf64-powerpc", "elf64-powerpc", "elf64-powerpc") +#endif OUTPUT_ARCH(powerpc:common64) ENTRY(_start) diff --git a/arch/powerpc/kernel/vdso64/vdso64_wrapper.S b/arch/powerpc/kernel/vdso64/vdso64_wrapper.S index b8553d62b79..df60fca6a13 100644 --- a/arch/powerpc/kernel/vdso64/vdso64_wrapper.S +++ b/arch/powerpc/kernel/vdso64/vdso64_wrapper.S @@ -1,4 +1,3 @@ -#include <linux/init.h> #include <linux/linkage.h> #include <asm/page.h> @@ -7,7 +6,7 @@ .globl vdso64_start, vdso64_end .balign PAGE_SIZE vdso64_start: - .incbin "arch/powerpc/kernel/vdso64/vdso64.so" + .incbin "arch/powerpc/kernel/vdso64/vdso64.so.dbg" .balign PAGE_SIZE vdso64_end: diff --git a/arch/powerpc/kernel/vecemu.c b/arch/powerpc/kernel/vecemu.c index 604d0947cb2..c4bfadb2606 100644 --- a/arch/powerpc/kernel/vecemu.c +++ b/arch/powerpc/kernel/vecemu.c @@ -271,7 +271,7 @@ int emulate_altivec(struct pt_regs *regs) vb = (instr >> 11) & 0x1f; vc = (instr >> 6) & 0x1f; - vrs = current->thread.vr; + vrs = current->thread.vr_state.vr; switch (instr & 0x3f) { case 10: switch (vc) { @@ -320,12 +320,12 @@ int emulate_altivec(struct pt_regs *regs) case 14: /* vctuxs */ for (i = 0; i < 4; ++i) vrs[vd].u[i] = ctuxs(vrs[vb].u[i], va, - ¤t->thread.vscr.u[3]); + ¤t->thread.vr_state.vscr.u[3]); break; case 15: /* vctsxs */ for (i = 0; i < 4; ++i) vrs[vd].u[i] = ctsxs(vrs[vb].u[i], va, - ¤t->thread.vscr.u[3]); + ¤t->thread.vr_state.vscr.u[3]); break; default: return -EINVAL; diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 9e20999aaef..74f8050518d 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -8,29 +8,6 @@ #include <asm/ptrace.h> #ifdef CONFIG_PPC_TRANSACTIONAL_MEM -/* - * Wrapper to call load_up_altivec from C. - * void do_load_up_altivec(struct pt_regs *regs); - */ -_GLOBAL(do_load_up_altivec) - mflr r0 - std r0, 16(r1) - stdu r1, -112(r1) - - subi r6, r3, STACK_FRAME_OVERHEAD - /* load_up_altivec expects r12=MSR, r13=PACA, and returns - * with r12 = new MSR. - */ - ld r12,_MSR(r6) - GET_PACA(r13) - bl load_up_altivec - std r12,_MSR(r6) - - ld r0, 112+16(r1) - addi r1, r1, 112 - mtlr r0 - blr - /* void do_load_up_transact_altivec(struct thread_struct *thread) * * This is similar to load_up_altivec but for the transactional version of the @@ -46,10 +23,11 @@ _GLOBAL(do_load_up_transact_altivec) li r4,1 stw r4,THREAD_USED_VR(r3) - li r10,THREAD_TRANSACT_VSCR + li r10,THREAD_TRANSACT_VRSTATE+VRSTATE_VSCR lvx vr0,r10,r3 mtvscr vr0 - REST_32VRS_TRANSACT(0,r4,r3) + addi r10,r3,THREAD_TRANSACT_VRSTATE + REST_32VRS(0,r4,r10) /* Disable VEC again. */ MTMSRD(r6) @@ -59,12 +37,46 @@ _GLOBAL(do_load_up_transact_altivec) #endif /* - * load_up_altivec(unused, unused, tsk) + * Enable use of VMX/Altivec for the caller. + */ +_GLOBAL(vec_enable) + mfmsr r3 + oris r3,r3,MSR_VEC@h + MTMSRD(r3) + isync + blr + +/* + * Load state from memory into VMX registers including VSCR. + * Assumes the caller has enabled VMX in the MSR. + */ +_GLOBAL(load_vr_state) + li r4,VRSTATE_VSCR + lvx vr0,r4,r3 + mtvscr vr0 + REST_32VRS(0,r4,r3) + blr + +/* + * Store VMX state into memory, including VSCR. + * Assumes the caller has enabled VMX in the MSR. + */ +_GLOBAL(store_vr_state) + SAVE_32VRS(0, r4, r3) + mfvscr vr0 + li r4, VRSTATE_VSCR + stvx vr0, r4, r3 + blr + +/* * Disable VMX for the task which had it previously, * and save its vector registers in its thread_struct. * Enables the VMX for use in the kernel on return. * On SMP we know the VMX is free, since we give it up every * switch (ie, no lazy save of the vector registers). + * + * Note that on 32-bit this can only use registers that will be + * restored by fast_exception_return, i.e. r3 - r6, r10 and r11. */ _GLOBAL(load_up_altivec) mfmsr r5 /* grab the current MSR */ @@ -90,10 +102,11 @@ _GLOBAL(load_up_altivec) /* Save VMX state to last_task_used_altivec's THREAD struct */ toreal(r4) addi r4,r4,THREAD - SAVE_32VRS(0,r5,r4) + addi r6,r4,THREAD_VRSTATE + SAVE_32VRS(0,r5,r6) mfvscr vr0 - li r10,THREAD_VSCR - stvx vr0,r10,r4 + li r10,VRSTATE_VSCR + stvx vr0,r10,r6 /* Disable VMX for last_task_used_altivec */ PPC_LL r5,PT_REGS(r4) toreal(r5) @@ -125,12 +138,13 @@ _GLOBAL(load_up_altivec) oris r12,r12,MSR_VEC@h std r12,_MSR(r1) #endif + addi r6,r5,THREAD_VRSTATE li r4,1 - li r10,THREAD_VSCR + li r10,VRSTATE_VSCR stw r4,THREAD_USED_VR(r5) - lvx vr0,r10,r5 + lvx vr0,r10,r6 mtvscr vr0 - REST_32VRS(0,r4,r5) + REST_32VRS(0,r4,r6) #ifndef CONFIG_SMP /* Update last_task_used_altivec to 'current' */ subi r4,r5,THREAD /* Back to 'current' */ @@ -165,12 +179,16 @@ _GLOBAL(giveup_altivec) PPC_LCMPI 0,r3,0 beqlr /* if no previous owner, done */ addi r3,r3,THREAD /* want THREAD of task */ + PPC_LL r7,THREAD_VRSAVEAREA(r3) PPC_LL r5,PT_REGS(r3) - PPC_LCMPI 0,r5,0 - SAVE_32VRS(0,r4,r3) + PPC_LCMPI 0,r7,0 + bne 2f + addi r7,r3,THREAD_VRSTATE +2: PPC_LCMPI 0,r5,0 + SAVE_32VRS(0,r4,r7) mfvscr vr0 - li r4,THREAD_VSCR - stvx vr0,r4,r3 + li r4,VRSTATE_VSCR + stvx vr0,r4,r7 beq 1f PPC_LL r4,_MSR-STACK_FRAME_OVERHEAD(r5) #ifdef CONFIG_VSX diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index 78a350670de..904c66128fa 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -518,16 +518,18 @@ static dma_addr_t vio_dma_iommu_map_page(struct device *dev, struct page *page, struct dma_attrs *attrs) { struct vio_dev *viodev = to_vio_dev(dev); + struct iommu_table *tbl; dma_addr_t ret = DMA_ERROR_CODE; - if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE))) { + tbl = get_iommu_table_base(dev); + if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl)))) { atomic_inc(&viodev->cmo.allocs_failed); return ret; } ret = dma_iommu_ops.map_page(dev, page, offset, size, direction, attrs); if (unlikely(dma_mapping_error(dev, ret))) { - vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE)); + vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl))); atomic_inc(&viodev->cmo.allocs_failed); } @@ -540,10 +542,12 @@ static void vio_dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle, struct dma_attrs *attrs) { struct vio_dev *viodev = to_vio_dev(dev); + struct iommu_table *tbl; + tbl = get_iommu_table_base(dev); dma_iommu_ops.unmap_page(dev, dma_handle, size, direction, attrs); - vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE)); + vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl))); } static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist, @@ -551,12 +555,14 @@ static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist, struct dma_attrs *attrs) { struct vio_dev *viodev = to_vio_dev(dev); + struct iommu_table *tbl; struct scatterlist *sgl; int ret, count = 0; size_t alloc_size = 0; + tbl = get_iommu_table_base(dev); for (sgl = sglist; count < nelems; count++, sgl++) - alloc_size += roundup(sgl->length, IOMMU_PAGE_SIZE); + alloc_size += roundup(sgl->length, IOMMU_PAGE_SIZE(tbl)); if (vio_cmo_alloc(viodev, alloc_size)) { atomic_inc(&viodev->cmo.allocs_failed); @@ -572,7 +578,7 @@ static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist, } for (sgl = sglist, count = 0; count < ret; count++, sgl++) - alloc_size -= roundup(sgl->dma_length, IOMMU_PAGE_SIZE); + alloc_size -= roundup(sgl->dma_length, IOMMU_PAGE_SIZE(tbl)); if (alloc_size) vio_cmo_dealloc(viodev, alloc_size); @@ -585,12 +591,14 @@ static void vio_dma_iommu_unmap_sg(struct device *dev, struct dma_attrs *attrs) { struct vio_dev *viodev = to_vio_dev(dev); + struct iommu_table *tbl; struct scatterlist *sgl; size_t alloc_size = 0; int count = 0; + tbl = get_iommu_table_base(dev); for (sgl = sglist; count < nelems; count++, sgl++) - alloc_size += roundup(sgl->dma_length, IOMMU_PAGE_SIZE); + alloc_size += roundup(sgl->dma_length, IOMMU_PAGE_SIZE(tbl)); dma_iommu_ops.unmap_sg(dev, sglist, nelems, direction, attrs); @@ -706,11 +714,14 @@ static int vio_cmo_bus_probe(struct vio_dev *viodev) { struct vio_cmo_dev_entry *dev_ent; struct device *dev = &viodev->dev; + struct iommu_table *tbl; struct vio_driver *viodrv = to_vio_driver(dev->driver); unsigned long flags; size_t size; bool dma_capable = false; + tbl = get_iommu_table_base(dev); + /* A device requires entitlement if it has a DMA window property */ switch (viodev->family) { case VDEVICE: @@ -736,7 +747,8 @@ static int vio_cmo_bus_probe(struct vio_dev *viodev) return -EINVAL; } - viodev->cmo.desired = IOMMU_PAGE_ALIGN(viodrv->get_desired_dma(viodev)); + viodev->cmo.desired = + IOMMU_PAGE_ALIGN(viodrv->get_desired_dma(viodev), tbl); if (viodev->cmo.desired < VIO_CMO_MIN_ENT) viodev->cmo.desired = VIO_CMO_MIN_ENT; size = VIO_CMO_MIN_ENT; @@ -997,21 +1009,36 @@ static struct device_attribute vio_cmo_dev_attrs[] = { /* sysfs bus functions and data structures for CMO */ #define viobus_cmo_rd_attr(name) \ -static ssize_t \ -viobus_cmo_##name##_show(struct bus_type *bt, char *buf) \ +static ssize_t cmo_##name##_show(struct bus_type *bt, char *buf) \ { \ return sprintf(buf, "%lu\n", vio_cmo.name); \ -} +} \ +static BUS_ATTR_RO(cmo_##name) #define viobus_cmo_pool_rd_attr(name, var) \ static ssize_t \ -viobus_cmo_##name##_pool_show_##var(struct bus_type *bt, char *buf) \ +cmo_##name##_##var##_show(struct bus_type *bt, char *buf) \ { \ return sprintf(buf, "%lu\n", vio_cmo.name.var); \ +} \ +static BUS_ATTR_RO(cmo_##name##_##var) + +viobus_cmo_rd_attr(entitled); +viobus_cmo_rd_attr(spare); +viobus_cmo_rd_attr(min); +viobus_cmo_rd_attr(desired); +viobus_cmo_rd_attr(curr); +viobus_cmo_pool_rd_attr(reserve, size); +viobus_cmo_pool_rd_attr(excess, size); +viobus_cmo_pool_rd_attr(excess, free); + +static ssize_t cmo_high_show(struct bus_type *bt, char *buf) +{ + return sprintf(buf, "%lu\n", vio_cmo.high); } -static ssize_t viobus_cmo_high_reset(struct bus_type *bt, const char *buf, - size_t count) +static ssize_t cmo_high_store(struct bus_type *bt, const char *buf, + size_t count) { unsigned long flags; @@ -1021,35 +1048,26 @@ static ssize_t viobus_cmo_high_reset(struct bus_type *bt, const char *buf, return count; } - -viobus_cmo_rd_attr(entitled); -viobus_cmo_pool_rd_attr(reserve, size); -viobus_cmo_pool_rd_attr(excess, size); -viobus_cmo_pool_rd_attr(excess, free); -viobus_cmo_rd_attr(spare); -viobus_cmo_rd_attr(min); -viobus_cmo_rd_attr(desired); -viobus_cmo_rd_attr(curr); -viobus_cmo_rd_attr(high); - -static struct bus_attribute vio_cmo_bus_attrs[] = { - __ATTR(cmo_entitled, S_IRUGO, viobus_cmo_entitled_show, NULL), - __ATTR(cmo_reserve_size, S_IRUGO, viobus_cmo_reserve_pool_show_size, NULL), - __ATTR(cmo_excess_size, S_IRUGO, viobus_cmo_excess_pool_show_size, NULL), - __ATTR(cmo_excess_free, S_IRUGO, viobus_cmo_excess_pool_show_free, NULL), - __ATTR(cmo_spare, S_IRUGO, viobus_cmo_spare_show, NULL), - __ATTR(cmo_min, S_IRUGO, viobus_cmo_min_show, NULL), - __ATTR(cmo_desired, S_IRUGO, viobus_cmo_desired_show, NULL), - __ATTR(cmo_curr, S_IRUGO, viobus_cmo_curr_show, NULL), - __ATTR(cmo_high, S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH, - viobus_cmo_high_show, viobus_cmo_high_reset), - __ATTR_NULL +static BUS_ATTR_RW(cmo_high); + +static struct attribute *vio_bus_attrs[] = { + &bus_attr_cmo_entitled.attr, + &bus_attr_cmo_spare.attr, + &bus_attr_cmo_min.attr, + &bus_attr_cmo_desired.attr, + &bus_attr_cmo_curr.attr, + &bus_attr_cmo_high.attr, + &bus_attr_cmo_reserve_size.attr, + &bus_attr_cmo_excess_size.attr, + &bus_attr_cmo_excess_free.attr, + NULL, }; +ATTRIBUTE_GROUPS(vio_bus); static void vio_cmo_sysfs_init(void) { vio_bus_type.dev_attrs = vio_cmo_dev_attrs; - vio_bus_type.bus_attrs = vio_cmo_bus_attrs; + vio_bus_type.bus_groups = vio_bus_groups; } #else /* CONFIG_PPC_SMLPAR */ int vio_cmo_entitlement_update(size_t new_entitlement) { return 0; } @@ -1170,9 +1188,10 @@ static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev) &tbl->it_index, &offset, &size); /* TCE table size - measured in tce entries */ - tbl->it_size = size >> IOMMU_PAGE_SHIFT; + tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K; + tbl->it_size = size >> tbl->it_page_shift; /* offset for VIO should always be 0 */ - tbl->it_offset = offset >> IOMMU_PAGE_SHIFT; + tbl->it_offset = offset >> tbl->it_page_shift; tbl->it_busno = 0; tbl->it_type = TCE_VB; tbl->it_blocksize = 16; @@ -1413,8 +1432,8 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node) /* needed to ensure proper operation of coherent allocations * later, in case driver doesn't set it explicitly */ - dma_set_mask(&viodev->dev, DMA_BIT_MASK(64)); - dma_set_coherent_mask(&viodev->dev, DMA_BIT_MASK(64)); + viodev->dev.coherent_dma_mask = DMA_BIT_MASK(64); + viodev->dev.dma_mask = &viodev->dev.coherent_dma_mask; } /* register with generic device framework */ @@ -1530,11 +1549,15 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, const char *cp; dn = dev->of_node; - if (!dn) - return -ENODEV; + if (!dn) { + strcpy(buf, "\n"); + return strlen(buf); + } cp = of_get_property(dn, "compatible", NULL); - if (!cp) - return -ENODEV; + if (!cp) { + strcpy(buf, "\n"); + return strlen(buf); + } return sprintf(buf, "vio:T%sS%s\n", vio_dev->type, cp); } |
