Diffstat (limited to 'arch/powerpc/kernel')
101 files changed, 5565 insertions, 2957 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 445cb6e39d5..670c312d914 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -2,6 +2,7 @@  # Makefile for the linux kernel.  # +CFLAGS_prom.o		= -I$(src)/../../../scripts/dtc/libfdt  CFLAGS_ptrace.o		+= -DUTS_MACHINE='"$(UTS_MACHINE)"'  subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror @@ -39,15 +40,14 @@ obj-$(CONFIG_PPC64)		+= setup_64.o sys_ppc32.o \  obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= hw_breakpoint.o  obj-$(CONFIG_PPC_BOOK3S_64)	+= cpu_setup_ppc970.o cpu_setup_pa6t.o  obj-$(CONFIG_PPC_BOOK3S_64)	+= cpu_setup_power.o +obj-$(CONFIG_PPC_BOOK3S_64)	+= mce.o mce_power.o  obj64-$(CONFIG_RELOCATABLE)	+= reloc_64.o  obj-$(CONFIG_PPC_BOOK3E_64)	+= exceptions-64e.o idle_book3e.o -obj-$(CONFIG_PPC_A2)		+= cpu_setup_a2.o  obj-$(CONFIG_PPC64)		+= vdso64/  obj-$(CONFIG_ALTIVEC)		+= vecemu.o  obj-$(CONFIG_PPC_970_NAP)	+= idle_power4.o  obj-$(CONFIG_PPC_P7_NAP)	+= idle_power7.o  obj-$(CONFIG_PPC_OF)		+= of_platform.o prom_parse.o -obj-$(CONFIG_PPC_CLOCK)		+= clock.o  procfs-y			:= proc_powerpc.o  obj-$(CONFIG_PROC_FS)		+= $(procfs-y)  rtaspci-$(CONFIG_PPC64)-$(CONFIG_PCI)	:= rtas_pci.o diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index a27ccd5dc6b..34f55524d45 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -25,14 +25,13 @@  #include <asm/cputable.h>  #include <asm/emulated_ops.h>  #include <asm/switch_to.h> +#include <asm/disassemble.h>  struct aligninfo {  	unsigned char len;  	unsigned char flags;  }; -#define IS_XFORM(inst)	(((inst) >> 26) == 31) -#define IS_DSFORM(inst)	(((inst) >> 26) >= 56)  #define INVALID	{ 0, 0 } @@ -54,8 +53,6 @@ struct aligninfo {  /* DSISR bits reported for a DCBZ instruction: */  #define DCBZ	0x5f	/* 8xx/82xx dcbz faults when cache not enabled */ -#define SWAP(a, b)	(t = (a), (a) = (b), (b) = t) -  /*   * The PowerPC stores certain bits of the instruction that caused the   * alignment exception in the DSISR register.  
This array maps those @@ -75,7 +72,7 @@ static struct aligninfo aligninfo[128] = {  	{ 8, LD+F },		/* 00 0 1001: lfd */  	{ 4, ST+F+S },		/* 00 0 1010: stfs */  	{ 8, ST+F },		/* 00 0 1011: stfd */ -	INVALID,		/* 00 0 1100 */ +	{ 16, LD },		/* 00 0 1100: lq */  	{ 8, LD },		/* 00 0 1101: ld/ldu/lwa */  	INVALID,		/* 00 0 1110 */  	{ 8, ST },		/* 00 0 1111: std/stdu */ @@ -142,7 +139,7 @@ static struct aligninfo aligninfo[128] = {  	{ 2, LD+SW },		/* 10 0 1100: lhbrx */  	{ 4, LD+SE },		/* 10 0 1101  lwa */  	{ 2, ST+SW },		/* 10 0 1110: sthbrx */ -	INVALID,		/* 10 0 1111 */ +	{ 16, ST },		/* 10 0 1111: stq */  	INVALID,		/* 10 1 0000 */  	INVALID,		/* 10 1 0001 */  	INVALID,		/* 10 1 0010 */ @@ -194,37 +191,6 @@ static struct aligninfo aligninfo[128] = {  };  /* - * Create a DSISR value from the instruction - */ -static inline unsigned make_dsisr(unsigned instr) -{ -	unsigned dsisr; - - -	/* bits  6:15 --> 22:31 */ -	dsisr = (instr & 0x03ff0000) >> 16; - -	if (IS_XFORM(instr)) { -		/* bits 29:30 --> 15:16 */ -		dsisr |= (instr & 0x00000006) << 14; -		/* bit     25 -->    17 */ -		dsisr |= (instr & 0x00000040) << 8; -		/* bits 21:24 --> 18:21 */ -		dsisr |= (instr & 0x00000780) << 3; -	} else { -		/* bit      5 -->    17 */ -		dsisr |= (instr & 0x04000000) >> 12; -		/* bits  1: 4 --> 18:21 */ -		dsisr |= (instr & 0x78000000) >> 17; -		/* bits 30:31 --> 12:13 */ -		if (IS_DSFORM(instr)) -			dsisr |= (instr & 0x00000003) << 18; -	} - -	return dsisr; -} - -/*   * The dcbz (data cache block zero) instruction   * gives an alignment fault if used on non-cacheable   * memory.  We handle the fault mainly for the @@ -256,11 +222,17 @@ static int emulate_dcbz(struct pt_regs *regs, unsigned char __user *addr)   * bottom 4 bytes of each register, and the loads clear the   * top 4 bytes of the affected register.   */ +#ifdef __BIG_ENDIAN__  #ifdef CONFIG_PPC64  #define REG_BYTE(rp, i)		*((u8 *)((rp) + ((i) >> 2)) + ((i) & 3) + 4)  #else  #define REG_BYTE(rp, i)		*((u8 *)(rp) + (i))  #endif +#endif + +#ifdef __LITTLE_ENDIAN__ +#define REG_BYTE(rp, i)		(*(((u8 *)((rp) + ((i)>>2)) + ((i)&3)))) +#endif  #define SWIZ_PTR(p)		((unsigned char __user *)((p) ^ swiz)) @@ -305,6 +277,15 @@ static int emulate_multiple(struct pt_regs *regs, unsigned char __user *addr,  			nb0 = nb + reg * 4 - 128;  			nb = 128 - reg * 4;  		} +#ifdef __LITTLE_ENDIAN__ +		/* +		 *  String instructions are endian neutral but the code +		 *  below is not.  Force byte swapping on so that the +		 *  effects of swizzling are undone in the load/store +		 *  loops below. 
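The SWIZ_PTR(p) macro above implements the old "PowerPC little-endian" mode, which swizzles low-order address bits instead of swapping data bytes. Below is a minimal user-space sketch of the effect; the names are invented for illustration and this is not the kernel's code:

#include <stdint.h>
#include <stddef.h>

/*
 * Copy nb bytes while XOR-ing the low three address bits on the source
 * side, as SWIZ_PTR() does when CPU_FTR_PPC_LE sets swiz = 7.
 * Assumes nb is a multiple of 8 whenever swiz is non-zero.
 */
static void copy_swizzled(uint8_t *dst, const uint8_t *src, size_t nb,
			  size_t swiz)
{
	size_t i;

	for (i = 0; i < nb; i++)
		dst[i] = src[i ^ swiz];
}

With swiz == 7, each aligned doubleword comes out byte-reversed, which is why the emulation code can fold this mode into the existing SW byte-swap handling rather than treating it separately.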
+		 */
+		flags ^= SW;
+#endif
 	} else {
 		/* lwm, stmw */
 		nb = (32 - reg) * 4;
@@ -372,8 +353,6 @@ static int emulate_fp_pair(unsigned char __user *addr, unsigned int reg,
 	char *ptr1 = (char *) &current->thread.TS_FPR(reg+1);
 	int i, ret, sw = 0;
 
-	if (!(flags & F))
-		return 0;
 	if (reg & 1)
 		return 0;	/* invalid form: FRS/FRT must be even */
 	if (flags & SW)
@@ -393,6 +372,34 @@ static int emulate_fp_pair(unsigned char __user *addr, unsigned int reg,
 	return 1;	/* exception handled and fixed up */
 }
 
+#ifdef CONFIG_PPC64
+static int emulate_lq_stq(struct pt_regs *regs, unsigned char __user *addr,
+			  unsigned int reg, unsigned int flags)
+{
+	char *ptr0 = (char *)&regs->gpr[reg];
+	char *ptr1 = (char *)&regs->gpr[reg+1];
+	int i, ret, sw = 0;
+
+	if (reg & 1)
+		return 0;	/* invalid form: GPR must be even */
+	if (flags & SW)
+		sw = 7;
+	ret = 0;
+	for (i = 0; i < 8; ++i) {
+		if (!(flags & ST)) {
+			ret |= __get_user(ptr0[i^sw], addr + i);
+			ret |= __get_user(ptr1[i^sw], addr + i + 8);
+		} else {
+			ret |= __put_user(ptr0[i^sw], addr + i);
+			ret |= __put_user(ptr1[i^sw], addr + i + 8);
+		}
+	}
+	if (ret)
+		return -EFAULT;
+	return 1;	/* exception handled and fixed up */
+}
+#endif /* CONFIG_PPC64 */
+
 #ifdef CONFIG_SPE
 
 static struct aligninfo spe_aligninfo[32] = {
@@ -458,7 +465,7 @@ static struct aligninfo spe_aligninfo[32] = {
 static int emulate_spe(struct pt_regs *regs, unsigned int reg,
 		       unsigned int instr)
 {
-	int t, ret;
+	int ret;
 	union {
 		u64 ll;
 		u32 w[2];
@@ -581,24 +588,18 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg,
 	if (flags & SW) {
 		switch (flags & 0xf0) {
 		case E8:
-			SWAP(data.v[0], data.v[7]);
-			SWAP(data.v[1], data.v[6]);
-			SWAP(data.v[2], data.v[5]);
-			SWAP(data.v[3], data.v[4]);
+			data.ll = swab64(data.ll);
 			break;
 		case E4:
-
-			SWAP(data.v[0], data.v[3]);
-			SWAP(data.v[1], data.v[2]);
-			SWAP(data.v[4], data.v[7]);
-			SWAP(data.v[5], data.v[6]);
+			data.w[0] = swab32(data.w[0]);
+			data.w[1] = swab32(data.w[1]);
 			break;
 		/* Its half word endian */
 		default:
-			SWAP(data.v[0], data.v[1]);
-			SWAP(data.v[2], data.v[3]);
-			SWAP(data.v[4], data.v[5]);
-			SWAP(data.v[6], data.v[7]);
+			data.h[0] = swab16(data.h[0]);
+			data.h[1] = swab16(data.h[1]);
+			data.h[2] = swab16(data.h[2]);
+			data.h[3] = swab16(data.h[3]);
 			break;
 		}
 	}
@@ -658,14 +659,31 @@ static int emulate_vsx(unsigned char __user *addr, unsigned int reg,
 	flush_vsx_to_thread(current);
 
 	if (reg < 32)
-		ptr = (char *) &current->thread.TS_FPR(reg);
+		ptr = (char *) &current->thread.fp_state.fpr[reg][0];
 	else
-		ptr = (char *) &current->thread.vr[reg - 32];
+		ptr = (char *) &current->thread.vr_state.vr[reg - 32];
 
 	lptr = (unsigned long *) ptr;
 
+#ifdef __LITTLE_ENDIAN__
+	if (flags & SW) {
+		elsize = length;
+		sw = length-1;
+	} else {
+		/*
+		 * The elements are BE ordered, even in LE mode, so process
+		 * them in reverse order.
+		 */ +		addr += length - elsize; + +		/* 8 byte memory accesses go in the top 8 bytes of the VR */ +		if (length == 8) +			ptr += 8; +	} +#else  	if (flags & SW)  		sw = elsize-1; +#endif  	for (j = 0; j < length; j += elsize) {  		for (i = 0; i < elsize; ++i) { @@ -675,19 +693,31 @@ static int emulate_vsx(unsigned char __user *addr, unsigned int reg,  				ret |= __get_user(ptr[i^sw], addr + i);  		}  		ptr  += elsize; +#ifdef __LITTLE_ENDIAN__ +		addr -= elsize; +#else  		addr += elsize; +#endif  	} +#ifdef __BIG_ENDIAN__ +#define VSX_HI 0 +#define VSX_LO 1 +#else +#define VSX_HI 1 +#define VSX_LO 0 +#endif +  	if (!ret) {  		if (flags & U)  			regs->gpr[areg] = regs->dar;  		/* Splat load copies the same data to top and bottom 8 bytes */  		if (flags & SPLT) -			lptr[1] = lptr[0]; -		/* For 8 byte loads, zero the top 8 bytes */ +			lptr[VSX_LO] = lptr[VSX_HI]; +		/* For 8 byte loads, zero the low 8 bytes */  		else if (!(flags & ST) && (8 == length)) -			lptr[1] = 0; +			lptr[VSX_LO] = 0;  	} else  		return -EFAULT; @@ -710,18 +740,28 @@ int fix_alignment(struct pt_regs *regs)  	unsigned int dsisr;  	unsigned char __user *addr;  	unsigned long p, swiz; -	int ret, t; -	union { +	int ret, i; +	union data {  		u64 ll;  		double dd;  		unsigned char v[8];  		struct { +#ifdef __LITTLE_ENDIAN__ +			int	 low32; +			unsigned hi32; +#else  			unsigned hi32;  			int	 low32; +#endif  		} x32;  		struct { +#ifdef __LITTLE_ENDIAN__ +			short	      low16; +			unsigned char hi48[6]; +#else  			unsigned char hi48[6];  			short	      low16; +#endif  		} x16;  	} data; @@ -780,8 +820,9 @@ int fix_alignment(struct pt_regs *regs)  	/* Byteswap little endian loads and stores */  	swiz = 0; -	if (regs->msr & MSR_LE) { +	if ((regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE)) {  		flags ^= SW; +#ifdef __BIG_ENDIAN__  		/*  		 * So-called "PowerPC little endian" mode works by  		 * swizzling addresses rather than by actually doing @@ -794,6 +835,7 @@ int fix_alignment(struct pt_regs *regs)  		 */  		if (cpu_has_feature(CPU_FTR_PPC_LE))  			swiz = 7; +#endif  	}  	/* DAR has the operand effective address */ @@ -818,7 +860,7 @@ int fix_alignment(struct pt_regs *regs)  			elsize = 8;  		flags = 0; -		if (regs->msr & MSR_LE) +		if ((regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE))  			flags |= SW;  		if (instruction & 0x100)  			flags |= ST; @@ -866,10 +908,20 @@ int fix_alignment(struct pt_regs *regs)  		flush_fp_to_thread(current);  	} -	/* Special case for 16-byte FP loads and stores */ -	if (nb == 16) { -		PPC_WARN_ALIGNMENT(fp_pair, regs); -		return emulate_fp_pair(addr, reg, flags); +	if ((nb == 16)) { +		if (flags & F) { +			/* Special case for 16-byte FP loads and stores */ +			PPC_WARN_ALIGNMENT(fp_pair, regs); +			return emulate_fp_pair(addr, reg, flags); +		} else { +#ifdef CONFIG_PPC64 +			/* Special case for 16-byte loads and stores */ +			PPC_WARN_ALIGNMENT(lq_stq, regs); +			return emulate_lq_stq(regs, addr, reg, flags); +#else +			return 0; +#endif +		}  	}  	PPC_WARN_ALIGNMENT(unaligned, regs); @@ -878,32 +930,36 @@ int fix_alignment(struct pt_regs *regs)  	 * get it from register values  	 */  	if (!(flags & ST)) { -		data.ll = 0; -		ret = 0; -		p = (unsigned long) addr; +		unsigned int start = 0; +  		switch (nb) { -		case 8: -			ret |= __get_user_inatomic(data.v[0], SWIZ_PTR(p++)); -			ret |= __get_user_inatomic(data.v[1], SWIZ_PTR(p++)); -			ret |= __get_user_inatomic(data.v[2], SWIZ_PTR(p++)); -			ret |= __get_user_inatomic(data.v[3], SWIZ_PTR(p++));  		case 4: -			ret |= 
__get_user_inatomic(data.v[4], SWIZ_PTR(p++)); -			ret |= __get_user_inatomic(data.v[5], SWIZ_PTR(p++)); +			start = offsetof(union data, x32.low32); +			break;  		case 2: -			ret |= __get_user_inatomic(data.v[6], SWIZ_PTR(p++)); -			ret |= __get_user_inatomic(data.v[7], SWIZ_PTR(p++)); -			if (unlikely(ret)) -				return -EFAULT; +			start = offsetof(union data, x16.low16); +			break;  		} + +		data.ll = 0; +		ret = 0; +		p = (unsigned long)addr; + +		for (i = 0; i < nb; i++) +			ret |= __get_user_inatomic(data.v[start + i], +						   SWIZ_PTR(p++)); + +		if (unlikely(ret)) +			return -EFAULT; +  	} else if (flags & F) { -		data.dd = current->thread.TS_FPR(reg); +		data.ll = current->thread.TS_FPR(reg);  		if (flags & S) {  			/* Single-precision FP store requires conversion... */  #ifdef CONFIG_PPC_FPU  			preempt_disable();  			enable_kernel_fp(); -			cvt_df(&data.dd, (float *)&data.v[4]); +			cvt_df(&data.dd, (float *)&data.x32.low32);  			preempt_enable();  #else  			return 0; @@ -915,17 +971,13 @@ int fix_alignment(struct pt_regs *regs)  	if (flags & SW) {  		switch (nb) {  		case 8: -			SWAP(data.v[0], data.v[7]); -			SWAP(data.v[1], data.v[6]); -			SWAP(data.v[2], data.v[5]); -			SWAP(data.v[3], data.v[4]); +			data.ll = swab64(data.ll);  			break;  		case 4: -			SWAP(data.v[4], data.v[7]); -			SWAP(data.v[5], data.v[6]); +			data.x32.low32 = swab32(data.x32.low32);  			break;  		case 2: -			SWAP(data.v[6], data.v[7]); +			data.x16.low16 = swab16(data.x16.low16);  			break;  		}  	} @@ -947,7 +999,7 @@ int fix_alignment(struct pt_regs *regs)  #ifdef CONFIG_PPC_FPU  		preempt_disable();  		enable_kernel_fp(); -		cvt_fd((float *)&data.v[4], &data.dd); +		cvt_fd((float *)&data.x32.low32, &data.dd);  		preempt_enable();  #else  		return 0; @@ -957,25 +1009,28 @@ int fix_alignment(struct pt_regs *regs)  	/* Store result to memory or update registers */  	if (flags & ST) { -		ret = 0; -		p = (unsigned long) addr; +		unsigned int start = 0; +  		switch (nb) { -		case 8: -			ret |= __put_user_inatomic(data.v[0], SWIZ_PTR(p++)); -			ret |= __put_user_inatomic(data.v[1], SWIZ_PTR(p++)); -			ret |= __put_user_inatomic(data.v[2], SWIZ_PTR(p++)); -			ret |= __put_user_inatomic(data.v[3], SWIZ_PTR(p++));  		case 4: -			ret |= __put_user_inatomic(data.v[4], SWIZ_PTR(p++)); -			ret |= __put_user_inatomic(data.v[5], SWIZ_PTR(p++)); +			start = offsetof(union data, x32.low32); +			break;  		case 2: -			ret |= __put_user_inatomic(data.v[6], SWIZ_PTR(p++)); -			ret |= __put_user_inatomic(data.v[7], SWIZ_PTR(p++)); +			start = offsetof(union data, x16.low16); +			break;  		} + +		ret = 0; +		p = (unsigned long)addr; + +		for (i = 0; i < nb; i++) +			ret |= __put_user_inatomic(data.v[start + i], +						   SWIZ_PTR(p++)); +  		if (unlikely(ret))  			return -EFAULT;  	} else if (flags & F) -		current->thread.TS_FPR(reg) = data.dd; +		current->thread.TS_FPR(reg) = data.ll;  	else  		regs->gpr[reg] = data.ll; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 502c7a4e73f..f5995a91221 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -54,6 +54,7 @@  #endif  #if defined(CONFIG_KVM) && defined(CONFIG_PPC_BOOK3S)  #include <asm/kvm_book3s.h> +#include <asm/kvm_ppc.h>  #endif  #ifdef CONFIG_PPC32 @@ -90,16 +91,17 @@ int main(void)  	DEFINE(THREAD_NORMSAVES, offsetof(struct thread_struct, normsave[0]));  #endif  	DEFINE(THREAD_FPEXC_MODE, offsetof(struct thread_struct, fpexc_mode)); -	DEFINE(THREAD_FPR0, offsetof(struct thread_struct, 
fpr[0])); -	DEFINE(THREAD_FPSCR, offsetof(struct thread_struct, fpscr)); +	DEFINE(THREAD_FPSTATE, offsetof(struct thread_struct, fp_state)); +	DEFINE(THREAD_FPSAVEAREA, offsetof(struct thread_struct, fp_save_area)); +	DEFINE(FPSTATE_FPSCR, offsetof(struct thread_fp_state, fpscr));  #ifdef CONFIG_ALTIVEC -	DEFINE(THREAD_VR0, offsetof(struct thread_struct, vr[0])); +	DEFINE(THREAD_VRSTATE, offsetof(struct thread_struct, vr_state)); +	DEFINE(THREAD_VRSAVEAREA, offsetof(struct thread_struct, vr_save_area));  	DEFINE(THREAD_VRSAVE, offsetof(struct thread_struct, vrsave)); -	DEFINE(THREAD_VSCR, offsetof(struct thread_struct, vscr));  	DEFINE(THREAD_USED_VR, offsetof(struct thread_struct, used_vr)); +	DEFINE(VRSTATE_VSCR, offsetof(struct thread_vr_state, vscr));  #endif /* CONFIG_ALTIVEC */  #ifdef CONFIG_VSX -	DEFINE(THREAD_VSR0, offsetof(struct thread_struct, fpr));  	DEFINE(THREAD_USED_VSR, offsetof(struct thread_struct, used_vsr));  #endif /* CONFIG_VSX */  #ifdef CONFIG_PPC64 @@ -114,7 +116,7 @@ int main(void)  #endif /* CONFIG_SPE */  #endif /* CONFIG_PPC64 */  #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) -	DEFINE(THREAD_DBCR0, offsetof(struct thread_struct, dbcr0)); +	DEFINE(THREAD_DBCR0, offsetof(struct thread_struct, debug.dbcr0));  #endif  #ifdef CONFIG_KVM_BOOK3S_32_HANDLER  	DEFINE(THREAD_KVM_SVCPU, offsetof(struct thread_struct, kvm_shadow_vcpu)); @@ -143,20 +145,12 @@ int main(void)  	DEFINE(THREAD_TM_PPR, offsetof(struct thread_struct, tm_ppr));  	DEFINE(THREAD_TM_DSCR, offsetof(struct thread_struct, tm_dscr));  	DEFINE(PT_CKPT_REGS, offsetof(struct thread_struct, ckpt_regs)); -	DEFINE(THREAD_TRANSACT_VR0, offsetof(struct thread_struct, -					 transact_vr[0])); -	DEFINE(THREAD_TRANSACT_VSCR, offsetof(struct thread_struct, -					  transact_vscr)); +	DEFINE(THREAD_TRANSACT_VRSTATE, offsetof(struct thread_struct, +						 transact_vr));  	DEFINE(THREAD_TRANSACT_VRSAVE, offsetof(struct thread_struct,  					    transact_vrsave)); -	DEFINE(THREAD_TRANSACT_FPR0, offsetof(struct thread_struct, -					  transact_fpr[0])); -	DEFINE(THREAD_TRANSACT_FPSCR, offsetof(struct thread_struct, -					   transact_fpscr)); -#ifdef CONFIG_VSX -	DEFINE(THREAD_TRANSACT_VSR0, offsetof(struct thread_struct, -					  transact_fpr[0])); -#endif +	DEFINE(THREAD_TRANSACT_FPSTATE, offsetof(struct thread_struct, +						 transact_fp));  	/* Local pt_regs on stack for Transactional Memory funcs. 
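These DEFINE() lines feed the asm-offsets mechanism: the constants they produce let assembly code reference C structure members by name rather than by hard-coded byte offsets. A stand-alone sketch of the idea follows; the kernel derives these values at compile time rather than by running a program, and the struct and symbol names here are invented:

#include <stddef.h>
#include <stdio.h>

/* Invented stand-in for the real thread_fp_state layout. */
struct example_fp_state {
	unsigned long fpr[32][1];
	unsigned long fpscr;
};

/* Emit one assembler-visible constant per member, like asm-offsets.h. */
#define DEFINE(sym, val) printf("#define %-24s %zu\n", #sym, (size_t)(val))

int main(void)
{
	DEFINE(EX_FPSTATE_FPSCR, offsetof(struct example_fp_state, fpscr));
	DEFINE(EX_FPSTATE_SIZE,  sizeof(struct example_fp_state));
	return 0;
}

Assembly can then load the FPSCR with something like ld r0, EX_FPSTATE_FPSCR(r3), and the offset tracks any later layout change, which is why this patch can fold fpr/fpscr into thread_fp_state without auditing every assembly user by hand.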
*/  	DEFINE(TM_FRAME_SIZE, STACK_FRAME_OVERHEAD +  	       sizeof(struct pt_regs) + 16); @@ -210,6 +204,15 @@ int main(void)  	DEFINE(PACA_MC_STACK, offsetof(struct paca_struct, mc_kstack));  	DEFINE(PACA_CRIT_STACK, offsetof(struct paca_struct, crit_kstack));  	DEFINE(PACA_DBG_STACK, offsetof(struct paca_struct, dbg_kstack)); +	DEFINE(PACA_TCD_PTR, offsetof(struct paca_struct, tcd_ptr)); + +	DEFINE(TCD_ESEL_NEXT, +		offsetof(struct tlb_core_data, esel_next)); +	DEFINE(TCD_ESEL_MAX, +		offsetof(struct tlb_core_data, esel_max)); +	DEFINE(TCD_ESEL_FIRST, +		offsetof(struct tlb_core_data, esel_first)); +	DEFINE(TCD_LOCK, offsetof(struct tlb_core_data, lock));  #endif /* CONFIG_PPC_BOOK3E */  #ifdef CONFIG_PPC_STD_MMU_64 @@ -239,15 +242,20 @@ int main(void)  	DEFINE(PACA_DTL_RIDX, offsetof(struct paca_struct, dtl_ridx));  #endif /* CONFIG_PPC_STD_MMU_64 */  	DEFINE(PACAEMERGSP, offsetof(struct paca_struct, emergency_sp)); +#ifdef CONFIG_PPC_BOOK3S_64 +	DEFINE(PACAMCEMERGSP, offsetof(struct paca_struct, mc_emergency_sp)); +	DEFINE(PACA_IN_MCE, offsetof(struct paca_struct, in_mce)); +#endif  	DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id));  	DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state)); +	DEFINE(PACA_DSCR, offsetof(struct paca_struct, dscr_default));  	DEFINE(PACA_STARTTIME, offsetof(struct paca_struct, starttime));  	DEFINE(PACA_STARTTIME_USER, offsetof(struct paca_struct, starttime_user));  	DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time));  	DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time));  	DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save));  	DEFINE(PACA_NAPSTATELOST, offsetof(struct paca_struct, nap_state_lost)); -	DEFINE(PACA_SPRG3, offsetof(struct paca_struct, sprg3)); +	DEFINE(PACA_SPRG_VDSO, offsetof(struct paca_struct, sprg_vdso));  #endif /* CONFIG_PPC64 */  	/* RTAS */ @@ -432,21 +440,19 @@ int main(void)  	DEFINE(VCPU_GUEST_PID, offsetof(struct kvm_vcpu, arch.pid));  	DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));  	DEFINE(VCPU_VRSAVE, offsetof(struct kvm_vcpu, arch.vrsave)); -	DEFINE(VCPU_FPRS, offsetof(struct kvm_vcpu, arch.fpr)); -	DEFINE(VCPU_FPSCR, offsetof(struct kvm_vcpu, arch.fpscr)); +	DEFINE(VCPU_FPRS, offsetof(struct kvm_vcpu, arch.fp.fpr));  #ifdef CONFIG_ALTIVEC -	DEFINE(VCPU_VRS, offsetof(struct kvm_vcpu, arch.vr)); -	DEFINE(VCPU_VSCR, offsetof(struct kvm_vcpu, arch.vscr)); -#endif -#ifdef CONFIG_VSX -	DEFINE(VCPU_VSRS, offsetof(struct kvm_vcpu, arch.vsr)); +	DEFINE(VCPU_VRS, offsetof(struct kvm_vcpu, arch.vr.vr));  #endif  	DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer));  	DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr));  	DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); +#ifdef CONFIG_PPC_BOOK3S +	DEFINE(VCPU_TAR, offsetof(struct kvm_vcpu, arch.tar)); +#endif  	DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr));  	DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc)); -#ifdef CONFIG_KVM_BOOK3S_64_HV +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE  	DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.shregs.msr));  	DEFINE(VCPU_SRR0, offsetof(struct kvm_vcpu, arch.shregs.srr0));  	DEFINE(VCPU_SRR1, offsetof(struct kvm_vcpu, arch.shregs.srr1)); @@ -465,6 +471,9 @@ int main(void)  	DEFINE(VCPU_SHARED, offsetof(struct kvm_vcpu, arch.shared));  	DEFINE(VCPU_SHARED_MSR, offsetof(struct kvm_vcpu_arch_shared, msr));  	DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr)); +#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_KVM_BOOK3S_PR_POSSIBLE) +	
DEFINE(VCPU_SHAREDBE, offsetof(struct kvm_vcpu, arch.shared_big_endian)); +#endif  	DEFINE(VCPU_SHARED_MAS0, offsetof(struct kvm_vcpu_arch_shared, mas0));  	DEFINE(VCPU_SHARED_MAS1, offsetof(struct kvm_vcpu_arch_shared, mas1)); @@ -477,7 +486,7 @@ int main(void)  	DEFINE(KVM_LPID, offsetof(struct kvm, arch.lpid));  	/* book3s */ -#ifdef CONFIG_KVM_BOOK3S_64_HV +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE  	DEFINE(KVM_SDR1, offsetof(struct kvm, arch.sdr1));  	DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid));  	DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr)); @@ -496,11 +505,18 @@ int main(void)  	DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id));  	DEFINE(VCPU_PURR, offsetof(struct kvm_vcpu, arch.purr));  	DEFINE(VCPU_SPURR, offsetof(struct kvm_vcpu, arch.spurr)); +	DEFINE(VCPU_IC, offsetof(struct kvm_vcpu, arch.ic)); +	DEFINE(VCPU_VTB, offsetof(struct kvm_vcpu, arch.vtb));  	DEFINE(VCPU_DSCR, offsetof(struct kvm_vcpu, arch.dscr));  	DEFINE(VCPU_AMR, offsetof(struct kvm_vcpu, arch.amr));  	DEFINE(VCPU_UAMOR, offsetof(struct kvm_vcpu, arch.uamor)); +	DEFINE(VCPU_IAMR, offsetof(struct kvm_vcpu, arch.iamr));  	DEFINE(VCPU_CTRL, offsetof(struct kvm_vcpu, arch.ctrl));  	DEFINE(VCPU_DABR, offsetof(struct kvm_vcpu, arch.dabr)); +	DEFINE(VCPU_DABRX, offsetof(struct kvm_vcpu, arch.dabrx)); +	DEFINE(VCPU_DAWR, offsetof(struct kvm_vcpu, arch.dawr)); +	DEFINE(VCPU_DAWRX, offsetof(struct kvm_vcpu, arch.dawrx)); +	DEFINE(VCPU_CIABR, offsetof(struct kvm_vcpu, arch.ciabr));  	DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags));  	DEFINE(VCPU_DEC, offsetof(struct kvm_vcpu, arch.dec));  	DEFINE(VCPU_DEC_EXPIRES, offsetof(struct kvm_vcpu, arch.dec_expires)); @@ -509,27 +525,64 @@ int main(void)  	DEFINE(VCPU_PRODDED, offsetof(struct kvm_vcpu, arch.prodded));  	DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr));  	DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc)); +	DEFINE(VCPU_SPMC, offsetof(struct kvm_vcpu, arch.spmc)); +	DEFINE(VCPU_SIAR, offsetof(struct kvm_vcpu, arch.siar)); +	DEFINE(VCPU_SDAR, offsetof(struct kvm_vcpu, arch.sdar)); +	DEFINE(VCPU_SIER, offsetof(struct kvm_vcpu, arch.sier));  	DEFINE(VCPU_SLB, offsetof(struct kvm_vcpu, arch.slb));  	DEFINE(VCPU_SLB_MAX, offsetof(struct kvm_vcpu, arch.slb_max));  	DEFINE(VCPU_SLB_NR, offsetof(struct kvm_vcpu, arch.slb_nr));  	DEFINE(VCPU_FAULT_DSISR, offsetof(struct kvm_vcpu, arch.fault_dsisr));  	DEFINE(VCPU_FAULT_DAR, offsetof(struct kvm_vcpu, arch.fault_dar)); +	DEFINE(VCPU_INTR_MSR, offsetof(struct kvm_vcpu, arch.intr_msr));  	DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst));  	DEFINE(VCPU_TRAP, offsetof(struct kvm_vcpu, arch.trap)); -	DEFINE(VCPU_PTID, offsetof(struct kvm_vcpu, arch.ptid));  	DEFINE(VCPU_CFAR, offsetof(struct kvm_vcpu, arch.cfar)); +	DEFINE(VCPU_PPR, offsetof(struct kvm_vcpu, arch.ppr)); +	DEFINE(VCPU_FSCR, offsetof(struct kvm_vcpu, arch.fscr)); +	DEFINE(VCPU_SHADOW_FSCR, offsetof(struct kvm_vcpu, arch.shadow_fscr)); +	DEFINE(VCPU_PSPB, offsetof(struct kvm_vcpu, arch.pspb)); +	DEFINE(VCPU_EBBHR, offsetof(struct kvm_vcpu, arch.ebbhr)); +	DEFINE(VCPU_EBBRR, offsetof(struct kvm_vcpu, arch.ebbrr)); +	DEFINE(VCPU_BESCR, offsetof(struct kvm_vcpu, arch.bescr)); +	DEFINE(VCPU_CSIGR, offsetof(struct kvm_vcpu, arch.csigr)); +	DEFINE(VCPU_TACR, offsetof(struct kvm_vcpu, arch.tacr)); +	DEFINE(VCPU_TCSCR, offsetof(struct kvm_vcpu, arch.tcscr)); +	DEFINE(VCPU_ACOP, offsetof(struct kvm_vcpu, arch.acop)); +	DEFINE(VCPU_WORT, offsetof(struct kvm_vcpu, arch.wort)); +	DEFINE(VCPU_SHADOW_SRR1, 
offsetof(struct kvm_vcpu, arch.shadow_srr1));  	DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_count));  	DEFINE(VCORE_NAP_COUNT, offsetof(struct kvmppc_vcore, nap_count));  	DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest));  	DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads)); -	DEFINE(VCPU_SVCPU, offsetof(struct kvmppc_vcpu_book3s, shadow_vcpu) - -			   offsetof(struct kvmppc_vcpu_book3s, vcpu)); +	DEFINE(VCORE_KVM, offsetof(struct kvmppc_vcore, kvm)); +	DEFINE(VCORE_TB_OFFSET, offsetof(struct kvmppc_vcore, tb_offset)); +	DEFINE(VCORE_LPCR, offsetof(struct kvmppc_vcore, lpcr)); +	DEFINE(VCORE_PCR, offsetof(struct kvmppc_vcore, pcr)); +	DEFINE(VCORE_DPDES, offsetof(struct kvmppc_vcore, dpdes));  	DEFINE(VCPU_SLB_E, offsetof(struct kvmppc_slb, orige));  	DEFINE(VCPU_SLB_V, offsetof(struct kvmppc_slb, origv));  	DEFINE(VCPU_SLB_SIZE, sizeof(struct kvmppc_slb)); +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +	DEFINE(VCPU_TFHAR, offsetof(struct kvm_vcpu, arch.tfhar)); +	DEFINE(VCPU_TFIAR, offsetof(struct kvm_vcpu, arch.tfiar)); +	DEFINE(VCPU_TEXASR, offsetof(struct kvm_vcpu, arch.texasr)); +	DEFINE(VCPU_GPR_TM, offsetof(struct kvm_vcpu, arch.gpr_tm)); +	DEFINE(VCPU_FPRS_TM, offsetof(struct kvm_vcpu, arch.fp_tm.fpr)); +	DEFINE(VCPU_VRS_TM, offsetof(struct kvm_vcpu, arch.vr_tm.vr)); +	DEFINE(VCPU_VRSAVE_TM, offsetof(struct kvm_vcpu, arch.vrsave_tm)); +	DEFINE(VCPU_CR_TM, offsetof(struct kvm_vcpu, arch.cr_tm)); +	DEFINE(VCPU_LR_TM, offsetof(struct kvm_vcpu, arch.lr_tm)); +	DEFINE(VCPU_CTR_TM, offsetof(struct kvm_vcpu, arch.ctr_tm)); +	DEFINE(VCPU_AMR_TM, offsetof(struct kvm_vcpu, arch.amr_tm)); +	DEFINE(VCPU_PPR_TM, offsetof(struct kvm_vcpu, arch.ppr_tm)); +	DEFINE(VCPU_DSCR_TM, offsetof(struct kvm_vcpu, arch.dscr_tm)); +	DEFINE(VCPU_TAR_TM, offsetof(struct kvm_vcpu, arch.tar_tm)); +#endif  #ifdef CONFIG_PPC_BOOK3S_64 -#ifdef CONFIG_KVM_BOOK3S_PR +#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE +	DEFINE(PACA_SVCPU, offsetof(struct paca_struct, shadow_vcpu));  # define SVCPU_FIELD(x, f)	DEFINE(x, offsetof(struct paca_struct, shadow_vcpu.f))  #else  # define SVCPU_FIELD(x, f) @@ -569,6 +622,7 @@ int main(void)  #ifdef CONFIG_PPC64  	SVCPU_FIELD(SVCPU_SLB, slb);  	SVCPU_FIELD(SVCPU_SLB_MAX, slb_max); +	SVCPU_FIELD(SVCPU_SHADOW_FSCR, shadow_fscr);  #endif  	HSTATE_FIELD(HSTATE_HOST_R1, host_r1); @@ -577,11 +631,12 @@ int main(void)  	HSTATE_FIELD(HSTATE_VMHANDLER, vmhandler);  	HSTATE_FIELD(HSTATE_SCRATCH0, scratch0);  	HSTATE_FIELD(HSTATE_SCRATCH1, scratch1); +	HSTATE_FIELD(HSTATE_SCRATCH2, scratch2);  	HSTATE_FIELD(HSTATE_IN_GUEST, in_guest);  	HSTATE_FIELD(HSTATE_RESTORE_HID5, restore_hid5);  	HSTATE_FIELD(HSTATE_NAPPING, napping); -#ifdef CONFIG_KVM_BOOK3S_64_HV +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE  	HSTATE_FIELD(HSTATE_HWTHREAD_REQ, hwthread_req);  	HSTATE_FIELD(HSTATE_HWTHREAD_STATE, hwthread_state);  	HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu); @@ -589,6 +644,7 @@ int main(void)  	HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys);  	HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr);  	HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi); +	HSTATE_FIELD(HSTATE_PTID, ptid);  	HSTATE_FIELD(HSTATE_MMCR, host_mmcr);  	HSTATE_FIELD(HSTATE_PMC, host_pmc);  	HSTATE_FIELD(HSTATE_PURR, host_purr); @@ -597,10 +653,12 @@ int main(void)  	HSTATE_FIELD(HSTATE_DABR, dabr);  	HSTATE_FIELD(HSTATE_DECEXP, dec_expires);  	DEFINE(IPI_PRIORITY, IPI_PRIORITY); -#endif /* CONFIG_KVM_BOOK3S_64_HV */ +#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */  #ifdef CONFIG_PPC_BOOK3S_64  	HSTATE_FIELD(HSTATE_CFAR, 
cfar); +	HSTATE_FIELD(HSTATE_PPR, ppr); +	HSTATE_FIELD(HSTATE_HOST_FSCR, host_fscr);  #endif /* CONFIG_PPC_BOOK3S_64 */  #else /* CONFIG_PPC_BOOK3S */ diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index 65493272787..40198d50b4c 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -12,7 +12,6 @@  #include <linux/cpu.h>  #include <linux/cpumask.h> -#include <linux/init.h>  #include <linux/kernel.h>  #include <linux/kobject.h>  #include <linux/list.h> @@ -757,7 +756,10 @@ void cacheinfo_cpu_online(unsigned int cpu_id)  	cacheinfo_sysfs_populate(cpu_id, cache);  } -#ifdef CONFIG_HOTPLUG_CPU /* functions needed for cpu offline */ +/* functions needed to remove cache entry for cpu offline or suspend/resume */ + +#if (defined(CONFIG_PPC_PSERIES) && defined(CONFIG_SUSPEND)) || \ +    defined(CONFIG_HOTPLUG_CPU)  static struct cache *cache_lookup_by_cpu(unsigned int cpu_id)  { @@ -794,6 +796,9 @@ static void remove_cache_dir(struct cache_dir *cache_dir)  {  	remove_index_dirs(cache_dir); +	/* Remove cache dir from sysfs */ +	kobject_del(cache_dir->kobj); +  	kobject_put(cache_dir->kobj);  	kfree(cache_dir); @@ -841,4 +846,4 @@ void cacheinfo_cpu_offline(unsigned int cpu_id)  	if (cache)  		cache_cpu_clear(cache, cpu_id);  } -#endif /* CONFIG_HOTPLUG_CPU */ +#endif /* (CONFIG_PPC_PSERIES && CONFIG_SUSPEND) || CONFIG_HOTPLUG_CPU */ diff --git a/arch/powerpc/kernel/clock.c b/arch/powerpc/kernel/clock.c deleted file mode 100644 index a764b47791e..00000000000 --- a/arch/powerpc/kernel/clock.c +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Dummy clk implementations for powerpc. - * These need to be overridden in platform code. - */ - -#include <linux/clk.h> -#include <linux/err.h> -#include <linux/errno.h> -#include <linux/export.h> -#include <asm/clk_interface.h> - -struct clk_interface clk_functions; - -struct clk *clk_get(struct device *dev, const char *id) -{ -	if (clk_functions.clk_get) -		return clk_functions.clk_get(dev, id); -	return ERR_PTR(-ENOSYS); -} -EXPORT_SYMBOL(clk_get); - -void clk_put(struct clk *clk) -{ -	if (clk_functions.clk_put) -		clk_functions.clk_put(clk); -} -EXPORT_SYMBOL(clk_put); - -int clk_enable(struct clk *clk) -{ -	if (clk_functions.clk_enable) -		return clk_functions.clk_enable(clk); -	return -ENOSYS; -} -EXPORT_SYMBOL(clk_enable); - -void clk_disable(struct clk *clk) -{ -	if (clk_functions.clk_disable) -		clk_functions.clk_disable(clk); -} -EXPORT_SYMBOL(clk_disable); - -unsigned long clk_get_rate(struct clk *clk) -{ -	if (clk_functions.clk_get_rate) -		return clk_functions.clk_get_rate(clk); -	return 0; -} -EXPORT_SYMBOL(clk_get_rate); - -long clk_round_rate(struct clk *clk, unsigned long rate) -{ -	if (clk_functions.clk_round_rate) -		return clk_functions.clk_round_rate(clk, rate); -	return -ENOSYS; -} -EXPORT_SYMBOL(clk_round_rate); - -int clk_set_rate(struct clk *clk, unsigned long rate) -{ -	if (clk_functions.clk_set_rate) -		return clk_functions.clk_set_rate(clk, rate); -	return -ENOSYS; -} -EXPORT_SYMBOL(clk_set_rate); - -struct clk *clk_get_parent(struct clk *clk) -{ -	if (clk_functions.clk_get_parent) -		return clk_functions.clk_get_parent(clk); -	return ERR_PTR(-ENOSYS); -} -EXPORT_SYMBOL(clk_get_parent); - -int clk_set_parent(struct clk *clk, struct clk *parent) -{ -	if (clk_functions.clk_set_parent) -		return clk_functions.clk_set_parent(clk, parent); -	return -ENOSYS; -} -EXPORT_SYMBOL(clk_set_parent); diff --git a/arch/powerpc/kernel/cpu_setup_a2.S b/arch/powerpc/kernel/cpu_setup_a2.S deleted file 
mode 100644 index 61f079e05b6..00000000000 --- a/arch/powerpc/kernel/cpu_setup_a2.S +++ /dev/null @@ -1,120 +0,0 @@ -/* - *  A2 specific assembly support code - * - *  Copyright 2009 Ben Herrenschmidt, IBM Corp. - * - *  This program is free software; you can redistribute it and/or - *  modify it under the terms of the GNU General Public License - *  as published by the Free Software Foundation; either version - *  2 of the License, or (at your option) any later version. - */ - -#include <asm/asm-offsets.h> -#include <asm/ppc_asm.h> -#include <asm/ppc-opcode.h> -#include <asm/processor.h> -#include <asm/reg_a2.h> -#include <asm/reg.h> -#include <asm/thread_info.h> - -/* - * Disable thdid and class fields in ERATs to bump PID to full 14 bits capacity. - * This also prevents external LPID accesses but that isn't a problem when not a - * guest. Under PV, this setting will be ignored and MMUCR will return the right - * number of PID bits we can use. - */ -#define MMUCR1_EXTEND_PID \ -	(MMUCR1_ICTID | MMUCR1_ITTID | MMUCR1_DCTID | \ -	 MMUCR1_DTTID | MMUCR1_DCCD) - -/* - * Use extended PIDs if enabled. - * Don't clear the ERATs on context sync events and enable I & D LRU. - * Enable ERAT back invalidate when tlbwe overwrites an entry. - */ -#define INITIAL_MMUCR1 \ -	(MMUCR1_EXTEND_PID | MMUCR1_CSINV_NEVER | MMUCR1_IRRE | \ -	 MMUCR1_DRRE | MMUCR1_TLBWE_BINV) - -_GLOBAL(__setup_cpu_a2) -	/* Some of these are actually thread local and some are -	 * core local but doing it always won't hurt -	 */ - -#ifdef CONFIG_PPC_ICSWX -	/* Make sure ACOP starts out as zero */ -	li	r3,0 -	mtspr   SPRN_ACOP,r3 - -	/* Skip the following if we are in Guest mode */ -	mfmsr	r3 -	andis.	r0,r3,MSR_GS@h -	bne	_icswx_skip_guest - -	/* Enable icswx instruction */ -	mfspr   r3,SPRN_A2_CCR2 -	ori     r3,r3,A2_CCR2_ENABLE_ICSWX -	mtspr   SPRN_A2_CCR2,r3 - -	/* Unmask all CTs in HACOP */ -	li      r3,-1 -	mtspr   SPRN_HACOP,r3 -_icswx_skip_guest: -#endif /* CONFIG_PPC_ICSWX */ - -	/* Enable doorbell */ -	mfspr   r3,SPRN_A2_CCR2 -	oris     r3,r3,A2_CCR2_ENABLE_PC@h -	mtspr   SPRN_A2_CCR2,r3 -	isync - -	/* Setup CCR0 to disable power saving for now as it's busted -	 * in the current implementations. Setup CCR1 to wake on -	 * interrupts normally (we write the default value but who -	 * knows what FW may have clobbered...) -	 */ -	li	r3,0 -	mtspr	SPRN_A2_CCR0, r3 -	LOAD_REG_IMMEDIATE(r3,0x0f0f0f0f) -	mtspr	SPRN_A2_CCR1, r3 - -	/* Initialise MMUCR1 */ -	lis	r3,INITIAL_MMUCR1@h -	ori	r3,r3,INITIAL_MMUCR1@l -	mtspr	SPRN_MMUCR1,r3 - -	/* Set MMUCR2 to enable 4K, 64K, 1M, 16M and 1G pages */ -	LOAD_REG_IMMEDIATE(r3, 0x000a7531) -	mtspr	SPRN_MMUCR2,r3 - -	/* Set MMUCR3 to write all thids bit to the TLB */ -	LOAD_REG_IMMEDIATE(r3, 0x0000000f) -	mtspr	SPRN_MMUCR3,r3 - -	/* Don't do ERAT stuff if running guest mode */ -	mfmsr	r3 -	andis.	r0,r3,MSR_GS@h -	bne	1f - -	/* Now set the I-ERAT watermark to 15 */ -	lis	r4,(MMUCR0_TLBSEL_I|MMUCR0_ECL)@h -	mtspr	SPRN_MMUCR0, r4 -	li	r4,A2_IERAT_SIZE-1 -	PPC_ERATWE(R4,R4,3) - -	/* Now set the D-ERAT watermark to 31 */ -	lis	r4,(MMUCR0_TLBSEL_D|MMUCR0_ECL)@h -	mtspr	SPRN_MMUCR0, r4 -	li	r4,A2_DERAT_SIZE-1 -	PPC_ERATWE(R4,R4,3) - -	/* And invalidate the beast just in case. 
That won't get rid of -	 * a bolted entry though it will be in LRU and so will go away eventually -	 * but let's not bother for now -	 */ -	PPC_ERATILX(0,0,R0) -1: -	blr - -_GLOBAL(__restore_cpu_a2) -	b	__setup_cpu_a2 diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S index bfb18c7290b..4f1393d2007 100644 --- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S +++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S @@ -53,11 +53,57 @@ _GLOBAL(__e500_dcache_setup)  	isync  	blr +/* + * FIXME - we haven't yet done testing to determine a reasonable default + * value for PW20_WAIT_IDLE_BIT. + */ +#define PW20_WAIT_IDLE_BIT		50 /* 1ms, TB frequency is 41.66MHZ */ +_GLOBAL(setup_pw20_idle) +	mfspr	r3, SPRN_PWRMGTCR0 + +	/* Set PW20_WAIT bit, enable pw20 state*/ +	ori	r3, r3, PWRMGTCR0_PW20_WAIT +	li	r11, PW20_WAIT_IDLE_BIT + +	/* Set Automatic PW20 Core Idle Count */ +	rlwimi	r3, r11, PWRMGTCR0_PW20_ENT_SHIFT, PWRMGTCR0_PW20_ENT + +	mtspr	SPRN_PWRMGTCR0, r3 + +	blr + +/* + * FIXME - we haven't yet done testing to determine a reasonable default + * value for AV_WAIT_IDLE_BIT. + */ +#define AV_WAIT_IDLE_BIT		50 /* 1ms, TB frequency is 41.66MHZ */ +_GLOBAL(setup_altivec_idle) +	mfspr	r3, SPRN_PWRMGTCR0 + +	/* Enable Altivec Idle */ +	oris	r3, r3, PWRMGTCR0_AV_IDLE_PD_EN@h +	li	r11, AV_WAIT_IDLE_BIT + +	/* Set Automatic AltiVec Idle Count */ +	rlwimi	r3, r11, PWRMGTCR0_AV_IDLE_CNT_SHIFT, PWRMGTCR0_AV_IDLE_CNT + +	mtspr	SPRN_PWRMGTCR0, r3 + +	blr +  _GLOBAL(__setup_cpu_e6500)  	mflr	r6  #ifdef CONFIG_PPC64 -	bl	.setup_altivec_ivors +	bl	setup_altivec_ivors +	/* Touch IVOR42 only if the CPU supports E.HV category */ +	mfspr	r10,SPRN_MMUCFG +	rlwinm.	r10,r10,0,MMUCFG_LPIDSIZE +	beq	1f +	bl	setup_lrat_ivor +1:  #endif +	bl	setup_pw20_idle +	bl	setup_altivec_idle  	bl	__setup_cpu_e5500  	mtlr	r6  	blr @@ -118,7 +164,15 @@ _GLOBAL(__setup_cpu_e5500)  #ifdef CONFIG_PPC_BOOK3E_64  _GLOBAL(__restore_cpu_e6500)  	mflr	r5 -	bl	.setup_altivec_ivors +	bl	setup_altivec_ivors +	/* Touch IVOR42 only if the CPU supports E.HV category */ +	mfspr	r10,SPRN_MMUCFG +	rlwinm.	r10,r10,0,MMUCFG_LPIDSIZE +	beq	1f +	bl	setup_lrat_ivor +1: +	bl	setup_pw20_idle +	bl	setup_altivec_idle  	bl	__restore_cpu_e5500  	mtlr	r5  	blr @@ -127,9 +181,9 @@ _GLOBAL(__restore_cpu_e5500)  	mflr	r4  	bl	__e500_icache_setup  	bl	__e500_dcache_setup -	bl	.__setup_base_ivors -	bl	.setup_perfmon_ivor -	bl	.setup_doorbell_ivors +	bl	__setup_base_ivors +	bl	setup_perfmon_ivor +	bl	setup_doorbell_ivors  	/*  	 * We only want to touch IVOR38-41 if we're running on hardware  	 * that supports category E.HV.  The architectural way to determine @@ -138,7 +192,7 @@ _GLOBAL(__restore_cpu_e5500)  	mfspr	r10,SPRN_MMUCFG  	rlwinm.	r10,r10,0,MMUCFG_LPIDSIZE  	beq	1f -	bl	.setup_ehv_ivors +	bl	setup_ehv_ivors  1:  	mtlr	r4  	blr @@ -147,9 +201,9 @@ _GLOBAL(__setup_cpu_e5500)  	mflr	r5  	bl	__e500_icache_setup  	bl	__e500_dcache_setup -	bl	.__setup_base_ivors -	bl	.setup_perfmon_ivor -	bl	.setup_doorbell_ivors +	bl	__setup_base_ivors +	bl	setup_perfmon_ivor +	bl	setup_doorbell_ivors  	/*  	 * We only want to touch IVOR38-41 if we're running on hardware  	 * that supports category E.HV.  The architectural way to determine @@ -158,7 +212,7 @@ _GLOBAL(__setup_cpu_e5500)  	mfspr	r10,SPRN_MMUCFG  	rlwinm.	
r10,r10,0,MMUCFG_LPIDSIZE  	beq	1f -	bl	.setup_ehv_ivors +	bl	setup_ehv_ivors  	b	2f  1:  	ld	r10,CPU_SPEC_FEATURES(r4) diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index 18b5b9cf8e3..46733535cc0 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -29,7 +29,7 @@ _GLOBAL(__setup_cpu_power7)  	mtspr	SPRN_LPID,r0  	mfspr	r3,SPRN_LPCR  	bl	__init_LPCR -	bl	__init_TLB +	bl	__init_tlb_power7  	mtlr	r11  	blr @@ -42,7 +42,7 @@ _GLOBAL(__restore_cpu_power7)  	mtspr	SPRN_LPID,r0  	mfspr	r3,SPRN_LPCR  	bl	__init_LPCR -	bl	__init_TLB +	bl	__init_tlb_power7  	mtlr	r11  	blr @@ -56,10 +56,10 @@ _GLOBAL(__setup_cpu_power8)  	li	r0,0  	mtspr	SPRN_LPID,r0  	mfspr	r3,SPRN_LPCR -	oris	r3, r3, LPCR_AIL_3@h +	ori	r3, r3, LPCR_PECEDH  	bl	__init_LPCR  	bl	__init_HFSCR -	bl	__init_TLB +	bl	__init_tlb_power8  	bl	__init_PMU_HV  	mtlr	r11  	blr @@ -75,10 +75,10 @@ _GLOBAL(__restore_cpu_power8)  	li	r0,0  	mtspr	SPRN_LPID,r0  	mfspr   r3,SPRN_LPCR -	oris	r3, r3, LPCR_AIL_3@h +	ori	r3, r3, LPCR_PECEDH  	bl	__init_LPCR  	bl	__init_HFSCR -	bl	__init_TLB +	bl	__init_tlb_power8  	bl	__init_PMU_HV  	mtlr	r11  	blr @@ -134,15 +134,31 @@ __init_HFSCR:  	mtspr	SPRN_HFSCR,r3  	blr -__init_TLB: -	/* -	 * Clear the TLB using the "IS 3" form of tlbiel instruction -	 * (invalidate by congruence class). P7 has 128 CCs, P8 has 512 -	 * so we just always do 512 -	 */ +/* + * Clear the TLB using the specified IS form of tlbiel instruction + * (invalidate by congruence class). P7 has 128 CCs., P8 has 512. + * + * r3 = IS field + */ +__init_tlb_power7: +	li	r3,0xc00	/* IS field = 0b11 */ +_GLOBAL(__flush_tlb_power7) +	li	r6,128 +	mtctr	r6 +	mr	r7,r3		/* IS field */ +	ptesync +2:	tlbiel	r7 +	addi	r7,r7,0x1000 +	bdnz	2b +	ptesync +1:	blr + +__init_tlb_power8: +	li	r3,0xc00	/* IS field = 0b11 */ +_GLOBAL(__flush_tlb_power8)  	li	r6,512  	mtctr	r6 -	li	r7,0xc00	/* IS field = 0b11 */ +	mr	r7,r3		/* IS field */  	ptesync  2:	tlbiel	r7  	addi	r7,r7,0x1000 diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 597d954e586..0c157642c2a 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -71,6 +71,10 @@ extern void __restore_cpu_power7(void);  extern void __setup_cpu_power8(unsigned long offset, struct cpu_spec* spec);  extern void __restore_cpu_power8(void);  extern void __restore_cpu_a2(void); +extern void __flush_tlb_power7(unsigned long inval_selector); +extern void __flush_tlb_power8(unsigned long inval_selector); +extern long __machine_check_early_realmode_p7(struct pt_regs *regs); +extern long __machine_check_early_realmode_p8(struct pt_regs *regs);  #endif /* CONFIG_PPC64 */  #if defined(CONFIG_E500)  extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec); @@ -105,7 +109,8 @@ extern void __restore_cpu_e6500(void);  				 PPC_FEATURE_PSERIES_PERFMON_COMPAT)  #define COMMON_USER2_POWER8	(PPC_FEATURE2_ARCH_2_07 | \  				 PPC_FEATURE2_HTM_COMP | PPC_FEATURE2_DSCR | \ -				 PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR) +				 PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR | \ +				 PPC_FEATURE2_VEC_CRYPTO)  #define COMMON_USER_PA6T	(COMMON_USER_PPC64 | PPC_FEATURE_PA6T |\  				 PPC_FEATURE_TRUE_LE | \  				 PPC_FEATURE_HAS_ALTIVEC_COMP) @@ -440,6 +445,8 @@ static struct cpu_spec __initdata cpu_specs[] = {  		.oprofile_cpu_type	= "ppc64/ibm-compat-v1",  		.cpu_setup		= __setup_cpu_power7,  		.cpu_restore		= __restore_cpu_power7, +		.flush_tlb		= __flush_tlb_power7, +		.machine_check_early	= 
__machine_check_early_realmode_p7,  		.platform		= "power7",  	},  	{	/* 2.07-compliant processor, i.e. Power8 "architected" mode */ @@ -456,6 +463,8 @@ static struct cpu_spec __initdata cpu_specs[] = {  		.oprofile_cpu_type	= "ppc64/ibm-compat-v1",  		.cpu_setup		= __setup_cpu_power8,  		.cpu_restore		= __restore_cpu_power8, +		.flush_tlb		= __flush_tlb_power8, +		.machine_check_early	= __machine_check_early_realmode_p8,  		.platform		= "power8",  	},  	{	/* Power7 */ @@ -474,6 +483,8 @@ static struct cpu_spec __initdata cpu_specs[] = {  		.oprofile_type		= PPC_OPROFILE_POWER4,  		.cpu_setup		= __setup_cpu_power7,  		.cpu_restore		= __restore_cpu_power7, +		.flush_tlb		= __flush_tlb_power7, +		.machine_check_early	= __machine_check_early_realmode_p7,  		.platform		= "power7",  	},  	{	/* Power7+ */ @@ -492,13 +503,35 @@ static struct cpu_spec __initdata cpu_specs[] = {  		.oprofile_type		= PPC_OPROFILE_POWER4,  		.cpu_setup		= __setup_cpu_power7,  		.cpu_restore		= __restore_cpu_power7, +		.flush_tlb		= __flush_tlb_power7, +		.machine_check_early	= __machine_check_early_realmode_p7,  		.platform		= "power7+",  	},  	{	/* Power8E */  		.pvr_mask		= 0xffff0000,  		.pvr_value		= 0x004b0000,  		.cpu_name		= "POWER8E (raw)", -		.cpu_features		= CPU_FTRS_POWER8, +		.cpu_features		= CPU_FTRS_POWER8E, +		.cpu_user_features	= COMMON_USER_POWER8, +		.cpu_user_features2	= COMMON_USER2_POWER8, +		.mmu_features		= MMU_FTRS_POWER8, +		.icache_bsize		= 128, +		.dcache_bsize		= 128, +		.num_pmcs		= 6, +		.pmc_type		= PPC_PMC_IBM, +		.oprofile_cpu_type	= "ppc64/power8", +		.oprofile_type		= PPC_OPROFILE_INVALID, +		.cpu_setup		= __setup_cpu_power8, +		.cpu_restore		= __restore_cpu_power8, +		.flush_tlb		= __flush_tlb_power8, +		.machine_check_early	= __machine_check_early_realmode_p8, +		.platform		= "power8", +	}, +	{	/* Power8 DD1: Does not support doorbell IPIs */ +		.pvr_mask		= 0xffffff00, +		.pvr_value		= 0x004d0100, +		.cpu_name		= "POWER8 (raw)", +		.cpu_features		= CPU_FTRS_POWER8_DD1,  		.cpu_user_features	= COMMON_USER_POWER8,  		.cpu_user_features2	= COMMON_USER2_POWER8,  		.mmu_features		= MMU_FTRS_POWER8, @@ -510,6 +543,8 @@ static struct cpu_spec __initdata cpu_specs[] = {  		.oprofile_type		= PPC_OPROFILE_INVALID,  		.cpu_setup		= __setup_cpu_power8,  		.cpu_restore		= __restore_cpu_power8, +		.flush_tlb		= __flush_tlb_power8, +		.machine_check_early	= __machine_check_early_realmode_p8,  		.platform		= "power8",  	},  	{	/* Power8 */ @@ -528,6 +563,8 @@ static struct cpu_spec __initdata cpu_specs[] = {  		.oprofile_type		= PPC_OPROFILE_INVALID,  		.cpu_setup		= __setup_cpu_power8,  		.cpu_restore		= __restore_cpu_power8, +		.flush_tlb		= __flush_tlb_power8, +		.machine_check_early	= __machine_check_early_realmode_p8,  		.platform		= "power8",  	},  	{	/* Cell Broadband Engine */ @@ -2132,44 +2169,6 @@ static struct cpu_spec __initdata cpu_specs[] = {  	}  #endif /* CONFIG_PPC32 */  #endif /* CONFIG_E500 */ - -#ifdef CONFIG_PPC_A2 -	{	/* Standard A2 (>= DD2) + FPU core */ -		.pvr_mask		= 0xffff0000, -		.pvr_value		= 0x00480000, -		.cpu_name		= "A2 (>= DD2)", -		.cpu_features		= CPU_FTRS_A2, -		.cpu_user_features	= COMMON_USER_PPC64, -		.mmu_features		= MMU_FTRS_A2, -		.icache_bsize		= 64, -		.dcache_bsize		= 64, -		.num_pmcs		= 0, -		.cpu_setup		= __setup_cpu_a2, -		.cpu_restore		= __restore_cpu_a2, -		.machine_check		= machine_check_generic, -		.platform		= "ppca2", -	}, -	{	/* This is a default entry to get going, to be replaced by -		 * a real one at some stage -		 */ -#define 
CPU_FTRS_BASE_BOOK3E	(CPU_FTR_USE_TB | \ -	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_SMT | \ -	    CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE) -		.pvr_mask		= 0x00000000, -		.pvr_value		= 0x00000000, -		.cpu_name		= "Book3E", -		.cpu_features		= CPU_FTRS_BASE_BOOK3E, -		.cpu_user_features	= COMMON_USER_PPC64, -		.mmu_features		= MMU_FTR_TYPE_3E | MMU_FTR_USE_TLBILX | -					  MMU_FTR_USE_TLBIVAX_BCAST | -					  MMU_FTR_LOCK_BCAST_INVAL, -		.icache_bsize		= 64, -		.dcache_bsize		= 64, -		.num_pmcs		= 0, -		.machine_check		= machine_check_generic, -		.platform		= "power6", -	}, -#endif /* CONFIG_PPC_A2 */  };  static struct cpu_spec the_cpu_spec; diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index fdcd8f551af..51dbace3269 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -17,7 +17,6 @@  #include <linux/export.h>  #include <linux/crash_dump.h>  #include <linux/delay.h> -#include <linux/init.h>  #include <linux/irq.h>  #include <linux/types.h> @@ -82,7 +81,7 @@ void crash_ipi_callback(struct pt_regs *regs)  	}  	atomic_inc(&cpus_in_crash); -	smp_mb__after_atomic_inc(); +	smp_mb__after_atomic();  	/*  	 * Starting the kdump boot. diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index 779a78c2643..7a13f378ca2 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -98,17 +98,19 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,  			size_t csize, unsigned long offset, int userbuf)  {  	void  *vaddr; +	phys_addr_t paddr;  	if (!csize)  		return 0;  	csize = min_t(size_t, csize, PAGE_SIZE); +	paddr = pfn << PAGE_SHIFT; -	if ((min_low_pfn < pfn) && (pfn < max_pfn)) { -		vaddr = __va(pfn << PAGE_SHIFT); +	if (memblock_is_region_memory(paddr, csize)) { +		vaddr = __va(paddr);  		csize = copy_oldmem_vaddr(vaddr, buf, csize, offset, userbuf);  	} else { -		vaddr = __ioremap(pfn << PAGE_SHIFT, PAGE_SIZE, 0); +		vaddr = __ioremap(paddr, PAGE_SIZE, 0);  		csize = copy_oldmem_vaddr(vaddr, buf, csize, offset, userbuf);  		iounmap(vaddr);  	} @@ -124,15 +126,15 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,  void crash_free_reserved_phys_range(unsigned long begin, unsigned long end)  {  	unsigned long addr; -	const u32 *basep, *sizep; +	const __be32 *basep, *sizep;  	unsigned int rtas_start = 0, rtas_end = 0;  	basep = of_get_property(rtas.dev, "linux,rtas-base", NULL);  	sizep = of_get_property(rtas.dev, "rtas-size", NULL);  	if (basep && sizep) { -		rtas_start = *basep; -		rtas_end = *basep + *sizep; +		rtas_start = be32_to_cpup(basep); +		rtas_end = rtas_start + be32_to_cpup(sizep);  	}  	for (addr = begin; addr < end; addr += PAGE_SIZE) { diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index e4897523de4..54d0116256f 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -83,10 +83,10 @@ static int dma_iommu_dma_supported(struct device *dev, u64 mask)  		return 0;  	} -	if (tbl->it_offset > (mask >> IOMMU_PAGE_SHIFT)) { +	if (tbl->it_offset > (mask >> tbl->it_page_shift)) {  		dev_info(dev, "Warning: IOMMU offset too big for device mask\n");  		dev_info(dev, "mask: 0x%08llx, table offset: 0x%08lx\n", -				mask, tbl->it_offset << IOMMU_PAGE_SHIFT); +				mask, tbl->it_offset << tbl->it_page_shift);  		return 0;  	} else  		return 1; diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index 8032b97ccdc..ee78f6e49d6 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -191,12 +191,10 @@ 
EXPORT_SYMBOL(dma_direct_ops);  #define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16) -int dma_set_mask(struct device *dev, u64 dma_mask) +int __dma_set_mask(struct device *dev, u64 dma_mask)  {  	struct dma_map_ops *dma_ops = get_dma_ops(dev); -	if (ppc_md.dma_set_mask) -		return ppc_md.dma_set_mask(dev, dma_mask);  	if ((dma_ops != NULL) && (dma_ops->set_dma_mask != NULL))  		return dma_ops->set_dma_mask(dev, dma_mask);  	if (!dev->dma_mask || !dma_supported(dev, dma_mask)) @@ -204,6 +202,12 @@ int dma_set_mask(struct device *dev, u64 dma_mask)  	*dev->dma_mask = dma_mask;  	return 0;  } +int dma_set_mask(struct device *dev, u64 dma_mask) +{ +	if (ppc_md.dma_set_mask) +		return ppc_md.dma_set_mask(dev, dma_mask); +	return __dma_set_mask(dev, dma_mask); +}  EXPORT_SYMBOL(dma_set_mask);  u64 dma_get_required_mask(struct device *dev) diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 55593ee2d5a..86e25702aac 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -22,18 +22,21 @@   */  #include <linux/delay.h> +#include <linux/debugfs.h>  #include <linux/sched.h>  #include <linux/init.h>  #include <linux/list.h>  #include <linux/pci.h>  #include <linux/proc_fs.h>  #include <linux/rbtree.h> +#include <linux/reboot.h>  #include <linux/seq_file.h>  #include <linux/spinlock.h>  #include <linux/export.h>  #include <linux/of.h>  #include <linux/atomic.h> +#include <asm/debug.h>  #include <asm/eeh.h>  #include <asm/eeh_event.h>  #include <asm/io.h> @@ -84,24 +87,23 @@  #define EEH_MAX_FAILS	2100000  /* Time to wait for a PCI slot to report status, in milliseconds */ -#define PCI_BUS_RESET_WAIT_MSEC (60*1000) - -/* Platform dependent EEH operations */ -struct eeh_ops *eeh_ops = NULL; - -int eeh_subsystem_enabled; -EXPORT_SYMBOL(eeh_subsystem_enabled); +#define PCI_BUS_RESET_WAIT_MSEC (5*60*1000)  /* - * EEH probe mode support. The intention is to support multiple - * platforms for EEH. Some platforms like pSeries do PCI emunation - * based on device tree. However, other platforms like powernv probe - * PCI devices from hardware. The flag is used to distinguish that. - * In addition, struct eeh_ops::probe would be invoked for particular - * OF node or PCI device so that the corresponding PE would be created - * there. + * EEH probe mode support, which is part of the flags, + * is to support multiple platforms for EEH. Some platforms + * like pSeries do PCI emunation based on device tree. + * However, other platforms like powernv probe PCI devices + * from hardware. The flag is used to distinguish that. + * In addition, struct eeh_ops::probe would be invoked for + * particular OF node or PCI device so that the corresponding + * PE would be created there.   
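The dma_set_mask()/__dma_set_mask() split above follows a common kernel pattern: try an optional platform override first, then fall back to a generic path that remains callable on its own. A minimal sketch with hypothetical names:

struct machdep_hooks {
	int (*set_mask)(void *dev, unsigned long long mask);	/* optional override */
};

static struct machdep_hooks hooks;	/* zero-initialized: no override installed */

/* Generic fallback, deliberately callable directly. */
static int generic_set_mask(void *dev, unsigned long long mask)
{
	(void)dev;
	(void)mask;
	return 0;	/* accept any mask in this sketch */
}

int example_set_mask(void *dev, unsigned long long mask)
{
	if (hooks.set_mask)
		return hooks.set_mask(dev, mask);
	return generic_set_mask(dev, mask);
}

Exposing the generic half separately, as __dma_set_mask() now is, lets other code reach the common logic even when a platform hook is registered.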
*/ -int eeh_probe_mode; +int eeh_subsystem_flags; +EXPORT_SYMBOL(eeh_subsystem_flags); + +/* Platform dependent EEH operations */ +struct eeh_ops *eeh_ops = NULL;  /* Lock to avoid races due to multiple reports of an error */  DEFINE_RAW_SPINLOCK(confirm_error_lock); @@ -132,6 +134,15 @@ static struct eeh_stats eeh_stats;  #define IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE) +static int __init eeh_setup(char *str) +{ +	if (!strcmp(str, "off")) +		eeh_subsystem_flags |= EEH_FORCE_DISABLED; + +	return 1; +} +__setup("eeh=", eeh_setup); +  /**   * eeh_gather_pci_data - Copy assorted PCI config space registers to buff   * @edev: device to report data for @@ -144,74 +155,67 @@ static struct eeh_stats eeh_stats;  static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len)  {  	struct device_node *dn = eeh_dev_to_of_node(edev); -	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);  	u32 cfg;  	int cap, i;  	int n = 0;  	n += scnprintf(buf+n, len-n, "%s\n", dn->full_name); -	printk(KERN_WARNING "EEH: of node=%s\n", dn->full_name); +	pr_warn("EEH: of node=%s\n", dn->full_name);  	eeh_ops->read_config(dn, PCI_VENDOR_ID, 4, &cfg);  	n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg); -	printk(KERN_WARNING "EEH: PCI device/vendor: %08x\n", cfg); +	pr_warn("EEH: PCI device/vendor: %08x\n", cfg);  	eeh_ops->read_config(dn, PCI_COMMAND, 4, &cfg);  	n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg); -	printk(KERN_WARNING "EEH: PCI cmd/status register: %08x\n", cfg); - -	if (!dev) { -		printk(KERN_WARNING "EEH: no PCI device for this of node\n"); -		return n; -	} +	pr_warn("EEH: PCI cmd/status register: %08x\n", cfg);  	/* Gather bridge-specific registers */ -	if (dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) { +	if (edev->mode & EEH_DEV_BRIDGE) {  		eeh_ops->read_config(dn, PCI_SEC_STATUS, 2, &cfg);  		n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg); -		printk(KERN_WARNING "EEH: Bridge secondary status: %04x\n", cfg); +		pr_warn("EEH: Bridge secondary status: %04x\n", cfg);  		eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &cfg);  		n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg); -		printk(KERN_WARNING "EEH: Bridge control: %04x\n", cfg); +		pr_warn("EEH: Bridge control: %04x\n", cfg);  	}  	/* Dump out the PCI-X command and status regs */ -	cap = pci_find_capability(dev, PCI_CAP_ID_PCIX); +	cap = edev->pcix_cap;  	if (cap) {  		eeh_ops->read_config(dn, cap, 4, &cfg);  		n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg); -		printk(KERN_WARNING "EEH: PCI-X cmd: %08x\n", cfg); +		pr_warn("EEH: PCI-X cmd: %08x\n", cfg);  		eeh_ops->read_config(dn, cap+4, 4, &cfg);  		n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg); -		printk(KERN_WARNING "EEH: PCI-X status: %08x\n", cfg); +		pr_warn("EEH: PCI-X status: %08x\n", cfg);  	} -	/* If PCI-E capable, dump PCI-E cap 10, and the AER */ -	cap = pci_find_capability(dev, PCI_CAP_ID_EXP); +	/* If PCI-E capable, dump PCI-E cap 10 */ +	cap = edev->pcie_cap;  	if (cap) {  		n += scnprintf(buf+n, len-n, "pci-e cap10:\n"); -		printk(KERN_WARNING -		       "EEH: PCI-E capabilities and status follow:\n"); +		pr_warn("EEH: PCI-E capabilities and status follow:\n");  		for (i=0; i<=8; i++) {  			eeh_ops->read_config(dn, cap+4*i, 4, &cfg);  			n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); -			printk(KERN_WARNING "EEH: PCI-E %02x: %08x\n", i, cfg); +			pr_warn("EEH: PCI-E %02x: %08x\n", i, cfg);  		} +	} -		cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); -		if (cap) { -			n += scnprintf(buf+n, len-n, "pci-e 
AER:\n"); -			printk(KERN_WARNING -			       "EEH: PCI-E AER capability register set follows:\n"); - -			for (i=0; i<14; i++) { -				eeh_ops->read_config(dn, cap+4*i, 4, &cfg); -				n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); -				printk(KERN_WARNING "EEH: PCI-E AER %02x: %08x\n", i, cfg); -			} +	/* If AER capable, dump it */ +	cap = edev->aer_cap; +	if (cap) { +		n += scnprintf(buf+n, len-n, "pci-e AER:\n"); +		pr_warn("EEH: PCI-E AER capability register set follows:\n"); + +		for (i=0; i<14; i++) { +			eeh_ops->read_config(dn, cap+4*i, 4, &cfg); +			n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); +			pr_warn("EEH: PCI-E AER %02x: %08x\n", i, cfg);  		}  	} @@ -232,21 +236,19 @@ void eeh_slot_error_detail(struct eeh_pe *pe, int severity)  {  	size_t loglen = 0;  	struct eeh_dev *edev, *tmp; -	bool valid_cfg_log = true;  	/*  	 * When the PHB is fenced or dead, it's pointless to collect  	 * the data from PCI config space because it should return  	 * 0xFF's. For ER, we still retrieve the data from the PCI  	 * config space. +	 * +	 * For pHyp, we have to enable IO for log retrieval. Otherwise, +	 * 0xFF's is always returned from PCI config space.  	 */ -	if (eeh_probe_mode_dev() && -	    (pe->type & EEH_PE_PHB) && -	    (pe->state & (EEH_PE_ISOLATED | EEH_PE_PHB_DEAD))) -		valid_cfg_log = false; - -	if (valid_cfg_log) { -		eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); +	if (!(pe->type & EEH_PE_PHB)) { +		if (eeh_probe_mode_devtree()) +			eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);  		eeh_ops->configure_bridge(pe);  		eeh_pe_restore_bars(pe); @@ -309,7 +311,7 @@ static int eeh_phb_check_failure(struct eeh_pe *pe)  	/* If the PHB has been in problematic state */  	eeh_serialize_lock(&flags); -	if (phb_pe->state & (EEH_PE_ISOLATED | EEH_PE_PHB_DEAD)) { +	if (phb_pe->state & EEH_PE_ISOLATED) {  		ret = 0;  		goto out;  	} @@ -327,11 +329,11 @@ static int eeh_phb_check_failure(struct eeh_pe *pe)  	/* Isolate the PHB and send event */  	eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED);  	eeh_serialize_unlock(flags); -	eeh_send_failure_event(phb_pe); -	pr_err("EEH: PHB#%x failure detected\n", -		phb_pe->phb->global_number); +	pr_err("EEH: PHB#%x failure detected, location: %s\n", +		phb_pe->phb->global_number, eeh_pe_loc_get(phb_pe));  	dump_stack(); +	eeh_send_failure_event(phb_pe);  	return 1;  out: @@ -356,16 +358,17 @@ out:  int eeh_dev_check_failure(struct eeh_dev *edev)  {  	int ret; +	int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);  	unsigned long flags;  	struct device_node *dn;  	struct pci_dev *dev; -	struct eeh_pe *pe; +	struct eeh_pe *pe, *parent_pe, *phb_pe;  	int rc = 0;  	const char *location;  	eeh_stats.total_mmio_ffs++; -	if (!eeh_subsystem_enabled) +	if (!eeh_enabled())  		return 0;  	if (!edev) { @@ -437,14 +440,34 @@ int eeh_dev_check_failure(struct eeh_dev *edev)  	 */  	if ((ret < 0) ||  	    (ret == EEH_STATE_NOT_SUPPORT) || -	    (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) == -	    (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) { +	    ((ret & active_flags) == active_flags)) {  		eeh_stats.false_positives++;  		pe->false_positives++;  		rc = 0;  		goto dn_unlock;  	} +	/* +	 * It should be corner case that the parent PE has been +	 * put into frozen state as well. We should take care +	 * that at first. +	 */ +	parent_pe = pe->parent; +	while (parent_pe) { +		/* Hit the ceiling ? */ +		if (parent_pe->type & EEH_PE_PHB) +			break; + +		/* Frozen parent PE ? 
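The config-space dump above builds its log with repeated n += scnprintf(buf+n, len-n, ...). The idiom is safe because scnprintf() returns the number of bytes actually written into the buffer, so n can never run past len; plain snprintf() instead returns the length that would have been written and needs capping. A user-space sketch of the same accumulation, using an scnprintf()-style wrapper (invented names, stand-in register values):

#include <stdarg.h>
#include <stdio.h>
#include <stddef.h>

/* snprintf() with the return value capped, like the kernel's scnprintf(). */
static size_t scn(char *buf, size_t len, const char *fmt, ...)
{
	va_list ap;
	int i;

	if (len == 0)
		return 0;
	va_start(ap, fmt);
	i = vsnprintf(buf, len, fmt, ap);
	va_end(ap);
	if (i < 0)
		return 0;
	return (size_t)i < len ? (size_t)i : len - 1;
}

int main(void)
{
	char buf[64];
	size_t n = 0;

	n += scn(buf + n, sizeof(buf) - n, "dev/vend:%08x\n", 0x12345678u);
	n += scn(buf + n, sizeof(buf) - n, "cmd/stat:%x\n", 0x0146u);
	fputs(buf, stdout);
	return 0;
}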
*/ +		ret = eeh_ops->get_state(parent_pe, NULL); +		if (ret > 0 && +		    (ret & active_flags) != active_flags) +			pe = parent_pe; + +		/* Next parent level */ +		parent_pe = parent_pe->parent; +	} +  	eeh_stats.slot_resets++;  	/* Avoid repeated reports of this failure, including problems @@ -454,16 +477,19 @@ int eeh_dev_check_failure(struct eeh_dev *edev)  	eeh_pe_state_mark(pe, EEH_PE_ISOLATED);  	eeh_serialize_unlock(flags); -	eeh_send_failure_event(pe); -  	/* Most EEH events are due to device driver bugs.  Having  	 * a stack trace will help the device-driver authors figure  	 * out what happened.  So print that out.  	 */ -	pr_err("EEH: Frozen PE#%x detected on PHB#%x\n", -		pe->addr, pe->phb->global_number); +	phb_pe = eeh_phb_pe_get(pe->phb); +	pr_err("EEH: Frozen PHB#%x-PE#%x detected\n", +	       pe->phb->global_number, pe->addr); +	pr_err("EEH: PE location: %s, PHB location: %s\n", +	       eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe));  	dump_stack(); +	eeh_send_failure_event(pe); +  	return 1;  dn_unlock: @@ -515,16 +541,42 @@ EXPORT_SYMBOL(eeh_check_failure);   */  int eeh_pci_enable(struct eeh_pe *pe, int function)  { -	int rc; +	int rc, flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); + +	/* +	 * pHyp doesn't allow to enable IO or DMA on unfrozen PE. +	 * Also, it's pointless to enable them on unfrozen PE. So +	 * we have the check here. +	 */ +	if (function == EEH_OPT_THAW_MMIO || +	    function == EEH_OPT_THAW_DMA) { +		rc = eeh_ops->get_state(pe, NULL); +		if (rc < 0) +			return rc; + +		/* Needn't to enable or already enabled */ +		if ((rc == EEH_STATE_NOT_SUPPORT) || +		    ((rc & flags) == flags)) +			return 0; +	}  	rc = eeh_ops->set_option(pe, function);  	if (rc) -		pr_warning("%s: Unexpected state change %d on PHB#%d-PE#%x, err=%d\n", -			__func__, function, pe->phb->global_number, pe->addr, rc); +		pr_warn("%s: Unexpected state change %d on " +			"PHB#%d-PE#%x, err=%d\n", +			__func__, function, pe->phb->global_number, +			pe->addr, rc);  	rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); -	if (rc > 0 && (rc & EEH_STATE_MMIO_ENABLED) && -	   (function == EEH_OPT_THAW_MMIO)) +	if (rc <= 0) +		return rc; + +	if ((function == EEH_OPT_THAW_MMIO) && +	    (rc & EEH_STATE_MMIO_ENABLED)) +		return 0; + +	if ((function == EEH_OPT_THAW_DMA) && +	    (rc & EEH_STATE_DMA_ENABLED))  		return 0;  	return rc; @@ -612,26 +664,7 @@ static void eeh_reset_pe_once(struct eeh_pe *pe)  	else  		eeh_ops->reset(pe, EEH_RESET_HOT); -	/* The PCI bus requires that the reset be held high for at least -	 * a 100 milliseconds. We wait a bit longer 'just in case'. -	 */ -#define PCI_BUS_RST_HOLD_TIME_MSEC 250 -	msleep(PCI_BUS_RST_HOLD_TIME_MSEC); - -	/* We might get hit with another EEH freeze as soon as the -	 * pci slot reset line is dropped. Make sure we don't miss -	 * these, and clear the flag now. -	 */ -	eeh_pe_state_clear(pe, EEH_PE_ISOLATED); -  	eeh_ops->reset(pe, EEH_RESET_DEACTIVATE); - -	/* After a PCI slot has been reset, the PCI Express spec requires -	 * a 1.5 second idle time for the bus to stabilize, before starting -	 * up traffic. -	 */ -#define PCI_BUS_SETTLE_TIME_MSEC 1800 -	msleep(PCI_BUS_SETTLE_TIME_MSEC);  }  /** @@ -651,6 +684,10 @@ int eeh_reset_pe(struct eeh_pe *pe)  	for (i=0; i<3; i++) {  		eeh_reset_pe_once(pe); +		/* +		 * EEH_PE_ISOLATED is expected to be removed after +		 * BAR restore. 
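
The reworked eeh_pci_enable() above leans on a recurring idiom in this patch: a PE only counts as healthy when both the MMIO and DMA state bits are set, so any partial match still warrants a thaw. A minimal standalone C sketch of that test (the flag values here are placeholders, not the kernel's definitions):

#include <stdio.h>

/* Placeholder values; the kernel defines these in its EEH headers */
#define EEH_STATE_MMIO_ACTIVE  0x04
#define EEH_STATE_DMA_ACTIVE   0x08

static int needs_thaw(int state)
{
	int active = EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE;

	/* Only a fully active PE (both bits set) can skip the thaw */
	return (state & active) != active;
}

int main(void)
{
	printf("MMIO only : %d\n", needs_thaw(EEH_STATE_MMIO_ACTIVE)); /* 1 */
	printf("MMIO + DMA: %d\n", needs_thaw(EEH_STATE_MMIO_ACTIVE |
					      EEH_STATE_DMA_ACTIVE));  /* 0 */
	return 0;
}
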
+	 */
 	rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
 	if ((rc & flags) == flags)
 		return 0;
@@ -687,6 +724,15 @@ void eeh_save_bars(struct eeh_dev *edev)
 
 	for (i = 0; i < 16; i++)
 		eeh_ops->read_config(dn, i * 4, 4, &edev->config_space[i]);
+
+	/*
+	 * For PCI bridges, including the root port, we need to enable
+	 * bus mastering explicitly. Otherwise, they can't fetch IODA
+	 * table entries correctly. So we cache the bit in advance and
+	 * restore it after reset, for either a PHB or a PE range.
+	 */
+	if (edev->mode & EEH_DEV_BRIDGE)
+		edev->config_space[1] |= PCI_COMMAND_MASTER;
 }
 
 /**
@@ -739,6 +785,17 @@ int __exit eeh_ops_unregister(const char *name)
 	return -EEXIST;
 }
 
+static int eeh_reboot_notifier(struct notifier_block *nb,
+			       unsigned long action, void *unused)
+{
+	eeh_set_enable(false);
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block eeh_reboot_nb = {
+	.notifier_call = eeh_reboot_notifier,
+};
+
 /**
  * eeh_init - EEH initialization
  *
@@ -770,6 +827,14 @@ int eeh_init(void)
 	if (machine_is(powernv) && cnt++ <= 0)
 		return ret;
 
+	/* Register reboot notifier */
+	ret = register_reboot_notifier(&eeh_reboot_nb);
+	if (ret) {
+		pr_warn("%s: Failed to register notifier (%d)\n",
+			__func__, ret);
+		return ret;
+	}
+
 	/* call platform initialization function */
 	if (!eeh_ops) {
 		pr_warning("%s: Platform EEH operation not found\n",
@@ -798,8 +863,8 @@ int eeh_init(void)
 			&hose_list, list_node)
 			pci_walk_bus(hose->bus, eeh_ops->dev_probe, NULL);
 	} else {
-		pr_warning("%s: Invalid probe mode %d\n",
-			   __func__, eeh_probe_mode);
+		pr_warn("%s: Invalid probe mode %x",
+			__func__, eeh_subsystem_flags);
 		return -EINVAL;
 	}
 
@@ -814,7 +879,7 @@ int eeh_init(void)
 			return ret;
 	}
 
-	if (eeh_subsystem_enabled)
+	if (eeh_enabled())
 		pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
 	else
 		pr_warning("EEH: No capable adapters found\n");
@@ -889,7 +954,7 @@ void eeh_add_device_late(struct pci_dev *dev)
 	struct device_node *dn;
 	struct eeh_dev *edev;
 
-	if (!dev || !eeh_subsystem_enabled)
+	if (!dev || !eeh_enabled())
 		return;
 
 	pr_debug("EEH: Adding device %s\n", pci_name(dev));
@@ -913,6 +978,13 @@ void eeh_add_device_late(struct pci_dev *dev)
 		eeh_sysfs_remove_device(edev->pdev);
 		edev->mode &= ~EEH_DEV_SYSFS;
 
+		/*
+		 * The PCI device should already have been removed,
+		 * but that didn't happen correctly. So we needn't
+		 * call into the error handler afterwards.
+		 */
+		edev->mode |= EEH_DEV_NO_HANDLER;
+
 		edev->pdev = NULL;
 		dev->dev.archdata.edev = NULL;
 	}
@@ -990,7 +1062,7 @@ void eeh_remove_device(struct pci_dev *dev)
 {
 	struct eeh_dev *edev;
 
-	if (!dev || !eeh_subsystem_enabled)
+	if (!dev || !eeh_enabled())
 		return;
 
 	edev = pci_dev_to_eeh_dev(dev);
@@ -1015,6 +1087,14 @@ void eeh_remove_device(struct pci_dev *dev)
 	else
 		edev->mode |= EEH_DEV_DISCONNECTED;
 
+	/*
+	 * We're removing the device from the PCI subsystem, which
+	 * means its driver either doesn't support EEH or doesn't
+	 * support it well. So we rely on hotplug completely to
+	 * recover that specific PCI device.
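
eeh_save_bars() above caches the first 64 bytes of config space as 16 dwords, so the command/status register lands in config_space[1]; OR-ing PCI_COMMAND_MASTER into that cached dword is what makes the post-reset restore re-enable bus mastering on bridges. A small userspace model of the same bookkeeping (the EEH_DEV_BRIDGE value is illustrative, while PCI_COMMAND_MASTER really is bit 2 of the command register):

#include <stdint.h>
#include <stdio.h>

#define PCI_COMMAND_MASTER	0x4	/* bus-master bit of the PCI command register */
#define EEH_DEV_BRIDGE		0x1	/* illustrative flag value */

struct eeh_dev_model {
	int mode;
	uint32_t config_space[16];	/* first 64 bytes of config space */
};

/* Cache the config header; dword 1 holds command/status, so forcing
 * the bus-master bit there makes the restore re-enable mastering. */
static void save_bars(struct eeh_dev_model *edev, const uint32_t *cfg)
{
	for (int i = 0; i < 16; i++)
		edev->config_space[i] = cfg[i];

	if (edev->mode & EEH_DEV_BRIDGE)
		edev->config_space[1] |= PCI_COMMAND_MASTER;
}

int main(void)
{
	uint32_t cfg[16] = { 0x12348086, 0x02100002 /* cmd=0x0002, no master */ };
	struct eeh_dev_model bridge = { .mode = EEH_DEV_BRIDGE };

	save_bars(&bridge, cfg);
	printf("cached cmd/status: %08x\n", bridge.config_space[1]); /* 02100006 */
	return 0;
}
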
+	 */
+	edev->mode |= EEH_DEV_NO_HANDLER;
+
 	eeh_addr_cache_rmv_dev(dev);
 	eeh_sysfs_remove_device(dev);
 	edev->mode &= ~EEH_DEV_SYSFS;
@@ -1022,7 +1102,7 @@ void eeh_remove_device(struct pci_dev *dev)
 
 static int proc_eeh_show(struct seq_file *m, void *v)
 {
-	if (0 == eeh_subsystem_enabled) {
+	if (!eeh_enabled()) {
 		seq_printf(m, "EEH Subsystem is globally disabled\n");
 		seq_printf(m, "eeh_total_mmio_ffs=%llu\n", eeh_stats.total_mmio_ffs);
 	} else {
@@ -1059,10 +1139,45 @@ static const struct file_operations proc_eeh_operations = {
 	.release   = single_release,
 };
 
+#ifdef CONFIG_DEBUG_FS
+static int eeh_enable_dbgfs_set(void *data, u64 val)
+{
+	if (val)
+		eeh_subsystem_flags &= ~EEH_FORCE_DISABLED;
+	else
+		eeh_subsystem_flags |= EEH_FORCE_DISABLED;
+
+	/* Notify the backend */
+	if (eeh_ops->post_init)
+		eeh_ops->post_init();
+
+	return 0;
+}
+
+static int eeh_enable_dbgfs_get(void *data, u64 *val)
+{
+	if (eeh_enabled())
+		*val = 0x1ul;
+	else
+		*val = 0x0ul;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(eeh_enable_dbgfs_ops, eeh_enable_dbgfs_get,
+			eeh_enable_dbgfs_set, "0x%llx\n");
+#endif
+
 static int __init eeh_init_proc(void)
 {
-	if (machine_is(pseries) || machine_is(powernv))
+	if (machine_is(pseries) || machine_is(powernv)) {
 		proc_create("powerpc/eeh", 0, NULL, &proc_eeh_operations);
+#ifdef CONFIG_DEBUG_FS
+		debugfs_create_file("eeh_enable", 0600,
+                                    powerpc_debugfs_root, NULL,
+                                    &eeh_enable_dbgfs_ops);
+#endif
+	}
+
 	return 0;
 }
 __initcall(eeh_init_proc);
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 36bed5a1275..420da61d4ce 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -143,17 +143,43 @@ static void eeh_disable_irq(struct pci_dev *dev)
 static void eeh_enable_irq(struct pci_dev *dev)
 {
 	struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
-	struct irq_desc *desc;
 
 	if ((edev->mode) & EEH_DEV_IRQ_DISABLED) {
 		edev->mode &= ~EEH_DEV_IRQ_DISABLED;
-
-		desc = irq_to_desc(dev->irq);
-		if (desc && desc->depth > 0)
+		/*
+		 * FIXME !!!!!
+		 *
+		 * This is just ass backwards. This maze has
+		 * unbalanced irq_enable/disable calls. So instead of
+		 * finding the root cause it works around the warning
+		 * in the irq_enable code by conditionally calling
+		 * into it.
+		 *
+		 * That's just wrong. The warning in the core code is
+		 * there to tell people to fix their asymmetries in
+		 * their own code, not by abusing the core information
+		 * to avoid it.
+		 *
+		 * I so wish that the asymmetry would be the other way
+		 * round and a few more irq_disable calls render that
+		 * shit unusable forever.
+		 *
+		 *	tglx
+		 */
+		if (irqd_irq_disabled(irq_get_irq_data(dev->irq)))
 			enable_irq(dev->irq);
 	}
 }
 
+static bool eeh_dev_removed(struct eeh_dev *edev)
+{
+	/* EEH device removed ? */
+	if (!edev || (edev->mode & EEH_DEV_REMOVED))
+		return true;
+
+	return false;
+}
+
 /**
  * eeh_report_error - Report pci error to each device driver
  * @data: eeh device
@@ -170,10 +196,8 @@ static void *eeh_report_error(void *data, void *userdata)
 	enum pci_ers_result rc, *res = userdata;
 	struct pci_driver *driver;
 
-	/* We might not have the associated PCI device,
-	 * then we should continue for next one.
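
eeh_dev_removed() above gives every eeh_report_*() callback a single early-out covering both a missing eeh_dev and one flagged EEH_DEV_REMOVED. A compilable sketch of the guard pattern (the flag value is invented for the example):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define EEH_DEV_REMOVED	0x2	/* illustrative flag value */

struct eeh_dev_model {
	int mode;
};

/* Treat a missing edev the same as one explicitly marked removed,
 * so every report callback can bail out with one check. */
static bool dev_removed(const struct eeh_dev_model *edev)
{
	return !edev || (edev->mode & EEH_DEV_REMOVED);
}

static void report_error(const struct eeh_dev_model *edev)
{
	if (dev_removed(edev))
		return;		/* skip permanently removed devices */

	printf("notify driver\n");
}

int main(void)
{
	struct eeh_dev_model live = { 0 }, gone = { EEH_DEV_REMOVED };

	report_error(&live);	/* prints */
	report_error(&gone);	/* silent */
	report_error(NULL);	/* silent */
	return 0;
}
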
-	 */ -	if (!dev) return NULL; +	if (!dev || eeh_dev_removed(edev)) +		return NULL;  	dev->error_state = pci_channel_io_frozen;  	driver = eeh_pcid_get(dev); @@ -213,11 +237,15 @@ static void *eeh_report_mmio_enabled(void *data, void *userdata)  	enum pci_ers_result rc, *res = userdata;  	struct pci_driver *driver; +	if (!dev || eeh_dev_removed(edev)) +		return NULL; +  	driver = eeh_pcid_get(dev);  	if (!driver) return NULL;  	if (!driver->err_handler || -	    !driver->err_handler->mmio_enabled) { +	    !driver->err_handler->mmio_enabled || +	    (edev->mode & EEH_DEV_NO_HANDLER)) {  		eeh_pcid_put(dev);  		return NULL;  	} @@ -249,7 +277,8 @@ static void *eeh_report_reset(void *data, void *userdata)  	enum pci_ers_result rc, *res = userdata;  	struct pci_driver *driver; -	if (!dev) return NULL; +	if (!dev || eeh_dev_removed(edev)) +		return NULL;  	dev->error_state = pci_channel_io_normal;  	driver = eeh_pcid_get(dev); @@ -258,7 +287,8 @@ static void *eeh_report_reset(void *data, void *userdata)  	eeh_enable_irq(dev);  	if (!driver->err_handler || -	    !driver->err_handler->slot_reset) { +	    !driver->err_handler->slot_reset || +	    (edev->mode & EEH_DEV_NO_HANDLER)) {  		eeh_pcid_put(dev);  		return NULL;  	} @@ -288,7 +318,8 @@ static void *eeh_report_resume(void *data, void *userdata)  	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);  	struct pci_driver *driver; -	if (!dev) return NULL; +	if (!dev || eeh_dev_removed(edev)) +		return NULL;  	dev->error_state = pci_channel_io_normal;  	driver = eeh_pcid_get(dev); @@ -297,7 +328,9 @@ static void *eeh_report_resume(void *data, void *userdata)  	eeh_enable_irq(dev);  	if (!driver->err_handler || -	    !driver->err_handler->resume) { +	    !driver->err_handler->resume || +	    (edev->mode & EEH_DEV_NO_HANDLER)) { +		edev->mode &= ~EEH_DEV_NO_HANDLER;  		eeh_pcid_put(dev);  		return NULL;  	} @@ -322,7 +355,8 @@ static void *eeh_report_failure(void *data, void *userdata)  	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);  	struct pci_driver *driver; -	if (!dev) return NULL; +	if (!dev || eeh_dev_removed(edev)) +		return NULL;  	dev->error_state = pci_channel_io_perm_failure;  	driver = eeh_pcid_get(dev); @@ -358,10 +392,24 @@ static void *eeh_rmv_device(void *data, void *userdata)  	 */  	if (!dev || (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE))  		return NULL; -	driver = eeh_pcid_get(dev); -	if (driver && driver->err_handler) + +	/* +	 * We rely on count-based pcibios_release_device() to +	 * detach permanently offlined PEs. Unfortunately, that's +	 * not reliable enough. We might have the permanently +	 * offlined PEs attached, but we needn't take care of +	 * them and their child devices. +	 */ +	if (eeh_dev_removed(edev))  		return NULL; +	driver = eeh_pcid_get(dev); +	if (driver) { +		eeh_pcid_put(dev); +		if (driver->err_handler) +			return NULL; +	} +  	/* Remove it from PCI subsystem */  	pr_debug("EEH: Removing %s without EEH sensitive driver\n",  		 pci_name(dev)); @@ -369,7 +417,9 @@ static void *eeh_rmv_device(void *data, void *userdata)  	edev->mode |= EEH_DEV_DISCONNECTED;  	(*removed)++; +	pci_lock_rescan_remove();  	pci_stop_and_remove_bus_device(dev); +	pci_unlock_rescan_remove();  	return NULL;  } @@ -390,6 +440,48 @@ static void *eeh_pe_detach_dev(void *data, void *userdata)  	return NULL;  } +/* + * Explicitly clear PE's frozen state for PowerNV where + * we have frozen PE until BAR restore is completed. It's + * harmless to clear it for pSeries. 
To be consistent with + * PE reset (for 3 times), we try to clear the frozen state + * for 3 times as well. + */ +static void *__eeh_clear_pe_frozen_state(void *data, void *flag) +{ +	struct eeh_pe *pe = (struct eeh_pe *)data; +	int i, rc; + +	for (i = 0; i < 3; i++) { +		rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); +		if (rc) +			continue; +		rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA); +		if (!rc) +			break; +	} + +	/* The PE has been isolated, clear it */ +	if (rc) { +		pr_warn("%s: Can't clear frozen PHB#%x-PE#%x (%d)\n", +			__func__, pe->phb->global_number, pe->addr, rc); +		return (void *)pe; +	} + +	return NULL; +} + +static int eeh_clear_pe_frozen_state(struct eeh_pe *pe) +{ +	void *rc; + +	rc = eeh_pe_traverse(pe, __eeh_clear_pe_frozen_state, NULL); +	if (!rc) +		eeh_pe_state_clear(pe, EEH_PE_ISOLATED); + +	return rc ? -EIO : 0; +} +  /**   * eeh_reset_device - Perform actual reset of a pci slot   * @pe: EEH PE @@ -416,22 +508,41 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)  	 * into pcibios_add_pci_devices().  	 */  	eeh_pe_state_mark(pe, EEH_PE_KEEP); -	if (bus) +	if (bus) { +		pci_lock_rescan_remove();  		pcibios_remove_pci_devices(bus); -	else if (frozen_bus) +		pci_unlock_rescan_remove(); +	} else if (frozen_bus) {  		eeh_pe_dev_traverse(pe, eeh_rmv_device, &removed); +	} -	/* Reset the pci controller. (Asserts RST#; resets config space). +	/* +	 * Reset the pci controller. (Asserts RST#; resets config space).  	 * Reconfigure bridges and devices. Don't try to bring the system  	 * up if the reset failed for some reason. +	 * +	 * During the reset, it's very dangerous to have uncontrolled PCI +	 * config accesses. So we prefer to block them. However, controlled +	 * PCI config accesses initiated from EEH itself are allowed.  	 */ +	eeh_pe_state_mark(pe, EEH_PE_RESET);  	rc = eeh_reset_pe(pe); -	if (rc) +	if (rc) { +		eeh_pe_state_clear(pe, EEH_PE_RESET);  		return rc; +	} + +	pci_lock_rescan_remove();  	/* Restore PE */  	eeh_ops->configure_bridge(pe);  	eeh_pe_restore_bars(pe); +	eeh_pe_state_clear(pe, EEH_PE_RESET); + +	/* Clear frozen state */ +	rc = eeh_clear_pe_frozen_state(pe); +	if (rc) +		return rc;  	/* Give the system 5 seconds to finish running the user-space  	 * hotplug shutdown scripts, e.g. ifdown for ethernet.  Yes, @@ -462,13 +573,14 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)  	pe->tstamp = tstamp;  	pe->freeze_count = cnt; +	pci_unlock_rescan_remove();  	return 0;  }  /* The longest amount of time to wait for a pci device   * to come back on line, in seconds.   */ -#define MAX_WAIT_FOR_RECOVERY 150 +#define MAX_WAIT_FOR_RECOVERY 300  static void eeh_handle_normal_event(struct eeh_pe *pe)  { @@ -540,7 +652,6 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)  			result = PCI_ERS_RESULT_NEED_RESET;  		} else {  			pr_info("EEH: Notify device drivers to resume I/O\n"); -			result = PCI_ERS_RESULT_NONE;  			eeh_pe_dev_traverse(pe, eeh_report_mmio_enabled, &result);  		}  	} @@ -552,10 +663,17 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)  		if (rc < 0)  			goto hard_fail; -		if (rc) +		if (rc) {  			result = PCI_ERS_RESULT_NEED_RESET; -		else +		} else { +			/* +			 * We didn't do PE reset for the case. The PE +			 * is still in frozen state. Clear it before +			 * resuming the PE. +			 */ +			eeh_pe_state_clear(pe, EEH_PE_ISOLATED);  			result = PCI_ERS_RESULT_RECOVERED; +		}  	}  	/* If any device has a hard failure, then shut off everything. 
*/ @@ -617,93 +735,113 @@ perm_error:  	/* Notify all devices that they're about to go down. */  	eeh_pe_dev_traverse(pe, eeh_report_failure, NULL); -	/* Shut down the device drivers for good. */ -	if (frozen_bus) +	/* Mark the PE to be removed permanently */ +	pe->freeze_count = EEH_MAX_ALLOWED_FREEZES + 1; + +	/* +	 * Shut down the device drivers for good. We mark +	 * all removed devices correctly to avoid access +	 * the their PCI config any more. +	 */ +	if (frozen_bus) { +		eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); + +		pci_lock_rescan_remove();  		pcibios_remove_pci_devices(frozen_bus); +		pci_unlock_rescan_remove(); +	}  }  static void eeh_handle_special_event(void)  {  	struct eeh_pe *pe, *phb_pe;  	struct pci_bus *bus; -	struct pci_controller *hose, *tmp; +	struct pci_controller *hose;  	unsigned long flags; -	int rc = 0; +	int rc; -	/* -	 * The return value from next_error() has been classified as follows. -	 * It might be good to enumerate them. However, next_error() is only -	 * supported by PowerNV platform for now. So it would be fine to use -	 * integer directly: -	 * -	 * 4 - Dead IOC           3 - Dead PHB -	 * 2 - Fenced PHB         1 - Frozen PE -	 * 0 - No error found -	 * -	 */ -	rc = eeh_ops->next_error(&pe); -	if (rc <= 0) -		return; -	switch (rc) { -	case 4: -		/* Mark all PHBs in dead state */ -		eeh_serialize_lock(&flags); -		list_for_each_entry_safe(hose, tmp, -				&hose_list, list_node) { -			phb_pe = eeh_phb_pe_get(hose); -			if (!phb_pe) continue; - -			eeh_pe_state_mark(phb_pe, -				EEH_PE_ISOLATED | EEH_PE_PHB_DEAD); +	do { +		rc = eeh_ops->next_error(&pe); + +		switch (rc) { +		case EEH_NEXT_ERR_DEAD_IOC: +			/* Mark all PHBs in dead state */ +			eeh_serialize_lock(&flags); + +			/* Purge all events */ +			eeh_remove_event(NULL, true); + +			list_for_each_entry(hose, &hose_list, list_node) { +				phb_pe = eeh_phb_pe_get(hose); +				if (!phb_pe) continue; + +				eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED); +			} + +			eeh_serialize_unlock(flags); + +			break; +		case EEH_NEXT_ERR_FROZEN_PE: +		case EEH_NEXT_ERR_FENCED_PHB: +		case EEH_NEXT_ERR_DEAD_PHB: +			/* Mark the PE in fenced state */ +			eeh_serialize_lock(&flags); + +			/* Purge all events of the PHB */ +			eeh_remove_event(pe, true); + +			if (rc == EEH_NEXT_ERR_DEAD_PHB) +				eeh_pe_state_mark(pe, EEH_PE_ISOLATED); +			else +				eeh_pe_state_mark(pe, +					EEH_PE_ISOLATED | EEH_PE_RECOVERING); + +			eeh_serialize_unlock(flags); + +			break; +		case EEH_NEXT_ERR_NONE: +			return; +		default: +			pr_warn("%s: Invalid value %d from next_error()\n", +				__func__, rc); +			return;  		} -		eeh_serialize_unlock(flags); - -		/* Purge all events */ -		eeh_remove_event(NULL); -		break; -	case 3: -	case 2: -	case 1: -		/* Mark the PE in fenced state */ -		eeh_serialize_lock(&flags); -		if (rc == 3) -			eeh_pe_state_mark(pe, -				EEH_PE_ISOLATED | EEH_PE_PHB_DEAD); -		else -			eeh_pe_state_mark(pe, -				EEH_PE_ISOLATED | EEH_PE_RECOVERING); -		eeh_serialize_unlock(flags); - -		/* Purge all events of the PHB */ -		eeh_remove_event(pe); -		break; -	default: -		pr_err("%s: Invalid value %d from next_error()\n", -		       __func__, rc); -		return; -	} -	/* -	 * For fenced PHB and frozen PE, it's handled as normal -	 * event. 
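
The rewrite above replaces next_error()'s bare 0..4 return codes with named EEH_NEXT_ERR_* values and keeps polling until the platform reports no more errors. A standalone sketch of the same loop shape, with a canned error sequence standing in for eeh_ops->next_error() (enum values are illustrative):

#include <stdio.h>

/* Named severities replacing the old magic numbers */
enum eeh_next_err {
	EEH_NEXT_ERR_NONE = 0,
	EEH_NEXT_ERR_FROZEN_PE,
	EEH_NEXT_ERR_FENCED_PHB,
	EEH_NEXT_ERR_DEAD_PHB,
	EEH_NEXT_ERR_DEAD_IOC,
};

/* Stand-in for eeh_ops->next_error(): drains a canned sequence */
static enum eeh_next_err next_error(void)
{
	static const enum eeh_next_err seq[] = {
		EEH_NEXT_ERR_FROZEN_PE,
		EEH_NEXT_ERR_FENCED_PHB,
		EEH_NEXT_ERR_NONE,
	};
	static unsigned int i;

	return seq[i < 2 ? i++ : 2];
}

int main(void)
{
	enum eeh_next_err rc;

	/* Same shape as the new handler: consume errors until the
	 * platform reports none, bailing early on a dead IOC. */
	do {
		rc = next_error();
		switch (rc) {
		case EEH_NEXT_ERR_FROZEN_PE:
		case EEH_NEXT_ERR_FENCED_PHB:
			printf("recover PE/PHB (%d)\n", rc);
			break;
		case EEH_NEXT_ERR_DEAD_PHB:
		case EEH_NEXT_ERR_DEAD_IOC:
			printf("remove devices (%d)\n", rc);
			break;
		case EEH_NEXT_ERR_NONE:
			break;
		}
		if (rc == EEH_NEXT_ERR_DEAD_IOC)
			break;
	} while (rc != EEH_NEXT_ERR_NONE);

	return 0;
}
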
We have to remove the affected PHBs for dead
-	 * PHB and IOC
-	 */
-	if (rc == 2 || rc == 1)
-		eeh_handle_normal_event(pe);
-	else {
-		list_for_each_entry_safe(hose, tmp,
-			&hose_list, list_node) {
-			phb_pe = eeh_phb_pe_get(hose);
-			if (!phb_pe || !(phb_pe->state & EEH_PE_PHB_DEAD))
-				continue;
-
-			bus = eeh_pe_bus_get(phb_pe);
-			/* Notify all devices that they're about to go down. */
-			eeh_pe_dev_traverse(pe, eeh_report_failure, NULL);
-			pcibios_remove_pci_devices(bus);
+		/*
+		 * For fenced PHB and frozen PE, it's handled as normal
+		 * event. We have to remove the affected PHBs for dead
+		 * PHB and IOC
+		 */
+		if (rc == EEH_NEXT_ERR_FROZEN_PE ||
+		    rc == EEH_NEXT_ERR_FENCED_PHB) {
+			eeh_handle_normal_event(pe);
+			eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
+		} else {
+			pci_lock_rescan_remove();
+			list_for_each_entry(hose, &hose_list, list_node) {
+				phb_pe = eeh_phb_pe_get(hose);
+				if (!phb_pe ||
+				    !(phb_pe->state & EEH_PE_ISOLATED) ||
+				    (phb_pe->state & EEH_PE_RECOVERING))
+					continue;
+
+				/* Notify all devices that they're going down */
+				bus = eeh_pe_bus_get(phb_pe);
+				eeh_pe_dev_traverse(pe,
+					eeh_report_failure, NULL);
+				pcibios_remove_pci_devices(bus);
+			}
+			pci_unlock_rescan_remove();
 		}
-	}
+
+		/*
+		 * If we have detected a dead IOC, we needn't proceed
+		 * any further since all PHBs would have been removed.
+		 */
+		if (rc == EEH_NEXT_ERR_DEAD_IOC)
+			break;
+	} while (rc != EEH_NEXT_ERR_NONE);
 }
 
 /**
diff --git a/arch/powerpc/kernel/eeh_event.c b/arch/powerpc/kernel/eeh_event.c
index d27c5afc90a..4eefb6e34db 100644
--- a/arch/powerpc/kernel/eeh_event.c
+++ b/arch/powerpc/kernel/eeh_event.c
@@ -74,8 +74,13 @@ static int eeh_event_handler(void * dummy)
 		pe = event->pe;
 		if (pe) {
 			eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
-			pr_info("EEH: Detected PCI bus error on PHB#%d-PE#%x\n",
-				 pe->phb->global_number, pe->addr);
+			if (pe->type & EEH_PE_PHB)
+				pr_info("EEH: Detected error on PHB#%d\n",
+					 pe->phb->global_number);
+			else
+				pr_info("EEH: Detected PCI bus error on "
+					"PHB#%d-PE#%x\n",
+					pe->phb->global_number, pe->addr);
 			eeh_handle_event(pe);
 			eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
 		} else {
@@ -147,24 +152,33 @@ int eeh_send_failure_event(struct eeh_pe *pe)
 
 /**
  * eeh_remove_event - Remove EEH event from the queue
  * @pe: Event binding to the PE
+ * @force: Event will be removed unconditionally
  *
 * On PowerNV platform, we might have subsequent events that are
 * part of the former one. In that case, those subsequent events
 * are purely duplicated and unnecessary, thus they should be
 * removed.
 */
-void eeh_remove_event(struct eeh_pe *pe)
+void eeh_remove_event(struct eeh_pe *pe, bool force)
 {
 	unsigned long flags;
 	struct eeh_event *event, *tmp;
 
+	/*
+	 * If a NULL PE is passed in, either the IOC is dead or the
+	 * caller is sure that all existing errors can be reported.
+	 *
+	 * Unless "force" is set, events whose associated PE has been
+	 * isolated won't be removed, so that no event is lost.
+	 */
 	spin_lock_irqsave(&eeh_eventlist_lock, flags);
 	list_for_each_entry_safe(event, tmp, &eeh_eventlist, list) {
-		/*
-		 * If we don't have valid PE passed in, that means
-		 * we already have event corresponding to dead IOC
-		 * and all events should be purged.
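
eeh_remove_event()'s new semantics boil down to one filter: without "force", an event whose PE is already isolated survives the purge. A compilable model of that filter (the flag value is invented for the example):

#include <stdbool.h>
#include <stdio.h>

#define EEH_PE_ISOLATED	0x1	/* illustrative flag value */

struct event_model {
	int pe_state;
	bool valid;
};

/* Same filtering rule as the new eeh_remove_event(): unless forced,
 * events bound to an isolated PE are kept so no event is lost. */
static void remove_events(struct event_model *evs, int n, bool force)
{
	for (int i = 0; i < n; i++) {
		if (!force && (evs[i].pe_state & EEH_PE_ISOLATED))
			continue;
		evs[i].valid = false;
	}
}

int main(void)
{
	struct event_model evs[2] = {
		{ .pe_state = EEH_PE_ISOLATED, .valid = true },
		{ .pe_state = 0,               .valid = true },
	};

	remove_events(evs, 2, false);
	printf("%d %d\n", evs[0].valid, evs[1].valid);	/* 1 0 */
	return 0;
}
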
-	 */
+		if (!force && event->pe &&
+		    (event->pe->state & EEH_PE_ISOLATED))
+			continue;
+
 		if (!pe) {
 			list_del(&event->list);
 			kfree(event);
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index f9450537e33..fbd01eba447 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -25,7 +25,6 @@
 #include <linux/delay.h>
 #include <linux/export.h>
 #include <linux/gfp.h>
-#include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/pci.h>
 #include <linux/string.h>
@@ -504,13 +503,17 @@ static void *__eeh_pe_state_mark(void *data, void *flag)
 	struct eeh_dev *edev, *tmp;
 	struct pci_dev *pdev;
 
-	/*
-	 * Mark the PE with the indicated state. Also,
-	 * the associated PCI device will be put into
-	 * I/O frozen state to avoid I/O accesses from
-	 * the PCI device driver.
-	 */
+	/* Keep the state of a permanently removed PE intact */
+	if ((pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) &&
+	    (state & (EEH_PE_ISOLATED | EEH_PE_RECOVERING)))
+		return NULL;
+
 	pe->state |= state;
+
+	/* Offline PCI devices if applicable */
+	if (state != EEH_PE_ISOLATED)
+		return NULL;
+
 	eeh_pe_for_each_dev(pe, edev, tmp) {
 		pdev = eeh_dev_to_pci_dev(edev);
 		if (pdev)
@@ -533,6 +536,27 @@ void eeh_pe_state_mark(struct eeh_pe *pe, int state)
 	eeh_pe_traverse(pe, __eeh_pe_state_mark, &state);
 }
 
+static void *__eeh_pe_dev_mode_mark(void *data, void *flag)
+{
+	struct eeh_dev *edev = data;
+	int mode = *((int *)flag);
+
+	edev->mode |= mode;
+
+	return NULL;
+}
+
+/**
+ * eeh_pe_dev_mode_mark - Mark mode for all devices under the PE
+ * @pe: EEH PE
+ *
+ * Mark the given mode for all child devices of the PE.
+ */
+void eeh_pe_dev_mode_mark(struct eeh_pe *pe, int mode)
+{
+	eeh_pe_dev_traverse(pe, __eeh_pe_dev_mode_mark, &mode);
+}
+
 /**
  * __eeh_pe_state_clear - Clear state for the PE
  * @data: EEH PE
@@ -547,8 +571,16 @@ static void *__eeh_pe_state_clear(void *data, void *flag)
 	struct eeh_pe *pe = (struct eeh_pe *)data;
 	int state = *((int *)flag);
 
+	/* Keep the state of a permanently removed PE intact */
+	if ((pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) &&
+	    (state & EEH_PE_ISOLATED))
+		return NULL;
+
 	pe->state &= ~state;
-	pe->check_count = 0;
+
+	/* Clear check count since last isolation */
+	if (state & EEH_PE_ISOLATED)
+		pe->check_count = 0;
 
 	return NULL;
 }
@@ -737,6 +769,9 @@ static void *eeh_restore_one_device_bars(void *data, void *flag)
 	else
 		eeh_restore_device_bars(edev, dn);
 
+	if (eeh_ops->restore_config)
+		eeh_ops->restore_config(dn);
+
 	return NULL;
 }
 
@@ -757,6 +792,66 @@ void eeh_pe_restore_bars(struct eeh_pe *pe)
 }
 
 /**
+ * eeh_pe_loc_get - Retrieve location code binding to the given PE
+ * @pe: EEH PE
+ *
+ * Retrieve the location code of the given PE. If the primary bus of
+ * the PE is the root bus, the location code is taken from the PHB's
+ * device tree node or from the root port. Otherwise, the device tree
+ * node of the upstream bridge of the primary PE bus is checked for
+ * the location code.
+ */
+const char *eeh_pe_loc_get(struct eeh_pe *pe)
+{
+	struct pci_controller *hose;
+	struct pci_bus *bus = eeh_pe_bus_get(pe);
+	struct pci_dev *pdev;
+	struct device_node *dn;
+	const char *loc;
+
+	if (!bus)
+		return "N/A";
+
+	/* PHB PE or root PE ?
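
eeh_pe_loc_get() is essentially a fallback chain over device-tree properties: try "ibm,loc-code", then a secondary property, else report "N/A". A toy version against an in-memory property list (the table type and the sample location code are invented for the example):

#include <stdio.h>
#include <string.h>

/* Toy device-tree node: NULL-terminated name/value pairs */
struct dt_node {
	const char *prop[6];
};

static const char *get_property(const struct dt_node *dn, const char *name)
{
	for (int i = 0; dn->prop[i]; i += 2)
		if (!strcmp(dn->prop[i], name))
			return dn->prop[i + 1];
	return NULL;
}

/* Same fallback order the patch uses for non-root buses */
static const char *loc_get(const struct dt_node *dn)
{
	const char *loc = get_property(dn, "ibm,loc-code");

	if (!loc)
		loc = get_property(dn, "ibm,slot-location-code");
	return loc ? loc : "N/A";
}

int main(void)
{
	struct dt_node dn = {
		{ "ibm,slot-location-code", "U78AB.001.WZS-P1-C2", NULL }
	};

	printf("%s\n", loc_get(&dn));	/* falls back to the slot code */
	return 0;
}
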
*/ +	if (pci_is_root_bus(bus)) { +		hose = pci_bus_to_host(bus); +		loc = of_get_property(hose->dn, +				"ibm,loc-code", NULL); +		if (loc) +			return loc; +		loc = of_get_property(hose->dn, +				"ibm,io-base-loc-code", NULL); +		if (loc) +			return loc; + +		pdev = pci_get_slot(bus, 0x0); +	} else { +		pdev = bus->self; +	} + +	if (!pdev) { +		loc = "N/A"; +		goto out; +	} + +	dn = pci_device_to_OF_node(pdev); +	if (!dn) { +		loc = "N/A"; +		goto out; +	} + +	loc = of_get_property(dn, "ibm,loc-code", NULL); +	if (!loc) +		loc = of_get_property(dn, "ibm,slot-location-code", NULL); +	if (!loc) +		loc = "N/A"; + +out: +	if (pci_is_root_bus(bus) && pdev) +		pci_dev_put(pdev); +	return loc; +} + +/**   * eeh_pe_bus_get - Retrieve PCI bus according to the given PE   * @pe: EEH PE   * diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c index 5d753d4f2c7..e2595ba4b72 100644 --- a/arch/powerpc/kernel/eeh_sysfs.c +++ b/arch/powerpc/kernel/eeh_sysfs.c @@ -59,6 +59,9 @@ void eeh_sysfs_add_device(struct pci_dev *pdev)  	struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);  	int rc=0; +	if (!eeh_enabled()) +		return; +  	if (edev && (edev->mode & EEH_DEV_SYSFS))  		return; diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index c04cdf70d48..6528c5e2cc4 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -39,8 +39,8 @@   * System calls.   */  	.section	".toc","aw" -.SYS_CALL_TABLE: -	.tc .sys_call_table[TC],.sys_call_table +SYS_CALL_TABLE: +	.tc sys_call_table[TC],sys_call_table  /* This value is used to mark exception frames on the stack. */  exception_marker: @@ -106,7 +106,7 @@ BEGIN_FW_FTR_SECTION  	LDX_BE	r10,0,r10		/* get log write index */  	cmpd	cr1,r11,r10  	beq+	cr1,33f -	bl	.accumulate_stolen_time +	bl	accumulate_stolen_time  	REST_GPR(0,r1)  	REST_4GPRS(3,r1)  	REST_2GPRS(7,r1) @@ -143,7 +143,7 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)  	std	r10,SOFTE(r1)  #ifdef SHOW_SYSCALLS -	bl	.do_show_syscall +	bl	do_show_syscall  	REST_GPR(0,r1)  	REST_4GPRS(3,r1)  	REST_2GPRS(7,r1) @@ -162,7 +162,7 @@ system_call:			/* label this so stack traces look sane */   * Need to vector to 32 Bit or default sys_call_table here,   * based on caller's run-mode / personality.   */ -	ld	r11,.SYS_CALL_TABLE@toc(2) +	ld	r11,SYS_CALL_TABLE@toc(2)  	andi.	r10,r10,_TIF_32BIT  	beq	15f  	addi	r11,r11,8	/* use 32-bit syscall entries */ @@ -174,14 +174,14 @@ system_call:			/* label this so stack traces look sane */  	clrldi	r8,r8,32  15:  	slwi	r0,r0,4 -	ldx	r10,r11,r0	/* Fetch system call handler [ptr] */ -	mtctr   r10 +	ldx	r12,r11,r0	/* Fetch system call handler [ptr] */ +	mtctr   r12  	bctrl			/* Call handler */  syscall_exit:  	std	r3,RESULT(r1)  #ifdef SHOW_SYSCALLS -	bl	.do_show_syscall_exit +	bl	do_show_syscall_exit  	ld	r3,RESULT(r1)  #endif  	CURRENT_THREAD_INFO(r12, r1) @@ -248,9 +248,9 @@ syscall_error:  /* Traced system call support */  syscall_dotrace: -	bl	.save_nvgprs +	bl	save_nvgprs  	addi	r3,r1,STACK_FRAME_OVERHEAD -	bl	.do_syscall_trace_enter +	bl	do_syscall_trace_enter  	/*  	 * Restore argument registers possibly just changed.  	 * We use the return value of do_syscall_trace_enter @@ -308,7 +308,7 @@ syscall_exit_work:  4:	/* Anything else left to do? */  	SET_DEFAULT_THREAD_PPR(r3, r10)		/* Set thread.ppr = 3 */  	andi.	
r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP) -	beq	.ret_from_except_lite +	beq	ret_from_except_lite  	/* Re-enable interrupts */  #ifdef CONFIG_PPC_BOOK3E @@ -319,10 +319,10 @@ syscall_exit_work:  	mtmsrd	r10,1  #endif /* CONFIG_PPC_BOOK3E */ -	bl	.save_nvgprs +	bl	save_nvgprs  	addi	r3,r1,STACK_FRAME_OVERHEAD -	bl	.do_syscall_trace_leave -	b	.ret_from_except +	bl	do_syscall_trace_leave +	b	ret_from_except  /* Save non-volatile GPRs, if not already saved. */  _GLOBAL(save_nvgprs) @@ -345,52 +345,48 @@ _GLOBAL(save_nvgprs)   */  _GLOBAL(ppc_fork) -	bl	.save_nvgprs -	bl	.sys_fork +	bl	save_nvgprs +	bl	sys_fork  	b	syscall_exit  _GLOBAL(ppc_vfork) -	bl	.save_nvgprs -	bl	.sys_vfork +	bl	save_nvgprs +	bl	sys_vfork  	b	syscall_exit  _GLOBAL(ppc_clone) -	bl	.save_nvgprs -	bl	.sys_clone +	bl	save_nvgprs +	bl	sys_clone  	b	syscall_exit  _GLOBAL(ppc32_swapcontext) -	bl	.save_nvgprs -	bl	.compat_sys_swapcontext +	bl	save_nvgprs +	bl	compat_sys_swapcontext  	b	syscall_exit  _GLOBAL(ppc64_swapcontext) -	bl	.save_nvgprs -	bl	.sys_swapcontext +	bl	save_nvgprs +	bl	sys_swapcontext  	b	syscall_exit  _GLOBAL(ret_from_fork) -	bl	.schedule_tail +	bl	schedule_tail  	REST_NVGPRS(r1)  	li	r3,0  	b	syscall_exit  _GLOBAL(ret_from_kernel_thread) -	bl	.schedule_tail +	bl	schedule_tail  	REST_NVGPRS(r1) -	ld	r14, 0(r14)  	mtlr	r14  	mr	r3,r15 +#if defined(_CALL_ELF) && _CALL_ELF == 2 +	mr	r12,r14 +#endif  	blrl  	li	r3,0  	b	syscall_exit -	.section	".toc","aw" -DSCR_DEFAULT: -	.tc dscr_default[TC],dscr_default - -	.section	".text" -  /*   * This routine switches between two different tasks.  The process   * state of one is saved on its kernel stack.  Then the state @@ -432,12 +428,6 @@ BEGIN_FTR_SECTION  	std	r24,THREAD_VRSAVE(r3)  END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)  #endif /* CONFIG_ALTIVEC */ -#ifdef CONFIG_PPC64 -BEGIN_FTR_SECTION -	mfspr	r25,SPRN_DSCR -	std	r25,THREAD_DSCR(r3) -END_FTR_SECTION_IFSET(CPU_FTR_DSCR) -#endif  	and.	r0,r0,r22  	beq+	1f  	andc	r22,r22,r0 @@ -575,11 +565,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)  #ifdef CONFIG_PPC64  BEGIN_FTR_SECTION  	lwz	r6,THREAD_DSCR_INHERIT(r4) -	ld	r7,DSCR_DEFAULT@toc(2)  	ld	r0,THREAD_DSCR(r4)  	cmpwi	r6,0  	bne	1f -	ld	r0,0(r7) +	ld	r0,PACA_DSCR(r13)  1:  BEGIN_FTR_SECTION_NESTED(70)  	mfspr	r8, SPRN_FSCR @@ -611,7 +600,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_DSCR)  _GLOBAL(ret_from_except)  	ld	r11,_TRAP(r1)  	andi.	r0,r11,1 -	bne	.ret_from_except_lite +	bne	ret_from_except_lite  	REST_NVGPRS(r1)  _GLOBAL(ret_from_except_lite) @@ -661,21 +650,27 @@ _GLOBAL(ret_from_except_lite)  #endif  1:	andi.	r0,r4,_TIF_NEED_RESCHED  	beq	2f -	bl	.restore_interrupts +	bl	restore_interrupts  	SCHEDULE_USER -	b	.ret_from_except_lite - -2:	bl	.save_nvgprs -	bl	.restore_interrupts +	b	ret_from_except_lite +2: +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +	andi.	r0,r4,_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM +	bne	3f		/* only restore TM if nothing else to do */  	addi	r3,r1,STACK_FRAME_OVERHEAD -	bl	.do_notify_resume -	b	.ret_from_except +	bl	restore_tm_state +	b	restore +3: +#endif +	bl	save_nvgprs +	bl	restore_interrupts +	addi	r3,r1,STACK_FRAME_OVERHEAD +	bl	do_notify_resume +	b	ret_from_except  resume_kernel:  	/* check current_thread_info, _TIF_EMULATE_STACK_STORE */ -	CURRENT_THREAD_INFO(r9, r1) -	ld	r8,TI_FLAGS(r9) -	andis.	r8,r8,_TIF_EMULATE_STACK_STORE@h +	andis.	r8,r4,_TIF_EMULATE_STACK_STORE@h  	beq+	1f  	addi	r8,r1,INT_FRAME_SIZE	/* Get the kprobed function entry */ @@ -724,7 +719,7 @@ resume_kernel:  	 * sure we are soft-disabled first and reconcile irq state.  	 
*/  	RECONCILE_IRQ_STATE(r3,r4) -1:	bl	.preempt_schedule_irq +1:	bl	preempt_schedule_irq  	/* Re-test flags and eventually loop */  	CURRENT_THREAD_INFO(r9, r1) @@ -786,7 +781,7 @@ restore_no_replay:  	 */  do_restore:  #ifdef CONFIG_PPC_BOOK3E -	b	.exception_return_book3e +	b	exception_return_book3e  #else  	/*  	 * Clear the reservation. If we know the CPU tracks the address of @@ -820,6 +815,12 @@ fast_exception_return:  	andi.	r0,r3,MSR_RI  	beq-	unrecov_restore +	/* Load PPR from thread struct before we clear MSR:RI */ +BEGIN_FTR_SECTION +	ld	r2,PACACURRENT(r13) +	ld	r2,TASKTHREADPPR(r2) +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) +  	/*  	 * Clear RI before restoring r13.  If we are returning to  	 * userspace and we take an exception after restoring r13, @@ -840,8 +841,10 @@ fast_exception_return:  	 */  	andi.	r0,r3,MSR_PR  	beq	1f +BEGIN_FTR_SECTION +	mtspr	SPRN_PPR,r2	/* Restore PPR */ +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)  	ACCOUNT_CPU_USER_EXIT(r2, r4) -	RESTORE_PPR(r2, r4)  	REST_GPR(13, r1)  1:  	mtspr	SPRN_SRR1,r3 @@ -893,7 +896,7 @@ restore_check_irq_replay:  	 *  	 * Still, this might be useful for things like hash_page  	 */ -	bl	.__check_irq_replay +	bl	__check_irq_replay  	cmpwi	cr0,r3,0   	beq	restore_no_replay @@ -914,13 +917,13 @@ restore_check_irq_replay:  	cmpwi	cr0,r3,0x500  	bne	1f  	addi	r3,r1,STACK_FRAME_OVERHEAD; - 	bl	.do_IRQ -	b	.ret_from_except + 	bl	do_IRQ +	b	ret_from_except  1:	cmpwi	cr0,r3,0x900  	bne	1f  	addi	r3,r1,STACK_FRAME_OVERHEAD; -	bl	.timer_interrupt -	b	.ret_from_except +	bl	timer_interrupt +	b	ret_from_except  #ifdef CONFIG_PPC_DOORBELL  1:  #ifdef CONFIG_PPC_BOOK3E @@ -934,14 +937,14 @@ restore_check_irq_replay:  #endif /* CONFIG_PPC_BOOK3E */  	bne	1f  	addi	r3,r1,STACK_FRAME_OVERHEAD; -	bl	.doorbell_exception -	b	.ret_from_except +	bl	doorbell_exception +	b	ret_from_except  #endif /* CONFIG_PPC_DOORBELL */ -1:	b	.ret_from_except /* What else to do here ? */ +1:	b	ret_from_except /* What else to do here ? */  unrecov_restore:  	addi	r3,r1,STACK_FRAME_OVERHEAD -	bl	.unrecoverable_exception +	bl	unrecoverable_exception  	b	unrecov_restore  #ifdef CONFIG_PPC_RTAS @@ -1007,7 +1010,7 @@ _GLOBAL(enter_rtas)          std	r6,PACASAVEDMSR(r13)  	/* Setup our real return addr */	 -	LOAD_REG_ADDR(r4,.rtas_return_loc) +	LOAD_REG_ADDR(r4,rtas_return_loc)  	clrldi	r4,r4,2			/* convert to realmode address */         	mtlr	r4 @@ -1017,7 +1020,7 @@ _GLOBAL(enter_rtas)          li      r9,1          rldicr  r9,r9,MSR_SF_LG,(63-MSR_SF_LG) -	ori	r9,r9,MSR_IR|MSR_DR|MSR_FE0|MSR_FE1|MSR_FP|MSR_RI +	ori	r9,r9,MSR_IR|MSR_DR|MSR_FE0|MSR_FE1|MSR_FP|MSR_RI|MSR_LE  	andc	r6,r0,r9  	sync				/* disable interrupts so SRR0/1 */  	mtmsrd	r0			/* don't get trashed */ @@ -1031,14 +1034,16 @@ _GLOBAL(enter_rtas)  	rfid  	b	.	/* prevent speculative execution */ -_STATIC(rtas_return_loc) +rtas_return_loc: +	FIXUP_ENDIAN +  	/* relocation is off at this point */  	GET_PACA(r4)  	clrldi	r4,r4,2			/* convert to realmode address */  	bcl	20,31,$+4  0:	mflr	r3 -	ld	r3,(1f-0b)(r3)		/* get &.rtas_restore_regs */ +	ld	r3,(1f-0b)(r3)		/* get &rtas_restore_regs */  	mfmsr   r6  	li	r0,MSR_RI @@ -1055,9 +1060,9 @@ _STATIC(rtas_return_loc)  	b	.	
/* prevent speculative execution */  	.align	3 -1:	.llong	.rtas_restore_regs +1:	.llong	rtas_restore_regs -_STATIC(rtas_restore_regs) +rtas_restore_regs:  	/* relocation is on at this point */  	REST_GPR(2, r1)			/* Restore the TOC */  	REST_GPR(13, r1)		/* Restore paca */ @@ -1103,28 +1108,30 @@ _GLOBAL(enter_prom)  	std	r10,_CCR(r1)  	std	r11,_MSR(r1) -	/* Get the PROM entrypoint */ -	mtlr	r4 +	/* Put PROM address in SRR0 */ +	mtsrr0	r4 + +	/* Setup our trampoline return addr in LR */ +	bcl	20,31,$+4 +0:	mflr	r4 +	addi	r4,r4,(1f - 0b) +       	mtlr	r4 -	/* Switch MSR to 32 bits mode +	/* Prepare a 32-bit mode big endian MSR  	 */  #ifdef CONFIG_PPC_BOOK3E  	rlwinm	r11,r11,0,1,31 -	mtmsr	r11 +	mtsrr1	r11 +	rfi  #else /* CONFIG_PPC_BOOK3E */ -        mfmsr   r11 -        li      r12,1 -        rldicr  r12,r12,MSR_SF_LG,(63-MSR_SF_LG) -        andc    r11,r11,r12 -        li      r12,1 -        rldicr  r12,r12,MSR_ISF_LG,(63-MSR_ISF_LG) -        andc    r11,r11,r12 -        mtmsrd  r11 +	LOAD_REG_IMMEDIATE(r12, MSR_SF | MSR_ISF | MSR_LE) +	andc	r11,r11,r12 +	mtsrr1	r11 +	rfid  #endif /* CONFIG_PPC_BOOK3E */ -        isync -	/* Enter PROM here... */ -	blrl +1:	/* Return from OF */ +	FIXUP_ENDIAN  	/* Just make sure that r1 top 32 bits didn't get  	 * corrupt by OF @@ -1155,7 +1162,7 @@ _GLOBAL(mcount)  _GLOBAL(_mcount)  	blr -_GLOBAL(ftrace_caller) +_GLOBAL_TOC(ftrace_caller)  	/* Taken from output of objdump from lib64/glibc */  	mflr	r3  	ld	r11, 0(r1) @@ -1179,10 +1186,7 @@ _GLOBAL(ftrace_graph_stub)  _GLOBAL(ftrace_stub)  	blr  #else -_GLOBAL(mcount) -	blr - -_GLOBAL(_mcount) +_GLOBAL_TOC(_mcount)  	/* Taken from output of objdump from lib64/glibc */  	mflr	r3  	ld	r11, 0(r1) @@ -1220,7 +1224,7 @@ _GLOBAL(ftrace_graph_caller)  	ld	r11, 112(r1)  	addi	r3, r11, 16 -	bl	.prepare_ftrace_return +	bl	prepare_ftrace_return  	nop  	ld	r0, 128(r1) @@ -1236,7 +1240,7 @@ _GLOBAL(return_to_handler)  	mr	r31, r1  	stdu	r1, -112(r1) -	bl	.ftrace_return_to_handler +	bl	ftrace_return_to_handler  	nop  	/* return value has real return address */ @@ -1266,7 +1270,7 @@ _GLOBAL(mod_return_to_handler)  	 */  	ld	r2, PACATOC(r13) -	bl	.ftrace_return_to_handler +	bl	ftrace_return_to_handler  	nop  	/* return value has real return address */ diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c index 6300c13bbde..59e4ba74975 100644 --- a/arch/powerpc/kernel/epapr_paravirt.c +++ b/arch/powerpc/kernel/epapr_paravirt.c @@ -18,6 +18,7 @@   */  #include <linux/of.h> +#include <linux/of_fdt.h>  #include <asm/epapr_hcalls.h>  #include <asm/cacheflush.h>  #include <asm/code-patching.h> @@ -29,13 +30,14 @@ extern u32 epapr_ev_idle_start[];  #endif  bool epapr_paravirt_enabled; +static bool __maybe_unused epapr_has_idle;  static int __init early_init_dt_scan_epapr(unsigned long node,  					   const char *uname,  					   int depth, void *data)  {  	const u32 *insts; -	unsigned long len; +	int len;  	int i;  	insts = of_get_flat_dt_prop(node, "hcall-instructions", &len); @@ -46,15 +48,16 @@ static int __init early_init_dt_scan_epapr(unsigned long node,  		return -1;  	for (i = 0; i < (len / 4); i++) { -		patch_instruction(epapr_hypercall_start + i, insts[i]); +		u32 inst = be32_to_cpu(insts[i]); +		patch_instruction(epapr_hypercall_start + i, inst);  #if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64) -		patch_instruction(epapr_ev_idle_start + i, insts[i]); +		patch_instruction(epapr_ev_idle_start + i, inst);  #endif  	}  #if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64) 
 	if (of_get_flat_dt_prop(node, "has-idle", NULL)) -		ppc_md.power_save = epapr_ev_idle; +		epapr_has_idle = true;  #endif  	epapr_paravirt_enabled = true; @@ -69,3 +72,14 @@ int __init epapr_paravirt_early_init(void)  	return 0;  } +static int __init epapr_idle_init(void) +{ +#if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64) +	if (epapr_has_idle) +		ppc_md.power_save = epapr_ev_idle; +#endif + +	return 0; +} + +postcore_initcall(epapr_idle_init); diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 2d067049db2..bb9cac6c805 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -34,7 +34,250 @@   *     special interrupts from within a non-standard level will probably   *     blow you up   */ -#define	SPECIAL_EXC_FRAME_SIZE	INT_FRAME_SIZE +#define SPECIAL_EXC_SRR0	0 +#define SPECIAL_EXC_SRR1	1 +#define SPECIAL_EXC_SPRG_GEN	2 +#define SPECIAL_EXC_SPRG_TLB	3 +#define SPECIAL_EXC_MAS0	4 +#define SPECIAL_EXC_MAS1	5 +#define SPECIAL_EXC_MAS2	6 +#define SPECIAL_EXC_MAS3	7 +#define SPECIAL_EXC_MAS6	8 +#define SPECIAL_EXC_MAS7	9 +#define SPECIAL_EXC_MAS5	10	/* E.HV only */ +#define SPECIAL_EXC_MAS8	11	/* E.HV only */ +#define SPECIAL_EXC_IRQHAPPENED	12 +#define SPECIAL_EXC_DEAR	13 +#define SPECIAL_EXC_ESR		14 +#define SPECIAL_EXC_SOFTE	15 +#define SPECIAL_EXC_CSRR0	16 +#define SPECIAL_EXC_CSRR1	17 +/* must be even to keep 16-byte stack alignment */ +#define SPECIAL_EXC_END		18 + +#define SPECIAL_EXC_FRAME_SIZE	(INT_FRAME_SIZE + SPECIAL_EXC_END * 8) +#define SPECIAL_EXC_FRAME_OFFS  (INT_FRAME_SIZE - 288) + +#define SPECIAL_EXC_STORE(reg, name) \ +	std	reg, (SPECIAL_EXC_##name * 8 + SPECIAL_EXC_FRAME_OFFS)(r1) + +#define SPECIAL_EXC_LOAD(reg, name) \ +	ld	reg, (SPECIAL_EXC_##name * 8 + SPECIAL_EXC_FRAME_OFFS)(r1) + +special_reg_save: +	lbz	r9,PACAIRQHAPPENED(r13) +	RECONCILE_IRQ_STATE(r3,r4) + +	/* +	 * We only need (or have stack space) to save this stuff if +	 * we interrupted the kernel. +	 */ +	ld	r3,_MSR(r1) +	andi.	r3,r3,MSR_PR +	bnelr + +	/* Copy info into temporary exception thread info */ +	ld	r11,PACAKSAVE(r13) +	CURRENT_THREAD_INFO(r11, r11) +	CURRENT_THREAD_INFO(r12, r1) +	ld	r10,TI_FLAGS(r11) +	std	r10,TI_FLAGS(r12) +	ld	r10,TI_PREEMPT(r11) +	std	r10,TI_PREEMPT(r12) +	ld	r10,TI_TASK(r11) +	std	r10,TI_TASK(r12) + +	/* +	 * Advance to the next TLB exception frame for handler +	 * types that don't do it automatically. 
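
The SPECIAL_EXC_* values above are slot indices, not byte offsets; SPECIAL_EXC_STORE/LOAD scale them by 8 and add SPECIAL_EXC_FRAME_OFFS, and SPECIAL_EXC_END is kept even so the enlarged frame stays 16-byte aligned. A quick C check of that arithmetic (INT_FRAME_SIZE here is a stand-in value, not the real one):

#include <assert.h>
#include <stdio.h>

/* Slot indices from the patch; 18 slots * 8 bytes = 144, a multiple
 * of 16, so any 16-byte-aligned INT_FRAME_SIZE stays aligned. */
#define SPECIAL_EXC_SRR0	0
#define SPECIAL_EXC_CSRR1	17
#define SPECIAL_EXC_END		18

/* Placeholder only; the real INT_FRAME_SIZE is arch-defined */
#define INT_FRAME_SIZE		736
#define SPECIAL_EXC_FRAME_SIZE	(INT_FRAME_SIZE + SPECIAL_EXC_END * 8)
#define SPECIAL_EXC_FRAME_OFFS	(INT_FRAME_SIZE - 288)

/* What SPECIAL_EXC_STORE/LOAD compute as the r1-relative offset */
static int slot_offset(int slot)
{
	return slot * 8 + SPECIAL_EXC_FRAME_OFFS;
}

int main(void)
{
	assert(SPECIAL_EXC_FRAME_SIZE % 16 == 0);
	printf("SRR0 at r1+%d, CSRR1 at r1+%d\n",
	       slot_offset(SPECIAL_EXC_SRR0), slot_offset(SPECIAL_EXC_CSRR1));
	return 0;
}
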
+	 */ +	LOAD_REG_ADDR(r11,extlb_level_exc) +	lwz	r12,0(r11) +	mfspr	r10,SPRN_SPRG_TLB_EXFRAME +	add	r10,r10,r12 +	mtspr	SPRN_SPRG_TLB_EXFRAME,r10 + +	/* +	 * Save registers needed to allow nesting of certain exceptions +	 * (such as TLB misses) inside special exception levels +	 */ +	mfspr	r10,SPRN_SRR0 +	SPECIAL_EXC_STORE(r10,SRR0) +	mfspr	r10,SPRN_SRR1 +	SPECIAL_EXC_STORE(r10,SRR1) +	mfspr	r10,SPRN_SPRG_GEN_SCRATCH +	SPECIAL_EXC_STORE(r10,SPRG_GEN) +	mfspr	r10,SPRN_SPRG_TLB_SCRATCH +	SPECIAL_EXC_STORE(r10,SPRG_TLB) +	mfspr	r10,SPRN_MAS0 +	SPECIAL_EXC_STORE(r10,MAS0) +	mfspr	r10,SPRN_MAS1 +	SPECIAL_EXC_STORE(r10,MAS1) +	mfspr	r10,SPRN_MAS2 +	SPECIAL_EXC_STORE(r10,MAS2) +	mfspr	r10,SPRN_MAS3 +	SPECIAL_EXC_STORE(r10,MAS3) +	mfspr	r10,SPRN_MAS6 +	SPECIAL_EXC_STORE(r10,MAS6) +	mfspr	r10,SPRN_MAS7 +	SPECIAL_EXC_STORE(r10,MAS7) +BEGIN_FTR_SECTION +	mfspr	r10,SPRN_MAS5 +	SPECIAL_EXC_STORE(r10,MAS5) +	mfspr	r10,SPRN_MAS8 +	SPECIAL_EXC_STORE(r10,MAS8) + +	/* MAS5/8 could have inappropriate values if we interrupted KVM code */ +	li	r10,0 +	mtspr	SPRN_MAS5,r10 +	mtspr	SPRN_MAS8,r10 +END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) +	SPECIAL_EXC_STORE(r9,IRQHAPPENED) + +	mfspr	r10,SPRN_DEAR +	SPECIAL_EXC_STORE(r10,DEAR) +	mfspr	r10,SPRN_ESR +	SPECIAL_EXC_STORE(r10,ESR) + +	lbz	r10,PACASOFTIRQEN(r13) +	SPECIAL_EXC_STORE(r10,SOFTE) +	ld	r10,_NIP(r1) +	SPECIAL_EXC_STORE(r10,CSRR0) +	ld	r10,_MSR(r1) +	SPECIAL_EXC_STORE(r10,CSRR1) + +	blr + +ret_from_level_except: +	ld	r3,_MSR(r1) +	andi.	r3,r3,MSR_PR +	beq	1f +	b	ret_from_except +1: + +	LOAD_REG_ADDR(r11,extlb_level_exc) +	lwz	r12,0(r11) +	mfspr	r10,SPRN_SPRG_TLB_EXFRAME +	sub	r10,r10,r12 +	mtspr	SPRN_SPRG_TLB_EXFRAME,r10 + +	/* +	 * It's possible that the special level exception interrupted a +	 * TLB miss handler, and inserted the same entry that the +	 * interrupted handler was about to insert.  On CPUs without TLB +	 * write conditional, this can result in a duplicate TLB entry. +	 * Wipe all non-bolted entries to be safe. +	 * +	 * Note that this doesn't protect against any TLB misses +	 * we may take accessing the stack from here to the end of +	 * the special level exception.  It's not clear how we can +	 * reasonably protect against that, but only CPUs with +	 * neither TLB write conditional nor bolted kernel memory +	 * are affected.  Do any such CPUs even exist? +	 */ +	PPC_TLBILX_ALL(0,R0) + +	REST_NVGPRS(r1) + +	SPECIAL_EXC_LOAD(r10,SRR0) +	mtspr	SPRN_SRR0,r10 +	SPECIAL_EXC_LOAD(r10,SRR1) +	mtspr	SPRN_SRR1,r10 +	SPECIAL_EXC_LOAD(r10,SPRG_GEN) +	mtspr	SPRN_SPRG_GEN_SCRATCH,r10 +	SPECIAL_EXC_LOAD(r10,SPRG_TLB) +	mtspr	SPRN_SPRG_TLB_SCRATCH,r10 +	SPECIAL_EXC_LOAD(r10,MAS0) +	mtspr	SPRN_MAS0,r10 +	SPECIAL_EXC_LOAD(r10,MAS1) +	mtspr	SPRN_MAS1,r10 +	SPECIAL_EXC_LOAD(r10,MAS2) +	mtspr	SPRN_MAS2,r10 +	SPECIAL_EXC_LOAD(r10,MAS3) +	mtspr	SPRN_MAS3,r10 +	SPECIAL_EXC_LOAD(r10,MAS6) +	mtspr	SPRN_MAS6,r10 +	SPECIAL_EXC_LOAD(r10,MAS7) +	mtspr	SPRN_MAS7,r10 +BEGIN_FTR_SECTION +	SPECIAL_EXC_LOAD(r10,MAS5) +	mtspr	SPRN_MAS5,r10 +	SPECIAL_EXC_LOAD(r10,MAS8) +	mtspr	SPRN_MAS8,r10 +END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) + +	lbz	r6,PACASOFTIRQEN(r13) +	ld	r5,SOFTE(r1) + +	/* Interrupts had better not already be enabled... */ +	twnei	r6,0 + +	cmpwi	cr0,r5,0 +	beq	1f + +	TRACE_ENABLE_INTS +	stb	r5,PACASOFTIRQEN(r13) +1: +	/* +	 * Restore PACAIRQHAPPENED rather than setting it based on +	 * the return MSR[EE], since we could have interrupted +	 * __check_irq_replay() or other inconsistent transitory +	 * states that must remain that way. 
+	 */ +	SPECIAL_EXC_LOAD(r10,IRQHAPPENED) +	stb	r10,PACAIRQHAPPENED(r13) + +	SPECIAL_EXC_LOAD(r10,DEAR) +	mtspr	SPRN_DEAR,r10 +	SPECIAL_EXC_LOAD(r10,ESR) +	mtspr	SPRN_ESR,r10 + +	stdcx.	r0,0,r1		/* to clear the reservation */ + +	REST_4GPRS(2, r1) +	REST_4GPRS(6, r1) + +	ld	r10,_CTR(r1) +	ld	r11,_XER(r1) +	mtctr	r10 +	mtxer	r11 + +	blr + +.macro ret_from_level srr0 srr1 paca_ex scratch +	bl	ret_from_level_except + +	ld	r10,_LINK(r1) +	ld	r11,_CCR(r1) +	ld	r0,GPR13(r1) +	mtlr	r10 +	mtcr	r11 + +	ld	r10,GPR10(r1) +	ld	r11,GPR11(r1) +	ld	r12,GPR12(r1) +	mtspr	\scratch,r0 + +	std	r10,\paca_ex+EX_R10(r13); +	std	r11,\paca_ex+EX_R11(r13); +	ld	r10,_NIP(r1) +	ld	r11,_MSR(r1) +	ld	r0,GPR0(r1) +	ld	r1,GPR1(r1) +	mtspr	\srr0,r10 +	mtspr	\srr1,r11 +	ld	r10,\paca_ex+EX_R10(r13) +	ld	r11,\paca_ex+EX_R11(r13) +	mfspr	r13,\scratch +.endm + +ret_from_crit_except: +	ret_from_level SPRN_CSRR0 SPRN_CSRR1 PACA_EXCRIT SPRN_SPRG_CRIT_SCRATCH +	rfci + +ret_from_mc_except: +	ret_from_level SPRN_MCSRR0 SPRN_MCSRR1 PACA_EXMC SPRN_SPRG_MC_SCRATCH +	rfmci  /* Exception prolog code for all exceptions */  #define EXCEPTION_PROLOG(n, intnum, type, addition)	    		    \ @@ -42,7 +285,6 @@  	mfspr	r13,SPRN_SPRG_PACA;	/* get PACA */			    \  	std	r10,PACA_EX##type+EX_R10(r13);				    \  	std	r11,PACA_EX##type+EX_R11(r13);				    \ -	PROLOG_STORE_RESTORE_SCRATCH_##type;				    \  	mfcr	r10;			/* save CR */			    \  	mfspr	r11,SPRN_##type##_SRR1;/* what are we coming from */	    \  	DO_KVM	intnum,SPRN_##type##_SRR1;    /* KVM hook */		    \ @@ -69,19 +311,19 @@  #define CRIT_SET_KSTACK						            \  	ld	r1,PACA_CRIT_STACK(r13);				    \ -	subi	r1,r1,SPECIAL_EXC_FRAME_SIZE; +	subi	r1,r1,SPECIAL_EXC_FRAME_SIZE  #define SPRN_CRIT_SRR0	SPRN_CSRR0  #define SPRN_CRIT_SRR1	SPRN_CSRR1  #define DBG_SET_KSTACK						            \  	ld	r1,PACA_DBG_STACK(r13);					    \ -	subi	r1,r1,SPECIAL_EXC_FRAME_SIZE; +	subi	r1,r1,SPECIAL_EXC_FRAME_SIZE  #define SPRN_DBG_SRR0	SPRN_DSRR0  #define SPRN_DBG_SRR1	SPRN_DSRR1  #define MC_SET_KSTACK						            \  	ld	r1,PACA_MC_STACK(r13);					    \ -	subi	r1,r1,SPECIAL_EXC_FRAME_SIZE; +	subi	r1,r1,SPECIAL_EXC_FRAME_SIZE  #define SPRN_MC_SRR0	SPRN_MCSRR0  #define SPRN_MC_SRR1	SPRN_MCSRR1 @@ -100,20 +342,6 @@  #define GDBELL_EXCEPTION_PROLOG(n, intnum, addition)			    \  	EXCEPTION_PROLOG(n, intnum, GDBELL, addition##_GDBELL(n)) -/* - * Store user-visible scratch in PACA exception slots and restore proper value - */ -#define PROLOG_STORE_RESTORE_SCRATCH_GEN -#define PROLOG_STORE_RESTORE_SCRATCH_GDBELL -#define PROLOG_STORE_RESTORE_SCRATCH_DBG -#define PROLOG_STORE_RESTORE_SCRATCH_MC - -#define PROLOG_STORE_RESTORE_SCRATCH_CRIT				    \ -	mfspr	r10,SPRN_SPRG_CRIT_SCRATCH;	/* get r13 */		    \ -	std	r10,PACA_EXCRIT+EX_R13(r13);				    \ -	ld	r11,PACA_SPRG3(r13);					    \ -	mtspr	SPRN_SPRG_CRIT_SCRATCH,r11; -  /* Variants of the "addition" argument for the prolog   */  #define PROLOG_ADDITION_NONE_GEN(n) @@ -147,10 +375,8 @@  	std	r15,PACA_EXMC+EX_R15(r13) -/* Core exception code for all exceptions except TLB misses. - * XXX: Needs to make SPRN_SPRG_GEN depend on exception type - */ -#define EXCEPTION_COMMON(n, excf, ints)					    \ +/* Core exception code for all exceptions except TLB misses. 
*/ +#define EXCEPTION_COMMON_LVL(n, scratch, excf)				    \  exc_##n##_common:							    \  	std	r0,GPR0(r1);		/* save r0 in stackframe */	    \  	std	r2,GPR2(r1);		/* save r2 in stackframe */	    \ @@ -163,7 +389,7 @@ exc_##n##_common:							    \  	ACCOUNT_CPU_USER_ENTRY(r10,r11);/* accounting (uses cr0+eq) */	    \  2:	ld	r3,excf+EX_R10(r13);	/* get back r10 */		    \  	ld	r4,excf+EX_R11(r13);	/* get back r11 */		    \ -	mfspr	r5,SPRN_SPRG_GEN_SCRATCH;/* get back r13 */		    \ +	mfspr	r5,scratch;		/* get back r13 */		    \  	std	r12,GPR12(r1);		/* save r12 in stackframe */	    \  	ld	r2,PACATOC(r13);	/* get kernel TOC into r2 */	    \  	mflr	r6;			/* save LR in stackframe */	    \ @@ -187,24 +413,29 @@ exc_##n##_common:							    \  	std	r11,SOFTE(r1);		/* and save it to stackframe */     \  	std	r12,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */	    \  	std	r3,_TRAP(r1);		/* set trap number		*/  \ -	std	r0,RESULT(r1);		/* clear regs->result */	    \ -	ints; +	std	r0,RESULT(r1);		/* clear regs->result */ -/* Variants for the "ints" argument. This one does nothing when we want - * to keep interrupts in their original state - */ -#define INTS_KEEP +#define EXCEPTION_COMMON(n) \ +	EXCEPTION_COMMON_LVL(n, SPRN_SPRG_GEN_SCRATCH, PACA_EXGEN) +#define EXCEPTION_COMMON_CRIT(n) \ +	EXCEPTION_COMMON_LVL(n, SPRN_SPRG_CRIT_SCRATCH, PACA_EXCRIT) +#define EXCEPTION_COMMON_MC(n) \ +	EXCEPTION_COMMON_LVL(n, SPRN_SPRG_MC_SCRATCH, PACA_EXMC) +#define EXCEPTION_COMMON_DBG(n) \ +	EXCEPTION_COMMON_LVL(n, SPRN_SPRG_DBG_SCRATCH, PACA_EXDBG) -/* This second version is meant for exceptions that don't immediately - * hard-enable. We set a bit in paca->irq_happened to ensure that - * a subsequent call to arch_local_irq_restore() will properly - * hard-enable and avoid the fast-path, and then reconcile irq state. +/* + * This is meant for exceptions that don't immediately hard-enable.  We + * set a bit in paca->irq_happened to ensure that a subsequent call to + * arch_local_irq_restore() will properly hard-enable and avoid the + * fast-path, and then reconcile irq state.   */  #define INTS_DISABLE	RECONCILE_IRQ_STATE(r3,r4) -/* This is called by exceptions that used INTS_KEEP (that did not touch - * irq indicators in the PACA). This will restore MSR:EE to it's previous - * value +/* + * This is called by exceptions that don't use INTS_DISABLE (that did not + * touch irq indicators in the PACA).  This will restore MSR:EE to it's + * previous value   *   * XXX In the long run, we may want to open-code it in order to separate the   *     load from the wrtee, thus limiting the latency caused by the dependency @@ -262,12 +493,13 @@ exc_##n##_bad_stack:							    \  #define MASKABLE_EXCEPTION(trapnum, intnum, label, hdlr, ack)		\  	START_EXCEPTION(label);						\  	NORMAL_EXCEPTION_PROLOG(trapnum, intnum, PROLOG_ADDITION_MASKABLE)\ -	EXCEPTION_COMMON(trapnum, PACA_EXGEN, INTS_DISABLE)		\ +	EXCEPTION_COMMON(trapnum)					\ +	INTS_DISABLE;							\  	ack(r8);							\  	CHECK_NAPPING();						\  	addi	r3,r1,STACK_FRAME_OVERHEAD;				\  	bl	hdlr;							\ -	b	.ret_from_except_lite; +	b	ret_from_except_lite;  /* This value is used to mark exception frames on the stack. 
*/  	.section	".toc","aw" @@ -283,8 +515,8 @@ exception_marker:  	.balign	0x1000  	.globl interrupt_base_book3e  interrupt_base_book3e:					/* fake trap */ -	EXCEPTION_STUB(0x000, machine_check)		/* 0x0200 */ -	EXCEPTION_STUB(0x020, critical_input)		/* 0x0580 */ +	EXCEPTION_STUB(0x000, machine_check) +	EXCEPTION_STUB(0x020, critical_input)		/* 0x0100 */  	EXCEPTION_STUB(0x040, debug_crit)		/* 0x0d00 */  	EXCEPTION_STUB(0x060, data_storage)		/* 0x0300 */  	EXCEPTION_STUB(0x080, instruction_storage)	/* 0x0400 */ @@ -299,8 +531,8 @@ interrupt_base_book3e:					/* fake trap */  	EXCEPTION_STUB(0x1a0, watchdog)			/* 0x09f0 */  	EXCEPTION_STUB(0x1c0, data_tlb_miss)  	EXCEPTION_STUB(0x1e0, instruction_tlb_miss) -	EXCEPTION_STUB(0x200, altivec_unavailable)	/* 0x0f20 */ -	EXCEPTION_STUB(0x220, altivec_assist)		/* 0x1700 */ +	EXCEPTION_STUB(0x200, altivec_unavailable) +	EXCEPTION_STUB(0x220, altivec_assist)  	EXCEPTION_STUB(0x260, perfmon)  	EXCEPTION_STUB(0x280, doorbell)  	EXCEPTION_STUB(0x2a0, doorbell_crit) @@ -308,6 +540,7 @@ interrupt_base_book3e:					/* fake trap */  	EXCEPTION_STUB(0x2e0, guest_doorbell_crit)  	EXCEPTION_STUB(0x300, hypercall)  	EXCEPTION_STUB(0x320, ehpriv) +	EXCEPTION_STUB(0x340, lrat_error)  	.globl interrupt_end_book3e  interrupt_end_book3e: @@ -316,25 +549,25 @@ interrupt_end_book3e:  	START_EXCEPTION(critical_input);  	CRIT_EXCEPTION_PROLOG(0x100, BOOKE_INTERRUPT_CRITICAL,  			      PROLOG_ADDITION_NONE) -//	EXCEPTION_COMMON(0x100, PACA_EXCRIT, INTS_DISABLE) -//	bl	special_reg_save_crit -//	CHECK_NAPPING(); -//	addi	r3,r1,STACK_FRAME_OVERHEAD -//	bl	.critical_exception -//	b	ret_from_crit_except -	b	. +	EXCEPTION_COMMON_CRIT(0x100) +	bl	save_nvgprs +	bl	special_reg_save +	CHECK_NAPPING(); +	addi	r3,r1,STACK_FRAME_OVERHEAD +	bl	unknown_exception +	b	ret_from_crit_except  /* Machine Check Interrupt */  	START_EXCEPTION(machine_check); -	MC_EXCEPTION_PROLOG(0x200, BOOKE_INTERRUPT_MACHINE_CHECK, +	MC_EXCEPTION_PROLOG(0x000, BOOKE_INTERRUPT_MACHINE_CHECK,  			    PROLOG_ADDITION_NONE) -//	EXCEPTION_COMMON(0x200, PACA_EXMC, INTS_DISABLE) -//	bl	special_reg_save_mc -//	addi	r3,r1,STACK_FRAME_OVERHEAD -//	CHECK_NAPPING(); -//	bl	.machine_check_exception -//	b	ret_from_mc_except -	b	. 
+	EXCEPTION_COMMON_MC(0x000) +	bl	save_nvgprs +	bl	special_reg_save +	CHECK_NAPPING(); +	addi	r3,r1,STACK_FRAME_OVERHEAD +	bl	machine_check_exception +	b	ret_from_mc_except  /* Data Storage Interrupt */  	START_EXCEPTION(data_storage) @@ -342,7 +575,8 @@ interrupt_end_book3e:  				PROLOG_ADDITION_2REGS)  	mfspr	r14,SPRN_DEAR  	mfspr	r15,SPRN_ESR -	EXCEPTION_COMMON(0x300, PACA_EXGEN, INTS_DISABLE) +	EXCEPTION_COMMON(0x300) +	INTS_DISABLE  	b	storage_fault_common  /* Instruction Storage Interrupt */ @@ -351,12 +585,13 @@ interrupt_end_book3e:  				PROLOG_ADDITION_2REGS)  	li	r15,0  	mr	r14,r10 -	EXCEPTION_COMMON(0x400, PACA_EXGEN, INTS_DISABLE) +	EXCEPTION_COMMON(0x400) +	INTS_DISABLE  	b	storage_fault_common  /* External Input Interrupt */  	MASKABLE_EXCEPTION(0x500, BOOKE_INTERRUPT_EXTERNAL, -			   external_input, .do_IRQ, ACK_NONE) +			   external_input, do_IRQ, ACK_NONE)  /* Alignment */  	START_EXCEPTION(alignment); @@ -364,7 +599,7 @@ interrupt_end_book3e:  				PROLOG_ADDITION_2REGS)  	mfspr	r14,SPRN_DEAR  	mfspr	r15,SPRN_ESR -	EXCEPTION_COMMON(0x600, PACA_EXGEN, INTS_KEEP) +	EXCEPTION_COMMON(0x600)  	b	alignment_more	/* no room, go out of line */  /* Program Interrupt */ @@ -372,89 +607,96 @@ interrupt_end_book3e:  	NORMAL_EXCEPTION_PROLOG(0x700, BOOKE_INTERRUPT_PROGRAM,  				PROLOG_ADDITION_1REG)  	mfspr	r14,SPRN_ESR -	EXCEPTION_COMMON(0x700, PACA_EXGEN, INTS_DISABLE) +	EXCEPTION_COMMON(0x700) +	INTS_DISABLE  	std	r14,_DSISR(r1)  	addi	r3,r1,STACK_FRAME_OVERHEAD  	ld	r14,PACA_EXGEN+EX_R14(r13) -	bl	.save_nvgprs -	bl	.program_check_exception -	b	.ret_from_except +	bl	save_nvgprs +	bl	program_check_exception +	b	ret_from_except  /* Floating Point Unavailable Interrupt */  	START_EXCEPTION(fp_unavailable);  	NORMAL_EXCEPTION_PROLOG(0x800, BOOKE_INTERRUPT_FP_UNAVAIL,  				PROLOG_ADDITION_NONE)  	/* we can probably do a shorter exception entry for that one... */ -	EXCEPTION_COMMON(0x800, PACA_EXGEN, INTS_KEEP) +	EXCEPTION_COMMON(0x800)  	ld	r12,_MSR(r1)  	andi.	r0,r12,MSR_PR;  	beq-	1f -	bl	.load_up_fpu +	bl	load_up_fpu  	b	fast_exception_return  1:	INTS_DISABLE -	bl	.save_nvgprs +	bl	save_nvgprs  	addi	r3,r1,STACK_FRAME_OVERHEAD -	bl	.kernel_fp_unavailable_exception -	b	.ret_from_except +	bl	kernel_fp_unavailable_exception +	b	ret_from_except  /* Altivec Unavailable Interrupt */  	START_EXCEPTION(altivec_unavailable); -	NORMAL_EXCEPTION_PROLOG(0x200, BOOKE_INTERRUPT_ALTIVEC_UNAVAIL, +	NORMAL_EXCEPTION_PROLOG(0x200, BOOKE_INTERRUPT_SPE_ALTIVEC_UNAVAIL,  				PROLOG_ADDITION_NONE)  	/* we can probably do a shorter exception entry for that one... */ -	EXCEPTION_COMMON(0x200, PACA_EXGEN, INTS_KEEP) +	EXCEPTION_COMMON(0x200)  #ifdef CONFIG_ALTIVEC  BEGIN_FTR_SECTION  	ld	r12,_MSR(r1)  	andi.	
r0,r12,MSR_PR;  	beq-	1f -	bl	.load_up_altivec +	bl	load_up_altivec  	b	fast_exception_return  1:  END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)  #endif  	INTS_DISABLE -	bl	.save_nvgprs +	bl	save_nvgprs  	addi	r3,r1,STACK_FRAME_OVERHEAD -	bl	.altivec_unavailable_exception -	b	.ret_from_except +	bl	altivec_unavailable_exception +	b	ret_from_except  /* AltiVec Assist */  	START_EXCEPTION(altivec_assist); -	NORMAL_EXCEPTION_PROLOG(0x220, BOOKE_INTERRUPT_ALTIVEC_ASSIST, +	NORMAL_EXCEPTION_PROLOG(0x220, +				BOOKE_INTERRUPT_SPE_FP_DATA_ALTIVEC_ASSIST,  				PROLOG_ADDITION_NONE) -	EXCEPTION_COMMON(0x220, PACA_EXGEN, INTS_DISABLE) -	bl	.save_nvgprs +	EXCEPTION_COMMON(0x220) +	INTS_DISABLE +	bl	save_nvgprs  	addi	r3,r1,STACK_FRAME_OVERHEAD  #ifdef CONFIG_ALTIVEC  BEGIN_FTR_SECTION -	bl	.altivec_assist_exception +	bl	altivec_assist_exception  END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)  #else -	bl	.unknown_exception +	bl	unknown_exception  #endif -	b	.ret_from_except +	b	ret_from_except  /* Decrementer Interrupt */  	MASKABLE_EXCEPTION(0x900, BOOKE_INTERRUPT_DECREMENTER, -			   decrementer, .timer_interrupt, ACK_DEC) +			   decrementer, timer_interrupt, ACK_DEC)  /* Fixed Interval Timer Interrupt */  	MASKABLE_EXCEPTION(0x980, BOOKE_INTERRUPT_FIT, -			   fixed_interval, .unknown_exception, ACK_FIT) +			   fixed_interval, unknown_exception, ACK_FIT)  /* Watchdog Timer Interrupt */  	START_EXCEPTION(watchdog);  	CRIT_EXCEPTION_PROLOG(0x9f0, BOOKE_INTERRUPT_WATCHDOG,  			      PROLOG_ADDITION_NONE) -//	EXCEPTION_COMMON(0x9f0, PACA_EXCRIT, INTS_DISABLE) -//	bl	special_reg_save_crit -//	CHECK_NAPPING(); -//	addi	r3,r1,STACK_FRAME_OVERHEAD -//	bl	.unknown_exception -//	b	ret_from_crit_except -	b	. +	EXCEPTION_COMMON_CRIT(0x9f0) +	bl	save_nvgprs +	bl	special_reg_save +	CHECK_NAPPING(); +	addi	r3,r1,STACK_FRAME_OVERHEAD +#ifdef CONFIG_BOOKE_WDT +	bl	WatchdogException +#else +	bl	unknown_exception +#endif +	b	ret_from_crit_except  /* System Call Interrupt */  	START_EXCEPTION(system_call) @@ -468,11 +710,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)  	START_EXCEPTION(ap_unavailable);  	NORMAL_EXCEPTION_PROLOG(0xf20, BOOKE_INTERRUPT_AP_UNAVAIL,  				PROLOG_ADDITION_NONE) -	EXCEPTION_COMMON(0xf20, PACA_EXGEN, INTS_DISABLE) -	bl	.save_nvgprs +	EXCEPTION_COMMON(0xf20) +	INTS_DISABLE +	bl	save_nvgprs  	addi	r3,r1,STACK_FRAME_OVERHEAD -	bl	.unknown_exception -	b	.ret_from_except +	bl	unknown_exception +	b	ret_from_except  /* Debug exception as a critical interrupt*/  	START_EXCEPTION(debug_crit); @@ -511,7 +754,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)  	mtcr	r10  	ld	r10,PACA_EXCRIT+EX_R10(r13)	/* restore registers */  	ld	r11,PACA_EXCRIT+EX_R11(r13) -	ld	r13,PACA_EXCRIT+EX_R13(r13) +	mfspr	r13,SPRN_SPRG_CRIT_SCRATCH  	rfci  	/* Normal debug exception */ @@ -524,18 +767,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)  	/* Now we mash up things to make it look like we are coming on a  	 * normal exception  	 */ -	ld	r15,PACA_EXCRIT+EX_R13(r13) -	mtspr	SPRN_SPRG_GEN_SCRATCH,r15  	mfspr	r14,SPRN_DBSR -	EXCEPTION_COMMON(0xd00, PACA_EXCRIT, INTS_DISABLE) +	EXCEPTION_COMMON_CRIT(0xd00)  	std	r14,_DSISR(r1)  	addi	r3,r1,STACK_FRAME_OVERHEAD  	mr	r4,r14  	ld	r14,PACA_EXCRIT+EX_R14(r13)  	ld	r15,PACA_EXCRIT+EX_R15(r13) -	bl	.save_nvgprs -	bl	.DebugException -	b	.ret_from_except +	bl	save_nvgprs +	bl	DebugException +	b	ret_from_except  kernel_dbg_exc:  	b	.	
/* NYI */ @@ -590,42 +831,43 @@ kernel_dbg_exc:  	/* Now we mash up things to make it look like we are coming on a  	 * normal exception  	 */ -	mfspr	r15,SPRN_SPRG_DBG_SCRATCH -	mtspr	SPRN_SPRG_GEN_SCRATCH,r15  	mfspr	r14,SPRN_DBSR -	EXCEPTION_COMMON(0xd08, PACA_EXDBG, INTS_DISABLE) +	EXCEPTION_COMMON_DBG(0xd08) +	INTS_DISABLE  	std	r14,_DSISR(r1)  	addi	r3,r1,STACK_FRAME_OVERHEAD  	mr	r4,r14  	ld	r14,PACA_EXDBG+EX_R14(r13)  	ld	r15,PACA_EXDBG+EX_R15(r13) -	bl	.save_nvgprs -	bl	.DebugException -	b	.ret_from_except +	bl	save_nvgprs +	bl	DebugException +	b	ret_from_except  	START_EXCEPTION(perfmon);  	NORMAL_EXCEPTION_PROLOG(0x260, BOOKE_INTERRUPT_PERFORMANCE_MONITOR,  				PROLOG_ADDITION_NONE) -	EXCEPTION_COMMON(0x260, PACA_EXGEN, INTS_DISABLE) +	EXCEPTION_COMMON(0x260) +	INTS_DISABLE +	CHECK_NAPPING()  	addi	r3,r1,STACK_FRAME_OVERHEAD -	bl	.performance_monitor_exception -	b	.ret_from_except_lite +	bl	performance_monitor_exception +	b	ret_from_except_lite  /* Doorbell interrupt */  	MASKABLE_EXCEPTION(0x280, BOOKE_INTERRUPT_DOORBELL, -			   doorbell, .doorbell_exception, ACK_NONE) +			   doorbell, doorbell_exception, ACK_NONE)  /* Doorbell critical Interrupt */  	START_EXCEPTION(doorbell_crit);  	CRIT_EXCEPTION_PROLOG(0x2a0, BOOKE_INTERRUPT_DOORBELL_CRITICAL,  			      PROLOG_ADDITION_NONE) -//	EXCEPTION_COMMON(0x2a0, PACA_EXCRIT, INTS_DISABLE) -//	bl	special_reg_save_crit -//	CHECK_NAPPING(); -//	addi	r3,r1,STACK_FRAME_OVERHEAD -//	bl	.doorbell_critical_exception -//	b	ret_from_crit_except -	b	. +	EXCEPTION_COMMON_CRIT(0x2a0) +	bl	save_nvgprs +	bl	special_reg_save +	CHECK_NAPPING(); +	addi	r3,r1,STACK_FRAME_OVERHEAD +	bl	unknown_exception +	b	ret_from_crit_except  /*   *	Guest doorbell interrupt @@ -634,41 +876,52 @@ kernel_dbg_exc:  	START_EXCEPTION(guest_doorbell);  	GDBELL_EXCEPTION_PROLOG(0x2c0, BOOKE_INTERRUPT_GUEST_DBELL,  			        PROLOG_ADDITION_NONE) -	EXCEPTION_COMMON(0x2c0, PACA_EXGEN, INTS_KEEP) +	EXCEPTION_COMMON(0x2c0)  	addi	r3,r1,STACK_FRAME_OVERHEAD -	bl	.save_nvgprs +	bl	save_nvgprs  	INTS_RESTORE_HARD -	bl	.unknown_exception -	b	.ret_from_except +	bl	unknown_exception +	b	ret_from_except  /* Guest Doorbell critical Interrupt */  	START_EXCEPTION(guest_doorbell_crit);  	CRIT_EXCEPTION_PROLOG(0x2e0, BOOKE_INTERRUPT_GUEST_DBELL_CRIT,  			      PROLOG_ADDITION_NONE) -//	EXCEPTION_COMMON(0x2e0, PACA_EXCRIT, INTS_DISABLE) -//	bl	special_reg_save_crit -//	CHECK_NAPPING(); -//	addi	r3,r1,STACK_FRAME_OVERHEAD -//	bl	.guest_doorbell_critical_exception -//	b	ret_from_crit_except -	b	. 
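
Each critical handler un-stubbed in these hunks follows one pattern: EXCEPTION_COMMON_CRIT builds the frame on the critical stack, save_nvgprs and special_reg_save preserve the non-volatile GPRs and the critical save/restore SPRs, CHECK_NAPPING() catches a core that was napping, and control then reaches a C handler before returning through ret_from_crit_except. The C fallback used for the doorbell and guest-doorbell critical paths is unknown_exception; a sketch of its shape (a paraphrase of the generic traps.c handler, not a verbatim copy):

	/* Sketch: generic fallback these crit paths call. */
	void unknown_exception(struct pt_regs *regs)
	{
		printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n",
		       regs->nip, regs->msr, regs->trap);
		_exception(SIGTRAP, regs, 0, 0);
	}
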
+	EXCEPTION_COMMON_CRIT(0x2e0) +	bl	save_nvgprs +	bl	special_reg_save +	CHECK_NAPPING(); +	addi	r3,r1,STACK_FRAME_OVERHEAD +	bl	unknown_exception +	b	ret_from_crit_except  /* Hypervisor call */  	START_EXCEPTION(hypercall);  	NORMAL_EXCEPTION_PROLOG(0x310, BOOKE_INTERRUPT_HV_SYSCALL,  			        PROLOG_ADDITION_NONE) -	EXCEPTION_COMMON(0x310, PACA_EXGEN, INTS_KEEP) +	EXCEPTION_COMMON(0x310)  	addi	r3,r1,STACK_FRAME_OVERHEAD -	bl	.save_nvgprs +	bl	save_nvgprs  	INTS_RESTORE_HARD -	bl	.unknown_exception -	b	.ret_from_except +	bl	unknown_exception +	b	ret_from_except  /* Embedded Hypervisor priviledged  */  	START_EXCEPTION(ehpriv);  	NORMAL_EXCEPTION_PROLOG(0x320, BOOKE_INTERRUPT_HV_PRIV,  			        PROLOG_ADDITION_NONE) -	EXCEPTION_COMMON(0x320, PACA_EXGEN, INTS_KEEP) +	EXCEPTION_COMMON(0x320) +	addi	r3,r1,STACK_FRAME_OVERHEAD +	bl	save_nvgprs +	INTS_RESTORE_HARD +	bl	unknown_exception +	b	ret_from_except + +/* LRAT Error interrupt */ +	START_EXCEPTION(lrat_error); +	NORMAL_EXCEPTION_PROLOG(0x340, BOOKE_INTERRUPT_LRAT_ERROR, +			        PROLOG_ADDITION_NONE) +	EXCEPTION_COMMON(0x340)  	addi	r3,r1,STACK_FRAME_OVERHEAD  	bl	.save_nvgprs  	INTS_RESTORE_HARD @@ -761,16 +1014,16 @@ storage_fault_common:  	mr	r5,r15  	ld	r14,PACA_EXGEN+EX_R14(r13)  	ld	r15,PACA_EXGEN+EX_R15(r13) -	bl	.do_page_fault +	bl	do_page_fault  	cmpdi	r3,0  	bne-	1f -	b	.ret_from_except_lite -1:	bl	.save_nvgprs +	b	ret_from_except_lite +1:	bl	save_nvgprs  	mr	r5,r3  	addi	r3,r1,STACK_FRAME_OVERHEAD  	ld	r4,_DAR(r1) -	bl	.bad_page_fault -	b	.ret_from_except +	bl	bad_page_fault +	b	ret_from_except  /*   * Alignment exception doesn't fit entirely in the 0x100 bytes so it @@ -782,10 +1035,10 @@ alignment_more:  	addi	r3,r1,STACK_FRAME_OVERHEAD  	ld	r14,PACA_EXGEN+EX_R14(r13)  	ld	r15,PACA_EXGEN+EX_R15(r13) -	bl	.save_nvgprs +	bl	save_nvgprs  	INTS_RESTORE_HARD -	bl	.alignment_exception -	b	.ret_from_except +	bl	alignment_exception +	b	ret_from_except  /*   * We branch here from entry_64.S for the last stage of the exception @@ -857,6 +1110,7 @@ BAD_STACK_TRAMPOLINE(0x2e0)  BAD_STACK_TRAMPOLINE(0x300)  BAD_STACK_TRAMPOLINE(0x310)  BAD_STACK_TRAMPOLINE(0x320) +BAD_STACK_TRAMPOLINE(0x340)  BAD_STACK_TRAMPOLINE(0x400)  BAD_STACK_TRAMPOLINE(0x500)  BAD_STACK_TRAMPOLINE(0x600) @@ -918,7 +1172,7 @@ bad_stack_book3e:  	std	r12,0(r11)  	ld	r2,PACATOC(r13)  1:	addi	r3,r1,STACK_FRAME_OVERHEAD -	bl	.kernel_bad_stack +	bl	kernel_bad_stack  	b	1b  /* @@ -1053,12 +1307,9 @@ skpinv:	addi	r6,r6,1				/* Increment */  	mtspr	SPRN_MAS0,r3  	tlbre  	mfspr	r6,SPRN_MAS1 -	rlwinm	r6,r6,0,2,0	/* clear IPROT */ +	rlwinm	r6,r6,0,2,31	/* clear IPROT and VALID */  	mtspr	SPRN_MAS1,r6  	tlbwe - -	/* Invalidate TLB1 */ -	PPC_TLBILX_ALL(0,R0)  	sync  	isync @@ -1112,12 +1363,9 @@ skpinv:	addi	r6,r6,1				/* Increment */  	mtspr	SPRN_MAS0,r4  	tlbre  	mfspr	r5,SPRN_MAS1 -	rlwinm	r5,r5,0,2,0	/* clear IPROT */ +	rlwinm	r5,r5,0,2,31	/* clear IPROT and VALID */  	mtspr	SPRN_MAS1,r5  	tlbwe - -	/* Invalidate TLB1 */ -	PPC_TLBILX_ALL(0,R0)  	sync  	isync @@ -1219,22 +1467,6 @@ a2_tlbinit_after_linear_map:  	.globl  a2_tlbinit_after_iprot_flush  a2_tlbinit_after_iprot_flush: -#ifdef CONFIG_PPC_EARLY_DEBUG_WSP -	/* Now establish early debug mappings if applicable */ -	/* Restore the MAS0 we used for linear mapping load */ -	mtspr	SPRN_MAS0,r11 - -	lis	r3,(MAS1_VALID | MAS1_IPROT)@h -	ori	r3,r3,(BOOK3E_PAGESZ_4K << MAS1_TSIZE_SHIFT) -	mtspr	SPRN_MAS1,r3 -	LOAD_REG_IMMEDIATE(r3, WSP_UART_VIRT | MAS2_I | MAS2_G) -	mtspr	SPRN_MAS2,r3 -	LOAD_REG_IMMEDIATE(r3, 
WSP_UART_PHYS | MAS3_SR | MAS3_SW)
-	mtspr	SPRN_MAS7_MAS3,r3
-	/* re-use the MAS8 value from the linear mapping */
-	tlbwe
-#endif /* CONFIG_PPC_EARLY_DEBUG_WSP */
-
 	PPC_TLBILX(0,0,R0)
 	sync
 	isync
@@ -1273,13 +1505,13 @@ _GLOBAL(start_initialization_book3e)
 	 * and always use AS 0, so we just set it up to match our link
 	 * address and never use 0 based addresses.
 	 */
-	bl	.initial_tlb_book3e
+	bl	initial_tlb_book3e

 	/* Init global core bits */
-	bl	.init_core_book3e
+	bl	init_core_book3e

 	/* Init per-thread bits */
-	bl	.init_thread_book3e
+	bl	init_thread_book3e

 	/* Return to common init code */
 	tovirt(r28,r28)
@@ -1300,7 +1532,7 @@ _GLOBAL(start_initialization_book3e)
  */
 _GLOBAL(book3e_secondary_core_init_tlb_set)
 	li	r4,1
-	b	.generic_secondary_smp_init
+	b	generic_secondary_smp_init

 _GLOBAL(book3e_secondary_core_init)
 	mflr	r28
@@ -1310,18 +1542,18 @@ _GLOBAL(book3e_secondary_core_init)
 	bne	2f

 	/* Setup TLB for this core */
-	bl	.initial_tlb_book3e
+	bl	initial_tlb_book3e

 	/* We can return from the above running at a different
 	 * address, so recalculate r2 (TOC)
 	 */
-	bl	.relative_toc
+	bl	relative_toc

 	/* Init global core bits */
-2:	bl	.init_core_book3e
+2:	bl	init_core_book3e

 	/* Init per-thread bits */
-3:	bl	.init_thread_book3e
+3:	bl	init_thread_book3e

 	/* Return to common init code at proper virtual address.
 	 *
@@ -1348,14 +1580,14 @@ _GLOBAL(book3e_secondary_thread_init)
 	mflr	r28
 	b	3b

-_STATIC(init_core_book3e)
+init_core_book3e:
 	/* Establish the interrupt vector base */
 	LOAD_REG_IMMEDIATE(r3, interrupt_base_book3e)
 	mtspr	SPRN_IVPR,r3
 	sync
 	blr

-_STATIC(init_thread_book3e)
+init_thread_book3e:
 	lis	r3,(SPRN_EPCR_ICM | SPRN_EPCR_GICM)@h
 	mtspr	SPRN_EPCR,r3
@@ -1412,3 +1644,7 @@ _GLOBAL(setup_ehv_ivors)
 	SET_IVOR(38, 0x2c0) /* Guest Processor Doorbell */
 	SET_IVOR(39, 0x2e0) /* Guest Processor Doorbell Crit/MC */
 	blr
+
+_GLOBAL(setup_lrat_ivor)
+	SET_IVOR(42, 0x340) /* LRAT Error */
+	blr
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 3a9ed6ac224..a7d36b19221 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -54,14 +54,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)				\
 	xori	r12,r12,MSR_LE ;				\
 	mtspr	SPRN_SRR1,r12 ;					\
 	rfid ;		/* return to userspace */		\
-	b	. ;						\
-2:	mfspr	r12,SPRN_SRR1 ;					\
-	andi.	r12,r12,MSR_PR ;				\
-	bne	0b ;						\
-	mtspr	SPRN_SRR0,r3 ;					\
-	mtspr	SPRN_SRR1,r4 ;					\
-	mtspr	SPRN_SDR1,r5 ;					\
-	rfid ;							\
 	b	. ;	/* prevent speculative execution */

 #if defined(CONFIG_RELOCATABLE)
@@ -121,12 +113,13 @@ BEGIN_FTR_SECTION
 	cmpwi	cr1,r13,2
 	/* Total loss of HV state is fatal, we could try to use the
 	 * PIR to locate a PACA, then use an emergency stack etc...
-	 * but for now, let's just stay stuck here
+	 * OPAL v3 based powernv platforms have new idle states
+	 * which fall in this category.
 	 */
-	bgt	cr1,.
+	bgt	cr1,8f
 	GET_PACA(r13)
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	li	r0,KVM_HWTHREAD_IN_KERNEL
 	stb	r0,HSTATE_HWTHREAD_STATE(r13)
 	/* Order setting hwthread_state vs. testing hwthread_req */
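
The cmpwi cr1,r13,2 / bgt cr1,8f test above keys off the SRR1 state-loss field that the earlier rlwinm. extracted. In C terms (a sketch; the shift and mask follow from the rotate-and-mask instruction, and the 1/2/3 meanings from the comparisons the assembly performs):

	/* Sketch: decode of SRR1[46:47], the state lost while napping.
	 * 1 = no state loss, 2 = supervisor/partial state lost,
	 * 3 = hypervisor state lost (fatal above, except the new
	 * fast-sleep "8f" wakeup path). */
	static inline int srr1_wake_state_loss(unsigned long srr1)
	{
		return (srr1 >> 16) & 3;	/* rlwinm. rD,rS,47-31,30,31 */
	}
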
@@ -139,8 +132,13 @@ BEGIN_FTR_SECTION
 #endif

 	beq	cr1,2f
-	b	.power7_wakeup_noloss
-2:	b	.power7_wakeup_loss
+	b	power7_wakeup_noloss
+2:	b	power7_wakeup_loss
+
+	/* Fast Sleep wakeup on PowerNV */
+8:	GET_PACA(r13)
+	b	power7_wakeup_tb_loss
+
 9:
 END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
 #endif /* CONFIG_PPC_P7_NAP */
@@ -155,8 +153,35 @@ machine_check_pSeries_1:
 	 */
 	HMT_MEDIUM_PPR_DISCARD
 	SET_SCRATCH0(r13)		/* save r13 */
+#ifdef CONFIG_PPC_P7_NAP
+BEGIN_FTR_SECTION
+	/* Running native on arch 2.06 or later, check if we are
+	 * waking up from nap. We only handle no state loss and
+	 * supervisor state loss. We do -not- handle hypervisor
+	 * state loss at this time.
+	 */
+	mfspr	r13,SPRN_SRR1
+	rlwinm.	r13,r13,47-31,30,31
+	OPT_GET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR)
+	beq	9f
+
+	mfspr	r13,SPRN_SRR1
+	rlwinm.	r13,r13,47-31,30,31
+	/* waking up from powersave (nap) state */
+	cmpwi	cr1,r13,2
+	/* Total loss of HV state is fatal. Let's just stay stuck here */
+	OPT_GET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR)
+	bgt	cr1,.
+9:
+	OPT_SET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR)
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+#endif /* CONFIG_PPC_P7_NAP */
 	EXCEPTION_PROLOG_0(PACA_EXMC)
+BEGIN_FTR_SECTION
+	b	machine_check_pSeries_early
+FTR_SECTION_ELSE
 	b	machine_check_pSeries_0
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)

 	. = 0x300
 	.globl data_access_pSeries
@@ -186,16 +211,16 @@ data_access_slb_pSeries:
 #endif /* __DISABLED__ */
 	mfspr	r12,SPRN_SRR1
 #ifndef CONFIG_RELOCATABLE
-	b	.slb_miss_realmode
+	b	slb_miss_realmode
 #else
 	/*
-	 * We can't just use a direct branch to .slb_miss_realmode
+	 * We can't just use a direct branch to slb_miss_realmode
 	 * because the distance from here to there depends on where
 	 * the kernel ends up being put.
 	 */
 	mfctr	r11
 	ld	r10,PACAKBASE(r13)
-	LOAD_HANDLER(r10, .slb_miss_realmode)
+	LOAD_HANDLER(r10, slb_miss_realmode)
 	mtctr	r10
 	bctr
 #endif
@@ -218,11 +243,11 @@ instruction_access_slb_pSeries:
 #endif /* __DISABLED__ */
 	mfspr	r12,SPRN_SRR1
 #ifndef CONFIG_RELOCATABLE
-	b	.slb_miss_realmode
+	b	slb_miss_realmode
 #else
 	mfctr	r11
 	ld	r10,PACAKBASE(r13)
-	LOAD_HANDLER(r10, .slb_miss_realmode)
+	LOAD_HANDLER(r10, slb_miss_realmode)
 	mtctr	r10
 	bctr
 #endif
@@ -405,6 +430,80 @@ denorm_exception_hv:
 	.align	7
 	/* moved from 0x200 */
+machine_check_pSeries_early:
+BEGIN_FTR_SECTION
+	EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200)
+	/*
+	 * Register contents:
+	 * R13		= PACA
+	 * R9		= CR
+	 * Original R9 to R13 is saved on PACA_EXMC
+	 *
+	 * Switch to mc_emergency stack and handle re-entrancy (we limit
+	 * the nested MCE up to level 4 to avoid stack overflow).
+	 * Save MCE registers srr1, srr0, dar and dsisr and then set ME=1
+	 *
+	 * We use paca->in_mce to check whether this is the first entry or
+	 * nested machine check. We increment paca->in_mce to track nested
+	 * machine checks.
+	 *
+	 * If this is the first entry then set stack pointer to
+	 * paca->mc_emergency_sp, otherwise r1 is already pointing to
+	 * stack frame on mc_emergency stack.
+	 *
+	 * NOTE: We are here with MSR_ME=0 (off), which means we risk a
+	 * checkstop if we get another machine check exception before we do
+	 * rfid with MSR_ME=1.
+	 */
+	mr	r11,r1			/* Save r1 */
+	lhz	r10,PACA_IN_MCE(r13)
+	cmpwi	r10,0			/* Are we in nested machine check */
+	bne	0f			/* Yes, we are. */
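
Restated in C, the re-entrancy accounting implemented by the assembly that follows looks like this (illustrative sketch; paca->in_mce and paca->mc_emergency_sp are the real fields behind PACA_IN_MCE and PACAMCEMERGSP, while panic_unrecoverable() is a hypothetical stand-in for the "2:" overflow path):

	/* Sketch: pick the stack for a (possibly nested) machine check. */
	static unsigned long mce_pick_stack(struct paca_struct *paca,
					    unsigned long r1)
	{
		if (paca->in_mce == 0)		/* first, non-nested entry */
			r1 = (unsigned long)paca->mc_emergency_sp;
		r1 -= INT_FRAME_SIZE;		/* allocate a frame either way */
		if (++paca->in_mce > 4)		/* limit nesting to 4 levels */
			panic_unrecoverable();	/* hypothetical: the "2:" path */
		return r1;
	}
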
+	/* First machine check entry */
+	ld	r1,PACAMCEMERGSP(r13)	/* Use MC emergency stack */
+0:	subi	r1,r1,INT_FRAME_SIZE	/* alloc stack frame */
+	addi	r10,r10,1		/* increment paca->in_mce */
+	sth	r10,PACA_IN_MCE(r13)
+	/* Limit nested MCE to level 4 to avoid stack overflow */
+	cmpwi	r10,4
+	bgt	2f			/* Check if we hit limit of 4 */
+	std	r11,GPR1(r1)		/* Save r1 on the stack. */
+	std	r11,0(r1)		/* make stack chain pointer */
+	mfspr	r11,SPRN_SRR0		/* Save SRR0 */
+	std	r11,_NIP(r1)
+	mfspr	r11,SPRN_SRR1		/* Save SRR1 */
+	std	r11,_MSR(r1)
+	mfspr	r11,SPRN_DAR		/* Save DAR */
+	std	r11,_DAR(r1)
+	mfspr	r11,SPRN_DSISR		/* Save DSISR */
+	std	r11,_DSISR(r1)
+	std	r9,_CCR(r1)		/* Save CR in stackframe */
+	/* Save r9 through r13 from EXMC save area to stack frame. */
+	EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
+	mfmsr	r11			/* get MSR value */
+	ori	r11,r11,MSR_ME		/* turn on ME bit */
+	ori	r11,r11,MSR_RI		/* turn on RI bit */
+	ld	r12,PACAKBASE(r13)	/* get high part of &label */
+	LOAD_HANDLER(r12, machine_check_handle_early)
+1:	mtspr	SPRN_SRR0,r12
+	mtspr	SPRN_SRR1,r11
+	rfid
+	b	.	/* prevent speculative execution */
+2:
+	/* Stack overflow. Stay on emergency stack and panic.
+	 * Keep the ME bit off while panicking, so that if we hit
+	 * another machine check we checkstop.
+	 */
+	addi	r1,r1,INT_FRAME_SIZE	/* go back to previous stack frame */
+	ld	r11,PACAKMSR(r13)
+	ld	r12,PACAKBASE(r13)
+	LOAD_HANDLER(r12, unrecover_mce)
+	li	r10,MSR_ME
+	andc	r11,r11,r10		/* Turn off MSR_ME */
+	b	1b
+	b	.	/* prevent speculative execution */
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
+
 machine_check_pSeries:
 	.globl machine_check_fwnmi
 machine_check_fwnmi:
@@ -425,7 +524,7 @@ data_access_check_stab:
 	mfspr	r9,SPRN_DSISR
 	srdi	r10,r10,60
 	rlwimi	r10,r9,16,0x20
-#ifdef CONFIG_KVM_BOOK3S_PR
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
 	lbz	r9,HSTATE_IN_GUEST(r13)
 	rlwimi	r10,r9,8,0x300
 #endif
@@ -441,7 +540,7 @@ do_stab_bolted_pSeries:
 	std	r12,PACA_EXSLB+EX_R12(r13)
 	GET_SCRATCH0(r10)
 	std	r10,PACA_EXSLB+EX_R13(r13)
-	EXCEPTION_PROLOG_PSERIES_1(.do_stab_bolted, EXC_STD)
+	EXCEPTION_PROLOG_PSERIES_1(do_stab_bolted, EXC_STD)

 	KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x300)
 	KVM_HANDLER_SKIP(PACA_EXSLB, EXC_STD, 0x380)
@@ -650,6 +749,32 @@ slb_miss_user_pseries:
 	b	.				/* prevent spec. execution */
 #endif /* __DISABLED__ */

+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+kvmppc_skip_interrupt:
+	/*
+	 * Here all GPRs are unchanged from when the interrupt happened
+	 * except for r13, which is saved in SPRG_SCRATCH0.
+	 */
+	mfspr	r13, SPRN_SRR0
+	addi	r13, r13, 4
+	mtspr	SPRN_SRR0, r13
+	GET_SCRATCH0(r13)
+	rfid
+	b	.
+
+kvmppc_skip_Hinterrupt:
+	/*
+	 * Here all GPRs are unchanged from when the interrupt happened
+	 * except for r13, which is saved in SPRG_SCRATCH0.
+	 */
+	mfspr	r13, SPRN_HSRR0
+	addi	r13, r13, 4
+	mtspr	SPRN_HSRR0, r13
+	GET_SCRATCH0(r13)
+	hrfid
+	b	.
+#endif
+
 /*
  * Code from here down to __end_handlers is invoked from the
  * exception prologs above.  Because the prologs assemble the
@@ -660,62 +785,38 @@ slb_miss_user_pseries:

 /*** Common interrupt handlers ***/

-	STD_EXCEPTION_COMMON(0x100, system_reset, .system_reset_exception)
-
-	/*
-	 * Machine check is different because we use a different
-	 * save area: PACA_EXMC instead of PACA_EXGEN.
-	 */ -	.align	7 -	.globl machine_check_common -machine_check_common: - -	mfspr	r10,SPRN_DAR -	std	r10,PACA_EXGEN+EX_DAR(r13) -	mfspr	r10,SPRN_DSISR -	stw	r10,PACA_EXGEN+EX_DSISR(r13) -	EXCEPTION_PROLOG_COMMON(0x200, PACA_EXMC) -	FINISH_NAP -	DISABLE_INTS -	ld	r3,PACA_EXGEN+EX_DAR(r13) -	lwz	r4,PACA_EXGEN+EX_DSISR(r13) -	std	r3,_DAR(r1) -	std	r4,_DSISR(r1) -	bl	.save_nvgprs -	addi	r3,r1,STACK_FRAME_OVERHEAD -	bl	.machine_check_exception -	b	.ret_from_except +	STD_EXCEPTION_COMMON(0x100, system_reset, system_reset_exception)  	STD_EXCEPTION_COMMON_ASYNC(0x500, hardware_interrupt, do_IRQ) -	STD_EXCEPTION_COMMON_ASYNC(0x900, decrementer, .timer_interrupt) -	STD_EXCEPTION_COMMON(0x980, hdecrementer, .hdec_interrupt) +	STD_EXCEPTION_COMMON_ASYNC(0x900, decrementer, timer_interrupt) +	STD_EXCEPTION_COMMON(0x980, hdecrementer, hdec_interrupt)  #ifdef CONFIG_PPC_DOORBELL -	STD_EXCEPTION_COMMON_ASYNC(0xa00, doorbell_super, .doorbell_exception) +	STD_EXCEPTION_COMMON_ASYNC(0xa00, doorbell_super, doorbell_exception)  #else -	STD_EXCEPTION_COMMON_ASYNC(0xa00, doorbell_super, .unknown_exception) +	STD_EXCEPTION_COMMON_ASYNC(0xa00, doorbell_super, unknown_exception)  #endif -	STD_EXCEPTION_COMMON(0xb00, trap_0b, .unknown_exception) -	STD_EXCEPTION_COMMON(0xd00, single_step, .single_step_exception) -	STD_EXCEPTION_COMMON(0xe00, trap_0e, .unknown_exception) -	STD_EXCEPTION_COMMON(0xe40, emulation_assist, .emulation_assist_interrupt) -	STD_EXCEPTION_COMMON(0xe60, hmi_exception, .unknown_exception) +	STD_EXCEPTION_COMMON(0xb00, trap_0b, unknown_exception) +	STD_EXCEPTION_COMMON(0xd00, single_step, single_step_exception) +	STD_EXCEPTION_COMMON(0xe00, trap_0e, unknown_exception) +	STD_EXCEPTION_COMMON(0xe40, emulation_assist, emulation_assist_interrupt) +	STD_EXCEPTION_COMMON(0xe60, hmi_exception, unknown_exception)  #ifdef CONFIG_PPC_DOORBELL -	STD_EXCEPTION_COMMON_ASYNC(0xe80, h_doorbell, .doorbell_exception) +	STD_EXCEPTION_COMMON_ASYNC(0xe80, h_doorbell, doorbell_exception)  #else -	STD_EXCEPTION_COMMON_ASYNC(0xe80, h_doorbell, .unknown_exception) +	STD_EXCEPTION_COMMON_ASYNC(0xe80, h_doorbell, unknown_exception)  #endif -	STD_EXCEPTION_COMMON_ASYNC(0xf00, performance_monitor, .performance_monitor_exception) -	STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, .instruction_breakpoint_exception) -	STD_EXCEPTION_COMMON(0x1502, denorm, .unknown_exception) +	STD_EXCEPTION_COMMON_ASYNC(0xf00, performance_monitor, performance_monitor_exception) +	STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, instruction_breakpoint_exception) +	STD_EXCEPTION_COMMON(0x1502, denorm, unknown_exception)  #ifdef CONFIG_ALTIVEC -	STD_EXCEPTION_COMMON(0x1700, altivec_assist, .altivec_assist_exception) +	STD_EXCEPTION_COMMON(0x1700, altivec_assist, altivec_assist_exception)  #else -	STD_EXCEPTION_COMMON(0x1700, altivec_assist, .unknown_exception) +	STD_EXCEPTION_COMMON(0x1700, altivec_assist, unknown_exception)  #endif  #ifdef CONFIG_CBE_RAS -	STD_EXCEPTION_COMMON(0x1200, cbe_system_error, .cbe_system_error_exception) -	STD_EXCEPTION_COMMON(0x1600, cbe_maintenance, .cbe_maintenance_exception) -	STD_EXCEPTION_COMMON(0x1800, cbe_thermal, .cbe_thermal_exception) +	STD_EXCEPTION_COMMON(0x1200, cbe_system_error, cbe_system_error_exception) +	STD_EXCEPTION_COMMON(0x1600, cbe_maintenance, cbe_maintenance_exception) +	STD_EXCEPTION_COMMON(0x1800, cbe_thermal, cbe_thermal_exception)  #endif /* CONFIG_CBE_RAS */  	/* @@ -744,16 +845,16 @@ data_access_slb_relon_pSeries:  	mfspr	r3,SPRN_DAR  	mfspr	r12,SPRN_SRR1  #ifndef CONFIG_RELOCATABLE 
-	b	.slb_miss_realmode +	b	slb_miss_realmode  #else  	/* -	 * We can't just use a direct branch to .slb_miss_realmode +	 * We can't just use a direct branch to slb_miss_realmode  	 * because the distance from here to there depends on where  	 * the kernel ends up being put.  	 */  	mfctr	r11  	ld	r10,PACAKBASE(r13) -	LOAD_HANDLER(r10, .slb_miss_realmode) +	LOAD_HANDLER(r10, slb_miss_realmode)  	mtctr	r10  	bctr  #endif @@ -769,11 +870,11 @@ instruction_access_slb_relon_pSeries:  	mfspr	r3,SPRN_SRR0		/* SRR0 is faulting address */  	mfspr	r12,SPRN_SRR1  #ifndef CONFIG_RELOCATABLE -	b	.slb_miss_realmode +	b	slb_miss_realmode  #else  	mfctr	r11  	ld	r10,PACAKBASE(r13) -	LOAD_HANDLER(r10, .slb_miss_realmode) +	LOAD_HANDLER(r10, slb_miss_realmode)  	mtctr	r10  	bctr  #endif @@ -881,7 +982,7 @@ system_call_entry:  	b	system_call_common  ppc64_runlatch_on_trampoline: -	b	.__ppc64_runlatch_on +	b	__ppc64_runlatch_on  /*   * Here we have detected that the kernel stack pointer is bad. @@ -940,7 +1041,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)  	std	r12,RESULT(r1)  	std	r11,STACK_FRAME_OVERHEAD-16(r1)  1:	addi	r3,r1,STACK_FRAME_OVERHEAD -	bl	.kernel_bad_stack +	bl	kernel_bad_stack  	b	1b  /* @@ -961,7 +1062,7 @@ data_access_common:  	ld	r3,PACA_EXGEN+EX_DAR(r13)  	lwz	r4,PACA_EXGEN+EX_DSISR(r13)  	li	r5,0x300 -	b	.do_hash_page		/* Try to handle as hpte fault */ +	b	do_hash_page		/* Try to handle as hpte fault */  	.align  7  	.globl  h_data_storage_common @@ -971,11 +1072,11 @@ h_data_storage_common:  	mfspr   r10,SPRN_HDSISR  	stw     r10,PACA_EXGEN+EX_DSISR(r13)  	EXCEPTION_PROLOG_COMMON(0xe00, PACA_EXGEN) -	bl      .save_nvgprs +	bl      save_nvgprs  	DISABLE_INTS  	addi    r3,r1,STACK_FRAME_OVERHEAD -	bl      .unknown_exception -	b       .ret_from_except +	bl      unknown_exception +	b       ret_from_except  	.align	7  	.globl instruction_access_common @@ -986,9 +1087,9 @@ instruction_access_common:  	ld	r3,_NIP(r1)  	andis.	r4,r12,0x5820  	li	r5,0x400 -	b	.do_hash_page		/* Try to handle as hpte fault */ +	b	do_hash_page		/* Try to handle as hpte fault */ -	STD_EXCEPTION_COMMON(0xe20, h_instr_storage, .unknown_exception) +	STD_EXCEPTION_COMMON(0xe20, h_instr_storage, unknown_exception)  /*   * Here is the common SLB miss user that is used when going to virtual @@ -1003,7 +1104,7 @@ slb_miss_user_common:  	stw	r9,PACA_EXGEN+EX_CCR(r13)  	std	r10,PACA_EXGEN+EX_LR(r13)  	std	r11,PACA_EXGEN+EX_SRR0(r13) -	bl	.slb_allocate_user +	bl	slb_allocate_user  	ld	r10,PACA_EXGEN+EX_LR(r13)  	ld	r3,PACA_EXGEN+EX_R3(r13) @@ -1046,14 +1147,38 @@ slb_miss_fault:  unrecov_user_slb:  	EXCEPTION_PROLOG_COMMON(0x4200, PACA_EXGEN)  	DISABLE_INTS -	bl	.save_nvgprs +	bl	save_nvgprs  1:	addi	r3,r1,STACK_FRAME_OVERHEAD -	bl	.unrecoverable_exception +	bl	unrecoverable_exception  	b	1b  #endif /* __DISABLED__ */ +	/* +	 * Machine check is different because we use a different +	 * save area: PACA_EXMC instead of PACA_EXGEN. 
+	 */ +	.align	7 +	.globl machine_check_common +machine_check_common: + +	mfspr	r10,SPRN_DAR +	std	r10,PACA_EXGEN+EX_DAR(r13) +	mfspr	r10,SPRN_DSISR +	stw	r10,PACA_EXGEN+EX_DSISR(r13) +	EXCEPTION_PROLOG_COMMON(0x200, PACA_EXMC) +	FINISH_NAP +	DISABLE_INTS +	ld	r3,PACA_EXGEN+EX_DAR(r13) +	lwz	r4,PACA_EXGEN+EX_DSISR(r13) +	std	r3,_DAR(r1) +	std	r4,_DSISR(r1) +	bl	save_nvgprs +	addi	r3,r1,STACK_FRAME_OVERHEAD +	bl	machine_check_exception +	b	ret_from_except +  	.align	7  	.globl alignment_common  alignment_common: @@ -1066,31 +1191,31 @@ alignment_common:  	lwz	r4,PACA_EXGEN+EX_DSISR(r13)  	std	r3,_DAR(r1)  	std	r4,_DSISR(r1) -	bl	.save_nvgprs +	bl	save_nvgprs  	DISABLE_INTS  	addi	r3,r1,STACK_FRAME_OVERHEAD -	bl	.alignment_exception -	b	.ret_from_except +	bl	alignment_exception +	b	ret_from_except  	.align	7  	.globl program_check_common  program_check_common:  	EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN) -	bl	.save_nvgprs +	bl	save_nvgprs  	DISABLE_INTS  	addi	r3,r1,STACK_FRAME_OVERHEAD -	bl	.program_check_exception -	b	.ret_from_except +	bl	program_check_exception +	b	ret_from_except  	.align	7  	.globl fp_unavailable_common  fp_unavailable_common:  	EXCEPTION_PROLOG_COMMON(0x800, PACA_EXGEN)  	bne	1f			/* if from user, just load it up */ -	bl	.save_nvgprs +	bl	save_nvgprs  	DISABLE_INTS  	addi	r3,r1,STACK_FRAME_OVERHEAD -	bl	.kernel_fp_unavailable_exception +	bl	kernel_fp_unavailable_exception  	BUG_OPCODE  1:  #ifdef CONFIG_PPC_TRANSACTIONAL_MEM @@ -1102,15 +1227,15 @@ BEGIN_FTR_SECTION  	bne-	2f  END_FTR_SECTION_IFSET(CPU_FTR_TM)  #endif -	bl	.load_up_fpu +	bl	load_up_fpu  	b	fast_exception_return  #ifdef CONFIG_PPC_TRANSACTIONAL_MEM  2:	/* User process was in a transaction */ -	bl	.save_nvgprs +	bl	save_nvgprs  	DISABLE_INTS  	addi	r3,r1,STACK_FRAME_OVERHEAD -	bl	.fp_unavailable_tm -	b	.ret_from_except +	bl	fp_unavailable_tm +	b	ret_from_except  #endif  	.align	7  	.globl altivec_unavailable_common @@ -1128,24 +1253,24 @@ BEGIN_FTR_SECTION  	bne-	2f    END_FTR_SECTION_NESTED(CPU_FTR_TM, CPU_FTR_TM, 69)  #endif -	bl	.load_up_altivec +	bl	load_up_altivec  	b	fast_exception_return  #ifdef CONFIG_PPC_TRANSACTIONAL_MEM  2:	/* User process was in a transaction */ -	bl	.save_nvgprs +	bl	save_nvgprs  	DISABLE_INTS  	addi	r3,r1,STACK_FRAME_OVERHEAD -	bl	.altivec_unavailable_tm -	b	.ret_from_except +	bl	altivec_unavailable_tm +	b	ret_from_except  #endif  1:  END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)  #endif -	bl	.save_nvgprs +	bl	save_nvgprs  	DISABLE_INTS  	addi	r3,r1,STACK_FRAME_OVERHEAD -	bl	.altivec_unavailable_exception -	b	.ret_from_except +	bl	altivec_unavailable_exception +	b	ret_from_except  	.align	7  	.globl vsx_unavailable_common @@ -1163,26 +1288,26 @@ BEGIN_FTR_SECTION  	bne-	2f    END_FTR_SECTION_NESTED(CPU_FTR_TM, CPU_FTR_TM, 69)  #endif -	b	.load_up_vsx +	b	load_up_vsx  #ifdef CONFIG_PPC_TRANSACTIONAL_MEM  2:	/* User process was in a transaction */ -	bl	.save_nvgprs +	bl	save_nvgprs  	DISABLE_INTS  	addi	r3,r1,STACK_FRAME_OVERHEAD -	bl	.vsx_unavailable_tm -	b	.ret_from_except +	bl	vsx_unavailable_tm +	b	ret_from_except  #endif  1:  END_FTR_SECTION_IFSET(CPU_FTR_VSX)  #endif -	bl	.save_nvgprs +	bl	save_nvgprs  	DISABLE_INTS  	addi	r3,r1,STACK_FRAME_OVERHEAD -	bl	.vsx_unavailable_exception -	b	.ret_from_except +	bl	vsx_unavailable_exception +	b	ret_from_except -	STD_EXCEPTION_COMMON(0xf60, facility_unavailable, .facility_unavailable_exception) -	STD_EXCEPTION_COMMON(0xf80, hv_facility_unavailable, .facility_unavailable_exception) +	STD_EXCEPTION_COMMON(0xf60, facility_unavailable, 
facility_unavailable_exception)
+	STD_EXCEPTION_COMMON(0xf80, hv_facility_unavailable, facility_unavailable_exception)

 	.align	7
 	.globl	__end_handlers
@@ -1237,6 +1362,154 @@ _GLOBAL(opal_mc_secondary_handler)

 #endif /* CONFIG_PPC_POWERNV */

+#define MACHINE_CHECK_HANDLER_WINDUP			\
+	/* Clear MSR_RI before setting SRR0 and SRR1. */\
+	li	r0,MSR_RI;				\
+	mfmsr	r9;		/* get MSR value */	\
+	andc	r9,r9,r0;				\
+	mtmsrd	r9,1;		/* Clear MSR_RI */	\
+	/* Move original SRR0 and SRR1 into the respective regs */	\
+	ld	r9,_MSR(r1);				\
+	mtspr	SPRN_SRR1,r9;				\
+	ld	r3,_NIP(r1);				\
+	mtspr	SPRN_SRR0,r3;				\
+	ld	r9,_CTR(r1);				\
+	mtctr	r9;					\
+	ld	r9,_XER(r1);				\
+	mtxer	r9;					\
+	ld	r9,_LINK(r1);				\
+	mtlr	r9;					\
+	REST_GPR(0, r1);				\
+	REST_8GPRS(2, r1);				\
+	REST_GPR(10, r1);				\
+	ld	r11,_CCR(r1);				\
+	mtcr	r11;					\
+	/* Decrement paca->in_mce. */			\
+	lhz	r12,PACA_IN_MCE(r13);			\
+	subi	r12,r12,1;				\
+	sth	r12,PACA_IN_MCE(r13);			\
+	REST_GPR(11, r1);				\
+	REST_2GPRS(12, r1);				\
+	/* restore original r1. */			\
+	ld	r1,GPR1(r1)
+
+	/*
+	 * Handle machine check early in real mode. We come here with
+	 * ME=1, MMU (IR=0 and DR=0) off and using MC emergency stack.
+	 */
+	.align	7
+	.globl machine_check_handle_early
+machine_check_handle_early:
+	std	r0,GPR0(r1)	/* Save r0 */
+	EXCEPTION_PROLOG_COMMON_3(0x200)
+	bl	save_nvgprs
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	bl	machine_check_early
+	std	r3,RESULT(r1)	/* Save result */
+	ld	r12,_MSR(r1)
+#ifdef	CONFIG_PPC_P7_NAP
+	/*
+	 * Check if thread was in power saving mode. We come here when any
+	 * of the following is true:
+	 * a. thread wasn't in power saving mode
+	 * b. thread was in power saving mode with no state loss or
+	 *    supervisor state loss
+	 *
+	 * Go back to nap again if (b) is true.
+	 */
+	rlwinm.	r11,r12,47-31,30,31	/* Was it in power saving mode? */
+	beq	4f			/* No, it wasn't */
+	/* Thread was in power saving mode. Go back to nap again. */
+	cmpwi	r11,2
+	bne	3f
+	/* Supervisor state loss */
+	li	r0,1
+	stb	r0,PACA_NAPSTATELOST(r13)
+3:	bl	machine_check_queue_event
+	MACHINE_CHECK_HANDLER_WINDUP
+	GET_PACA(r13)
+	ld	r1,PACAR1(r13)
+	b	power7_enter_nap_mode
+4:
+#endif
+	/*
+	 * Check if we are coming from hypervisor userspace. If yes then we
+	 * continue in host kernel in V mode to deliver the MC event.
+	 */
+	rldicl.	r11,r12,4,63		/* See if MC hit while in HV mode. */
+	beq	5f
+	andi.	r11,r12,MSR_PR		/* See if coming from user. */
+	bne	9f			/* continue in V mode if we are. */
+
+5:
+#ifdef CONFIG_KVM_BOOK3S_64_HV
+	/*
+	 * We are coming from kernel context. Check if we are coming from
+	 * guest. If yes, then we can continue. We will fall through
+	 * do_kvm_200->kvmppc_interrupt to deliver the MC event to guest.
+	 */
+	lbz	r11,HSTATE_IN_GUEST(r13)
+	cmpwi	r11,0			/* Check if coming from guest */
+	bne	9f			/* continue if we are. */
+#endif
+	/*
+	 * At this point we are not sure about what context we come from.
+	 * Queue up the MCE event and return from the interrupt.
+	 * But before that, check if this is an unrecoverable exception.
+	 * If yes, then stay on emergency stack and panic.
+	 */
+	andi.	r11,r12,MSR_RI
+	bne	2f
+1:	mfspr	r11,SPRN_SRR0
+	ld	r10,PACAKBASE(r13)
+	LOAD_HANDLER(r10,unrecover_mce)
+	mtspr	SPRN_SRR0,r10
+	ld	r10,PACAKMSR(r13)
+	/*
+	 * We are going down. But there are chances that we might get hit by
+	 * another MCE during panic path and we may run into unstable state
+	 * with no way out.
Hence, turn ME bit off while going down, so that +	 * when another MCE is hit during panic path, system will checkstop +	 * and hypervisor will get restarted cleanly by SP. +	 */ +	li	r3,MSR_ME +	andc	r10,r10,r3		/* Turn off MSR_ME */ +	mtspr	SPRN_SRR1,r10 +	rfid +	b	. +2: +	/* +	 * Check if we have successfully handled/recovered from error, if not +	 * then stay on emergency stack and panic. +	 */ +	ld	r3,RESULT(r1)	/* Load result */ +	cmpdi	r3,0		/* see if we handled MCE successfully */ + +	beq	1b		/* if !handled then panic */ +	/* +	 * Return from MC interrupt. +	 * Queue up the MCE event so that we can log it later, while +	 * returning from kernel or opal call. +	 */ +	bl	machine_check_queue_event +	MACHINE_CHECK_HANDLER_WINDUP +	rfid +9: +	/* Deliver the machine check to host kernel in V mode. */ +	MACHINE_CHECK_HANDLER_WINDUP +	b	machine_check_pSeries + +unrecover_mce: +	/* Invoke machine_check_exception to print MCE event and panic. */ +	addi	r3,r1,STACK_FRAME_OVERHEAD +	bl	machine_check_exception +	/* +	 * We will not reach here. Even if we did, there is no way out. Call +	 * unrecoverable_exception and die. +	 */ +1:	addi	r3,r1,STACK_FRAME_OVERHEAD +	bl	unrecoverable_exception +	b	1b  /*   * r13 points to the PACA, r9 contains the saved CR,   * r12 contain the saved SRR1, SRR0 is still ready for return @@ -1245,7 +1518,7 @@ _GLOBAL(opal_mc_secondary_handler)   * r3 is saved in paca->slb_r3   * We assume we aren't going to take any exceptions during this procedure.   */ -_GLOBAL(slb_miss_realmode) +slb_miss_realmode:  	mflr	r10  #ifdef CONFIG_RELOCATABLE  	mtctr	r11 @@ -1254,7 +1527,7 @@ _GLOBAL(slb_miss_realmode)  	stw	r9,PACA_EXSLB+EX_CCR(r13)	/* save CR in exc. frame */  	std	r10,PACA_EXSLB+EX_LR(r13)	/* save LR */ -	bl	.slb_allocate_realmode +	bl	slb_allocate_realmode  	/* All done -- return from exception. 
*/ @@ -1294,9 +1567,9 @@ _GLOBAL(slb_miss_realmode)  unrecov_slb:  	EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB)  	DISABLE_INTS -	bl	.save_nvgprs +	bl	save_nvgprs  1:	addi	r3,r1,STACK_FRAME_OVERHEAD -	bl	.unrecoverable_exception +	bl	unrecoverable_exception  	b	1b @@ -1313,7 +1586,7 @@ power4_fixup_nap:   * Hash table stuff   */  	.align	7 -_STATIC(do_hash_page) +do_hash_page:  	std	r3,_DAR(r1)  	std	r4,_DSISR(r1) @@ -1350,7 +1623,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB)  	 *  	 * at return r3 = 0 for success, 1 for page fault, negative for error  	 */ -	bl	.hash_page		/* build HPTE if possible */ +	bl	hash_page		/* build HPTE if possible */  	cmpdi	r3,0			/* see if hash_page succeeded */  	/* Success */ @@ -1364,35 +1637,35 @@ handle_page_fault:  11:	ld	r4,_DAR(r1)  	ld	r5,_DSISR(r1)  	addi	r3,r1,STACK_FRAME_OVERHEAD -	bl	.do_page_fault +	bl	do_page_fault  	cmpdi	r3,0  	beq+	12f -	bl	.save_nvgprs +	bl	save_nvgprs  	mr	r5,r3  	addi	r3,r1,STACK_FRAME_OVERHEAD  	lwz	r4,_DAR(r1) -	bl	.bad_page_fault -	b	.ret_from_except +	bl	bad_page_fault +	b	ret_from_except  /* We have a data breakpoint exception - handle it */  handle_dabr_fault: -	bl	.save_nvgprs +	bl	save_nvgprs  	ld      r4,_DAR(r1)  	ld      r5,_DSISR(r1)  	addi    r3,r1,STACK_FRAME_OVERHEAD -	bl      .do_break -12:	b       .ret_from_except_lite +	bl      do_break +12:	b       ret_from_except_lite  /* We have a page fault that hash_page could handle but HV refused   * the PTE insertion   */ -13:	bl	.save_nvgprs +13:	bl	save_nvgprs  	mr	r5,r3  	addi	r3,r1,STACK_FRAME_OVERHEAD  	ld	r4,_DAR(r1) -	bl	.low_hash_fault -	b	.ret_from_except +	bl	low_hash_fault +	b	ret_from_except  /*   * We come here as a result of a DSI at a point where we don't want @@ -1401,16 +1674,16 @@ handle_dabr_fault:   * were soft-disabled.  We want to invoke the exception handler for   * the access, or panic if there isn't a handler.   */ -77:	bl	.save_nvgprs +77:	bl	save_nvgprs  	mr	r4,r3  	addi	r3,r1,STACK_FRAME_OVERHEAD  	li	r5,SIGSEGV -	bl	.bad_page_fault -	b	.ret_from_except +	bl	bad_page_fault +	b	ret_from_except  	/* here we have a segment miss */  do_ste_alloc: -	bl	.ste_allocate		/* try to insert stab entry */ +	bl	ste_allocate		/* try to insert stab entry */  	cmpdi	r3,0  	bne-	handle_page_fault  	b	fast_exception_return @@ -1423,7 +1696,7 @@ do_ste_alloc:   * We assume (DAR >> 60) == 0xc.   */  	.align	7 -_GLOBAL(do_stab_bolted) +do_stab_bolted:  	stw	r9,PACA_EXSLB+EX_CCR(r13)	/* save CR in exc. frame */  	std	r11,PACA_EXSLB+EX_SRR0(r13)	/* save SRR0 in exc. 
frame */  	mfspr	r11,SPRN_DAR			/* ea */ diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 2230fd0ca3e..742694c1d85 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -55,9 +55,9 @@ int crash_mem_ranges;  int __init early_init_dt_scan_fw_dump(unsigned long node,  			const char *uname, int depth, void *data)  { -	__be32 *sections; +	const __be32 *sections;  	int i, num_sections; -	unsigned long size; +	int size;  	const int *token;  	if (depth != 1 || strcmp(uname, "rtas") != 0) @@ -69,7 +69,7 @@ int __init early_init_dt_scan_fw_dump(unsigned long node,  	 */  	token = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL);  	if (!token) -		return 0; +		return 1;  	fw_dump.fadump_supported = 1;  	fw_dump.ibm_configure_kernel_dump = *token; @@ -92,7 +92,7 @@ int __init early_init_dt_scan_fw_dump(unsigned long node,  					&size);  	if (!sections) -		return 0; +		return 1;  	num_sections = size / (3 * sizeof(u32)); @@ -110,6 +110,7 @@ int __init early_init_dt_scan_fw_dump(unsigned long node,  			break;  		}  	} +  	return 1;  } @@ -645,7 +646,7 @@ static int __init fadump_build_cpu_notes(const struct fadump_mem_struct *fdm)  		}  		/* Lower 4 bytes of reg_value contains logical cpu id */  		cpu = reg_entry->reg_value & FADUMP_CPU_ID_MASK; -		if (!cpumask_test_cpu(cpu, &fdh->cpu_online_mask)) { +		if (fdh && !cpumask_test_cpu(cpu, &fdh->cpu_online_mask)) {  			SKIP_TO_NEXT_CPU(reg_entry);  			continue;  		} @@ -662,9 +663,11 @@ static int __init fadump_build_cpu_notes(const struct fadump_mem_struct *fdm)  	}  	fadump_final_note(note_buf); -	pr_debug("Updating elfcore header (%llx) with cpu notes\n", +	if (fdh) { +		pr_debug("Updating elfcore header (%llx) with cpu notes\n",  							fdh->elfcorehdr_addr); -	fadump_update_elfcore_header((char *)__va(fdh->elfcorehdr_addr)); +		fadump_update_elfcore_header((char *)__va(fdh->elfcorehdr_addr)); +	}  	return 0;  error_out: diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index caeaabf11a2..9ad236e5d2c 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -35,15 +35,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX);					\  2:	REST_32VSRS(n,c,base);						\  3: -#define __REST_32FPVSRS_TRANSACT(n,c,base)				\ -BEGIN_FTR_SECTION							\ -	b	2f;							\ -END_FTR_SECTION_IFSET(CPU_FTR_VSX);					\ -	REST_32FPRS_TRANSACT(n,base);					\ -	b	3f;							\ -2:	REST_32VSRS_TRANSACT(n,c,base);					\ -3: -  #define __SAVE_32FPVSRS(n,c,base)					\  BEGIN_FTR_SECTION							\  	b	2f;							\ @@ -54,40 +45,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX);					\  3:  #else  #define __REST_32FPVSRS(n,b,base)	REST_32FPRS(n, base) -#define __REST_32FPVSRS_TRANSACT(n,b,base)	REST_32FPRS(n, base)  #define __SAVE_32FPVSRS(n,b,base)	SAVE_32FPRS(n, base)  #endif  #define REST_32FPVSRS(n,c,base) __REST_32FPVSRS(n,__REG_##c,__REG_##base) -#define REST_32FPVSRS_TRANSACT(n,c,base) \ -	__REST_32FPVSRS_TRANSACT(n,__REG_##c,__REG_##base)  #define SAVE_32FPVSRS(n,c,base) __SAVE_32FPVSRS(n,__REG_##c,__REG_##base)  #ifdef CONFIG_PPC_TRANSACTIONAL_MEM -/* - * Wrapper to call load_up_fpu from C. - * void do_load_up_fpu(struct pt_regs *regs); - */ -_GLOBAL(do_load_up_fpu) -	mflr	r0 -	std	r0, 16(r1) -	stdu	r1, -112(r1) - -	subi	r6, r3, STACK_FRAME_OVERHEAD -	/* load_up_fpu expects r12=MSR, r13=PACA, and returns -	 * with r12 = new MSR. 
-	 */ -	ld	r12,_MSR(r6) -	GET_PACA(r13) - -	bl	load_up_fpu -	std	r12,_MSR(r6) - -	ld	r0, 112+16(r1) -	addi	r1, r1, 112 -	mtlr	r0 -	blr - -  /* void do_load_up_transact_fpu(struct thread_struct *thread)   *   * This is similar to load_up_fpu but for the transactional version of the FP @@ -105,9 +68,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)  	SYNC  	MTMSRD(r5) -	lfd	fr0,THREAD_TRANSACT_FPSCR(r3) +	addi	r7,r3,THREAD_TRANSACT_FPSTATE +	lfd	fr0,FPSTATE_FPSCR(r7)  	MTFSF_L(fr0) -	REST_32FPVSRS_TRANSACT(0, R4, R3) +	REST_32FPVSRS(0, R4, R7)  	/* FP/VSX off again */  	MTMSRD(r6) @@ -117,11 +81,49 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)  #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */  /* + * Enable use of the FPU, and VSX if possible, for the caller. + */ +_GLOBAL(fp_enable) +	mfmsr	r3 +	ori	r3,r3,MSR_FP +#ifdef CONFIG_VSX +BEGIN_FTR_SECTION +	oris	r3,r3,MSR_VSX@h +END_FTR_SECTION_IFSET(CPU_FTR_VSX) +#endif +	SYNC +	MTMSRD(r3) +	isync			/* (not necessary for arch 2.02 and later) */ +	blr + +/* + * Load state from memory into FP registers including FPSCR. + * Assumes the caller has enabled FP in the MSR. + */ +_GLOBAL(load_fp_state) +	lfd	fr0,FPSTATE_FPSCR(r3) +	MTFSF_L(fr0) +	REST_32FPVSRS(0, R4, R3) +	blr + +/* + * Store FP state into memory, including FPSCR + * Assumes the caller has enabled FP in the MSR. + */ +_GLOBAL(store_fp_state) +	SAVE_32FPVSRS(0, R4, R3) +	mffs	fr0 +	stfd	fr0,FPSTATE_FPSCR(r3) +	blr + +/*   * This task wants to use the FPU now.   * On UP, disable FP for the task which had the FPU previously,   * and save its floating-point registers in its thread_struct.   * Load up this task's FP registers from its thread_struct,   * enable the FPU for the current task and return to the task. + * Note that on 32-bit this can only use registers that will be + * restored by fast_exception_return, i.e. r3 - r6, r10 and r11.   
*/  _GLOBAL(load_up_fpu)  	mfmsr	r5 @@ -147,9 +149,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)  	beq	1f  	toreal(r4)  	addi	r4,r4,THREAD		/* want last_task_used_math->thread */ -	SAVE_32FPVSRS(0, R5, R4) +	addi	r10,r4,THREAD_FPSTATE +	SAVE_32FPVSRS(0, R5, R10)  	mffs	fr0 -	stfd	fr0,THREAD_FPSCR(r4) +	stfd	fr0,FPSTATE_FPSCR(r10)  	PPC_LL	r5,PT_REGS(r4)  	toreal(r5)  	PPC_LL	r4,_MSR-STACK_FRAME_OVERHEAD(r5) @@ -160,7 +163,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)  #endif /* CONFIG_SMP */  	/* enable use of FP after return */  #ifdef CONFIG_PPC32 -	mfspr	r5,SPRN_SPRG_THREAD		/* current task's THREAD (phys) */ +	mfspr	r5,SPRN_SPRG_THREAD	/* current task's THREAD (phys) */  	lwz	r4,THREAD_FPEXC_MODE(r5)  	ori	r9,r9,MSR_FP		/* enable FP for current */  	or	r9,r9,r4 @@ -172,9 +175,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)  	or	r12,r12,r4  	std	r12,_MSR(r1)  #endif -	lfd	fr0,THREAD_FPSCR(r5) +	addi	r10,r5,THREAD_FPSTATE +	lfd	fr0,FPSTATE_FPSCR(r10)  	MTFSF_L(fr0) -	REST_32FPVSRS(0, R4, R5) +	REST_32FPVSRS(0, R4, R10)  #ifndef CONFIG_SMP  	subi	r4,r5,THREAD  	fromreal(r4) @@ -206,11 +210,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)  	PPC_LCMPI	0,r3,0  	beqlr-				/* if no previous owner, done */  	addi	r3,r3,THREAD	        /* want THREAD of task */ +	PPC_LL	r6,THREAD_FPSAVEAREA(r3)  	PPC_LL	r5,PT_REGS(r3) -	PPC_LCMPI	0,r5,0 -	SAVE_32FPVSRS(0, R4 ,R3) +	PPC_LCMPI	0,r6,0 +	bne	2f +	addi	r6,r3,THREAD_FPSTATE +2:	PPC_LCMPI	0,r5,0 +	SAVE_32FPVSRS(0, R4, R6)  	mffs	fr0 -	stfd	fr0,THREAD_FPSCR(r3) +	stfd	fr0,FPSTATE_FPSCR(r6)  	beq	1f  	PPC_LL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)  	li	r3,MSR_FP|MSR_FE0|MSR_FE1 diff --git a/arch/powerpc/kernel/fsl_booke_entry_mapping.S b/arch/powerpc/kernel/fsl_booke_entry_mapping.S index a92c79be272..f22e7e44fbf 100644 --- a/arch/powerpc/kernel/fsl_booke_entry_mapping.S +++ b/arch/powerpc/kernel/fsl_booke_entry_mapping.S @@ -176,6 +176,8 @@ skpinv:	addi	r6,r6,1				/* Increment */  /* 7. 
Jump to KERNELBASE mapping */  	lis	r6,(KERNELBASE & ~0xfff)@h  	ori	r6,r6,(KERNELBASE & ~0xfff)@l +	rlwinm	r7,r25,0,0x03ffffff +	add	r6,r7,r6  #elif defined(ENTRY_MAPPING_KEXEC_SETUP)  /* diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index 1fb78561096..d178834fe50 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -10,6 +10,8 @@   *   */ +#define pr_fmt(fmt) "ftrace-powerpc: " fmt +  #include <linux/spinlock.h>  #include <linux/hardirq.h>  #include <linux/uaccess.h> @@ -74,6 +76,7 @@ ftrace_modify_code(unsigned long ip, unsigned int old, unsigned int new)   */  static int test_24bit_addr(unsigned long ip, unsigned long addr)  { +	addr = ppc_function_entry((void *)addr);  	/* use the create_branch to verify that this offset can be branched */  	return create_branch((unsigned int *)ip, addr, 0); @@ -104,11 +107,9 @@ __ftrace_make_nop(struct module *mod,  		  struct dyn_ftrace *rec, unsigned long addr)  {  	unsigned int op; -	unsigned int jmp[5]; -	unsigned long ptr; +	unsigned long entry, ptr;  	unsigned long ip = rec->ip; -	unsigned long tramp; -	int offset; +	void *tramp;  	/* read where this goes */  	if (probe_kernel_read(&op, (void *)ip, sizeof(int))) @@ -116,97 +117,46 @@ __ftrace_make_nop(struct module *mod,  	/* Make sure that that this is still a 24bit jump */  	if (!is_bl_op(op)) { -		printk(KERN_ERR "Not expected bl: opcode is %x\n", op); +		pr_err("Not expected bl: opcode is %x\n", op);  		return -EINVAL;  	}  	/* lets find where the pointer goes */ -	tramp = find_bl_target(ip, op); - -	/* -	 * On PPC64 the trampoline looks like: -	 * 0x3d, 0x82, 0x00, 0x00,    addis   r12,r2, <high> -	 * 0x39, 0x8c, 0x00, 0x00,    addi    r12,r12, <low> -	 *   Where the bytes 2,3,6 and 7 make up the 32bit offset -	 *   to the TOC that holds the pointer. -	 *   to jump to. -	 * 0xf8, 0x41, 0x00, 0x28,    std     r2,40(r1) -	 * 0xe9, 0x6c, 0x00, 0x20,    ld      r11,32(r12) -	 *   The actually address is 32 bytes from the offset -	 *   into the TOC. 
-	 * 0xe8, 0x4c, 0x00, 0x28,    ld      r2,40(r12) -	 */ - -	pr_devel("ip:%lx jumps to %lx r2: %lx", ip, tramp, mod->arch.toc); +	tramp = (void *)find_bl_target(ip, op); -	/* Find where the trampoline jumps to */ -	if (probe_kernel_read(jmp, (void *)tramp, sizeof(jmp))) { -		printk(KERN_ERR "Failed to read %lx\n", tramp); -		return -EFAULT; -	} +	pr_devel("ip:%lx jumps to %p", ip, tramp); -	pr_devel(" %08x %08x", jmp[0], jmp[1]); - -	/* verify that this is what we expect it to be */ -	if (((jmp[0] & 0xffff0000) != 0x3d820000) || -	    ((jmp[1] & 0xffff0000) != 0x398c0000) || -	    (jmp[2] != 0xf8410028) || -	    (jmp[3] != 0xe96c0020) || -	    (jmp[4] != 0xe84c0028)) { -		printk(KERN_ERR "Not a trampoline\n"); +	if (!is_module_trampoline(tramp)) { +		pr_err("Not a trampoline\n");  		return -EINVAL;  	} -	/* The bottom half is signed extended */ -	offset = ((unsigned)((unsigned short)jmp[0]) << 16) + -		(int)((short)jmp[1]); - -	pr_devel(" %x ", offset); - -	/* get the address this jumps too */ -	tramp = mod->arch.toc + offset + 32; -	pr_devel("toc: %lx", tramp); - -	if (probe_kernel_read(jmp, (void *)tramp, 8)) { -		printk(KERN_ERR "Failed to read %lx\n", tramp); +	if (module_trampoline_target(mod, tramp, &ptr)) { +		pr_err("Failed to get trampoline target\n");  		return -EFAULT;  	} -	pr_devel(" %08x %08x\n", jmp[0], jmp[1]); - -	ptr = ((unsigned long)jmp[0] << 32) + jmp[1]; +	pr_devel("trampoline target %lx", ptr); +	entry = ppc_global_function_entry((void *)addr);  	/* This should match what was called */ -	if (ptr != ppc_function_entry((void *)addr)) { -		printk(KERN_ERR "addr does not match %lx\n", ptr); +	if (ptr != entry) { +		pr_err("addr %lx does not match expected %lx\n", ptr, entry);  		return -EINVAL;  	}  	/* -	 * We want to nop the line, but the next line is -	 *  0xe8, 0x41, 0x00, 0x28   ld r2,40(r1) -	 * This needs to be turned to a nop too. -	 */ -	if (probe_kernel_read(&op, (void *)(ip+4), MCOUNT_INSN_SIZE)) -		return -EFAULT; - -	if (op != 0xe8410028) { -		printk(KERN_ERR "Next line is not ld! (%08x)\n", op); -		return -EINVAL; -	} - -	/* -	 * Milton Miller pointed out that we can not blindly do nops. -	 * If a task was preempted when calling a trace function, -	 * the nops will remove the way to restore the TOC in r2 -	 * and the r2 TOC will get corrupted. -	 */ - -	/* -	 * Replace: -	 *   bl <tramp>  <==== will be replaced with "b 1f" -	 *   ld r2,40(r1) -	 *  1: +	 * Our original call site looks like: +	 * +	 * bl <tramp> +	 * ld r2,XX(r1) +	 * +	 * Milton Miller pointed out that we can not simply nop the branch. +	 * If a task was preempted when calling a trace function, the nops +	 * will remove the way to restore the TOC in r2 and the r2 TOC will +	 * get corrupted. +	 * +	 * Use a b +8 to jump over the load.  	 
*/  	op = 0x48000008;	/* b +8 */ @@ -231,7 +181,7 @@ __ftrace_make_nop(struct module *mod,  	/* Make sure that that this is still a 24bit jump */  	if (!is_bl_op(op)) { -		printk(KERN_ERR "Not expected bl: opcode is %x\n", op); +		pr_err("Not expected bl: opcode is %x\n", op);  		return -EINVAL;  	} @@ -250,7 +200,7 @@ __ftrace_make_nop(struct module *mod,  	/* Find where the trampoline jumps to */  	if (probe_kernel_read(jmp, (void *)tramp, sizeof(jmp))) { -		printk(KERN_ERR "Failed to read %lx\n", tramp); +		pr_err("Failed to read %lx\n", tramp);  		return -EFAULT;  	} @@ -261,7 +211,7 @@ __ftrace_make_nop(struct module *mod,  	    ((jmp[1] & 0xffff0000) != 0x398c0000) ||  	    (jmp[2] != 0x7d8903a6) ||  	    (jmp[3] != 0x4e800420)) { -		printk(KERN_ERR "Not a trampoline\n"); +		pr_err("Not a trampoline\n");  		return -EINVAL;  	} @@ -273,8 +223,7 @@ __ftrace_make_nop(struct module *mod,  	pr_devel(" %lx ", tramp);  	if (tramp != addr) { -		printk(KERN_ERR -		       "Trampoline location %08lx does not match addr\n", +		pr_err("Trampoline location %08lx does not match addr\n",  		       tramp);  		return -EINVAL;  	} @@ -315,15 +264,13 @@ int ftrace_make_nop(struct module *mod,  	 */  	if (!rec->arch.mod) {  		if (!mod) { -			printk(KERN_ERR "No module loaded addr=%lx\n", -			       addr); +			pr_err("No module loaded addr=%lx\n", addr);  			return -EFAULT;  		}  		rec->arch.mod = mod;  	} else if (mod) {  		if (mod != rec->arch.mod) { -			printk(KERN_ERR -			       "Record mod %p not equal to passed in mod %p\n", +			pr_err("Record mod %p not equal to passed in mod %p\n",  			       rec->arch.mod, mod);  			return -EINVAL;  		} @@ -344,45 +291,42 @@ static int  __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)  {  	unsigned int op[2]; -	unsigned long ip = rec->ip; +	void *ip = (void *)rec->ip;  	/* read where this goes */ -	if (probe_kernel_read(op, (void *)ip, MCOUNT_INSN_SIZE * 2)) +	if (probe_kernel_read(op, ip, sizeof(op)))  		return -EFAULT;  	/* -	 * It should be pointing to two nops or -	 *  b +8; ld r2,40(r1) +	 * We expect to see: +	 * +	 * b +8 +	 * ld r2,XX(r1) +	 * +	 * The load offset is different depending on the ABI. For simplicity +	 * just mask it out when doing the compare.  	 
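
Restated in C, the check below accepts exactly the disabled call site that __ftrace_make_nop() produces, with the ABI-dependent TOC-restore displacement masked out (a sketch; the constants are the same ones used in the comparison that follows):

	/* Sketch: does the call site hold "b +8; ld r2,XX(r1)"? */
	static int is_disabled_call_site(const unsigned int op[2])
	{
		return op[0] == 0x48000008 &&			/* b +8 */
		       (op[1] & 0xffff0000) == 0xe8410000;	/* ld r2,XX(r1) */
	}
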
*/ -	if (((op[0] != 0x48000008) || (op[1] != 0xe8410028)) && -	    ((op[0] != PPC_INST_NOP) || (op[1] != PPC_INST_NOP))) { -		printk(KERN_ERR "Expected NOPs but have %x %x\n", op[0], op[1]); +	if ((op[0] != 0x48000008) || ((op[1] & 0xffff0000) != 0xe8410000)) { +		pr_err("Unexpected call sequence: %x %x\n", op[0], op[1]);  		return -EINVAL;  	}  	/* If we never set up a trampoline to ftrace_caller, then bail */  	if (!rec->arch.mod->arch.tramp) { -		printk(KERN_ERR "No ftrace trampoline\n"); +		pr_err("No ftrace trampoline\n");  		return -EINVAL;  	} -	/* create the branch to the trampoline */ -	op[0] = create_branch((unsigned int *)ip, -			      rec->arch.mod->arch.tramp, BRANCH_SET_LINK); -	if (!op[0]) { -		printk(KERN_ERR "REL24 out of range!\n"); +	/* Ensure branch is within 24 bits */ +	if (!create_branch(ip, rec->arch.mod->arch.tramp, BRANCH_SET_LINK)) { +		pr_err("Branch out of range\n");  		return -EINVAL;  	} -	/* ld r2,40(r1) */ -	op[1] = 0xe8410028; - -	pr_devel("write to %lx\n", rec->ip); - -	if (probe_kernel_write((void *)ip, op, MCOUNT_INSN_SIZE * 2)) -		return -EPERM; - -	flush_icache_range(ip, ip + 8); +	if (patch_branch(ip, rec->arch.mod->arch.tramp, BRANCH_SET_LINK)) { +		pr_err("REL24 out of range!\n"); +		return -EINVAL; +	}  	return 0;  } @@ -399,13 +343,13 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)  	/* It should be pointing to a nop */  	if (op != PPC_INST_NOP) { -		printk(KERN_ERR "Expected NOP but have %x\n", op); +		pr_err("Expected NOP but have %x\n", op);  		return -EINVAL;  	}  	/* If we never set up a trampoline to ftrace_caller, then bail */  	if (!rec->arch.mod->arch.tramp) { -		printk(KERN_ERR "No ftrace trampoline\n"); +		pr_err("No ftrace trampoline\n");  		return -EINVAL;  	} @@ -413,7 +357,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)  	op = create_branch((unsigned int *)ip,  			   rec->arch.mod->arch.tramp, BRANCH_SET_LINK);  	if (!op) { -		printk(KERN_ERR "REL24 out of range!\n"); +		pr_err("REL24 out of range!\n");  		return -EINVAL;  	} @@ -451,7 +395,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)  	 * already have a module defined.  	 */  	if (!rec->arch.mod) { -		printk(KERN_ERR "No module loaded\n"); +		pr_err("No module loaded\n");  		return -EINVAL;  	} @@ -527,13 +471,8 @@ void arch_ftrace_update_code(int command)  		ftrace_disable_ftrace_graph_caller();  } -int __init ftrace_dyn_arch_init(void *data) +int __init ftrace_dyn_arch_init(void)  { -	/* caller expects data to be zero */ -	unsigned long *p = data; - -	*p = 0; -  	return 0;  }  #endif /* CONFIG_DYNAMIC_FTRACE */ diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 67ee0d6c107..7d7d8635227 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -930,25 +930,6 @@ initial_mmu:  	tlbwe	r4,r0,TLB_DATA		/* Load the data portion of the entry */  	tlbwe	r3,r0,TLB_TAG		/* Load the tag portion of the entry */ -#if defined(CONFIG_SERIAL_TEXT_DEBUG) && defined(SERIAL_DEBUG_IO_BASE) - -	/* Load a TLB entry for the UART, so that ppc4xx_progress() can use -	 * the UARTs nice and early.  We use a 4k real==virtual mapping. 
*/ - -	lis	r3,SERIAL_DEBUG_IO_BASE@h -	ori	r3,r3,SERIAL_DEBUG_IO_BASE@l -	mr	r4,r3 -	clrrwi	r4,r4,12 -	ori	r4,r4,(TLB_WR|TLB_I|TLB_M|TLB_G) - -	clrrwi	r3,r3,12 -	ori	r3,r3,(TLB_VALID | TLB_PAGESZ(PAGESZ_4K)) - -	li	r0,0			/* TLB slot 0 */ -	tlbwe	r4,r0,TLB_DATA -	tlbwe	r3,r0,TLB_TAG -#endif /* CONFIG_SERIAL_DEBUG_TEXT && SERIAL_DEBUG_IO_BASE */ -  	isync  	/* Establish the exception vector base diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 3d11d8038de..a95145d7f61 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -23,6 +23,7 @@   */  #include <linux/threads.h> +#include <linux/init.h>  #include <asm/reg.h>  #include <asm/page.h>  #include <asm/mmu.h> @@ -68,17 +69,18 @@ _stext:  _GLOBAL(__start)  	/* NOP this out unconditionally */  BEGIN_FTR_SECTION -	b	.__start_initialization_multiplatform +	FIXUP_ENDIAN +	b	__start_initialization_multiplatform  END_FTR_SECTION(0, 1)  	/* Catch branch to 0 in real mode */  	trap -	/* Secondary processors spin on this value until it becomes nonzero. -	 * When it does it contains the real address of the descriptor -	 * of the function that the cpu should jump to to continue -	 * initialization. +	/* Secondary processors spin on this value until it becomes non-zero. +	 * When non-zero, it contains the real address of the function the cpu +	 * should jump to.  	 */ +	.balign 8  	.globl  __secondary_hold_spinloop  __secondary_hold_spinloop:  	.llong	0x0 @@ -115,6 +117,7 @@ __run_at_load:   */  	.globl	__secondary_hold  __secondary_hold: +	FIXUP_ENDIAN  #ifndef CONFIG_PPC_BOOK3E  	mfmsr	r24  	ori	r24,r24,MSR_RI @@ -136,16 +139,15 @@ __secondary_hold:  	tovirt(r26,r26)  #endif  	/* All secondary cpus wait here until told to start. */ -100:	ld	r4,__secondary_hold_spinloop-_stext(r26) -	cmpdi	0,r4,0 +100:	ld	r12,__secondary_hold_spinloop-_stext(r26) +	cmpdi	0,r12,0  	beq	100b  #if defined(CONFIG_SMP) || defined(CONFIG_KEXEC)  #ifdef CONFIG_PPC_BOOK3E -	tovirt(r4,r4) +	tovirt(r12,r12)  #endif -	ld	r4,0(r4)		/* deref function descriptor */ -	mtctr	r4 +	mtctr	r12  	mr	r3,r24  	/*  	 * it may be the case that other platforms have r4 right to @@ -182,16 +184,16 @@ _GLOBAL(generic_secondary_thread_init)  	mr	r24,r3  	/* turn on 64-bit mode */ -	bl	.enable_64b_mode +	bl	enable_64b_mode  	/* get a valid TOC pointer, wherever we're mapped at */ -	bl	.relative_toc +	bl	relative_toc  	tovirt(r2,r2)  #ifdef CONFIG_PPC_BOOK3E  	/* Book3E initialization */  	mr	r3,r24 -	bl	.book3e_secondary_thread_init +	bl	book3e_secondary_thread_init  #endif  	b	generic_secondary_common_init @@ -205,21 +207,22 @@ _GLOBAL(generic_secondary_thread_init)   * as SCOM before entry).   
*/  _GLOBAL(generic_secondary_smp_init) +	FIXUP_ENDIAN  	mr	r24,r3  	mr	r25,r4  	/* turn on 64-bit mode */ -	bl	.enable_64b_mode +	bl	enable_64b_mode  	/* get a valid TOC pointer, wherever we're mapped at */ -	bl	.relative_toc +	bl	relative_toc  	tovirt(r2,r2)  #ifdef CONFIG_PPC_BOOK3E  	/* Book3E initialization */  	mr	r3,r24  	mr	r4,r25 -	bl	.book3e_secondary_core_init +	bl	book3e_secondary_core_init  #endif  generic_secondary_common_init: @@ -231,7 +234,7 @@ generic_secondary_common_init:  	ld	r13,0(r13)		/* Get base vaddr of paca array	 */  #ifndef CONFIG_SMP  	addi	r13,r13,PACA_SIZE	/* know r13 if used accidentally */ -	b	.kexec_wait		/* wait for next kernel if !SMP	 */ +	b	kexec_wait		/* wait for next kernel if !SMP	 */  #else  	LOAD_REG_ADDR(r7, nr_cpu_ids)	/* Load nr_cpu_ids address       */  	lwz	r7,0(r7)		/* also the max paca allocated 	 */ @@ -245,7 +248,7 @@ generic_secondary_common_init:  	blt	1b  	mr	r3,r24			/* not found, copy phys to r3	 */ -	b	.kexec_wait		/* next kernel might do better	 */ +	b	kexec_wait		/* next kernel might do better	 */  2:	SET_PACA(r13)  #ifdef CONFIG_PPC_BOOK3E @@ -259,11 +262,13 @@ generic_secondary_common_init:  	/* See if we need to call a cpu state restore handler */  	LOAD_REG_ADDR(r23, cur_cpu_spec)  	ld	r23,0(r23) -	ld	r23,CPU_SPEC_RESTORE(r23) -	cmpdi	0,r23,0 +	ld	r12,CPU_SPEC_RESTORE(r23) +	cmpdi	0,r12,0  	beq	3f -	ld	r23,0(r23) -	mtctr	r23 +#if !defined(_CALL_ELF) || _CALL_ELF != 2 +	ld	r12,0(r12) +#endif +	mtctr	r12  	bctrl  3:	LOAD_REG_ADDR(r3, spinning_secondaries) /* Decrement spinning_secondaries */ @@ -294,7 +299,7 @@ generic_secondary_common_init:   * Assumes we're mapped EA == RA if the MMU is on.   */  #ifdef CONFIG_PPC_BOOK3S -_STATIC(__mmu_off) +__mmu_off:  	mfmsr	r3  	andi.	r0,r3,MSR_IR|MSR_DR  	beqlr @@ -319,12 +324,12 @@ _STATIC(__mmu_off)   *                 DT block, r4 is a physical pointer to the kernel itself   *   */ -_GLOBAL(__start_initialization_multiplatform) +__start_initialization_multiplatform:  	/* Make sure we are running in 64 bits mode */ -	bl	.enable_64b_mode +	bl	enable_64b_mode  	/* Get TOC pointer (current runtime address) */ -	bl	.relative_toc +	bl	relative_toc  	/* find out where we are now */  	bcl	20,31,$+4 @@ -337,7 +342,7 @@ _GLOBAL(__start_initialization_multiplatform)  	 */  	cmpldi	cr0,r5,0  	beq	1f -	b	.__boot_from_prom		/* yes -> prom */ +	b	__boot_from_prom		/* yes -> prom */  1:  	/* Save parameters */  	mr	r31,r3 @@ -349,8 +354,8 @@ _GLOBAL(__start_initialization_multiplatform)  #endif  #ifdef CONFIG_PPC_BOOK3E -	bl	.start_initialization_book3e -	b	.__after_prom_start +	bl	start_initialization_book3e +	b	__after_prom_start  #else  	/* Setup some critical 970 SPRs before switching MMU off */  	mfspr	r0,SPRN_PVR @@ -363,15 +368,15 @@ _GLOBAL(__start_initialization_multiplatform)  	beq	1f  	cmpwi	r0,0x45		/* 970GX */  	bne	2f -1:	bl	.__cpu_preinit_ppc970 +1:	bl	__cpu_preinit_ppc970  2:  	/* Switch off MMU if not already off */ -	bl	.__mmu_off -	b	.__after_prom_start +	bl	__mmu_off +	b	__after_prom_start  #endif /* CONFIG_PPC_BOOK3E */ -_INIT_STATIC(__boot_from_prom) +__boot_from_prom:  #ifdef CONFIG_PPC_OF_BOOT_TRAMPOLINE  	/* Save parameters */  	mr	r31,r3 @@ -390,7 +395,7 @@ _INIT_STATIC(__boot_from_prom)  #ifdef CONFIG_RELOCATABLE  	/* Relocate code for where we are now */  	mr	r3,r26 -	bl	.relocate +	bl	relocate  #endif  	/* Restore parameters */ @@ -402,14 +407,14 @@ _INIT_STATIC(__boot_from_prom)  	/* Do all of the interaction with OF client interface */  	mr	r8,r26 -	bl	.prom_init +	bl	
prom_init  #endif /* #CONFIG_PPC_OF_BOOT_TRAMPOLINE */  	/* We never return. We also hit that trap if trying to boot  	 * from OF while CONFIG_PPC_OF_BOOT_TRAMPOLINE isn't selected */  	trap -_STATIC(__after_prom_start) +__after_prom_start:  #ifdef CONFIG_RELOCATABLE  	/* process relocations for the final address of the kernel */  	lis	r25,PAGE_OFFSET@highest	/* compute virtual base of kernel */ @@ -419,7 +424,7 @@ _STATIC(__after_prom_start)  	bne	1f  	add	r25,r25,r26  1:	mr	r3,r25 -	bl	.relocate +	bl	relocate  #endif  /* @@ -459,22 +464,23 @@ _STATIC(__after_prom_start)  	lis	r5,(copy_to_here - _stext)@ha  	addi	r5,r5,(copy_to_here - _stext)@l /* # bytes of memory to copy */ -	bl	.copy_and_flush		/* copy the first n bytes	 */ +	bl	copy_and_flush		/* copy the first n bytes	 */  					/* this includes the code being	 */  					/* executed here.		 */  	addis	r8,r3,(4f - _stext)@ha	/* Jump to the copy of this code */ -	addi	r8,r8,(4f - _stext)@l	/* that we just made */ -	mtctr	r8 +	addi	r12,r8,(4f - _stext)@l	/* that we just made */ +	mtctr	r12  	bctr +.balign 8  p_end:	.llong	_end - _stext  4:	/* Now copy the rest of the kernel up to _end */  	addis	r5,r26,(p_end - _stext)@ha  	ld	r5,(p_end - _stext)@l(r5)	/* get _end */ -5:	bl	.copy_and_flush		/* copy the rest */ +5:	bl	copy_and_flush		/* copy the rest */ -9:	b	.start_here_multiplatform +9:	b	start_here_multiplatform  /*   * Copy routine used to copy the kernel to start at physical address 0 @@ -538,7 +544,7 @@ __secondary_start_pmac_0:  _GLOBAL(pmac_secondary_start)  	/* turn on 64-bit mode */ -	bl	.enable_64b_mode +	bl	enable_64b_mode  	li	r0,0  	mfspr	r3,SPRN_HID4 @@ -550,11 +556,11 @@ _GLOBAL(pmac_secondary_start)  	slbia  	/* get TOC pointer (real address) */ -	bl	.relative_toc +	bl	relative_toc  	tovirt(r2,r2)  	/* Copy some CPU settings from CPU 0 */ -	bl	.__restore_cpu_ppc970 +	bl	__restore_cpu_ppc970  	/* pSeries do that early though I don't think we really need it */  	mfmsr	r3 @@ -613,7 +619,7 @@ __secondary_start:  	std	r14,PACAKSAVE(r13)  	/* Do early setup for that CPU (stab, slb, hash table pointer) */ -	bl	.early_setup_secondary +	bl	early_setup_secondary  	/*  	 * setup the new stack pointer, but *don't* use this until @@ -633,7 +639,7 @@ __secondary_start:  	stb	r0,PACAIRQHAPPENED(r13)  	/* enable MMU and jump to start_secondary */ -	LOAD_REG_ADDR(r3, .start_secondary_prolog) +	LOAD_REG_ADDR(r3, start_secondary_prolog)  	LOAD_REG_IMMEDIATE(r4, MSR_KERNEL)  	mtspr	SPRN_SRR0,r3 @@ -646,11 +652,11 @@ __secondary_start:   * zero the stack back-chain pointer and get the TOC virtual address   * before going into C code.   */ -_GLOBAL(start_secondary_prolog) +start_secondary_prolog:  	ld	r2,PACATOC(r13)  	li	r3,0  	std	r3,0(r1)		/* Zero the stack frame pointer	*/ -	bl	.start_secondary +	bl	start_secondary  	b	.  /*   * Reset stack pointer and call start_secondary @@ -661,14 +667,14 @@ _GLOBAL(start_secondary_resume)  	ld	r1,PACAKSAVE(r13)	/* Reload kernel stack pointer */  	li	r3,0  	std	r3,0(r1)		/* Zero the stack frame pointer	*/ -	bl	.start_secondary +	bl	start_secondary  	b	.  #endif  /*   * This subroutine clobbers r11 and r12   */ -_GLOBAL(enable_64b_mode) +enable_64b_mode:  	mfmsr	r11			/* grab the current MSR */  #ifdef CONFIG_PPC_BOOK3E  	oris	r11,r11,0x8000		/* CM bit set, we'll set ICM later */ @@ -709,9 +715,9 @@ p_toc:	.llong	__toc_start + 0x8000 - 0b  /*   * This is where the main kernel code starts.   
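/*
 * Why the leading dots disappear in head_64.S: the ELFv2 ABI has no
 * function descriptors and no separate ".func" code symbols, so branch
 * targets are the plain symbol names, and indirect calls pass the entry
 * point in r12. A hedged C sketch of the ELFv1/ELFv2 difference (the
 * struct layout mirrors the kernel's func_descr_t; illustrative only):
 */
struct func_desc {
	unsigned long entry;	/* address of the first instruction */
	unsigned long toc;	/* TOC base the callee expects in r2 */
	unsigned long env;	/* environment pointer, unused by C */
};

static unsigned long function_entry(void *func)
{
#if !defined(_CALL_ELF) || _CALL_ELF != 2
	return ((struct func_desc *)func)->entry;	/* ELFv1: dereference */
#else
	return (unsigned long)func;			/* ELFv2: direct */
#endif
}

/* This mirrors the cpu-restore path above, where "ld r12,0(r12)" is
 * compiled in only for ELFv1 before the mtctr/bctrl. */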
*/ -_INIT_STATIC(start_here_multiplatform) +start_here_multiplatform:  	/* set up the TOC */ -	bl      .relative_toc +	bl      relative_toc  	tovirt(r2,r2)  	/* Clear out the BSS. It may have been done in prom_init, @@ -770,9 +776,9 @@ _INIT_STATIC(start_here_multiplatform)  	/* Restore parameters passed from prom_init/kexec */  	mr	r3,r31 -	bl	.early_setup		/* also sets r13 and SPRG_PACA */ +	bl	early_setup		/* also sets r13 and SPRG_PACA */ -	LOAD_REG_ADDR(r3, .start_here_common) +	LOAD_REG_ADDR(r3, start_here_common)  	ld	r4,PACAKMSR(r13)  	mtspr	SPRN_SRR0,r3  	mtspr	SPRN_SRR1,r4 @@ -780,7 +786,8 @@ _INIT_STATIC(start_here_multiplatform)  	b	.	/* prevent speculative execution */  	/* This is where all platforms converge execution */ -_INIT_GLOBAL(start_here_common) + +start_here_common:  	/* relocation is on at this point */  	std	r1,PACAKSAVE(r13) @@ -788,7 +795,7 @@ _INIT_GLOBAL(start_here_common)  	ld	r2,PACATOC(r13)  	/* Do more system initializations in virtual mode */ -	bl	.setup_system +	bl	setup_system  	/* Mark interrupts soft and hard disabled (they might be enabled  	 * in the PACA when doing hotplug) @@ -799,7 +806,7 @@ _INIT_GLOBAL(start_here_common)  	stb	r0,PACAIRQHAPPENED(r13)  	/* Generic kernel entry */ -	bl	.start_kernel +	bl	start_kernel  	/* Not reached */  	BUG_OPCODE diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 1b92a97b1b0..7ee876d2adb 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -858,6 +858,9 @@ initial_mmu:  	addis	r11, r11, 0x0080	/* Add 8M */  	mtspr	SPRN_MD_RPN, r11 +	addi	r10, r10, 0x0100 +	mtspr	SPRN_MD_CTR, r10 +  	addis	r8, r8, 0x0080		/* Add 8M */  	mtspr	SPRN_MD_EPN, r8  	mtspr	SPRN_MD_TWC, r9 diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 289afaffbbb..b497188a94a 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -65,29 +65,78 @@ _ENTRY(_start);  	nop  	/* Translate device tree address to physical, save in r30/r31 */ -	mfmsr	r16 -	mfspr	r17,SPRN_PID -	rlwinm	r17,r17,16,0x3fff0000	/* turn PID into MAS6[SPID] */ -	rlwimi	r17,r16,28,0x00000001	/* turn MSR[DS] into MAS6[SAS] */ -	mtspr	SPRN_MAS6,r17 - -	tlbsx	0,r3			/* must succeed */ - -	mfspr	r16,SPRN_MAS1 -	mfspr	r20,SPRN_MAS3 -	rlwinm	r17,r16,25,0x1f		/* r17 = log2(page size) */ -	li	r18,1024 -	slw	r18,r18,r17		/* r18 = page size */ -	addi	r18,r18,-1 -	and	r19,r3,r18		/* r19 = page offset */ -	andc	r31,r20,r18		/* r31 = page base */ -	or	r31,r31,r19		/* r31 = devtree phys addr */ -	mfspr	r30,SPRN_MAS7 +	bl	get_phys_addr +	mr	r30,r3 +	mr	r31,r4  	li	r25,0			/* phys kernel start (low) */  	li	r24,0			/* CPU number */  	li	r23,0			/* phys kernel start (high) */ +#ifdef CONFIG_RELOCATABLE +	LOAD_REG_ADDR_PIC(r3, _stext)	/* Get our current runtime base */ + +	/* Translate _stext address to physical, save in r23/r25 */ +	bl	get_phys_addr +	mr	r23,r3 +	mr	r25,r4 + +	bl	0f +0:	mflr	r8 +	addis	r3,r8,(is_second_reloc - 0b)@ha +	lwz	r19,(is_second_reloc - 0b)@l(r3) + +	/* Check if this is the second relocation. */ +	cmpwi	r19,1 +	bne	1f + +	/* +	 * For the second relocation, we already get the real memstart_addr +	 * from device tree. 
So we will map PAGE_OFFSET to memstart_addr,
+	 * then the virtual address of the kernel start should be:
+	 *          PAGE_OFFSET + (kernstart_addr - memstart_addr)
+	 * Since the offset between kernstart_addr and memstart_addr should
+	 * never exceed 1G, we can just use the lower 32 bits of each
+	 * for the calculation.
+	 */
+	lis	r3,PAGE_OFFSET@h
+
+	addis	r4,r8,(kernstart_addr - 0b)@ha
+	addi	r4,r4,(kernstart_addr - 0b)@l
+	lwz	r5,4(r4)
+
+	addis	r6,r8,(memstart_addr - 0b)@ha
+	addi	r6,r6,(memstart_addr - 0b)@l
+	lwz	r7,4(r6)
+
+	subf	r5,r7,r5
+	add	r3,r3,r5
+	b	2f
+
+1:
+	/*
+	 * We have the runtime (virtual) address of our base.
+	 * We calculate our offset from the enclosing 64M page.
+	 * We could map the 64M page we belong to at PAGE_OFFSET and
+	 * get going from there.
+	 */
+	lis	r4,KERNELBASE@h
+	ori	r4,r4,KERNELBASE@l
+	rlwinm	r6,r25,0,0x3ffffff		/* r6 = PHYS_START % 64M */
+	rlwinm	r5,r4,0,0x3ffffff		/* r5 = KERNELBASE % 64M */
+	subf	r3,r5,r6			/* r3 = r6 - r5 */
+	add	r3,r4,r3			/* Required Virtual Address */
+
+2:	bl	relocate
+
+	/*
+	 * For the second relocation, we already set the right tlb entries
+	 * for the kernel space, so skip the code in fsl_booke_entry_mapping.S
+	 */
+	cmpwi	r19,1
+	beq	set_ivor
+#endif
+
 /* We try to not make any assumptions about how the boot loader
  * setup or used the TLBs.  We invalidate all mappings from the
  * boot loader and load a single entry in TLB1[0] to map the
@@ -113,6 +162,7 @@ _ENTRY(__early_start)
 #include "fsl_booke_entry_mapping.S"
 #undef ENTRY_MAPPING_BOOT_SETUP
 
+set_ivor:
 	/* Establish the interrupt vector offsets */
 	SET_IVOR(0,  CriticalInput);
 	SET_IVOR(1,  MachineCheck);
@@ -166,8 +216,7 @@ _ENTRY(__early_start)
 	/* Check to see if we're the second processor, and jump
 	 * to the secondary_start code if so
 	 */
-	lis	r24, boot_cpuid@h
-	ori	r24, r24, boot_cpuid@l
+	LOAD_REG_ADDR_PIC(r24, boot_cpuid)
 	lwz	r24, 0(r24)
 	cmpwi	r24, -1
 	mfspr   r24,SPRN_PIR
@@ -197,6 +246,18 @@ _ENTRY(__early_start)
 
 	bl	early_init
 
+#ifdef CONFIG_RELOCATABLE
+	mr	r3,r30
+	mr	r4,r31
+#ifdef CONFIG_PHYS_64BIT
+	mr	r5,r23
+	mr	r6,r25
+#else
+	mr	r5,r25
+#endif
+	bl	relocate_init
+#endif
+
 #ifdef CONFIG_DYNAMIC_MEMSTART
 	lis	r3,kernstart_addr@ha
 	la	r3,kernstart_addr@l(r3)
@@ -555,27 +616,27 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
 #ifdef CONFIG_SPE
 	/* SPE Unavailable */
 	START_EXCEPTION(SPEUnavailable)
-	NORMAL_EXCEPTION_PROLOG(SPE_UNAVAIL)
+	NORMAL_EXCEPTION_PROLOG(SPE_ALTIVEC_UNAVAIL)
 	beq	1f
 	bl	load_up_spe
 	b	fast_exception_return
1:	addi	r3,r1,STACK_FRAME_OVERHEAD
 	EXC_XFER_EE_LITE(0x2010, KernelSPE)
 #else
-	EXCEPTION(0x2020, SPE_UNAVAIL, SPEUnavailable, \
+	EXCEPTION(0x2020, SPE_ALTIVEC_UNAVAIL, SPEUnavailable, \
 		  unknown_exception, EXC_XFER_EE)
 #endif /* CONFIG_SPE */
 
 	/* SPE Floating Point Data */
 #ifdef CONFIG_SPE
-	EXCEPTION(0x2030, SPE_FP_DATA, SPEFloatingPointData, \
-		  SPEFloatingPointException, EXC_XFER_EE);
+	EXCEPTION(0x2030, SPE_FP_DATA_ALTIVEC_ASSIST, SPEFloatingPointData,
+		  SPEFloatingPointException, EXC_XFER_EE)
 
 	/* SPE Floating Point Round */
 	EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \
 		  SPEFloatingPointRoundException, EXC_XFER_EE)
 #else
-	EXCEPTION(0x2040, SPE_FP_DATA, SPEFloatingPointData, \
+	EXCEPTION(0x2040, SPE_FP_DATA_ALTIVEC_ASSIST, SPEFloatingPointData,
 		  unknown_exception, EXC_XFER_EE)
 	EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \
 		  unknown_exception, EXC_XFER_EE)
@@ -856,6 +917,33 @@ KernelSPE:
 #endif /* CONFIG_SPE */
 
 /*
+ * Translate
the effective address in r3 to a physical address. The physical address
+ * will be put into r3 (upper 32 bits) and r4 (lower 32 bits).
+ */
+get_phys_addr:
+	mfmsr	r8
+	mfspr	r9,SPRN_PID
+	rlwinm	r9,r9,16,0x3fff0000	/* turn PID into MAS6[SPID] */
+	rlwimi	r9,r8,28,0x00000001	/* turn MSR[DS] into MAS6[SAS] */
+	mtspr	SPRN_MAS6,r9
+
+	tlbsx	0,r3			/* must succeed */
+
+	mfspr	r8,SPRN_MAS1
+	mfspr	r12,SPRN_MAS3
+	rlwinm	r9,r8,25,0x1f		/* r9 = log2(page size) */
+	li	r10,1024
+	slw	r10,r10,r9		/* r10 = page size */
+	addi	r10,r10,-1
+	and	r11,r3,r10		/* r11 = page offset */
+	andc	r4,r12,r10		/* r4 = page base */
+	or	r4,r4,r11		/* r4 = devtree phys addr */
+#ifdef CONFIG_PHYS_64BIT
+	mfspr	r3,SPRN_MAS7
+#endif
+	blr
+
+/*
  * Global functions
  */
 
@@ -1057,24 +1145,36 @@ _GLOBAL(__flush_disable_L1)
 /* When we get here, r24 needs to hold the CPU # */
 	.globl __secondary_start
__secondary_start:
-	lis	r3,__secondary_hold_acknowledge@h
-	ori	r3,r3,__secondary_hold_acknowledge@l
-	stw	r24,0(r3)
-
-	li	r3,0
-	mr	r4,r24		/* Why? */
-	bl	call_setup_cpu
-
-	lis	r3,tlbcam_index@ha
-	lwz	r3,tlbcam_index@l(r3)
+	LOAD_REG_ADDR_PIC(r3, tlbcam_index)
+	lwz	r3,0(r3)
 	mtctr	r3
 	li	r26,0		/* r26 safe? */
+	bl	switch_to_as1
+	mr	r27,r3		/* tlb entry */
 	/* Load each CAM entry */
1:	mr	r3,r26
 	bl	loadcam_entry
 	addi	r26,r26,1
 	bdnz	1b
+	mr	r3,r27		/* tlb entry */
+	LOAD_REG_ADDR_PIC(r4, memstart_addr)
+	lwz	r4,0(r4)
+	mr	r5,r25		/* phys kernel start */
+	rlwinm	r5,r5,0,~0x3ffffff	/* aligned 64M */
+	subf	r4,r5,r4	/* memstart_addr - phys kernel start */
+	li	r5,0		/* no device tree */
+	li	r6,0		/* not boot cpu */
+	bl	restore_to_as0
+
+
+	lis	r3,__secondary_hold_acknowledge@h
+	ori	r3,r3,__secondary_hold_acknowledge@l
+	stw	r24,0(r3)
+
+	li	r3,0
+	mr	r4,r24		/* Why? */
+	bl	call_setup_cpu
 
 	/* get current_thread_info and current */
 	lis	r1,secondary_ti@ha
@@ -1111,6 +1211,112 @@ __secondary_hold_acknowledge:
 #endif
 
 /*
+ * Create a tlb entry with the same effective and physical address as
+ * the tlb entry used by the currently running code, but with TS set
+ * to 1. Then switch to address space 1. It will return with r3 set to
+ * the ESEL of the newly created tlb entry.
+ */
+_GLOBAL(switch_to_as1)
+	mflr	r5
+
+	/* Find an unused entry */
+	mfspr	r3,SPRN_TLB1CFG
+	andi.	r3,r3,0xfff
+	mfspr	r4,SPRN_PID
+	rlwinm	r4,r4,16,0x3fff0000	/* turn PID into MAS6[SPID] */
+	mtspr	SPRN_MAS6,r4
+1:	lis	r4,0x1000		/* Set MAS0(TLBSEL) = 1 */
+	addi	r3,r3,-1
+	rlwimi	r4,r3,16,4,15		/* Setup MAS0 = TLBSEL | ESEL(r3) */
+	mtspr	SPRN_MAS0,r4
+	tlbre
+	mfspr	r4,SPRN_MAS1
+	andis.	r4,r4,MAS1_VALID@h
+	bne	1b
+
+	/* Get the tlb entry used by the current running code */
+	bl	0f
+0:	mflr	r4
+	tlbsx	0,r4
+
+	mfspr	r4,SPRN_MAS1
+	ori	r4,r4,MAS1_TS		/* Set the TS = 1 */
+	mtspr	SPRN_MAS1,r4
+
+	mfspr	r4,SPRN_MAS0
+	rlwinm	r4,r4,0,~MAS0_ESEL_MASK
+	rlwimi	r4,r3,16,4,15		/* Setup MAS0 = TLBSEL | ESEL(r3) */
+	mtspr	SPRN_MAS0,r4
+	tlbwe
+	isync
+	sync
+
+	mfmsr	r4
+	ori	r4,r4,MSR_IS | MSR_DS
+	mtspr	SPRN_SRR0,r5
+	mtspr	SPRN_SRR1,r4
+	sync
+	rfi
+
+/*
+ * Restore to address space 0 and invalidate the tlb entry created
+ * by switch_to_as1.
+ * r3 - the tlb entry which should be invalidated
+ * r4 - __pa(PAGE_OFFSET in AS1) - __pa(PAGE_OFFSET in AS0)
+ * r5 - device tree virtual address. If r4 is 0, r5 is ignored.
+ * r6 - boot cpu
+ */
+_GLOBAL(restore_to_as0)
+	mflr	r0
+
+	bl	0f
+0:	mflr	r9
+	addi	r9,r9,1f - 0b
+
+	/*
+	 * We may map the PAGE_OFFSET in AS0 to a different physical address,
+	 * so we need to calculate the right jump and device tree addresses
+	 * based on the offset passed in r4.
+	 */
+	add	r9,r9,r4
+	add	r5,r5,r4
+	add	r0,r0,r4
+
+2:	mfmsr	r7
+	li	r8,(MSR_IS | MSR_DS)
+	andc	r7,r7,r8
+
+	mtspr	SPRN_SRR0,r9
+	mtspr	SPRN_SRR1,r7
+	sync
+	rfi
+
+	/* Invalidate the temporary tlb entry for AS1 */
+1:	lis	r9,0x1000		/* Set MAS0(TLBSEL) = 1 */
+	rlwimi	r9,r3,16,4,15		/* Setup MAS0 = TLBSEL | ESEL(r3) */
+	mtspr	SPRN_MAS0,r9
+	tlbre
+	mfspr	r9,SPRN_MAS1
+	rlwinm	r9,r9,0,2,31		/* Clear MAS1 Valid and IPPROT */
+	mtspr	SPRN_MAS1,r9
+	tlbwe
+	isync
+
+	cmpwi	r4,0
+	cmpwi	cr1,r6,0
+	cror	eq,4*cr1+eq,eq
+	bne	3f			/* offset != 0 && is_boot_cpu */
+	mtlr	r0
+	blr
+
+	/*
+	 * PAGE_OFFSET will map to a different physical address,
+	 * so jump to _start to relocate again.
+	 */
3:	mr	r3,r5
+	bl	_start
+
+/*
 * We put a few things here that have to be page-aligned. This stuff
 * goes at the beginning of the data segment, which is page-aligned.
 */
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index f0b47d1a6b0..0bb5918faaa 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -28,7 +28,6 @@
 #include <linux/percpu.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
-#include <linux/init.h>
 #include <linux/smp.h>
 
 #include <asm/hw_breakpoint.h>
@@ -73,7 +72,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp)
 	 * If so, DABR will be populated in single_step_dabr_instruction().
 	 */
 	if (current->thread.last_hit_ubp != bp)
-		set_breakpoint(info);
+		__set_breakpoint(info);
 
 	return 0;
 }
@@ -199,7 +198,7 @@ void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs)
 
 	info = counter_arch_bp(tsk->thread.last_hit_ubp);
 	regs->msr &= ~MSR_SE;
-	set_breakpoint(info);
+	__set_breakpoint(info);
 	tsk->thread.last_hit_ubp = NULL;
 }
@@ -285,7 +284,7 @@ int __kprobes hw_breakpoint_handler(struct die_args *args)
 	if (!(info->type & HW_BRK_TYPE_EXTRANEOUS_IRQ))
 		perf_bp_event(bp, regs);
 
-	set_breakpoint(info);
+	__set_breakpoint(info);
 out:
 	rcu_read_unlock();
 	return rc;
@@ -317,7 +316,7 @@ int __kprobes single_step_dabr_instruction(struct die_args *args)
 	if (!(info->type & HW_BRK_TYPE_EXTRANEOUS_IRQ))
 		perf_bp_event(bp, regs);
 
-	set_breakpoint(info);
+	__set_breakpoint(info);
 	current->thread.last_hit_ubp = NULL;
 
 	/*
diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c
index 16a7c2326d4..1114d13ac19 100644
--- a/arch/powerpc/kernel/ibmebus.c
+++ b/arch/powerpc/kernel/ibmebus.c
@@ -292,6 +292,7 @@ out:
 		return rc;
 	return count;
 }
+static BUS_ATTR(probe, S_IWUSR, NULL, ibmebus_store_probe);
 
 static ssize_t ibmebus_store_remove(struct bus_type *bus,
 				    const char *buf, size_t count)
@@ -317,13 +318,14 @@ static ssize_t ibmebus_store_remove(struct bus_type *bus,
 		return -ENODEV;
 	}
 }
+static BUS_ATTR(remove, S_IWUSR, NULL, ibmebus_store_remove);
 
-
-static struct bus_attribute ibmebus_bus_attrs[] = {
-	__ATTR(probe, S_IWUSR, NULL, ibmebus_store_probe),
-	__ATTR(remove, S_IWUSR, NULL, ibmebus_store_remove),
-	__ATTR_NULL
+static struct attribute *ibmbus_bus_attrs[] = {
+	&bus_attr_probe.attr,
+	&bus_attr_remove.attr,
+	NULL,
 };
+ATTRIBUTE_GROUPS(ibmbus_bus);
 
 static int ibmebus_bus_bus_match(struct device *dev, struct device_driver *drv)
 {
@@
-713,7 +715,7 @@ static struct dev_pm_ops ibmebus_bus_dev_pm_ops = {  struct bus_type ibmebus_bus_type = {  	.name      = "ibmebus",  	.uevent    = of_device_uevent_modalias, -	.bus_attrs = ibmebus_bus_attrs, +	.bus_groups = ibmbus_bus_groups,  	.match     = ibmebus_bus_bus_match,  	.probe     = ibmebus_bus_device_probe,  	.remove    = ibmebus_bus_device_remove, diff --git a/arch/powerpc/kernel/idle_book3e.S b/arch/powerpc/kernel/idle_book3e.S index bfb73cc209c..48c21acef91 100644 --- a/arch/powerpc/kernel/idle_book3e.S +++ b/arch/powerpc/kernel/idle_book3e.S @@ -43,7 +43,7 @@ _GLOBAL(\name)  	 */  #ifdef CONFIG_TRACE_IRQFLAGS  	stdu    r1,-128(r1) -	bl	.trace_hardirqs_on +	bl	trace_hardirqs_on  	addi    r1,r1,128  #endif  	li	r0,1 diff --git a/arch/powerpc/kernel/idle_power4.S b/arch/powerpc/kernel/idle_power4.S index e3edaa18991..f57a19348bd 100644 --- a/arch/powerpc/kernel/idle_power4.S +++ b/arch/powerpc/kernel/idle_power4.S @@ -46,7 +46,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_CAN_NAP)  	mflr	r0  	std	r0,16(r1)  	stdu    r1,-128(r1) -	bl	.trace_hardirqs_on +	bl	trace_hardirqs_on  	addi    r1,r1,128  	ld	r0,16(r1)  	mtlr	r0 diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S index e11863f4e59..5cf3d367190 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_power7.S @@ -17,20 +17,35 @@  #include <asm/ppc-opcode.h>  #include <asm/hw_irq.h>  #include <asm/kvm_book3s_asm.h> +#include <asm/opal.h>  #undef DEBUG -	.text +/* Idle state entry routines */ -_GLOBAL(power7_idle) -	/* Now check if user or arch enabled NAP mode */ -	LOAD_REG_ADDRBASE(r3,powersave_nap) -	lwz	r4,ADDROFF(powersave_nap)(r3) -	cmpwi	0,r4,0 -	beqlr -	/* fall through */ +#define	IDLE_STATE_ENTER_SEQ(IDLE_INST)				\ +	/* Magic NAP/SLEEP/WINKLE mode enter sequence */	\ +	std	r0,0(r1);					\ +	ptesync;						\ +	ld	r0,0(r1);					\ +1:	cmp	cr0,r0,r0;					\ +	bne	1b;						\ +	IDLE_INST;						\ +	b	. -_GLOBAL(power7_nap) +	.text + +/* + * Pass requested state in r3: + * 	0 - nap + * 	1 - sleep + * + * To check IRQ_HAPPENED in r4 + * 	0 - don't check + * 	1 - check + */ +_GLOBAL(power7_powersave_common) +	/* Use r3 to pass state nap/sleep/winkle */  	/* NAP is a state loss, we create a regs frame on the  	 * stack, fill it up with the state we care about and  	 * stick a pointer to it in PACAR1. We really only @@ -47,7 +62,7 @@ _GLOBAL(power7_nap)  	/* Make sure FPU, VSX etc... are flushed as we may lose  	 * state when going to nap mode  	 */ -	bl	.discard_lazy_cpu_state +	bl	discard_lazy_cpu_state  #endif /* CONFIG_SMP */  	/* Hard disable interrupts */ @@ -60,6 +75,8 @@ _GLOBAL(power7_nap)  	lbz	r0,PACAIRQHAPPENED(r13)  	cmpwi	cr0,r0,0  	beq	1f +	cmpwi	cr0,r4,0 +	beq	1f  	addi	r1,r1,INT_FRAME_SIZE  	ld	r0,16(r1)  	mtlr	r0 @@ -79,25 +96,70 @@ _GLOBAL(power7_nap)  	/* Continue saving state */  	SAVE_GPR(2, r1)  	SAVE_NVGPRS(r1) -	mfcr	r3 -	std	r3,_CCR(r1) +	mfcr	r4 +	std	r4,_CCR(r1)  	std	r9,_MSR(r1)  	std	r1,PACAR1(r13) -#ifdef CONFIG_KVM_BOOK3S_64_HV +_GLOBAL(power7_enter_nap_mode) +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE  	/* Tell KVM we're napping */  	li	r4,KVM_HWTHREAD_IN_NAP  	stb	r4,HSTATE_HWTHREAD_STATE(r13)  #endif +	cmpwi	cr0,r3,1 +	beq	2f +	IDLE_STATE_ENTER_SEQ(PPC_NAP) +	/* No return */ +2:	IDLE_STATE_ENTER_SEQ(PPC_SLEEP) +	/* No return */ -	/* Magic NAP mode enter sequence */ -	std	r0,0(r1) -	ptesync -	ld	r0,0(r1) -1:	cmp	cr0,r0,r0 -	bne	1b -	PPC_NAP -	b	. 
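/*
 * C-level view of the new idle entry convention (a sketch; the real
 * prototypes and call sites live in platform code): the state selector
 * travels in r3 (0 = nap, 1 = sleep) and the "check PACAIRQHAPPENED"
 * flag in r4, as the comment on power7_powersave_common above says.
 */
extern void power7_nap(int check_irq);	/* r3 becomes the r4 flag, state 0 */
extern void power7_sleep(void);		/* state 1 */

static void enter_idle(int deep)	/* hypothetical caller */
{
	if (deep)
		power7_sleep();	/* state loss; resumes via wakeup handlers */
	else
		power7_nap(1);	/* may return early if an irq is pending */
}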
+_GLOBAL(power7_idle) +	/* Now check if user or arch enabled NAP mode */ +	LOAD_REG_ADDRBASE(r3,powersave_nap) +	lwz	r4,ADDROFF(powersave_nap)(r3) +	cmpwi	0,r4,0 +	beqlr +	li	r3, 1 +	/* fall through */ + +_GLOBAL(power7_nap) +	mr	r4,r3 +	li	r3,0 +	b	power7_powersave_common +	/* No return */ + +_GLOBAL(power7_sleep) +	li	r3,1 +	li	r4,1 +	b	power7_powersave_common +	/* No return */ + +_GLOBAL(power7_wakeup_tb_loss) +	ld	r2,PACATOC(r13); +	ld	r1,PACAR1(r13) + +	/* Time base re-sync */ +	li	r0,OPAL_RESYNC_TIMEBASE +	LOAD_REG_ADDR(r11,opal); +	ld	r12,8(r11); +	ld	r2,0(r11); +	mtctr	r12 +	bctrl + +	/* TODO: Check r3 for failure */ + +	REST_NVGPRS(r1) +	REST_GPR(2, r1) +	ld	r3,_CCR(r1) +	ld	r4,_MSR(r1) +	ld	r5,_NIP(r1) +	addi	r1,r1,INT_FRAME_SIZE +	mtcr	r3 +	mfspr	r3,SPRN_SRR1		/* Return SRR1 */ +	mtspr	SPRN_SRR1,r4 +	mtspr	SPRN_SRR0,r5 +	rfid  _GLOBAL(power7_wakeup_loss)  	ld	r1,PACAR1(r13) @@ -115,7 +177,7 @@ _GLOBAL(power7_wakeup_loss)  _GLOBAL(power7_wakeup_noloss)  	lbz	r0,PACA_NAPSTATELOST(r13)  	cmpwi	r0,0 -	bne	.power7_wakeup_loss +	bne	power7_wakeup_loss  	ld	r1,PACAR1(r13)  	ld	r4,_MSR(r1)  	ld	r5,_NIP(r1) diff --git a/arch/powerpc/kernel/iomap.c b/arch/powerpc/kernel/iomap.c index 97a3715ac8b..12e48d56f77 100644 --- a/arch/powerpc/kernel/iomap.c +++ b/arch/powerpc/kernel/iomap.c @@ -3,7 +3,6 @@   *   * (C) Copyright 2004 Linus Torvalds   */ -#include <linux/init.h>  #include <linux/pci.h>  #include <linux/mm.h>  #include <linux/export.h> @@ -24,7 +23,7 @@ unsigned int ioread16(void __iomem *addr)  }  unsigned int ioread16be(void __iomem *addr)  { -	return in_be16(addr); +	return readw_be(addr);  }  unsigned int ioread32(void __iomem *addr)  { @@ -32,7 +31,7 @@ unsigned int ioread32(void __iomem *addr)  }  unsigned int ioread32be(void __iomem *addr)  { -	return in_be32(addr); +	return readl_be(addr);  }  EXPORT_SYMBOL(ioread8);  EXPORT_SYMBOL(ioread16); @@ -50,7 +49,7 @@ void iowrite16(u16 val, void __iomem *addr)  }  void iowrite16be(u16 val, void __iomem *addr)  { -	out_be16(addr, val); +	writew_be(val, addr);  }  void iowrite32(u32 val, void __iomem *addr)  { @@ -58,7 +57,7 @@ void iowrite32(u32 val, void __iomem *addr)  }  void iowrite32be(u32 val, void __iomem *addr)  { -	out_be32(addr, val); +	writel_be(val, addr);  }  EXPORT_SYMBOL(iowrite8);  EXPORT_SYMBOL(iowrite16); @@ -76,15 +75,15 @@ EXPORT_SYMBOL(iowrite32be);   */  void ioread8_rep(void __iomem *addr, void *dst, unsigned long count)  { -	_insb((u8 __iomem *) addr, dst, count); +	readsb(addr, dst, count);  }  void ioread16_rep(void __iomem *addr, void *dst, unsigned long count)  { -	_insw_ns((u16 __iomem *) addr, dst, count); +	readsw(addr, dst, count);  }  void ioread32_rep(void __iomem *addr, void *dst, unsigned long count)  { -	_insl_ns((u32 __iomem *) addr, dst, count); +	readsl(addr, dst, count);  }  EXPORT_SYMBOL(ioread8_rep);  EXPORT_SYMBOL(ioread16_rep); @@ -92,15 +91,15 @@ EXPORT_SYMBOL(ioread32_rep);  void iowrite8_rep(void __iomem *addr, const void *src, unsigned long count)  { -	_outsb((u8 __iomem *) addr, src, count); +	writesb(addr, src, count);  }  void iowrite16_rep(void __iomem *addr, const void *src, unsigned long count)  { -	_outsw_ns((u16 __iomem *) addr, src, count); +	writesw(addr, src, count);  }  void iowrite32_rep(void __iomem *addr, const void *src, unsigned long count)  { -	_outsl_ns((u32 __iomem *) addr, src, count); +	writesl(addr, src, count);  }  EXPORT_SYMBOL(iowrite8_rep);  EXPORT_SYMBOL(iowrite16_rep); diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 
0adab06ce5c..88e3ec6e1d9 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -251,14 +251,13 @@ again:  	if (dev)  		boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, -				      1 << IOMMU_PAGE_SHIFT); +				      1 << tbl->it_page_shift);  	else -		boundary_size = ALIGN(1UL << 32, 1 << IOMMU_PAGE_SHIFT); +		boundary_size = ALIGN(1UL << 32, 1 << tbl->it_page_shift);  	/* 4GB boundary for iseries_hv_alloc and iseries_hv_map */ -	n = iommu_area_alloc(tbl->it_map, limit, start, npages, -			     tbl->it_offset, boundary_size >> IOMMU_PAGE_SHIFT, -			     align_mask); +	n = iommu_area_alloc(tbl->it_map, limit, start, npages, tbl->it_offset, +			     boundary_size >> tbl->it_page_shift, align_mask);  	if (n == -1) {  		if (likely(pass == 0)) {  			/* First try the pool from the start */ @@ -320,12 +319,12 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,  		return DMA_ERROR_CODE;  	entry += tbl->it_offset;	/* Offset into real TCE table */ -	ret = entry << IOMMU_PAGE_SHIFT;	/* Set the return dma address */ +	ret = entry << tbl->it_page_shift;	/* Set the return dma address */  	/* Put the TCEs in the HW table */  	build_fail = ppc_md.tce_build(tbl, entry, npages, -	                              (unsigned long)page & IOMMU_PAGE_MASK, -	                              direction, attrs); +				      (unsigned long)page & +				      IOMMU_PAGE_MASK(tbl), direction, attrs);  	/* ppc_md.tce_build() only returns non-zero for transient errors.  	 * Clean up the table bitmap in this case and return @@ -352,7 +351,7 @@ static bool iommu_free_check(struct iommu_table *tbl, dma_addr_t dma_addr,  {  	unsigned long entry, free_entry; -	entry = dma_addr >> IOMMU_PAGE_SHIFT; +	entry = dma_addr >> tbl->it_page_shift;  	free_entry = entry - tbl->it_offset;  	if (((free_entry + npages) > tbl->it_size) || @@ -401,7 +400,7 @@ static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,  	unsigned long flags;  	struct iommu_pool *pool; -	entry = dma_addr >> IOMMU_PAGE_SHIFT; +	entry = dma_addr >> tbl->it_page_shift;  	free_entry = entry - tbl->it_offset;  	pool = get_pool(tbl, free_entry); @@ -468,13 +467,13 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,  		}  		/* Allocate iommu entries for that segment */  		vaddr = (unsigned long) sg_virt(s); -		npages = iommu_num_pages(vaddr, slen, IOMMU_PAGE_SIZE); +		npages = iommu_num_pages(vaddr, slen, IOMMU_PAGE_SIZE(tbl));  		align = 0; -		if (IOMMU_PAGE_SHIFT < PAGE_SHIFT && slen >= PAGE_SIZE && +		if (tbl->it_page_shift < PAGE_SHIFT && slen >= PAGE_SIZE &&  		    (vaddr & ~PAGE_MASK) == 0) -			align = PAGE_SHIFT - IOMMU_PAGE_SHIFT; +			align = PAGE_SHIFT - tbl->it_page_shift;  		entry = iommu_range_alloc(dev, tbl, npages, &handle, -					  mask >> IOMMU_PAGE_SHIFT, align); +					  mask >> tbl->it_page_shift, align);  		DBG("  - vaddr: %lx, size: %lx\n", vaddr, slen); @@ -489,16 +488,16 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,  		/* Convert entry to a dma_addr_t */  		entry += tbl->it_offset; -		dma_addr = entry << IOMMU_PAGE_SHIFT; -		dma_addr |= (s->offset & ~IOMMU_PAGE_MASK); +		dma_addr = entry << tbl->it_page_shift; +		dma_addr |= (s->offset & ~IOMMU_PAGE_MASK(tbl));  		DBG("  - %lu pages, entry: %lx, dma_addr: %lx\n",  			    npages, entry, dma_addr);  		/* Insert into HW table */  		build_fail = ppc_md.tce_build(tbl, entry, npages, -		                              vaddr & IOMMU_PAGE_MASK, -		                              direction, attrs); +					      vaddr & 
IOMMU_PAGE_MASK(tbl), +					      direction, attrs);  		if(unlikely(build_fail))  			goto failure; @@ -559,9 +558,9 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,  		if (s->dma_length != 0) {  			unsigned long vaddr, npages; -			vaddr = s->dma_address & IOMMU_PAGE_MASK; +			vaddr = s->dma_address & IOMMU_PAGE_MASK(tbl);  			npages = iommu_num_pages(s->dma_address, s->dma_length, -						 IOMMU_PAGE_SIZE); +						 IOMMU_PAGE_SIZE(tbl));  			__iommu_free(tbl, vaddr, npages);  			s->dma_address = DMA_ERROR_CODE;  			s->dma_length = 0; @@ -592,7 +591,7 @@ void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,  		if (sg->dma_length == 0)  			break;  		npages = iommu_num_pages(dma_handle, sg->dma_length, -					 IOMMU_PAGE_SIZE); +					 IOMMU_PAGE_SIZE(tbl));  		__iommu_free(tbl, dma_handle, npages);  		sg = sg_next(sg);  	} @@ -661,7 +660,7 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)  	/* number of bytes needed for the bitmap */  	sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long); -	page = alloc_pages_node(nid, GFP_ATOMIC, get_order(sz)); +	page = alloc_pages_node(nid, GFP_KERNEL, get_order(sz));  	if (!page)  		panic("iommu_init_table: Can't allocate %ld bytes\n", sz);  	tbl->it_map = page_address(page); @@ -676,7 +675,7 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)  		set_bit(0, tbl->it_map);  	/* We only split the IOMMU table if we have 1GB or more of space */ -	if ((tbl->it_size << IOMMU_PAGE_SHIFT) >= (1UL * 1024 * 1024 * 1024)) +	if ((tbl->it_size << tbl->it_page_shift) >= (1UL * 1024 * 1024 * 1024))  		tbl->nr_pools = IOMMU_NR_POOLS;  	else  		tbl->nr_pools = 1; @@ -768,16 +767,16 @@ dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl,  	vaddr = page_address(page) + offset;  	uaddr = (unsigned long)vaddr; -	npages = iommu_num_pages(uaddr, size, IOMMU_PAGE_SIZE); +	npages = iommu_num_pages(uaddr, size, IOMMU_PAGE_SIZE(tbl));  	if (tbl) {  		align = 0; -		if (IOMMU_PAGE_SHIFT < PAGE_SHIFT && size >= PAGE_SIZE && +		if (tbl->it_page_shift < PAGE_SHIFT && size >= PAGE_SIZE &&  		    ((unsigned long)vaddr & ~PAGE_MASK) == 0) -			align = PAGE_SHIFT - IOMMU_PAGE_SHIFT; +			align = PAGE_SHIFT - tbl->it_page_shift;  		dma_handle = iommu_alloc(dev, tbl, vaddr, npages, direction, -					 mask >> IOMMU_PAGE_SHIFT, align, +					 mask >> tbl->it_page_shift, align,  					 attrs);  		if (dma_handle == DMA_ERROR_CODE) {  			if (printk_ratelimit())  { @@ -786,7 +785,7 @@ dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl,  					 npages);  			}  		} else -			dma_handle |= (uaddr & ~IOMMU_PAGE_MASK); +			dma_handle |= (uaddr & ~IOMMU_PAGE_MASK(tbl));  	}  	return dma_handle; @@ -801,7 +800,8 @@ void iommu_unmap_page(struct iommu_table *tbl, dma_addr_t dma_handle,  	BUG_ON(direction == DMA_NONE);  	if (tbl) { -		npages = iommu_num_pages(dma_handle, size, IOMMU_PAGE_SIZE); +		npages = iommu_num_pages(dma_handle, size, +					 IOMMU_PAGE_SIZE(tbl));  		iommu_free(tbl, dma_handle, npages);  	}  } @@ -845,10 +845,10 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,  	memset(ret, 0, size);  	/* Set up tces to cover the allocated range */ -	nio_pages = size >> IOMMU_PAGE_SHIFT; -	io_order = get_iommu_order(size); +	nio_pages = size >> tbl->it_page_shift; +	io_order = get_iommu_order(size, tbl);  	mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL, -			      mask >> IOMMU_PAGE_SHIFT, io_order, NULL); +			      mask >> tbl->it_page_shift, 
io_order, NULL);
 	if (mapping == DMA_ERROR_CODE) {
 		free_pages((unsigned long)ret, order);
 		return NULL;
@@ -864,7 +864,7 @@ void iommu_free_coherent(struct iommu_table *tbl, size_t size,
 		unsigned int nio_pages;
 
 		size = PAGE_ALIGN(size);
-		nio_pages = size >> IOMMU_PAGE_SHIFT;
+		nio_pages = size >> tbl->it_page_shift;
 		iommu_free(tbl, dma_handle, nio_pages);
 		size = PAGE_ALIGN(size);
 		free_pages((unsigned long)vaddr, get_order(size));
@@ -935,10 +935,10 @@ int iommu_tce_clear_param_check(struct iommu_table *tbl,
 	if (tce_value)
 		return -EINVAL;
 
-	if (ioba & ~IOMMU_PAGE_MASK)
+	if (ioba & ~IOMMU_PAGE_MASK(tbl))
 		return -EINVAL;
 
-	ioba >>= IOMMU_PAGE_SHIFT;
+	ioba >>= tbl->it_page_shift;
 	if (ioba < tbl->it_offset)
 		return -EINVAL;
 
@@ -955,13 +955,13 @@ int iommu_tce_put_param_check(struct iommu_table *tbl,
 	if (!(tce & (TCE_PCI_WRITE | TCE_PCI_READ)))
 		return -EINVAL;
 
-	if (tce & ~(IOMMU_PAGE_MASK | TCE_PCI_WRITE | TCE_PCI_READ))
+	if (tce & ~(IOMMU_PAGE_MASK(tbl) | TCE_PCI_WRITE | TCE_PCI_READ))
 		return -EINVAL;
 
-	if (ioba & ~IOMMU_PAGE_MASK)
+	if (ioba & ~IOMMU_PAGE_MASK(tbl))
 		return -EINVAL;
 
-	ioba >>= IOMMU_PAGE_SHIFT;
+	ioba >>= tbl->it_page_shift;
 	if (ioba < tbl->it_offset)
 		return -EINVAL;
 
@@ -1037,7 +1037,7 @@ int iommu_tce_build(struct iommu_table *tbl, unsigned long entry,
 
 	/* if (unlikely(ret))
 		pr_err("iommu_tce: %s failed on hwaddr=%lx ioba=%lx kva=%lx ret=%d\n",
-				__func__, hwaddr, entry << IOMMU_PAGE_SHIFT,
+			__func__, hwaddr, entry << IOMMU_PAGE_SHIFT(tbl),
 				hwaddr, ret); */
 
 	return ret;
@@ -1049,14 +1049,14 @@ int iommu_put_tce_user_mode(struct iommu_table *tbl, unsigned long entry,
 {
 	int ret;
 	struct page *page = NULL;
-	unsigned long hwaddr, offset = tce & IOMMU_PAGE_MASK & ~PAGE_MASK;
+	unsigned long hwaddr, offset = tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;
 	enum dma_data_direction direction = iommu_tce_direction(tce);
 
 	ret = get_user_pages_fast(tce & PAGE_MASK, 1,
 			direction != DMA_TO_DEVICE, &page);
 	if (unlikely(ret != 1)) {
 		/* pr_err("iommu_tce: get_user_pages_fast failed tce=%lx ioba=%lx ret=%d\n",
-				tce, entry << IOMMU_PAGE_SHIFT, ret); */
+				tce, entry << IOMMU_PAGE_SHIFT(tbl), ret); */
 		return -EFAULT;
 	}
 	hwaddr = (unsigned long) page_address(page) + offset;
@@ -1067,7 +1067,7 @@ int iommu_put_tce_user_mode(struct iommu_table *tbl, unsigned long entry,
 
 	if (ret < 0)
 		pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%d\n",
-				__func__, entry << IOMMU_PAGE_SHIFT, tce, ret);
+			__func__, entry << tbl->it_page_shift, tce, ret);
 
 	return ret;
 }
@@ -1088,6 +1088,14 @@ int iommu_take_ownership(struct iommu_table *tbl)
 	memset(tbl->it_map, 0xff, sz);
 	iommu_clear_tces_and_put_pages(tbl, tbl->it_offset, tbl->it_size);
 
+	/*
+	 * Disable iommu bypass, otherwise the user can DMA to all of
+	 * our physical memory via the bypass window instead of just
+	 * the pages that have been explicitly mapped into the iommu
+	 */
+	if (tbl->set_bypass)
+		tbl->set_bypass(tbl, false);
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(iommu_take_ownership);
@@ -1102,10 +1110,14 @@ void iommu_release_ownership(struct iommu_table *tbl)
 	/* Restore bit#0 set by iommu_init_table() */
 	if (tbl->it_offset == 0)
 		set_bit(0, tbl->it_map);
+
+	/* The kernel owns the device now, so we can restore the iommu bypass */
+	if (tbl->set_bypass)
+		tbl->set_bypass(tbl, true);
 }
 EXPORT_SYMBOL_GPL(iommu_release_ownership);
 
-static int iommu_add_device(struct device *dev)
+int iommu_add_device(struct device *dev)
 {
 	struct
iommu_table *tbl;  	int ret = 0; @@ -1127,6 +1139,12 @@ static int iommu_add_device(struct device *dev)  	pr_debug("iommu_tce: adding %s to iommu group %d\n",  			dev_name(dev), iommu_group_id(tbl->it_group)); +	if (PAGE_SIZE < IOMMU_PAGE_SIZE(tbl)) { +		pr_err("iommu_tce: unsupported iommu page size."); +		pr_err("%s has not been added\n", dev_name(dev)); +		return -EINVAL; +	} +  	ret = iommu_group_add_device(tbl->it_group, dev);  	if (ret < 0)  		pr_err("iommu_tce: %s has not been added, ret=%d\n", @@ -1134,52 +1152,23 @@ static int iommu_add_device(struct device *dev)  	return ret;  } +EXPORT_SYMBOL_GPL(iommu_add_device); -static void iommu_del_device(struct device *dev) -{ -	iommu_group_remove_device(dev); -} - -static int iommu_bus_notifier(struct notifier_block *nb, -			      unsigned long action, void *data) +void iommu_del_device(struct device *dev)  { -	struct device *dev = data; - -	switch (action) { -	case BUS_NOTIFY_ADD_DEVICE: -		return iommu_add_device(dev); -	case BUS_NOTIFY_DEL_DEVICE: -		iommu_del_device(dev); -		return 0; -	default: -		return 0; +	/* +	 * Some devices might not have IOMMU table and group +	 * and we needn't detach them from the associated +	 * IOMMU groups +	 */ +	if (!dev->iommu_group) { +		pr_debug("iommu_tce: skipping device %s with no tbl\n", +			 dev_name(dev)); +		return;  	} -} - -static struct notifier_block tce_iommu_bus_nb = { -	.notifier_call = iommu_bus_notifier, -}; - -static int __init tce_iommu_init(void) -{ -	struct pci_dev *pdev = NULL; - -	BUILD_BUG_ON(PAGE_SIZE < IOMMU_PAGE_SIZE); - -	for_each_pci_dev(pdev) -		iommu_add_device(&pdev->dev); - -	bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb); -	return 0; -} - -subsys_initcall_sync(tce_iommu_init); - -#else -void iommu_register_group(struct iommu_table *tbl, -		int pci_domain_number, unsigned long pe_num) -{ +	iommu_group_remove_device(dev);  } +EXPORT_SYMBOL_GPL(iommu_del_device);  #endif /* CONFIG_IOMMU_API */ diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 57d286a78f8..248ee7e5beb 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -304,7 +304,7 @@ void notrace restore_interrupts(void)   * being re-enabled and generally sanitized the lazy irq state,   * and in the latter case it will leave with interrupts hard   * disabled and marked as such, so the local_irq_enable() call - * in cpu_idle() will properly re-enable everything. + * in arch_cpu_idle() will properly re-enable everything.   
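/*
 * Assumed shape of a prep_irq_for_idle() caller (hedged sketch, not a
 * verbatim call site; the real ones are in platform idle code): a false
 * return means a lazily-masked interrupt is already pending and the low
 * power entry must be skipped.
 */
static void idle_step(void)
{
	HMT_low();
	if (prep_irq_for_idle())	/* on success, EE is hard-enabled */
		power7_idle();
	HMT_medium();
}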
*/  bool prep_irq_for_idle(void)  { @@ -354,8 +354,13 @@ int arch_show_interrupts(struct seq_file *p, int prec)  	seq_printf(p, "%*s: ", prec, "LOC");  	for_each_online_cpu(j) -		seq_printf(p, "%10u ", per_cpu(irq_stat, j).timer_irqs); -        seq_printf(p, "  Local timer interrupts\n"); +		seq_printf(p, "%10u ", per_cpu(irq_stat, j).timer_irqs_event); +        seq_printf(p, "  Local timer interrupts for timer event device\n"); + +	seq_printf(p, "%*s: ", prec, "LOC"); +	for_each_online_cpu(j) +		seq_printf(p, "%10u ", per_cpu(irq_stat, j).timer_irqs_others); +        seq_printf(p, "  Local timer interrupts for others\n");  	seq_printf(p, "%*s: ", prec, "SPU");  	for_each_online_cpu(j) @@ -389,11 +394,12 @@ int arch_show_interrupts(struct seq_file *p, int prec)   */  u64 arch_irq_stat_cpu(unsigned int cpu)  { -	u64 sum = per_cpu(irq_stat, cpu).timer_irqs; +	u64 sum = per_cpu(irq_stat, cpu).timer_irqs_event;  	sum += per_cpu(irq_stat, cpu).pmu_irqs;  	sum += per_cpu(irq_stat, cpu).mce_exceptions;  	sum += per_cpu(irq_stat, cpu).spurious_irqs; +	sum += per_cpu(irq_stat, cpu).timer_irqs_others;  #ifdef CONFIG_PPC_DOORBELL  	sum += per_cpu(irq_stat, cpu).doorbell_irqs;  #endif @@ -459,7 +465,6 @@ static inline void check_stack_overflow(void)  void __do_irq(struct pt_regs *regs)  { -	struct irq_desc *desc;  	unsigned int irq;  	irq_enter(); @@ -481,11 +486,8 @@ void __do_irq(struct pt_regs *regs)  	/* And finally process it */  	if (unlikely(irq == NO_IRQ))  		__get_cpu_var(irq_stat).spurious_irqs++; -	else { -		desc = irq_to_desc(irq); -		if (likely(desc)) -			desc->handle_irq(irq, desc); -	} +	else +		generic_handle_irq(irq);  	trace_irq_exit(regs); @@ -495,14 +497,15 @@ void __do_irq(struct pt_regs *regs)  void do_IRQ(struct pt_regs *regs)  {  	struct pt_regs *old_regs = set_irq_regs(regs); -	struct thread_info *curtp, *irqtp; +	struct thread_info *curtp, *irqtp, *sirqtp;  	/* Switch to the irq stack to handle this */  	curtp = current_thread_info();  	irqtp = hardirq_ctx[raw_smp_processor_id()]; +	sirqtp = softirq_ctx[raw_smp_processor_id()];  	/* Already there ? 
*/ -	if (unlikely(curtp == irqtp)) { +	if (unlikely(curtp == irqtp || curtp == sirqtp)) {  		__do_irq(regs);  		set_irq_regs(old_regs);  		return; @@ -552,8 +555,13 @@ void exc_lvl_ctx_init(void)  #ifdef CONFIG_PPC64  		cpu_nr = i;  #else +#ifdef CONFIG_SMP  		cpu_nr = get_hard_smp_processor_id(i); +#else +		cpu_nr = 0; +#endif  #endif +  		memset((void *)critirq_ctx[cpu_nr], 0, THREAD_SIZE);  		tp = critirq_ctx[cpu_nr];  		tp->cpu = cpu_nr; @@ -593,7 +601,7 @@ void irq_ctx_init(void)  	}  } -static inline void do_softirq_onstack(void) +void do_softirq_own_stack(void)  {  	struct thread_info *curtp, *irqtp; @@ -611,21 +619,6 @@ static inline void do_softirq_onstack(void)  		set_bits(irqtp->flags, &curtp->flags);  } -void do_softirq(void) -{ -	unsigned long flags; - -	if (in_interrupt()) -		return; - -	local_irq_save(flags); - -	if (local_softirq_pending()) -		do_softirq_onstack(); - -	local_irq_restore(flags); -} -  irq_hw_number_t virq_to_hw(unsigned int virq)  {  	struct irq_data *irq_data = irq_get_irq_data(virq); diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index c1eef241017..8504657379f 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -15,7 +15,6 @@   */  #include <linux/kernel.h> -#include <linux/init.h>  #include <linux/kgdb.h>  #include <linux/smp.h>  #include <linux/signal.h> @@ -151,15 +150,16 @@ static int kgdb_handle_breakpoint(struct pt_regs *regs)  	return 1;  } +static DEFINE_PER_CPU(struct thread_info, kgdb_thread_info);  static int kgdb_singlestep(struct pt_regs *regs)  {  	struct thread_info *thread_info, *exception_thread_info; -	struct thread_info *backup_current_thread_info; +	struct thread_info *backup_current_thread_info = +		&__get_cpu_var(kgdb_thread_info);  	if (user_mode(regs))  		return 0; -	backup_current_thread_info = kmalloc(sizeof(struct thread_info), GFP_KERNEL);  	/*  	 * On Book E and perhaps other processors, singlestep is handled on  	 * the critical exception stack.  This causes current_thread_info() @@ -185,7 +185,6 @@ static int kgdb_singlestep(struct pt_regs *regs)  		/* Restore current_thread_info lastly. */  		memcpy(exception_thread_info, backup_current_thread_info, sizeof *thread_info); -	kfree(backup_current_thread_info);  	return 1;  } diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 2156ea90eb5..2f72af82513 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -32,6 +32,7 @@  #include <linux/module.h>  #include <linux/kdebug.h>  #include <linux/slab.h> +#include <asm/code-patching.h>  #include <asm/cacheflush.h>  #include <asm/sstep.h>  #include <asm/uaccess.h> @@ -429,7 +430,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)  	case KPROBE_HIT_SSDONE:  		/*  		 * We increment the nmissed count for accounting, -		 * we can also use npre/npostfault count for accouting +		 * we can also use npre/npostfault count for accounting  		 * these specific fault cases.  		 
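/*
 * For reference, the generic code that replaces the removed powerpc
 * do_softirq() above looks essentially like this (simplified from
 * kernel/softirq.c); the arch now only provides do_softirq_own_stack(),
 * which switches to softirq_ctx:
 */
void do_softirq(void)
{
	unsigned long flags;

	if (in_interrupt())
		return;

	local_irq_save(flags);
	if (local_softirq_pending())
		do_softirq_own_stack();	/* arch hook defined above */
	local_irq_restore(flags);
}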
*/  		kprobes_inc_nmissed_count(cur); @@ -491,12 +492,10 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,  	return ret;  } -#ifdef CONFIG_PPC64  unsigned long arch_deref_entry_point(void *entry)  { -	return ((func_descr_t *)entry)->entry; +	return ppc_global_function_entry(entry);  } -#endif  int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)  { @@ -508,8 +507,12 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)  	/* setup return addr to the jprobe handler routine */  	regs->nip = arch_deref_entry_point(jp->entry);  #ifdef CONFIG_PPC64 +#if defined(_CALL_ELF) && _CALL_ELF == 2 +	regs->gpr[12] = (unsigned long)jp->entry; +#else  	regs->gpr[2] = (unsigned long)(((func_descr_t *)jp->entry)->toc);  #endif +#endif  	return 1;  } diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index db28032e320..33aa4ddf597 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -74,7 +74,7 @@  #define KVM_INST_MTSRIN		0x7c0001e4  static bool kvm_patching_worked = true; -static char kvm_tmp[1024 * 1024]; +char kvm_tmp[1024 * 1024];  static int kvm_tmp_index;  static inline void kvm_patch_ins(u32 *inst, u32 new_inst) @@ -413,13 +413,13 @@ static void kvm_map_magic_page(void *data)  {  	u32 *features = data; -	ulong in[8]; +	ulong in[8] = {0};  	ulong out[8];  	in[0] = KVM_MAGIC_PAGE; -	in[1] = KVM_MAGIC_PAGE; +	in[1] = KVM_MAGIC_PAGE | MAGIC_PAGE_FLAG_NOT_MAPPED_NX; -	kvm_hypercall(in, out, KVM_HCALL_TOKEN(KVM_HC_PPC_MAP_MAGIC_PAGE)); +	epapr_hypercall(in, out, KVM_HCALL_TOKEN(KVM_HC_PPC_MAP_MAGIC_PAGE));  	*features = out[0];  } @@ -711,43 +711,6 @@ static void kvm_use_magic_page(void)  			 kvm_patching_worked ? "worked" : "failed");  } -unsigned long kvm_hypercall(unsigned long *in, -			    unsigned long *out, -			    unsigned long nr) -{ -	unsigned long register r0 asm("r0"); -	unsigned long register r3 asm("r3") = in[0]; -	unsigned long register r4 asm("r4") = in[1]; -	unsigned long register r5 asm("r5") = in[2]; -	unsigned long register r6 asm("r6") = in[3]; -	unsigned long register r7 asm("r7") = in[4]; -	unsigned long register r8 asm("r8") = in[5]; -	unsigned long register r9 asm("r9") = in[6]; -	unsigned long register r10 asm("r10") = in[7]; -	unsigned long register r11 asm("r11") = nr; -	unsigned long register r12 asm("r12"); - -	asm volatile("bl	epapr_hypercall_start" -		     : "=r"(r0), "=r"(r3), "=r"(r4), "=r"(r5), "=r"(r6), -		       "=r"(r7), "=r"(r8), "=r"(r9), "=r"(r10), "=r"(r11), -		       "=r"(r12) -		     : "r"(r3), "r"(r4), "r"(r5), "r"(r6), "r"(r7), "r"(r8), -		       "r"(r9), "r"(r10), "r"(r11) -		     : "memory", "cc", "xer", "ctr", "lr"); - -	out[0] = r4; -	out[1] = r5; -	out[2] = r6; -	out[3] = r7; -	out[4] = r8; -	out[5] = r9; -	out[6] = r10; -	out[7] = r11; - -	return r3; -} -EXPORT_SYMBOL_GPL(kvm_hypercall); -  static __init void kvm_free_tmp(void)  {  	free_reserved_area(&kvm_tmp[kvm_tmp_index], diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index 22e88dd2f34..936258881c9 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -35,7 +35,7 @@ static struct legacy_serial_info {  	phys_addr_t			taddr;  } legacy_serial_infos[MAX_LEGACY_SERIAL_PORTS]; -static struct __initdata of_device_id legacy_serial_parents[] = { +static struct of_device_id legacy_serial_parents[] __initdata = {  	{.type = "soc",},  	{.type = "tsi-bridge",},  	{.type = "opb", }, @@ -48,6 +48,9 @@ static struct __initdata of_device_id 
legacy_serial_parents[] = {  static unsigned int legacy_serial_count;  static int legacy_serial_console = -1; +static const upf_t legacy_port_flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | +	UPF_SHARE_IRQ | UPF_FIXED_PORT; +  static unsigned int tsi_serial_in(struct uart_port *p, int offset)  {  	unsigned int tmp; @@ -71,8 +74,9 @@ static int __init add_legacy_port(struct device_node *np, int want_index,  				  phys_addr_t taddr, unsigned long irq,  				  upf_t flags, int irq_check_parent)  { -	const __be32 *clk, *spd; +	const __be32 *clk, *spd, *rs;  	u32 clock = BASE_BAUD * 16; +	u32 shift = 0;  	int index;  	/* get clock freq. if present */ @@ -83,6 +87,11 @@ static int __init add_legacy_port(struct device_node *np, int want_index,  	/* get default speed if present */  	spd = of_get_property(np, "current-speed", NULL); +	/* get register shift if present */ +	rs = of_get_property(np, "reg-shift", NULL); +	if (rs && *rs) +		shift = be32_to_cpup(rs); +  	/* If we have a location index, then try to use it */  	if (want_index >= 0 && want_index < MAX_LEGACY_SERIAL_PORTS)  		index = want_index; @@ -126,6 +135,7 @@ static int __init add_legacy_port(struct device_node *np, int want_index,  	legacy_serial_ports[index].uartclk = clock;  	legacy_serial_ports[index].irq = irq;  	legacy_serial_ports[index].flags = flags; +	legacy_serial_ports[index].regshift = shift;  	legacy_serial_infos[index].taddr = taddr;  	legacy_serial_infos[index].np = of_node_get(np);  	legacy_serial_infos[index].clock = clock; @@ -153,8 +163,6 @@ static int __init add_legacy_soc_port(struct device_node *np,  {  	u64 addr;  	const __be32 *addrp; -	upf_t flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_SHARE_IRQ -		| UPF_FIXED_PORT;  	struct device_node *tsi = of_get_parent(np);  	/* We only support ports that have a clock frequency properly @@ -163,9 +171,8 @@ static int __init add_legacy_soc_port(struct device_node *np,  	if (of_get_property(np, "clock-frequency", NULL) == NULL)  		return -1; -	/* if reg-shift or offset, don't try to use it */ -	if ((of_get_property(np, "reg-shift", NULL) != NULL) || -		(of_get_property(np, "reg-offset", NULL) != NULL)) +	/* if reg-offset don't try to use it */ +	if ((of_get_property(np, "reg-offset", NULL) != NULL))  		return -1;  	/* if rtas uses this device, don't try to use it as well */ @@ -185,9 +192,11 @@ static int __init add_legacy_soc_port(struct device_node *np,  	 * IO port value. It will be fixed up later along with the irq  	 */  	if (tsi && !strcmp(tsi->type, "tsi-bridge")) -		return add_legacy_port(np, -1, UPIO_TSI, addr, addr, NO_IRQ, flags, 0); +		return add_legacy_port(np, -1, UPIO_TSI, addr, addr, +				       NO_IRQ, legacy_port_flags, 0);  	else -		return add_legacy_port(np, -1, UPIO_MEM, addr, addr, NO_IRQ, flags, 0); +		return add_legacy_port(np, -1, UPIO_MEM, addr, addr, +				       NO_IRQ, legacy_port_flags, 0);  }  static int __init add_legacy_isa_port(struct device_node *np, @@ -233,7 +242,7 @@ static int __init add_legacy_isa_port(struct device_node *np,  	/* Add port, irq will be dealt with later */  	return add_legacy_port(np, index, UPIO_PORT, be32_to_cpu(reg[1]), -			       taddr, NO_IRQ, UPF_BOOT_AUTOCONF, 0); +			       taddr, NO_IRQ, legacy_port_flags, 0);  } @@ -306,7 +315,7 @@ static int __init add_legacy_pci_port(struct device_node *np,  	 * IO port value. 
It will be fixed up later along with the irq  	 */  	return add_legacy_port(np, index, iotype, base, addr, NO_IRQ, -			       UPF_BOOT_AUTOCONF, np != pci_dev); +			       legacy_port_flags, np != pci_dev);  }  #endif @@ -315,17 +324,20 @@ static void __init setup_legacy_serial_console(int console)  	struct legacy_serial_info *info = &legacy_serial_infos[console];  	struct plat_serial8250_port *port = &legacy_serial_ports[console];  	void __iomem *addr; +	unsigned int stride; + +	stride = 1 << port->regshift;  	/* Check if a translated MMIO address has been found */  	if (info->taddr) {  		addr = ioremap(info->taddr, 0x1000);  		if (addr == NULL)  			return; -		udbg_uart_init_mmio(addr, 1); +		udbg_uart_init_mmio(addr, stride);  	} else {  		/* Check if it's PIO and we support untranslated PIO */  		if (port->iotype == UPIO_PORT && isa_io_special) -			udbg_uart_init_pio(port->iobase, 1); +			udbg_uart_init_pio(port->iobase, stride);  		else  			return;  	} diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c index e1ec57e87b3..015ae55c186 100644 --- a/arch/powerpc/kernel/machine_kexec.c +++ b/arch/powerpc/kernel/machine_kexec.c @@ -18,6 +18,7 @@  #include <linux/ftrace.h>  #include <asm/machdep.h> +#include <asm/pgalloc.h>  #include <asm/prom.h>  #include <asm/sections.h> @@ -75,6 +76,17 @@ void arch_crash_save_vmcoreinfo(void)  #ifndef CONFIG_NEED_MULTIPLE_NODES  	VMCOREINFO_SYMBOL(contig_page_data);  #endif +#if defined(CONFIG_PPC64) && defined(CONFIG_SPARSEMEM_VMEMMAP) +	VMCOREINFO_SYMBOL(vmemmap_list); +	VMCOREINFO_SYMBOL(mmu_vmemmap_psize); +	VMCOREINFO_SYMBOL(mmu_psize_defs); +	VMCOREINFO_STRUCT_SIZE(vmemmap_backing); +	VMCOREINFO_OFFSET(vmemmap_backing, list); +	VMCOREINFO_OFFSET(vmemmap_backing, phys); +	VMCOREINFO_OFFSET(vmemmap_backing, virt_addr); +	VMCOREINFO_STRUCT_SIZE(mmu_psize_def); +	VMCOREINFO_OFFSET(mmu_psize_def, shift); +#endif  }  /* @@ -136,7 +148,7 @@ void __init reserve_crashkernel(void)  		 * a small SLB (128MB) since the crash kernel needs to place  		 * itself and some stacks to be in the first segment.  		 */ -		crashk_res.start = min(0x80000000ULL, (ppc64_rma_size / 2)); +		crashk_res.start = min(0x8000000ULL, (ppc64_rma_size / 2));  #else  		crashk_res.start = KDUMP_KERNELBASE;  #endif @@ -184,7 +196,9 @@ int overlaps_crashkernel(unsigned long start, unsigned long size)  /* Values we need to export to the second kernel via the device tree. 
*/  static phys_addr_t kernel_end; +static phys_addr_t crashk_base;  static phys_addr_t crashk_size; +static unsigned long long mem_limit;  static struct property kernel_end_prop = {  	.name = "linux,kernel-end", @@ -195,7 +209,7 @@ static struct property kernel_end_prop = {  static struct property crashk_base_prop = {  	.name = "linux,crashkernel-base",  	.length = sizeof(phys_addr_t), -	.value = &crashk_res.start, +	.value = &crashk_base  };  static struct property crashk_size_prop = { @@ -207,9 +221,11 @@ static struct property crashk_size_prop = {  static struct property memory_limit_prop = {  	.name = "linux,memory-limit",  	.length = sizeof(unsigned long long), -	.value = &memory_limit, +	.value = &mem_limit,  }; +#define cpu_to_be_ulong	__PASTE(cpu_to_be, BITS_PER_LONG) +  static void __init export_crashk_values(struct device_node *node)  {  	struct property *prop; @@ -225,8 +241,9 @@ static void __init export_crashk_values(struct device_node *node)  		of_remove_property(node, prop);  	if (crashk_res.start != 0) { +		crashk_base = cpu_to_be_ulong(crashk_res.start),  		of_add_property(node, &crashk_base_prop); -		crashk_size = resource_size(&crashk_res); +		crashk_size = cpu_to_be_ulong(resource_size(&crashk_res));  		of_add_property(node, &crashk_size_prop);  	} @@ -234,6 +251,7 @@ static void __init export_crashk_values(struct device_node *node)  	 * memory_limit is required by the kexec-tools to limit the  	 * crash regions to the actual memory used.  	 */ +	mem_limit = cpu_to_be_ulong(memory_limit);  	of_update_property(node, &memory_limit_prop);  } @@ -252,7 +270,7 @@ static int __init kexec_setup(void)  		of_remove_property(node, prop);  	/* information needed by userspace when using default_machine_kexec */ -	kernel_end = __pa(_end); +	kernel_end = cpu_to_be_ulong(__pa(_end));  	of_add_property(node, &kernel_end_prop);  	export_crashk_values(node); diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c index 611acdf3009..879b3aacac3 100644 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -237,7 +237,7 @@ static void wake_offline_cpus(void)  		if (!cpu_online(cpu)) {  			printk(KERN_INFO "kexec: Waking offline cpu %d.\n",  			       cpu); -			cpu_up(cpu); +			WARN_ON(cpu_up(cpu));  		}  	}  } @@ -312,7 +312,7 @@ static union thread_union kexec_stack __init_task_data =   */  struct paca_struct kexec_paca; -/* Our assembly helper, in kexec_stub.S */ +/* Our assembly helper, in misc_64.S */  extern void kexec_sequence(void *newstack, unsigned long start,  			   void *image, void *control,  			   void (*clear_all)(void)) __noreturn; @@ -369,6 +369,7 @@ void default_machine_kexec(struct kimage *image)  /* Values we need to export to the second kernel via the device tree. 
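/*
 * Standalone illustration of the cpu_to_be_ulong trick defined above in
 * machine_kexec.c: the paste must go through a helper macro so that
 * BITS_PER_LONG expands before ## is applied (the kernel's __PASTE does
 * the same). The value 64 below is an assumption for the example:
 */
#define PASTE_(a, b)	a##b
#define PASTE(a, b)	PASTE_(a, b)
#define BITS_PER_LONG	64
#define cpu_to_be_ulong	PASTE(cpu_to_be, BITS_PER_LONG)
/* cpu_to_be_ulong(x) now expands to cpu_to_be64(x); a 32-bit kernel
 * gets cpu_to_be32(x). Device tree properties are big-endian, hence
 * the conversions added to the exported kexec/crashkernel values. */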
*/  static unsigned long htab_base; +static unsigned long htab_size;  static struct property htab_base_prop = {  	.name = "linux,htab-base", @@ -379,7 +380,7 @@ static struct property htab_base_prop = {  static struct property htab_size_prop = {  	.name = "linux,htab-size",  	.length = sizeof(unsigned long), -	.value = &htab_size_bytes, +	.value = &htab_size,  };  static int __init export_htab_values(void) @@ -403,8 +404,9 @@ static int __init export_htab_values(void)  	if (prop)  		of_remove_property(node, prop); -	htab_base = __pa(htab_address); +	htab_base = cpu_to_be64(__pa(htab_address));  	of_add_property(node, &htab_base_prop); +	htab_size = cpu_to_be64(htab_size_bytes);  	of_add_property(node, &htab_size_prop);  	of_node_put(node); diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c new file mode 100644 index 00000000000..a7fd4cb78b7 --- /dev/null +++ b/arch/powerpc/kernel/mce.c @@ -0,0 +1,352 @@ +/* + * Machine check exception handling. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright 2013 IBM Corporation + * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> + */ + +#undef DEBUG +#define pr_fmt(fmt) "mce: " fmt + +#include <linux/types.h> +#include <linux/ptrace.h> +#include <linux/percpu.h> +#include <linux/export.h> +#include <linux/irq_work.h> +#include <asm/mce.h> + +static DEFINE_PER_CPU(int, mce_nest_count); +static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event); + +/* Queue for delayed MCE events. */ +static DEFINE_PER_CPU(int, mce_queue_count); +static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue); + +static void machine_check_process_queued_event(struct irq_work *work); +struct irq_work mce_event_process_work = { +        .func = machine_check_process_queued_event, +}; + +static void mce_set_error_info(struct machine_check_event *mce, +			       struct mce_error_info *mce_err) +{ +	mce->error_type = mce_err->error_type; +	switch (mce_err->error_type) { +	case MCE_ERROR_TYPE_UE: +		mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type; +		break; +	case MCE_ERROR_TYPE_SLB: +		mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type; +		break; +	case MCE_ERROR_TYPE_ERAT: +		mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type; +		break; +	case MCE_ERROR_TYPE_TLB: +		mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type; +		break; +	case MCE_ERROR_TYPE_UNKNOWN: +	default: +		break; +	} +} + +/* + * Decode and save high level MCE information into per cpu buffer which + * is an array of machine_check_event structure. 
+ */
+void save_mce_event(struct pt_regs *regs, long handled,
+		    struct mce_error_info *mce_err,
+		    uint64_t nip, uint64_t addr)
+{
+	uint64_t srr1;
+	int index = __get_cpu_var(mce_nest_count)++;
+	struct machine_check_event *mce = &__get_cpu_var(mce_event[index]);
+
+	/*
+	 * Return if we don't have enough space to log mce event.
+	 * mce_nest_count may go beyond MAX_MC_EVT but that's ok,
+	 * the check below will stop buffer overrun.
+	 */
+	if (index >= MAX_MC_EVT)
+		return;
+
+	/* Populate generic machine check info */
+	mce->version = MCE_V1;
+	mce->srr0 = nip;
+	mce->srr1 = regs->msr;
+	mce->gpr3 = regs->gpr[3];
+	mce->in_use = 1;
+
+	mce->initiator = MCE_INITIATOR_CPU;
+	if (handled)
+		mce->disposition = MCE_DISPOSITION_RECOVERED;
+	else
+		mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;
+	mce->severity = MCE_SEV_ERROR_SYNC;
+
+	srr1 = regs->msr;
+
+	/*
+	 * Populate the mce error_type and type-specific error_type.
+	 */
+	mce_set_error_info(mce, mce_err);
+
+	if (!addr)
+		return;
+
+	if (mce->error_type == MCE_ERROR_TYPE_TLB) {
+		mce->u.tlb_error.effective_address_provided = true;
+		mce->u.tlb_error.effective_address = addr;
+	} else if (mce->error_type == MCE_ERROR_TYPE_SLB) {
+		mce->u.slb_error.effective_address_provided = true;
+		mce->u.slb_error.effective_address = addr;
+	} else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
+		mce->u.erat_error.effective_address_provided = true;
+		mce->u.erat_error.effective_address = addr;
+	} else if (mce->error_type == MCE_ERROR_TYPE_UE) {
+		mce->u.ue_error.effective_address_provided = true;
+		mce->u.ue_error.effective_address = addr;
+	}
+	return;
+}
+
+/*
+ * get_mce_event:
+ *	mce	Pointer to machine_check_event structure to be filled.
+ *	release	Flag to indicate whether to free the event slot or not.
+ *		0 = do not release the mce event. Caller will invoke
+ *		    release_mce_event() once the event has been consumed.
+ *		1 = release the slot.
+ *
+ *	return	1 = success
+ *		0 = failure
+ *
+ * get_mce_event() will be called by platform specific machine check
+ * handler routine and in KVM.
+ * When we call get_mce_event(), we are still in interrupt context and
+ * preemption will not be scheduled until the ret_from_except() routine
+ * is called.
+ */
+int get_mce_event(struct machine_check_event *mce, bool release)
+{
+	int index = __get_cpu_var(mce_nest_count) - 1;
+	struct machine_check_event *mc_evt;
+	int ret = 0;
+
+	/* Sanity check */
+	if (index < 0)
+		return ret;
+
+	/* Check if we have MCE info to process. */
+	if (index < MAX_MC_EVT) {
+		mc_evt = &__get_cpu_var(mce_event[index]);
+		/* Copy the event structure and release the original */
+		if (mce)
+			*mce = *mc_evt;
+		if (release)
+			mc_evt->in_use = 0;
+		ret = 1;
+	}
+	/* Decrement the count to free the slot. */
+	if (release)
+		__get_cpu_var(mce_nest_count)--;
+
+	return ret;
+}
+
+void release_mce_event(void)
+{
+	get_mce_event(NULL, true);
+}
+
+/*
+ * Queue up the MCE event which then can be handled later.
+ */
+void machine_check_queue_event(void)
+{
+	int index;
+	struct machine_check_event evt;
+
+	if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
+		return;
+
+	index = __get_cpu_var(mce_queue_count)++;
+	/* If queue is full, just return for now. */
+	if (index >= MAX_MC_EVT) {
+		__get_cpu_var(mce_queue_count)--;
+		return;
+	}
+	__get_cpu_var(mce_event_queue[index]) = evt;
+
+	/* Queue irq work to process this event later. */
+	irq_work_queue(&mce_event_process_work);
+}
+
+/*
+ * Process pending MCE event from the mce event queue.
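The release flag documented above gives get_mce_event() two calling patterns; a sketch of both, assuming only the declarations in asm/mce.h (the wrapper names are illustrative):

static void mce_consume_sketch(void)
{
	struct machine_check_event evt;

	/* copy the event out and free its slot in one call */
	if (get_mce_event(&evt, true))
		machine_check_print_event_info(&evt);
}

static void mce_peek_sketch(void)
{
	struct machine_check_event evt;

	/* peek without freeing; the slot must be released explicitly */
	if (get_mce_event(&evt, false)) {
		/* ... inspect evt ... */
		release_mce_event();
	}
}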
This function will be
+ * called during syscall exit.
+ */
+static void machine_check_process_queued_event(struct irq_work *work)
+{
+	int index;
+
+	/*
+	 * For now just print it to console.
+	 * TODO: log this error event to FSP or nvram.
+	 */
+	while (__get_cpu_var(mce_queue_count) > 0) {
+		index = __get_cpu_var(mce_queue_count) - 1;
+		machine_check_print_event_info(
+				&__get_cpu_var(mce_event_queue[index]));
+		__get_cpu_var(mce_queue_count)--;
+	}
+}
+
+void machine_check_print_event_info(struct machine_check_event *evt)
+{
+	const char *level, *sevstr, *subtype;
+	static const char *mc_ue_types[] = {
+		"Indeterminate",
+		"Instruction fetch",
+		"Page table walk ifetch",
+		"Load/Store",
+		"Page table walk Load/Store",
+	};
+	static const char *mc_slb_types[] = {
+		"Indeterminate",
+		"Parity",
+		"Multihit",
+	};
+	static const char *mc_erat_types[] = {
+		"Indeterminate",
+		"Parity",
+		"Multihit",
+	};
+	static const char *mc_tlb_types[] = {
+		"Indeterminate",
+		"Parity",
+		"Multihit",
+	};
+
+	/* Print things out */
+	if (evt->version != MCE_V1) {
+		pr_err("Machine Check Exception, Unknown event version %d !\n",
+		       evt->version);
+		return;
+	}
+	switch (evt->severity) {
+	case MCE_SEV_NO_ERROR:
+		level = KERN_INFO;
+		sevstr = "Harmless";
+		break;
+	case MCE_SEV_WARNING:
+		level = KERN_WARNING;
+		sevstr = "";
+		break;
+	case MCE_SEV_ERROR_SYNC:
+		level = KERN_ERR;
+		sevstr = "Severe";
+		break;
+	case MCE_SEV_FATAL:
+	default:
+		level = KERN_ERR;
+		sevstr = "Fatal";
+		break;
+	}
+
+	printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
+	       evt->disposition == MCE_DISPOSITION_RECOVERED ?
+	       "Recovered" : "Not recovered");
+	printk("%s  Initiator: %s\n", level,
+	       evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown");
+	switch (evt->error_type) {
+	case MCE_ERROR_TYPE_UE:
+		subtype = evt->u.ue_error.ue_error_type <
+			ARRAY_SIZE(mc_ue_types) ?
+			mc_ue_types[evt->u.ue_error.ue_error_type]
+			: "Unknown";
+		printk("%s  Error type: UE [%s]\n", level, subtype);
+		if (evt->u.ue_error.effective_address_provided)
+			printk("%s    Effective address: %016llx\n",
+			       level, evt->u.ue_error.effective_address);
+		if (evt->u.ue_error.physical_address_provided)
+			printk("%s     Physical address: %016llx\n",
+			       level, evt->u.ue_error.physical_address);
+		break;
+	case MCE_ERROR_TYPE_SLB:
+		subtype = evt->u.slb_error.slb_error_type <
+			ARRAY_SIZE(mc_slb_types) ?
+			mc_slb_types[evt->u.slb_error.slb_error_type]
+			: "Unknown";
+		printk("%s  Error type: SLB [%s]\n", level, subtype);
+		if (evt->u.slb_error.effective_address_provided)
+			printk("%s    Effective address: %016llx\n",
+			       level, evt->u.slb_error.effective_address);
+		break;
+	case MCE_ERROR_TYPE_ERAT:
+		subtype = evt->u.erat_error.erat_error_type <
+			ARRAY_SIZE(mc_erat_types) ?
+			mc_erat_types[evt->u.erat_error.erat_error_type]
+			: "Unknown";
+		printk("%s  Error type: ERAT [%s]\n", level, subtype);
+		if (evt->u.erat_error.effective_address_provided)
+			printk("%s    Effective address: %016llx\n",
+			       level, evt->u.erat_error.effective_address);
+		break;
+	case MCE_ERROR_TYPE_TLB:
+		subtype = evt->u.tlb_error.tlb_error_type <
+			ARRAY_SIZE(mc_tlb_types) ?
+			mc_tlb_types[evt->u.tlb_error.tlb_error_type] +			: "Unknown"; +		printk("%s  Error type: TLB [%s]\n", level, subtype); +		if (evt->u.tlb_error.effective_address_provided) +			printk("%s    Effective address: %016llx\n", +			       level, evt->u.tlb_error.effective_address); +		break; +	default: +	case MCE_ERROR_TYPE_UNKNOWN: +		printk("%s  Error type: Unknown\n", level); +		break; +	} +} + +uint64_t get_mce_fault_addr(struct machine_check_event *evt) +{ +	switch (evt->error_type) { +	case MCE_ERROR_TYPE_UE: +		if (evt->u.ue_error.effective_address_provided) +			return evt->u.ue_error.effective_address; +		break; +	case MCE_ERROR_TYPE_SLB: +		if (evt->u.slb_error.effective_address_provided) +			return evt->u.slb_error.effective_address; +		break; +	case MCE_ERROR_TYPE_ERAT: +		if (evt->u.erat_error.effective_address_provided) +			return evt->u.erat_error.effective_address; +		break; +	case MCE_ERROR_TYPE_TLB: +		if (evt->u.tlb_error.effective_address_provided) +			return evt->u.tlb_error.effective_address; +		break; +	default: +	case MCE_ERROR_TYPE_UNKNOWN: +		break; +	} +	return 0; +} +EXPORT_SYMBOL(get_mce_fault_addr); diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c new file mode 100644 index 00000000000..aa9aff3d6ad --- /dev/null +++ b/arch/powerpc/kernel/mce_power.c @@ -0,0 +1,313 @@ +/* + * Machine check exception handling CPU-side for power7 and power8 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright 2013 IBM Corporation + * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> + */ + +#undef DEBUG +#define pr_fmt(fmt) "mce_power: " fmt + +#include <linux/types.h> +#include <linux/ptrace.h> +#include <asm/mmu.h> +#include <asm/mce.h> +#include <asm/machdep.h> + +/* flush SLBs and reload */ +static void flush_and_reload_slb(void) +{ +	struct slb_shadow *slb; +	unsigned long i, n; + +	/* Invalidate all SLBs */ +	asm volatile("slbmte %0,%0; slbia" : : "r" (0)); + +#ifdef CONFIG_KVM_BOOK3S_HANDLER +	/* +	 * If machine check is hit when in guest or in transition, we will +	 * only flush the SLBs and continue. +	 */ +	if (get_paca()->kvm_hstate.in_guest) +		return; +#endif + +	/* For host kernel, reload the SLBs from shadow SLB buffer. */ +	slb = get_slb_shadow(); +	if (!slb) +		return; + +	n = min_t(u32, be32_to_cpu(slb->persistent), SLB_MIN_SIZE); + +	/* Load up the SLB entries from shadow SLB */ +	for (i = 0; i < n; i++) { +		unsigned long rb = be64_to_cpu(slb->save_area[i].esid); +		unsigned long rs = be64_to_cpu(slb->save_area[i].vsid); + +		rb = (rb & ~0xFFFul) | i; +		asm volatile("slbmte %0,%1" : : "r" (rs), "r" (rb)); +	} +} + +static long mce_handle_derror(uint64_t dsisr, uint64_t slb_error_bits) +{ +	long handled = 1; + +	/* +	 * flush and reload SLBs for SLB errors and flush TLBs for TLB errors. 
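Every branch of the printer above guards its subtype table the same way; factored into a hypothetical helper, the pattern is simply a bounds-checked lookup, so subtype codes from newer hardware degrade to "Unknown" instead of indexing past the table:

static const char *mce_subtype_sketch(const char * const *table, size_t n,
				      unsigned int subtype)
{
	return subtype < n ? table[subtype] : "Unknown";
}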
+	 * reset the error bits whenever we handle them so that at the end
+	 * we can check whether we handled all of them or not.
+	 */
+	if (dsisr & slb_error_bits) {
+		flush_and_reload_slb();
+		/* reset error bits */
+		dsisr &= ~(slb_error_bits);
+	}
+	if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) {
+		if (cur_cpu_spec && cur_cpu_spec->flush_tlb)
+			cur_cpu_spec->flush_tlb(TLBIEL_INVAL_PAGE);
+		/* reset error bits */
+		dsisr &= ~P7_DSISR_MC_TLB_MULTIHIT_MFTLB;
+	}
+	/* Any other errors we don't understand? */
+	if (dsisr & 0xffffffffUL)
+		handled = 0;
+
+	return handled;
+}
+
+static long mce_handle_derror_p7(uint64_t dsisr)
+{
+	return mce_handle_derror(dsisr, P7_DSISR_MC_SLB_ERRORS);
+}
+
+static long mce_handle_common_ierror(uint64_t srr1)
+{
+	long handled = 0;
+
+	switch (P7_SRR1_MC_IFETCH(srr1)) {
+	case 0:
+		break;
+	case P7_SRR1_MC_IFETCH_SLB_PARITY:
+	case P7_SRR1_MC_IFETCH_SLB_MULTIHIT:
+		/* flush and reload SLBs for SLB errors. */
+		flush_and_reload_slb();
+		handled = 1;
+		break;
+	case P7_SRR1_MC_IFETCH_TLB_MULTIHIT:
+		if (cur_cpu_spec && cur_cpu_spec->flush_tlb) {
+			cur_cpu_spec->flush_tlb(TLBIEL_INVAL_PAGE);
+			handled = 1;
+		}
+		break;
+	default:
+		break;
+	}
+
+	return handled;
+}
+
+static long mce_handle_ierror_p7(uint64_t srr1)
+{
+	long handled = 0;
+
+	handled = mce_handle_common_ierror(srr1);
+
+	if (P7_SRR1_MC_IFETCH(srr1) == P7_SRR1_MC_IFETCH_SLB_BOTH) {
+		flush_and_reload_slb();
+		handled = 1;
+	}
+	return handled;
+}
+
+static void mce_get_common_ierror(struct mce_error_info *mce_err, uint64_t srr1)
+{
+	switch (P7_SRR1_MC_IFETCH(srr1)) {
+	case P7_SRR1_MC_IFETCH_SLB_PARITY:
+		mce_err->error_type = MCE_ERROR_TYPE_SLB;
+		mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
+		break;
+	case P7_SRR1_MC_IFETCH_SLB_MULTIHIT:
+		mce_err->error_type = MCE_ERROR_TYPE_SLB;
+		mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
+		break;
+	case P7_SRR1_MC_IFETCH_TLB_MULTIHIT:
+		mce_err->error_type = MCE_ERROR_TYPE_TLB;
+		mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
+		break;
+	case P7_SRR1_MC_IFETCH_UE:
+	case P7_SRR1_MC_IFETCH_UE_IFU_INTERNAL:
+		mce_err->error_type = MCE_ERROR_TYPE_UE;
+		mce_err->u.ue_error_type = MCE_UE_ERROR_IFETCH;
+		break;
+	case P7_SRR1_MC_IFETCH_UE_TLB_RELOAD:
+		mce_err->error_type = MCE_ERROR_TYPE_UE;
+		mce_err->u.ue_error_type =
+				MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH;
+		break;
+	}
+}
+
+static void mce_get_ierror_p7(struct mce_error_info *mce_err, uint64_t srr1)
+{
+	mce_get_common_ierror(mce_err, srr1);
+	if (P7_SRR1_MC_IFETCH(srr1) == P7_SRR1_MC_IFETCH_SLB_BOTH) {
+		mce_err->error_type = MCE_ERROR_TYPE_SLB;
+		mce_err->u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
+	}
+}
+
+static void mce_get_derror_p7(struct mce_error_info *mce_err, uint64_t dsisr)
+{
+	if (dsisr & P7_DSISR_MC_UE) {
+		mce_err->error_type = MCE_ERROR_TYPE_UE;
+		mce_err->u.ue_error_type = MCE_UE_ERROR_LOAD_STORE;
+	} else if (dsisr & P7_DSISR_MC_UE_TABLEWALK) {
+		mce_err->error_type = MCE_ERROR_TYPE_UE;
+		mce_err->u.ue_error_type =
+				MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
+	} else if (dsisr & P7_DSISR_MC_ERAT_MULTIHIT) {
+		mce_err->error_type = MCE_ERROR_TYPE_ERAT;
+		mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
+	} else if (dsisr & P7_DSISR_MC_SLB_MULTIHIT) {
+		mce_err->error_type = MCE_ERROR_TYPE_SLB;
+		mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
+	} else if (dsisr & P7_DSISR_MC_SLB_PARITY_MFSLB) {
+		mce_err->error_type = MCE_ERROR_TYPE_SLB;
+		mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
+	} else if
(dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) { +		mce_err->error_type = MCE_ERROR_TYPE_TLB; +		mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT; +	} else if (dsisr & P7_DSISR_MC_SLB_MULTIHIT_PARITY) { +		mce_err->error_type = MCE_ERROR_TYPE_SLB; +		mce_err->u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE; +	} +} + +static long mce_handle_ue_error(struct pt_regs *regs) +{ +	long handled = 0; + +	/* +	 * On specific SCOM read via MMIO we may get a machine check +	 * exception with SRR0 pointing inside opal. If that is the +	 * case OPAL may have recovery address to re-read SCOM data in +	 * different way and hence we can recover from this MC. +	 */ + +	if (ppc_md.mce_check_early_recovery) { +		if (ppc_md.mce_check_early_recovery(regs)) +			handled = 1; +	} +	return handled; +} + +long __machine_check_early_realmode_p7(struct pt_regs *regs) +{ +	uint64_t srr1, nip, addr; +	long handled = 1; +	struct mce_error_info mce_error_info = { 0 }; + +	srr1 = regs->msr; +	nip = regs->nip; + +	/* +	 * Handle memory errors depending whether this was a load/store or +	 * ifetch exception. Also, populate the mce error_type and +	 * type-specific error_type from either SRR1 or DSISR, depending +	 * whether this was a load/store or ifetch exception +	 */ +	if (P7_SRR1_MC_LOADSTORE(srr1)) { +		handled = mce_handle_derror_p7(regs->dsisr); +		mce_get_derror_p7(&mce_error_info, regs->dsisr); +		addr = regs->dar; +	} else { +		handled = mce_handle_ierror_p7(srr1); +		mce_get_ierror_p7(&mce_error_info, srr1); +		addr = regs->nip; +	} + +	/* Handle UE error. */ +	if (mce_error_info.error_type == MCE_ERROR_TYPE_UE) +		handled = mce_handle_ue_error(regs); + +	save_mce_event(regs, handled, &mce_error_info, nip, addr); +	return handled; +} + +static void mce_get_ierror_p8(struct mce_error_info *mce_err, uint64_t srr1) +{ +	mce_get_common_ierror(mce_err, srr1); +	if (P7_SRR1_MC_IFETCH(srr1) == P8_SRR1_MC_IFETCH_ERAT_MULTIHIT) { +		mce_err->error_type = MCE_ERROR_TYPE_ERAT; +		mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT; +	} +} + +static void mce_get_derror_p8(struct mce_error_info *mce_err, uint64_t dsisr) +{ +	mce_get_derror_p7(mce_err, dsisr); +	if (dsisr & P8_DSISR_MC_ERAT_MULTIHIT_SEC) { +		mce_err->error_type = MCE_ERROR_TYPE_ERAT; +		mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT; +	} +} + +static long mce_handle_ierror_p8(uint64_t srr1) +{ +	long handled = 0; + +	handled = mce_handle_common_ierror(srr1); + +	if (P7_SRR1_MC_IFETCH(srr1) == P8_SRR1_MC_IFETCH_ERAT_MULTIHIT) { +		flush_and_reload_slb(); +		handled = 1; +	} +	return handled; +} + +static long mce_handle_derror_p8(uint64_t dsisr) +{ +	return mce_handle_derror(dsisr, P8_DSISR_MC_SLB_ERRORS); +} + +long __machine_check_early_realmode_p8(struct pt_regs *regs) +{ +	uint64_t srr1, nip, addr; +	long handled = 1; +	struct mce_error_info mce_error_info = { 0 }; + +	srr1 = regs->msr; +	nip = regs->nip; + +	if (P7_SRR1_MC_LOADSTORE(srr1)) { +		handled = mce_handle_derror_p8(regs->dsisr); +		mce_get_derror_p8(&mce_error_info, regs->dsisr); +		addr = regs->dar; +	} else { +		handled = mce_handle_ierror_p8(srr1); +		mce_get_ierror_p8(&mce_error_info, srr1); +		addr = regs->nip; +	} + +	/* Handle UE error. 
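Both realmode handlers above apply the same decode-and-clear discipline to DSISR: clear each error bit as it is serviced, then report success only if nothing is left set. A condensed sketch, using the P7 bit names from this file:

static long mce_derror_sketch(uint64_t dsisr)
{
	if (dsisr & P7_DSISR_MC_SLB_ERRORS) {
		flush_and_reload_slb();
		dsisr &= ~P7_DSISR_MC_SLB_ERRORS;	/* serviced */
	}
	/* ... likewise for TLB multihit ... */
	return (dsisr & 0xffffffffUL) ? 0 : 1;	/* leftover bits: unhandled */
}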
*/ +	if (mce_error_info.error_type == MCE_ERROR_TYPE_UE) +		handled = mce_handle_ue_error(regs); + +	save_mce_event(regs, handled, &mce_error_info, nip, addr); +	return handled; +} diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 2b0ad984536..7c6bb4b17b4 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -57,11 +57,14 @@ _GLOBAL(call_do_softirq)  	mtlr	r0  	blr +/* + * void call_do_irq(struct pt_regs *regs, struct thread_info *irqtp); + */  _GLOBAL(call_do_irq)  	mflr	r0  	stw	r0,4(r1)  	lwz	r10,THREAD+KSP_LIMIT(r2) -	addi	r11,r3,THREAD_INFO_GAP +	addi	r11,r4,THREAD_INFO_GAP  	stwu	r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4)  	mr	r1,r4  	stw	r10,8(r1) @@ -344,7 +347,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_UNIFIED_ID_CACHE)   */  _KPROBE(flush_icache_range)  BEGIN_FTR_SECTION -	isync +	PURGE_PREFETCHED_INS  	blr				/* for 601, do nothing */  END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)  	li	r5,L1_CACHE_BYTES-1 @@ -448,6 +451,7 @@ _GLOBAL(invalidate_dcache_range)   */  _GLOBAL(__flush_dcache_icache)  BEGIN_FTR_SECTION +	PURGE_PREFETCHED_INS  	blr  END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)  	rlwinm	r3,r3,0,0,31-PAGE_SHIFT		/* Get page base address */ @@ -489,6 +493,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_44x)   */  _GLOBAL(__flush_dcache_icache_phys)  BEGIN_FTR_SECTION +	PURGE_PREFETCHED_INS  	blr					/* for 601, do nothing */  END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)  	mfmsr	r10 @@ -659,6 +664,20 @@ _GLOBAL(__lshrdi3)  	blr  /* + * 64-bit comparison: __cmpdi2(s64 a, s64 b) + * Returns 0 if a < b, 1 if a == b, 2 if a > b. + */ +_GLOBAL(__cmpdi2) +	cmpw	r3,r5 +	li	r3,1 +	bne	1f +	cmplw	r4,r6 +	beqlr +1:	li	r3,0 +	bltlr +	li	r3,2 +	blr +/*   * 64-bit comparison: __ucmpdi2(u64 a, u64 b)   * Returns 0 if a < b, 1 if a == b, 2 if a > b.   
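For reference, the contract of the new __cmpdi2 above, and of its unsigned sibling, in plain C (the function name here is illustrative):

int cmpdi2_sketch(long long a, long long b)
{
	/* 0 if a < b, 1 if a == b, 2 if a > b */
	return (a > b) - (a < b) + 1;
}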
*/ diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index e59caf874d0..4e314b90c75 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -34,7 +34,7 @@ _GLOBAL(call_do_softirq)  	std	r0,16(r1)  	stdu	r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3)  	mr	r1,r3 -	bl	.__do_softirq +	bl	__do_softirq  	ld	r1,0(r1)  	ld	r0,16(r1)  	mtlr	r0 @@ -45,7 +45,7 @@ _GLOBAL(call_do_irq)  	std	r0,16(r1)  	stdu	r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4)  	mr	r1,r4 -	bl	.__do_irq +	bl	__do_irq  	ld	r1,0(r1)  	ld	r0,16(r1)  	mtlr	r0 @@ -67,6 +67,7 @@ PPC64_CACHES:  _KPROBE(flush_icache_range)  BEGIN_FTR_SECTION +	PURGE_PREFETCHED_INS  	blr  END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)  /* @@ -211,6 +212,11 @@ _GLOBAL(__flush_dcache_icache)   * Different systems have different cache line sizes   */ +BEGIN_FTR_SECTION +	PURGE_PREFETCHED_INS +	blr +END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) +  /* Flush the dcache */   	ld	r7,PPC64_CACHES@toc(r2)  	clrrdi	r3,r3,PAGE_SHIFT           	    /* Page align */ @@ -246,8 +252,8 @@ _GLOBAL(__bswapdi2)  	or	r3,r7,r9  	blr -#if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE) +#ifdef CONFIG_PPC_EARLY_DEBUG_BOOTX  _GLOBAL(rmci_on)  	sync  	isync @@ -277,6 +283,9 @@ _GLOBAL(rmci_off)  	isync  	sync  	blr +#endif /* CONFIG_PPC_EARLY_DEBUG_BOOTX */ + +#if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE)  /*   * Do an IO access in real mode @@ -497,7 +506,7 @@ _GLOBAL(kexec_smp_wait)  	stb	r4,PACAKEXECSTATE(r13)  	SYNC -	b	.kexec_wait +	b	kexec_wait  /*   * switch to real mode (turn mmu off) @@ -567,7 +576,7 @@ _GLOBAL(kexec_sequence)  	/* copy dest pages, flush whole dest image */  	mr	r3,r29 -	bl	.kexec_copy_flush	/* (image) */ +	bl	kexec_copy_flush	/* (image) */  	/* turn off mmu */  	bl	real_mode @@ -577,7 +586,7 @@ _GLOBAL(kexec_sequence)  	mr	r4,r30		/* start, aka phys mem offset */  	li	r5,0x100  	li	r6,0 -	bl	.copy_and_flush	/* (dest, src, copy limit, start offset) */ +	bl	copy_and_flush	/* (dest, src, copy limit, start offset) */  1:	/* assume normal blr return */  	/* release other cpus to the new kernel secondary start at 0x60 */ @@ -586,8 +595,12 @@ _GLOBAL(kexec_sequence)  	stw	r6,kexec_flag-1b(5)  	/* clear out hardware hash page table and tlb */ -	ld	r5,0(r27)		/* deref function descriptor */ -	mtctr	r5 +#if !defined(_CALL_ELF) || _CALL_ELF != 2 +	ld	r12,0(r27)		/* deref function descriptor */ +#else +	mr	r12,r27 +#endif +	mtctr	r12  	bctrl				/* ppc_md.hpte_clear_all(void); */  /* @@ -621,3 +634,31 @@ _GLOBAL(kexec_sequence)  	li	r5,0  	blr	/* image->start(physid, image->start, 0); */  #endif /* CONFIG_KEXEC */ + +#ifdef CONFIG_MODULES +#if defined(_CALL_ELF) && _CALL_ELF == 2 + +#ifdef CONFIG_MODVERSIONS +.weak __crc_TOC. +.section "___kcrctab+TOC.","a" +.globl __kcrctab_TOC. +__kcrctab_TOC.: +	.llong	__crc_TOC. +#endif + +/* + * Export a fake .TOC. since both modpost and depmod will complain otherwise. + * Both modpost and depmod strip the leading . so we do the same here. + */ +.section "__ksymtab_strings","a" +__kstrtab_TOC.: +	.asciz "TOC." + +.section "___ksymtab+TOC.","a" +/* This symbol name is important: it's used by modpost to find exported syms */ +.globl __ksymtab_TOC. +__ksymtab_TOC.: +	.llong 0 /* .value */ +	.llong __kstrtab_TOC. 
+#endif /* ELFv2 */
+#endif /* MODULES */
diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c
index 2d275707f41..9547381b631 100644
--- a/arch/powerpc/kernel/module.c
+++ b/arch/powerpc/kernel/module.c
@@ -25,8 +25,7 @@
 #include <asm/uaccess.h>
 #include <asm/firmware.h>
 #include <linux/sort.h>
-
-#include "setup.h"
+#include <asm/setup.h>
 
 LIST_HEAD(module_bug_list);
diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c
index 2e3200ca485..6cff040bf45 100644
--- a/arch/powerpc/kernel/module_32.c
+++ b/arch/powerpc/kernel/module_32.c
@@ -26,8 +26,7 @@
 #include <linux/cache.h>
 #include <linux/bug.h>
 #include <linux/sort.h>
-
-#include "setup.h"
+#include <asm/setup.h>
 
 #if 0
 #define DEBUGP printk
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 6ee59a0eb26..d807ee626af 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -22,12 +22,12 @@
 #include <linux/vmalloc.h>
 #include <linux/ftrace.h>
 #include <linux/bug.h>
+#include <linux/uaccess.h>
 #include <asm/module.h>
 #include <asm/firmware.h>
 #include <asm/code-patching.h>
 #include <linux/sort.h>
-
-#include "setup.h"
+#include <asm/setup.h>
 
 /* FIXME: We don't do .init separately.  To do this, we'd need to have
    a separate r2 value in the init and core section, and stub between
@@ -42,35 +42,170 @@
 #define DEBUGP(fmt , ...)
 #endif
 
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+#define R2_STACK_OFFSET 24
+
+/* An address is simply the address of the function. */
+typedef unsigned long func_desc_t;
+
+static func_desc_t func_desc(unsigned long addr)
+{
+	return addr;
+}
+static unsigned long func_addr(unsigned long addr)
+{
+	return addr;
+}
+static unsigned long stub_func_addr(func_desc_t func)
+{
+	return func;
+}
+
+/* PowerPC64 specific values for the Elf64_Sym st_other field.  */
+#define STO_PPC64_LOCAL_BIT	5
+#define STO_PPC64_LOCAL_MASK	(7 << STO_PPC64_LOCAL_BIT)
+#define PPC64_LOCAL_ENTRY_OFFSET(other)					\
+ (((1 << (((other) & STO_PPC64_LOCAL_MASK) >> STO_PPC64_LOCAL_BIT)) >> 2) << 2)
+
+static unsigned int local_entry_offset(const Elf64_Sym *sym)
+{
+	/* sym->st_other indicates offset to local entry point
+	 * (otherwise it will assume r12 is the address of the start
+	 * of function and try to derive r2 from it). */
+	return PPC64_LOCAL_ENTRY_OFFSET(sym->st_other);
+}
+#else
+#define R2_STACK_OFFSET 40
+
+/* An address is the address of the OPD entry, which contains the
+ * address of the function. */
+typedef struct ppc64_opd_entry func_desc_t;
+
+static func_desc_t func_desc(unsigned long addr)
+{
+	return *(struct ppc64_opd_entry *)addr;
+}
+static unsigned long func_addr(unsigned long addr)
+{
+	return func_desc(addr).funcaddr;
+}
+static unsigned long stub_func_addr(func_desc_t func)
+{
+	return func.funcaddr;
+}
+static unsigned int local_entry_offset(const Elf64_Sym *sym)
+{
+	return 0;
+}
+#endif
+
 /* Like PPC32, we need little trampolines to do > 24-bit jumps (into
    the kernel itself).  But on PPC64, these need to be used for every
    jump, actually, to reset r2 (TOC+0x8000). */
 struct ppc64_stub_entry
 {
-	/* 28 byte jump instruction sequence (7 instructions) */
-	unsigned char jump[28];
-	unsigned char unused[4];
+	/* 28 byte jump instruction sequence (7 instructions). We only
+	 * need 6 instructions on ABIv2 but we always allocate 7 so
+	 * we don't have to modify the trampoline load instruction.
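PPC64_LOCAL_ENTRY_OFFSET above decodes the three-bit power-of-two field that ELFv2 stores in st_other; a worked example of the arithmetic:

/*
 * An st_other local-entry field of 3 encodes a local entry point
 * 8 bytes past the global entry point:
 *   ((1 << 3) >> 2) << 2  ==  (8 >> 2) << 2  ==  8
 * Field values 0 and 1 both decode to 0, i.e. no separate local entry.
 */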
*/ +	u32 jump[7]; +	u32 unused;  	/* Data for the above code */ -	struct ppc64_opd_entry opd; +	func_desc_t funcdata;  }; -/* We use a stub to fix up r2 (TOC ptr) and to jump to the (external) -   function which may be more than 24-bits away.  We could simply -   patch the new r2 value and function pointer into the stub, but it's -   significantly shorter to put these values at the end of the stub -   code, and patch the stub address (32-bits relative to the TOC ptr, -   r2) into the stub. */ -static struct ppc64_stub_entry ppc64_stub = -{ .jump = { -	0x3d, 0x82, 0x00, 0x00, /* addis   r12,r2, <high> */ -	0x39, 0x8c, 0x00, 0x00, /* addi    r12,r12, <low> */ +/* + * PPC64 uses 24 bit jumps, but we need to jump into other modules or + * the kernel which may be further.  So we jump to a stub. + * + * For ELFv1 we need to use this to set up the new r2 value (aka TOC + * pointer).  For ELFv2 it's the callee's responsibility to set up the + * new r2, but for both we need to save the old r2. + * + * We could simply patch the new r2 value and function pointer into + * the stub, but it's significantly shorter to put these values at the + * end of the stub code, and patch the stub address (32-bits relative + * to the TOC ptr, r2) into the stub. + */ + +static u32 ppc64_stub_insns[] = { +	0x3d620000,			/* addis   r11,r2, <high> */ +	0x396b0000,			/* addi    r11,r11, <low> */  	/* Save current r2 value in magic place on the stack. */ -	0xf8, 0x41, 0x00, 0x28, /* std     r2,40(r1) */ -	0xe9, 0x6c, 0x00, 0x20, /* ld      r11,32(r12) */ -	0xe8, 0x4c, 0x00, 0x28, /* ld      r2,40(r12) */ -	0x7d, 0x69, 0x03, 0xa6, /* mtctr   r11 */ -	0x4e, 0x80, 0x04, 0x20  /* bctr */ -} }; +	0xf8410000|R2_STACK_OFFSET,	/* std     r2,R2_STACK_OFFSET(r1) */ +	0xe98b0020,			/* ld      r12,32(r11) */ +#if !defined(_CALL_ELF) || _CALL_ELF != 2 +	/* Set up new r2 from function descriptor */ +	0xe84b0028,			/* ld      r2,40(r11) */ +#endif +	0x7d8903a6,			/* mtctr   r12 */ +	0x4e800420			/* bctr */ +}; + +#ifdef CONFIG_DYNAMIC_FTRACE + +static u32 ppc64_stub_mask[] = { +	0xffff0000, +	0xffff0000, +	0xffffffff, +	0xffffffff, +#if !defined(_CALL_ELF) || _CALL_ELF != 2 +	0xffffffff, +#endif +	0xffffffff, +	0xffffffff +}; + +bool is_module_trampoline(u32 *p) +{ +	unsigned int i; +	u32 insns[ARRAY_SIZE(ppc64_stub_insns)]; + +	BUILD_BUG_ON(sizeof(ppc64_stub_insns) != sizeof(ppc64_stub_mask)); + +	if (probe_kernel_read(insns, p, sizeof(insns))) +		return -EFAULT; + +	for (i = 0; i < ARRAY_SIZE(ppc64_stub_insns); i++) { +		u32 insna = insns[i]; +		u32 insnb = ppc64_stub_insns[i]; +		u32 mask = ppc64_stub_mask[i]; + +		if ((insna & mask) != (insnb & mask)) +			return false; +	} + +	return true; +} + +int module_trampoline_target(struct module *mod, u32 *trampoline, +			     unsigned long *target) +{ +	u32 buf[2]; +	u16 upper, lower; +	long offset; +	void *toc_entry; + +	if (probe_kernel_read(buf, trampoline, sizeof(buf))) +		return -EFAULT; + +	upper = buf[0] & 0xffff; +	lower = buf[1] & 0xffff; + +	/* perform the addis/addi, both signed */ +	offset = ((short)upper << 16) + (short)lower; + +	/* +	 * Now get the address this trampoline jumps to. This +	 * is always 32 bytes into our trampoline stub. 
+	 */ +	toc_entry = (void *)mod->arch.toc + offset + 32; + +	if (probe_kernel_read(target, toc_entry, sizeof(*target))) +		return -EFAULT; + +	return 0; +} + +#endif  /* Count how many different 24-bit relocations (different symbol,     different addend) */ @@ -173,17 +308,27 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr,  	return relocs * sizeof(struct ppc64_stub_entry);  } +/* Still needed for ELFv2, for .TOC. */  static void dedotify_versions(struct modversion_info *vers,  			      unsigned long size)  {  	struct modversion_info *end;  	for (end = (void *)vers + size; vers < end; vers++) -		if (vers->name[0] == '.') +		if (vers->name[0] == '.') {  			memmove(vers->name, vers->name+1, strlen(vers->name)); +#ifdef ARCH_RELOCATES_KCRCTAB +			/* The TOC symbol has no CRC computed. To avoid CRC +			 * check failing, we must force it to the expected +			 * value (see CRC check in module.c). +			 */ +			if (!strcmp(vers->name, "TOC.")) +				vers->crc = -(unsigned long)reloc_start; +#endif +		}  } -/* Undefined symbols which refer to .funcname, hack to funcname */ +/* Undefined symbols which refer to .funcname, hack to funcname (or .TOC.) */  static void dedotify(Elf64_Sym *syms, unsigned int numsyms, char *strtab)  {  	unsigned int i; @@ -197,6 +342,24 @@ static void dedotify(Elf64_Sym *syms, unsigned int numsyms, char *strtab)  	}  } +static Elf64_Sym *find_dot_toc(Elf64_Shdr *sechdrs, +			       const char *strtab, +			       unsigned int symindex) +{ +	unsigned int i, numsyms; +	Elf64_Sym *syms; + +	syms = (Elf64_Sym *)sechdrs[symindex].sh_addr; +	numsyms = sechdrs[symindex].sh_size / sizeof(Elf64_Sym); + +	for (i = 1; i < numsyms; i++) { +		if (syms[i].st_shndx == SHN_UNDEF +		    && strcmp(strtab + syms[i].st_name, "TOC.") == 0) +			return &syms[i]; +	} +	return NULL; +} +  int module_frob_arch_sections(Elf64_Ehdr *hdr,  			      Elf64_Shdr *sechdrs,  			      char *secstrings, @@ -261,16 +424,12 @@ static inline unsigned long my_r2(Elf64_Shdr *sechdrs, struct module *me)  /* Patch stub to reference function and correct r2 value. */  static inline int create_stub(Elf64_Shdr *sechdrs,  			      struct ppc64_stub_entry *entry, -			      struct ppc64_opd_entry *opd, +			      unsigned long addr,  			      struct module *me)  { -	Elf64_Half *loc1, *loc2;  	long reladdr; -	*entry = ppc64_stub; - -	loc1 = (Elf64_Half *)&entry->jump[2]; -	loc2 = (Elf64_Half *)&entry->jump[6]; +	memcpy(entry->jump, ppc64_stub_insns, sizeof(ppc64_stub_insns));  	/* Stub uses address relative to r2. */  	reladdr = (unsigned long)entry - my_r2(sechdrs, me); @@ -281,35 +440,33 @@ static inline int create_stub(Elf64_Shdr *sechdrs,  	}  	DEBUGP("Stub %p get data from reladdr %li\n", entry, reladdr); -	*loc1 = PPC_HA(reladdr); -	*loc2 = PPC_LO(reladdr); -	entry->opd.funcaddr = opd->funcaddr; -	entry->opd.r2 = opd->r2; +	entry->jump[0] |= PPC_HA(reladdr); +	entry->jump[1] |= PPC_LO(reladdr); +	entry->funcdata = func_desc(addr);  	return 1;  } -/* Create stub to jump to function described in this OPD: we need the +/* Create stub to jump to function described in this OPD/ptr: we need the     stub to set up the TOC ptr (r2) for the function. 
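create_stub above splits the r2-relative stub address across an addis/addi pair, and the high half must pre-compensate for the low half's sign extension. A sketch of the arithmetic (the SKETCH_* names are illustrative; they match what PPC_HA and PPC_LO compute):

#define SKETCH_HA(x)	((((x) + 0x8000) >> 16) & 0xffff)
#define SKETCH_LO(x)	((x) & 0xffff)
/*
 * Example: x = 0x12348765
 *   HA(x) = 0x1235, LO(x) = 0x8765 (sign-extends to -0x789b)
 *   (0x1235 << 16) + (-0x789b) = 0x12348765, as required.
 */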
*/  static unsigned long stub_for_addr(Elf64_Shdr *sechdrs, -				   unsigned long opdaddr, +				   unsigned long addr,  				   struct module *me)  {  	struct ppc64_stub_entry *stubs; -	struct ppc64_opd_entry *opd = (void *)opdaddr;  	unsigned int i, num_stubs;  	num_stubs = sechdrs[me->arch.stubs_section].sh_size / sizeof(*stubs);  	/* Find this stub, or if that fails, the next avail. entry */  	stubs = (void *)sechdrs[me->arch.stubs_section].sh_addr; -	for (i = 0; stubs[i].opd.funcaddr; i++) { +	for (i = 0; stub_func_addr(stubs[i].funcdata); i++) {  		BUG_ON(i >= num_stubs); -		if (stubs[i].opd.funcaddr == opd->funcaddr) +		if (stub_func_addr(stubs[i].funcdata) == func_addr(addr))  			return (unsigned long)&stubs[i];  	} -	if (!create_stub(sechdrs, &stubs[i], opd, me)) +	if (!create_stub(sechdrs, &stubs[i], addr, me))  		return 0;  	return (unsigned long)&stubs[i]; @@ -324,7 +481,8 @@ static int restore_r2(u32 *instruction, struct module *me)  		       me->name, *instruction);  		return 0;  	} -	*instruction = 0xe8410028;	/* ld r2,40(r1) */ +	/* ld r2,R2_STACK_OFFSET(r1) */ +	*instruction = 0xe8410000 | R2_STACK_OFFSET;  	return 1;  } @@ -342,6 +500,17 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,  	DEBUGP("Applying ADD relocate section %u to %u\n", relsec,  	       sechdrs[relsec].sh_info); + +	/* First time we're called, we can fix up .TOC. */ +	if (!me->arch.toc_fixed) { +		sym = find_dot_toc(sechdrs, strtab, symindex); +		/* It's theoretically possible that a module doesn't want a +		 * .TOC. so don't fail it just for that. */ +		if (sym) +			sym->st_value = my_r2(sechdrs, me); +		me->arch.toc_fixed = true; +	} +  	for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rela); i++) {  		/* This is where to make the change */  		location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr @@ -438,7 +607,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,  					return -ENOENT;  				if (!restore_r2((u32 *)location + 1, me))  					return -ENOEXEC; -			} +			} else +				value += local_entry_offset(sym);  			/* Convert value to relative */  			value -= (unsigned long)location; @@ -459,6 +629,31 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,  			*location = value - (unsigned long)location;  			break; +		case R_PPC64_TOCSAVE: +			/* +			 * Marker reloc indicates we don't have to save r2. +			 * That would only save us one instruction, so ignore +			 * it. 
+			 */
+			break;
+
+		case R_PPC64_REL16_HA:
+			/* Subtract location pointer */
+			value -= (unsigned long)location;
+			value = ((value + 0x8000) >> 16);
+			*((uint16_t *) location)
+				= (*((uint16_t *) location) & ~0xffff)
+				| (value & 0xffff);
+			break;
+
+		case R_PPC64_REL16_LO:
+			/* Subtract location pointer */
+			value -= (unsigned long)location;
+			*((uint16_t *) location)
+				= (*((uint16_t *) location) & ~0xffff)
+				| (value & 0xffff);
+			break;
+
 		default:
 			printk("%s: Unknown ADD relocation: %lu\n",
 			       me->name,
diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c
index 8213ee1eb05..28b898e6818 100644
--- a/arch/powerpc/kernel/nvram_64.c
+++ b/arch/powerpc/kernel/nvram_64.c
@@ -210,7 +210,7 @@ static void __init nvram_print_partitions(char * label)
 	printk(KERN_WARNING "--------%s---------\n", label);
 	printk(KERN_WARNING "indx\t\tsig\tchks\tlen\tname\n");
 	list_for_each_entry(tmp_part, &nvram_partitions, partition) {
-		printk(KERN_WARNING "%4d    \t%02x\t%02x\t%d\t%12s\n",
+		printk(KERN_WARNING "%4d    \t%02x\t%02x\t%d\t%12.12s\n",
 		       tmp_part->index, tmp_part->header.signature,
 		       tmp_part->header.checksum, tmp_part->header.length,
 		       tmp_part->header.name);
@@ -223,9 +223,13 @@ static int __init nvram_write_header(struct nvram_partition * part)
 {
 	loff_t tmp_index;
 	int rc;
-	
+	struct nvram_header phead;
+
+	memcpy(&phead, &part->header, NVRAM_HEADER_LEN);
+	phead.length = cpu_to_be16(phead.length);
+
 	tmp_index = part->index;
-	rc = ppc_md.nvram_write((char *)&part->header, NVRAM_HEADER_LEN, &tmp_index);
+	rc = ppc_md.nvram_write((char *)&phead, NVRAM_HEADER_LEN, &tmp_index);
 
 	return rc;
 }
@@ -505,6 +509,8 @@ int __init nvram_scan_partitions(void)
 
 		memcpy(&phead, header, NVRAM_HEADER_LEN);
 
+		phead.length = be16_to_cpu(phead.length);
+
 		err = 0;
 		c_sum = nvram_checksum(&phead);
 		if (c_sum != phead.checksum) {
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 3fc16e3beb9..d6e195e8cd4 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -46,7 +46,7 @@ struct lppaca lppaca[] = {
 
 static struct lppaca *extra_lppacas;
 static long __initdata lppaca_size;
 
-static void allocate_lppacas(int nr_cpus, unsigned long limit)
+static void __init allocate_lppacas(int nr_cpus, unsigned long limit)
 {
 	if (nr_cpus <= NR_LPPACAS)
 		return;
@@ -57,7 +57,7 @@ static void allocate_lppacas(int nr_cpus, unsigned long limit)
 						 PAGE_SIZE, limit));
 }
 
-static struct lppaca *new_lppaca(int cpu)
+static struct lppaca * __init new_lppaca(int cpu)
 {
 	struct lppaca *lp;
 
@@ -70,7 +70,7 @@ static struct lppaca *new_lppaca(int cpu)
 	return lp;
 }
 
-static void free_lppacas(void)
+static void __init free_lppacas(void)
 {
 	long new_size = 0, nr;
 
@@ -98,13 +98,32 @@ static inline void free_lppacas(void) { }
 
 /*
  * 3 persistent SLBs are registered here.  The buffer will be zero
  * initially, hence will all be invalid until we actually write them.
+ *
+ * If you make the number of persistent SLB entries dynamic, please also
+ * update PR KVM to flush and restore them accordingly.
  */
-struct slb_shadow slb_shadow[] __cacheline_aligned = {
-	[0 ... 
(NR_CPUS-1)] = { -		.persistent = cpu_to_be32(SLB_NUM_BOLTED), -		.buffer_length = cpu_to_be32(sizeof(struct slb_shadow)), -	}, -}; +static struct slb_shadow *slb_shadow; + +static void __init allocate_slb_shadows(int nr_cpus, int limit) +{ +	int size = PAGE_ALIGN(sizeof(struct slb_shadow) * nr_cpus); +	slb_shadow = __va(memblock_alloc_base(size, PAGE_SIZE, limit)); +	memset(slb_shadow, 0, size); +} + +static struct slb_shadow * __init init_slb_shadow(int cpu) +{ +	struct slb_shadow *s = &slb_shadow[cpu]; + +	s->persistent = cpu_to_be32(SLB_NUM_BOLTED); +	s->buffer_length = cpu_to_be32(sizeof(*s)); + +	return s; +} + +#else /* CONFIG_PPC_STD_MMU_64 */ + +static void __init allocate_slb_shadows(int nr_cpus, int limit) { }  #endif /* CONFIG_PPC_STD_MMU_64 */ @@ -136,14 +155,20 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu)  	new_paca->paca_index = cpu;  	new_paca->kernel_toc = kernel_toc;  	new_paca->kernelbase = (unsigned long) _stext; -	new_paca->kernel_msr = MSR_KERNEL; +	/* Only set MSR:IR/DR when MMU is initialized */ +	new_paca->kernel_msr = MSR_KERNEL & ~(MSR_IR | MSR_DR);  	new_paca->hw_cpu_id = 0xffff;  	new_paca->kexec_state = KEXEC_STATE_NONE;  	new_paca->__current = &init_task;  	new_paca->data_offset = 0xfeeeeeeeeeeeeeeeULL;  #ifdef CONFIG_PPC_STD_MMU_64 -	new_paca->slb_shadow_ptr = &slb_shadow[cpu]; +	new_paca->slb_shadow_ptr = init_slb_shadow(cpu);  #endif /* CONFIG_PPC_STD_MMU_64 */ + +#ifdef CONFIG_PPC_BOOK3E +	/* For now -- if we have threads this will be adjusted later */ +	new_paca->tcd_ptr = &new_paca->tcd; +#endif  }  /* Put the paca pointer into r13 and SPRG_PACA */ @@ -190,6 +215,8 @@ void __init allocate_pacas(void)  	allocate_lppacas(nr_cpu_ids, limit); +	allocate_slb_shadows(nr_cpu_ids, limit); +  	/* Can't use for_each_*_cpu, as they aren't functional yet */  	for (cpu = 0; cpu < nr_cpu_ids; cpu++)  		initialise_paca(&paca[cpu], cpu); diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 905a24bb7ac..b49c72fd7f1 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -21,6 +21,7 @@  #include <linux/string.h>  #include <linux/init.h>  #include <linux/bootmem.h> +#include <linux/delay.h>  #include <linux/export.h>  #include <linux/of_address.h>  #include <linux/of_pci.h> @@ -120,6 +121,25 @@ resource_size_t pcibios_window_alignment(struct pci_bus *bus,  	return 1;  } +void pcibios_reset_secondary_bus(struct pci_dev *dev) +{ +	u16 ctrl; + +	if (ppc_md.pcibios_reset_secondary_bus) { +		ppc_md.pcibios_reset_secondary_bus(dev); +		return; +	} + +	pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &ctrl); +	ctrl |= PCI_BRIDGE_CTL_BUS_RESET; +	pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl); +	msleep(2); + +	ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET; +	pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl); +	ssleep(1); +} +  static resource_size_t pcibios_io_size(const struct pci_controller *hose)  {  #ifdef CONFIG_PPC64 @@ -201,26 +221,6 @@ struct pci_controller* pci_find_hose_for_OF_device(struct device_node* node)  	return NULL;  } -static ssize_t pci_show_devspec(struct device *dev, -		struct device_attribute *attr, char *buf) -{ -	struct pci_dev *pdev; -	struct device_node *np; - -	pdev = to_pci_dev (dev); -	np = pci_device_to_OF_node(pdev); -	if (np == NULL || np->full_name == NULL) -		return 0; -	return sprintf(buf, "%s", np->full_name); -} -static DEVICE_ATTR(devspec, S_IRUGO, pci_show_devspec, NULL); - -/* Add sysfs properties */ -int pcibios_add_platform_entries(struct pci_dev *pdev) 
-{ -	return device_create_file(&pdev->dev, &dev_attr_devspec); -} -  /*   * Reads the interrupt pin to determine if interrupt is use by card.   * If the interrupt is used, then gets the interrupt line from the @@ -228,7 +228,7 @@ int pcibios_add_platform_entries(struct pci_dev *pdev)   */  static int pci_read_irq_line(struct pci_dev *pci_dev)  { -	struct of_irq oirq; +	struct of_phandle_args oirq;  	unsigned int virq;  	pr_debug("PCI: Try to map irq for %s...\n", pci_name(pci_dev)); @@ -237,7 +237,7 @@ static int pci_read_irq_line(struct pci_dev *pci_dev)  	memset(&oirq, 0xff, sizeof(oirq));  #endif  	/* Try to get a mapping from the device-tree */ -	if (of_irq_map_pci(pci_dev, &oirq)) { +	if (of_irq_parse_pci(pci_dev, &oirq)) {  		u8 line, pin;  		/* If that fails, lets fallback to what is in the config @@ -263,11 +263,10 @@ static int pci_read_irq_line(struct pci_dev *pci_dev)  			irq_set_irq_type(virq, IRQ_TYPE_LEVEL_LOW);  	} else {  		pr_debug(" Got one, spec %d cells (0x%08x 0x%08x...) on %s\n", -			 oirq.size, oirq.specifier[0], oirq.specifier[1], -			 of_node_full_name(oirq.controller)); +			 oirq.args_count, oirq.args[0], oirq.args[1], +			 of_node_full_name(oirq.np)); -		virq = irq_create_of_mapping(oirq.controller, oirq.specifier, -					     oirq.size); +		virq = irq_create_of_mapping(&oirq);  	}  	if(virq == NO_IRQ) {  		pr_debug(" Failed to map !\n"); @@ -667,60 +666,36 @@ void pci_resource_to_user(const struct pci_dev *dev, int bar,  void pci_process_bridge_OF_ranges(struct pci_controller *hose,  				  struct device_node *dev, int primary)  { -	const __be32 *ranges; -	int rlen; -	int pna = of_n_addr_cells(dev); -	int np = pna + 5;  	int memno = 0; -	u32 pci_space; -	unsigned long long pci_addr, cpu_addr, pci_next, cpu_next, size;  	struct resource *res; +	struct of_pci_range range; +	struct of_pci_range_parser parser;  	printk(KERN_INFO "PCI host bridge %s %s ranges:\n",  	       dev->full_name, primary ? 
"(primary)" : ""); -	/* Get ranges property */ -	ranges = of_get_property(dev, "ranges", &rlen); -	if (ranges == NULL) +	/* Check for ranges property */ +	if (of_pci_range_parser_init(&parser, dev))  		return;  	/* Parse it */ -	while ((rlen -= np * 4) >= 0) { -		/* Read next ranges element */ -		pci_space = of_read_number(ranges, 1); -		pci_addr = of_read_number(ranges + 1, 2); -		cpu_addr = of_translate_address(dev, ranges + 3); -		size = of_read_number(ranges + pna + 3, 2); -		ranges += np; - +	for_each_of_pci_range(&parser, &range) {  		/* If we failed translation or got a zero-sized region  		 * (some FW try to feed us with non sensical zero sized regions  		 * such as power3 which look like some kind of attempt at exposing  		 * the VGA memory hole)  		 */ -		if (cpu_addr == OF_BAD_ADDR || size == 0) +		if (range.cpu_addr == OF_BAD_ADDR || range.size == 0)  			continue; -		/* Now consume following elements while they are contiguous */ -		for (; rlen >= np * sizeof(u32); -		     ranges += np, rlen -= np * 4) { -			if (of_read_number(ranges, 1) != pci_space) -				break; -			pci_next = of_read_number(ranges + 1, 2); -			cpu_next = of_translate_address(dev, ranges + 3); -			if (pci_next != pci_addr + size || -			    cpu_next != cpu_addr + size) -				break; -			size += of_read_number(ranges + pna + 3, 2); -		} -  		/* Act based on address space type */  		res = NULL; -		switch ((pci_space >> 24) & 0x3) { -		case 1:		/* PCI IO space */ +		switch (range.flags & IORESOURCE_TYPE_BITS) { +		case IORESOURCE_IO:  			printk(KERN_INFO  			       "  IO 0x%016llx..0x%016llx -> 0x%016llx\n", -			       cpu_addr, cpu_addr + size - 1, pci_addr); +			       range.cpu_addr, range.cpu_addr + range.size - 1, +			       range.pci_addr);  			/* We support only one IO range */  			if (hose->pci_io_size) { @@ -730,11 +705,12 @@ void pci_process_bridge_OF_ranges(struct pci_controller *hose,  			}  #ifdef CONFIG_PPC32  			/* On 32 bits, limit I/O space to 16MB */ -			if (size > 0x01000000) -				size = 0x01000000; +			if (range.size > 0x01000000) +				range.size = 0x01000000;  			/* 32 bits needs to map IOs here */ -			hose->io_base_virt = ioremap(cpu_addr, size); +			hose->io_base_virt = ioremap(range.cpu_addr, +						range.size);  			/* Expect trouble if pci_addr is not 0 */  			if (primary) @@ -744,20 +720,20 @@ void pci_process_bridge_OF_ranges(struct pci_controller *hose,  			/* pci_io_size and io_base_phys always represent IO  			 * space starting at 0 so we factor in pci_addr  			 */ -			hose->pci_io_size = pci_addr + size; -			hose->io_base_phys = cpu_addr - pci_addr; +			hose->pci_io_size = range.pci_addr + range.size; +			hose->io_base_phys = range.cpu_addr - range.pci_addr;  			/* Build resource */  			res = &hose->io_resource; -			res->flags = IORESOURCE_IO; -			res->start = pci_addr; +			range.cpu_addr = range.pci_addr;  			break; -		case 2:		/* PCI Memory space */ -		case 3:		/* PCI 64 bits Memory space */ +		case IORESOURCE_MEM:  			printk(KERN_INFO  			       " MEM 0x%016llx..0x%016llx -> 0x%016llx %s\n", -			       cpu_addr, cpu_addr + size - 1, pci_addr, -			       (pci_space & 0x40000000) ? "Prefetch" : ""); +			       range.cpu_addr, range.cpu_addr + range.size - 1, +			       range.pci_addr, +			       (range.pci_space & 0x40000000) ? 
+			       "Prefetch" : "");  			/* We support only 3 memory ranges */  			if (memno >= 3) { @@ -766,28 +742,21 @@ void pci_process_bridge_OF_ranges(struct pci_controller *hose,  				continue;  			}  			/* Handles ISA memory hole space here */ -			if (pci_addr == 0) { +			if (range.pci_addr == 0) {  				if (primary || isa_mem_base == 0) -					isa_mem_base = cpu_addr; -				hose->isa_mem_phys = cpu_addr; -				hose->isa_mem_size = size; +					isa_mem_base = range.cpu_addr; +				hose->isa_mem_phys = range.cpu_addr; +				hose->isa_mem_size = range.size;  			}  			/* Build resource */ -			hose->mem_offset[memno] = cpu_addr - pci_addr; +			hose->mem_offset[memno] = range.cpu_addr - +							range.pci_addr;  			res = &hose->mem_resources[memno++]; -			res->flags = IORESOURCE_MEM; -			if (pci_space & 0x40000000) -				res->flags |= IORESOURCE_PREFETCH; -			res->start = cpu_addr;  			break;  		}  		if (res != NULL) { -			res->name = dev->full_name; -			res->end = res->start + size - 1; -			res->parent = NULL; -			res->sibling = NULL; -			res->child = NULL; +			of_pci_range_to_resource(&range, dev, res);  		}  	}  } @@ -836,7 +805,7 @@ static void pcibios_fixup_resources(struct pci_dev *dev)  		 * at 0 as unset as well, except if PCI_PROBE_ONLY is also set  		 * since in that case, we don't want to re-assign anything  		 */ -		pcibios_resource_to_bus(dev, ®, res); +		pcibios_resource_to_bus(dev->bus, ®, res);  		if (pci_has_flag(PCI_REASSIGN_ALL_RSRC) ||  		    (reg.start == 0 && !pci_has_flag(PCI_PROBE_ONLY))) {  			/* Only print message if not re-assigning */ @@ -887,7 +856,7 @@ static int pcibios_uninitialized_bridge_resource(struct pci_bus *bus,  	/* Job is a bit different between memory and IO */  	if (res->flags & IORESOURCE_MEM) { -		pcibios_resource_to_bus(dev, ®ion, res); +		pcibios_resource_to_bus(dev->bus, ®ion, res);  		/* If the BAR is non-0 then it's probably been initialized */  		if (region.start != 0) diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c index c1e17ae68a0..5b789177aa2 100644 --- a/arch/powerpc/kernel/pci-hotplug.c +++ b/arch/powerpc/kernel/pci-hotplug.c @@ -98,8 +98,7 @@ void pcibios_add_pci_devices(struct pci_bus * bus)  		max = bus->busn_res.start;  		for (pass = 0; pass < 2; pass++) {  			list_for_each_entry(dev, &bus->devices, bus_list) { -				if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE || -				    dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) +				if (pci_is_bridge(dev))  					max = pci_scan_bridge(bus, dev,  							      max, pass);  			} diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index a9e311f7a9d..155013da27e 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -208,8 +208,7 @@ long sys_pciconfig_iobase(long which, unsigned long in_bus,  			  unsigned long in_devfn)  {  	struct pci_controller* hose; -	struct list_head *ln; -	struct pci_bus *bus = NULL; +	struct pci_bus *tmp_bus, *bus = NULL;  	struct device_node *hose_node;  	/* Argh ! Please forgive me for that hack, but that's the @@ -230,11 +229,12 @@ long sys_pciconfig_iobase(long which, unsigned long in_bus,  	 * used on pre-domains setup. 
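The ranges rewrite above replaces some thirty lines of manual cell arithmetic, including the merging of contiguous entries, with the generic parser from of_address.h. The idiom in isolation (np is a hypothetical host-bridge node):

static void sketch_parse_ranges(struct device_node *np)
{
	struct of_pci_range_parser parser;
	struct of_pci_range range;

	if (of_pci_range_parser_init(&parser, np))
		return;	/* no usable "ranges" property */

	/* each iteration yields one translated, merged range */
	for_each_of_pci_range(&parser, &range)
		pr_info("cpu %llx -> pci %llx size %llx\n",
			(unsigned long long)range.cpu_addr,
			(unsigned long long)range.pci_addr,
			(unsigned long long)range.size);
}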
We return the first match  	 */ -	for (ln = pci_root_buses.next; ln != &pci_root_buses; ln = ln->next) { -		bus = pci_bus_b(ln); -		if (in_bus >= bus->number && in_bus <= bus->busn_res.end) +	list_for_each_entry(tmp_bus, &pci_root_buses, node) { +		if (in_bus >= tmp_bus->number && +		    in_bus <= tmp_bus->busn_res.end) { +			bus = tmp_bus;  			break; -		bus = NULL; +		}  	}  	if (bus == NULL || bus->dev.of_node == NULL)  		return -ENODEV; diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c index 4368ec6fdc8..44562aa97f1 100644 --- a/arch/powerpc/kernel/pci_of_scan.c +++ b/arch/powerpc/kernel/pci_of_scan.c @@ -111,7 +111,7 @@ static void of_pci_parse_addrs(struct device_node *node, struct pci_dev *dev)  		res->name = pci_name(dev);  		region.start = base;  		region.end = base + size - 1; -		pcibios_bus_to_resource(dev, res, ®ion); +		pcibios_bus_to_resource(dev->bus, res, ®ion);  	}  } @@ -280,7 +280,7 @@ void of_scan_pci_bridge(struct pci_dev *dev)  		res->flags = flags;  		region.start = of_read_number(&ranges[1], 2);  		region.end = region.start + size - 1; -		pcibios_bus_to_resource(dev, res, ®ion); +		pcibios_bus_to_resource(dev->bus, res, ®ion);  	}  	sprintf(bus->name, "PCI Bus %04x:%02x", pci_domain_nr(bus),  		bus->number); @@ -302,8 +302,11 @@ static struct pci_dev *of_scan_pci_dev(struct pci_bus *bus,  			    struct device_node *dn)  {  	struct pci_dev *dev = NULL; -	const u32 *reg; +	const __be32 *reg;  	int reglen, devfn; +#ifdef CONFIG_EEH +	struct eeh_dev *edev = of_node_to_eeh_dev(dn); +#endif  	pr_debug("  * %s\n", dn->full_name);  	if (!of_device_is_available(dn)) @@ -312,7 +315,7 @@ static struct pci_dev *of_scan_pci_dev(struct pci_bus *bus,  	reg = of_get_property(dn, "reg", ®len);  	if (reg == NULL || reglen < 20)  		return NULL; -	devfn = (reg[0] >> 8) & 0xff; +	devfn = (of_read_number(reg, 1) >> 8) & 0xff;  	/* Check if the PCI device is already there */  	dev = pci_get_slot(bus, devfn); @@ -321,6 +324,12 @@ static struct pci_dev *of_scan_pci_dev(struct pci_bus *bus,  		return dev;  	} +	/* Device removed permanently ? 
*/ +#ifdef CONFIG_EEH +	if (edev && (edev->mode & EEH_DEV_REMOVED)) +		return NULL; +#endif +  	/* create a new pci_dev for this device */  	dev = of_create_pci_dev(dn, bus, devfn);  	if (!dev) @@ -362,8 +371,7 @@ static void __of_scan_bus(struct device_node *node, struct pci_bus *bus,  	/* Now scan child busses */  	list_for_each_entry(dev, &bus->devices, bus_list) { -		if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE || -		    dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) { +		if (pci_is_bridge(dev)) {  			of_scan_pci_bridge(dev);  		}  	} diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index 21646dbe1bb..48d17d6fca5 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -79,10 +79,12 @@ EXPORT_SYMBOL(strlen);  EXPORT_SYMBOL(strcmp);  EXPORT_SYMBOL(strncmp); +#ifndef CONFIG_GENERIC_CSUM  EXPORT_SYMBOL(csum_partial);  EXPORT_SYMBOL(csum_partial_copy_generic);  EXPORT_SYMBOL(ip_fast_csum);  EXPORT_SYMBOL(csum_tcpudp_magic); +#endif  EXPORT_SYMBOL(__copy_tofrom_user);  EXPORT_SYMBOL(__clear_user); @@ -98,9 +100,13 @@ EXPORT_SYMBOL(start_thread);  #ifdef CONFIG_PPC_FPU  EXPORT_SYMBOL(giveup_fpu); +EXPORT_SYMBOL(load_fp_state); +EXPORT_SYMBOL(store_fp_state);  #endif  #ifdef CONFIG_ALTIVEC  EXPORT_SYMBOL(giveup_altivec); +EXPORT_SYMBOL(load_vr_state); +EXPORT_SYMBOL(store_vr_state);  #endif /* CONFIG_ALTIVEC */  #ifdef CONFIG_VSX  EXPORT_SYMBOL(giveup_vsx); @@ -114,6 +120,7 @@ EXPORT_SYMBOL(giveup_spe);  EXPORT_SYMBOL(flush_instruction_cache);  #endif  EXPORT_SYMBOL(flush_dcache_range); +EXPORT_SYMBOL(flush_icache_range);  #ifdef CONFIG_SMP  #ifdef CONFIG_PPC32 @@ -143,6 +150,8 @@ EXPORT_SYMBOL(__ashldi3);  EXPORT_SYMBOL(__lshrdi3);  int __ucmpdi2(unsigned long long, unsigned long long);  EXPORT_SYMBOL(__ucmpdi2); +int __cmpdi2(long long, long long); +EXPORT_SYMBOL(__cmpdi2);  #endif  long long __bswapdi2(long long);  EXPORT_SYMBOL(__bswapdi2); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 96d2fdf3aa9..be99774d3f4 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -25,7 +25,6 @@  #include <linux/slab.h>  #include <linux/user.h>  #include <linux/elf.h> -#include <linux/init.h>  #include <linux/prctl.h>  #include <linux/init_task.h>  #include <linux/export.h> @@ -55,6 +54,7 @@  #ifdef CONFIG_PPC64  #include <asm/firmware.h>  #endif +#include <asm/code-patching.h>  #include <linux/kprobes.h>  #include <linux/kdebug.h> @@ -74,6 +74,48 @@ struct task_struct *last_task_used_vsx = NULL;  struct task_struct *last_task_used_spe = NULL;  #endif +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +void giveup_fpu_maybe_transactional(struct task_struct *tsk) +{ +	/* +	 * If we are saving the current thread's registers, and the +	 * thread is in a transactional state, set the TIF_RESTORE_TM +	 * bit so that we know to restore the registers before +	 * returning to userspace. +	 */ +	if (tsk == current && tsk->thread.regs && +	    MSR_TM_ACTIVE(tsk->thread.regs->msr) && +	    !test_thread_flag(TIF_RESTORE_TM)) { +		tsk->thread.tm_orig_msr = tsk->thread.regs->msr; +		set_thread_flag(TIF_RESTORE_TM); +	} + +	giveup_fpu(tsk); +} + +void giveup_altivec_maybe_transactional(struct task_struct *tsk) +{ +	/* +	 * If we are saving the current thread's registers, and the +	 * thread is in a transactional state, set the TIF_RESTORE_TM +	 * bit so that we know to restore the registers before +	 * returning to userspace. 
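The FP and Altivec wrappers above repeat the same four-part test; factored into a hypothetical predicate, the condition they share reads:

static bool tm_mark_restore_sketch(struct task_struct *tsk)
{
	/* saving the current thread mid-transaction: flag it so the
	 * transactional state is restored on return to userspace */
	return tsk == current && tsk->thread.regs &&
	       MSR_TM_ACTIVE(tsk->thread.regs->msr) &&
	       !test_thread_flag(TIF_RESTORE_TM);
}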
+	 */ +	if (tsk == current && tsk->thread.regs && +	    MSR_TM_ACTIVE(tsk->thread.regs->msr) && +	    !test_thread_flag(TIF_RESTORE_TM)) { +		tsk->thread.tm_orig_msr = tsk->thread.regs->msr; +		set_thread_flag(TIF_RESTORE_TM); +	} + +	giveup_altivec(tsk); +} + +#else +#define giveup_fpu_maybe_transactional(tsk)	giveup_fpu(tsk) +#define giveup_altivec_maybe_transactional(tsk)	giveup_altivec(tsk) +#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ +  #ifdef CONFIG_PPC_FPU  /*   * Make sure the floating-point register state in the @@ -102,13 +144,13 @@ void flush_fp_to_thread(struct task_struct *tsk)  			 */  			BUG_ON(tsk != current);  #endif -			giveup_fpu(tsk); +			giveup_fpu_maybe_transactional(tsk);  		}  		preempt_enable();  	}  }  EXPORT_SYMBOL_GPL(flush_fp_to_thread); -#endif +#endif /* CONFIG_PPC_FPU */  void enable_kernel_fp(void)  { @@ -116,11 +158,11 @@ void enable_kernel_fp(void)  #ifdef CONFIG_SMP  	if (current->thread.regs && (current->thread.regs->msr & MSR_FP)) -		giveup_fpu(current); +		giveup_fpu_maybe_transactional(current);  	else  		giveup_fpu(NULL);	/* just enables FP for kernel */  #else -	giveup_fpu(last_task_used_math); +	giveup_fpu_maybe_transactional(last_task_used_math);  #endif /* CONFIG_SMP */  }  EXPORT_SYMBOL(enable_kernel_fp); @@ -132,11 +174,11 @@ void enable_kernel_altivec(void)  #ifdef CONFIG_SMP  	if (current->thread.regs && (current->thread.regs->msr & MSR_VEC)) -		giveup_altivec(current); +		giveup_altivec_maybe_transactional(current);  	else  		giveup_altivec_notask();  #else -	giveup_altivec(last_task_used_altivec); +	giveup_altivec_maybe_transactional(last_task_used_altivec);  #endif /* CONFIG_SMP */  }  EXPORT_SYMBOL(enable_kernel_altivec); @@ -153,7 +195,7 @@ void flush_altivec_to_thread(struct task_struct *tsk)  #ifdef CONFIG_SMP  			BUG_ON(tsk != current);  #endif -			giveup_altivec(tsk); +			giveup_altivec_maybe_transactional(tsk);  		}  		preempt_enable();  	} @@ -182,8 +224,8 @@ EXPORT_SYMBOL(enable_kernel_vsx);  void giveup_vsx(struct task_struct *tsk)  { -	giveup_fpu(tsk); -	giveup_altivec(tsk); +	giveup_fpu_maybe_transactional(tsk); +	giveup_altivec_maybe_transactional(tsk);  	__giveup_vsx(tsk);  } @@ -314,32 +356,32 @@ static DEFINE_PER_CPU(struct arch_hw_breakpoint, current_brk);   */  static void set_debug_reg_defaults(struct thread_struct *thread)  { -	thread->iac1 = thread->iac2 = 0; +	thread->debug.iac1 = thread->debug.iac2 = 0;  #if CONFIG_PPC_ADV_DEBUG_IACS > 2 -	thread->iac3 = thread->iac4 = 0; +	thread->debug.iac3 = thread->debug.iac4 = 0;  #endif -	thread->dac1 = thread->dac2 = 0; +	thread->debug.dac1 = thread->debug.dac2 = 0;  #if CONFIG_PPC_ADV_DEBUG_DVCS > 0 -	thread->dvc1 = thread->dvc2 = 0; +	thread->debug.dvc1 = thread->debug.dvc2 = 0;  #endif -	thread->dbcr0 = 0; +	thread->debug.dbcr0 = 0;  #ifdef CONFIG_BOOKE  	/*  	 * Force User/Supervisor bits to b11 (user-only MSR[PR]=1)  	 */ -	thread->dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US |	\ +	thread->debug.dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US |  			DBCR1_IAC3US | DBCR1_IAC4US;  	/*  	 * Force Data Address Compare User/Supervisor bits to be User-only  	 * (0b11 MSR[PR]=1) and set all other bits in DBCR2 register to be 0.  	 
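
The thread.debug accesses introduced here assume the BookE debug SPR shadows have been gathered out of thread_struct into a type of their own, so that prime_debug_regs() and outside users such as KVM can pass one small pointer instead of a whole thread_struct. A simplified sketch of that grouping (field set abridged; the real definition is fenced by CONFIG_PPC_ADV_DEBUG_* conditionals):

struct debug_reg {
	unsigned long	iac1, iac2;		/* instruction address compares */
	unsigned long	dac1, dac2;		/* data address compares        */
	unsigned long	dvc1, dvc2;		/* data value compares          */
	unsigned long	dbcr0, dbcr1, dbcr2;	/* debug control registers      */
};
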
*/ -	thread->dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US; +	thread->debug.dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US;  #else -	thread->dbcr1 = 0; +	thread->debug.dbcr1 = 0;  #endif  } -static void prime_debug_regs(struct thread_struct *thread) +static void prime_debug_regs(struct debug_reg *debug)  {  	/*  	 * We could have inherited MSR_DE from userspace, since @@ -348,22 +390,22 @@ static void prime_debug_regs(struct thread_struct *thread)  	 */  	mtmsr(mfmsr() & ~MSR_DE); -	mtspr(SPRN_IAC1, thread->iac1); -	mtspr(SPRN_IAC2, thread->iac2); +	mtspr(SPRN_IAC1, debug->iac1); +	mtspr(SPRN_IAC2, debug->iac2);  #if CONFIG_PPC_ADV_DEBUG_IACS > 2 -	mtspr(SPRN_IAC3, thread->iac3); -	mtspr(SPRN_IAC4, thread->iac4); +	mtspr(SPRN_IAC3, debug->iac3); +	mtspr(SPRN_IAC4, debug->iac4);  #endif -	mtspr(SPRN_DAC1, thread->dac1); -	mtspr(SPRN_DAC2, thread->dac2); +	mtspr(SPRN_DAC1, debug->dac1); +	mtspr(SPRN_DAC2, debug->dac2);  #if CONFIG_PPC_ADV_DEBUG_DVCS > 0 -	mtspr(SPRN_DVC1, thread->dvc1); -	mtspr(SPRN_DVC2, thread->dvc2); +	mtspr(SPRN_DVC1, debug->dvc1); +	mtspr(SPRN_DVC2, debug->dvc2);  #endif -	mtspr(SPRN_DBCR0, thread->dbcr0); -	mtspr(SPRN_DBCR1, thread->dbcr1); +	mtspr(SPRN_DBCR0, debug->dbcr0); +	mtspr(SPRN_DBCR1, debug->dbcr1);  #ifdef CONFIG_BOOKE -	mtspr(SPRN_DBCR2, thread->dbcr2); +	mtspr(SPRN_DBCR2, debug->dbcr2);  #endif  }  /* @@ -371,12 +413,13 @@ static void prime_debug_regs(struct thread_struct *thread)   * debug registers, set the debug registers from the values   * stored in the new thread.   */ -static void switch_booke_debug_regs(struct thread_struct *new_thread) +void switch_booke_debug_regs(struct debug_reg *new_debug)  { -	if ((current->thread.dbcr0 & DBCR0_IDM) -		|| (new_thread->dbcr0 & DBCR0_IDM)) -			prime_debug_regs(new_thread); +	if ((current->thread.debug.dbcr0 & DBCR0_IDM) +		|| (new_debug->dbcr0 & DBCR0_IDM)) +			prime_debug_regs(new_debug);  } +EXPORT_SYMBOL_GPL(switch_booke_debug_regs);  #else	/* !CONFIG_PPC_ADV_DEBUG_REGS */  #ifndef CONFIG_HAVE_HW_BREAKPOINT  static void set_debug_reg_defaults(struct thread_struct *thread) @@ -453,14 +496,21 @@ static inline int set_dawr(struct arch_hw_breakpoint *brk)  	return 0;  } -int set_breakpoint(struct arch_hw_breakpoint *brk) +void __set_breakpoint(struct arch_hw_breakpoint *brk)  {  	__get_cpu_var(current_brk) = *brk;  	if (cpu_has_feature(CPU_FTR_DAWR)) -		return set_dawr(brk); +		set_dawr(brk); +	else +		set_dabr(brk); +} -	return set_dabr(brk); +void set_breakpoint(struct arch_hw_breakpoint *brk) +{ +	preempt_disable(); +	__set_breakpoint(brk); +	preempt_enable();  }  #ifdef CONFIG_PPC64 @@ -478,7 +528,48 @@ static inline bool hw_brk_match(struct arch_hw_breakpoint *a,  		return false;  	return true;  } +  #ifdef CONFIG_PPC_TRANSACTIONAL_MEM +static void tm_reclaim_thread(struct thread_struct *thr, +			      struct thread_info *ti, uint8_t cause) +{ +	unsigned long msr_diff = 0; + +	/* +	 * If FP/VSX registers have been already saved to the +	 * thread_struct, move them to the transact_fp array. +	 * We clear the TIF_RESTORE_TM bit since after the reclaim +	 * the thread will no longer be transactional. 
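
A worked example of the msr_diff arithmetic above (values illustrative):

/*
 * Suppose the transaction checkpointed with FP and VEC usable, but FP
 * has since been flushed away by flush_fp_to_thread():
 *
 *	tm_orig_msr = ... | MSR_FP | MSR_VEC
 *	regs->msr   = ... |          MSR_VEC
 *
 *	msr_diff = tm_orig_msr & ~regs->msr  ==  MSR_FP
 *
 * so only the FP image is copied from fp_state into transact_fp; the
 * VEC registers are still live in hardware and need no copy.
 */
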
+	 */ +	if (test_ti_thread_flag(ti, TIF_RESTORE_TM)) { +		msr_diff = thr->tm_orig_msr & ~thr->regs->msr; +		if (msr_diff & MSR_FP) +			memcpy(&thr->transact_fp, &thr->fp_state, +			       sizeof(struct thread_fp_state)); +		if (msr_diff & MSR_VEC) +			memcpy(&thr->transact_vr, &thr->vr_state, +			       sizeof(struct thread_vr_state)); +		clear_ti_thread_flag(ti, TIF_RESTORE_TM); +		msr_diff &= MSR_FP | MSR_VEC | MSR_VSX | MSR_FE0 | MSR_FE1; +	} + +	tm_reclaim(thr, thr->regs->msr, cause); + +	/* Having done the reclaim, we now have the checkpointed +	 * FP/VSX values in the registers.  These might be valid +	 * even if we have previously called enable_kernel_fp() or +	 * flush_fp_to_thread(), so update thr->regs->msr to +	 * indicate their current validity. +	 */ +	thr->regs->msr |= msr_diff; +} + +void tm_reclaim_current(uint8_t cause) +{ +	tm_enable(); +	tm_reclaim_thread(&current->thread, current_thread_info(), cause); +} +  static inline void tm_reclaim_task(struct task_struct *tsk)  {  	/* We have to work out if we're switching from/to a task that's in the @@ -501,9 +592,11 @@ static inline void tm_reclaim_task(struct task_struct *tsk)  	/* Stash the original thread MSR, as giveup_fpu et al will  	 * modify it.  We hold onto it to see whether the task used -	 * FP & vector regs. +	 * FP & vector regs.  If the TIF_RESTORE_TM flag is set, +	 * tm_orig_msr is already set.  	 */ -	thr->tm_orig_msr = thr->regs->msr; +	if (!test_ti_thread_flag(task_thread_info(tsk), TIF_RESTORE_TM)) +		thr->tm_orig_msr = thr->regs->msr;  	TM_DEBUG("--- tm_reclaim on pid %d (NIP=%lx, "  		 "ccr=%lx, msr=%lx, trap=%lx)\n", @@ -511,7 +604,7 @@ static inline void tm_reclaim_task(struct task_struct *tsk)  		 thr->regs->ccr, thr->regs->msr,  		 thr->regs->trap); -	tm_reclaim(thr, thr->regs->msr, TM_CAUSE_RESCHED); +	tm_reclaim_thread(thr, task_thread_info(tsk), TM_CAUSE_RESCHED);  	TM_DEBUG("--- tm_reclaim on pid %d complete\n",  		 tsk->pid); @@ -525,6 +618,31 @@ out_and_saveregs:  	tm_save_sprs(thr);  } +extern void __tm_recheckpoint(struct thread_struct *thread, +			      unsigned long orig_msr); + +void tm_recheckpoint(struct thread_struct *thread, +		     unsigned long orig_msr) +{ +	unsigned long flags; + +	/* We really can't be interrupted here as the TEXASR registers can't +	 * change and later in the trecheckpoint code, we have a userspace R1. +	 * So let's hard disable over this region. +	 */ +	local_irq_save(flags); +	hard_irq_disable(); + +	/* The TM SPRs are restored here, so that TEXASR.FS can be set +	 * before the trecheckpoint and no explosion occurs. +	 */ +	tm_restore_sprs(thread); + +	__tm_recheckpoint(thread, orig_msr); + +	local_irq_restore(flags); +} +  static inline void tm_recheckpoint_new_task(struct task_struct *new)  {  	unsigned long msr; @@ -543,13 +661,10 @@ static inline void tm_recheckpoint_new_task(struct task_struct *new)  	if (!new->thread.regs)  		return; -	/* The TM SPRs are restored here, so that TEXASR.FS can be set -	 * before the trecheckpoint and no explosion occurs. -	 */ -	tm_restore_sprs(&new->thread); - -	if (!MSR_TM_ACTIVE(new->thread.regs->msr)) +	if (!MSR_TM_ACTIVE(new->thread.regs->msr)) { +		tm_restore_sprs(&new->thread);  		return; +	}  	msr = new->thread.tm_orig_msr;  	/* Recheckpoint to restore original checkpointed register state. 
*/  	TM_DEBUG("*** tm_recheckpoint of pid %d " @@ -587,6 +702,43 @@ static inline void __switch_to_tm(struct task_struct *prev)  		tm_reclaim_task(prev);  	}  } + +/* + * This is called if we are on the way out to userspace and the + * TIF_RESTORE_TM flag is set.  It checks if we need to reload + * FP and/or vector state and does so if necessary. + * If userspace is inside a transaction (whether active or + * suspended) and FP/VMX/VSX instructions have ever been enabled + * inside that transaction, then we have to keep them enabled + * and keep the FP/VMX/VSX state loaded while ever the transaction + * continues.  The reason is that if we didn't, and subsequently + * got a FP/VMX/VSX unavailable interrupt inside a transaction, + * we don't know whether it's the same transaction, and thus we + * don't know which of the checkpointed state and the transactional + * state to use. + */ +void restore_tm_state(struct pt_regs *regs) +{ +	unsigned long msr_diff; + +	clear_thread_flag(TIF_RESTORE_TM); +	if (!MSR_TM_ACTIVE(regs->msr)) +		return; + +	msr_diff = current->thread.tm_orig_msr & ~regs->msr; +	msr_diff &= MSR_FP | MSR_VEC | MSR_VSX; +	if (msr_diff & MSR_FP) { +		fp_enable(); +		load_fp_state(&current->thread.fp_state); +		regs->msr |= current->thread.fpexc_mode; +	} +	if (msr_diff & MSR_VEC) { +		vec_enable(); +		load_vr_state(&current->thread.vr_state); +	} +	regs->msr |= msr_diff; +} +  #else  #define tm_recheckpoint_new_task(new)  #define __switch_to_tm(prev) @@ -596,21 +748,22 @@ struct task_struct *__switch_to(struct task_struct *prev,  	struct task_struct *new)  {  	struct thread_struct *new_thread, *old_thread; -	unsigned long flags;  	struct task_struct *last;  #ifdef CONFIG_PPC_BOOK3S_64  	struct ppc64_tlb_batch *batch;  #endif -	/* Back up the TAR across context switches. +	WARN_ON(!irqs_disabled()); + +	/* Back up the TAR and DSCR across context switches.  	 * Note that the TAR is not available for use in the kernel.  (To  	 * provide this, the TAR should be backed up/restored on exception  	 * entry/exit instead, and be in pt_regs.  FIXME, this should be in  	 * pt_regs anyway (for debug).) -	 * Save the TAR here before we do treclaim/trecheckpoint as these -	 * will change the TAR. +	 * Save the TAR and DSCR here before we do treclaim/trecheckpoint as +	 * these will change them. 
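
restore_tm_state() above is reached from the interrupt-return path once the flag is seen in the work-pending mask; a C-level sketch of that call site (the real test lives in the exception-return assembly, so this is only an approximation):

/* on the way back out to userspace, simplified */
if (current_thread_info()->flags & _TIF_RESTORE_TM)
	restore_tm_state(regs);
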
*/ -	save_tar(&prev->thread); +	save_early_sprs(&prev->thread);  	__switch_to_tm(prev); @@ -681,15 +834,15 @@ struct task_struct *__switch_to(struct task_struct *prev,  #endif /* CONFIG_SMP */  #ifdef CONFIG_PPC_ADV_DEBUG_REGS -	switch_booke_debug_regs(&new->thread); +	switch_booke_debug_regs(&new->thread.debug);  #else  /*   * For PPC_BOOK3S_64, we use the hw-breakpoint interfaces that would   * schedule DABR   */  #ifndef CONFIG_HAVE_HW_BREAKPOINT -	if (unlikely(hw_brk_match(&__get_cpu_var(current_brk), &new->thread.hw_brk))) -		set_breakpoint(&new->thread.hw_brk); +	if (unlikely(!hw_brk_match(&__get_cpu_var(current_brk), &new->thread.hw_brk))) +		__set_breakpoint(&new->thread.hw_brk);  #endif /* CONFIG_HAVE_HW_BREAKPOINT */  #endif @@ -721,8 +874,6 @@ struct task_struct *__switch_to(struct task_struct *prev,  	}  #endif /* CONFIG_PPC_BOOK3S_64 */ -	local_irq_save(flags); -  	/*  	 * We can't take a PMU exception inside _switch() since there is a  	 * window where the kernel stack SLB and the kernel stack are out @@ -742,8 +893,6 @@ struct task_struct *__switch_to(struct task_struct *prev,  	}  #endif /* CONFIG_PPC_BOOK3S_64 */ -	local_irq_restore(flags); -  	return last;  } @@ -860,17 +1009,21 @@ void show_regs(struct pt_regs * regs)  	printk("MSR: "REG" ", regs->msr);  	printbits(regs->msr, msr_bits);  	printk("  CR: %08lx  XER: %08lx\n", regs->ccr, regs->xer); -#ifdef CONFIG_PPC64 -	printk("SOFTE: %ld\n", regs->softe); -#endif  	trap = TRAP(regs);  	if ((regs->trap != 0xc00) && cpu_has_feature(CPU_FTR_CFAR)) -		printk("CFAR: "REG"\n", regs->orig_gpr3); -	if (trap == 0x300 || trap == 0x600) +		printk("CFAR: "REG" ", regs->orig_gpr3); +	if (trap == 0x200 || trap == 0x300 || trap == 0x600)  #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) -		printk("DEAR: "REG", ESR: "REG"\n", regs->dar, regs->dsisr); +		printk("DEAR: "REG" ESR: "REG" ", regs->dar, regs->dsisr);  #else -		printk("DAR: "REG", DSISR: %08lx\n", regs->dar, regs->dsisr); +		printk("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr); +#endif +#ifdef CONFIG_PPC64 +	printk("SOFTE: %ld ", regs->softe); +#endif +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +	if (MSR_TM_ACTIVE(regs->msr)) +		printk("\nPACATMSCRATCH: %016llx ", get_paca()->tm_scratch);  #endif  	for (i = 0;  i < 32;  i++) { @@ -889,9 +1042,6 @@ void show_regs(struct pt_regs * regs)  	printk("NIP ["REG"] %pS\n", regs->nip, (void *)regs->nip);  	printk("LR ["REG"] %pS\n", regs->link, (void *)regs->link);  #endif -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -	printk("PACATMSCRATCH [%llx]\n", get_paca()->tm_scratch); -#endif  	show_stack(current, (unsigned long *) regs->gpr[1]);  	if (!user_mode(regs))  		show_instructions(regs); @@ -928,6 +1078,15 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)  	flush_altivec_to_thread(src);  	flush_vsx_to_thread(src);  	flush_spe_to_thread(src); +	/* +	 * Flush TM state out so we can copy it.  __switch_to_tm() does this +	 * flush but it removes the checkpointed state from the current CPU and +	 * transitions the CPU out of TM mode.  Hence we need to call +	 * tm_recheckpoint_new_task() (on the same task) to restore the +	 * checkpointed state back and the TM mode. 
+	 */ +	__switch_to_tm(src); +	tm_recheckpoint_new_task(src);  	*dst = *src; @@ -957,7 +1116,9 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,  		struct thread_info *ti = (void *)task_stack_page(p);  		memset(childregs, 0, sizeof(struct pt_regs));  		childregs->gpr[1] = sp + sizeof(struct pt_regs); -		childregs->gpr[14] = usp;	/* function */ +		/* function */ +		if (usp) +			childregs->gpr[14] = ppc_function_entry((void *)usp);  #ifdef CONFIG_PPC64  		clear_tsk_thread_flag(p, TIF_32BIT);  		childregs->softe = 1; @@ -1008,6 +1169,11 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,  	p->thread.ptrace_bps[0] = NULL;  #endif +	p->thread.fp_save_area = NULL; +#ifdef CONFIG_ALTIVEC +	p->thread.vr_save_area = NULL; +#endif +  #ifdef CONFIG_PPC_STD_MMU_64  	if (mmu_has_feature(MMU_FTR_SLB)) {  		unsigned long sp_vsid; @@ -1031,17 +1197,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,  	if (cpu_has_feature(CPU_FTR_HAS_PPR))  		p->thread.ppr = INIT_PPR;  #endif -	/* -	 * The PPC64 ABI makes use of a TOC to contain function  -	 * pointers.  The function (ret_from_except) is actually a pointer -	 * to the TOC entry.  The first entry is a pointer to the actual -	 * function. -	 */ -#ifdef CONFIG_PPC64 -	kregs->nip = *((unsigned long *)f); -#else -	kregs->nip = (unsigned long)f; -#endif +	kregs->nip = ppc_function_entry(f);  	return 0;  } @@ -1083,25 +1239,45 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)  	regs->msr = MSR_USER;  #else  	if (!is_32bit_task()) { -		unsigned long entry, toc; +		unsigned long entry; -		/* start is a relocated pointer to the function descriptor for -		 * the elf _start routine.  The first entry in the function -		 * descriptor is the entry address of _start and the second -		 * entry is the TOC value we need to use. -		 */ -		__get_user(entry, (unsigned long __user *)start); -		__get_user(toc, (unsigned long __user *)start+1); +		if (is_elf2_task()) { +			/* Look ma, no function descriptors! */ +			entry = start; -		/* Check whether the e_entry function descriptor entries -		 * need to be relocated before we can use them. -		 */ -		if (load_addr != 0) { -			entry += load_addr; -			toc   += load_addr; +			/* +			 * Ulrich says: +			 *   The latest iteration of the ABI requires that when +			 *   calling a function (at its global entry point), +			 *   the caller must ensure r12 holds the entry point +			 *   address (so that the function can quickly +			 *   establish addressability). +			 */ +			regs->gpr[12] = start; +			/* Make sure that's restored on entry to userspace. */ +			set_thread_flag(TIF_RESTOREALL); +		} else { +			unsigned long toc; + +			/* start is a relocated pointer to the function +			 * descriptor for the elf _start routine.  The first +			 * entry in the function descriptor is the entry +			 * address of _start and the second entry is the TOC +			 * value we need to use. +			 */ +			__get_user(entry, (unsigned long __user *)start); +			__get_user(toc, (unsigned long __user *)start+1); + +			/* Check whether the e_entry function descriptor entries +			 * need to be relocated before we can use them. 
+			 */ +			if (load_addr != 0) { +				entry += load_addr; +				toc   += load_addr; +			} +			regs->gpr[2] = toc;  		}  		regs->nip = entry; -		regs->gpr[2] = toc;  		regs->msr = MSR_USER64;  	} else {  		regs->nip = start; @@ -1113,12 +1289,12 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)  #ifdef CONFIG_VSX  	current->thread.used_vsr = 0;  #endif -	memset(current->thread.fpr, 0, sizeof(current->thread.fpr)); -	current->thread.fpscr.val = 0; +	memset(&current->thread.fp_state, 0, sizeof(current->thread.fp_state)); +	current->thread.fp_save_area = NULL;  #ifdef CONFIG_ALTIVEC -	memset(current->thread.vr, 0, sizeof(current->thread.vr)); -	memset(&current->thread.vscr, 0, sizeof(current->thread.vscr)); -	current->thread.vscr.u[3] = 0x00010000; /* Java mode disabled */ +	memset(&current->thread.vr_state, 0, sizeof(current->thread.vr_state)); +	current->thread.vr_state.vscr.u[3] = 0x00010000; /* Java mode disabled */ +	current->thread.vr_save_area = NULL;  	current->thread.vrsave = 0;  	current->thread.used_vr = 0;  #endif /* CONFIG_ALTIVEC */ @@ -1151,6 +1327,19 @@ int set_fpexc_mode(struct task_struct *tsk, unsigned int val)  	if (val & PR_FP_EXC_SW_ENABLE) {  #ifdef CONFIG_SPE  		if (cpu_has_feature(CPU_FTR_SPE)) { +			/* +			 * When the sticky exception bits are set +			 * directly by userspace, it must call prctl +			 * with PR_GET_FPEXC (with PR_FP_EXC_SW_ENABLE +			 * in the existing prctl settings) or +			 * PR_SET_FPEXC (with PR_FP_EXC_SW_ENABLE in +			 * the bits being set).  <fenv.h> functions +			 * saving and restoring the whole +			 * floating-point environment need to do so +			 * anyway to restore the prctl settings from +			 * the saved environment. +			 */ +			tsk->thread.spefscr_last = mfspr(SPRN_SPEFSCR);  			tsk->thread.fpexc_mode = val &  				(PR_FP_EXC_SW_ENABLE | PR_FP_ALL_EXCEPT);  			return 0; @@ -1182,9 +1371,22 @@ int get_fpexc_mode(struct task_struct *tsk, unsigned long adr)  	if (tsk->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE)  #ifdef CONFIG_SPE -		if (cpu_has_feature(CPU_FTR_SPE)) +		if (cpu_has_feature(CPU_FTR_SPE)) { +			/* +			 * When the sticky exception bits are set +			 * directly by userspace, it must call prctl +			 * with PR_GET_FPEXC (with PR_FP_EXC_SW_ENABLE +			 * in the existing prctl settings) or +			 * PR_SET_FPEXC (with PR_FP_EXC_SW_ENABLE in +			 * the bits being set).  <fenv.h> functions +			 * saving and restoring the whole +			 * floating-point environment need to do so +			 * anyway to restore the prctl settings from +			 * the saved environment. 
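
The userspace half of the contract this comment describes might look like the following (illustrative; PR_SET_FPEXC and PR_GET_FPEXC are the standard prctl commands):

#include <sys/prctl.h>
#include <linux/prctl.h>

/* switch to software-emulated FP exception handling, all exceptions on */
static int enable_sw_fpexc(void)
{
	unsigned int mode;

	if (prctl(PR_SET_FPEXC, PR_FP_EXC_SW_ENABLE | PR_FP_ALL_EXCEPT))
		return -1;
	/* read the mode back, as an <fenv.h>-style save/restore would */
	if (prctl(PR_GET_FPEXC, &mode))
		return -1;
	return mode;
}
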
+			 */ +			tsk->thread.spefscr_last = mfspr(SPRN_SPEFSCR);  			val = tsk->thread.fpexc_mode; -		else +		} else  			return -EINVAL;  #else  		return -EINVAL; diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index b7634ce41db..b694b073097 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -29,10 +29,11 @@  #include <linux/bitops.h>  #include <linux/export.h>  #include <linux/kexec.h> -#include <linux/debugfs.h>  #include <linux/irq.h>  #include <linux/memblock.h>  #include <linux/of.h> +#include <linux/of_fdt.h> +#include <linux/libfdt.h>  #include <asm/prom.h>  #include <asm/rtas.h> @@ -117,14 +118,14 @@ static void __init move_device_tree(void)  	DBG("-> move_device_tree\n");  	start = __pa(initial_boot_params); -	size = be32_to_cpu(initial_boot_params->totalsize); +	size = fdt_totalsize(initial_boot_params);  	if ((memory_limit && (start + size) > PHYSICAL_START + memory_limit) ||  			overlaps_crashkernel(start, size) ||  			overlaps_initrd(start, size)) {  		p = __va(memblock_alloc(size, PAGE_SIZE));  		memcpy(p, initial_boot_params, size); -		initial_boot_params = (struct boot_param_header *)p; +		initial_boot_params = p;  		DBG("Moved device tree to 0x%p\n", p);  	} @@ -162,7 +163,7 @@ static struct ibm_pa_feature {  	{CPU_FTR_REAL_LE, PPC_FEATURE_TRUE_LE, 5, 0, 0},  }; -static void __init scan_features(unsigned long node, unsigned char *ftrs, +static void __init scan_features(unsigned long node, const unsigned char *ftrs,  				 unsigned long tablelen,  				 struct ibm_pa_feature *fp,  				 unsigned long ft_size) @@ -201,8 +202,8 @@ static void __init scan_features(unsigned long node, unsigned char *ftrs,  static void __init check_cpu_pa_features(unsigned long node)  { -	unsigned char *pa_ftrs; -	unsigned long tablelen; +	const unsigned char *pa_ftrs; +	int tablelen;  	pa_ftrs = of_get_flat_dt_prop(node, "ibm,pa-features", &tablelen);  	if (pa_ftrs == NULL) @@ -215,7 +216,7 @@ static void __init check_cpu_pa_features(unsigned long node)  #ifdef CONFIG_PPC_STD_MMU_64  static void __init check_cpu_slb_size(unsigned long node)  { -	__be32 *slb_size_ptr; +	const __be32 *slb_size_ptr;  	slb_size_ptr = of_get_flat_dt_prop(node, "slb-size", NULL);  	if (slb_size_ptr != NULL) { @@ -256,7 +257,7 @@ static struct feature_property {  static inline void identical_pvr_fixup(unsigned long node)  {  	unsigned int pvr; -	char *model = of_get_flat_dt_prop(node, "model", NULL); +	const char *model = of_get_flat_dt_prop(node, "model", NULL);  	/*  	 * Since 440GR(x)/440EP(x) processors have the same pvr, @@ -294,11 +295,11 @@ static int __init early_init_dt_scan_cpus(unsigned long node,  					  const char *uname, int depth,  					  void *data)  { -	char *type = of_get_flat_dt_prop(node, "device_type", NULL); +	const char *type = of_get_flat_dt_prop(node, "device_type", NULL);  	const __be32 *prop;  	const __be32 *intserv;  	int i, nthreads; -	unsigned long len; +	int len;  	int found = -1;  	int found_thread = 0; @@ -324,9 +325,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node,  		 * version 2 of the kexec param format adds the phys cpuid of  		 * booted proc.  		 
*/ -		if (be32_to_cpu(initial_boot_params->version) >= 2) { +		if (fdt_version(initial_boot_params) >= 2) {  			if (be32_to_cpu(intserv[i]) == -			    be32_to_cpu(initial_boot_params->boot_cpuid_phys)) { +			    fdt_boot_cpuid_phys(initial_boot_params)) {  				found = boot_cpu_count;  				found_thread = i;  			} @@ -346,52 +347,52 @@ static int __init early_init_dt_scan_cpus(unsigned long node,  #endif  	} -	if (found >= 0) { -		DBG("boot cpu: logical %d physical %d\n", found, -			be32_to_cpu(intserv[found_thread])); -		boot_cpuid = found; -		set_hard_smp_processor_id(found, -			be32_to_cpu(intserv[found_thread])); +	/* Not the boot CPU */ +	if (found < 0) +		return 0; -		/* -		 * PAPR defines "logical" PVR values for cpus that -		 * meet various levels of the architecture: -		 * 0x0f000001	Architecture version 2.04 -		 * 0x0f000002	Architecture version 2.05 -		 * If the cpu-version property in the cpu node contains -		 * such a value, we call identify_cpu again with the -		 * logical PVR value in order to use the cpu feature -		 * bits appropriate for the architecture level. -		 * -		 * A POWER6 partition in "POWER6 architected" mode -		 * uses the 0x0f000002 PVR value; in POWER5+ mode -		 * it uses 0x0f000001. -		 */ -		prop = of_get_flat_dt_prop(node, "cpu-version", NULL); -		if (prop && (be32_to_cpup(prop) & 0xff000000) == 0x0f000000) -			identify_cpu(0, be32_to_cpup(prop)); +	DBG("boot cpu: logical %d physical %d\n", found, +	    be32_to_cpu(intserv[found_thread])); +	boot_cpuid = found; +	set_hard_smp_processor_id(found, be32_to_cpu(intserv[found_thread])); -		identical_pvr_fixup(node); -	} +	/* +	 * PAPR defines "logical" PVR values for cpus that +	 * meet various levels of the architecture: +	 * 0x0f000001	Architecture version 2.04 +	 * 0x0f000002	Architecture version 2.05 +	 * If the cpu-version property in the cpu node contains +	 * such a value, we call identify_cpu again with the +	 * logical PVR value in order to use the cpu feature +	 * bits appropriate for the architecture level. +	 * +	 * A POWER6 partition in "POWER6 architected" mode +	 * uses the 0x0f000002 PVR value; in POWER5+ mode +	 * it uses 0x0f000001. 
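
Concretely, the check that follows only needs the top byte, since all PAPR logical PVRs share the 0x0f prefix while real chip PVRs do not; as a sketch:

static inline bool pvr_is_arch_level(u32 pvr)
{
	/* 0x0f000001 = v2.04, 0x0f000002 = v2.05, ... */
	return (pvr & 0xff000000) == 0x0f000000;
}
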
+	 */ +	prop = of_get_flat_dt_prop(node, "cpu-version", NULL); +	if (prop && (be32_to_cpup(prop) & 0xff000000) == 0x0f000000) +		identify_cpu(0, be32_to_cpup(prop)); + +	identical_pvr_fixup(node);  	check_cpu_feature_properties(node);  	check_cpu_pa_features(node);  	check_cpu_slb_size(node); -#ifdef CONFIG_PPC_PSERIES +#ifdef CONFIG_PPC64  	if (nthreads > 1)  		cur_cpu_spec->cpu_features |= CPU_FTR_SMT;  	else  		cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT;  #endif -  	return 0;  }  int __init early_init_dt_scan_chosen_ppc(unsigned long node, const char *uname,  					 int depth, void *data)  { -	unsigned long *lprop; /* All these set by kernel, so no need to convert endian */ +	const unsigned long *lprop; /* All these set by kernel, so no need to convert endian */  	/* Use common scan routine to determine if this is the chosen node */  	if (early_init_dt_scan_chosen(node, uname, depth, data) == 0) @@ -442,8 +443,9 @@ int __init early_init_dt_scan_chosen_ppc(unsigned long node, const char *uname,   */  static int __init early_init_dt_scan_drconf_memory(unsigned long node)  { -	__be32 *dm, *ls, *usm; -	unsigned long l, n, flags; +	const __be32 *dm, *ls, *usm; +	int l; +	unsigned long n, flags;  	u64 base, size, memblock_size;  	unsigned int is_kexec_kdump = 0, rngs; @@ -523,6 +525,20 @@ static int __init early_init_dt_scan_memory_ppc(unsigned long node,  	return early_init_dt_scan_memory(node, uname, depth, data);  } +/* + * For a relocatable kernel, we need to get the memstart_addr first, + * then use it to calculate the virtual kernel start address. This has + * to happen at a very early stage (before machine_init). At that point + * we only want to learn the memstart_addr and must not disturb the + * memblock yet, so introduce a variable that lets us skip the + * memblock_add() calls. 
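
A sketch of the early-boot sequence this guard enables on a relocatable kernel (the helper itself is added further down in this patch; the caller shape here is an assumption):

phys_addr_t first_size;

/* scan only to learn the first memory block; no memblock_add() yet */
early_get_first_memblock_info(__va(dt_ptr), &first_size);
/* ... derive memstart_addr / kernel virtual start from first_size ... */

/* later, the normal scan populates memblock for real */
early_init_devtree(__va(dt_ptr));
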
+ */ +#ifdef CONFIG_RELOCATABLE +static int add_mem_to_memblock = 1; +#else +#define add_mem_to_memblock 1 +#endif +  void __init early_init_dt_add_memory_arch(u64 base, u64 size)  {  #ifdef CONFIG_PPC64 @@ -543,23 +559,18 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size)  	}  	/* Add the chunk to the MEMBLOCK list */ -	memblock_add(base, size); -} - -#ifdef CONFIG_BLK_DEV_INITRD -void __init early_init_dt_setup_initrd_arch(u64 start, u64 end) -{ -	initrd_start = (unsigned long)__va(start); -	initrd_end = (unsigned long)__va(end); -	initrd_below_start_ok = 1; +	if (add_mem_to_memblock) +		memblock_add(base, size);  } -#endif  static void __init early_reserve_mem_dt(void)  { -	unsigned long i, len, dt_root; +	unsigned long i, dt_root; +	int len;  	const __be32 *prop; +	early_init_fdt_scan_reserved_mem(); +  	dt_root = of_get_flat_dt_root();  	prop = of_get_flat_dt_prop(dt_root, "reserved-ranges", &len); @@ -586,18 +597,10 @@ static void __init early_reserve_mem_dt(void)  static void __init early_reserve_mem(void)  { -	u64 base, size;  	__be64 *reserve_map; -	unsigned long self_base; -	unsigned long self_size;  	reserve_map = (__be64 *)(((unsigned long)initial_boot_params) + -			be32_to_cpu(initial_boot_params->off_mem_rsvmap)); - -	/* before we do anything, lets reserve the dt blob */ -	self_base = __pa((unsigned long)initial_boot_params); -	self_size = be32_to_cpu(initial_boot_params->totalsize); -	memblock_reserve(self_base, self_size); +			fdt_off_mem_rsvmap(initial_boot_params));  	/* Look for the new "reserved-regions" property in the DT */  	early_reserve_mem_dt(); @@ -627,26 +630,12 @@ static void __init early_reserve_mem(void)  			size_32 = be32_to_cpup(reserve_map_32++);  			if (size_32 == 0)  				break; -			/* skip if the reservation is for the blob */ -			if (base_32 == self_base && size_32 == self_size) -				continue;  			DBG("reserving: %x -> %x\n", base_32, size_32);  			memblock_reserve(base_32, size_32);  		}  		return;  	}  #endif -	DBG("Processing reserve map\n"); - -	/* Handle the reserve map in the fdt blob if it exists */ -	while (1) { -		base = be64_to_cpup(reserve_map++); -		size = be64_to_cpup(reserve_map++); -		if (size == 0) -			break; -		DBG("reserving: %llx -> %llx\n", base, size); -		memblock_reserve(base, size); -	}  }  void __init early_init_devtree(void *params) @@ -673,13 +662,6 @@ void __init early_init_devtree(void *params)  	of_scan_flat_dt(early_init_dt_scan_fw_dump, NULL);  #endif -	/* Pre-initialize the cmd_line with the content of boot_commmand_line, -	 * which will be empty except when the content of the variable has -	 * been overriden by a bootloading mechanism. This happens typically -	 * with HAL takeover -	 */ -	strlcpy(cmd_line, boot_command_line, COMMAND_LINE_SIZE); -  	/* Retrieve various informations from the /chosen node of the  	 * device-tree, including the platform type, initrd location and  	 * size, TCE reserve, and more ... @@ -738,6 +720,10 @@ void __init early_init_devtree(void *params)  	 * (altivec support, boot CPU ID, ...)  	 
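
All of the early scanners used here share the of_scan_flat_dt() callback shape: the walker visits every node and a nonzero return stops the walk. A minimal callback for reference (the name and property are illustrative):

static int __init my_scan_cb(unsigned long node, const char *uname,
			     int depth, void *data)
{
	int len;
	const __be32 *prop = of_get_flat_dt_prop(node, "reg", &len);

	if (prop == NULL)
		return 0;	/* not interesting, keep walking */
	/* ... record whatever is needed from prop/len ... */
	return 1;		/* done, stop the walk */
}
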
*/  of_scan_flat_dt(early_init_dt_scan_cpus, NULL); +	if (boot_cpuid < 0) { +		printk("Failed to identify boot CPU!\n"); +		BUG(); +	}  #if defined(CONFIG_SMP) && defined(CONFIG_PPC64)  	/* We'll later wait for secondaries to check in; there are @@ -746,9 +732,38 @@ void __init early_init_devtree(void *params)  	spinning_secondaries = boot_cpu_count - 1;  #endif +#ifdef CONFIG_PPC_POWERNV +	/* Scan and build the list of machine check recoverable ranges */ +	of_scan_flat_dt(early_init_dt_scan_recoverable_ranges, NULL); +#endif +  	DBG(" <- early_init_devtree()\n");  } +#ifdef CONFIG_RELOCATABLE +/* + * This function runs before early_init_devtree, so we have to initialize + * initial_boot_params. + */ +void __init early_get_first_memblock_info(void *params, phys_addr_t *size) +{ +	/* Setup flat device-tree pointer */ +	initial_boot_params = params; + +	/* +	 * Scan the memory nodes and set add_mem_to_memblock to 0 to avoid +	 * messing up the memblock. +	 */ +	add_mem_to_memblock = 0; +	of_scan_flat_dt(early_init_dt_scan_root, NULL); +	of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL); +	add_mem_to_memblock = 1; + +	if (size) +		*size = first_memblock_size; +} +#endif +  /*******   *   * New implementation of the OF "find" APIs, return a refcounted @@ -761,37 +776,6 @@ void __init early_init_devtree(void *params)   *******/  /** - *	of_find_next_cache_node - Find a node's subsidiary cache - *	@np:	node of type "cpu" or "cache" - * - *	Returns a node pointer with refcount incremented, use - *	of_node_put() on it when done.  Caller should hold a reference - *	to np. - */ -struct device_node *of_find_next_cache_node(struct device_node *np) -{ -	struct device_node *child; -	const phandle *handle; - -	handle = of_get_property(np, "l2-cache", NULL); -	if (!handle) -		handle = of_get_property(np, "next-level-cache", NULL); - -	if (handle) -		return of_find_node_by_phandle(*handle); - -	/* OF on pmac has nodes instead of properties named "l2-cache" -	 * beneath CPU nodes. -	 */ -	if (!strcmp(np->type, "cpu")) -		for_each_child_of_node(np, child) -			if (!strcmp(child->type, "cache")) -				return child; - -	return NULL; -} - -/**   * of_get_ibm_chip_id - Returns the IBM "chip-id" of a device   * @np: device node of the device   * @@ -817,6 +801,26 @@ int of_get_ibm_chip_id(struct device_node *np)  	return -1;  } +/** + * cpu_to_chip_id - Return the cpu's chip-id + * @cpu: The logical cpu number. + * + * Return the value of the ibm,chip-id property corresponding to the given + * logical cpu number. If the chip-id cannot be found, returns -1. 
+ */ +int cpu_to_chip_id(int cpu) +{ +	struct device_node *np; +	int chip_id; + +	np = of_get_cpu_node(cpu, NULL); +	if (!np) +		return -1; + +	chip_id = of_get_ibm_chip_id(np); +	of_node_put(np); +	return chip_id; +} +EXPORT_SYMBOL(cpu_to_chip_id); +  #ifdef CONFIG_PPC_PSERIES  /*   * Fix up the uninitialized fields in a new device node: @@ -891,23 +895,3 @@ bool arch_match_cpu_phys_id(int cpu, u64 phys_id)  {  	return (int)phys_id == get_hard_smp_processor_id(cpu);  } - -#if defined(CONFIG_DEBUG_FS) && defined(DEBUG) -static struct debugfs_blob_wrapper flat_dt_blob; - -static int __init export_flat_device_tree(void) -{ -	struct dentry *d; - -	flat_dt_blob.data = initial_boot_params; -	flat_dt_blob.size = be32_to_cpu(initial_boot_params->totalsize); - -	d = debugfs_create_blob("flat-device-tree", S_IFREG | S_IRUSR, -				powerpc_debugfs_root, &flat_dt_blob); -	if (!d) -		return 1; - -	return 0; -} -__initcall(export_flat_device_tree); -#endif diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 5fe2842e8ba..1a85d8f9673 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -858,7 +858,8 @@ static void __init prom_send_capabilities(void)  {  	ihandle root;  	prom_arg_t ret; -	__be32 *cores; +	u32 cores; +	unsigned char *ptcores;  	root = call_prom("open", 1, 1, ADDR("/"));  	if (root != 0) { @@ -868,15 +869,30 @@ static void __init prom_send_capabilities(void)  		 * (we assume this is the same for all cores) and use it to  		 * divide NR_CPUS.  		 */ -		cores = (__be32 *)&ibm_architecture_vec[IBM_ARCH_VEC_NRCORES_OFFSET]; -		if (be32_to_cpup(cores) != NR_CPUS) { + +		/* The core value may start at an odd address. If such a word +		 * access is made at a cache line boundary, this leads to an +		 * exception which may not be handled at this time. +		 * Force a per-byte access to avoid the exception. +		 */ +		ptcores = &ibm_architecture_vec[IBM_ARCH_VEC_NRCORES_OFFSET]; +		cores = 0; +		cores |= ptcores[0] << 24; +		cores |= ptcores[1] << 16; +		cores |= ptcores[2] << 8; +		cores |= ptcores[3]; +		if (cores != NR_CPUS) {  			prom_printf("WARNING ! 
"  				    "ibm_architecture_vec structure inconsistent: %lu!\n", -				    be32_to_cpup(cores)); +				    cores);  		} else { -			*cores = cpu_to_be32(DIV_ROUND_UP(NR_CPUS, prom_count_smt_threads())); +			cores = DIV_ROUND_UP(NR_CPUS, prom_count_smt_threads());  			prom_printf("Max number of cores passed to firmware: %lu (NR_CPUS = %lu)\n", -				    be32_to_cpup(cores), NR_CPUS); +				    cores, NR_CPUS); +			ptcores[0] = (cores >> 24) & 0xff; +			ptcores[1] = (cores >> 16) & 0xff; +			ptcores[2] = (cores >> 8) & 0xff; +			ptcores[3] = cores & 0xff;  		}  		/* try calling the ibm,client-architecture-support method */ @@ -1252,201 +1268,6 @@ static u64 __initdata prom_opal_base;  static u64 __initdata prom_opal_entry;  #endif -#ifdef __BIG_ENDIAN__ -/* XXX Don't change this structure without updating opal-takeover.S */ -static struct opal_secondary_data { -	s64				ack;	/*  0 */ -	u64				go;	/*  8 */ -	struct opal_takeover_args	args;	/* 16 */ -} opal_secondary_data; - -static u64 __initdata prom_opal_align; -static u64 __initdata prom_opal_size; -static int __initdata prom_rtas_start_cpu; -static u64 __initdata prom_rtas_data; -static u64 __initdata prom_rtas_entry; - -extern char opal_secondary_entry; - -static void __init prom_query_opal(void) -{ -	long rc; - -	/* We must not query for OPAL presence on a machine that -	 * supports TNK takeover (970 blades), as this uses the same -	 * h-call with different arguments and will crash -	 */ -	if (PHANDLE_VALID(call_prom("finddevice", 1, 1, -				    ADDR("/tnk-memory-map")))) { -		prom_printf("TNK takeover detected, skipping OPAL check\n"); -		return; -	} - -	prom_printf("Querying for OPAL presence... "); - -	rc = opal_query_takeover(&prom_opal_size, -				 &prom_opal_align); -	prom_debug("(rc = %ld) ", rc); -	if (rc != 0) { -		prom_printf("not there.\n"); -		return; -	} -	of_platform = PLATFORM_OPAL; -	prom_printf(" there !\n"); -	prom_debug("  opal_size  = 0x%lx\n", prom_opal_size); -	prom_debug("  opal_align = 0x%lx\n", prom_opal_align); -	if (prom_opal_align < 0x10000) -		prom_opal_align = 0x10000; -} - -static int __init prom_rtas_call(int token, int nargs, int nret, -				 int *outputs, ...) -{ -	struct rtas_args rtas_args; -	va_list list; -	int i; - -	rtas_args.token = token; -	rtas_args.nargs = nargs; -	rtas_args.nret  = nret; -	rtas_args.rets  = (rtas_arg_t *)&(rtas_args.args[nargs]); -	va_start(list, outputs); -	for (i = 0; i < nargs; ++i) -		rtas_args.args[i] = va_arg(list, rtas_arg_t); -	va_end(list); - -	for (i = 0; i < nret; ++i) -		rtas_args.rets[i] = 0; - -	opal_enter_rtas(&rtas_args, prom_rtas_data, -			prom_rtas_entry); - -	if (nret > 1 && outputs != NULL) -		for (i = 0; i < nret-1; ++i) -			outputs[i] = rtas_args.rets[i+1]; -	return (nret > 0)? rtas_args.rets[0]: 0; -} - -static void __init prom_opal_hold_cpus(void) -{ -	int i, cnt, cpu, rc; -	long j; -	phandle node; -	char type[64]; -	u32 servers[8]; -	void *entry = (unsigned long *)&opal_secondary_entry; -	struct opal_secondary_data *data = &opal_secondary_data; - -	prom_debug("prom_opal_hold_cpus: start...\n"); -	prom_debug("    - entry       = 0x%x\n", entry); -	prom_debug("    - data        = 0x%x\n", data); - -	data->ack = -1; -	data->go = 0; - -	/* look for cpus */ -	for (node = 0; prom_next_node(&node); ) { -		type[0] = 0; -		prom_getprop(node, "device_type", type, sizeof(type)); -		if (strcmp(type, "cpu") != 0) -			continue; - -		/* Skip non-configured cpus. 
*/ -		if (prom_getprop(node, "status", type, sizeof(type)) > 0) -			if (strcmp(type, "okay") != 0) -				continue; - -		cnt = prom_getprop(node, "ibm,ppc-interrupt-server#s", servers, -			     sizeof(servers)); -		if (cnt == PROM_ERROR) -			break; -		cnt >>= 2; -		for (i = 0; i < cnt; i++) { -			cpu = servers[i]; -			prom_debug("CPU %d ... ", cpu); -			if (cpu == prom.cpu) { -				prom_debug("booted !\n"); -				continue; -			} -			prom_debug("starting ... "); - -			/* Init the acknowledge var which will be reset by -			 * the secondary cpu when it awakens from its OF -			 * spinloop. -			 */ -			data->ack = -1; -			rc = prom_rtas_call(prom_rtas_start_cpu, 3, 1, -					    NULL, cpu, entry, data); -			prom_debug("rtas rc=%d ...", rc); - -			for (j = 0; j < 100000000 && data->ack == -1; j++) { -				HMT_low(); -				mb(); -			} -			HMT_medium(); -			if (data->ack != -1) -				prom_debug("done, PIR=0x%x\n", data->ack); -			else -				prom_debug("timeout !\n"); -		} -	} -	prom_debug("prom_opal_hold_cpus: end...\n"); -} - -static void __init prom_opal_takeover(void) -{ -	struct opal_secondary_data *data = &opal_secondary_data; -	struct opal_takeover_args *args = &data->args; -	u64 align = prom_opal_align; -	u64 top_addr, opal_addr; - -	args->k_image	= (u64)_stext; -	args->k_size	= _end - _stext; -	args->k_entry	= 0; -	args->k_entry2	= 0x60; - -	top_addr = _ALIGN_UP(args->k_size, align); - -	if (prom_initrd_start != 0) { -		args->rd_image = prom_initrd_start; -		args->rd_size = prom_initrd_end - args->rd_image; -		args->rd_loc = top_addr; -		top_addr = _ALIGN_UP(args->rd_loc + args->rd_size, align); -	} - -	/* Pickup an address for the HAL. We want to go really high -	 * up to avoid problem with future kexecs. On the other hand -	 * we don't want to be all over the TCEs on P5IOC2 machines -	 * which are going to be up there too. 
We assume the machine -	 * has plenty of memory, and we ask for the HAL for now to -	 * be just below the 1G point, or above the initrd -	 */ -	opal_addr = _ALIGN_DOWN(0x40000000 - prom_opal_size, align); -	if (opal_addr < top_addr) -		opal_addr = top_addr; -	args->hal_addr = opal_addr; - -	/* Copy the command line to the kernel image */ -	strlcpy(boot_command_line, prom_cmd_line, -		COMMAND_LINE_SIZE); - -	prom_debug("  k_image    = 0x%lx\n", args->k_image); -	prom_debug("  k_size     = 0x%lx\n", args->k_size); -	prom_debug("  k_entry    = 0x%lx\n", args->k_entry); -	prom_debug("  k_entry2   = 0x%lx\n", args->k_entry2); -	prom_debug("  hal_addr   = 0x%lx\n", args->hal_addr); -	prom_debug("  rd_image   = 0x%lx\n", args->rd_image); -	prom_debug("  rd_size    = 0x%lx\n", args->rd_size); -	prom_debug("  rd_loc     = 0x%lx\n", args->rd_loc); -	prom_printf("Performing OPAL takeover,this can take a few minutes..\n"); -	prom_close_stdin(); -	mb(); -	data->go = 1; -	for (;;) -		opal_do_takeover(args); -} -#endif /* __BIG_ENDIAN__ */ -  /*   * Allocate room for and instantiate OPAL   */ @@ -1581,12 +1402,6 @@ static void __init prom_instantiate_rtas(void)  			 &val, sizeof(val)) != PROM_ERROR)  		rtas_has_query_cpu_stopped = true; -#if defined(CONFIG_PPC_POWERNV) && defined(__BIG_ENDIAN__) -	/* PowerVN takeover hack */ -	prom_rtas_data = base; -	prom_rtas_entry = entry; -	prom_getprop(rtas_node, "start-cpu", &prom_rtas_start_cpu, 4); -#endif  	prom_debug("rtas base     = 0x%x\n", base);  	prom_debug("rtas entry    = 0x%x\n", entry);  	prom_debug("rtas size     = 0x%x\n", (long)size); @@ -1970,19 +1785,23 @@ static void __init prom_init_stdout(void)  	/* Get the full OF pathname of the stdout device */  	memset(path, 0, 256);  	call_prom("instance-to-path", 3, 1, prom.stdout, path, 255); -	stdout_node = call_prom("instance-to-package", 1, 1, prom.stdout); -	val = cpu_to_be32(stdout_node); -	prom_setprop(prom.chosen, "/chosen", "linux,stdout-package", -		     &val, sizeof(val));  	prom_printf("OF stdout device is: %s\n", of_stdout_device);  	prom_setprop(prom.chosen, "/chosen", "linux,stdout-path",  		     path, strlen(path) + 1); -	/* If it's a display, note it */ -	memset(type, 0, sizeof(type)); -	prom_getprop(stdout_node, "device_type", type, sizeof(type)); -	if (strcmp(type, "display") == 0) -		prom_setprop(stdout_node, path, "linux,boot-display", NULL, 0); +	/* instance-to-package fails on PA-Semi */ +	stdout_node = call_prom("instance-to-package", 1, 1, prom.stdout); +	if (stdout_node != PROM_ERROR) { +		val = cpu_to_be32(stdout_node); +		prom_setprop(prom.chosen, "/chosen", "linux,stdout-package", +			     &val, sizeof(val)); + +		/* If it's a display, note it */ +		memset(type, 0, sizeof(type)); +		prom_getprop(stdout_node, "device_type", type, sizeof(type)); +		if (strcmp(type, "display") == 0) +			prom_setprop(stdout_node, path, "linux,boot-display", NULL, 0); +	}  }  static int __init prom_find_machine_type(void) @@ -3007,16 +2826,6 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,  		prom_instantiate_rtas();  #ifdef CONFIG_PPC_POWERNV -#ifdef __BIG_ENDIAN__ -	/* Detect HAL and try instanciating it & doing takeover */ -	if (of_platform == PLATFORM_PSERIES_LPAR) { -		prom_query_opal(); -		if (of_platform == PLATFORM_OPAL) { -			prom_opal_hold_cpus(); -			prom_opal_takeover(); -		} -	} else -#endif /* __BIG_ENDIAN__ */  	if (of_platform == PLATFORM_OPAL)  		prom_instantiate_opal();  #endif /* CONFIG_PPC_POWERNV */ diff --git a/arch/powerpc/kernel/prom_init_check.sh 
b/arch/powerpc/kernel/prom_init_check.sh index b0c263da219..fe8e54b9ef7 100644 --- a/arch/powerpc/kernel/prom_init_check.sh +++ b/arch/powerpc/kernel/prom_init_check.sh @@ -21,9 +21,7 @@ _end enter_prom memcpy memset reloc_offset __secondary_hold  __secondary_hold_acknowledge __secondary_hold_spinloop __start  strcmp strcpy strlcpy strlen strncmp strstr logo_linux_clut224  reloc_got2 kernstart_addr memstart_addr linux_banner _stext -opal_query_takeover opal_do_takeover opal_enter_rtas opal_secondary_entry -boot_command_line __prom_init_toc_start __prom_init_toc_end -btext_setup_display" +__prom_init_toc_start __prom_init_toc_end btext_setup_display TOC."  NM="$1"  OBJ="$2" diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 9a0d24c390a..2e3d2bf536c 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -362,7 +362,7 @@ static int fpr_get(struct task_struct *target, const struct user_regset *regset,  		   void *kbuf, void __user *ubuf)  {  #ifdef CONFIG_VSX -	double buf[33]; +	u64 buf[33];  	int i;  #endif  	flush_fp_to_thread(target); @@ -371,15 +371,15 @@ static int fpr_get(struct task_struct *target, const struct user_regset *regset,  	/* copy to local buffer then write that out */  	for (i = 0; i < 32 ; i++)  		buf[i] = target->thread.TS_FPR(i); -	memcpy(&buf[32], &target->thread.fpscr, sizeof(double)); +	buf[32] = target->thread.fp_state.fpscr;  	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, -1);  #else -	BUILD_BUG_ON(offsetof(struct thread_struct, fpscr) != -		     offsetof(struct thread_struct, TS_FPR(32))); +	BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) != +		     offsetof(struct thread_fp_state, fpr[32][0]));  	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, -				   &target->thread.fpr, 0, -1); +				   &target->thread.fp_state, 0, -1);  #endif  } @@ -388,7 +388,7 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset,  		   const void *kbuf, const void __user *ubuf)  {  #ifdef CONFIG_VSX -	double buf[33]; +	u64 buf[33];  	int i;  #endif  	flush_fp_to_thread(target); @@ -400,14 +400,14 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset,  		return i;  	for (i = 0; i < 32 ; i++)  		target->thread.TS_FPR(i) = buf[i]; -	memcpy(&target->thread.fpscr, &buf[32], sizeof(double)); +	target->thread.fp_state.fpscr = buf[32];  	return 0;  #else -	BUILD_BUG_ON(offsetof(struct thread_struct, fpscr) != -		     offsetof(struct thread_struct, TS_FPR(32))); +	BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) != +		     offsetof(struct thread_fp_state, fpr[32][0]));  	return user_regset_copyin(&pos, &count, &kbuf, &ubuf, -				  &target->thread.fpr, 0, -1); +				  &target->thread.fp_state, 0, -1);  #endif  } @@ -440,11 +440,11 @@ static int vr_get(struct task_struct *target, const struct user_regset *regset,  	flush_altivec_to_thread(target); -	BUILD_BUG_ON(offsetof(struct thread_struct, vscr) != -		     offsetof(struct thread_struct, vr[32])); +	BUILD_BUG_ON(offsetof(struct thread_vr_state, vscr) != +		     offsetof(struct thread_vr_state, vr[32]));  	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, -				  &target->thread.vr, 0, +				  &target->thread.vr_state, 0,  				  33 * sizeof(vector128));  	if (!ret) {  		/* @@ -471,11 +471,12 @@ static int vr_set(struct task_struct *target, const struct user_regset *regset,  	flush_altivec_to_thread(target); -	BUILD_BUG_ON(offsetof(struct thread_struct, vscr) != -		     offsetof(struct thread_struct, vr[32])); 
+	BUILD_BUG_ON(offsetof(struct thread_vr_state, vscr) != +		     offsetof(struct thread_vr_state, vr[32]));  	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, -				 &target->thread.vr, 0, 33 * sizeof(vector128)); +				 &target->thread.vr_state, 0, +				 33 * sizeof(vector128));  	if (!ret && count > 0) {  		/*  		 * We use only the first word of vrsave. @@ -514,13 +515,13 @@ static int vsr_get(struct task_struct *target, const struct user_regset *regset,  		   unsigned int pos, unsigned int count,  		   void *kbuf, void __user *ubuf)  { -	double buf[32]; +	u64 buf[32];  	int ret, i;  	flush_vsx_to_thread(target);  	for (i = 0; i < 32 ; i++) -		buf[i] = target->thread.fpr[i][TS_VSRLOWOFFSET]; +		buf[i] = target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET];  	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,  				  buf, 0, 32 * sizeof(double)); @@ -531,7 +532,7 @@ static int vsr_set(struct task_struct *target, const struct user_regset *regset,  		   unsigned int pos, unsigned int count,  		   const void *kbuf, const void __user *ubuf)  { -	double buf[32]; +	u64 buf[32];  	int ret,i;  	flush_vsx_to_thread(target); @@ -539,7 +540,7 @@ static int vsr_set(struct task_struct *target, const struct user_regset *regset,  	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,  				 buf, 0, 32 * sizeof(double));  	for (i = 0; i < 32 ; i++) -		target->thread.fpr[i][TS_VSRLOWOFFSET] = buf[i]; +		target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i];  	return ret; @@ -657,7 +658,7 @@ static const struct user_regset native_regsets[] = {  #endif  #ifdef CONFIG_SPE  	[REGSET_SPE] = { -		.n = 35, +		.core_note_type = NT_PPC_SPE, .n = 35,  		.size = sizeof(u32), .align = sizeof(u32),  		.active = evr_active, .get = evr_get, .set = evr_set  	}, @@ -854,8 +855,8 @@ void user_enable_single_step(struct task_struct *task)  	if (regs != NULL) {  #ifdef CONFIG_PPC_ADV_DEBUG_REGS -		task->thread.dbcr0 &= ~DBCR0_BT; -		task->thread.dbcr0 |= DBCR0_IDM | DBCR0_IC; +		task->thread.debug.dbcr0 &= ~DBCR0_BT; +		task->thread.debug.dbcr0 |= DBCR0_IDM | DBCR0_IC;  		regs->msr |= MSR_DE;  #else  		regs->msr &= ~MSR_BE; @@ -871,8 +872,8 @@ void user_enable_block_step(struct task_struct *task)  	if (regs != NULL) {  #ifdef CONFIG_PPC_ADV_DEBUG_REGS -		task->thread.dbcr0 &= ~DBCR0_IC; -		task->thread.dbcr0 = DBCR0_IDM | DBCR0_BT; +		task->thread.debug.dbcr0 &= ~DBCR0_IC; +		task->thread.debug.dbcr0 = DBCR0_IDM | DBCR0_BT;  		regs->msr |= MSR_DE;  #else  		regs->msr &= ~MSR_SE; @@ -894,16 +895,16 @@ void user_disable_single_step(struct task_struct *task)  		 * And, after doing so, if all debug flags are off, turn  		 * off DBCR0(IDM) and MSR(DE) .... Torez  		 */ -		task->thread.dbcr0 &= ~DBCR0_IC; +		task->thread.debug.dbcr0 &= ~(DBCR0_IC|DBCR0_BT);  		/*  		 * Test to see if any of the DBCR_ACTIVE_EVENTS bits are set.  		 */ -		if (!DBCR_ACTIVE_EVENTS(task->thread.dbcr0, -					task->thread.dbcr1)) { +		if (!DBCR_ACTIVE_EVENTS(task->thread.debug.dbcr0, +					task->thread.debug.dbcr1)) {  			/*  			 * All debug events were off.....  			 
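
For reference, the userspace side of the IAC/DAC slot bookkeeping in this file is the PPC-specific hw-debug ptrace interface; a sketch of arming a write watchpoint (error handling elided, the wrapper name is illustrative):

#include <sys/types.h>
#include <sys/ptrace.h>
#include <asm/ptrace.h>	/* struct ppc_hw_breakpoint, PPC_PTRACE_* */

static long watch_write(pid_t pid, void *addr)
{
	struct ppc_hw_breakpoint bp = {
		.version	= 1,
		.trigger_type	= PPC_BREAKPOINT_TRIGGER_WRITE,
		.addr_mode	= PPC_BREAKPOINT_MODE_EXACT,
		.condition_mode	= PPC_BREAKPOINT_CONDITION_NONE,
		.addr		= (__u64)(unsigned long)addr,
	};

	/* returns a slot handle; pass it to PPC_PTRACE_DELHWDEBUG to free */
	return ptrace(PPC_PTRACE_SETHWDEBUG, pid, 0, &bp);
}
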
*/ -			task->thread.dbcr0 &= ~DBCR0_IDM; +			task->thread.debug.dbcr0 &= ~DBCR0_IDM;  			regs->msr &= ~MSR_DE;  		}  #else @@ -1022,14 +1023,14 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,  	 */  	/* DAC's hold the whole address without any mode flags */ -	task->thread.dac1 = data & ~0x3UL; +	task->thread.debug.dac1 = data & ~0x3UL; -	if (task->thread.dac1 == 0) { +	if (task->thread.debug.dac1 == 0) {  		dbcr_dac(task) &= ~(DBCR_DAC1R | DBCR_DAC1W); -		if (!DBCR_ACTIVE_EVENTS(task->thread.dbcr0, -					task->thread.dbcr1)) { +		if (!DBCR_ACTIVE_EVENTS(task->thread.debug.dbcr0, +					task->thread.debug.dbcr1)) {  			task->thread.regs->msr &= ~MSR_DE; -			task->thread.dbcr0 &= ~DBCR0_IDM; +			task->thread.debug.dbcr0 &= ~DBCR0_IDM;  		}  		return 0;  	} @@ -1041,7 +1042,7 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,  	/* Set the Internal Debugging flag (IDM bit 1) for the DBCR0  	   register */ -	task->thread.dbcr0 |= DBCR0_IDM; +	task->thread.debug.dbcr0 |= DBCR0_IDM;  	/* Check for write and read flags and set DBCR0  	   accordingly */ @@ -1071,10 +1072,10 @@ static long set_instruction_bp(struct task_struct *child,  			      struct ppc_hw_breakpoint *bp_info)  {  	int slot; -	int slot1_in_use = ((child->thread.dbcr0 & DBCR0_IAC1) != 0); -	int slot2_in_use = ((child->thread.dbcr0 & DBCR0_IAC2) != 0); -	int slot3_in_use = ((child->thread.dbcr0 & DBCR0_IAC3) != 0); -	int slot4_in_use = ((child->thread.dbcr0 & DBCR0_IAC4) != 0); +	int slot1_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC1) != 0); +	int slot2_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC2) != 0); +	int slot3_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC3) != 0); +	int slot4_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC4) != 0);  	if (dbcr_iac_range(child) & DBCR_IAC12MODE)  		slot2_in_use = 1; @@ -1093,9 +1094,9 @@ static long set_instruction_bp(struct task_struct *child,  		/* We need a pair of IAC regsisters */  		if ((!slot1_in_use) && (!slot2_in_use)) {  			slot = 1; -			child->thread.iac1 = bp_info->addr; -			child->thread.iac2 = bp_info->addr2; -			child->thread.dbcr0 |= DBCR0_IAC1; +			child->thread.debug.iac1 = bp_info->addr; +			child->thread.debug.iac2 = bp_info->addr2; +			child->thread.debug.dbcr0 |= DBCR0_IAC1;  			if (bp_info->addr_mode ==  					PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE)  				dbcr_iac_range(child) |= DBCR_IAC12X; @@ -1104,9 +1105,9 @@ static long set_instruction_bp(struct task_struct *child,  #if CONFIG_PPC_ADV_DEBUG_IACS > 2  		} else if ((!slot3_in_use) && (!slot4_in_use)) {  			slot = 3; -			child->thread.iac3 = bp_info->addr; -			child->thread.iac4 = bp_info->addr2; -			child->thread.dbcr0 |= DBCR0_IAC3; +			child->thread.debug.iac3 = bp_info->addr; +			child->thread.debug.iac4 = bp_info->addr2; +			child->thread.debug.dbcr0 |= DBCR0_IAC3;  			if (bp_info->addr_mode ==  					PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE)  				dbcr_iac_range(child) |= DBCR_IAC34X; @@ -1126,30 +1127,30 @@ static long set_instruction_bp(struct task_struct *child,  			 */  			if (slot2_in_use || (slot3_in_use == slot4_in_use)) {  				slot = 1; -				child->thread.iac1 = bp_info->addr; -				child->thread.dbcr0 |= DBCR0_IAC1; +				child->thread.debug.iac1 = bp_info->addr; +				child->thread.debug.dbcr0 |= DBCR0_IAC1;  				goto out;  			}  		}  		if (!slot2_in_use) {  			slot = 2; -			child->thread.iac2 = bp_info->addr; -			child->thread.dbcr0 |= DBCR0_IAC2; +			child->thread.debug.iac2 = bp_info->addr; +			child->thread.debug.dbcr0 |= DBCR0_IAC2;  #if 
CONFIG_PPC_ADV_DEBUG_IACS > 2  		} else if (!slot3_in_use) {  			slot = 3; -			child->thread.iac3 = bp_info->addr; -			child->thread.dbcr0 |= DBCR0_IAC3; +			child->thread.debug.iac3 = bp_info->addr; +			child->thread.debug.dbcr0 |= DBCR0_IAC3;  		} else if (!slot4_in_use) {  			slot = 4; -			child->thread.iac4 = bp_info->addr; -			child->thread.dbcr0 |= DBCR0_IAC4; +			child->thread.debug.iac4 = bp_info->addr; +			child->thread.debug.dbcr0 |= DBCR0_IAC4;  #endif  		} else  			return -ENOSPC;  	}  out: -	child->thread.dbcr0 |= DBCR0_IDM; +	child->thread.debug.dbcr0 |= DBCR0_IDM;  	child->thread.regs->msr |= MSR_DE;  	return slot; @@ -1159,49 +1160,49 @@ static int del_instruction_bp(struct task_struct *child, int slot)  {  	switch (slot) {  	case 1: -		if ((child->thread.dbcr0 & DBCR0_IAC1) == 0) +		if ((child->thread.debug.dbcr0 & DBCR0_IAC1) == 0)  			return -ENOENT;  		if (dbcr_iac_range(child) & DBCR_IAC12MODE) {  			/* address range - clear slots 1 & 2 */ -			child->thread.iac2 = 0; +			child->thread.debug.iac2 = 0;  			dbcr_iac_range(child) &= ~DBCR_IAC12MODE;  		} -		child->thread.iac1 = 0; -		child->thread.dbcr0 &= ~DBCR0_IAC1; +		child->thread.debug.iac1 = 0; +		child->thread.debug.dbcr0 &= ~DBCR0_IAC1;  		break;  	case 2: -		if ((child->thread.dbcr0 & DBCR0_IAC2) == 0) +		if ((child->thread.debug.dbcr0 & DBCR0_IAC2) == 0)  			return -ENOENT;  		if (dbcr_iac_range(child) & DBCR_IAC12MODE)  			/* used in a range */  			return -EINVAL; -		child->thread.iac2 = 0; -		child->thread.dbcr0 &= ~DBCR0_IAC2; +		child->thread.debug.iac2 = 0; +		child->thread.debug.dbcr0 &= ~DBCR0_IAC2;  		break;  #if CONFIG_PPC_ADV_DEBUG_IACS > 2  	case 3: -		if ((child->thread.dbcr0 & DBCR0_IAC3) == 0) +		if ((child->thread.debug.dbcr0 & DBCR0_IAC3) == 0)  			return -ENOENT;  		if (dbcr_iac_range(child) & DBCR_IAC34MODE) {  			/* address range - clear slots 3 & 4 */ -			child->thread.iac4 = 0; +			child->thread.debug.iac4 = 0;  			dbcr_iac_range(child) &= ~DBCR_IAC34MODE;  		} -		child->thread.iac3 = 0; -		child->thread.dbcr0 &= ~DBCR0_IAC3; +		child->thread.debug.iac3 = 0; +		child->thread.debug.dbcr0 &= ~DBCR0_IAC3;  		break;  	case 4: -		if ((child->thread.dbcr0 & DBCR0_IAC4) == 0) +		if ((child->thread.debug.dbcr0 & DBCR0_IAC4) == 0)  			return -ENOENT;  		if (dbcr_iac_range(child) & DBCR_IAC34MODE)  			/* Used in a range */  			return -EINVAL; -		child->thread.iac4 = 0; -		child->thread.dbcr0 &= ~DBCR0_IAC4; +		child->thread.debug.iac4 = 0; +		child->thread.debug.dbcr0 &= ~DBCR0_IAC4;  		break;  #endif  	default: @@ -1231,18 +1232,18 @@ static int set_dac(struct task_struct *child, struct ppc_hw_breakpoint *bp_info)  			dbcr_dac(child) |= DBCR_DAC1R;  		if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)  			dbcr_dac(child) |= DBCR_DAC1W; -		child->thread.dac1 = (unsigned long)bp_info->addr; +		child->thread.debug.dac1 = (unsigned long)bp_info->addr;  #if CONFIG_PPC_ADV_DEBUG_DVCS > 0  		if (byte_enable) { -			child->thread.dvc1 = +			child->thread.debug.dvc1 =  				(unsigned long)bp_info->condition_value; -			child->thread.dbcr2 |= +			child->thread.debug.dbcr2 |=  				((byte_enable << DBCR2_DVC1BE_SHIFT) |  				 (condition_mode << DBCR2_DVC1M_SHIFT));  		}  #endif  #ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE -	} else if (child->thread.dbcr2 & DBCR2_DAC12MODE) { +	} else if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE) {  		/* Both dac1 and dac2 are part of a range */  		return -ENOSPC;  #endif @@ -1252,19 +1253,19 @@ static int set_dac(struct task_struct *child, struct ppc_hw_breakpoint *bp_info)  			
dbcr_dac(child) |= DBCR_DAC2R;  		if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)  			dbcr_dac(child) |= DBCR_DAC2W; -		child->thread.dac2 = (unsigned long)bp_info->addr; +		child->thread.debug.dac2 = (unsigned long)bp_info->addr;  #if CONFIG_PPC_ADV_DEBUG_DVCS > 0  		if (byte_enable) { -			child->thread.dvc2 = +			child->thread.debug.dvc2 =  				(unsigned long)bp_info->condition_value; -			child->thread.dbcr2 |= +			child->thread.debug.dbcr2 |=  				((byte_enable << DBCR2_DVC2BE_SHIFT) |  				 (condition_mode << DBCR2_DVC2M_SHIFT));  		}  #endif  	} else  		return -ENOSPC; -	child->thread.dbcr0 |= DBCR0_IDM; +	child->thread.debug.dbcr0 |= DBCR0_IDM;  	child->thread.regs->msr |= MSR_DE;  	return slot + 4; @@ -1276,32 +1277,32 @@ static int del_dac(struct task_struct *child, int slot)  		if ((dbcr_dac(child) & (DBCR_DAC1R | DBCR_DAC1W)) == 0)  			return -ENOENT; -		child->thread.dac1 = 0; +		child->thread.debug.dac1 = 0;  		dbcr_dac(child) &= ~(DBCR_DAC1R | DBCR_DAC1W);  #ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE -		if (child->thread.dbcr2 & DBCR2_DAC12MODE) { -			child->thread.dac2 = 0; -			child->thread.dbcr2 &= ~DBCR2_DAC12MODE; +		if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE) { +			child->thread.debug.dac2 = 0; +			child->thread.debug.dbcr2 &= ~DBCR2_DAC12MODE;  		} -		child->thread.dbcr2 &= ~(DBCR2_DVC1M | DBCR2_DVC1BE); +		child->thread.debug.dbcr2 &= ~(DBCR2_DVC1M | DBCR2_DVC1BE);  #endif  #if CONFIG_PPC_ADV_DEBUG_DVCS > 0 -		child->thread.dvc1 = 0; +		child->thread.debug.dvc1 = 0;  #endif  	} else if (slot == 2) {  		if ((dbcr_dac(child) & (DBCR_DAC2R | DBCR_DAC2W)) == 0)  			return -ENOENT;  #ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE -		if (child->thread.dbcr2 & DBCR2_DAC12MODE) +		if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE)  			/* Part of a range */  			return -EINVAL; -		child->thread.dbcr2 &= ~(DBCR2_DVC2M | DBCR2_DVC2BE); +		child->thread.debug.dbcr2 &= ~(DBCR2_DVC2M | DBCR2_DVC2BE);  #endif  #if CONFIG_PPC_ADV_DEBUG_DVCS > 0 -		child->thread.dvc2 = 0; +		child->thread.debug.dvc2 = 0;  #endif -		child->thread.dac2 = 0; +		child->thread.debug.dac2 = 0;  		dbcr_dac(child) &= ~(DBCR_DAC2R | DBCR_DAC2W);  	} else  		return -EINVAL; @@ -1343,22 +1344,22 @@ static int set_dac_range(struct task_struct *child,  			return -EIO;  	} -	if (child->thread.dbcr0 & +	if (child->thread.debug.dbcr0 &  	    (DBCR0_DAC1R | DBCR0_DAC1W | DBCR0_DAC2R | DBCR0_DAC2W))  		return -ENOSPC;  	if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) -		child->thread.dbcr0 |= (DBCR0_DAC1R | DBCR0_IDM); +		child->thread.debug.dbcr0 |= (DBCR0_DAC1R | DBCR0_IDM);  	if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) -		child->thread.dbcr0 |= (DBCR0_DAC1W | DBCR0_IDM); -	child->thread.dac1 = bp_info->addr; -	child->thread.dac2 = bp_info->addr2; +		child->thread.debug.dbcr0 |= (DBCR0_DAC1W | DBCR0_IDM); +	child->thread.debug.dac1 = bp_info->addr; +	child->thread.debug.dac2 = bp_info->addr2;  	if (mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE) -		child->thread.dbcr2  |= DBCR2_DAC12M; +		child->thread.debug.dbcr2  |= DBCR2_DAC12M;  	else if (mode == PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE) -		child->thread.dbcr2  |= DBCR2_DAC12MX; +		child->thread.debug.dbcr2  |= DBCR2_DAC12MX;  	else	/* PPC_BREAKPOINT_MODE_MASK */ -		child->thread.dbcr2  |= DBCR2_DAC12MM; +		child->thread.debug.dbcr2  |= DBCR2_DAC12MM;  	child->thread.regs->msr |= MSR_DE;  	return 5; @@ -1489,9 +1490,9 @@ static long ppc_del_hwdebug(struct task_struct *child, long data)  		rc = del_dac(child, (int)data - 4);  	if (!rc) { -		if 
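set_dac_range() above picks one of three DBCR2 encodings for a two-register watchpoint. A compilable sketch of just that selection; the bit values are stand-ins, not the architected ones from reg_booke.h:

#include <errno.h>

#define DBCR2_DAC12M	0x1	/* stand-in: inclusive address range */
#define DBCR2_DAC12MX	0x2	/* stand-in: exclusive address range */
#define DBCR2_DAC12MM	0x4	/* stand-in: mask match (dac2 masks the compared address) */

enum bp_mode { MODE_RANGE_INCLUSIVE, MODE_RANGE_EXCLUSIVE, MODE_MASK };

static int dac_range_bits(enum bp_mode mode, unsigned long *dbcr2)
{
	switch (mode) {
	case MODE_RANGE_INCLUSIVE: *dbcr2 |= DBCR2_DAC12M;  return 0;
	case MODE_RANGE_EXCLUSIVE: *dbcr2 |= DBCR2_DAC12MX; return 0;
	case MODE_MASK:            *dbcr2 |= DBCR2_DAC12MM; return 0;
	}
	return -EINVAL;
}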
(!DBCR_ACTIVE_EVENTS(child->thread.dbcr0, -					child->thread.dbcr1)) { -			child->thread.dbcr0 &= ~DBCR0_IDM; +		if (!DBCR_ACTIVE_EVENTS(child->thread.debug.dbcr0, +					child->thread.debug.dbcr1)) { +			child->thread.debug.dbcr0 &= ~DBCR0_IDM;  			child->thread.regs->msr &= ~MSR_DE;  		}  	} @@ -1554,10 +1555,10 @@ long arch_ptrace(struct task_struct *child, long request,  			flush_fp_to_thread(child);  			if (fpidx < (PT_FPSCR - PT_FPR0)) -				tmp = ((unsigned long *)child->thread.fpr) -					[fpidx * TS_FPRWIDTH]; +				memcpy(&tmp, &child->thread.TS_FPR(fpidx), +				       sizeof(long));  			else -				tmp = child->thread.fpscr.val; +				tmp = child->thread.fp_state.fpscr;  		}  		ret = put_user(tmp, datalp);  		break; @@ -1587,10 +1588,10 @@ long arch_ptrace(struct task_struct *child, long request,  			flush_fp_to_thread(child);  			if (fpidx < (PT_FPSCR - PT_FPR0)) -				((unsigned long *)child->thread.fpr) -					[fpidx * TS_FPRWIDTH] = data; +				memcpy(&child->thread.TS_FPR(fpidx), &data, +				       sizeof(long));  			else -				child->thread.fpscr.val = data; +				child->thread.fp_state.fpscr = data;  			ret = 0;  		}  		break; @@ -1669,7 +1670,7 @@ long arch_ptrace(struct task_struct *child, long request,  		if (addr > 0)  			break;  #ifdef CONFIG_PPC_ADV_DEBUG_REGS -		ret = put_user(child->thread.dac1, datalp); +		ret = put_user(child->thread.debug.dac1, datalp);  #else  		dabr_fake = ((child->thread.hw_brk.address & (~HW_BRK_TYPE_DABR)) |  			     (child->thread.hw_brk.type & HW_BRK_TYPE_DABR)); diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace32.c index f51599e941c..f52b7db327c 100644 --- a/arch/powerpc/kernel/ptrace32.c +++ b/arch/powerpc/kernel/ptrace32.c @@ -43,7 +43,6 @@  #define FPRNUMBER(i) (((i) - PT_FPR0) >> 1)  #define FPRHALF(i) (((i) - PT_FPR0) & 1)  #define FPRINDEX(i) TS_FPRWIDTH * FPRNUMBER(i) * 2 + FPRHALF(i) -#define FPRINDEX_3264(i) (TS_FPRWIDTH * ((i) - PT_FPR0))  long compat_arch_ptrace(struct task_struct *child, compat_long_t request,  			compat_ulong_t caddr, compat_ulong_t cdata) @@ -105,7 +104,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,  			 * to be an array of unsigned int (32 bits) - the  			 * index passed in is based on this assumption.  			 */ -			tmp = ((unsigned int *)child->thread.fpr) +			tmp = ((unsigned int *)child->thread.fp_state.fpr)  				[FPRINDEX(index)];  		}  		ret = put_user((unsigned int)tmp, (u32 __user *)data); @@ -147,8 +146,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,  		if (numReg >= PT_FPR0) {  			flush_fp_to_thread(child);  			/* get 64 bit FPR */ -			tmp = ((u64 *)child->thread.fpr) -				[FPRINDEX_3264(numReg)]; +			tmp = child->thread.fp_state.fpr[numReg - PT_FPR0][0];  		} else { /* register within PT_REGS struct */  			unsigned long tmp2;  			ret = ptrace_get_reg(child, numReg, &tmp2); @@ -207,7 +205,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,  			 * to be an array of unsigned int (32 bits) - the  			 * index passed in is based on this assumption.  			 */ -			((unsigned int *)child->thread.fpr) +			((unsigned int *)child->thread.fp_state.fpr)  				[FPRINDEX(index)] = data;  			ret = 0;  		} @@ -251,8 +249,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,  			u64 *tmp;  			flush_fp_to_thread(child);  			/* get 64 bit FPR ... */ -			tmp = &(((u64 *)child->thread.fpr) -				[FPRINDEX_3264(numReg)]); +			tmp = &child->thread.fp_state.fpr[numReg - PT_FPR0][0];  			/* ... 
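ppc_del_hwdebug() above tears global state down through the new sub-struct too: once DBCR_ACTIVE_EVENTS() reports no IAC/DAC slot is still live, both the internal-debug-mode bit and the thread's MSR_DE are dropped. A stubbed sketch of that last-user cleanup (macro body and bit values are placeholders):

#include <stdbool.h>

#define DBCR0_IDM	0x40000000UL	/* placeholder value */
#define MSR_DE		0x00000200UL	/* placeholder value */

/* placeholder: true while any IAC/DAC event is still programmed */
static bool debug_events_active(unsigned long dbcr0, unsigned long dbcr1)
{
	return (dbcr0 & ~DBCR0_IDM) || dbcr1;
}

static void maybe_disable_debug(unsigned long *dbcr0, unsigned long *dbcr1,
				unsigned long *msr)
{
	if (!debug_events_active(*dbcr0, *dbcr1)) {
		*dbcr0 &= ~DBCR0_IDM;	/* leave internal debug mode */
		*msr   &= ~MSR_DE;	/* stop taking debug interrupts */
	}
}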
write the 32 bit part we want */  			((u32 *)tmp)[index % 2] = data;  			ret = 0; @@ -269,7 +266,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,  		if (addr > 0)  			break;  #ifdef CONFIG_PPC_ADV_DEBUG_REGS -		ret = put_user(child->thread.dac1, (u32 __user *)data); +		ret = put_user(child->thread.debug.dac1, (u32 __user *)data);  #else  		dabr_fake = (  			(child->thread.hw_brk.address & (~HW_BRK_TYPE_DABR)) | diff --git a/arch/powerpc/kernel/reloc_64.S b/arch/powerpc/kernel/reloc_64.S index b47a0e1ab00..d88736fbece 100644 --- a/arch/powerpc/kernel/reloc_64.S +++ b/arch/powerpc/kernel/reloc_64.S @@ -69,8 +69,8 @@ _GLOBAL(relocate)  	 * R_PPC64_RELATIVE ones.  	 */  	mtctr	r8 -5:	lwz	r0,12(9)	/* ELF64_R_TYPE(reloc->r_info) */ -	cmpwi	r0,R_PPC64_RELATIVE +5:	ld	r0,8(9)		/* ELF64_R_TYPE(reloc->r_info) */ +	cmpdi	r0,R_PPC64_RELATIVE  	bne	6f  	ld	r6,0(r9)	/* reloc->r_offset */  	ld	r0,16(r9)	/* reloc->r_addend */ @@ -81,6 +81,7 @@ _GLOBAL(relocate)  6:	blr +.balign 8  p_dyn:	.llong	__dynamic_start - 0b  p_rela:	.llong	__rela_dyn_start - 0b  p_st:	.llong	_stext - 0b diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 4cf674d7d5a..8b4c857c142 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -993,32 +993,36 @@ struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log,  		(struct rtas_ext_event_log_v6 *)log->buffer;  	struct pseries_errorlog *sect;  	unsigned char *p, *log_end; +	uint32_t ext_log_length = rtas_error_extended_log_length(log); +	uint8_t log_format = rtas_ext_event_log_format(ext_log); +	uint32_t company_id = rtas_ext_event_company_id(ext_log);  	/* Check that we understand the format */ -	if (log->extended_log_length < sizeof(struct rtas_ext_event_log_v6) || -	    ext_log->log_format != RTAS_V6EXT_LOG_FORMAT_EVENT_LOG || -	    ext_log->company_id != RTAS_V6EXT_COMPANY_ID_IBM) +	if (ext_log_length < sizeof(struct rtas_ext_event_log_v6) || +	    log_format != RTAS_V6EXT_LOG_FORMAT_EVENT_LOG || +	    company_id != RTAS_V6EXT_COMPANY_ID_IBM)  		return NULL; -	log_end = log->buffer + log->extended_log_length; +	log_end = log->buffer + ext_log_length;  	p = ext_log->vendor_log;  	while (p < log_end) {  		sect = (struct pseries_errorlog *)p; -		if (sect->id == section_id) +		if (pseries_errorlog_id(sect) == section_id)  			return sect; -		p += sect->length; +		p += pseries_errorlog_length(sect);  	}  	return NULL;  } +/* We assume to be passed big endian arguments */  asmlinkage int ppc_rtas(struct rtas_args __user *uargs)  {  	struct rtas_args args;  	unsigned long flags;  	char *buff_copy, *errbuf = NULL; -	int nargs; +	int nargs, nret, token;  	int rc;  	if (!capable(CAP_SYS_ADMIN)) @@ -1027,10 +1031,13 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs)  	if (copy_from_user(&args, uargs, 3 * sizeof(u32)) != 0)  		return -EFAULT; -	nargs = args.nargs; +	nargs = be32_to_cpu(args.nargs); +	nret  = be32_to_cpu(args.nret); +	token = be32_to_cpu(args.token); +  	if (nargs > ARRAY_SIZE(args.args) -	    || args.nret > ARRAY_SIZE(args.args) -	    || nargs + args.nret > ARRAY_SIZE(args.args)) +	    || nret > ARRAY_SIZE(args.args) +	    || nargs + nret > ARRAY_SIZE(args.args))  		return -EINVAL;  	/* Copy in args. 
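The ppc_rtas() changes above are little-endian enablement: RTAS itself always speaks big-endian, so the argument block stays BE in memory and the kernel byte-swaps only the scalars it needs to inspect. A user-space sketch of that validation step, with an illustrative array size:

#include <endian.h>
#include <errno.h>
#include <stdint.h>

#define NR_RTAS_ARGS 16			/* illustrative, not the ABI limit */
typedef uint32_t rtas_arg_t;		/* held big-endian */

struct rtas_args_sketch {
	uint32_t token, nargs, nret;	/* big-endian on the wire */
	rtas_arg_t args[NR_RTAS_ARGS];
};

static int check_rtas_args(const struct rtas_args_sketch *a)
{
	uint32_t nargs = be32toh(a->nargs);	/* kernel: be32_to_cpu() */
	uint32_t nret  = be32toh(a->nret);

	if (nargs > NR_RTAS_ARGS || nret > NR_RTAS_ARGS ||
	    nargs + nret > NR_RTAS_ARGS)
		return -EINVAL;
	return 0;
}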
*/ @@ -1038,14 +1045,14 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs)  			   nargs * sizeof(rtas_arg_t)) != 0)  		return -EFAULT; -	if (args.token == RTAS_UNKNOWN_SERVICE) +	if (token == RTAS_UNKNOWN_SERVICE)  		return -EINVAL;  	args.rets = &args.args[nargs]; -	memset(args.rets, 0, args.nret * sizeof(rtas_arg_t)); +	memset(args.rets, 0, nret * sizeof(rtas_arg_t));  	/* Need to handle ibm,suspend_me call specially */ -	if (args.token == ibm_suspend_me_token) { +	if (token == ibm_suspend_me_token) {  		rc = rtas_ibm_suspend_me(&args);  		if (rc)  			return rc; @@ -1062,7 +1069,7 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs)  	/* A -1 return code indicates that the last command couldn't  	   be completed due to a hardware error. */ -	if (args.rets[0] == -1) +	if (be32_to_cpu(args.rets[0]) == -1)  		errbuf = __fetch_rtas_last_error(buff_copy);  	unlock_rtas(flags); @@ -1077,7 +1084,7 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs)  	/* Copy out args. */  	if (copy_to_user(uargs->args + nargs,  			 args.args + nargs, -			 args.nret * sizeof(rtas_arg_t)) != 0) +			 nret * sizeof(rtas_arg_t)) != 0)  		return -EFAULT;  	return 0; @@ -1135,7 +1142,7 @@ void __init rtas_initialize(void)  int __init early_init_dt_scan_rtas(unsigned long node,  		const char *uname, int depth, void *data)  { -	u32 *basep, *entryp, *sizep; +	const u32 *basep, *entryp, *sizep;  	if (depth != 1 || strcmp(uname, "rtas") != 0)  		return 0; diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c index 2f3cdb01506..db2b482af65 100644 --- a/arch/powerpc/kernel/rtas_flash.c +++ b/arch/powerpc/kernel/rtas_flash.c @@ -611,17 +611,19 @@ static void rtas_flash_firmware(int reboot_type)  	for (f = flist; f; f = next) {  		/* Translate data addrs to absolute */  		for (i = 0; i < f->num_blocks; i++) { -			f->blocks[i].data = (char *)__pa(f->blocks[i].data); +			f->blocks[i].data = (char *)cpu_to_be64(__pa(f->blocks[i].data));  			image_size += f->blocks[i].length; +			f->blocks[i].length = cpu_to_be64(f->blocks[i].length);  		}  		next = f->next;  		/* Don't translate NULL pointer for last entry */  		if (f->next) -			f->next = (struct flash_block_list *)__pa(f->next); +			f->next = (struct flash_block_list *)cpu_to_be64(__pa(f->next));  		else  			f->next = NULL;  		/* make num_blocks into the version/length field */  		f->num_blocks = (FLASH_BLOCK_LIST_VERSION << 56) | ((f->num_blocks+1)*16); +		f->num_blocks = cpu_to_be64(f->num_blocks);  	}  	printk(KERN_ALERT "FLASH: flash image is %ld bytes\n", image_size); @@ -705,7 +707,7 @@ static int __init rtas_flash_init(void)  	if (rtas_token("ibm,update-flash-64-and-reboot") ==  		       RTAS_UNKNOWN_SERVICE) {  		pr_info("rtas_flash: no firmware flash support\n"); -		return 1; +		return -EINVAL;  	}  	rtas_validate_flash_data.buf = kzalloc(VALIDATE_BUF_SIZE, GFP_KERNEL); diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c index 6e7b7cdeec6..c168337aef9 100644 --- a/arch/powerpc/kernel/rtas_pci.c +++ b/arch/powerpc/kernel/rtas_pci.c @@ -80,10 +80,6 @@ int rtas_read_config(struct pci_dn *pdn, int where, int size, u32 *val)  	if (ret)  		return PCIBIOS_DEVICE_NOT_FOUND; -	if (returnval == EEH_IO_ERROR_VALUE(size) && -	    eeh_dev_check_failure(of_node_to_eeh_dev(pdn->node))) -		return PCIBIOS_DEVICE_NOT_FOUND; -  	return PCIBIOS_SUCCESSFUL;  } @@ -92,18 +88,39 @@ static int rtas_pci_read_config(struct pci_bus *bus,  				int where, int size, u32 *val)  {  	struct device_node *busdn, *dn; - -	busdn = 
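The rtas_flash hunks above do the same conversion for firmware flashing: block addresses, lengths, and the list header are flipped to big-endian before the reboot path hands them to RTAS. The header packing, isolated and runnable (the version constant is assumed for the demo):

#include <endian.h>
#include <stdint.h>
#include <stdio.h>

#define FLASH_BLOCK_LIST_VERSION 1ULL	/* assumed value */

int main(void)
{
	uint64_t num_blocks = 3;	/* data blocks in this list chunk */

	/* version in the top byte, total byte length below it;
	 * each entry is 16 bytes and the header itself counts as one */
	uint64_t hdr  = (FLASH_BLOCK_LIST_VERSION << 56) | ((num_blocks + 1) * 16);
	uint64_t wire = htobe64(hdr);	/* kernel: cpu_to_be64() */

	printf("header %#018llx -> wire %#018llx\n",
	       (unsigned long long)hdr, (unsigned long long)wire);
	return 0;
}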
pci_bus_to_OF_node(bus); +	struct pci_dn *pdn; +	bool found = false; +#ifdef CONFIG_EEH +	struct eeh_dev *edev; +#endif +	int ret;  	/* Search only direct children of the bus */ +	*val = 0xFFFFFFFF; +	busdn = pci_bus_to_OF_node(bus);  	for (dn = busdn->child; dn; dn = dn->sibling) { -		struct pci_dn *pdn = PCI_DN(dn); +		pdn = PCI_DN(dn);  		if (pdn && pdn->devfn == devfn -		    && of_device_is_available(dn)) -			return rtas_read_config(pdn, where, size, val); +		    && of_device_is_available(dn)) { +			found = true; +			break; +		}  	} -	return PCIBIOS_DEVICE_NOT_FOUND; +	if (!found) +		return PCIBIOS_DEVICE_NOT_FOUND; +#ifdef CONFIG_EEH +	edev = of_node_to_eeh_dev(dn); +	if (edev && edev->pe && edev->pe->state & EEH_PE_RESET) +		return PCIBIOS_DEVICE_NOT_FOUND; +#endif + +	ret = rtas_read_config(pdn, where, size, val); +	if (*val == EEH_IO_ERROR_VALUE(size) && +	    eeh_dev_check_failure(of_node_to_eeh_dev(dn))) +		return PCIBIOS_DEVICE_NOT_FOUND; + +	return ret;  }  int rtas_write_config(struct pci_dn *pdn, int where, int size, u32 val) @@ -136,17 +153,34 @@ static int rtas_pci_write_config(struct pci_bus *bus,  				 int where, int size, u32 val)  {  	struct device_node *busdn, *dn; - -	busdn = pci_bus_to_OF_node(bus); +	struct pci_dn *pdn; +	bool found = false; +#ifdef CONFIG_EEH +	struct eeh_dev *edev; +#endif +	int ret;  	/* Search only direct children of the bus */ +	busdn = pci_bus_to_OF_node(bus);  	for (dn = busdn->child; dn; dn = dn->sibling) { -		struct pci_dn *pdn = PCI_DN(dn); +		pdn = PCI_DN(dn);  		if (pdn && pdn->devfn == devfn -		    && of_device_is_available(dn)) -			return rtas_write_config(pdn, where, size, val); +		    && of_device_is_available(dn)) { +			found = true; +			break; +		}  	} -	return PCIBIOS_DEVICE_NOT_FOUND; + +	if (!found) +		return PCIBIOS_DEVICE_NOT_FOUND; +#ifdef CONFIG_EEH +	edev = of_node_to_eeh_dev(dn); +	if (edev && edev->pe && (edev->pe->state & EEH_PE_RESET)) +		return PCIBIOS_DEVICE_NOT_FOUND; +#endif +	ret = rtas_write_config(pdn, where, size, val); + +	return ret;  }  static struct pci_ops rtas_pci_ops = { @@ -223,7 +257,7 @@ unsigned long get_phb_buid(struct device_node *phb)  static int phb_set_bus_ranges(struct device_node *dev,  			      struct pci_controller *phb)  { -	const int *bus_range; +	const __be32 *bus_range;  	unsigned int len;  	bus_range = of_get_property(dev, "bus-range", &len); @@ -231,8 +265,8 @@ static int phb_set_bus_ranges(struct device_node *dev,  		return 1;   	} -	phb->first_busno =  bus_range[0]; -	phb->last_busno  =  bus_range[1]; +	phb->first_busno = be32_to_cpu(bus_range[0]); +	phb->last_busno  = be32_to_cpu(bus_range[1]);  	return 0;  } diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index 1130c53ad65..e736387fee6 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -150,8 +150,8 @@ static void printk_log_rtas(char *buf, int len)  		struct rtas_error_log *errlog = (struct rtas_error_log *)buf;  		printk(RTAS_DEBUG "event: %d, Type: %s, Severity: %d\n", -		       error_log_cnt, rtas_event_type(errlog->type), -		       errlog->severity); +		       error_log_cnt, rtas_event_type(rtas_error_type(errlog)), +		       rtas_error_severity(errlog));  	}  } @@ -159,14 +159,16 @@ static int log_rtas_len(char * buf)  {  	int len;  	struct rtas_error_log *err; +	uint32_t extended_log_length;  	/* rtas fixed header */  	len = 8;  	err = (struct rtas_error_log *)buf; -	if (err->extended && err->extended_log_length) { +	extended_log_length = rtas_error_extended_log_length(err); +	
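The config-space rework above changes when EEH is consulted: the read happens first, and only a result that is all-ones for the access width triggers eeh_dev_check_failure(). The all-ones probe, sketched with an assumed macro body that matches this usage:

#include <stdbool.h>
#include <stdint.h>

/* assumed shape: all-ones in the low 'size' bytes of a 32-bit read */
#define EEH_IO_ERROR_VALUE(size)	(~0U >> ((4 - (size)) * 8))

static bool read_may_be_eeh_freeze(uint32_t val, int size)
{
	return val == EEH_IO_ERROR_VALUE(size);
}

/* read_may_be_eeh_freeze(0xff, 1)       -> true
 * read_may_be_eeh_freeze(0xffffffff, 4) -> true
 * read_may_be_eeh_freeze(0x1234, 2)     -> false
 */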
if (rtas_error_extended(err) && extended_log_length) {  		/* extended header */ -		len += err->extended_log_length; +		len += extended_log_length;  	}  	if (rtas_error_log_max == 0) @@ -293,15 +295,13 @@ void prrn_schedule_update(u32 scope)  static void handle_rtas_event(const struct rtas_error_log *log)  { -	if (log->type == RTAS_TYPE_PRRN) { -		/* For PRRN Events the extended log length is used to denote -		 * the scope for calling rtas update-nodes. -		 */ -		if (prrn_is_enabled()) -			prrn_schedule_update(log->extended_log_length); -	} +	if (rtas_error_type(log) != RTAS_TYPE_PRRN || !prrn_is_enabled()) +		return; -	return; +	/* For PRRN Events the extended log length is used to denote +	 * the scope for calling rtas update-nodes. +	 */ +	prrn_schedule_update(rtas_error_extended_log_length(log));  }  #else diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 3d261c071fc..e5b022c55cc 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -62,8 +62,6 @@  #include <mm/mmu_decl.h>  #include <asm/fadump.h> -#include "setup.h" -  #ifdef DEBUG  #include <asm/udbg.h>  #define DBG(fmt...) udbg_printf(fmt) @@ -78,6 +76,9 @@ EXPORT_SYMBOL(ppc_md);  struct machdep_calls *machine_id;  EXPORT_SYMBOL(machine_id); +int boot_cpuid = -1; +EXPORT_SYMBOL_GPL(boot_cpuid); +  unsigned long klimit = (unsigned long) _end;  char cmd_line[COMMAND_LINE_SIZE]; @@ -211,6 +212,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)  {  	unsigned long cpu_id = (unsigned long)v - 1;  	unsigned int pvr; +	unsigned long proc_freq;  	unsigned short maj;  	unsigned short min; @@ -262,12 +264,19 @@ static int show_cpuinfo(struct seq_file *m, void *v)  #endif /* CONFIG_TAU */  	/* -	 * Assume here that all clock rates are the same in a -	 * smp system.  -- Cort +	 * Platforms that have variable clock rates, should implement +	 * the method ppc_md.get_proc_freq() that reports the clock +	 * rate of a given cpu. The rest can use ppc_proc_freq to +	 * report the clock rate that is same across all cpus.  	 
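The rtasd hunks above replace direct field reads (errlog->type, err->extended_log_length) with accessor calls, since the error-log structures come from firmware in big-endian layout and C bitfield access is not endian-portable. A sketch of what such an accessor typically does; the byte offset and bit position here are assumed for illustration:

#include <stdint.h>

struct rtas_error_log_sketch {
	uint8_t byte0;			/* version */
	uint8_t byte1;			/* severity and related flag bits */
	uint8_t byte2, byte3;
	uint32_t extended_log_length;	/* big-endian */
};

/* assumed packing: severity in the top three bits of byte 1 */
static uint8_t rtas_error_severity(const struct rtas_error_log_sketch *l)
{
	return (l->byte1 >> 5) & 0x07;
}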
*/ -	if (ppc_proc_freq) +	if (ppc_md.get_proc_freq) +		proc_freq = ppc_md.get_proc_freq(cpu_id); +	else +		proc_freq = ppc_proc_freq; + +	if (proc_freq)  		seq_printf(m, "clock\t\t: %lu.%06luMHz\n", -			   ppc_proc_freq / 1000000, ppc_proc_freq % 1000000); +			   proc_freq / 1000000, proc_freq % 1000000);  	if (ppc_md.show_percpuinfo != NULL)  		ppc_md.show_percpuinfo(m, cpu_id); @@ -381,9 +390,10 @@ void __init check_for_initrd(void)  #ifdef CONFIG_SMP -int threads_per_core, threads_shift; +int threads_per_core, threads_per_subcore, threads_shift;  cpumask_t threads_core_mask;  EXPORT_SYMBOL_GPL(threads_per_core); +EXPORT_SYMBOL_GPL(threads_per_subcore);  EXPORT_SYMBOL_GPL(threads_shift);  EXPORT_SYMBOL_GPL(threads_core_mask); @@ -392,6 +402,7 @@ static void __init cpu_init_thread_core_maps(int tpc)  	int i;  	threads_per_core = tpc; +	threads_per_subcore = tpc;  	cpumask_clear(&threads_core_mask);  	/* This implementation only supports power of 2 number of threads @@ -458,9 +469,17 @@ void __init smp_setup_cpu_maps(void)  		}  		for (j = 0; j < nthreads && cpu < nr_cpu_ids; j++) { +			bool avail; +  			DBG("    thread %d -> cpu %d (hard id %d)\n",  			    j, cpu, be32_to_cpu(intserv[j])); -			set_cpu_present(cpu, true); + +			avail = of_device_is_available(dn); +			if (!avail) +				avail = !of_property_match_string(dn, +						"enable-method", "spin-table"); + +			set_cpu_present(cpu, avail);  			set_hard_smp_processor_id(cpu, be32_to_cpu(intserv[j]));  			set_cpu_possible(cpu, true);  			cpu++; @@ -481,7 +500,7 @@ void __init smp_setup_cpu_maps(void)  	if (machine_is(pseries) && firmware_has_feature(FW_FEATURE_LPAR) &&  	    (dn = of_find_node_by_path("/rtas"))) {  		int num_addr_cell, num_size_cell, maxcpus; -		const unsigned int *ireg; +		const __be32 *ireg;  		num_addr_cell = of_n_addr_cells(dn);  		num_size_cell = of_n_size_cells(dn); @@ -491,7 +510,7 @@ void __init smp_setup_cpu_maps(void)  		if (!ireg)  			goto out; -		maxcpus = ireg[num_addr_cell + num_size_cell]; +		maxcpus = be32_to_cpup(ireg + num_addr_cell + num_size_cell);  		/* Double maxcpus for processors which have SMT capability */  		if (cpu_has_feature(CPU_FTR_SMT)) @@ -717,33 +736,6 @@ static int powerpc_debugfs_init(void)  arch_initcall(powerpc_debugfs_init);  #endif -#ifdef CONFIG_BOOKE_WDT -extern u32 booke_wdt_enabled; -extern u32 booke_wdt_period; - -/* Checks wdt=x and wdt_period=xx command-line option */ -notrace int __init early_parse_wdt(char *p) -{ -	if (p && strncmp(p, "0", 1) != 0) -		booke_wdt_enabled = 1; - -	return 0; -} -early_param("wdt", early_parse_wdt); - -int __init early_parse_wdt_period(char *p) -{ -	unsigned long ret; -	if (p) { -		if (!kstrtol(p, 0, &ret)) -			booke_wdt_period = ret; -	} - -	return 0; -} -early_param("wdt_period", early_parse_wdt_period); -#endif	/* CONFIG_BOOKE_WDT */ -  void ppc_printk_progress(char *s, unsigned short hex)  {  	pr_info("%s\n", s); diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h deleted file mode 100644 index 4c67ad7fae0..00000000000 --- a/arch/powerpc/kernel/setup.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef _POWERPC_KERNEL_SETUP_H -#define _POWERPC_KERNEL_SETUP_H - -void check_for_initrd(void); -void do_init_bootmem(void); -void setup_panic(void); -extern int do_early_xmon; - -#endif /* _POWERPC_KERNEL_SETUP_H */ diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index a4bbcae7257..ea4fda60e57 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -40,14 +40,10 @@  #include 
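show_cpuinfo() above gains a per-cpu frequency hook with the old global as fallback; the printf splits Hz into whole and fractional MHz. Standalone:

#include <stdio.h>

static unsigned long ppc_proc_freq = 3425000000UL;	/* example: 3.425 GHz */

/* NULL on platforms where every cpu runs at ppc_proc_freq */
static unsigned long (*get_proc_freq)(unsigned long cpu);

static void print_clock(unsigned long cpu)
{
	unsigned long f = get_proc_freq ? get_proc_freq(cpu) : ppc_proc_freq;

	if (f)
		printf("clock\t\t: %lu.%06luMHz\n", f / 1000000, f % 1000000);
}

int main(void) { print_clock(0); return 0; }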
<asm/mmu_context.h>  #include <asm/epapr_hcalls.h> -#include "setup.h" -  #define DBG(fmt...)  extern void bootx_init(unsigned long r4, unsigned long phys); -int boot_cpuid = -1; -EXPORT_SYMBOL_GPL(boot_cpuid);  int boot_cpuid_phys;  EXPORT_SYMBOL_GPL(boot_cpuid_phys); @@ -249,7 +245,12 @@ static void __init exc_lvl_early_init(void)  	/* interrupt stacks must be in lowmem, we get that for free on ppc32  	 * as the memblock is limited to lowmem by MEMBLOCK_REAL_LIMIT */  	for_each_possible_cpu(i) { +#ifdef CONFIG_SMP  		hw_cpu = get_hard_smp_processor_id(i); +#else +		hw_cpu = 0; +#endif +  		critirq_ctx[hw_cpu] = (struct thread_info *)  			__va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));  #ifdef CONFIG_BOOKE @@ -298,9 +299,6 @@ void __init setup_arch(char **cmdline_p)  	if (cpu_has_feature(CPU_FTR_UNIFIED_ID_CACHE))  		ucache_bsize = icache_bsize = dcache_bsize; -	/* reboot on panic */ -	panic_timeout = 180; -  	if (ppc_md.panic)  		setup_panic(); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 278ca93e1f2..ee082d77117 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -36,6 +36,7 @@  #include <linux/lockdep.h>  #include <linux/memblock.h>  #include <linux/hugetlb.h> +#include <linux/memory.h>  #include <asm/io.h>  #include <asm/kdump.h> @@ -68,15 +69,12 @@  #include <asm/hugetlb.h>  #include <asm/epapr_hcalls.h> -#include "setup.h" -  #ifdef DEBUG  #define DBG(fmt...) udbg_printf(fmt)  #else  #define DBG(fmt...)  #endif -int boot_cpuid = 0;  int spinning_secondaries;  u64 ppc64_pft_size; @@ -99,6 +97,38 @@ int dcache_bsize;  int icache_bsize;  int ucache_bsize; +#if defined(CONFIG_PPC_BOOK3E) && defined(CONFIG_SMP) +static void setup_tlb_core_data(void) +{ +	int cpu; + +	BUILD_BUG_ON(offsetof(struct tlb_core_data, lock) != 0); + +	for_each_possible_cpu(cpu) { +		int first = cpu_first_thread_sibling(cpu); + +		paca[cpu].tcd_ptr = &paca[first].tcd; + +		/* +		 * If we have threads, we need either tlbsrx. +		 * or e6500 tablewalk mode, or else TLB handlers +		 * will be racy and could produce duplicate entries. +		 */ +		if (smt_enabled_at_boot >= 2 && +		    !mmu_has_feature(MMU_FTR_USE_TLBRSRV) && +		    book3e_htw_mode != PPC_HTW_E6500) { +			/* Should we panic instead? */ +			WARN_ONCE("%s: unsupported MMU configuration -- expect problems\n", +				  __func__); +		} +	} +} +#else +static void setup_tlb_core_data(void) +{ +} +#endif +  #ifdef CONFIG_SMP  static char *smt_enabled_cmdline; @@ -166,6 +196,19 @@ static void fixup_boot_paca(void)  	get_paca()->data_offset = 0;  } +static void cpu_ready_for_interrupts(void) +{ +	/* Set IR and DR in PACA MSR */ +	get_paca()->kernel_msr = MSR_KERNEL; + +	/* Enable AIL if supported */ +	if (cpu_has_feature(CPU_FTR_HVMODE) && +	    cpu_has_feature(CPU_FTR_ARCH_207S)) { +		unsigned long lpcr = mfspr(SPRN_LPCR); +		mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3); +	} +} +  /*   * Early initialization entry point. This is called by head.S   * with MMU translation disabled. 
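cpu_ready_for_interrupts(), added above, flips the PACA's kernel MSR to take interrupts with translation on, and on hypervisor-mode CPUs at the 2.07 architecture level it also sets the Alternate Interrupt Location field so exceptions vector straight into virtual mode. The SPR update is a plain read-modify-write; a stubbed, runnable sketch:

#include <stdint.h>
#include <stdio.h>

#define LPCR_AIL_3	(3ULL << 23)	/* assumed bit position */

static uint64_t fake_lpcr;		/* stands in for the real SPR */
static uint64_t mfspr_lpcr(void)        { return fake_lpcr; }
static void     mtspr_lpcr(uint64_t v)  { fake_lpcr = v; }

int main(void)
{
	uint64_t lpcr = mfspr_lpcr();
	mtspr_lpcr(lpcr | LPCR_AIL_3);	/* interrupts now arrive MMU-on */
	printf("LPCR = %#llx\n", (unsigned long long)fake_lpcr);
	return 0;
}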
We rely on the "feature" of @@ -232,6 +275,14 @@ void __init early_setup(unsigned long dt_ptr)  	/* Initialize the hash table or TLB handling */  	early_init_mmu(); +	/* +	 * At this point, we can let interrupts switch to virtual mode +	 * (the MMU has been setup), so adjust the MSR in the PACA to +	 * have IR and DR set and enable AIL if it exists +	 */ +	cpu_ready_for_interrupts(); + +	/* Reserve large chunks of memory for use by CMA for KVM */  	kvm_cma_reserve();  	/* @@ -264,6 +315,13 @@ void early_setup_secondary(void)  	/* Initialize the hash table or TLB handling */  	early_init_mmu_secondary(); + +	/* +	 * At this point, we can let interrupts switch to virtual mode +	 * (the MMU has been setup), so adjust the MSR in the PACA to +	 * have IR and DR set. +	 */ +	cpu_ready_for_interrupts();  }  #endif /* CONFIG_SMP */ @@ -284,7 +342,7 @@ void smp_release_cpus(void)  	ptr  = (unsigned long *)((unsigned long)&__secondary_hold_spinloop  			- PHYSICAL_START); -	*ptr = __pa(generic_secondary_smp_init); +	*ptr = ppc_function_entry(generic_secondary_smp_init);  	/* And wait a bit for them to catch up */  	for (i = 0; i < 100000; i++) { @@ -447,6 +505,7 @@ void __init setup_system(void)  	smp_setup_cpu_maps();  	check_smt_enabled(); +	setup_tlb_core_data();  #ifdef CONFIG_SMP  	/* Release secondary cpus out of their spinloops at 0x60 now that @@ -522,23 +581,25 @@ static void __init irqstack_early_init(void)  #ifdef CONFIG_PPC_BOOK3E  static void __init exc_lvl_early_init(void)  { -	extern unsigned int interrupt_base_book3e; -	extern unsigned int exc_debug_debug_book3e; -  	unsigned int i; +	unsigned long sp;  	for_each_possible_cpu(i) { -		critirq_ctx[i] = (struct thread_info *) -			__va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); -		dbgirq_ctx[i] = (struct thread_info *) -			__va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); -		mcheckirq_ctx[i] = (struct thread_info *) -			__va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); +		sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE); +		critirq_ctx[i] = (struct thread_info *)__va(sp); +		paca[i].crit_kstack = __va(sp + THREAD_SIZE); + +		sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE); +		dbgirq_ctx[i] = (struct thread_info *)__va(sp); +		paca[i].dbg_kstack = __va(sp + THREAD_SIZE); + +		sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE); +		mcheckirq_ctx[i] = (struct thread_info *)__va(sp); +		paca[i].mc_kstack = __va(sp + THREAD_SIZE);  	}  	if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC)) -		patch_branch(&interrupt_base_book3e + (0x040 / 4) + 1, -			     (unsigned long)&exc_debug_debug_book3e, 0); +		patch_exception(0x040, exc_debug_debug_book3e);  }  #else  #define exc_lvl_early_init() @@ -546,7 +607,8 @@ static void __init exc_lvl_early_init(void)  /*   * Stack space used when we detect a bad kernel stack pointer, and - * early in SMP boots before relocation is enabled. + * early in SMP boots before relocation is enabled. Exclusive emergency + * stack for machine checks.   */  static void __init emergency_stack_init(void)  { @@ -569,6 +631,13 @@ static void __init emergency_stack_init(void)  		sp  = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit);  		sp += THREAD_SIZE;  		paca[i].emergency_sp = __va(sp); + +#ifdef CONFIG_PPC_BOOK3S_64 +		/* emergency stack for machine check exception handling. 
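A detail worth noting in exc_lvl_early_init() above: each allocation is THREAD_SIZE-sized and THREAD_SIZE-aligned, the thread_info pointer keeps the base, and the PACA keeps __va(sp + THREAD_SIZE), because the stack grows downward from the top of the region. In user-space terms:

#include <stdio.h>
#include <stdlib.h>

#define THREAD_SIZE	16384	/* illustrative; the real value is config-dependent */

int main(void)
{
	/* stands in for memblock_alloc(THREAD_SIZE, THREAD_SIZE) */
	char *base = aligned_alloc(THREAD_SIZE, THREAD_SIZE);
	char *top  = base + THREAD_SIZE;	/* initial stack pointer */

	printf("thread_info at %p, stack grows down from %p\n",
	       (void *)base, (void *)top);
	free(base);
	return 0;
}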
*/ +		sp  = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit); +		sp += THREAD_SIZE; +		paca[i].mc_emergency_sp = __va(sp); +#endif  	}  } @@ -590,9 +659,6 @@ void __init setup_arch(char **cmdline_p)  	dcache_bsize = ppc64_caches.dline_size;  	icache_bsize = ppc64_caches.iline_size; -	/* reboot on panic */ -	panic_timeout = 180; -  	if (ppc_md.panic)  		setup_panic(); @@ -715,6 +781,15 @@ void __init setup_per_cpu_areas(void)  }  #endif +#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE +unsigned long memory_block_size_bytes(void) +{ +	if (ppc_md.memory_block_size) +		return ppc_md.memory_block_size(); + +	return MIN_MEMORY_BLOCK_SIZE; +} +#endif  #if defined(CONFIG_PPC_INDIRECT_PIO) || defined(CONFIG_PPC_INDIRECT_MMIO)  struct ppc_pci_io ppc_pci_io; diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index 457e97aa294..1c794cef288 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -134,7 +134,7 @@ static int do_signal(struct pt_regs *regs)  	 */  	if (current->thread.hw_brk.address &&  		current->thread.hw_brk.type) -		set_breakpoint(¤t->thread.hw_brk); +		__set_breakpoint(¤t->thread.hw_brk);  #endif  	/* Re-enable the breakpoints for the signal stack */  	thread_change_pc(current, regs); @@ -203,8 +203,7 @@ unsigned long get_tm_stackpointer(struct pt_regs *regs)  #ifdef CONFIG_PPC_TRANSACTIONAL_MEM  	if (MSR_TM_ACTIVE(regs->msr)) { -		tm_enable(); -		tm_reclaim(¤t->thread, regs->msr, TM_CAUSE_SIGNAL); +		tm_reclaim_current(TM_CAUSE_SIGNAL);  		if (MSR_TM_TRANSACTIONAL(regs->msr))  			return current->thread.ckpt_regs.gpr[1];  	} diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index bebdf1a1a54..1bc5a1755ed 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -54,7 +54,6 @@  #include "signal.h" -#undef DEBUG_SIG  #ifdef CONFIG_PPC64  #define sys_rt_sigreturn	compat_sys_rt_sigreturn @@ -265,27 +264,27 @@ struct rt_sigframe {  unsigned long copy_fpr_to_user(void __user *to,  			       struct task_struct *task)  { -	double buf[ELF_NFPREG]; +	u64 buf[ELF_NFPREG];  	int i;  	/* save FPR copy to local buffer then write to the thread_struct */  	for (i = 0; i < (ELF_NFPREG - 1) ; i++)  		buf[i] = task->thread.TS_FPR(i); -	memcpy(&buf[i], &task->thread.fpscr, sizeof(double)); +	buf[i] = task->thread.fp_state.fpscr;  	return __copy_to_user(to, buf, ELF_NFPREG * sizeof(double));  }  unsigned long copy_fpr_from_user(struct task_struct *task,  				 void __user *from)  { -	double buf[ELF_NFPREG]; +	u64 buf[ELF_NFPREG];  	int i;  	if (__copy_from_user(buf, from, ELF_NFPREG * sizeof(double)))  		return 1;  	for (i = 0; i < (ELF_NFPREG - 1) ; i++)  		task->thread.TS_FPR(i) = buf[i]; -	memcpy(&task->thread.fpscr, &buf[i], sizeof(double)); +	task->thread.fp_state.fpscr = buf[i];  	return 0;  } @@ -293,25 +292,25 @@ unsigned long copy_fpr_from_user(struct task_struct *task,  unsigned long copy_vsx_to_user(void __user *to,  			       struct task_struct *task)  { -	double buf[ELF_NVSRHALFREG]; +	u64 buf[ELF_NVSRHALFREG];  	int i;  	/* save FPR copy to local buffer then write to the thread_struct */  	for (i = 0; i < ELF_NVSRHALFREG; i++) -		buf[i] = task->thread.fpr[i][TS_VSRLOWOFFSET]; +		buf[i] = task->thread.fp_state.fpr[i][TS_VSRLOWOFFSET];  	return __copy_to_user(to, buf, ELF_NVSRHALFREG * sizeof(double));  }  unsigned long copy_vsx_from_user(struct task_struct *task,  				 void __user *from)  { -	double buf[ELF_NVSRHALFREG]; +	u64 buf[ELF_NVSRHALFREG];  	int i;  	if (__copy_from_user(buf, from, 
ELF_NVSRHALFREG * sizeof(double)))  		return 1;  	for (i = 0; i < ELF_NVSRHALFREG ; i++) -		task->thread.fpr[i][TS_VSRLOWOFFSET] = buf[i]; +		task->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i];  	return 0;  } @@ -319,27 +318,27 @@ unsigned long copy_vsx_from_user(struct task_struct *task,  unsigned long copy_transact_fpr_to_user(void __user *to,  				  struct task_struct *task)  { -	double buf[ELF_NFPREG]; +	u64 buf[ELF_NFPREG];  	int i;  	/* save FPR copy to local buffer then write to the thread_struct */  	for (i = 0; i < (ELF_NFPREG - 1) ; i++)  		buf[i] = task->thread.TS_TRANS_FPR(i); -	memcpy(&buf[i], &task->thread.transact_fpscr, sizeof(double)); +	buf[i] = task->thread.transact_fp.fpscr;  	return __copy_to_user(to, buf, ELF_NFPREG * sizeof(double));  }  unsigned long copy_transact_fpr_from_user(struct task_struct *task,  					  void __user *from)  { -	double buf[ELF_NFPREG]; +	u64 buf[ELF_NFPREG];  	int i;  	if (__copy_from_user(buf, from, ELF_NFPREG * sizeof(double)))  		return 1;  	for (i = 0; i < (ELF_NFPREG - 1) ; i++)  		task->thread.TS_TRANS_FPR(i) = buf[i]; -	memcpy(&task->thread.transact_fpscr, &buf[i], sizeof(double)); +	task->thread.transact_fp.fpscr = buf[i];  	return 0;  } @@ -347,25 +346,25 @@ unsigned long copy_transact_fpr_from_user(struct task_struct *task,  unsigned long copy_transact_vsx_to_user(void __user *to,  				  struct task_struct *task)  { -	double buf[ELF_NVSRHALFREG]; +	u64 buf[ELF_NVSRHALFREG];  	int i;  	/* save FPR copy to local buffer then write to the thread_struct */  	for (i = 0; i < ELF_NVSRHALFREG; i++) -		buf[i] = task->thread.transact_fpr[i][TS_VSRLOWOFFSET]; +		buf[i] = task->thread.transact_fp.fpr[i][TS_VSRLOWOFFSET];  	return __copy_to_user(to, buf, ELF_NVSRHALFREG * sizeof(double));  }  unsigned long copy_transact_vsx_from_user(struct task_struct *task,  					  void __user *from)  { -	double buf[ELF_NVSRHALFREG]; +	u64 buf[ELF_NVSRHALFREG];  	int i;  	if (__copy_from_user(buf, from, ELF_NVSRHALFREG * sizeof(double)))  		return 1;  	for (i = 0; i < ELF_NVSRHALFREG ; i++) -		task->thread.transact_fpr[i][TS_VSRLOWOFFSET] = buf[i]; +		task->thread.transact_fp.fpr[i][TS_VSRLOWOFFSET] = buf[i];  	return 0;  }  #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ @@ -373,14 +372,14 @@ unsigned long copy_transact_vsx_from_user(struct task_struct *task,  inline unsigned long copy_fpr_to_user(void __user *to,  				      struct task_struct *task)  { -	return __copy_to_user(to, task->thread.fpr, +	return __copy_to_user(to, task->thread.fp_state.fpr,  			      ELF_NFPREG * sizeof(double));  }  inline unsigned long copy_fpr_from_user(struct task_struct *task,  					void __user *from)  { -	return __copy_from_user(task->thread.fpr, from, +	return __copy_from_user(task->thread.fp_state.fpr, from,  			      ELF_NFPREG * sizeof(double));  } @@ -388,14 +387,14 @@ inline unsigned long copy_fpr_from_user(struct task_struct *task,  inline unsigned long copy_transact_fpr_to_user(void __user *to,  					 struct task_struct *task)  { -	return __copy_to_user(to, task->thread.transact_fpr, +	return __copy_to_user(to, task->thread.transact_fp.fpr,  			      ELF_NFPREG * sizeof(double));  }  inline unsigned long copy_transact_fpr_from_user(struct task_struct *task,  						 void __user *from)  { -	return __copy_from_user(task->thread.transact_fpr, from, +	return __copy_from_user(task->thread.transact_fp.fpr, from,  				ELF_NFPREG * sizeof(double));  }  #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ @@ -423,7 +422,7 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext 
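The copy_*_to/from_user helpers above switch their staging arrays from double[] to u64[]. The registers being marshalled are raw bit images; staging them in integer variables keeps the compiler from moving them with FP loads and stores, which could, for instance, quiet a signalling NaN in transit (rationale inferred from the type change, not stated in the hunk). The bit-preserving idiom:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	double reg = 0.1;	/* pretend this is an FPR image */
	uint64_t bits;

	memcpy(&bits, &reg, sizeof(bits));	/* copies bytes, no FP ops */
	printf("0.1 is stored as %016llx\n", (unsigned long long)bits);
	return 0;
}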
__user *frame,  	/* save altivec registers */  	if (current->thread.used_vr) {  		flush_altivec_to_thread(current); -		if (__copy_to_user(&frame->mc_vregs, current->thread.vr, +		if (__copy_to_user(&frame->mc_vregs, ¤t->thread.vr_state,  				   ELF_NVRREG * sizeof(vector128)))  			return 1;  		/* set MSR_VEC in the saved MSR value to indicate that @@ -445,6 +444,12 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame,  #endif /* CONFIG_ALTIVEC */  	if (copy_fpr_to_user(&frame->mc_fregs, current))  		return 1; + +	/* +	 * Clear the MSR VSX bit to indicate there is no valid state attached +	 * to this context, except in the specific case below where we set it. +	 */ +	msr &= ~MSR_VSX;  #ifdef CONFIG_VSX  	/*  	 * Copy VSR 0-31 upper half from thread_struct to local @@ -513,6 +518,13 @@ static int save_tm_user_regs(struct pt_regs *regs,  {  	unsigned long msr = regs->msr; +	/* Remove TM bits from thread's MSR.  The MSR in the sigcontext +	 * just indicates to userland that we were doing a transaction, but we +	 * don't want to return in transactional state.  This also ensures +	 * that flush_fp_to_thread won't set TIF_RESTORE_TM again. +	 */ +	regs->msr &= ~MSR_TS_MASK; +  	/* Make sure floating point registers are stored in regs */  	flush_fp_to_thread(current); @@ -534,17 +546,17 @@ static int save_tm_user_regs(struct pt_regs *regs,  	/* save altivec registers */  	if (current->thread.used_vr) {  		flush_altivec_to_thread(current); -		if (__copy_to_user(&frame->mc_vregs, current->thread.vr, +		if (__copy_to_user(&frame->mc_vregs, ¤t->thread.vr_state,  				   ELF_NVRREG * sizeof(vector128)))  			return 1;  		if (msr & MSR_VEC) {  			if (__copy_to_user(&tm_frame->mc_vregs, -					   current->thread.transact_vr, +					   ¤t->thread.transact_vr,  					   ELF_NVRREG * sizeof(vector128)))  				return 1;  		} else {  			if (__copy_to_user(&tm_frame->mc_vregs, -					   current->thread.vr, +					   ¤t->thread.vr_state,  					   ELF_NVRREG * sizeof(vector128)))  				return 1;  		} @@ -692,11 +704,12 @@ static long restore_user_regs(struct pt_regs *regs,  	regs->msr &= ~MSR_VEC;  	if (msr & MSR_VEC) {  		/* restore altivec registers from the stack */ -		if (__copy_from_user(current->thread.vr, &sr->mc_vregs, +		if (__copy_from_user(¤t->thread.vr_state, &sr->mc_vregs,  				     sizeof(sr->mc_vregs)))  			return 1;  	} else if (current->thread.used_vr) -		memset(current->thread.vr, 0, ELF_NVRREG * sizeof(vector128)); +		memset(¤t->thread.vr_state, 0, +		       ELF_NVRREG * sizeof(vector128));  	/* Always get VRSAVE back */  	if (__get_user(current->thread.vrsave, (u32 __user *)&sr->mc_vregs[32])) @@ -722,7 +735,7 @@ static long restore_user_regs(struct pt_regs *regs,  			return 1;  	} else if (current->thread.used_vsr)  		for (i = 0; i < 32 ; i++) -			current->thread.fpr[i][TS_VSRLOWOFFSET] = 0; +			current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0;  #endif /* CONFIG_VSX */  	/*  	 * force the process to reload the FP registers from @@ -798,15 +811,16 @@ static long restore_tm_user_regs(struct pt_regs *regs,  	regs->msr &= ~MSR_VEC;  	if (msr & MSR_VEC) {  		/* restore altivec registers from the stack */ -		if (__copy_from_user(current->thread.vr, &sr->mc_vregs, +		if (__copy_from_user(¤t->thread.vr_state, &sr->mc_vregs,  				     sizeof(sr->mc_vregs)) || -		    __copy_from_user(current->thread.transact_vr, +		    __copy_from_user(¤t->thread.transact_vr,  				     &tm_sr->mc_vregs,  				     sizeof(sr->mc_vregs)))  			return 1;  	} else if (current->thread.used_vr) { -		
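save_user_regs() above also starts clearing MSR_VSX in the frame's MSR before the FP copy, setting it again only on the path that actually dumps the VSX doublewords; userland can then trust the bit to mean the context really carries VSX state. Shape of the idiom (bit value is a placeholder):

#include <stdbool.h>
#include <stdint.h>

#define MSR_VSX	(1ULL << 23)	/* placeholder bit for the sketch */

static uint64_t save_msr(uint64_t msr, bool used_vsr)
{
	msr &= ~MSR_VSX;	/* default: no VSX state in this frame */
	if (used_vsr) {
		/* ... copy the 32 VSR doublewords out here ... */
		msr |= MSR_VSX;	/* and only then advertise them */
	}
	return msr;
}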
memset(current->thread.vr, 0, ELF_NVRREG * sizeof(vector128)); -		memset(current->thread.transact_vr, 0, +		memset(¤t->thread.vr_state, 0, +		       ELF_NVRREG * sizeof(vector128)); +		memset(¤t->thread.transact_vr, 0,  		       ELF_NVRREG * sizeof(vector128));  	} @@ -838,8 +852,8 @@ static long restore_tm_user_regs(struct pt_regs *regs,  			return 1;  	} else if (current->thread.used_vsr)  		for (i = 0; i < 32 ; i++) { -			current->thread.fpr[i][TS_VSRLOWOFFSET] = 0; -			current->thread.transact_fpr[i][TS_VSRLOWOFFSET] = 0; +			current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0; +			current->thread.transact_fp.fpr[i][TS_VSRLOWOFFSET] = 0;  		}  #endif /* CONFIG_VSX */ @@ -866,6 +880,8 @@ static long restore_tm_user_regs(struct pt_regs *regs,  	 * transactional versions should be loaded.  	 */  	tm_enable(); +	/* Make sure the transaction is marked as failed */ +	current->thread.tm_texasr |= TEXASR_FS;  	/* This loads the checkpointed FP/VEC state, if used */  	tm_recheckpoint(¤t->thread, msr);  	/* Get the top half of the MSR */ @@ -891,7 +907,7 @@ static long restore_tm_user_regs(struct pt_regs *regs,  #endif  #ifdef CONFIG_PPC64 -int copy_siginfo_to_user32(struct compat_siginfo __user *d, siginfo_t *s) +int copy_siginfo_to_user32(struct compat_siginfo __user *d, const siginfo_t *s)  {  	int err; @@ -1007,30 +1023,25 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka,  #ifdef CONFIG_PPC_TRANSACTIONAL_MEM  	tm_frame = &rt_sf->uc_transact.uc_mcontext;  	if (MSR_TM_ACTIVE(regs->msr)) { +		if (__put_user((unsigned long)&rt_sf->uc_transact, +			       &rt_sf->uc.uc_link) || +		    __put_user((unsigned long)tm_frame, +			       &rt_sf->uc_transact.uc_regs)) +			goto badframe;  		if (save_tm_user_regs(regs, frame, tm_frame, sigret))  			goto badframe;  	}  	else  #endif  	{ +		if (__put_user(0, &rt_sf->uc.uc_link)) +			goto badframe;  		if (save_user_regs(regs, frame, tm_frame, sigret, 1))  			goto badframe;  	}  	regs->link = tramp; -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -	if (MSR_TM_ACTIVE(regs->msr)) { -		if (__put_user((unsigned long)&rt_sf->uc_transact, -			       &rt_sf->uc.uc_link) -		    || __put_user((unsigned long)tm_frame, &rt_sf->uc_transact.uc_regs)) -			goto badframe; -	} -	else -#endif -		if (__put_user(0, &rt_sf->uc.uc_link)) -			goto badframe; - -	current->thread.fpscr.val = 0;	/* turn off all fp exceptions */ +	current->thread.fp_state.fpscr = 0;	/* turn off all fp exceptions */  	/* create a stack frame for the caller of the handler */  	newsp = ((unsigned long)rt_sf) - (__SIGNAL_FRAMESIZE + 16); @@ -1045,22 +1056,12 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka,  	regs->gpr[5] = (unsigned long) &rt_sf->uc;  	regs->gpr[6] = (unsigned long) rt_sf;  	regs->nip = (unsigned long) ka->sa.sa_handler; -	/* enter the signal handler in big-endian mode */ +	/* enter the signal handler in native-endian mode */  	regs->msr &= ~MSR_LE; -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -	/* Remove TM bits from thread's MSR.  
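Note the new TEXASR_FS line above, present in both the 32- and 64-bit restore paths: before tm_recheckpoint() reloads checkpointed state, the failure-summary bit is set so the hardware treats the suspended transaction as already failed rather than letting it continue. As a plain bit update:

#include <stdint.h>

#define TEXASR_FS	(1ULL << 27)	/* assumed position of failure summary */

struct tm_state_sketch { uint64_t tm_texasr; };

static void mark_transaction_failed(struct tm_state_sketch *t)
{
	t->tm_texasr |= TEXASR_FS;	/* must precede recheckpoint */
}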
The MSR in the sigcontext -	 * just indicates to userland that we were doing a transaction, but we -	 * don't want to return in transactional state: -	 */ -	regs->msr &= ~MSR_TS_MASK; -#endif +	regs->msr |= (MSR_KERNEL & MSR_LE);  	return 1;  badframe: -#ifdef DEBUG_SIG -	printk("badframe in handle_rt_signal, regs=%p frame=%p newsp=%lx\n", -	       regs, frame, newsp); -#endif  	if (show_unhandled_signals)  		printk_ratelimited(KERN_INFO  				   "%s[%d]: bad frame in handle_rt_signal32: " @@ -1309,7 +1310,7 @@ int sys_debug_setcontext(struct ucontext __user *ctx,  	unsigned char tmp;  	unsigned long new_msr = regs->msr;  #ifdef CONFIG_PPC_ADV_DEBUG_REGS -	unsigned long new_dbcr0 = current->thread.dbcr0; +	unsigned long new_dbcr0 = current->thread.debug.dbcr0;  #endif  	for (i=0; i<ndbg; i++) { @@ -1324,7 +1325,7 @@ int sys_debug_setcontext(struct ucontext __user *ctx,  			} else {  				new_dbcr0 &= ~DBCR0_IC;  				if (!DBCR_ACTIVE_EVENTS(new_dbcr0, -						current->thread.dbcr1)) { +						current->thread.debug.dbcr1)) {  					new_msr &= ~MSR_DE;  					new_dbcr0 &= ~DBCR0_IDM;  				} @@ -1359,7 +1360,7 @@ int sys_debug_setcontext(struct ucontext __user *ctx,  	   the user is really doing something wrong. */  	regs->msr = new_msr;  #ifdef CONFIG_PPC_ADV_DEBUG_REGS -	current->thread.dbcr0 = new_dbcr0; +	current->thread.debug.dbcr0 = new_dbcr0;  #endif  	if (!access_ok(VERIFY_READ, ctx, sizeof(*ctx)) @@ -1462,7 +1463,7 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka,  	regs->link = tramp; -	current->thread.fpscr.val = 0;	/* turn off all fp exceptions */ +	current->thread.fp_state.fpscr = 0;	/* turn off all fp exceptions */  	/* create a stack frame for the caller of the handler */  	newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE; @@ -1475,20 +1476,9 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka,  	regs->nip = (unsigned long) ka->sa.sa_handler;  	/* enter the signal handler in big-endian mode */  	regs->msr &= ~MSR_LE; -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -	/* Remove TM bits from thread's MSR.  The MSR in the sigcontext -	 * just indicates to userland that we were doing a transaction, but we -	 * don't want to return in transactional state: -	 */ -	regs->msr &= ~MSR_TS_MASK; -#endif  	return 1;  badframe: -#ifdef DEBUG_SIG -	printk("badframe in handle_signal, regs=%p frame=%p newsp=%lx\n", -	       regs, frame, newsp); -#endif  	if (show_unhandled_signals)  		printk_ratelimited(KERN_INFO  				   "%s[%d]: bad frame in handle_signal32: " diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index f93ec2835a1..97c1e4b683f 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -38,7 +38,6 @@  #include "signal.h" -#define DEBUG_SIG 0  #define GP_REGS_SIZE	min(sizeof(elf_gregset_t), sizeof(struct pt_regs))  #define FP_REGS_SIZE	sizeof(elf_fpregset_t) @@ -65,8 +64,8 @@ struct rt_sigframe {  	struct siginfo __user *pinfo;  	void __user *puc;  	struct siginfo info; -	/* 64 bit ABI allows for 288 bytes below sp before decrementing it. 
*/ -	char abigap[288]; +	/* New 64 bit little-endian ABI allows redzone of 512 bytes below sp */ +	char abigap[USER_REDZONE_SIZE];  } __attribute__ ((aligned (16)));  static const char fmt32[] = KERN_INFO \ @@ -103,7 +102,8 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,  	if (current->thread.used_vr) {  		flush_altivec_to_thread(current);  		/* Copy 33 vec registers (vr0..31 and vscr) to the stack */ -		err |= __copy_to_user(v_regs, current->thread.vr, 33 * sizeof(vector128)); +		err |= __copy_to_user(v_regs, ¤t->thread.vr_state, +				      33 * sizeof(vector128));  		/* set MSR_VEC in the MSR value in the frame to indicate that sc->v_reg)  		 * contains valid data.  		 */ @@ -121,6 +121,12 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,  	flush_fp_to_thread(current);  	/* copy fpr regs and fpscr */  	err |= copy_fpr_to_user(&sc->fp_regs, current); + +	/* +	 * Clear the MSR VSX bit to indicate there is no valid state attached +	 * to this context, except in the specific case below where we set it. +	 */ +	msr &= ~MSR_VSX;  #ifdef CONFIG_VSX  	/*  	 * Copy VSX low doubleword to local buffer for formatting, @@ -185,6 +191,13 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc,  	BUG_ON(!MSR_TM_ACTIVE(regs->msr)); +	/* Remove TM bits from thread's MSR.  The MSR in the sigcontext +	 * just indicates to userland that we were doing a transaction, but we +	 * don't want to return in transactional state.  This also ensures +	 * that flush_fp_to_thread won't set TIF_RESTORE_TM again. +	 */ +	regs->msr &= ~MSR_TS_MASK; +  	flush_fp_to_thread(current);  #ifdef CONFIG_ALTIVEC @@ -195,18 +208,18 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc,  	if (current->thread.used_vr) {  		flush_altivec_to_thread(current);  		/* Copy 33 vec registers (vr0..31 and vscr) to the stack */ -		err |= __copy_to_user(v_regs, current->thread.vr, +		err |= __copy_to_user(v_regs, ¤t->thread.vr_state,  				      33 * sizeof(vector128));  		/* If VEC was enabled there are transactional VRs valid too,  		 * else they're a copy of the checkpointed VRs.  		 
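The abigap resize above tracks the ABI: the red zone is scratch space a function may use below the stack pointer without decrementing it, and the new little-endian (ELFv2) ABI guarantees 512 bytes of it instead of 288. The signal frame embeds a gap of that size as its last member so the handler's frame cannot clobber the interrupted code's red-zone data:

#define USER_REDZONE_SIZE 512	/* ELFv2/LE; the old ABI used 288 */

/* stack at signal delivery, addresses decreasing downward:
 *
 *   user sp ------> +--------------------------+
 *                   | red zone (abigap):       |  still owned by the
 *                   | USER_REDZONE_SIZE bytes  |  interrupted function
 *                   +--------------------------+
 *                   | rest of rt_sigframe      |
 *   frame --------> +--------------------------+
 *                   | __SIGNAL_FRAMESIZE       |  dummy caller frame
 *   newsp --------> +--------------------------+
 */
struct rt_sigframe_tail { char abigap[USER_REDZONE_SIZE]; };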
*/  		if (msr & MSR_VEC)  			err |= __copy_to_user(tm_v_regs, -					      current->thread.transact_vr, +					      ¤t->thread.transact_vr,  					      33 * sizeof(vector128));  		else  			err |= __copy_to_user(tm_v_regs, -					      current->thread.vr, +					      ¤t->thread.vr_state,  					      33 * sizeof(vector128));  		/* set MSR_VEC in the MSR value in the frame to indicate @@ -349,10 +362,10 @@ static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig,  		return -EFAULT;  	/* Copy 33 vec registers (vr0..31 and vscr) from the stack */  	if (v_regs != NULL && (msr & MSR_VEC) != 0) -		err |= __copy_from_user(current->thread.vr, v_regs, +		err |= __copy_from_user(¤t->thread.vr_state, v_regs,  					33 * sizeof(vector128));  	else if (current->thread.used_vr) -		memset(current->thread.vr, 0, 33 * sizeof(vector128)); +		memset(¤t->thread.vr_state, 0, 33 * sizeof(vector128));  	/* Always get VRSAVE back */  	if (v_regs != NULL)  		err |= __get_user(current->thread.vrsave, (u32 __user *)&v_regs[33]); @@ -374,7 +387,7 @@ static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig,  		err |= copy_vsx_from_user(current, v_regs);  	else  		for (i = 0; i < 32 ; i++) -			current->thread.fpr[i][TS_VSRLOWOFFSET] = 0; +			current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0;  #endif  	return err;  } @@ -468,14 +481,14 @@ static long restore_tm_sigcontexts(struct pt_regs *regs,  		return -EFAULT;  	/* Copy 33 vec registers (vr0..31 and vscr) from the stack */  	if (v_regs != NULL && tm_v_regs != NULL && (msr & MSR_VEC) != 0) { -		err |= __copy_from_user(current->thread.vr, v_regs, +		err |= __copy_from_user(¤t->thread.vr_state, v_regs,  					33 * sizeof(vector128)); -		err |= __copy_from_user(current->thread.transact_vr, tm_v_regs, +		err |= __copy_from_user(¤t->thread.transact_vr, tm_v_regs,  					33 * sizeof(vector128));  	}  	else if (current->thread.used_vr) { -		memset(current->thread.vr, 0, 33 * sizeof(vector128)); -		memset(current->thread.transact_vr, 0, 33 * sizeof(vector128)); +		memset(¤t->thread.vr_state, 0, 33 * sizeof(vector128)); +		memset(¤t->thread.transact_vr, 0, 33 * sizeof(vector128));  	}  	/* Always get VRSAVE back */  	if (v_regs != NULL && tm_v_regs != NULL) { @@ -507,12 +520,14 @@ static long restore_tm_sigcontexts(struct pt_regs *regs,  		err |= copy_transact_vsx_from_user(current, tm_v_regs);  	} else {  		for (i = 0; i < 32 ; i++) { -			current->thread.fpr[i][TS_VSRLOWOFFSET] = 0; -			current->thread.transact_fpr[i][TS_VSRLOWOFFSET] = 0; +			current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0; +			current->thread.transact_fp.fpr[i][TS_VSRLOWOFFSET] = 0;  		}  	}  #endif  	tm_enable(); +	/* Make sure the transaction is marked as failed */ +	current->thread.tm_texasr |= TEXASR_FS;  	/* This loads the checkpointed FP/VEC state, if used */  	tm_recheckpoint(¤t->thread, msr); @@ -684,10 +699,6 @@ int sys_rt_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5,  	return 0;  badframe: -#if DEBUG_SIG -	printk("badframe in sys_rt_sigreturn, regs=%p uc=%p &uc->uc_mcontext=%p\n", -	       regs, uc, &uc->uc_mcontext); -#endif  	if (show_unhandled_signals)  		printk_ratelimited(regs->msr & MSR_64BIT ? fmt64 : fmt32,  				   current->comm, current->pid, "rt_sigreturn", @@ -700,12 +711,6 @@ badframe:  int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info,  		sigset_t *set, struct pt_regs *regs)  { -	/* Handler is *really* a pointer to the function descriptor for -	 * the signal routine.  
The first entry in the function -	 * descriptor is the entry address of signal and the second -	 * entry is the TOC value we need to use. -	 */ -	func_descr_t __user *funct_desc_ptr;  	struct rt_sigframe __user *frame;  	unsigned long newsp = 0;  	long err = 0; @@ -747,14 +752,7 @@ int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info,  		goto badframe;  	/* Make sure signal handler doesn't get spurious FP exceptions */ -	current->thread.fpscr.val = 0; -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -	/* Remove TM bits from thread's MSR.  The MSR in the sigcontext -	 * just indicates to userland that we were doing a transaction, but we -	 * don't want to return in transactional state: -	 */ -	regs->msr &= ~MSR_TS_MASK; -#endif +	current->thread.fp_state.fpscr = 0;  	/* Set up to return from userspace. */  	if (vdso64_rt_sigtramp && current->mm->context.vdso_base) { @@ -765,18 +763,32 @@ int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info,  			goto badframe;  		regs->link = (unsigned long) &frame->tramp[0];  	} -	funct_desc_ptr = (func_descr_t __user *) ka->sa.sa_handler;  	/* Allocate a dummy caller frame for the signal handler. */  	newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE;  	err |= put_user(regs->gpr[1], (unsigned long __user *)newsp);  	/* Set up "regs" so we "return" to the signal handler. */ -	err |= get_user(regs->nip, &funct_desc_ptr->entry); -	/* enter the signal handler in big-endian mode */ +	if (is_elf2_task()) { +		regs->nip = (unsigned long) ka->sa.sa_handler; +		regs->gpr[12] = regs->nip; +	} else { +		/* Handler is *really* a pointer to the function descriptor for +		 * the signal routine.  The first entry in the function +		 * descriptor is the entry address of signal and the second +		 * entry is the TOC value we need to use. +		 */ +		func_descr_t __user *funct_desc_ptr = +			(func_descr_t __user *) ka->sa.sa_handler; + +		err |= get_user(regs->nip, &funct_desc_ptr->entry); +		err |= get_user(regs->gpr[2], &funct_desc_ptr->toc); +	} + +	/* enter the signal handler in native-endian mode */  	regs->msr &= ~MSR_LE; +	regs->msr |= (MSR_KERNEL & MSR_LE);  	regs->gpr[1] = newsp; -	err |= get_user(regs->gpr[2], &funct_desc_ptr->toc);  	regs->gpr[3] = signr;  	regs->result = 0;  	if (ka->sa.sa_flags & SA_SIGINFO) { @@ -792,10 +804,6 @@ int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info,  	return 1;  badframe: -#if DEBUG_SIG -	printk("badframe in setup_rt_frame, regs=%p frame=%p newsp=%lx\n", -	       regs, frame, newsp); -#endif  	if (show_unhandled_signals)  		printk_ratelimited(regs->msr & MSR_64BIT ? 
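handle_rt_signal64() above now branches on is_elf2_task(): ELFv1 handlers are function descriptors (entry address plus TOC pointer), while ELFv2 passes the entry address itself and expects it duplicated into r12 so the global entry code can derive its own TOC. The same hunk makes handler entry endian-correct by copying the LE bit out of MSR_KERNEL instead of always forcing big-endian. Condensed into a sketch:

#include <stdint.h>

typedef struct {
	uint64_t entry;		/* first instruction of the function */
	uint64_t toc;		/* r2 the function expects */
	uint64_t env;
} func_descr_t;

struct regs_sketch { uint64_t nip, r2, r12, msr; };

#define MSR_LE		1ULL		/* LE is the low MSR bit */
#define MSR_KERNEL_LE	MSR_LE		/* assumption: kernel built little-endian */

static void aim_at_handler(struct regs_sketch *regs, void *handler, int elfv2)
{
	if (elfv2) {
		regs->nip = (uint64_t)handler;
		regs->r12 = regs->nip;		/* ELFv2 global-entry convention */
	} else {
		const func_descr_t *d = handler;
		regs->nip = d->entry;
		regs->r2  = d->toc;
	}
	regs->msr = (regs->msr & ~MSR_LE) | MSR_KERNEL_LE;  /* native endian */
}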
fmt64 : fmt32,  				   current->comm, current->pid, "setup_rt_frame", diff --git a/arch/powerpc/kernel/smp-tbsync.c b/arch/powerpc/kernel/smp-tbsync.c index e68fd1ae727..7a37ecd3afa 100644 --- a/arch/powerpc/kernel/smp-tbsync.c +++ b/arch/powerpc/kernel/smp-tbsync.c @@ -9,7 +9,6 @@  #include <linux/sched.h>  #include <linux/smp.h>  #include <linux/unistd.h> -#include <linux/init.h>  #include <linux/slab.h>  #include <linux/atomic.h>  #include <asm/smp.h> diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 8e59abc237d..1007fb802e6 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -35,6 +35,8 @@  #include <asm/ptrace.h>  #include <linux/atomic.h>  #include <asm/irq.h> +#include <asm/hw_irq.h> +#include <asm/kvm_ppc.h>  #include <asm/page.h>  #include <asm/pgtable.h>  #include <asm/prom.h> @@ -145,9 +147,9 @@ static irqreturn_t reschedule_action(int irq, void *data)  	return IRQ_HANDLED;  } -static irqreturn_t call_function_single_action(int irq, void *data) +static irqreturn_t tick_broadcast_ipi_action(int irq, void *data)  { -	generic_smp_call_function_single_interrupt(); +	tick_broadcast_ipi_handler();  	return IRQ_HANDLED;  } @@ -168,14 +170,14 @@ static irqreturn_t debug_ipi_action(int irq, void *data)  static irq_handler_t smp_ipi_action[] = {  	[PPC_MSG_CALL_FUNCTION] =  call_function_action,  	[PPC_MSG_RESCHEDULE] = reschedule_action, -	[PPC_MSG_CALL_FUNC_SINGLE] = call_function_single_action, +	[PPC_MSG_TICK_BROADCAST] = tick_broadcast_ipi_action,  	[PPC_MSG_DEBUGGER_BREAK] = debug_ipi_action,  };  const char *smp_ipi_name[] = {  	[PPC_MSG_CALL_FUNCTION] =  "ipi call function",  	[PPC_MSG_RESCHEDULE] = "ipi reschedule", -	[PPC_MSG_CALL_FUNC_SINGLE] = "ipi call function single", +	[PPC_MSG_TICK_BROADCAST] = "ipi tick-broadcast",  	[PPC_MSG_DEBUGGER_BREAK] = "ipi debugger",  }; @@ -251,8 +253,8 @@ irqreturn_t smp_ipi_demux(void)  			generic_smp_call_function_interrupt();  		if (all & IPI_MESSAGE(PPC_MSG_RESCHEDULE))  			scheduler_ipi(); -		if (all & IPI_MESSAGE(PPC_MSG_CALL_FUNC_SINGLE)) -			generic_smp_call_function_single_interrupt(); +		if (all & IPI_MESSAGE(PPC_MSG_TICK_BROADCAST)) +			tick_broadcast_ipi_handler();  		if (all & IPI_MESSAGE(PPC_MSG_DEBUGGER_BREAK))  			debug_ipi_action(0, NULL);  	} while (info->messages); @@ -280,7 +282,7 @@ EXPORT_SYMBOL_GPL(smp_send_reschedule);  void arch_send_call_function_single_ipi(int cpu)  { -	do_message_pass(cpu, PPC_MSG_CALL_FUNC_SINGLE); +	do_message_pass(cpu, PPC_MSG_CALL_FUNCTION);  }  void arch_send_call_function_ipi_mask(const struct cpumask *mask) @@ -291,6 +293,16 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask)  		do_message_pass(cpu, PPC_MSG_CALL_FUNCTION);  } +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST +void tick_broadcast(const struct cpumask *mask) +{ +	unsigned int cpu; + +	for_each_cpu(cpu, mask) +		do_message_pass(cpu, PPC_MSG_TICK_BROADCAST); +} +#endif +  #if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)  void smp_send_debugger_break(void)  { @@ -369,13 +381,8 @@ void __init smp_prepare_cpus(unsigned int max_cpus)  	cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid));  	cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid)); -	if (smp_ops) -		if (smp_ops->probe) -			max_cpus = smp_ops->probe(); -		else -			max_cpus = NR_CPUS; -	else -		max_cpus = 1; +	if (smp_ops && smp_ops->probe) +		smp_ops->probe();  }  void smp_prepare_boot_cpu(void) @@ -384,6 +391,7 @@ void smp_prepare_boot_cpu(void)  #ifdef CONFIG_PPC64  	paca[boot_cpuid].__current = 
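The smp.c hunks above retire the dedicated call-function-single IPI: single-function calls now ride the ordinary PPC_MSG_CALL_FUNCTION message, freeing that slot for PPC_MSG_TICK_BROADCAST, which tick_broadcast() fires at every cpu in the mask. Demux shape, with a simplified one-bit-per-message encoding (the kernel packs these differently):

#include <stdio.h>

enum { PPC_MSG_CALL_FUNCTION, PPC_MSG_RESCHEDULE,
       PPC_MSG_TICK_BROADCAST, PPC_MSG_DEBUGGER_BREAK };

#define IPI_MESSAGE(m)	(1UL << (m))	/* simplified encoding */

static void tick_broadcast_ipi_handler(void)
{
	puts("broadcast tick");
}

static void ipi_demux(unsigned long pending)
{
	if (pending & IPI_MESSAGE(PPC_MSG_TICK_BROADCAST))
		tick_broadcast_ipi_handler();
	/* other messages elided */
}

int main(void) { ipi_demux(IPI_MESSAGE(PPC_MSG_TICK_BROADCAST)); return 0; }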
current;  #endif +	set_numa_node(numa_cpu_lookup_table[boot_cpuid]);  	current_set[boot_cpuid] = task_thread_info(current);  } @@ -451,38 +459,9 @@ int generic_check_cpu_restart(unsigned int cpu)  	return per_cpu(cpu_state, cpu) == CPU_UP_PREPARE;  } -static atomic_t secondary_inhibit_count; - -/* - * Don't allow secondary CPU threads to come online - */ -void inhibit_secondary_onlining(void) -{ -	/* -	 * This makes secondary_inhibit_count stable during cpu -	 * online/offline operations. -	 */ -	get_online_cpus(); - -	atomic_inc(&secondary_inhibit_count); -	put_online_cpus(); -} -EXPORT_SYMBOL_GPL(inhibit_secondary_onlining); - -/* - * Allow secondary CPU threads to come online again - */ -void uninhibit_secondary_onlining(void) -{ -	get_online_cpus(); -	atomic_dec(&secondary_inhibit_count); -	put_online_cpus(); -} -EXPORT_SYMBOL_GPL(uninhibit_secondary_onlining); - -static int secondaries_inhibited(void) +static bool secondaries_inhibited(void)  { -	return atomic_read(&secondary_inhibit_count); +	return kvm_hv_mode_active();  }  #else /* HOTPLUG_CPU */ @@ -511,7 +490,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)  	 * Don't allow secondary threads to come online if inhibited  	 */  	if (threads_per_core > 1 && secondaries_inhibited() && -	    cpu % threads_per_core != 0) +	    cpu_thread_in_subcore(cpu))  		return -EBUSY;  	if (smp_ops == NULL || @@ -580,7 +559,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)  int cpu_to_core_id(int cpu)  {  	struct device_node *np; -	const int *reg; +	const __be32 *reg;  	int id = -1;  	np = of_get_cpu_node(cpu, NULL); @@ -591,28 +570,12 @@ int cpu_to_core_id(int cpu)  	if (!reg)  		goto out; -	id = *reg; +	id = be32_to_cpup(reg);  out:  	of_node_put(np);  	return id;  } -/* Return the value of the chip-id property corresponding - * to the given logical cpu. - */ -int cpu_to_chip_id(int cpu) -{ -	struct device_node *np; - -	np = of_get_cpu_node(cpu, NULL); -	if (!np) -		return -1; - -	of_node_put(np); -	return of_get_ibm_chip_id(np); -} -EXPORT_SYMBOL(cpu_to_chip_id); -  /* Helper routines for cpu to core mapping */  int cpu_core_index_of_thread(int cpu)  { @@ -760,6 +723,12 @@ void start_secondary(void *unused)  	}  	traverse_core_siblings(cpu, true); +	/* +	 * numa_node_id() works after this. 
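/*
 * A minimal sketch (not from this patch) of the endian-safe pattern the
 * cpu_to_core_id() hunk above adopts: device-tree properties are stored
 * big-endian, so with little-endian kernels now possible the "reg" cell
 * must go through be32_to_cpup() rather than a plain dereference.
 */
static int example_read_core_id(struct device_node *np)
{
	const __be32 *reg = of_get_property(np, "reg", NULL);

	if (!reg)
		return -1;
	return be32_to_cpup(reg);	/* byte-swaps on LE, no-op on BE */
}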
+	 */ +	set_numa_node(numa_cpu_lookup_table[cpu]); +	set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu])); +  	smp_wmb();  	notify_cpu_starting(cpu);  	set_cpu_online(cpu, true); @@ -776,6 +745,28 @@ int setup_profiling_timer(unsigned int multiplier)  	return 0;  } +#ifdef CONFIG_SCHED_SMT +/* cpumask of CPUs with asymmetric SMT dependency */ +static int powerpc_smt_flags(void) +{ +	int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES; + +	if (cpu_has_feature(CPU_FTR_ASYM_SMT)) { +		printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n"); +		flags |= SD_ASYM_PACKING; +	} +	return flags; +} +#endif + +static struct sched_domain_topology_level powerpc_topology[] = { +#ifdef CONFIG_SCHED_SMT +	{ cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) }, +#endif +	{ cpu_cpu_mask, SD_INIT_NAME(DIE) }, +	{ NULL, }, +}; +  void __init smp_cpus_done(unsigned int max_cpus)  {  	cpumask_var_t old_mask; @@ -800,15 +791,8 @@ void __init smp_cpus_done(unsigned int max_cpus)  	dump_numa_cpu_topology(); -} +	set_sched_topology(powerpc_topology); -int arch_sd_sibling_asym_packing(void) -{ -	if (cpu_has_feature(CPU_FTR_ASYM_SMT)) { -		printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n"); -		return SD_ASYM_PACKING; -	} -	return 0;  }  #ifdef CONFIG_HOTPLUG_CPU @@ -844,18 +828,6 @@ void __cpu_die(unsigned int cpu)  		smp_ops->cpu_die(cpu);  } -static DEFINE_MUTEX(powerpc_cpu_hotplug_driver_mutex); - -void cpu_hotplug_driver_lock() -{ -	mutex_lock(&powerpc_cpu_hotplug_driver_mutex); -} - -void cpu_hotplug_driver_unlock() -{ -	mutex_unlock(&powerpc_cpu_hotplug_driver_mutex); -} -  void cpu_die(void)  {  	if (ppc_md.cpu_die) diff --git a/arch/powerpc/kernel/swsusp_asm64.S b/arch/powerpc/kernel/swsusp_asm64.S index 22045984835..988f38dced0 100644 --- a/arch/powerpc/kernel/swsusp_asm64.S +++ b/arch/powerpc/kernel/swsusp_asm64.S @@ -114,7 +114,9 @@ _GLOBAL(swsusp_arch_suspend)  	SAVE_SPECIAL(MSR)  	SAVE_SPECIAL(XER)  #ifdef CONFIG_PPC_BOOK3S_64 +BEGIN_FW_FTR_SECTION  	SAVE_SPECIAL(SDR1) +END_FW_FTR_SECTION_IFCLR(FW_FEATURE_LPAR)  #else  	SAVE_SPR(TCR) @@ -231,7 +233,9 @@ nothing_to_copy:  	/* can't use RESTORE_SPECIAL(MSR) */  	ld	r0, SL_MSR(r11)  	mtmsrd	r0, 0 +BEGIN_FW_FTR_SECTION  	RESTORE_SPECIAL(SDR1) +END_FW_FTR_SECTION_IFCLR(FW_FEATURE_LPAR)  #else  	/* Restore SPRG1, be used to save paca */  	ld	r0, SL_SPRG1(r11) diff --git a/arch/powerpc/kernel/swsusp_booke.S b/arch/powerpc/kernel/swsusp_booke.S index 0f204053e5b..553c1405ee0 100644 --- a/arch/powerpc/kernel/swsusp_booke.S +++ b/arch/powerpc/kernel/swsusp_booke.S @@ -74,21 +74,21 @@ _GLOBAL(swsusp_arch_suspend)  	bne	1b  	/* Save SPRGs */ -	mfsprg	r4,0 +	mfspr	r4,SPRN_SPRG0  	stw	r4,SL_SPRG0(r11) -	mfsprg	r4,1 +	mfspr	r4,SPRN_SPRG1  	stw	r4,SL_SPRG1(r11) -	mfsprg	r4,2 +	mfspr	r4,SPRN_SPRG2  	stw	r4,SL_SPRG2(r11) -	mfsprg	r4,3 +	mfspr	r4,SPRN_SPRG3  	stw	r4,SL_SPRG3(r11) -	mfsprg	r4,4 +	mfspr	r4,SPRN_SPRG4  	stw	r4,SL_SPRG4(r11) -	mfsprg	r4,5 +	mfspr	r4,SPRN_SPRG5  	stw	r4,SL_SPRG5(r11) -	mfsprg	r4,6 +	mfspr	r4,SPRN_SPRG6  	stw	r4,SL_SPRG6(r11) -	mfsprg	r4,7 +	mfspr	r4,SPRN_SPRG7  	stw	r4,SL_SPRG7(r11)  	/* Call the low level suspend stuff (we should probably have made @@ -150,21 +150,21 @@ _GLOBAL(swsusp_arch_resume)  	bl	_tlbil_all  	lwz	r4,SL_SPRG0(r11) -	mtsprg	0,r4 +	mtspr	SPRN_SPRG0,r4  	lwz	r4,SL_SPRG1(r11) -	mtsprg	1,r4 +	mtspr	SPRN_SPRG1,r4  	lwz	r4,SL_SPRG2(r11) -	mtsprg	2,r4 +	mtspr	SPRN_SPRG2,r4  	lwz	r4,SL_SPRG3(r11) -	mtsprg	3,r4 +	mtspr	SPRN_SPRG3,r4  	lwz	r4,SL_SPRG4(r11) -	mtsprg	4,r4 +	mtspr	SPRN_SPRG4,r4  	lwz	
r4,SL_SPRG5(r11) -	mtsprg	5,r4 +	mtspr	SPRN_SPRG5,r4  	lwz	r4,SL_SPRG6(r11) -	mtsprg	6,r4 +	mtspr	SPRN_SPRG6,r4  	lwz	r4,SL_SPRG7(r11) -	mtsprg	7,r4 +	mtspr	SPRN_SPRG7,r4  	/* restore the MSR */  	lwz	r3,SL_MSR(r11) diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c index 4e3cc47f26b..cd9be9aa016 100644 --- a/arch/powerpc/kernel/syscalls.c +++ b/arch/powerpc/kernel/syscalls.c @@ -34,7 +34,6 @@  #include <linux/ipc.h>  #include <linux/utsname.h>  #include <linux/file.h> -#include <linux/init.h>  #include <linux/personality.h>  #include <asm/uaccess.h> diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 27a90b99ef6..67fd2fd2620 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -17,6 +17,7 @@  #include <asm/machdep.h>  #include <asm/smp.h>  #include <asm/pmc.h> +#include <asm/firmware.h>  #include "cacheinfo.h" @@ -50,8 +51,6 @@ static ssize_t store_smt_snooze_delay(struct device *dev,  		return -EINVAL;  	per_cpu(smt_snooze_delay, cpu->dev.id) = snooze; -	update_smt_snooze_delay(cpu->dev.id, snooze); -  	return count;  } @@ -85,6 +84,304 @@ __setup("smt-snooze-delay=", setup_smt_snooze_delay);  #endif /* CONFIG_PPC64 */ +#ifdef CONFIG_PPC_FSL_BOOK3E +#define MAX_BIT				63 + +static u64 pw20_wt; +static u64 altivec_idle_wt; + +static unsigned int get_idle_ticks_bit(u64 ns) +{ +	u64 cycle; + +	if (ns >= 10000) +		cycle = div_u64(ns + 500, 1000) * tb_ticks_per_usec; +	else +		cycle = div_u64(ns * tb_ticks_per_usec, 1000); + +	if (!cycle) +		return 0; + +	return ilog2(cycle); +} + +static void do_show_pwrmgtcr0(void *val) +{ +	u32 *value = val; + +	*value = mfspr(SPRN_PWRMGTCR0); +} + +static ssize_t show_pw20_state(struct device *dev, +				struct device_attribute *attr, char *buf) +{ +	u32 value; +	unsigned int cpu = dev->id; + +	smp_call_function_single(cpu, do_show_pwrmgtcr0, &value, 1); + +	value &= PWRMGTCR0_PW20_WAIT; + +	return sprintf(buf, "%u\n", value ? 1 : 0); +} + +static void do_store_pw20_state(void *val) +{ +	u32 *value = val; +	u32 pw20_state; + +	pw20_state = mfspr(SPRN_PWRMGTCR0); + +	if (*value) +		pw20_state |= PWRMGTCR0_PW20_WAIT; +	else +		pw20_state &= ~PWRMGTCR0_PW20_WAIT; + +	mtspr(SPRN_PWRMGTCR0, pw20_state); +} + +static ssize_t store_pw20_state(struct device *dev, +				struct device_attribute *attr, +				const char *buf, size_t count) +{ +	u32 value; +	unsigned int cpu = dev->id; + +	if (kstrtou32(buf, 0, &value)) +		return -EINVAL; + +	if (value > 1) +		return -EINVAL; + +	smp_call_function_single(cpu, do_store_pw20_state, &value, 1); + +	return count; +} + +static ssize_t show_pw20_wait_time(struct device *dev, +				struct device_attribute *attr, char *buf) +{ +	u32 value; +	u64 tb_cycle = 1; +	u64 time; + +	unsigned int cpu = dev->id; + +	if (!pw20_wt) { +		smp_call_function_single(cpu, do_show_pwrmgtcr0, &value, 1); +		value = (value & PWRMGTCR0_PW20_ENT) >> +					PWRMGTCR0_PW20_ENT_SHIFT; + +		tb_cycle = (tb_cycle << (MAX_BIT - value + 1)); +		/* convert ms to ns */ +		if (tb_ticks_per_usec > 1000) { +			time = div_u64(tb_cycle, tb_ticks_per_usec / 1000); +		} else { +			u32 rem_us; + +			time = div_u64_rem(tb_cycle, tb_ticks_per_usec, +						&rem_us); +			time = time * 1000 + rem_us * 1000 / tb_ticks_per_usec; +		} +	} else { +		time = pw20_wt; +	} + +	return sprintf(buf, "%llu\n", time > 0 ? 
time : 0); +} + +static void set_pw20_wait_entry_bit(void *val) +{ +	u32 *value = val; +	u32 pw20_idle; + +	pw20_idle = mfspr(SPRN_PWRMGTCR0); + +	/* Set Automatic PW20 Core Idle Count */ +	/* clear count */ +	pw20_idle &= ~PWRMGTCR0_PW20_ENT; + +	/* set count */ +	pw20_idle |= ((MAX_BIT - *value) << PWRMGTCR0_PW20_ENT_SHIFT); + +	mtspr(SPRN_PWRMGTCR0, pw20_idle); +} + +static ssize_t store_pw20_wait_time(struct device *dev, +				struct device_attribute *attr, +				const char *buf, size_t count) +{ +	u32 entry_bit; +	u64 value; + +	unsigned int cpu = dev->id; + +	if (kstrtou64(buf, 0, &value)) +		return -EINVAL; + +	if (!value) +		return -EINVAL; + +	entry_bit = get_idle_ticks_bit(value); +	if (entry_bit > MAX_BIT) +		return -EINVAL; + +	pw20_wt = value; + +	smp_call_function_single(cpu, set_pw20_wait_entry_bit, +				&entry_bit, 1); + +	return count; +} + +static ssize_t show_altivec_idle(struct device *dev, +				struct device_attribute *attr, char *buf) +{ +	u32 value; +	unsigned int cpu = dev->id; + +	smp_call_function_single(cpu, do_show_pwrmgtcr0, &value, 1); + +	value &= PWRMGTCR0_AV_IDLE_PD_EN; + +	return sprintf(buf, "%u\n", value ? 1 : 0); +} + +static void do_store_altivec_idle(void *val) +{ +	u32 *value = val; +	u32 altivec_idle; + +	altivec_idle = mfspr(SPRN_PWRMGTCR0); + +	if (*value) +		altivec_idle |= PWRMGTCR0_AV_IDLE_PD_EN; +	else +		altivec_idle &= ~PWRMGTCR0_AV_IDLE_PD_EN; + +	mtspr(SPRN_PWRMGTCR0, altivec_idle); +} + +static ssize_t store_altivec_idle(struct device *dev, +				struct device_attribute *attr, +				const char *buf, size_t count) +{ +	u32 value; +	unsigned int cpu = dev->id; + +	if (kstrtou32(buf, 0, &value)) +		return -EINVAL; + +	if (value > 1) +		return -EINVAL; + +	smp_call_function_single(cpu, do_store_altivec_idle, &value, 1); + +	return count; +} + +static ssize_t show_altivec_idle_wait_time(struct device *dev, +				struct device_attribute *attr, char *buf) +{ +	u32 value; +	u64 tb_cycle = 1; +	u64 time; + +	unsigned int cpu = dev->id; + +	if (!altivec_idle_wt) { +		smp_call_function_single(cpu, do_show_pwrmgtcr0, &value, 1); +		value = (value & PWRMGTCR0_AV_IDLE_CNT) >> +					PWRMGTCR0_AV_IDLE_CNT_SHIFT; + +		tb_cycle = (tb_cycle << (MAX_BIT - value + 1)); +		/* convert ms to ns */ +		if (tb_ticks_per_usec > 1000) { +			time = div_u64(tb_cycle, tb_ticks_per_usec / 1000); +		} else { +			u32 rem_us; + +			time = div_u64_rem(tb_cycle, tb_ticks_per_usec, +						&rem_us); +			time = time * 1000 + rem_us * 1000 / tb_ticks_per_usec; +		} +	} else { +		time = altivec_idle_wt; +	} + +	return sprintf(buf, "%llu\n", time > 0 ? 
time : 0); +} + +static void set_altivec_idle_wait_entry_bit(void *val) +{ +	u32 *value = val; +	u32 altivec_idle; + +	altivec_idle = mfspr(SPRN_PWRMGTCR0); + +	/* Set Automatic AltiVec Idle Count */ +	/* clear count */ +	altivec_idle &= ~PWRMGTCR0_AV_IDLE_CNT; + +	/* set count */ +	altivec_idle |= ((MAX_BIT - *value) << PWRMGTCR0_AV_IDLE_CNT_SHIFT); + +	mtspr(SPRN_PWRMGTCR0, altivec_idle); +} + +static ssize_t store_altivec_idle_wait_time(struct device *dev, +				struct device_attribute *attr, +				const char *buf, size_t count) +{ +	u32 entry_bit; +	u64 value; + +	unsigned int cpu = dev->id; + +	if (kstrtou64(buf, 0, &value)) +		return -EINVAL; + +	if (!value) +		return -EINVAL; + +	entry_bit = get_idle_ticks_bit(value); +	if (entry_bit > MAX_BIT) +		return -EINVAL; + +	altivec_idle_wt = value; + +	smp_call_function_single(cpu, set_altivec_idle_wait_entry_bit, +				&entry_bit, 1); + +	return count; +} + +/* + * Enable/Disable interface: + * 0, disable. 1, enable. + */ +static DEVICE_ATTR(pw20_state, 0600, show_pw20_state, store_pw20_state); +static DEVICE_ATTR(altivec_idle, 0600, show_altivec_idle, store_altivec_idle); + +/* + * Set wait time interface (nanoseconds): + * Example: based on a TB frequency of 41 MHz. + * 1~48(ns): TB[63] + * 49~97(ns): TB[62] + * 98~195(ns): TB[61] + * 196~390(ns): TB[60] + * 391~780(ns): TB[59] + * 781~1560(ns): TB[58] + * ... + */ +static DEVICE_ATTR(pw20_wait_time, 0600, +			show_pw20_wait_time, +			store_pw20_wait_time); +static DEVICE_ATTR(altivec_idle_wait_time, 0600, +			show_altivec_idle_wait_time, +			store_altivec_idle_wait_time); +#endif +  /*   * Enabling PMCs will slow partition context switch times so we only do   * it the first time we write to the PMCs. @@ -107,16 +404,18 @@ void ppc_enable_pmcs(void)  }  EXPORT_SYMBOL(ppc_enable_pmcs); -#define SYSFS_PMCSETUP(NAME, ADDRESS) \ +#define __SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, EXTRA) \  static void read_##NAME(void *val) \  { \  	*(unsigned long *)val = mfspr(ADDRESS);	\  } \  static void write_##NAME(void *val) \  { \ -	ppc_enable_pmcs(); \ +	EXTRA; \  	mtspr(ADDRESS, *(unsigned long *)val);	\ -} \ +} + +#define __SYSFS_SPRSETUP_SHOW_STORE(NAME) \  static ssize_t show_##NAME(struct device *dev, \  			struct device_attribute *attr, \  			char *buf) \ @@ -139,6 +438,15 @@ static ssize_t __used \  	return count; \  } +#define SYSFS_PMCSETUP(NAME, ADDRESS) \ +	__SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, ppc_enable_pmcs()) \ +	__SYSFS_SPRSETUP_SHOW_STORE(NAME) +#define SYSFS_SPRSETUP(NAME, ADDRESS) \ +	__SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, ) \ +	__SYSFS_SPRSETUP_SHOW_STORE(NAME) + +#define SYSFS_SPRSETUP_SHOW_STORE(NAME) \ +	__SYSFS_SPRSETUP_SHOW_STORE(NAME)  /* Let's define all possible registers, we'll only hook up the ones   * that are implemented on the current processor @@ -174,34 +482,50 @@ SYSFS_PMCSETUP(pmc7, SPRN_PMC7);  SYSFS_PMCSETUP(pmc8, SPRN_PMC8);  SYSFS_PMCSETUP(mmcra, SPRN_MMCRA); -SYSFS_PMCSETUP(purr, SPRN_PURR); -SYSFS_PMCSETUP(spurr, SPRN_SPURR); -SYSFS_PMCSETUP(dscr, SPRN_DSCR); -SYSFS_PMCSETUP(pir, SPRN_PIR); +SYSFS_SPRSETUP(purr, SPRN_PURR); +SYSFS_SPRSETUP(spurr, SPRN_SPURR); +SYSFS_SPRSETUP(pir, SPRN_PIR); +/* +  Let's only enable read for phyp resources and +  enable write when needed with a separate function. +  Let's be conservative and default to pseries. 
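/*
 * A worked example (not from this patch) of the mapping implemented by
 * get_idle_ticks_bit() and set_pw20_wait_entry_bit() above, assuming the
 * 41 MHz timebase used in the table: for a requested wait of 100 ns,
 *
 *	cycle = (100 * 41) / 1000 = 4,	ilog2(4) = 2
 *
 * so the stored entry count is MAX_BIT - 2 = 61 and the core waits for
 * timebase bit TB[61] to flip, matching the 98~195(ns) row of the table.
 */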
+*/  static DEVICE_ATTR(mmcra, 0600, show_mmcra, store_mmcra);  static DEVICE_ATTR(spurr, 0400, show_spurr, NULL); -static DEVICE_ATTR(dscr, 0600, show_dscr, store_dscr); -static DEVICE_ATTR(purr, 0600, show_purr, store_purr); +static DEVICE_ATTR(purr, 0400, show_purr, store_purr);  static DEVICE_ATTR(pir, 0400, show_pir, NULL); -unsigned long dscr_default = 0; -EXPORT_SYMBOL(dscr_default); +static unsigned long dscr_default; -static ssize_t show_dscr_default(struct device *dev, -		struct device_attribute *attr, char *buf) +static void read_dscr(void *val)  { -	return sprintf(buf, "%lx\n", dscr_default); +	*(unsigned long *)val = get_paca()->dscr_default;  } -static void update_dscr(void *dummy) +static void write_dscr(void *val)  { +	get_paca()->dscr_default = *(unsigned long *)val;  	if (!current->thread.dscr_inherit) { -		current->thread.dscr = dscr_default; -		mtspr(SPRN_DSCR, dscr_default); +		current->thread.dscr = *(unsigned long *)val; +		mtspr(SPRN_DSCR, *(unsigned long *)val);  	}  } +SYSFS_SPRSETUP_SHOW_STORE(dscr); +static DEVICE_ATTR(dscr, 0600, show_dscr, store_dscr); + +static void add_write_permission_dev_attr(struct device_attribute *attr) +{ +	attr->attr.mode |= 0200; +} + +static ssize_t show_dscr_default(struct device *dev, +		struct device_attribute *attr, char *buf) +{ +	return sprintf(buf, "%lx\n", dscr_default); +} +  static ssize_t __used store_dscr_default(struct device *dev,  		struct device_attribute *attr, const char *buf,  		size_t count) @@ -214,7 +538,7 @@ static ssize_t __used store_dscr_default(struct device *dev,  		return -EINVAL;  	dscr_default = val; -	on_each_cpu(update_dscr, NULL, 1); +	on_each_cpu(write_dscr, &val, 1);  	return count;  } @@ -238,34 +562,34 @@ SYSFS_PMCSETUP(pa6t_pmc3, SPRN_PA6T_PMC3);  SYSFS_PMCSETUP(pa6t_pmc4, SPRN_PA6T_PMC4);  SYSFS_PMCSETUP(pa6t_pmc5, SPRN_PA6T_PMC5);  #ifdef CONFIG_DEBUG_KERNEL -SYSFS_PMCSETUP(hid0, SPRN_HID0); -SYSFS_PMCSETUP(hid1, SPRN_HID1); -SYSFS_PMCSETUP(hid4, SPRN_HID4); -SYSFS_PMCSETUP(hid5, SPRN_HID5); -SYSFS_PMCSETUP(ima0, SPRN_PA6T_IMA0); -SYSFS_PMCSETUP(ima1, SPRN_PA6T_IMA1); -SYSFS_PMCSETUP(ima2, SPRN_PA6T_IMA2); -SYSFS_PMCSETUP(ima3, SPRN_PA6T_IMA3); -SYSFS_PMCSETUP(ima4, SPRN_PA6T_IMA4); -SYSFS_PMCSETUP(ima5, SPRN_PA6T_IMA5); -SYSFS_PMCSETUP(ima6, SPRN_PA6T_IMA6); -SYSFS_PMCSETUP(ima7, SPRN_PA6T_IMA7); -SYSFS_PMCSETUP(ima8, SPRN_PA6T_IMA8); -SYSFS_PMCSETUP(ima9, SPRN_PA6T_IMA9); -SYSFS_PMCSETUP(imaat, SPRN_PA6T_IMAAT); -SYSFS_PMCSETUP(btcr, SPRN_PA6T_BTCR); -SYSFS_PMCSETUP(pccr, SPRN_PA6T_PCCR); -SYSFS_PMCSETUP(rpccr, SPRN_PA6T_RPCCR); -SYSFS_PMCSETUP(der, SPRN_PA6T_DER); -SYSFS_PMCSETUP(mer, SPRN_PA6T_MER); -SYSFS_PMCSETUP(ber, SPRN_PA6T_BER); -SYSFS_PMCSETUP(ier, SPRN_PA6T_IER); -SYSFS_PMCSETUP(sier, SPRN_PA6T_SIER); -SYSFS_PMCSETUP(siar, SPRN_PA6T_SIAR); -SYSFS_PMCSETUP(tsr0, SPRN_PA6T_TSR0); -SYSFS_PMCSETUP(tsr1, SPRN_PA6T_TSR1); -SYSFS_PMCSETUP(tsr2, SPRN_PA6T_TSR2); -SYSFS_PMCSETUP(tsr3, SPRN_PA6T_TSR3); +SYSFS_SPRSETUP(hid0, SPRN_HID0); +SYSFS_SPRSETUP(hid1, SPRN_HID1); +SYSFS_SPRSETUP(hid4, SPRN_HID4); +SYSFS_SPRSETUP(hid5, SPRN_HID5); +SYSFS_SPRSETUP(ima0, SPRN_PA6T_IMA0); +SYSFS_SPRSETUP(ima1, SPRN_PA6T_IMA1); +SYSFS_SPRSETUP(ima2, SPRN_PA6T_IMA2); +SYSFS_SPRSETUP(ima3, SPRN_PA6T_IMA3); +SYSFS_SPRSETUP(ima4, SPRN_PA6T_IMA4); +SYSFS_SPRSETUP(ima5, SPRN_PA6T_IMA5); +SYSFS_SPRSETUP(ima6, SPRN_PA6T_IMA6); +SYSFS_SPRSETUP(ima7, SPRN_PA6T_IMA7); +SYSFS_SPRSETUP(ima8, SPRN_PA6T_IMA8); +SYSFS_SPRSETUP(ima9, SPRN_PA6T_IMA9); +SYSFS_SPRSETUP(imaat, SPRN_PA6T_IMAAT); +SYSFS_SPRSETUP(btcr, 
SPRN_PA6T_BTCR); +SYSFS_SPRSETUP(pccr, SPRN_PA6T_PCCR); +SYSFS_SPRSETUP(rpccr, SPRN_PA6T_RPCCR); +SYSFS_SPRSETUP(der, SPRN_PA6T_DER); +SYSFS_SPRSETUP(mer, SPRN_PA6T_MER); +SYSFS_SPRSETUP(ber, SPRN_PA6T_BER); +SYSFS_SPRSETUP(ier, SPRN_PA6T_IER); +SYSFS_SPRSETUP(sier, SPRN_PA6T_SIER); +SYSFS_SPRSETUP(siar, SPRN_PA6T_SIAR); +SYSFS_SPRSETUP(tsr0, SPRN_PA6T_TSR0); +SYSFS_SPRSETUP(tsr1, SPRN_PA6T_TSR1); +SYSFS_SPRSETUP(tsr2, SPRN_PA6T_TSR2); +SYSFS_SPRSETUP(tsr3, SPRN_PA6T_TSR3);  #endif /* CONFIG_DEBUG_KERNEL */  #endif /* HAS_PPC_PMC_PA6T */ @@ -394,8 +718,11 @@ static void register_cpu_online(unsigned int cpu)  	if (cpu_has_feature(CPU_FTR_MMCRA))  		device_create_file(s, &dev_attr_mmcra); -	if (cpu_has_feature(CPU_FTR_PURR)) +	if (cpu_has_feature(CPU_FTR_PURR)) { +		if (!firmware_has_feature(FW_FEATURE_LPAR)) +			add_write_permission_dev_attr(&dev_attr_purr);  		device_create_file(s, &dev_attr_purr); +	}  	if (cpu_has_feature(CPU_FTR_SPURR))  		device_create_file(s, &dev_attr_spurr); @@ -407,6 +734,15 @@ static void register_cpu_online(unsigned int cpu)  		device_create_file(s, &dev_attr_pir);  #endif /* CONFIG_PPC64 */ +#ifdef CONFIG_PPC_FSL_BOOK3E +	if (PVR_VER(cur_cpu_spec->pvr_value) == PVR_VER_E6500) { +		device_create_file(s, &dev_attr_pw20_state); +		device_create_file(s, &dev_attr_pw20_wait_time); + +		device_create_file(s, &dev_attr_altivec_idle); +		device_create_file(s, &dev_attr_altivec_idle_wait_time); +	} +#endif  	cacheinfo_cpu_online(cpu);  } @@ -479,6 +815,15 @@ static void unregister_cpu_online(unsigned int cpu)  		device_remove_file(s, &dev_attr_pir);  #endif /* CONFIG_PPC64 */ +#ifdef CONFIG_PPC_FSL_BOOK3E +	if (PVR_VER(cur_cpu_spec->pvr_value) == PVR_VER_E6500) { +		device_remove_file(s, &dev_attr_pw20_state); +		device_remove_file(s, &dev_attr_pw20_wait_time); + +		device_remove_file(s, &dev_attr_altivec_idle); +		device_remove_file(s, &dev_attr_altivec_idle_wait_time); +	} +#endif  	cacheinfo_cpu_offline(cpu);  } @@ -643,7 +988,8 @@ static int __init topology_init(void)  	int cpu;  	register_nodes(); -	register_cpu_notifier(&sysfs_cpu_nb); + +	cpu_notifier_register_begin();  	for_each_possible_cpu(cpu) {  		struct cpu *c = &per_cpu(cpu_devices, cpu); @@ -667,6 +1013,11 @@ static int __init topology_init(void)  		if (cpu_online(cpu))  			register_cpu_online(cpu);  	} + +	__register_cpu_notifier(&sysfs_cpu_nb); + +	cpu_notifier_register_done(); +  #ifdef CONFIG_PPC64  	sysfs_create_dscr_default();  #endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S index 93219c34af3..895c50ca943 100644 --- a/arch/powerpc/kernel/systbl.S +++ b/arch/powerpc/kernel/systbl.S @@ -17,12 +17,12 @@  #include <asm/ppc_asm.h>  #ifdef CONFIG_PPC64 -#define SYSCALL(func)		.llong	.sys_##func,.sys_##func -#define COMPAT_SYS(func)	.llong	.sys_##func,.compat_sys_##func -#define PPC_SYS(func)		.llong	.ppc_##func,.ppc_##func -#define OLDSYS(func)		.llong	.sys_ni_syscall,.sys_ni_syscall -#define SYS32ONLY(func)		.llong	.sys_ni_syscall,.compat_sys_##func -#define SYSX(f, f3264, f32)	.llong	.f,.f3264 +#define SYSCALL(func)		.llong	DOTSYM(sys_##func),DOTSYM(sys_##func) +#define COMPAT_SYS(func)	.llong	DOTSYM(sys_##func),DOTSYM(compat_sys_##func) +#define PPC_SYS(func)		.llong	DOTSYM(ppc_##func),DOTSYM(ppc_##func) +#define OLDSYS(func)		.llong	DOTSYM(sys_ni_syscall),DOTSYM(sys_ni_syscall) +#define SYS32ONLY(func)		.llong	DOTSYM(sys_ni_syscall),DOTSYM(compat_sys_##func) +#define SYSX(f, f3264, f32)	.llong	DOTSYM(f),DOTSYM(f3264)  #else  #define SYSCALL(func)		
.long	sys_##func  #define COMPAT_SYS(func)	.long	sys_##func @@ -36,6 +36,8 @@  #define PPC_SYS_SPU(func)	PPC_SYS(func)  #define SYSX_SPU(f, f3264, f32)	SYSX(f, f3264, f32) +.section .rodata,"a" +  #ifdef CONFIG_PPC64  #define sys_sigpending	sys_ni_syscall  #define sys_old_getrlimit sys_ni_syscall @@ -43,5 +45,7 @@  	.p2align	3  #endif -_GLOBAL(sys_call_table) +.globl sys_call_table +sys_call_table: +  #include <asm/systbl.h> diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 192b051df97..9fff9cdcc51 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -42,6 +42,7 @@  #include <linux/timex.h>  #include <linux/kernel_stat.h>  #include <linux/time.h> +#include <linux/clockchips.h>  #include <linux/init.h>  #include <linux/profile.h>  #include <linux/cpu.h> @@ -106,7 +107,7 @@ struct clock_event_device decrementer_clockevent = {  	.irq            = 0,  	.set_next_event = decrementer_set_next_event,  	.set_mode       = decrementer_set_mode, -	.features       = CLOCK_EVT_FEAT_ONESHOT, +	.features       = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP,  };  EXPORT_SYMBOL(decrementer_clockevent); @@ -213,8 +214,6 @@ static u64 scan_dispatch_log(u64 stop_tb)  	if (i == be64_to_cpu(vpa->dtl_idx))  		return 0;  	while (i < be64_to_cpu(vpa->dtl_idx)) { -		if (dtl_consumer) -			dtl_consumer(dtl, i);  		dtb = be64_to_cpu(dtl->timebase);  		tb_delta = be32_to_cpu(dtl->enqueue_to_dispatch_time) +  			be32_to_cpu(dtl->ready_to_enqueue_time); @@ -227,6 +226,8 @@ static u64 scan_dispatch_log(u64 stop_tb)  		}  		if (dtb > stop_tb)  			break; +		if (dtl_consumer) +			dtl_consumer(dtl, i);  		stolen += tb_delta;  		++i;  		++dtl; @@ -478,6 +479,47 @@ void arch_irq_work_raise(void)  #endif /* CONFIG_IRQ_WORK */ +void __timer_interrupt(void) +{ +	struct pt_regs *regs = get_irq_regs(); +	u64 *next_tb = &__get_cpu_var(decrementers_next_tb); +	struct clock_event_device *evt = &__get_cpu_var(decrementers); +	u64 now; + +	trace_timer_interrupt_entry(regs); + +	if (test_irq_work_pending()) { +		clear_irq_work_pending(); +		irq_work_run(); +	} + +	now = get_tb_or_rtc(); +	if (now >= *next_tb) { +		*next_tb = ~(u64)0; +		if (evt->event_handler) +			evt->event_handler(evt); +		__get_cpu_var(irq_stat).timer_irqs_event++; +	} else { +		now = *next_tb - now; +		if (now <= DECREMENTER_MAX) +			set_dec((int)now); +		/* We may have raced with new irq work */ +		if (test_irq_work_pending()) +			set_dec(1); +		__get_cpu_var(irq_stat).timer_irqs_others++; +	} + +#ifdef CONFIG_PPC64 +	/* collect purr register values often, for accurate calculations */ +	if (firmware_has_feature(FW_FEATURE_SPLPAR)) { +		struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array); +		cu->current_tb = mfspr(SPRN_PURR); +	} +#endif + +	trace_timer_interrupt_exit(regs); +} +  /*   * timer_interrupt - gets called when the decrementer overflows,   * with interrupts disabled. @@ -486,8 +528,6 @@ void timer_interrupt(struct pt_regs * regs)  {  	struct pt_regs *old_regs;  	u64 *next_tb = &__get_cpu_var(decrementers_next_tb); -	struct clock_event_device *evt = &__get_cpu_var(decrementers); -	u64 now;  	/* Ensure a positive value is written to the decrementer, or else  	 * some CPUs will continue to take decrementer exceptions. 
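/*
 * A condensed sketch (not from this patch) of the idea shared by the
 * decrementer hunks above and below: the decrementer is a down-counter
 * that raises the timer exception when it underflows, so the next event
 * is clamped to DECREMENTER_MAX, and because irq work can be raised
 * between computing the delta and writing it, the code re-arms with a
 * tiny value when it loses that race. Names are the ones used in the
 * surrounding hunks; this helper itself is hypothetical.
 */
static void example_arm_decrementer(u64 now, u64 next_tb)
{
	u64 delta = next_tb - now;

	if (delta > DECREMENTER_MAX)	/* cap to what DEC can hold */
		delta = DECREMENTER_MAX;
	set_dec((int)delta);
	if (test_irq_work_pending())	/* raced with irq work being raised */
		set_dec(1);		/* force an immediate interrupt */
}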
@@ -510,9 +550,8 @@ void timer_interrupt(struct pt_regs * regs)  	 */  	may_hard_irq_enable(); -	__get_cpu_var(irq_stat).timer_irqs++; -#if defined(CONFIG_PPC32) && defined(CONFIG_PMAC) +#if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC)  	if (atomic_read(&ppc_n_lost_interrupts) != 0)  		do_IRQ(regs);  #endif @@ -520,34 +559,7 @@ void timer_interrupt(struct pt_regs * regs)  	old_regs = set_irq_regs(regs);  	irq_enter(); -	trace_timer_interrupt_entry(regs); - -	if (test_irq_work_pending()) { -		clear_irq_work_pending(); -		irq_work_run(); -	} - -	now = get_tb_or_rtc(); -	if (now >= *next_tb) { -		*next_tb = ~(u64)0; -		if (evt->event_handler) -			evt->event_handler(evt); -	} else { -		now = *next_tb - now; -		if (now <= DECREMENTER_MAX) -			set_dec((int)now); -	} - -#ifdef CONFIG_PPC64 -	/* collect purr register values often, for accurate calculations */ -	if (firmware_has_feature(FW_FEATURE_SPLPAR)) { -		struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array); -		cu->current_tb = mfspr(SPRN_PURR); -	} -#endif - -	trace_timer_interrupt_exit(regs); - +	__timer_interrupt();  	irq_exit();  	set_irq_regs(old_regs);  } @@ -803,6 +815,11 @@ static int decrementer_set_next_event(unsigned long evt,  {  	__get_cpu_var(decrementers_next_tb) = get_tb_or_rtc() + evt;  	set_dec(evt); + +	/* We may have raced with new irq work */ +	if (test_irq_work_pending()) +		set_dec(1); +  	return 0;  } @@ -813,6 +830,15 @@ static void decrementer_set_mode(enum clock_event_mode mode,  		decrementer_set_next_event(DECREMENTER_MAX, dev);  } +/* Interrupt handler for the timer broadcast IPI */ +void tick_broadcast_ipi_handler(void) +{ +	u64 *next_tb = &__get_cpu_var(decrementers_next_tb); + +	*next_tb = get_tb_or_rtc(); +	__timer_interrupt(); +} +  static void register_decrementer_clockevent(int cpu)  {  	struct clock_event_device *dec = &per_cpu(decrementers, cpu); @@ -916,6 +942,7 @@ void __init time_init(void)  	clocksource_init();  	init_decrementer_clockevent(); +	tick_setup_hrtimer_broadcast();  } diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 7b60b985146..2a324f4cb1b 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -10,18 +10,18 @@  #include <asm/ppc-opcode.h>  #include <asm/ptrace.h>  #include <asm/reg.h> +#include <asm/bug.h>  #ifdef CONFIG_VSX -/* See fpu.S, this is very similar but to save/restore checkpointed FPRs/VSRs */ -#define __SAVE_32FPRS_VSRS_TRANSACT(n,c,base)	\ +/* See fpu.S, this is borrowed from there */ +#define __SAVE_32FPRS_VSRS(n,c,base)		\  BEGIN_FTR_SECTION				\  	b	2f;				\  END_FTR_SECTION_IFSET(CPU_FTR_VSX);		\ -	SAVE_32FPRS_TRANSACT(n,base);		\ +	SAVE_32FPRS(n,base);			\  	b	3f;				\ -2:	SAVE_32VSRS_TRANSACT(n,c,base);		\ +2:	SAVE_32VSRS(n,c,base);			\  3: -/* ...and this is just plain borrowed from there. 
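/*
 * (Aside, not from this patch.) Tying the smp.c and time.c hunks
 * together: a CPU whose decrementer stops in deep idle
 * (CLOCK_EVT_FEAT_C3STOP) is woken by a PPC_MSG_TICK_BROADCAST IPI sent
 * from tick_broadcast(), and tick_broadcast_ipi_handler() simply rewinds
 * decrementers_next_tb to "now" so that __timer_interrupt() treats the
 * tick as due and runs the normal clockevent handler.
 */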
*/  #define __REST_32FPRS_VSRS(n,c,base)		\  BEGIN_FTR_SECTION				\  	b	2f;				\ @@ -31,18 +31,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX);		\  2:	REST_32VSRS(n,c,base);			\  3:  #else -#define __SAVE_32FPRS_VSRS_TRANSACT(n,c,base) SAVE_32FPRS_TRANSACT(n, base) -#define __REST_32FPRS_VSRS(n,c,base)	      REST_32FPRS(n, base) +#define __SAVE_32FPRS_VSRS(n,c,base)	SAVE_32FPRS(n, base) +#define __REST_32FPRS_VSRS(n,c,base)	REST_32FPRS(n, base)  #endif -#define SAVE_32FPRS_VSRS_TRANSACT(n,c,base) \ -	__SAVE_32FPRS_VSRS_TRANSACT(n,__REG_##c,__REG_##base) +#define SAVE_32FPRS_VSRS(n,c,base) \ +	__SAVE_32FPRS_VSRS(n,__REG_##c,__REG_##base)  #define REST_32FPRS_VSRS(n,c,base) \  	__REST_32FPRS_VSRS(n,__REG_##c,__REG_##base)  /* Stack frame offsets for local variables. */  #define TM_FRAME_L0	TM_FRAME_SIZE-16  #define TM_FRAME_L1	TM_FRAME_SIZE-8 -#define STACK_PARAM(x)	(48+((x)*8))  /* In order to access the TM SPRs, TM must be enabled.  So, do so: */ @@ -79,7 +78,6 @@ _GLOBAL(tm_abort)  	TABORT(R3)  	blr -  /* void tm_reclaim(struct thread_struct *thread,   *                 unsigned long orig_msr,   *		   uint8_t cause) @@ -102,14 +100,14 @@ _GLOBAL(tm_abort)  _GLOBAL(tm_reclaim)  	mfcr	r6  	mflr	r0 -	std	r6, 8(r1) +	stw	r6, 8(r1)  	std	r0, 16(r1) -	std	r2, 40(r1) +	std	r2, STK_GOT(r1)  	stdu	r1, -TM_FRAME_SIZE(r1)  	/* We've a struct pt_regs at [r1+STACK_FRAME_OVERHEAD]. */ -	std	r3, STACK_PARAM(0)(r1) +	std	r3, STK_PARAM(R3)(r1)  	SAVE_NVGPRS(r1)  	/* We need to setup MSR for VSX register save instructions.  Here we @@ -123,6 +121,7 @@ _GLOBAL(tm_reclaim)  	mr	r15, r14  	ori	r15, r15, MSR_FP  	li	r16, MSR_RI +	ori	r16, r16, MSR_EE /* IRQs hard off */  	andc	r15, r15, r16  	oris	r15, r15, MSR_VEC@h  #ifdef CONFIG_VSX @@ -151,10 +150,11 @@ _GLOBAL(tm_reclaim)  	andis.		r0, r4, MSR_VEC@h  	beq	dont_backup_vec -	SAVE_32VRS_TRANSACT(0, r6, r3)	/* r6 scratch, r3 thread */ +	addi	r7, r3, THREAD_TRANSACT_VRSTATE +	SAVE_32VRS(0, r6, r7)	/* r6 scratch, r7 transact vr state */  	mfvscr	vr0 -	li	r6, THREAD_TRANSACT_VSCR -	stvx	vr0, r3, r6 +	li	r6, VRSTATE_VSCR +	stvx	vr0, r7, r6  dont_backup_vec:  	mfspr	r0, SPRN_VRSAVE  	std	r0, THREAD_TRANSACT_VRSAVE(r3) @@ -162,12 +162,20 @@ dont_backup_vec:  	andi.	r0, r4, MSR_FP  	beq	dont_backup_fp -	SAVE_32FPRS_VSRS_TRANSACT(0, R6, R3)	/* r6 scratch, r3 thread */ +	addi	r7, r3, THREAD_TRANSACT_FPSTATE +	SAVE_32FPRS_VSRS(0, R6, R7)	/* r6 scratch, r7 transact fp state */  	mffs    fr0 -	stfd    fr0,THREAD_TRANSACT_FPSCR(r3) +	stfd    fr0,FPSTATE_FPSCR(r7)  dont_backup_fp: +	/* Do sanity check on MSR to make sure we are suspended */ +	li	r7, (MSR_TS_S)@higher +	srdi	r6, r14, 32 +	and	r6, r6, r7 +1:	tdeqi   r6, 0 +	EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0 +  	/* The moment we treclaim, ALL of our GPRs will switch  	 * to user register state.  (FPRs, CCR etc. also!)  	 * Use an sprg and a tm_scratch in the PACA to shuffle. 
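/*
 * A C rendering (not from this patch) of the MSR sanity check the tm.S
 * hunk above adds with tdeqi/EMIT_BUG_ENTRY: tm_reclaim() may only be
 * entered while the transaction is suspended, so the MSR[TS] field must
 * read "suspended" or the kernel should BUG. The helper name is
 * hypothetical.
 */
static inline void example_assert_tm_suspended(unsigned long msr)
{
	BUG_ON(!(msr & MSR_TS_S));	/* treclaim outside suspend = bug */
}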
@@ -187,10 +195,17 @@ dont_backup_fp:  	std	r1, PACATMSCRATCH(r13)  	ld	r1, PACAR1(r13) +	/* Store the PPR in r11 and reset to decent value */ +	std	r11, GPR11(r1)			/* Temporary stash */ +	mfspr	r11, SPRN_PPR +	HMT_MEDIUM +  	/* Now get some more GPRS free */  	std	r7, GPR7(r1)			/* Temporary stash */  	std	r12, GPR12(r1)			/* ''   ''    ''   */ -	ld	r12, STACK_PARAM(0)(r1)		/* Param 0, thread_struct * */ +	ld	r12, STK_PARAM(R3)(r1)		/* Param 0, thread_struct * */ + +	std	r11, THREAD_TM_PPR(r12)		/* Store PPR and free r11 */  	addi	r7, r12, PT_CKPT_REGS		/* Thread's ckpt_regs */ @@ -203,15 +218,19 @@ dont_backup_fp:  	SAVE_GPR(0, r7)				/* user r0 */  	SAVE_GPR(2, r7)			/* user r2 */  	SAVE_4GPRS(3, r7)			/* user r3-r6 */ -	SAVE_4GPRS(8, r7)			/* user r8-r11 */ +	SAVE_GPR(8, r7)				/* user r8 */ +	SAVE_GPR(9, r7)				/* user r9 */ +	SAVE_GPR(10, r7)			/* user r10 */  	ld	r3, PACATMSCRATCH(r13)		/* user r1 */  	ld	r4, GPR7(r1)			/* user r7 */ -	ld	r5, GPR12(r1)			/* user r12 */ -	GET_SCRATCH0(6)				/* user r13 */ +	ld	r5, GPR11(r1)			/* user r11 */ +	ld	r6, GPR12(r1)			/* user r12 */ +	GET_SCRATCH0(8)				/* user r13 */  	std	r3, GPR1(r7)  	std	r4, GPR7(r7) -	std	r5, GPR12(r7) -	std	r6, GPR13(r7) +	std	r5, GPR11(r7) +	std	r6, GPR12(r7) +	std	r8, GPR13(r7)  	SAVE_NVGPRS(r7)				/* user r14-r31 */ @@ -234,14 +253,12 @@ dont_backup_fp:  	std	r6, _XER(r7) -	/* ******************** TAR, PPR, DSCR ********** */ +	/* ******************** TAR, DSCR ********** */  	mfspr	r3, SPRN_TAR -	mfspr	r4, SPRN_PPR -	mfspr	r5, SPRN_DSCR +	mfspr	r4, SPRN_DSCR  	std	r3, THREAD_TM_TAR(r12) -	std	r4, THREAD_TM_PPR(r12) -	std	r5, THREAD_TM_DSCR(r12) +	std	r4, THREAD_TM_DSCR(r12)  	/* MSR and flags:  We don't change CRs, and we don't need to alter  	 * MSR. @@ -258,7 +275,7 @@ dont_backup_fp:  	std	r3, THREAD_TM_TFHAR(r12)  	std	r4, THREAD_TM_TFIAR(r12) -	/* AMR and PPR are checkpointed too, but are unsupported by Linux. */ +	/* AMR is checkpointed too, but is unsupported by Linux. */  	/* Restore original MSR/IRQ state & clear TM mode */  	ld	r14, TM_FRAME_L0(r1)		/* Orig MSR */ @@ -269,11 +286,16 @@ dont_backup_fp:  	REST_NVGPRS(r1)  	addi    r1, r1, TM_FRAME_SIZE -	ld	r4, 8(r1) +	lwz	r4, 8(r1)  	ld	r0, 16(r1)  	mtcr	r4  	mtlr	r0 -	ld	r2, 40(r1) +	ld	r2, STK_GOT(r1) + +	/* Load CPU's default DSCR */ +	ld	r0, PACA_DSCR(r13) +	mtspr	SPRN_DSCR, r0 +  	blr @@ -285,12 +307,12 @@ dont_backup_fp:  	 *	Call with IRQs off, stacks get all out of sync for  	 *	some periods in here!  	 */ -_GLOBAL(tm_recheckpoint) +_GLOBAL(__tm_recheckpoint)  	mfcr	r5  	mflr	r0 -	std	r5, 8(r1) +	stw	r5, 8(r1)  	std	r0, 16(r1) -	std	r2, 40(r1) +	std	r2, STK_GOT(r1)  	stdu	r1, -TM_FRAME_SIZE(r1)  	/* We've a struct pt_regs at [r1+STACK_FRAME_OVERHEAD]. @@ -298,8 +320,6 @@ _GLOBAL(tm_recheckpoint)  	 */  	SAVE_NVGPRS(r1) -	std	r1, PACAR1(r13) -  	/* Load complete register state from ts_ckpt* registers */  	addi	r7, r3, PT_CKPT_REGS		/* Thread's ckpt_regs */ @@ -337,10 +357,11 @@ _GLOBAL(tm_recheckpoint)  	andis.	r0, r4, MSR_VEC@h  	beq	dont_restore_vec -	li	r5, THREAD_VSCR -	lvx	vr0, r3, r5 +	addi	r8, r3, THREAD_VRSTATE +	li	r5, VRSTATE_VSCR +	lvx	vr0, r8, r5  	mtvscr	vr0 -	REST_32VRS(0, r5, r3)			/* r5 scratch, r3 THREAD ptr */ +	REST_32VRS(0, r5, r8)			/* r5 scratch, r8 ptr */  dont_restore_vec:  	ld	r5, THREAD_VRSAVE(r3)  	mtspr	SPRN_VRSAVE, r5 @@ -349,54 +370,85 @@ dont_restore_vec:  	andi.	
r0, r4, MSR_FP  	beq	dont_restore_fp -	lfd	fr0, THREAD_FPSCR(r3) +	addi	r8, r3, THREAD_FPSTATE +	lfd	fr0, FPSTATE_FPSCR(r8)  	MTFSF_L(fr0) -	REST_32FPRS_VSRS(0, R4, R3) +	REST_32FPRS_VSRS(0, R4, R8)  dont_restore_fp:  	mtmsr	r6				/* FP/Vec off again! */  restore_gprs: -	/* ******************** TAR, PPR, DSCR ********** */ -	ld	r4, THREAD_TM_TAR(r3) -	ld	r5, THREAD_TM_PPR(r3) -	ld	r6, THREAD_TM_DSCR(r3) +	/* ******************** CR,LR,CCR,MSR ********** */ +	ld	r4, _CTR(r7) +	ld	r5, _LINK(r7) +	ld	r8, _XER(r7) -	mtspr	SPRN_TAR,	r4 -	mtspr	SPRN_PPR,	r5 -	mtspr	SPRN_DSCR,	r6 +	mtctr	r4 +	mtlr	r5 +	mtxer	r8 -	/* ******************** CR,LR,CCR,MSR ********** */ -	ld	r3, _CTR(r7) -	ld	r4, _LINK(r7) -	ld	r5, _CCR(r7) -	ld	r6, _XER(r7) +	/* ******************** TAR ******************** */ +	ld	r4, THREAD_TM_TAR(r3) +	mtspr	SPRN_TAR,	r4 -	mtctr	r3 -	mtlr	r4 -	mtcr	r5 -	mtxer	r6 +	/* Load up the PPR and DSCR in GPRs only at this stage */ +	ld	r5, THREAD_TM_DSCR(r3) +	ld	r6, THREAD_TM_PPR(r3)  	/* Clear the MSR RI since we are about to change R1.  EE is already off  	 */  	li	r4, 0  	mtmsrd	r4, 1 -	REST_4GPRS(0, r7)			/* GPR0-3 */ -	REST_GPR(4, r7)				/* GPR4-6 */ -	REST_GPR(5, r7) -	REST_GPR(6, r7) +	REST_GPR(0, r7)				/* GPR0 */ +	REST_2GPRS(2, r7)			/* GPR2-3 */ +	REST_GPR(4, r7)				/* GPR4 */  	REST_4GPRS(8, r7)			/* GPR8-11 */  	REST_2GPRS(12, r7)			/* GPR12-13 */  	REST_NVGPRS(r7)				/* GPR14-31 */ -	ld	r7, GPR7(r7)			/* GPR7 */ +	/* Load up PPR and DSCR here so we don't run with user values for long +	 */ +	mtspr	SPRN_DSCR, r5 +	mtspr	SPRN_PPR, r6 + +	/* Do final sanity check on TEXASR to make sure FS is set.  Do this +	 * here before we load up the userspace r1 so any bugs we hit will get +	 * a call chain */ +	mfspr	r5, SPRN_TEXASR +	srdi	r5, r5, 16 +	li	r6, (TEXASR_FS)@h +	and	r6, r6, r5 +1:	tdeqi	r6, 0 +	EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0 + +	/* Do final sanity check on MSR to make sure we are not transactional +	 * or suspended +	 */ +	mfmsr   r6 +	li	r5, (MSR_TS_MASK)@higher +	srdi	r6, r6, 32 +	and	r6, r6, r5 +1:	tdnei   r6, 0 +	EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0 + +	/* Restore CR */ +	ld	r6, _CCR(r7) +	mtcr    r6 + +	REST_GPR(1, r7)				/* GPR1 */ +	REST_GPR(5, r7)				/* GPR5-7 */ +	REST_GPR(6, r7) +	ld	r7, GPR7(r7)  	/* Commit register state as checkpointed state: */  	TRECHKPT +	HMT_MEDIUM +  	/* Our transactional state has now changed.  	 *  	 * Now just get out of here.  Transactional (current) state will be @@ -414,11 +466,16 @@ restore_gprs:  	REST_NVGPRS(r1)  	addi    r1, r1, TM_FRAME_SIZE -	ld	r4, 8(r1) +	lwz	r4, 8(r1)  	ld	r0, 16(r1)  	mtcr	r4  	mtlr	r0 -	ld	r2, 40(r1) +	ld	r2, STK_GOT(r1) + +	/* Load CPU's default DSCR */ +	ld	r0, PACA_DSCR(r13) +	mtspr	SPRN_DSCR, r0 +  	blr  	/* ****************************************************************** */ diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index f783c932fae..239f1cde3ff 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -285,6 +285,23 @@ void system_reset_exception(struct pt_regs *regs)  	/* What should we do here? We could issue a shutdown or hard reset. */  } + +/* + * This function is called in real mode. Strictly no printk's please. + * + * regs->nip and regs->msr contain SRR0 and SRR1. 
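/*
 * (Aside, not from this patch: "real mode" here means the MMU is off,
 * so machine_check_early() below may only touch data that is safe to
 * reach at its physical address, such as the PACA-based per-cpu
 * irq_stat counter, and it must not printk or take locks, as the
 * comment says.)
 */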
+ */ +long machine_check_early(struct pt_regs *regs) +{ +	long handled = 0; + +	__get_cpu_var(irq_stat).mce_exceptions++; + +	if (cur_cpu_spec && cur_cpu_spec->machine_check_early) +		handled = cur_cpu_spec->machine_check_early(regs); +	return handled; +} +  #endif  /* @@ -351,8 +368,8 @@ static inline int check_io_access(struct pt_regs *regs)  #define REASON_TRAP		ESR_PTR  /* single-step stuff */ -#define single_stepping(regs)	(current->thread.dbcr0 & DBCR0_IC) -#define clear_single_step(regs)	(current->thread.dbcr0 &= ~DBCR0_IC) +#define single_stepping(regs)	(current->thread.debug.dbcr0 & DBCR0_IC) +#define clear_single_step(regs)	(current->thread.debug.dbcr0 &= ~DBCR0_IC)  #else  /* On non-4xx, the reason for the machine check or program @@ -816,7 +833,7 @@ static void parse_fpe(struct pt_regs *regs)  	flush_fp_to_thread(current); -	code = __parse_fpscr(current->thread.fpscr.val); +	code = __parse_fpscr(current->thread.fp_state.fpscr);  	_exception(SIGFPE, regs, code, regs->nip);  } @@ -1018,6 +1035,13 @@ static int emulate_instruction(struct pt_regs *regs)  		return emulate_isel(regs, instword);  	} +	/* Emulate sync instruction variants */ +	if ((instword & PPC_INST_SYNC_MASK) == PPC_INST_SYNC) { +		PPC_WARN_EMULATED(sync, regs); +		asm volatile("sync"); +		return 0; +	} +  #ifdef CONFIG_PPC64  	/* Emulate the mfspr rD, DSCR. */  	if ((((instword & PPC_INST_MFSPR_DSCR_USER_MASK) == @@ -1069,7 +1093,7 @@ static int emulate_math(struct pt_regs *regs)  		return 0;  	case 1: {  			int code = 0; -			code = __parse_fpscr(current->thread.fpscr.val); +			code = __parse_fpscr(current->thread.fp_state.fpscr);  			_exception(SIGFPE, regs, code, regs->nip);  			return 0;  		} @@ -1357,8 +1381,9 @@ void facility_unavailable_exception(struct pt_regs *regs)  	if (!arch_irq_disabled_regs(regs))  		local_irq_enable(); -	pr_err("%sFacility '%s' unavailable, exception at 0x%lx, MSR=%lx\n", -	       hv ? "Hypervisor " : "", facility, regs->nip, regs->msr); +	pr_err_ratelimited( +		"%sFacility '%s' unavailable, exception at 0x%lx, MSR=%lx\n", +		hv ? "Hypervisor " : "", facility, regs->nip, regs->msr);  	if (user_mode(regs)) {  		_exception(SIGILL, regs, ILL_ILLOPC, regs->nip); @@ -1371,15 +1396,12 @@ void facility_unavailable_exception(struct pt_regs *regs)  #ifdef CONFIG_PPC_TRANSACTIONAL_MEM -extern void do_load_up_fpu(struct pt_regs *regs); -  void fp_unavailable_tm(struct pt_regs *regs)  {  	/* Note:  This does not handle any kind of FP laziness. */  	TM_DEBUG("FP Unavailable trap whilst transactional at 0x%lx, MSR=%lx\n",  		 regs->nip, regs->msr); -	tm_enable();          /* We can only have got here if the task started using FP after           * beginning the transaction.  So, the transactional regs are just a @@ -1388,8 +1410,7 @@ void fp_unavailable_tm(struct pt_regs *regs)           * transaction, and probably retry but now with FP enabled.  So the           * checkpointed FP registers need to be loaded.  	 */ -	tm_reclaim(&current->thread, current->thread.regs->msr, -		   TM_CAUSE_FAC_UNAV); +	tm_reclaim_current(TM_CAUSE_FAC_UNAV);  	/* Reclaim didn't save out any FPRs to transact_fprs. */  	/* Enable FP for the task: */ @@ -1398,12 +1419,18 @@ void fp_unavailable_tm(struct pt_regs *regs)  	/* This loads and recheckpoints the FP registers from  	 * thread.fpr[].  They will remain in registers after the  	 * checkpoint so we don't need to reload them after. +	 * If VMX is in use, the VRs now hold checkpointed values, +	 * so we don't want to load the VRs from the thread_struct.  	 
*/ -	tm_recheckpoint(&current->thread, regs->msr); -} +	tm_recheckpoint(&current->thread, MSR_FP); -#ifdef CONFIG_ALTIVEC -extern void do_load_up_altivec(struct pt_regs *regs); +	/* If VMX is in use, get the transactional values back */ +	if (regs->msr & MSR_VEC) { +		do_load_up_transact_altivec(&current->thread); +		/* At this point all the VSX state is loaded, so enable it */ +		regs->msr |= MSR_VSX; +	} +}  void altivec_unavailable_tm(struct pt_regs *regs)  { @@ -1414,18 +1441,21 @@ void altivec_unavailable_tm(struct pt_regs *regs)  	TM_DEBUG("Vector Unavailable trap whilst transactional at 0x%lx,"  		 "MSR=%lx\n",  		 regs->nip, regs->msr); -	tm_enable(); -	tm_reclaim(&current->thread, current->thread.regs->msr, -		   TM_CAUSE_FAC_UNAV); +	tm_reclaim_current(TM_CAUSE_FAC_UNAV);  	regs->msr |= MSR_VEC; -	tm_recheckpoint(&current->thread, regs->msr); +	tm_recheckpoint(&current->thread, MSR_VEC);  	current->thread.used_vr = 1; + +	if (regs->msr & MSR_FP) { +		do_load_up_transact_fpu(&current->thread); +		regs->msr |= MSR_VSX; +	}  } -#endif -#ifdef CONFIG_VSX  void vsx_unavailable_tm(struct pt_regs *regs)  { +	unsigned long orig_msr = regs->msr; +  	/* See the comments in fp_unavailable_tm().  This works similarly,  	 * though we're loading both FP and VEC registers in here.  	 * @@ -1437,18 +1467,30 @@ void vsx_unavailable_tm(struct pt_regs *regs)  		 "MSR=%lx\n",  		 regs->nip, regs->msr); -	tm_enable(); +	current->thread.used_vsr = 1; + +	/* If FP and VMX are already loaded, we have all the state we need */ +	if ((orig_msr & (MSR_FP | MSR_VEC)) == (MSR_FP | MSR_VEC)) { +		regs->msr |= MSR_VSX; +		return; +	} +  	/* This reclaims FP and/or VR regs if they're already enabled */ -	tm_reclaim(&current->thread, current->thread.regs->msr, -		   TM_CAUSE_FAC_UNAV); +	tm_reclaim_current(TM_CAUSE_FAC_UNAV);  	regs->msr |= MSR_VEC | MSR_FP | current->thread.fpexc_mode |  		MSR_VSX; -	/* This loads & recheckpoints FP and VRs. */ -	tm_recheckpoint(&current->thread, regs->msr); -	current->thread.used_vsr = 1; + +	/* This loads & recheckpoints FP and VRs; but we have +	 * to be sure not to overwrite previously-valid state. 
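/*
 * (Aside, not from this patch.) The MSR_VSX juggling in these three
 * handlers follows from VSX having no architected state of its own:
 * VSRs 0-31 overlay the FP registers and VSRs 32-63 overlay the VMX
 * registers, so MSR_VSX may only be turned on once *both* underlying
 * sets hold transactional values, which is why each handler checks the
 * other facility's MSR bit before setting it.
 */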
+	 */ +	tm_recheckpoint(&current->thread, regs->msr & ~orig_msr); + +	if (orig_msr & MSR_FP) +		do_load_up_transact_fpu(&current->thread); +	if (orig_msr & MSR_VEC) +		do_load_up_transact_altivec(&current->thread);  } -#endif  #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */  void performance_monitor_exception(struct pt_regs *regs) @@ -1465,7 +1507,8 @@ void SoftwareEmulation(struct pt_regs *regs)  	if (!user_mode(regs)) {  		debugger(regs); -		die("Kernel Mode Software FPU Emulation", regs, SIGFPE); +		die("Kernel Mode Unimplemented Instruction or SW FPU Emulation", +			regs, SIGFPE);  	}  	if (!emulate_math(regs)) @@ -1486,7 +1529,7 @@ static void handle_debug(struct pt_regs *regs, unsigned long debug_status)  	if (debug_status & (DBSR_DAC1R | DBSR_DAC1W)) {  		dbcr_dac(current) &= ~(DBCR_DAC1R | DBCR_DAC1W);  #ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE -		current->thread.dbcr2 &= ~DBCR2_DAC12MODE; +		current->thread.debug.dbcr2 &= ~DBCR2_DAC12MODE;  #endif  		do_send_trap(regs, mfspr(SPRN_DAC1), debug_status, TRAP_HWBKPT,  			     5); @@ -1497,24 +1540,24 @@ static void handle_debug(struct pt_regs *regs, unsigned long debug_status)  			     6);  		changed |= 0x01;  	}  else if (debug_status & DBSR_IAC1) { -		current->thread.dbcr0 &= ~DBCR0_IAC1; +		current->thread.debug.dbcr0 &= ~DBCR0_IAC1;  		dbcr_iac_range(current) &= ~DBCR_IAC12MODE;  		do_send_trap(regs, mfspr(SPRN_IAC1), debug_status, TRAP_HWBKPT,  			     1);  		changed |= 0x01;  	}  else if (debug_status & DBSR_IAC2) { -		current->thread.dbcr0 &= ~DBCR0_IAC2; +		current->thread.debug.dbcr0 &= ~DBCR0_IAC2;  		do_send_trap(regs, mfspr(SPRN_IAC2), debug_status, TRAP_HWBKPT,  			     2);  		changed |= 0x01;  	}  else if (debug_status & DBSR_IAC3) { -		current->thread.dbcr0 &= ~DBCR0_IAC3; +		current->thread.debug.dbcr0 &= ~DBCR0_IAC3;  		dbcr_iac_range(current) &= ~DBCR_IAC34MODE;  		do_send_trap(regs, mfspr(SPRN_IAC3), debug_status, TRAP_HWBKPT,  			     3);  		changed |= 0x01;  	}  else if (debug_status & DBSR_IAC4) { -		current->thread.dbcr0 &= ~DBCR0_IAC4; +		current->thread.debug.dbcr0 &= ~DBCR0_IAC4;  		do_send_trap(regs, mfspr(SPRN_IAC4), debug_status, TRAP_HWBKPT,  			     4);  		changed |= 0x01; @@ -1524,19 +1567,20 @@ static void handle_debug(struct pt_regs *regs, unsigned long debug_status)  	 * Check all other debug flags and see if that bit needs to be turned  	 * back on or not.  	 */ -	if (DBCR_ACTIVE_EVENTS(current->thread.dbcr0, current->thread.dbcr1)) +	if (DBCR_ACTIVE_EVENTS(current->thread.debug.dbcr0, +			       current->thread.debug.dbcr1))  		regs->msr |= MSR_DE;  	else  		/* Make sure the IDM flag is off */ -		current->thread.dbcr0 &= ~DBCR0_IDM; +		current->thread.debug.dbcr0 &= ~DBCR0_IDM;  	if (changed & 0x01) -		mtspr(SPRN_DBCR0, current->thread.dbcr0); +		mtspr(SPRN_DBCR0, current->thread.debug.dbcr0);  }  void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status)  { -	current->thread.dbsr = debug_status; +	current->thread.debug.dbsr = debug_status;  	/* Hack alert: On BookE, Branch Taken stops on the branch itself, while  	 * on server, it stops on the target of the branch. 
In order to simulate @@ -1553,8 +1597,8 @@ void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status)  		/* Do the single step trick only when coming from userspace */  		if (user_mode(regs)) { -			current->thread.dbcr0 &= ~DBCR0_BT; -			current->thread.dbcr0 |= DBCR0_IDM | DBCR0_IC; +			current->thread.debug.dbcr0 &= ~DBCR0_BT; +			current->thread.debug.dbcr0 |= DBCR0_IDM | DBCR0_IC;  			regs->msr |= MSR_DE;  			return;  		} @@ -1582,13 +1626,13 @@ void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status)  			return;  		if (user_mode(regs)) { -			current->thread.dbcr0 &= ~DBCR0_IC; -			if (DBCR_ACTIVE_EVENTS(current->thread.dbcr0, -					       current->thread.dbcr1)) +			current->thread.debug.dbcr0 &= ~DBCR0_IC; +			if (DBCR_ACTIVE_EVENTS(current->thread.debug.dbcr0, +					       current->thread.debug.dbcr1))  				regs->msr |= MSR_DE;  			else  				/* Make sure the IDM bit is off */ -				current->thread.dbcr0 &= ~DBCR0_IDM; +				current->thread.debug.dbcr0 &= ~DBCR0_IDM;  		}  		_exception(SIGTRAP, regs, TRAP_TRACE, regs->nip); @@ -1634,7 +1678,7 @@ void altivec_assist_exception(struct pt_regs *regs)  		/* XXX quick hack for now: set the non-Java bit in the VSCR */  		printk_ratelimited(KERN_ERR "Unrecognized altivec instruction "  				   "in %s at %lx\n", current->comm, regs->nip); -		current->thread.vscr.u[3] |= 0x10000; +		current->thread.vr_state.vscr.u[3] |= 0x10000;  	}  }  #endif /* CONFIG_ALTIVEC */ @@ -1815,6 +1859,7 @@ struct ppc_emulated ppc_emulated = {  	WARN_EMULATED_SETUP(popcntb),  	WARN_EMULATED_SETUP(spe),  	WARN_EMULATED_SETUP(string), +	WARN_EMULATED_SETUP(sync),  	WARN_EMULATED_SETUP(unaligned),  #ifdef CONFIG_MATH_EMULATION  	WARN_EMULATED_SETUP(math), @@ -1825,6 +1870,7 @@ struct ppc_emulated ppc_emulated = {  #ifdef CONFIG_PPC64  	WARN_EMULATED_SETUP(mfdscr),  	WARN_EMULATED_SETUP(mtdscr), +	WARN_EMULATED_SETUP(lq_stq),  #endif  }; diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c index a15837519dc..b7aa07279a6 100644 --- a/arch/powerpc/kernel/udbg.c +++ b/arch/powerpc/kernel/udbg.c @@ -62,8 +62,6 @@ void __init udbg_early_init(void)  	udbg_init_cpm();  #elif defined(CONFIG_PPC_EARLY_DEBUG_USBGECKO)  	udbg_init_usbgecko(); -#elif defined(CONFIG_PPC_EARLY_DEBUG_WSP) -	udbg_init_wsp();  #elif defined(CONFIG_PPC_EARLY_DEBUG_MEMCONS)  	/* In memory console */  	udbg_init_memcons(); diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c index 75702e207b2..6e7c4923b5e 100644 --- a/arch/powerpc/kernel/udbg_16550.c +++ b/arch/powerpc/kernel/udbg_16550.c @@ -296,14 +296,3 @@ void __init udbg_init_40x_realmode(void)  }  #endif /* CONFIG_PPC_EARLY_DEBUG_40x */ - - -#ifdef CONFIG_PPC_EARLY_DEBUG_WSP - -void __init udbg_init_wsp(void) -{ -	udbg_uart_init_mmio((void *)WSP_UART_VIRT, 1); -	udbg_uart_setup(57600, 50000000); -} - -#endif /* CONFIG_PPC_EARLY_DEBUG_WSP */ diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c index 59f419b935f..003b20964ea 100644 --- a/arch/powerpc/kernel/uprobes.c +++ b/arch/powerpc/kernel/uprobes.c @@ -186,7 +186,7 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)  	 * emulate_step() returns 1 if the insn was successfully emulated.  	 * For all other cases, we need to single-step in hardware.  	 
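/*
 * A minimal sketch (not from this patch) of the emulate_step() contract
 * relied on above and in the uprobes hunk below: a return > 0 means the
 * instruction was emulated and regs->nip already points past it; any
 * other return means the caller must single-step in hardware. The
 * wrapper name is hypothetical.
 */
static bool example_try_emulate(struct pt_regs *regs, unsigned int insn)
{
	return emulate_step(regs, insn) > 0;	/* true: nothing left to do */
}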
*/ -	ret = emulate_step(regs, auprobe->ainsn); +	ret = emulate_step(regs, auprobe->insn);  	if (ret > 0)  		return true; diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 1d9c92621b3..ce74c335a6a 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -34,8 +34,7 @@  #include <asm/firmware.h>  #include <asm/vdso.h>  #include <asm/vdso_datapage.h> - -#include "setup.h" +#include <asm/setup.h>  #undef DEBUG @@ -716,8 +715,8 @@ int vdso_getcpu_init(void)  	unsigned long cpu, node, val;  	/* -	 * SPRG3 contains the CPU in the bottom 16 bits and the NUMA node in -	 * the next 16 bits. The VDSO uses this to implement getcpu(). +	 * SPRG_VDSO contains the CPU in the bottom 16 bits and the NUMA node +	 * in the next 16 bits.  The VDSO uses this to implement getcpu().  	 */  	cpu = get_cpu();  	WARN_ON_ONCE(cpu > 0xffff); @@ -726,8 +725,8 @@ int vdso_getcpu_init(void)  	WARN_ON_ONCE(node > 0xffff);  	val = (cpu & 0xfff) | ((node & 0xffff) << 16); -	mtspr(SPRN_SPRG3, val); -	get_paca()->sprg3 = val; +	mtspr(SPRN_SPRG_VDSO_WRITE, val); +	get_paca()->sprg_vdso = val;  	put_cpu(); diff --git a/arch/powerpc/kernel/vdso32/getcpu.S b/arch/powerpc/kernel/vdso32/getcpu.S index 47afd08c90f..23eb9a9441b 100644 --- a/arch/powerpc/kernel/vdso32/getcpu.S +++ b/arch/powerpc/kernel/vdso32/getcpu.S @@ -29,7 +29,7 @@   */  V_FUNCTION_BEGIN(__kernel_getcpu)    .cfi_startproc -	mfspr	r5,SPRN_USPRG3 +	mfspr	r5,SPRN_SPRG_VDSO_READ  	cmpdi	cr0,r3,0  	cmpdi	cr1,r4,0  	clrlwi  r6,r5,16 diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S index 6b1f2a6d551..6b2b69616e7 100644 --- a/arch/powerpc/kernel/vdso32/gettimeofday.S +++ b/arch/powerpc/kernel/vdso32/gettimeofday.S @@ -232,9 +232,15 @@ __do_get_tspec:  	lwz	r6,(CFG_TB_ORIG_STAMP+4)(r9)  	/* Get a stable TB value */ +#ifdef CONFIG_8xx +2:	mftbu	r3 +	mftbl	r4 +	mftbu	r0 +#else  2:	mfspr	r3, SPRN_TBRU  	mfspr	r4, SPRN_TBRL  	mfspr	r0, SPRN_TBRU +#endif  	cmplw	cr0,r3,r0  	bne-	2b diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S index f223409629b..e58ee10fa5c 100644 --- a/arch/powerpc/kernel/vdso32/vdso32.lds.S +++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S @@ -4,7 +4,11 @@   */  #include <asm/vdso.h> +#ifdef __LITTLE_ENDIAN__ +OUTPUT_FORMAT("elf32-powerpcle", "elf32-powerpcle", "elf32-powerpcle") +#else  OUTPUT_FORMAT("elf32-powerpc", "elf32-powerpc", "elf32-powerpc") +#endif  OUTPUT_ARCH(powerpc:common)  ENTRY(_start) diff --git a/arch/powerpc/kernel/vdso32/vdso32_wrapper.S b/arch/powerpc/kernel/vdso32/vdso32_wrapper.S index 6e8f507ed32..6ac107ac402 100644 --- a/arch/powerpc/kernel/vdso32/vdso32_wrapper.S +++ b/arch/powerpc/kernel/vdso32/vdso32_wrapper.S @@ -1,4 +1,3 @@ -#include <linux/init.h>  #include <linux/linkage.h>  #include <asm/page.h> @@ -7,7 +6,7 @@  	.globl vdso32_start, vdso32_end  	.balign PAGE_SIZE  vdso32_start: -	.incbin "arch/powerpc/kernel/vdso32/vdso32.so" +	.incbin "arch/powerpc/kernel/vdso32/vdso32.so.dbg"  	.balign PAGE_SIZE  vdso32_end: diff --git a/arch/powerpc/kernel/vdso64/getcpu.S b/arch/powerpc/kernel/vdso64/getcpu.S index 47afd08c90f..23eb9a9441b 100644 --- a/arch/powerpc/kernel/vdso64/getcpu.S +++ b/arch/powerpc/kernel/vdso64/getcpu.S @@ -29,7 +29,7 @@   */  V_FUNCTION_BEGIN(__kernel_getcpu)    .cfi_startproc -	mfspr	r5,SPRN_USPRG3 +	mfspr	r5,SPRN_SPRG_VDSO_READ  	cmpdi	cr0,r3,0  	cmpdi	cr1,r4,0  	clrlwi  r6,r5,16 diff --git a/arch/powerpc/kernel/vdso64/sigtramp.S b/arch/powerpc/kernel/vdso64/sigtramp.S 
index 45ea281e9a2..542c6f422e4 100644 --- a/arch/powerpc/kernel/vdso64/sigtramp.S +++ b/arch/powerpc/kernel/vdso64/sigtramp.S @@ -142,6 +142,13 @@ V_FUNCTION_END(__kernel_sigtramp_rt64)  /* Size of CR reg in DWARF unwind info. */  #define CRSIZE	4 +/* Offset of CR reg within a full word. */ +#ifdef __LITTLE_ENDIAN__ +#define CROFF 0 +#else +#define CROFF (RSIZE - CRSIZE) +#endif +  /* This is the offset of the VMX reg pointer.  */  #define VREGS	48*RSIZE+33*8 @@ -181,7 +188,14 @@ V_FUNCTION_END(__kernel_sigtramp_rt64)    rsave (31, 31*RSIZE);							\    rsave (67, 32*RSIZE);		/* ap, used as temp for nip */		\    rsave (65, 36*RSIZE);		/* lr */				\ -  rsave (70, 38*RSIZE + (RSIZE - CRSIZE)) /* cr */ +  rsave (68, 38*RSIZE + CROFF);	/* cr fields */				\ +  rsave (69, 38*RSIZE + CROFF);						\ +  rsave (70, 38*RSIZE + CROFF);						\ +  rsave (71, 38*RSIZE + CROFF);						\ +  rsave (72, 38*RSIZE + CROFF);						\ +  rsave (73, 38*RSIZE + CROFF);						\ +  rsave (74, 38*RSIZE + CROFF);						\ +  rsave (75, 38*RSIZE + CROFF)  /* Describe where the FP regs are saved.  */  #define EH_FRAME_FP \ diff --git a/arch/powerpc/kernel/vdso64/vdso64.lds.S b/arch/powerpc/kernel/vdso64/vdso64.lds.S index e4863819663..64fb183a47c 100644 --- a/arch/powerpc/kernel/vdso64/vdso64.lds.S +++ b/arch/powerpc/kernel/vdso64/vdso64.lds.S @@ -4,7 +4,11 @@   */  #include <asm/vdso.h> +#ifdef __LITTLE_ENDIAN__ +OUTPUT_FORMAT("elf64-powerpcle", "elf64-powerpcle", "elf64-powerpcle") +#else  OUTPUT_FORMAT("elf64-powerpc", "elf64-powerpc", "elf64-powerpc") +#endif  OUTPUT_ARCH(powerpc:common64)  ENTRY(_start) diff --git a/arch/powerpc/kernel/vdso64/vdso64_wrapper.S b/arch/powerpc/kernel/vdso64/vdso64_wrapper.S index b8553d62b79..df60fca6a13 100644 --- a/arch/powerpc/kernel/vdso64/vdso64_wrapper.S +++ b/arch/powerpc/kernel/vdso64/vdso64_wrapper.S @@ -1,4 +1,3 @@ -#include <linux/init.h>  #include <linux/linkage.h>  #include <asm/page.h> @@ -7,7 +6,7 @@  	.globl vdso64_start, vdso64_end  	.balign PAGE_SIZE  vdso64_start: -	.incbin "arch/powerpc/kernel/vdso64/vdso64.so" +	.incbin "arch/powerpc/kernel/vdso64/vdso64.so.dbg"  	.balign PAGE_SIZE  vdso64_end: diff --git a/arch/powerpc/kernel/vecemu.c b/arch/powerpc/kernel/vecemu.c index 604d0947cb2..c4bfadb2606 100644 --- a/arch/powerpc/kernel/vecemu.c +++ b/arch/powerpc/kernel/vecemu.c @@ -271,7 +271,7 @@ int emulate_altivec(struct pt_regs *regs)  	vb = (instr >> 11) & 0x1f;  	vc = (instr >> 6) & 0x1f; -	vrs = current->thread.vr; +	vrs = current->thread.vr_state.vr;  	switch (instr & 0x3f) {  	case 10:  		switch (vc) { @@ -320,12 +320,12 @@ int emulate_altivec(struct pt_regs *regs)  		case 14:	/* vctuxs */  			for (i = 0; i < 4; ++i)  				vrs[vd].u[i] = ctuxs(vrs[vb].u[i], va, -						&current->thread.vscr.u[3]); +					&current->thread.vr_state.vscr.u[3]);  			break;  		case 15:	/* vctsxs */  			for (i = 0; i < 4; ++i)  				vrs[vd].u[i] = ctsxs(vrs[vb].u[i], va, -						&current->thread.vscr.u[3]); +					&current->thread.vr_state.vscr.u[3]);  			break;  		default:  			return -EINVAL; diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 9e20999aaef..74f8050518d 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -8,29 +8,6 @@  #include <asm/ptrace.h>  #ifdef CONFIG_PPC_TRANSACTIONAL_MEM -/* - * Wrapper to call load_up_altivec from C. 
- * void do_load_up_altivec(struct pt_regs *regs); - */ -_GLOBAL(do_load_up_altivec) -	mflr	r0 -	std	r0, 16(r1) -	stdu	r1, -112(r1) - -	subi	r6, r3, STACK_FRAME_OVERHEAD -	/* load_up_altivec expects r12=MSR, r13=PACA, and returns -	 * with r12 = new MSR. -	 */ -	ld	r12,_MSR(r6) -	GET_PACA(r13) -	bl	load_up_altivec -	std	r12,_MSR(r6) - -	ld	r0, 112+16(r1) -	addi	r1, r1, 112 -	mtlr	r0 -	blr -  /* void do_load_up_transact_altivec(struct thread_struct *thread)   *   * This is similar to load_up_altivec but for the transactional version of the @@ -46,10 +23,11 @@ _GLOBAL(do_load_up_transact_altivec)  	li	r4,1  	stw	r4,THREAD_USED_VR(r3) -	li	r10,THREAD_TRANSACT_VSCR +	li	r10,THREAD_TRANSACT_VRSTATE+VRSTATE_VSCR  	lvx	vr0,r10,r3  	mtvscr	vr0 -	REST_32VRS_TRANSACT(0,r4,r3) +	addi	r10,r3,THREAD_TRANSACT_VRSTATE +	REST_32VRS(0,r4,r10)  	/* Disable VEC again. */  	MTMSRD(r6) @@ -59,12 +37,46 @@ _GLOBAL(do_load_up_transact_altivec)  #endif  /* - * load_up_altivec(unused, unused, tsk) + * Enable use of VMX/Altivec for the caller. + */ +_GLOBAL(vec_enable) +	mfmsr	r3 +	oris	r3,r3,MSR_VEC@h +	MTMSRD(r3) +	isync +	blr + +/* + * Load state from memory into VMX registers including VSCR. + * Assumes the caller has enabled VMX in the MSR. + */ +_GLOBAL(load_vr_state) +	li	r4,VRSTATE_VSCR +	lvx	vr0,r4,r3 +	mtvscr	vr0 +	REST_32VRS(0,r4,r3) +	blr + +/* + * Store VMX state into memory, including VSCR. + * Assumes the caller has enabled VMX in the MSR. + */ +_GLOBAL(store_vr_state) +	SAVE_32VRS(0, r4, r3) +	mfvscr	vr0 +	li	r4, VRSTATE_VSCR +	stvx	vr0, r4, r3 +	blr + +/*   * Disable VMX for the task which had it previously,   * and save its vector registers in its thread_struct.   * Enables the VMX for use in the kernel on return.   * On SMP we know the VMX is free, since we give it up every   * switch (ie, no lazy save of the vector registers). + * + * Note that on 32-bit this can only use registers that will be + * restored by fast_exception_return, i.e. r3 - r6, r10 and r11.   
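/*
 * (Aside, not from this patch.) The helpers added above, vec_enable(),
 * load_vr_state() and store_vr_state(), give C code a direct way to
 * move the new VR state container in and out of the VMX registers; the
 * prototype shapes below are an assumption for illustration:
 *
 *	void vec_enable(void);
 *	void load_vr_state(struct thread_vr_state *v);
 *	void store_vr_state(struct thread_vr_state *v);
 *
 * Per the comments in the hunk, MSR_VEC must already be enabled (e.g.
 * via vec_enable()) before calling either of the last two.
 */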
 */
 _GLOBAL(load_up_altivec)
 	mfmsr	r5			/* grab the current MSR */
@@ -90,10 +102,11 @@ _GLOBAL(load_up_altivec)
 	/* Save VMX state to last_task_used_altivec's THREAD struct */
 	toreal(r4)
 	addi	r4,r4,THREAD
-	SAVE_32VRS(0,r5,r4)
+	addi	r6,r4,THREAD_VRSTATE
+	SAVE_32VRS(0,r5,r6)
 	mfvscr	vr0
-	li	r10,THREAD_VSCR
-	stvx	vr0,r10,r4
+	li	r10,VRSTATE_VSCR
+	stvx	vr0,r10,r6
 	/* Disable VMX for last_task_used_altivec */
 	PPC_LL	r5,PT_REGS(r4)
 	toreal(r5)
@@ -125,12 +138,13 @@ _GLOBAL(load_up_altivec)
 	oris	r12,r12,MSR_VEC@h
 	std	r12,_MSR(r1)
 #endif
+	addi	r6,r5,THREAD_VRSTATE
 	li	r4,1
-	li	r10,THREAD_VSCR
+	li	r10,VRSTATE_VSCR
 	stw	r4,THREAD_USED_VR(r5)
-	lvx	vr0,r10,r5
+	lvx	vr0,r10,r6
 	mtvscr	vr0
-	REST_32VRS(0,r4,r5)
+	REST_32VRS(0,r4,r6)
 #ifndef CONFIG_SMP
 	/* Update last_task_used_altivec to 'current' */
 	subi	r4,r5,THREAD		/* Back to 'current' */
@@ -165,12 +179,16 @@ _GLOBAL(giveup_altivec)
 	PPC_LCMPI	0,r3,0
 	beqlr				/* if no previous owner, done */
 	addi	r3,r3,THREAD		/* want THREAD of task */
+	PPC_LL	r7,THREAD_VRSAVEAREA(r3)
 	PPC_LL	r5,PT_REGS(r3)
-	PPC_LCMPI	0,r5,0
-	SAVE_32VRS(0,r4,r3)
+	PPC_LCMPI	0,r7,0
+	bne	2f
+	addi	r7,r3,THREAD_VRSTATE
+2:	PPC_LCMPI	0,r5,0
+	SAVE_32VRS(0,r4,r7)
 	mfvscr	vr0
-	li	r4,THREAD_VSCR
-	stvx	vr0,r4,r3
+	li	r4,VRSTATE_VSCR
+	stvx	vr0,r4,r7
 	beq	1f
 	PPC_LL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
 #ifdef CONFIG_VSX
diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c
index 78a350670de..904c66128fa 100644
--- a/arch/powerpc/kernel/vio.c
+++ b/arch/powerpc/kernel/vio.c
@@ -518,16 +518,18 @@ static dma_addr_t vio_dma_iommu_map_page(struct device *dev, struct page *page,
                                          struct dma_attrs *attrs)
 {
 	struct vio_dev *viodev = to_vio_dev(dev);
+	struct iommu_table *tbl;
 	dma_addr_t ret = DMA_ERROR_CODE;
 
-	if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE))) {
+	tbl = get_iommu_table_base(dev);
+	if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl)))) {
 		atomic_inc(&viodev->cmo.allocs_failed);
 		return ret;
 	}
 
 	ret = dma_iommu_ops.map_page(dev, page, offset, size, direction, attrs);
 	if (unlikely(dma_mapping_error(dev, ret))) {
-		vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE));
+		vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl)));
 		atomic_inc(&viodev->cmo.allocs_failed);
 	}
 
@@ -540,10 +542,12 @@ static void vio_dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle,
 				     struct dma_attrs *attrs)
 {
 	struct vio_dev *viodev = to_vio_dev(dev);
+	struct iommu_table *tbl;
 
+	tbl = get_iommu_table_base(dev);
 	dma_iommu_ops.unmap_page(dev, dma_handle, size, direction, attrs);
 
-	vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE));
+	vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl)));
 }
 
 static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
@@ -551,12 +555,14 @@
                                 struct dma_attrs *attrs)
 {
 	struct vio_dev *viodev = to_vio_dev(dev);
+	struct iommu_table *tbl;
 	struct scatterlist *sgl;
 	int ret, count = 0;
 	size_t alloc_size = 0;
 
+	tbl = get_iommu_table_base(dev);
 	for (sgl = sglist; count < nelems; count++, sgl++)
-		alloc_size += roundup(sgl->length, IOMMU_PAGE_SIZE);
+		alloc_size += roundup(sgl->length, IOMMU_PAGE_SIZE(tbl));
 
 	if (vio_cmo_alloc(viodev, alloc_size)) {
 		atomic_inc(&viodev->cmo.allocs_failed);
@@ -572,7 +578,7 @@ static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
 	}
 
 	for (sgl = sglist, count = 0; count < ret; count++, sgl++)
-		alloc_size -= roundup(sgl->dma_length, IOMMU_PAGE_SIZE);
+		alloc_size -= roundup(sgl->dma_length, IOMMU_PAGE_SIZE(tbl));
 	if (alloc_size)
 		vio_cmo_dealloc(viodev, alloc_size);
 
@@ -585,12 +591,14 @@ static void vio_dma_iommu_unmap_sg(struct device *dev,
 		struct dma_attrs *attrs)
 {
 	struct vio_dev *viodev = to_vio_dev(dev);
+	struct iommu_table *tbl;
 	struct scatterlist *sgl;
 	size_t alloc_size = 0;
 	int count = 0;
 
+	tbl = get_iommu_table_base(dev);
 	for (sgl = sglist; count < nelems; count++, sgl++)
-		alloc_size += roundup(sgl->dma_length, IOMMU_PAGE_SIZE);
+		alloc_size += roundup(sgl->dma_length, IOMMU_PAGE_SIZE(tbl));
 
 	dma_iommu_ops.unmap_sg(dev, sglist, nelems, direction, attrs);
 
@@ -706,11 +714,14 @@ static int vio_cmo_bus_probe(struct vio_dev *viodev)
 {
 	struct vio_cmo_dev_entry *dev_ent;
 	struct device *dev = &viodev->dev;
+	struct iommu_table *tbl;
 	struct vio_driver *viodrv = to_vio_driver(dev->driver);
 	unsigned long flags;
 	size_t size;
 	bool dma_capable = false;
 
+	tbl = get_iommu_table_base(dev);
+
 	/* A device requires entitlement if it has a DMA window property */
 	switch (viodev->family) {
 	case VDEVICE:
@@ -736,7 +747,8 @@ static int vio_cmo_bus_probe(struct vio_dev *viodev)
 			return -EINVAL;
 		}
 
-		viodev->cmo.desired = IOMMU_PAGE_ALIGN(viodrv->get_desired_dma(viodev));
+		viodev->cmo.desired =
+			IOMMU_PAGE_ALIGN(viodrv->get_desired_dma(viodev), tbl);
 		if (viodev->cmo.desired < VIO_CMO_MIN_ENT)
 			viodev->cmo.desired = VIO_CMO_MIN_ENT;
 		size = VIO_CMO_MIN_ENT;
@@ -997,21 +1009,36 @@ static struct device_attribute vio_cmo_dev_attrs[] = {
 
 /* sysfs bus functions and data structures for CMO */
 #define viobus_cmo_rd_attr(name)                                        \
-static ssize_t                                                          \
-viobus_cmo_##name##_show(struct bus_type *bt, char *buf)                \
+static ssize_t cmo_##name##_show(struct bus_type *bt, char *buf)        \
 {                                                                       \
 	return sprintf(buf, "%lu\n", vio_cmo.name);                     \
-}
+}                                                                       \
+static BUS_ATTR_RO(cmo_##name)
 
 #define viobus_cmo_pool_rd_attr(name, var)                              \
 static ssize_t                                                          \
-viobus_cmo_##name##_pool_show_##var(struct bus_type *bt, char *buf)     \
+cmo_##name##_##var##_show(struct bus_type *bt, char *buf)               \
 {                                                                       \
 	return sprintf(buf, "%lu\n", vio_cmo.name.var);                 \
+}                                                                       \
+static BUS_ATTR_RO(cmo_##name##_##var)
+
+viobus_cmo_rd_attr(entitled);
+viobus_cmo_rd_attr(spare);
+viobus_cmo_rd_attr(min);
+viobus_cmo_rd_attr(desired);
+viobus_cmo_rd_attr(curr);
+viobus_cmo_pool_rd_attr(reserve, size);
+viobus_cmo_pool_rd_attr(excess, size);
+viobus_cmo_pool_rd_attr(excess, free);
+
+static ssize_t cmo_high_show(struct bus_type *bt, char *buf)
+{
+	return sprintf(buf, "%lu\n", vio_cmo.high);
 }
 
-static ssize_t viobus_cmo_high_reset(struct bus_type *bt, const char *buf,
-                                     size_t count)
+static ssize_t cmo_high_store(struct bus_type *bt, const char *buf,
+			      size_t count)
 {
 	unsigned long flags;
 
@@ -1021,35 +1048,26 @@ static ssize_t viobus_cmo_high_reset(struct bus_type *bt, const char *buf,
 	return count;
 }
-
-viobus_cmo_rd_attr(entitled);
-viobus_cmo_pool_rd_attr(reserve, size);
-viobus_cmo_pool_rd_attr(excess, size);
-viobus_cmo_pool_rd_attr(excess, free);
-viobus_cmo_rd_attr(spare);
-viobus_cmo_rd_attr(min);
-viobus_cmo_rd_attr(desired);
-viobus_cmo_rd_attr(curr);
-viobus_cmo_rd_attr(high);
-
-static struct bus_attribute vio_cmo_bus_attrs[] = {
-	__ATTR(cmo_entitled, S_IRUGO, viobus_cmo_entitled_show, NULL),
-	__ATTR(cmo_reserve_size, S_IRUGO, viobus_cmo_reserve_pool_show_size, NULL),
-	__ATTR(cmo_excess_size, S_IRUGO, viobus_cmo_excess_pool_show_size, NULL),
-	__ATTR(cmo_excess_free, S_IRUGO, viobus_cmo_excess_pool_show_free, NULL),
-	__ATTR(cmo_spare,   S_IRUGO, viobus_cmo_spare_show,   NULL),
-	__ATTR(cmo_min,     S_IRUGO, viobus_cmo_min_show,     NULL),
-	__ATTR(cmo_desired, S_IRUGO, viobus_cmo_desired_show, NULL),
-	__ATTR(cmo_curr,    S_IRUGO, viobus_cmo_curr_show,    NULL),
-	__ATTR(cmo_high,    S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH,
-	       viobus_cmo_high_show, viobus_cmo_high_reset),
-	__ATTR_NULL
+static BUS_ATTR_RW(cmo_high);
+
+static struct attribute *vio_bus_attrs[] = {
+	&bus_attr_cmo_entitled.attr,
+	&bus_attr_cmo_spare.attr,
+	&bus_attr_cmo_min.attr,
+	&bus_attr_cmo_desired.attr,
+	&bus_attr_cmo_curr.attr,
+	&bus_attr_cmo_high.attr,
+	&bus_attr_cmo_reserve_size.attr,
+	&bus_attr_cmo_excess_size.attr,
+	&bus_attr_cmo_excess_free.attr,
+	NULL,
 };
+ATTRIBUTE_GROUPS(vio_bus);
 
 static void vio_cmo_sysfs_init(void)
 {
 	vio_bus_type.dev_attrs = vio_cmo_dev_attrs;
-	vio_bus_type.bus_attrs = vio_cmo_bus_attrs;
+	vio_bus_type.bus_groups = vio_bus_groups;
 }
 
 #else /* CONFIG_PPC_SMLPAR */
 int vio_cmo_entitlement_update(size_t new_entitlement) { return 0; }
@@ -1170,9 +1188,10 @@ static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev)
 			    &tbl->it_index, &offset, &size);
 
 	/* TCE table size - measured in tce entries */
-	tbl->it_size = size >> IOMMU_PAGE_SHIFT;
+	tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K;
+	tbl->it_size = size >> tbl->it_page_shift;
 	/* offset for VIO should always be 0 */
-	tbl->it_offset = offset >> IOMMU_PAGE_SHIFT;
+	tbl->it_offset = offset >> tbl->it_page_shift;
 	tbl->it_busno = 0;
 	tbl->it_type = TCE_VB;
 	tbl->it_blocksize = 16;
@@ -1413,8 +1432,8 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node)
 
 		/* needed to ensure proper operation of coherent allocations
 		 * later, in case driver doesn't set it explicitly */
-		dma_set_mask(&viodev->dev, DMA_BIT_MASK(64));
-		dma_set_coherent_mask(&viodev->dev, DMA_BIT_MASK(64));
+		viodev->dev.coherent_dma_mask = DMA_BIT_MASK(64);
+		viodev->dev.dma_mask = &viodev->dev.coherent_dma_mask;
 	}
 
 	/* register with generic device framework */
@@ -1530,11 +1549,15 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
 	const char *cp;
 
 	dn = dev->of_node;
-	if (!dn)
-		return -ENODEV;
+	if (!dn) {
+		strcpy(buf, "\n");
+		return strlen(buf);
+	}
 	cp = of_get_property(dn, "compatible", NULL);
-	if (!cp)
-		return -ENODEV;
+	if (!cp) {
+		strcpy(buf, "\n");
+		return strlen(buf);
+	}
 
 	return sprintf(buf, "vio:T%sS%s\n", vio_dev->type, cp);
 }
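
The sigtramp.S hunk above makes the vDSO's DWARF unwind offset for CR endian-aware: CR is a CRSIZE (4-byte) value stored in an RSIZE (8-byte) frame slot, so big-endian keeps it in the high-address half of the slot while little-endian keeps it at offset 0, and DWARF columns 68-75 (the individual CR fields) now all map onto that one saved word. A minimal standalone C model of the CROFF arithmetic; the constants mirror the patch's macros, and nothing below is kernel API:

#include <stdio.h>

#define RSIZE	8	/* size of one register slot in the frame */
#define CRSIZE	4	/* CR is a 32-bit value inside that slot */

/* Mirrors the CROFF #ifdef: the CR bytes sit at the high end of the
 * 8-byte slot on big-endian, at the low end on little-endian. */
static unsigned croff(int big_endian)
{
	return big_endian ? RSIZE - CRSIZE : 0;
}

int main(void)
{
	printf("CR offset in slot, BE: %u\n", croff(1));	/* prints 4 */
	printf("CR offset in slot, LE: %u\n", croff(0));	/* prints 0 */
	return 0;
}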
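The new vector.S entry points (vec_enable, load_vr_state, store_vr_state) give C callers a way to move the whole VMX context, VSCR included, between the registers and a thread's vr_state buffer, with the stated precondition that VMX is already enabled in the MSR. A hedged C-level sketch of that contract follows; the struct layout is only inferred from VRSTATE_VSCR being used as a byte offset past the base pointer, and checkpoint_vmx() is a hypothetical caller, not code from this patch:

/* Assumed layout of the save area: 32 x 128-bit VRs, then one
 * 128-bit slot holding VSCR (VRSTATE_VSCR would then be 32 * 16).
 * This compiles as a translation unit against the asm entry points. */
struct vr_state_sketch {
	unsigned char vr[32][16];
	unsigned char vscr[16];
};

void vec_enable(void);				/* sets MSR_VEC, then isync */
void load_vr_state(struct vr_state_sketch *v);	/* memory -> VRs + VSCR */
void store_vr_state(struct vr_state_sketch *v);	/* VRs + VSCR -> memory */

/* Hypothetical caller honouring the precondition. */
static void checkpoint_vmx(struct vr_state_sketch *v)
{
	vec_enable();		/* both helpers assume VMX is usable */
	store_vr_state(v);
}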
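Across the vio.c hunks, CMO entitlement is now charged in units of the IOMMU table's own page size rather than a global constant: each mapping path looks the table up with get_iommu_table_base() and rounds with IOMMU_PAGE_SIZE(tbl), and vio_build_iommu_table() seeds it_page_shift with IOMMU_PAGE_SHIFT_4K. A small standalone model of that accounting, assuming IOMMU_PAGE_SIZE(tbl) reduces to 1UL << it_page_shift; the stand-in struct is illustrative, not the kernel's iommu_table:

#include <stdio.h>

struct iommu_table_sketch {
	unsigned it_page_shift;		/* e.g. 12 for 4K pages */
};

/* Round a DMA request up to whole IOMMU pages, per table. */
static unsigned long cmo_charge(const struct iommu_table_sketch *tbl,
				unsigned long len)
{
	unsigned long page = 1UL << tbl->it_page_shift;
	return (len + page - 1) & ~(page - 1);
}

int main(void)
{
	struct iommu_table_sketch tbl = { .it_page_shift = 12 };
	printf("%lu\n", cmo_charge(&tbl, 5000));	/* 8192 */
	tbl.it_page_shift = 16;				/* 64K-page table */
	printf("%lu\n", cmo_charge(&tbl, 5000));	/* 65536 */
	return 0;
}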
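The sysfs rework drops the hand-built vio_cmo_bus_attrs array in favour of per-attribute BUS_ATTR_RO()/BUS_ATTR_RW() declarations collected through ATTRIBUTE_GROUPS(), which is why every show routine is renamed to the cmo_<name>_show pattern those macros expect. Roughly what one invocation now expands to, paraphrasing the generic macros from linux/device.h rather than quoting this patch:

/* viobus_cmo_rd_attr(entitled) produces the show routine... */
static ssize_t cmo_entitled_show(struct bus_type *bt, char *buf)
{
	return sprintf(buf, "%lu\n", vio_cmo.entitled);
}
/* ...and BUS_ATTR_RO(cmo_entitled) then declares the attribute object,
 * wiring the show routine up purely by name, approximately: */
static struct bus_attribute bus_attr_cmo_entitled =
	__ATTR(cmo_entitled, 0444, cmo_entitled_show, NULL);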
