diff options
Diffstat (limited to 'arch/powerpc/math-emu/math_efp.c')
| -rw-r--r-- | arch/powerpc/math-emu/math_efp.c | 316 | 
1 files changed, 238 insertions, 78 deletions
diff --git a/arch/powerpc/math-emu/math_efp.c b/arch/powerpc/math-emu/math_efp.c index a73f0884d35..28337c9709a 100644 --- a/arch/powerpc/math-emu/math_efp.c +++ b/arch/powerpc/math-emu/math_efp.c @@ -20,6 +20,7 @@   */  #include <linux/types.h> +#include <linux/prctl.h>  #include <asm/uaccess.h>  #include <asm/reg.h> @@ -275,21 +276,13 @@ int do_spe_mathemu(struct pt_regs *regs)  		case EFSCTSF:  		case EFSCTUF: -			if (!((vb.wp[1] >> 23) == 0xff && ((vb.wp[1] & 0x7fffff) > 0))) { -				/* NaN */ -				if (((vb.wp[1] >> 23) & 0xff) == 0) { -					/* denorm */ -					vc.wp[1] = 0x0; -				} else if ((vb.wp[1] >> 31) == 0) { -					/* positive normal */ -					vc.wp[1] = (func == EFSCTSF) ? -						0x7fffffff : 0xffffffff; -				} else { /* negative normal */ -					vc.wp[1] = (func == EFSCTSF) ? -						0x80000000 : 0x0; -				} -			} else { /* rB is NaN */ -				vc.wp[1] = 0x0; +			if (SB_c == FP_CLS_NAN) { +				vc.wp[1] = 0; +				FP_SET_EXCEPTION(FP_EX_INVALID); +			} else { +				SB_e += (func == EFSCTSF ? 31 : 32); +				FP_TO_INT_ROUND_S(vc.wp[1], SB, 32, +						(func == EFSCTSF));  			}  			goto update_regs; @@ -306,16 +299,25 @@ int do_spe_mathemu(struct pt_regs *regs)  		}  		case EFSCTSI: -		case EFSCTSIZ:  		case EFSCTUI: +			if (SB_c == FP_CLS_NAN) { +				vc.wp[1] = 0; +				FP_SET_EXCEPTION(FP_EX_INVALID); +			} else { +				FP_TO_INT_ROUND_S(vc.wp[1], SB, 32, +						((func & 0x3) != 0)); +			} +			goto update_regs; + +		case EFSCTSIZ:  		case EFSCTUIZ: -			if (func & 0x4) { -				_FP_ROUND(1, SB); +			if (SB_c == FP_CLS_NAN) { +				vc.wp[1] = 0; +				FP_SET_EXCEPTION(FP_EX_INVALID);  			} else { -				_FP_ROUND_ZERO(1, SB); +				FP_TO_INT_S(vc.wp[1], SB, 32, +						((func & 0x3) != 0));  			} -			FP_TO_INT_S(vc.wp[1], SB, 32, -					(((func & 0x3) != 0) || SB_s));  			goto update_regs;  		default: @@ -404,22 +406,13 @@ cmp_s:  		case EFDCTSF:  		case EFDCTUF: -			if (!((vb.wp[0] >> 20) == 0x7ff && -			   ((vb.wp[0] & 0xfffff) > 0 || (vb.wp[1] > 0)))) { -				/* not a NaN */ -				if (((vb.wp[0] >> 20) & 0x7ff) == 0) { -					/* denorm */ -					vc.wp[1] = 0x0; -				} else if ((vb.wp[0] >> 31) == 0) { -					/* positive normal */ -					vc.wp[1] = (func == EFDCTSF) ? -						0x7fffffff : 0xffffffff; -				} else { /* negative normal */ -					vc.wp[1] = (func == EFDCTSF) ? -						0x80000000 : 0x0; -				} -			} else { /* NaN */ -				vc.wp[1] = 0x0; +			if (DB_c == FP_CLS_NAN) { +				vc.wp[1] = 0; +				FP_SET_EXCEPTION(FP_EX_INVALID); +			} else { +				DB_e += (func == EFDCTSF ? 31 : 32); +				FP_TO_INT_ROUND_D(vc.wp[1], DB, 32, +						(func == EFDCTSF));  			}  			goto update_regs; @@ -437,21 +430,35 @@ cmp_s:  		case EFDCTUIDZ:  		case EFDCTSIDZ: -			_FP_ROUND_ZERO(2, DB); -			FP_TO_INT_D(vc.dp[0], DB, 64, ((func & 0x1) == 0)); +			if (DB_c == FP_CLS_NAN) { +				vc.dp[0] = 0; +				FP_SET_EXCEPTION(FP_EX_INVALID); +			} else { +				FP_TO_INT_D(vc.dp[0], DB, 64, +						((func & 0x1) == 0)); +			}  			goto update_regs;  		case EFDCTUI:  		case EFDCTSI: +			if (DB_c == FP_CLS_NAN) { +				vc.wp[1] = 0; +				FP_SET_EXCEPTION(FP_EX_INVALID); +			} else { +				FP_TO_INT_ROUND_D(vc.wp[1], DB, 32, +						((func & 0x3) != 0)); +			} +			goto update_regs; +  		case EFDCTUIZ:  		case EFDCTSIZ: -			if (func & 0x4) { -				_FP_ROUND(2, DB); +			if (DB_c == FP_CLS_NAN) { +				vc.wp[1] = 0; +				FP_SET_EXCEPTION(FP_EX_INVALID);  			} else { -				_FP_ROUND_ZERO(2, DB); +				FP_TO_INT_D(vc.wp[1], DB, 32, +						((func & 0x3) != 0));  			} -			FP_TO_INT_D(vc.wp[1], DB, 32, -					(((func & 0x3) != 0) || DB_s));  			goto update_regs;  		default: @@ -556,37 +563,60 @@ cmp_d:  			cmp = -1;  			goto cmp_vs; -		case EVFSCTSF: -			__asm__ __volatile__ ("mtspr 512, %4\n" -				"efsctsf %0, %2\n" -				"efsctsf %1, %3\n" -				: "=r" (vc.wp[0]), "=r" (vc.wp[1]) -				: "r" (vb.wp[0]), "r" (vb.wp[1]), "r" (0)); -			goto update_regs; -  		case EVFSCTUF: -			__asm__ __volatile__ ("mtspr 512, %4\n" -				"efsctuf %0, %2\n" -				"efsctuf %1, %3\n" -				: "=r" (vc.wp[0]), "=r" (vc.wp[1]) -				: "r" (vb.wp[0]), "r" (vb.wp[1]), "r" (0)); +		case EVFSCTSF: +			if (SB0_c == FP_CLS_NAN) { +				vc.wp[0] = 0; +				FP_SET_EXCEPTION(FP_EX_INVALID); +			} else { +				SB0_e += (func == EVFSCTSF ? 31 : 32); +				FP_TO_INT_ROUND_S(vc.wp[0], SB0, 32, +						(func == EVFSCTSF)); +			} +			if (SB1_c == FP_CLS_NAN) { +				vc.wp[1] = 0; +				FP_SET_EXCEPTION(FP_EX_INVALID); +			} else { +				SB1_e += (func == EVFSCTSF ? 31 : 32); +				FP_TO_INT_ROUND_S(vc.wp[1], SB1, 32, +						(func == EVFSCTSF)); +			}  			goto update_regs;  		case EVFSCTUI:  		case EVFSCTSI: +			if (SB0_c == FP_CLS_NAN) { +				vc.wp[0] = 0; +				FP_SET_EXCEPTION(FP_EX_INVALID); +			} else { +				FP_TO_INT_ROUND_S(vc.wp[0], SB0, 32, +						((func & 0x3) != 0)); +			} +			if (SB1_c == FP_CLS_NAN) { +				vc.wp[1] = 0; +				FP_SET_EXCEPTION(FP_EX_INVALID); +			} else { +				FP_TO_INT_ROUND_S(vc.wp[1], SB1, 32, +						((func & 0x3) != 0)); +			} +			goto update_regs; +  		case EVFSCTUIZ:  		case EVFSCTSIZ: -			if (func & 0x4) { -				_FP_ROUND(1, SB0); -				_FP_ROUND(1, SB1); +			if (SB0_c == FP_CLS_NAN) { +				vc.wp[0] = 0; +				FP_SET_EXCEPTION(FP_EX_INVALID);  			} else { -				_FP_ROUND_ZERO(1, SB0); -				_FP_ROUND_ZERO(1, SB1); +				FP_TO_INT_S(vc.wp[0], SB0, 32, +						((func & 0x3) != 0)); +			} +			if (SB1_c == FP_CLS_NAN) { +				vc.wp[1] = 0; +				FP_SET_EXCEPTION(FP_EX_INVALID); +			} else { +				FP_TO_INT_S(vc.wp[1], SB1, 32, +						((func & 0x3) != 0));  			} -			FP_TO_INT_S(vc.wp[0], SB0, 32, -					(((func & 0x3) != 0) || SB0_s)); -			FP_TO_INT_S(vc.wp[1], SB1, 32, -					(((func & 0x3) != 0) || SB1_s));  			goto update_regs;  		default: @@ -630,9 +660,27 @@ update_ccr:  	regs->ccr |= (IR << ((7 - ((speinsn >> 23) & 0x7)) << 2));  update_regs: -	__FPU_FPSCR &= ~FP_EX_MASK; +	/* +	 * If the "invalid" exception sticky bit was set by the +	 * processor for non-finite input, but was not set before the +	 * instruction being emulated, clear it.  Likewise for the +	 * "underflow" bit, which may have been set by the processor +	 * for exact underflow, not just inexact underflow when the +	 * flag should be set for IEEE 754 semantics.  Other sticky +	 * exceptions will only be set by the processor when they are +	 * correct according to IEEE 754 semantics, and we must not +	 * clear sticky bits that were already set before the emulated +	 * instruction as they represent the user-visible sticky +	 * exception status.  "inexact" traps to kernel are not +	 * required for IEEE semantics and are not enabled by default, +	 * so the "inexact" sticky bit may have been set by a previous +	 * instruction without the kernel being aware of it. +	 */ +	__FPU_FPSCR +	  &= ~(FP_EX_INVALID | FP_EX_UNDERFLOW) | current->thread.spefscr_last;  	__FPU_FPSCR |= (FP_CUR_EXCEPTIONS & FP_EX_MASK);  	mtspr(SPRN_SPEFSCR, __FPU_FPSCR); +	current->thread.spefscr_last = __FPU_FPSCR;  	current->thread.evr[fc] = vc.wp[0];  	regs->gpr[fc] = vc.wp[1]; @@ -644,6 +692,23 @@ update_regs:  	pr_debug("va: %08x  %08x\n", va.wp[0], va.wp[1]);  	pr_debug("vb: %08x  %08x\n", vb.wp[0], vb.wp[1]); +	if (current->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE) { +		if ((FP_CUR_EXCEPTIONS & FP_EX_DIVZERO) +		    && (current->thread.fpexc_mode & PR_FP_EXC_DIV)) +			return 1; +		if ((FP_CUR_EXCEPTIONS & FP_EX_OVERFLOW) +		    && (current->thread.fpexc_mode & PR_FP_EXC_OVF)) +			return 1; +		if ((FP_CUR_EXCEPTIONS & FP_EX_UNDERFLOW) +		    && (current->thread.fpexc_mode & PR_FP_EXC_UND)) +			return 1; +		if ((FP_CUR_EXCEPTIONS & FP_EX_INEXACT) +		    && (current->thread.fpexc_mode & PR_FP_EXC_RES)) +			return 1; +		if ((FP_CUR_EXCEPTIONS & FP_EX_INVALID) +		    && (current->thread.fpexc_mode & PR_FP_EXC_INV)) +			return 1; +	}  	return 0;  illegal: @@ -662,21 +727,28 @@ int speround_handler(struct pt_regs *regs)  {  	union dw_union fgpr;  	int s_lo, s_hi; -	unsigned long speinsn, type, fc; +	int lo_inexact, hi_inexact; +	int fp_result; +	unsigned long speinsn, type, fb, fc, fptype, func;  	if (get_user(speinsn, (unsigned int __user *) regs->nip))  		return -EFAULT;  	if ((speinsn >> 26) != 4)  		return -EINVAL;         /* not an spe instruction */ -	type = insn_type(speinsn & 0x7ff); +	func = speinsn & 0x7ff; +	type = insn_type(func);  	if (type == XCR) return -ENOSYS;  	__FPU_FPSCR = mfspr(SPRN_SPEFSCR);  	pr_debug("speinsn:%08lx spefscr:%08lx\n", speinsn, __FPU_FPSCR); +	fptype = (speinsn >> 5) & 0x7; +  	/* No need to round if the result is exact */ -	if (!(__FPU_FPSCR & FP_EX_INEXACT)) +	lo_inexact = __FPU_FPSCR & (SPEFSCR_FG | SPEFSCR_FX); +	hi_inexact = __FPU_FPSCR & (SPEFSCR_FGH | SPEFSCR_FXH); +	if (!(lo_inexact || (hi_inexact && fptype == VCT)))  		return 0;  	fc = (speinsn >> 21) & 0x1f; @@ -685,9 +757,68 @@ int speround_handler(struct pt_regs *regs)  	fgpr.wp[0] = current->thread.evr[fc];  	fgpr.wp[1] = regs->gpr[fc]; +	fb = (speinsn >> 11) & 0x1f; +	switch (func) { +	case EFSCTUIZ: +	case EFSCTSIZ: +	case EVFSCTUIZ: +	case EVFSCTSIZ: +	case EFDCTUIDZ: +	case EFDCTSIDZ: +	case EFDCTUIZ: +	case EFDCTSIZ: +		/* +		 * These instructions always round to zero, +		 * independent of the rounding mode. +		 */ +		return 0; + +	case EFSCTUI: +	case EFSCTUF: +	case EVFSCTUI: +	case EVFSCTUF: +	case EFDCTUI: +	case EFDCTUF: +		fp_result = 0; +		s_lo = 0; +		s_hi = 0; +		break; + +	case EFSCTSI: +	case EFSCTSF: +		fp_result = 0; +		/* Recover the sign of a zero result if possible.  */ +		if (fgpr.wp[1] == 0) +			s_lo = regs->gpr[fb] & SIGN_BIT_S; +		break; + +	case EVFSCTSI: +	case EVFSCTSF: +		fp_result = 0; +		/* Recover the sign of a zero result if possible.  */ +		if (fgpr.wp[1] == 0) +			s_lo = regs->gpr[fb] & SIGN_BIT_S; +		if (fgpr.wp[0] == 0) +			s_hi = current->thread.evr[fb] & SIGN_BIT_S; +		break; + +	case EFDCTSI: +	case EFDCTSF: +		fp_result = 0; +		s_hi = s_lo; +		/* Recover the sign of a zero result if possible.  */ +		if (fgpr.wp[1] == 0) +			s_hi = current->thread.evr[fb] & SIGN_BIT_S; +		break; + +	default: +		fp_result = 1; +		break; +	} +  	pr_debug("round fgpr: %08x  %08x\n", fgpr.wp[0], fgpr.wp[1]); -	switch ((speinsn >> 5) & 0x7) { +	switch (fptype) {  	/* Since SPE instructions on E500 core can handle round to nearest  	 * and round toward zero with IEEE-754 complied, we just need  	 * to handle round toward +Inf and round toward -Inf by software. @@ -696,25 +827,52 @@ int speround_handler(struct pt_regs *regs)  		if ((FP_ROUNDMODE) == FP_RND_PINF) {  			if (!s_lo) fgpr.wp[1]++; /* Z > 0, choose Z1 */  		} else { /* round to -Inf */ -			if (s_lo) fgpr.wp[1]++; /* Z < 0, choose Z2 */ +			if (s_lo) { +				if (fp_result) +					fgpr.wp[1]++; /* Z < 0, choose Z2 */ +				else +					fgpr.wp[1]--; /* Z < 0, choose Z2 */ +			}  		}  		break;  	case DPFP:  		if (FP_ROUNDMODE == FP_RND_PINF) { -			if (!s_hi) fgpr.dp[0]++; /* Z > 0, choose Z1 */ +			if (!s_hi) { +				if (fp_result) +					fgpr.dp[0]++; /* Z > 0, choose Z1 */ +				else +					fgpr.wp[1]++; /* Z > 0, choose Z1 */ +			}  		} else { /* round to -Inf */ -			if (s_hi) fgpr.dp[0]++; /* Z < 0, choose Z2 */ +			if (s_hi) { +				if (fp_result) +					fgpr.dp[0]++; /* Z < 0, choose Z2 */ +				else +					fgpr.wp[1]--; /* Z < 0, choose Z2 */ +			}  		}  		break;  	case VCT:  		if (FP_ROUNDMODE == FP_RND_PINF) { -			if (!s_lo) fgpr.wp[1]++; /* Z_low > 0, choose Z1 */ -			if (!s_hi) fgpr.wp[0]++; /* Z_high word > 0, choose Z1 */ +			if (lo_inexact && !s_lo) +				fgpr.wp[1]++; /* Z_low > 0, choose Z1 */ +			if (hi_inexact && !s_hi) +				fgpr.wp[0]++; /* Z_high word > 0, choose Z1 */  		} else { /* round to -Inf */ -			if (s_lo) fgpr.wp[1]++; /* Z_low < 0, choose Z2 */ -			if (s_hi) fgpr.wp[0]++; /* Z_high < 0, choose Z2 */ +			if (lo_inexact && s_lo) { +				if (fp_result) +					fgpr.wp[1]++; /* Z_low < 0, choose Z2 */ +				else +					fgpr.wp[1]--; /* Z_low < 0, choose Z2 */ +			} +			if (hi_inexact && s_hi) { +				if (fp_result) +					fgpr.wp[0]++; /* Z_high < 0, choose Z2 */ +				else +					fgpr.wp[0]--; /* Z_high < 0, choose Z2 */ +			}  		}  		break; @@ -727,6 +885,8 @@ int speround_handler(struct pt_regs *regs)  	pr_debug("  to fgpr: %08x  %08x\n", fgpr.wp[0], fgpr.wp[1]); +	if (current->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE) +		return (current->thread.fpexc_mode & PR_FP_EXC_RES) ? 1 : 0;  	return 0;  }  | 
