about summary refs log tree commit diff
path: root/arch/powerpc/kernel/align.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kernel/align.c')
-rw-r--r-- arch/powerpc/kernel/align.c 406
1 files changed, 287 insertions, 119 deletions
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
index e06f75daeba..34f55524d45 100644
--- a/arch/powerpc/kernel/align.c
+++ b/arch/powerpc/kernel/align.c
@@ -21,17 +21,17 @@
#include <linux/mm.h>
#include <asm/processor.h>
#include <asm/uaccess.h>
-#include <asm/system.h>
#include <asm/cache.h>
#include <asm/cputable.h>
+#include <asm/emulated_ops.h>
+#include <asm/switch_to.h>
+#include <asm/disassemble.h>
struct aligninfo {
unsigned char len;
unsigned char flags;
};
-#define IS_XFORM(inst) (((inst) >> 26) == 31)
-#define IS_DSFORM(inst) (((inst) >> 26) >= 56)
#define INVALID { 0, 0 }
@@ -48,12 +48,11 @@ struct aligninfo {
#define HARD 0x80 /* string, stwcx. */
#define E4 0x40 /* SPE endianness is word */
#define E8 0x80 /* SPE endianness is double word */
+#define SPLT 0x80 /* VSX SPLAT load */
/* DSISR bits reported for a DCBZ instruction: */
#define DCBZ 0x5f /* 8xx/82xx dcbz faults when cache not enabled */
-#define SWAP(a, b) (t = (a), (a) = (b), (b) = t)
-
/*
* The PowerPC stores certain bits of the instruction that caused the
* alignment exception in the DSISR register. This array maps those
@@ -73,7 +72,7 @@ static struct aligninfo aligninfo[128] = {
{ 8, LD+F }, /* 00 0 1001: lfd */
{ 4, ST+F+S }, /* 00 0 1010: stfs */
{ 8, ST+F }, /* 00 0 1011: stfd */
- INVALID, /* 00 0 1100 */
+ { 16, LD }, /* 00 0 1100: lq */
{ 8, LD }, /* 00 0 1101: ld/ldu/lwa */
INVALID, /* 00 0 1110 */
{ 8, ST }, /* 00 0 1111: std/stdu */
@@ -140,7 +139,7 @@ static struct aligninfo aligninfo[128] = {
{ 2, LD+SW }, /* 10 0 1100: lhbrx */
{ 4, LD+SE }, /* 10 0 1101 lwa */
{ 2, ST+SW }, /* 10 0 1110: sthbrx */
- INVALID, /* 10 0 1111 */
+ { 16, ST }, /* 10 0 1111: stq */
INVALID, /* 10 1 0000 */
INVALID, /* 10 1 0001 */
INVALID, /* 10 1 0010 */
@@ -186,43 +185,12 @@ static struct aligninfo aligninfo[128] = {
{ 4, ST+F+S+U }, /* 11 1 1010: stfsux */
{ 8, ST+F+U }, /* 11 1 1011: stfdux */
INVALID, /* 11 1 1100 */
- INVALID, /* 11 1 1101 */
+ { 4, LD+F }, /* 11 1 1101: lfiwzx */
INVALID, /* 11 1 1110 */
INVALID, /* 11 1 1111 */
};
/*
- * Create a DSISR value from the instruction
- */
-static inline unsigned make_dsisr(unsigned instr)
-{
- unsigned dsisr;
-
-
- /* bits 6:15 --> 22:31 */
- dsisr = (instr & 0x03ff0000) >> 16;
-
- if (IS_XFORM(instr)) {
- /* bits 29:30 --> 15:16 */
- dsisr |= (instr & 0x00000006) << 14;
- /* bit 25 --> 17 */
- dsisr |= (instr & 0x00000040) << 8;
- /* bits 21:24 --> 18:21 */
- dsisr |= (instr & 0x00000780) << 3;
- } else {
- /* bit 5 --> 17 */
- dsisr |= (instr & 0x04000000) >> 12;
- /* bits 1: 4 --> 18:21 */
- dsisr |= (instr & 0x78000000) >> 17;
- /* bits 30:31 --> 12:13 */
- if (IS_DSFORM(instr))
- dsisr |= (instr & 0x00000003) << 18;
- }
-
- return dsisr;
-}
-
-/*
* The dcbz (data cache block zero) instruction
* gives an alignment fault if used on non-cacheable
* memory. We handle the fault mainly for the
@@ -254,11 +222,17 @@ static int emulate_dcbz(struct pt_regs *regs, unsigned char __user *addr)
* bottom 4 bytes of each register, and the loads clear the
* top 4 bytes of the affected register.
*/
+#ifdef __BIG_ENDIAN__
#ifdef CONFIG_PPC64
#define REG_BYTE(rp, i) *((u8 *)((rp) + ((i) >> 2)) + ((i) & 3) + 4)
#else
#define REG_BYTE(rp, i) *((u8 *)(rp) + (i))
#endif
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+#define REG_BYTE(rp, i) (*(((u8 *)((rp) + ((i)>>2)) + ((i)&3))))
+#endif
#define SWIZ_PTR(p) ((unsigned char __user *)((p) ^ swiz))
@@ -303,6 +277,15 @@ static int emulate_multiple(struct pt_regs *regs, unsigned char __user *addr,
nb0 = nb + reg * 4 - 128;
nb = 128 - reg * 4;
}
+#ifdef __LITTLE_ENDIAN__
+ /*
+ * String instructions are endian neutral but the code
+ * below is not. Force byte swapping on so that the
+ * effects of swizzling are undone in the load/store
+ * loops below.
+ */
+ flags ^= SW;
+#endif
} else {
/* lwm, stmw */
nb = (32 - reg) * 4;
@@ -363,30 +346,25 @@ static int emulate_multiple(struct pt_regs *regs, unsigned char __user *addr,
* Only POWER6 has these instructions, and it does true little-endian,
* so we don't need the address swizzling.
*/
-static int emulate_fp_pair(struct pt_regs *regs, unsigned char __user *addr,
- unsigned int reg, unsigned int flags)
+static int emulate_fp_pair(unsigned char __user *addr, unsigned int reg,
+ unsigned int flags)
{
- char *ptr = (char *) &current->thread.fpr[reg];
- int i, ret;
+ char *ptr0 = (char *) &current->thread.TS_FPR(reg);
+ char *ptr1 = (char *) &current->thread.TS_FPR(reg+1);
+ int i, ret, sw = 0;
- if (!(flags & F))
- return 0;
if (reg & 1)
return 0; /* invalid form: FRS/FRT must be even */
- if (!(flags & SW)) {
- /* not byte-swapped - easy */
- if (!(flags & ST))
- ret = __copy_from_user(ptr, addr, 16);
- else
- ret = __copy_to_user(addr, ptr, 16);
- } else {
- /* each FPR value is byte-swapped separately */
- ret = 0;
- for (i = 0; i < 16; ++i) {
- if (!(flags & ST))
- ret |= __get_user(ptr[i^7], addr + i);
- else
- ret |= __put_user(ptr[i^7], addr + i);
+ if (flags & SW)
+ sw = 7;
+ ret = 0;
+ for (i = 0; i < 8; ++i) {
+ if (!(flags & ST)) {
+ ret |= __get_user(ptr0[i^sw], addr + i);
+ ret |= __get_user(ptr1[i^sw], addr + i + 8);
+ } else {
+ ret |= __put_user(ptr0[i^sw], addr + i);
+ ret |= __put_user(ptr1[i^sw], addr + i + 8);
}
}
if (ret)
@@ -394,6 +372,34 @@ static int emulate_fp_pair(struct pt_regs *regs, unsigned char __user *addr,
return 1; /* exception handled and fixed up */
}
+#ifdef CONFIG_PPC64 /* emulate_lq_stq: byte-wise emulation of a 16-byte load/store on a GPR pair */
+static int emulate_lq_stq(struct pt_regs *regs, unsigned char __user *addr,
+ unsigned int reg, unsigned int flags)
+{
+ char *ptr0 = (char *)&regs->gpr[reg]; /* byte view of the first register of the pair */
+ char *ptr1 = (char *)&regs->gpr[reg+1]; /* byte view of the second register of the pair */
+ int i, ret, sw = 0; /* sw: XOR mask applied to the byte index inside each register */
+ /* Returns 0 if the form is rejected, 1 if emulated, -EFAULT if any user access failed. */
+ if (reg & 1)
+ return 0; /* invalid form: GPR must be even */
+ if (flags & SW)
+ sw = 7; /* i^7 walks each 8-byte register in reverse byte order */
+ ret = 0; /* ORs together the status of every user access below */
+ for (i = 0; i < 8; ++i) {
+ if (!(flags & ST)) { /* load: user memory -> registers */
+ ret |= __get_user(ptr0[i^sw], addr + i); /* bytes 0..7 go to the first register */
+ ret |= __get_user(ptr1[i^sw], addr + i + 8); /* bytes 8..15 go to the second register */
+ } else { /* store: registers -> user memory */
+ ret |= __put_user(ptr0[i^sw], addr + i);
+ ret |= __put_user(ptr1[i^sw], addr + i + 8);
+ }
+ }
+ if (ret)
+ return -EFAULT;
+ return 1; /* exception handled and fixed up */
+}
+#endif /* CONFIG_PPC64 */
+
#ifdef CONFIG_SPE
static struct aligninfo spe_aligninfo[32] = {
@@ -459,7 +465,7 @@ static struct aligninfo spe_aligninfo[32] = {
static int emulate_spe(struct pt_regs *regs, unsigned int reg,
unsigned int instr)
{
- int t, ret;
+ int ret;
union {
u64 ll;
u32 w[2];
@@ -582,24 +588,18 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg,
if (flags & SW) {
switch (flags & 0xf0) {
case E8:
- SWAP(data.v[0], data.v[7]);
- SWAP(data.v[1], data.v[6]);
- SWAP(data.v[2], data.v[5]);
- SWAP(data.v[3], data.v[4]);
+ data.ll = swab64(data.ll);
break;
case E4:
-
- SWAP(data.v[0], data.v[3]);
- SWAP(data.v[1], data.v[2]);
- SWAP(data.v[4], data.v[7]);
- SWAP(data.v[5], data.v[6]);
+ data.w[0] = swab32(data.w[0]);
+ data.w[1] = swab32(data.w[1]);
break;
/* It's half-word endian */
default:
- SWAP(data.v[0], data.v[1]);
- SWAP(data.v[2], data.v[3]);
- SWAP(data.v[4], data.v[5]);
- SWAP(data.v[6], data.v[7]);
+ data.h[0] = swab16(data.h[0]);
+ data.h[1] = swab16(data.h[1]);
+ data.h[2] = swab16(data.h[2]);
+ data.h[3] = swab16(data.h[3]);
break;
}
}
@@ -637,6 +637,94 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg,
}
#endif /* CONFIG_SPE */
+#ifdef CONFIG_VSX
+/*
+ * Emulate VSX instructions...
+ *
+ * Copies 'length' bytes, one byte at a time, between user memory at
+ * 'addr' and the saved image of register 'reg' in current->thread
+ * (reg 0-31 uses fp_state.fpr[reg], reg 32-63 uses vr_state.vr[reg - 32]).
+ * The copy is done in elements of 'elsize' bytes, except that a
+ * byte-swapped access on a little-endian kernel is handled as one
+ * element of 'length' bytes.
+ *
+ * 'flags' is tested for ST (store rather than load), SW (byte swap),
+ * U (write regs->dar back into GPR 'areg') and SPLT (splat load).
+ *
+ * Returns 0 without doing anything when the fault was not taken in
+ * user mode, -EFAULT if any user access failed, and 1 on success.
+ */
+static int emulate_vsx(unsigned char __user *addr, unsigned int reg,
+ unsigned int areg, struct pt_regs *regs,
+ unsigned int flags, unsigned int length,
+ unsigned int elsize)
+{
+ char *ptr; /* byte cursor into the register image */
+ unsigned long *lptr; /* start of the same image, viewed as unsigned longs for the fix-ups at the end */
+ int ret = 0; /* ORs together the status of every user access */
+ int sw = 0; /* XOR mask applied to the byte index within an element */
+ int i, j;
+
+ /* userland only */
+ if (unlikely(!user_mode(regs)))
+ return 0;
+
+ flush_vsx_to_thread(current); /* NOTE(review): presumably saves live VSX state into current->thread before it is edited here - confirm */
+
+ if (reg < 32)
+ ptr = (char *) &current->thread.fp_state.fpr[reg][0];
+ else
+ ptr = (char *) &current->thread.vr_state.vr[reg - 32];
+
+ lptr = (unsigned long *) ptr; /* taken before ptr is adjusted below */
+
+#ifdef __LITTLE_ENDIAN__
+ if (flags & SW) {
+ elsize = length; /* treat the whole access as a single element */
+ sw = length-1; /* with length 8 or 16 (what the visible caller passes), i^sw reverses the bytes of the whole access */
+ } else {
+ /*
+ * The elements are BE ordered, even in LE mode, so process
+ * them in reverse order.
+ */
+ addr += length - elsize; /* start at the last element in memory */
+
+ /* 8 byte memory accesses go in the top 8 bytes of the VR */
+ if (length == 8)
+ ptr += 8;
+ }
+#else
+ if (flags & SW)
+ sw = elsize-1; /* reverse the bytes inside each element */
+#endif
+
+ for (j = 0; j < length; j += elsize) { /* one pass per element */
+ for (i = 0; i < elsize; ++i) { /* one user access per byte */
+ if (flags & ST)
+ ret |= __put_user(ptr[i^sw], addr + i);
+ else
+ ret |= __get_user(ptr[i^sw], addr + i);
+ }
+ ptr += elsize; /* the register image is always walked forwards */
+#ifdef __LITTLE_ENDIAN__
+ addr -= elsize; /* memory is walked backwards on a little-endian kernel */
+#else
+ addr += elsize;
+#endif
+ }
+ /* VSX_HI / VSX_LO: indices into lptr[] used by the fix-ups below; they swap with kernel endianness. */
+#ifdef __BIG_ENDIAN__
+#define VSX_HI 0
+#define VSX_LO 1
+#else
+#define VSX_HI 1
+#define VSX_LO 0
+#endif
+
+ if (!ret) {
+ if (flags & U)
+ regs->gpr[areg] = regs->dar; /* update form: write the DAR value back into GPR areg */
+
+ /* Splat load copies the same data to top and bottom 8 bytes */
+ if (flags & SPLT)
+ lptr[VSX_LO] = lptr[VSX_HI];
+ /* For 8 byte loads, zero the low 8 bytes */
+ else if (!(flags & ST) && (8 == length))
+ lptr[VSX_LO] = 0;
+ } else
+ return -EFAULT; /* at least one byte access to user memory failed */
+
+ return 1; /* emulated successfully */
+}
+#endif
+
/*
* Called on alignment exception. Attempts to fixup
*
@@ -647,23 +735,33 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg,
int fix_alignment(struct pt_regs *regs)
{
- unsigned int instr, nb, flags;
+ unsigned int instr, nb, flags, instruction = 0;
unsigned int reg, areg;
unsigned int dsisr;
unsigned char __user *addr;
unsigned long p, swiz;
- int ret, t;
- union {
+ int ret, i;
+ union data {
u64 ll;
double dd;
unsigned char v[8];
struct {
+#ifdef __LITTLE_ENDIAN__
+ int low32;
+ unsigned hi32;
+#else
unsigned hi32;
int low32;
+#endif
} x32;
struct {
+#ifdef __LITTLE_ENDIAN__
+ short low16;
+ unsigned char hi48[6];
+#else
unsigned char hi48[6];
short low16;
+#endif
} x16;
} data;
@@ -689,6 +787,7 @@ int fix_alignment(struct pt_regs *regs)
if (cpu_has_feature(CPU_FTR_REAL_LE) && (regs->msr & MSR_LE))
instr = cpu_to_le32(instr);
dsisr = make_dsisr(instr);
+ instruction = instr;
}
/* extract the operation and registers from the dsisr */
@@ -696,8 +795,10 @@ int fix_alignment(struct pt_regs *regs)
areg = dsisr & 0x1f; /* register to update */
#ifdef CONFIG_SPE
- if ((instr >> 26) == 0x4)
+ if ((instr >> 26) == 0x4) {
+ PPC_WARN_ALIGNMENT(spe, regs);
return emulate_spe(regs, reg, instr);
+ }
#endif
instr = (dsisr >> 10) & 0x7f;
@@ -707,10 +808,21 @@ int fix_alignment(struct pt_regs *regs)
nb = aligninfo[instr].len;
flags = aligninfo[instr].flags;
+ /* ldbrx/stdbrx overlap lfs/stfs in the DSISR unfortunately */
+ if (IS_XFORM(instruction) && ((instruction >> 1) & 0x3ff) == 532) {
+ nb = 8;
+ flags = LD+SW;
+ } else if (IS_XFORM(instruction) &&
+ ((instruction >> 1) & 0x3ff) == 660) {
+ nb = 8;
+ flags = ST+SW;
+ }
+
/* Byteswap little endian loads and stores */
swiz = 0;
- if (regs->msr & MSR_LE) {
+ if ((regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE)) {
flags ^= SW;
+#ifdef __BIG_ENDIAN__
/*
* So-called "PowerPC little endian" mode works by
* swizzling addresses rather than by actually doing
@@ -723,25 +835,64 @@ int fix_alignment(struct pt_regs *regs)
*/
if (cpu_has_feature(CPU_FTR_PPC_LE))
swiz = 7;
+#endif
}
/* DAR has the operand effective address */
addr = (unsigned char __user *)regs->dar;
+#ifdef CONFIG_VSX
+ if ((instruction & 0xfc00003e) == 0x7c000018) {
+ unsigned int elsize;
+
+ /* Additional register addressing bit (64 VSX vs 32 FPR/GPR) */
+ reg |= (instruction & 0x1) << 5;
+ /* Simple inline decoder instead of a table */
+ /* VSX has only 8 and 16 byte memory accesses */
+ nb = 8;
+ if (instruction & 0x200)
+ nb = 16;
+
+ /* Vector stores in little-endian mode swap individual
+ elements, so process them separately */
+ elsize = 4;
+ if (instruction & 0x80)
+ elsize = 8;
+
+ flags = 0;
+ if ((regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE))
+ flags |= SW;
+ if (instruction & 0x100)
+ flags |= ST;
+ if (instruction & 0x040)
+ flags |= U;
+ /* splat load needs a special decoder */
+ if ((instruction & 0x400) == 0){
+ flags |= SPLT;
+ nb = 8;
+ }
+ PPC_WARN_ALIGNMENT(vsx, regs);
+ return emulate_vsx(addr, reg, areg, regs, flags, nb, elsize);
+ }
+#endif
/* A size of 0 indicates an instruction we don't support, with
* the exception of DCBZ which is handled as a special case here
*/
- if (instr == DCBZ)
+ if (instr == DCBZ) {
+ PPC_WARN_ALIGNMENT(dcbz, regs);
return emulate_dcbz(regs, addr);
+ }
if (unlikely(nb == 0))
return 0;
/* Load/Store Multiple instructions are handled in their own
* function
*/
- if (flags & M)
+ if (flags & M) {
+ PPC_WARN_ALIGNMENT(multiple, regs);
return emulate_multiple(regs, addr, reg, nb,
flags, instr, swiz);
+ }
/* Verify the address of the operand */
if (unlikely(user_mode(regs) &&
@@ -757,40 +908,58 @@ int fix_alignment(struct pt_regs *regs)
flush_fp_to_thread(current);
}
- /* Special case for 16-byte FP loads and stores */
- if (nb == 16)
- return emulate_fp_pair(regs, addr, reg, flags);
+ if ((nb == 16)) {
+ if (flags & F) {
+ /* Special case for 16-byte FP loads and stores */
+ PPC_WARN_ALIGNMENT(fp_pair, regs);
+ return emulate_fp_pair(addr, reg, flags);
+ } else {
+#ifdef CONFIG_PPC64
+ /* Special case for 16-byte loads and stores */
+ PPC_WARN_ALIGNMENT(lq_stq, regs);
+ return emulate_lq_stq(regs, addr, reg, flags);
+#else
+ return 0;
+#endif
+ }
+ }
+
+ PPC_WARN_ALIGNMENT(unaligned, regs);
/* If we are loading, get the data from user space, else
* get it from register values
*/
if (!(flags & ST)) {
- data.ll = 0;
- ret = 0;
- p = (unsigned long) addr;
+ unsigned int start = 0;
+
switch (nb) {
- case 8:
- ret |= __get_user_inatomic(data.v[0], SWIZ_PTR(p++));
- ret |= __get_user_inatomic(data.v[1], SWIZ_PTR(p++));
- ret |= __get_user_inatomic(data.v[2], SWIZ_PTR(p++));
- ret |= __get_user_inatomic(data.v[3], SWIZ_PTR(p++));
case 4:
- ret |= __get_user_inatomic(data.v[4], SWIZ_PTR(p++));
- ret |= __get_user_inatomic(data.v[5], SWIZ_PTR(p++));
+ start = offsetof(union data, x32.low32);
+ break;
case 2:
- ret |= __get_user_inatomic(data.v[6], SWIZ_PTR(p++));
- ret |= __get_user_inatomic(data.v[7], SWIZ_PTR(p++));
- if (unlikely(ret))
- return -EFAULT;
+ start = offsetof(union data, x16.low16);
+ break;
}
+
+ data.ll = 0;
+ ret = 0;
+ p = (unsigned long)addr;
+
+ for (i = 0; i < nb; i++)
+ ret |= __get_user_inatomic(data.v[start + i],
+ SWIZ_PTR(p++));
+
+ if (unlikely(ret))
+ return -EFAULT;
+
} else if (flags & F) {
- data.dd = current->thread.fpr[reg];
+ data.ll = current->thread.TS_FPR(reg);
if (flags & S) {
/* Single-precision FP store requires conversion... */
#ifdef CONFIG_PPC_FPU
preempt_disable();
enable_kernel_fp();
- cvt_df(&data.dd, (float *)&data.v[4], &current->thread);
+ cvt_df(&data.dd, (float *)&data.x32.low32);
preempt_enable();
#else
return 0;
@@ -802,17 +971,13 @@ int fix_alignment(struct pt_regs *regs)
if (flags & SW) {
switch (nb) {
case 8:
- SWAP(data.v[0], data.v[7]);
- SWAP(data.v[1], data.v[6]);
- SWAP(data.v[2], data.v[5]);
- SWAP(data.v[3], data.v[4]);
+ data.ll = swab64(data.ll);
break;
case 4:
- SWAP(data.v[4], data.v[7]);
- SWAP(data.v[5], data.v[6]);
+ data.x32.low32 = swab32(data.x32.low32);
break;
case 2:
- SWAP(data.v[6], data.v[7]);
+ data.x16.low16 = swab16(data.x16.low16);
break;
}
}
@@ -834,7 +999,7 @@ int fix_alignment(struct pt_regs *regs)
#ifdef CONFIG_PPC_FPU
preempt_disable();
enable_kernel_fp();
- cvt_fd((float *)&data.v[4], &data.dd, &current->thread);
+ cvt_fd((float *)&data.x32.low32, &data.dd);
preempt_enable();
#else
return 0;
@@ -844,25 +1009,28 @@ int fix_alignment(struct pt_regs *regs)
/* Store result to memory or update registers */
if (flags & ST) {
- ret = 0;
- p = (unsigned long) addr;
+ unsigned int start = 0;
+
switch (nb) {
- case 8:
- ret |= __put_user_inatomic(data.v[0], SWIZ_PTR(p++));
- ret |= __put_user_inatomic(data.v[1], SWIZ_PTR(p++));
- ret |= __put_user_inatomic(data.v[2], SWIZ_PTR(p++));
- ret |= __put_user_inatomic(data.v[3], SWIZ_PTR(p++));
case 4:
- ret |= __put_user_inatomic(data.v[4], SWIZ_PTR(p++));
- ret |= __put_user_inatomic(data.v[5], SWIZ_PTR(p++));
+ start = offsetof(union data, x32.low32);
+ break;
case 2:
- ret |= __put_user_inatomic(data.v[6], SWIZ_PTR(p++));
- ret |= __put_user_inatomic(data.v[7], SWIZ_PTR(p++));
+ start = offsetof(union data, x16.low16);
+ break;
}
+
+ ret = 0;
+ p = (unsigned long)addr;
+
+ for (i = 0; i < nb; i++)
+ ret |= __put_user_inatomic(data.v[start + i],
+ SWIZ_PTR(p++));
+
if (unlikely(ret))
return -EFAULT;
} else if (flags & F)
- current->thread.fpr[reg] = data.dd;
+ current->thread.TS_FPR(reg) = data.ll;
else
regs->gpr[reg] = data.ll;