author:    Hal Finkel <hfinkel@anl.gov>  2013-03-15 15:27:13 +0000
committer: Hal Finkel <hfinkel@anl.gov>  2013-03-15 15:27:13 +0000
commit:    2d37f7b979a24930c444f9783173a90a6e548118
tree:      a30c316b5d17f708c2c44d5d0461baa8452e1622
parent:    c0d8dc0eb6e1df872affadba01f60e42275e2863
Enable unaligned memory access on PPC for scalar types
Unaligned access is supported on PPC for non-vector types, and is generally
more efficient than manually expanding the loads and stores.
A few of the existing test cases used expanded unaligned loads and stores to
exercise other features (such as load/store with update); for those test cases,
unaligned access remains disabled via the new -disable-ppc-unaligned option.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@177160 91177308-0d34-0410-b5e6-96231b3b80d8
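For context, the kind of IR this patch affects is a simple scalar load or store marked `align 1`: before this change the PPC backend expanded such accesses into aligned pieces, and afterwards it selects a single load/store instruction. A minimal sketch, adapted from the new unaligned.ll test added below (the function name is illustrative, and the syntax is the era-appropriate typed-pointer form used throughout this diff):

```llvm
; A minimal sketch adapted from the unaligned.ll test added in this patch.
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s

define void @copy_unaligned_i32(i32* %p, i32* %r) nounwind {
entry:
  ; align 1 marks these accesses as potentially unaligned; with this patch
  ; they select to a single lwz/stw pair rather than being expanded.
  %v = load i32* %p, align 1
  store i32 %v, i32* %r, align 1
  ret void
; CHECK: lwz
; CHECK: stw
}
```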
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp | 29
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.h   |  4
-rw-r--r--  test/CodeGen/PowerPC/lbzux.ll          |  2
-rw-r--r--  test/CodeGen/PowerPC/structsinmem.ll   |  2
-rw-r--r--  test/CodeGen/PowerPC/structsinregs.ll  |  2
-rw-r--r--  test/CodeGen/PowerPC/stwu8.ll          |  2
-rw-r--r--  test/CodeGen/PowerPC/unaligned.ll      | 73
7 files changed, 110 insertions, 4 deletions
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 13cb358fc0..a7e9d56fa9 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -57,6 +57,9 @@
 cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
 static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
 cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
+static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
+cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
+
 static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
   if (TM.getSubtargetImpl()->isDarwin())
     return new TargetLoweringObjectFileMachO();
@@ -6851,6 +6854,32 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
   }
 }
 
+bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
+                                                      bool *Fast) const {
+  if (DisablePPCUnaligned)
+    return false;
+
+  // PowerPC supports unaligned memory access for simple non-vector types.
+  // Although accessing unaligned addresses is not as efficient as accessing
+  // aligned addresses, it is generally more efficient than manual expansion,
+  // and generally only traps for software emulation when crossing page
+  // boundaries.
+
+  if (!VT.isSimple())
+    return false;
+
+  if (VT.getSimpleVT().isVector())
+    return false;
+
+  if (VT == MVT::ppcf128)
+    return false;
+
+  if (Fast)
+    *Fast = true;
+
+  return true;
+}
+
 /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
 /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
 /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 3931384d89..8d44d9ff46 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -449,6 +449,10 @@ namespace llvm {
                                     bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
                                     MachineFunction &MF) const;
 
+    /// Is unaligned memory access allowed for the given type, and is it fast
+    /// relative to software emulation.
+    virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast = 0) const;
+
     /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
     /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
     /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
diff --git a/test/CodeGen/PowerPC/lbzux.ll b/test/CodeGen/PowerPC/lbzux.ll
index 12f1d1f130..98951306fd 100644
--- a/test/CodeGen/PowerPC/lbzux.ll
+++ b/test/CodeGen/PowerPC/lbzux.ll
@@ -1,6 +1,6 @@
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
-; RUN: llc < %s | FileCheck %s
+; RUN: llc -disable-ppc-unaligned < %s | FileCheck %s
 
 define fastcc void @allocateSpace(i1 %cond1, i1 %cond2) nounwind {
 entry:
diff --git a/test/CodeGen/PowerPC/structsinmem.ll b/test/CodeGen/PowerPC/structsinmem.ll
index 884d3a89d1..59b108a814 100644
--- a/test/CodeGen/PowerPC/structsinmem.ll
+++ b/test/CodeGen/PowerPC/structsinmem.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mcpu=pwr7 -O0 -disable-fp-elim < %s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -O0 -disable-fp-elim -disable-ppc-unaligned < %s | FileCheck %s
 
 ; FIXME: The code generation for packed structs is very poor because the
 ; PowerPC target wrongly rejects all unaligned loads. This test case will
diff --git a/test/CodeGen/PowerPC/structsinregs.ll b/test/CodeGen/PowerPC/structsinregs.ll
index c7cbfa8ee6..c746039f43 100644
--- a/test/CodeGen/PowerPC/structsinregs.ll
+++ b/test/CodeGen/PowerPC/structsinregs.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mcpu=pwr7 -O0 -disable-fp-elim < %s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -O0 -disable-fp-elim -disable-ppc-unaligned < %s | FileCheck %s
 
 ; FIXME: The code generation for packed structs is very poor because the
 ; PowerPC target wrongly rejects all unaligned loads. This test case will
diff --git a/test/CodeGen/PowerPC/stwu8.ll b/test/CodeGen/PowerPC/stwu8.ll
index 897bfc6d6c..e0bd043454 100644
--- a/test/CodeGen/PowerPC/stwu8.ll
+++ b/test/CodeGen/PowerPC/stwu8.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc -disable-ppc-unaligned < %s | FileCheck %s
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 
diff --git a/test/CodeGen/PowerPC/unaligned.ll b/test/CodeGen/PowerPC/unaligned.ll
new file mode 100644
index 0000000000..d05080338f
--- /dev/null
+++ b/test/CodeGen/PowerPC/unaligned.ll
@@ -0,0 +1,73 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32"
+
+define void @foo1(i16* %p, i16* %r) nounwind {
+entry:
+  %v = load i16* %p, align 1
+  store i16 %v, i16* %r, align 1
+  ret void
+
+; CHECK: @foo1
+; CHECK: lhz
+; CHECK: sth
+}
+
+define void @foo2(i32* %p, i32* %r) nounwind {
+entry:
+  %v = load i32* %p, align 1
+  store i32 %v, i32* %r, align 1
+  ret void
+
+; CHECK: @foo2
+; CHECK: lwz
+; CHECK: stw
+}
+
+define void @foo3(i64* %p, i64* %r) nounwind {
+entry:
+  %v = load i64* %p, align 1
+  store i64 %v, i64* %r, align 1
+  ret void
+
+; CHECK: @foo3
+; CHECK: ld
+; CHECK: std
+}
+
+define void @foo4(float* %p, float* %r) nounwind {
+entry:
+  %v = load float* %p, align 1
+  store float %v, float* %r, align 1
+  ret void
+
+; CHECK: @foo4
+; CHECK: lfs
+; CHECK: stfs
+}
+
+define void @foo5(double* %p, double* %r) nounwind {
+entry:
+  %v = load double* %p, align 1
+  store double %v, double* %r, align 1
+  ret void
+
+; CHECK: @foo5
+; CHECK: lfd
+; CHECK: stfd
+}
+
+define void @foo6(<4 x float>* %p, <4 x float>* %r) nounwind {
+entry:
+  %v = load <4 x float>* %p, align 1
+  store <4 x float> %v, <4 x float>* %r, align 1
+  ret void
+
+; These loads and stores are legalized into aligned loads and stores
+; using aligned stack slots.
+; CHECK: @foo6
+; CHECK: ld
+; CHECK: ld
+; CHECK: std
+; CHECK: std
+}
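For tests that depend on the old expansion behavior, such as the ones updated above, the new cl::opt serves as an escape hatch. Below is a hedged sketch of pinning a test to the pre-patch behavior; it assumes (per the commit message) that an align-1 i16 access is then expanded into byte loads, and the function name and CHECK pattern are illustrative rather than taken from this commit:

```llvm
; Sketch: forcing the pre-patch expansion with the new flag, mirroring the
; updated RUN lines in lbzux.ll and stwu8.ll above.
; RUN: llc -disable-ppc-unaligned < %s | FileCheck %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"

define void @copy_unaligned_i16(i16* %p, i16* %r) nounwind {
entry:
  %v = load i16* %p, align 1
  store i16 %v, i16* %r, align 1
  ret void
; With unaligned access disabled, the halfword access should be expanded
; into byte operations (an assumption; the exact output is not verified here).
; CHECK: lbz
}
```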