Enable unaligned memory access on PPC for scalar types

Unaligned access is supported on PPC for non-vector types, and is generally more efficient than manually expanding the loads and stores. A few of the existing test cases were using expanded unaligned loads and stores to test other features (like load/store with update), and for these test cases, unaligned access remains disabled. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@177160 91177308-0d34-0410-b5e6-96231b3b80d8
author: Hal Finkel <hfinkel@anl.gov> 2013-03-15 15:27:13 +0000
committer: Hal Finkel <hfinkel@anl.gov> 2013-03-15 15:27:13 +0000
commit: 2d37f7b979a24930c444f9783173a90a6e548118 (patch)
tree: a30c316b5d17f708c2c44d5d0461baa8452e1622 /lib
parent: c0d8dc0eb6e1df872affadba01f60e42275e2863 (diff)
2 files changed, 33 insertions, 0 deletions
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 13cb358fc0..a7e9d56fa9 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -57,6 +57,9 @@ cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
 static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
 cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
 
+static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
+cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
+
 static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
   if (TM.getSubtargetImpl()->isDarwin())
     return new TargetLoweringObjectFileMachO();
@@ -6851,6 +6854,32 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
   }
 }
 
+bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
+                                                      bool *Fast) const {
+  if (DisablePPCUnaligned)
+    return false;
+
+  // PowerPC supports unaligned memory access for simple non-vector types.
+  // Although accessing unaligned addresses is not as efficient as accessing
+  // aligned addresses, it is generally more efficient than manual expansion,
+  // and generally only traps for software emulation when crossing page
+  // boundaries.
+
+  if (!VT.isSimple())
+    return false;
+
+  if (VT.getSimpleVT().isVector())
+    return false;
+
+  if (VT == MVT::ppcf128)
+    return false;
+
+  if (Fast)
+    *Fast = true;
+
+  return true;
+}
+
 /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
 /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
 /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 3931384d89..8d44d9ff46 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -449,6 +449,10 @@ namespace llvm {
                         bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
                         MachineFunction &MF) const;
 
+    /// Is unaligned memory access allowed for the given type, and is it fast
+    /// relative to software emulation.
+    virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast = 0) const;
+
     /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
     /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
     /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
author	Hal Finkel <hfinkel@anl.gov>	2013-03-15 15:27:13 +0000
committer	Hal Finkel <hfinkel@anl.gov>	2013-03-15 15:27:13 +0000
commit	2d37f7b979a24930c444f9783173a90a6e548118 (patch)
tree	a30c316b5d17f708c2c44d5d0461baa8452e1622 /lib
parent	c0d8dc0eb6e1df872affadba01f60e42275e2863 (diff)