about | summary | refs | log | tree | commit | diff
path: root/lib/CodeGen
diff options
context:
space:
mode:
author Dan Gohman <gohman@apple.com> 2008-04-12 04:36:06 +0000
committer Dan Gohman <gohman@apple.com> 2008-04-12 04:36:06 +0000
commit 707e0184233f27e0e9f9aee0309f2daab8cfe7f8 (patch)
tree 849460e92ec614c23849f147402c15384fe11756 /lib/CodeGen
parent 6f836adafee88669273e9302e3344c4b9cef8a0d (diff)
Drop ISD::MEMSET, ISD::MEMMOVE, and ISD::MEMCPY, which are not Legal
on any current target and aren't optimized in DAGCombiner. Instead of using intermediate nodes, expand the operations, choosing between simple loads/stores, target-specific code, and library calls, immediately.

Previously, the code to emit optimized code for these operations was only used at initial SelectionDAG construction time; now it is used at all times. This fixes some cases where rep;movs was being used for small copies where simple loads/stores would be better.

This also cleans up code that checks for alignments less than 4; let the targets make that decision instead of doing it in target-independent code. This allows x86 to use rep;movs in low-alignment cases.

Also, this fixes a bug that resulted in the use of rep;stos for memsets of 0 with non-constant memory size when the alignment was at least 4. It's better to use the library in this case, which can be significantly faster when the size is large.

This also preserves more SourceValue information when memory intrinsics are lowered into simple loads/stores.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@49572 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/CodeGen')
-rw-r--r-- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 118
-rw-r--r-- lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 45
-rw-r--r-- lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1
-rw-r--r-- lib/CodeGen/SelectionDAG/LegalizeTypesExpand.cpp | 3
-rw-r--r-- lib/CodeGen/SelectionDAG/LegalizeTypesPromote.cpp | 3
-rw-r--r-- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 369
-rw-r--r-- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 282
-rw-r--r-- lib/CodeGen/SelectionDAG/TargetLowering.cpp | 55
8 files changed, 386 insertions, 490 deletions
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 5cb13e3f44..2df363e5be 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -22,6 +22,7 @@
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetSubtarget.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
@@ -2842,123 +2843,6 @@ SDOperand SelectionDAGLegalize::LegalizeOp(SDOperand Op) {
break;
}
break;
- case ISD::MEMSET:
- case ISD::MEMCPY:
- case ISD::MEMMOVE: {
- Tmp1 = LegalizeOp(Node->getOperand(0)); // Chain
- Tmp2 = LegalizeOp(Node->getOperand(1)); // Pointer
-
- if (Node->getOpcode() == ISD::MEMSET) { // memset = ubyte
- switch (getTypeAction(Node->getOperand(2).getValueType())) {
- case Expand: assert(0 && "Cannot expand a byte!");
- case Legal:
- Tmp3 = LegalizeOp(Node->getOperand(2));
- break;
- case Promote:
- Tmp3 = PromoteOp(Node->getOperand(2));
- break;
- }
- } else {
- Tmp3 = LegalizeOp(Node->getOperand(2)); // memcpy/move = pointer,
- }
-
- SDOperand Tmp4;
- switch (getTypeAction(Node->getOperand(3).getValueType())) {
- case Expand: {
- // Length is too big, just take the lo-part of the length.
- SDOperand HiPart;
- ExpandOp(Node->getOperand(3), Tmp4, HiPart);
- break;
- }
- case Legal:
- Tmp4 = LegalizeOp(Node->getOperand(3));
- break;
- case Promote:
- Tmp4 = PromoteOp(Node->getOperand(3));
- break;
- }
-
- SDOperand Tmp5;
- switch (getTypeAction(Node->getOperand(4).getValueType())) { // uint
- case Expand: assert(0 && "Cannot expand this yet!");
- case Legal:
- Tmp5 = LegalizeOp(Node->getOperand(4));
- break;
- case Promote:
- Tmp5 = PromoteOp(Node->getOperand(4));
- break;
- }
-
- SDOperand Tmp6;
- switch (getTypeAction(Node->getOperand(5).getValueType())) { // bool
- case Expand: assert(0 && "Cannot expand this yet!");
- case Legal:
- Tmp6 = LegalizeOp(Node->getOperand(5));
- break;
- case Promote:
- Tmp6 = PromoteOp(Node->getOperand(5));
- break;
- }
-
- switch (TLI.getOperationAction(Node->getOpcode(), MVT::Other)) {
- default: assert(0 && "This action not implemented for this operation!");
- case TargetLowering::Custom:
- isCustom = true;
- // FALLTHROUGH
- case TargetLowering::Legal: {
- SDOperand Ops[] = { Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6 };
- Result = DAG.UpdateNodeOperands(Result, Ops, 6);
- if (isCustom) {
- Tmp1 = TLI.LowerOperation(Result, DAG);
- if (Tmp1.Val) Result = Tmp1;
- }
- break;
- }
- case TargetLowering::Expand: {
- // Otherwise, the target does not support this operation. Lower the
- // operation to an explicit libcall as appropriate.
- MVT::ValueType IntPtr = TLI.getPointerTy();
- const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType();
- TargetLowering::ArgListTy Args;
- TargetLowering::ArgListEntry Entry;
-
- const char *FnName = 0;
- if (Node->getOpcode() == ISD::MEMSET) {
- Entry.Node = Tmp2; Entry.Ty = IntPtrTy;
- Args.push_back(Entry);
- // Extend the (previously legalized) ubyte argument to be an int value
- // for the call.
- if (Tmp3.getValueType() > MVT::i32)
- Tmp3 = DAG.getNode(ISD::TRUNCATE, MVT::i32, Tmp3);
- else
- Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Tmp3);
- Entry.Node = Tmp3; Entry.Ty = Type::Int32Ty; Entry.isSExt = true;
- Args.push_back(Entry);
- Entry.Node = Tmp4; Entry.Ty = IntPtrTy; Entry.isSExt = false;
- Args.push_back(Entry);
-
- FnName = "memset";
- } else if (Node->getOpcode() == ISD::MEMCPY ||
- Node->getOpcode() == ISD::MEMMOVE) {
- Entry.Ty = IntPtrTy;
- Entry.Node = Tmp2; Args.push_back(Entry);
- Entry.Node = Tmp3; Args.push_back(Entry);
- Entry.Node = Tmp4; Args.push_back(Entry);
- FnName = Node->getOpcode() == ISD::MEMMOVE ? "memmove" : "memcpy";
- } else {
- assert(0 && "Unknown op!");
- }
-
- std::pair<SDOperand,SDOperand> CallResult =
- TLI.LowerCallTo(Tmp1, Type::VoidTy,
- false, false, false, CallingConv::C, false,
- DAG.getExternalSymbol(FnName, IntPtr), Args, DAG);
- Result = CallResult.second;
- break;
- }
- }
- break;
- }
case ISD::SHL_PARTS:
case ISD::SRA_PARTS:
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 6511cff1c6..380c42220c 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -439,51 +439,6 @@ SDOperand DAGTypeLegalizer::CreateStackStoreLoad(SDOperand Op,
return DAG.getLoad(DestVT, Store, FIPtr, NULL, 0);
}
-/// HandleMemIntrinsic - This handles memcpy/memset/memmove with invalid
-/// operands. This promotes or expands the operands as required.
-SDOperand DAGTypeLegalizer::HandleMemIntrinsic(SDNode *N) {
- // The chain and pointer [operands #0 and #1] are always valid types.
- SDOperand Chain = N->getOperand(0);
- SDOperand Ptr = N->getOperand(1);
- SDOperand Op2 = N->getOperand(2);
-
- // Op #2 is either a value (memset) or a pointer. Promote it if required.
- switch (getTypeAction(Op2.getValueType())) {
- default: assert(0 && "Unknown action for pointer/value operand");
- case Legal: break;
- case Promote: Op2 = GetPromotedOp(Op2); break;
- }
-
- // The length could have any action required.
- SDOperand Length = N->getOperand(3);
- switch (getTypeAction(Length.getValueType())) {
- default: assert(0 && "Unknown action for memop operand");
- case Legal: break;
- case Promote: Length = GetPromotedZExtOp(Length); break;
- case Expand:
- SDOperand Dummy; // discard the high part.
- GetExpandedOp(Length, Length, Dummy);
- break;
- }
-
- SDOperand Align = N->getOperand(4);
- switch (getTypeAction(Align.getValueType())) {
- default: assert(0 && "Unknown action for memop operand");
- case Legal: break;
- case Promote: Align = GetPromotedZExtOp(Align); break;
- }
-
- SDOperand AlwaysInline = N->getOperand(5);
- switch (getTypeAction(AlwaysInline.getValueType())) {
- default: assert(0 && "Unknown action for memop operand");
- case Legal: break;
- case Promote: AlwaysInline = GetPromotedZExtOp(AlwaysInline); break;
- }
-
- SDOperand Ops[] = { Chain, Ptr, Op2, Length, Align, AlwaysInline };
- return DAG.UpdateNodeOperands(SDOperand(N, 0), Ops, 6);
-}
-
/// JoinIntegers - Build an integer with low bits Lo and high bits Hi.
SDOperand DAGTypeLegalizer::JoinIntegers(SDOperand Lo, SDOperand Hi) {
MVT::ValueType LVT = Lo.getValueType();
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 7d245abed5..5b9879315f 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -165,7 +165,6 @@ private:
// Common routines.
SDOperand BitConvertToInteger(SDOperand Op);
SDOperand CreateStackStoreLoad(SDOperand Op, MVT::ValueType DestVT);
- SDOperand HandleMemIntrinsic(SDNode *N);
SDOperand JoinIntegers(SDOperand Lo, SDOperand Hi);
void SplitInteger(SDOperand Op, SDOperand &Lo, SDOperand &Hi);
void SplitInteger(SDOperand Op, MVT::ValueType LoVT, MVT::ValueType HiVT,
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesExpand.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesExpand.cpp
index b872a44fec..fcde8f32d2 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesExpand.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesExpand.cpp
@@ -946,9 +946,6 @@ bool DAGTypeLegalizer::ExpandOperand(SDNode *N, unsigned OpNo) {
case ISD::STORE:
Res = ExpandOperand_STORE(cast<StoreSDNode>(N), OpNo);
break;
- case ISD::MEMSET:
- case ISD::MEMCPY:
- case ISD::MEMMOVE: Res = HandleMemIntrinsic(N); break;
case ISD::BUILD_VECTOR: Res = ExpandOperand_BUILD_VECTOR(N); break;
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesPromote.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesPromote.cpp
index b8118eb039..93c8c60584 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesPromote.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesPromote.cpp
@@ -447,9 +447,6 @@ bool DAGTypeLegalizer::PromoteOperand(SDNode *N, unsigned OpNo) {
case ISD::STORE: Res = PromoteOperand_STORE(cast<StoreSDNode>(N),
OpNo); break;
- case ISD::MEMSET:
- case ISD::MEMCPY:
- case ISD::MEMMOVE: Res = HandleMemIntrinsic(N); break;
case ISD::BUILD_VECTOR: Res = PromoteOperand_BUILD_VECTOR(N); break;
case ISD::INSERT_VECTOR_ELT:
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index f096c70a3e..327a8fe897 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -17,6 +17,7 @@
#include "llvm/Intrinsics.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Assembly/Writer.h"
+#include "llvm/CallingConv.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -2385,28 +2386,357 @@ SDOperand SelectionDAG::getNode(unsigned Opcode, MVT::ValueType VT,
return getNode(Opcode, VT, Ops, 5);
}
-SDOperand SelectionDAG::getMemcpy(SDOperand Chain, SDOperand Dest,
- SDOperand Src, SDOperand Size,
- SDOperand Align,
- SDOperand AlwaysInline) {
- SDOperand Ops[] = { Chain, Dest, Src, Size, Align, AlwaysInline };
- return getNode(ISD::MEMCPY, MVT::Other, Ops, 6);
+/// getMemsetValue - Vectorized representation of the memset value
+/// operand.
+static SDOperand getMemsetValue(SDOperand Value, MVT::ValueType VT,
+ SelectionDAG &DAG) {
+ MVT::ValueType CurVT = VT;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
+ uint64_t Val = C->getValue() & 255;
+ unsigned Shift = 8;
+ while (CurVT != MVT::i8) {
+ Val = (Val << Shift) | Val;
+ Shift <<= 1;
+ CurVT = (MVT::ValueType)((unsigned)CurVT - 1);
+ }
+ return DAG.getConstant(Val, VT);
+ } else {
+ Value = DAG.getNode(ISD::ZERO_EXTEND, VT, Value);
+ unsigned Shift = 8;
+ while (CurVT != MVT::i8) {
+ Value =
+ DAG.getNode(ISD::OR, VT,
+ DAG.getNode(ISD::SHL, VT, Value,
+ DAG.getConstant(Shift, MVT::i8)), Value);
+ Shift <<= 1;
+ CurVT = (MVT::ValueType)((unsigned)CurVT - 1);
+ }
+
+ return Value;
+ }
}
-SDOperand SelectionDAG::getMemmove(SDOperand Chain, SDOperand Dest,
- SDOperand Src, SDOperand Size,
- SDOperand Align,
- SDOperand AlwaysInline) {
- SDOperand Ops[] = { Chain, Dest, Src, Size, Align, AlwaysInline };
- return getNode(ISD::MEMMOVE, MVT::Other, Ops, 6);
+/// getMemsetStringVal - Similar to getMemsetValue, except this is only
+/// used when a memcpy is turned into a memset because the source is a
+/// constant string ptr.
+static SDOperand getMemsetStringVal(MVT::ValueType VT,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ std::string &Str, unsigned Offset) {
+ uint64_t Val = 0;
+ unsigned MSB = MVT::getSizeInBits(VT) / 8;
+ if (TLI.isLittleEndian())
+ Offset = Offset + MSB - 1;
+ for (unsigned i = 0; i != MSB; ++i) {
+ Val = (Val << 8) | (unsigned char)Str[Offset];
+ Offset += TLI.isLittleEndian() ? -1 : 1;
+ }
+ return DAG.getConstant(Val, VT);
+}
+
+/// getMemBasePlusOffset - Returns an ADD node computing Base plus Offset.
+static SDOperand getMemBasePlusOffset(SDOperand Base, unsigned Offset,
+ SelectionDAG &DAG) {
+ MVT::ValueType VT = Base.getValueType();
+ return DAG.getNode(ISD::ADD, VT, Base, DAG.getConstant(Offset, VT));
}
-SDOperand SelectionDAG::getMemset(SDOperand Chain, SDOperand Dest,
+/// MeetsMaxMemopRequirement - Determines if the number of memory ops required
+/// to replace the memset / memcpy is below the threshold. It also returns the
+/// types of the sequence of memory ops to perform memset / memcpy.
+static bool MeetsMaxMemopRequirement(std::vector<MVT::ValueType> &MemOps,
+ unsigned Limit, uint64_t Size,
+ unsigned Align,
+ const TargetLowering &TLI) {
+ MVT::ValueType VT;
+
+ if (TLI.allowsUnalignedMemoryAccesses()) {
+ VT = MVT::i64;
+ } else {
+ switch (Align & 7) {
+ case 0:
+ VT = MVT::i64;
+ break;
+ case 4:
+ VT = MVT::i32;
+ break;
+ case 2:
+ VT = MVT::i16;
+ break;
+ default:
+ VT = MVT::i8;
+ break;
+ }
+ }
+
+ MVT::ValueType LVT = MVT::i64;
+ while (!TLI.isTypeLegal(LVT))
+ LVT = (MVT::ValueType)((unsigned)LVT - 1);
+ assert(MVT::isInteger(LVT));
+
+ if (VT > LVT)
+ VT = LVT;
+
+ unsigned NumMemOps = 0;
+ while (Size != 0) {
+ unsigned VTSize = MVT::getSizeInBits(VT) / 8;
+ while (VTSize > Size) {
+ VT = (MVT::ValueType)((unsigned)VT - 1);
+ VTSize >>= 1;
+ }
+ assert(MVT::isInteger(VT));
+
+ if (++NumMemOps > Limit)
+ return false;
+ MemOps.push_back(VT);
+ Size -= VTSize;
+ }
+
+ return true;
+}
+
+static SDOperand getMemcpyLoadsAndStores(SelectionDAG &DAG,
+ SDOperand Chain, SDOperand Dst,
+ SDOperand Src, uint64_t Size,
+ unsigned Align,
+ bool AlwaysInline,
+ Value *DstSV, uint64_t DstOff,
+ Value *SrcSV, uint64_t SrcOff) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // Expand memcpy to a series of store ops if the size operand falls below
+ // a certain threshold.
+ std::vector<MVT::ValueType> MemOps;
+ uint64_t Limit = -1;
+ if (!AlwaysInline)
+ Limit = TLI.getMaxStoresPerMemcpy();
+ if (!MeetsMaxMemopRequirement(MemOps, Limit, Size, Align, TLI))
+ return SDOperand();
+
+ SmallVector<SDOperand, 8> OutChains;
+
+ unsigned NumMemOps = MemOps.size();
+ unsigned SrcDelta = 0;
+ GlobalAddressSDNode *G = NULL;
+ std::string Str;
+ bool CopyFromStr = false;
+
+ if (Src.getOpcode() == ISD::GlobalAddress)
+ G = cast<GlobalAddressSDNode>(Src);
+ else if (Src.getOpcode() == ISD::ADD &&
+ Src.getOperand(0).getOpcode() == ISD::GlobalAddress &&
+ Src.getOperand(1).getOpcode() == ISD::Constant) {
+ G = cast<GlobalAddressSDNode>(Src.getOperand(0));
+ SrcDelta = cast<ConstantSDNode>(Src.getOperand(1))->getValue();
+ }
+ if (G) {
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(G->getGlobal());
+ if (GV && GV->isConstant()) {
+ Str = GV->getStringValue(false);
+ if (!Str.empty()) {
+ CopyFromStr = true;
+ SrcOff += SrcDelta;
+ }
+ }
+ }
+
+ for (unsigned i = 0; i < NumMemOps; i++) {
+ MVT::ValueType VT = MemOps[i];
+ unsigned VTSize = MVT::getSizeInBits(VT) / 8;
+ SDOperand Value, Store;
+
+ if (CopyFromStr) {
+ Value = getMemsetStringVal(VT, DAG, TLI, Str, SrcOff);
+ Store =
+ DAG.getStore(Chain, Value,
+ getMemBasePlusOffset(Dst, DstOff, DAG),
+ DstSV, DstOff);
+ } else {
+ Value = DAG.getLoad(VT, Chain,
+ getMemBasePlusOffset(Src, SrcOff, DAG),
+ SrcSV, SrcOff, false, Align);
+ Store =
+ DAG.getStore(Chain, Value,
+ getMemBasePlusOffset(Dst, DstOff, DAG),
+ DstSV, DstOff, false, Align);
+ }
+ OutChains.push_back(Store);
+ SrcOff += VTSize;
+ DstOff += VTSize;
+ }
+
+ return DAG.getNode(ISD::TokenFactor, MVT::Other,
+ &OutChains[0], OutChains.size());
+}
+
+static SDOperand getMemsetStores(SelectionDAG &DAG,
+ SDOperand Chain, SDOperand Dst,
+ SDOperand Src, uint64_t Size,
+ unsigned Align,
+ Value *DstSV, uint64_t DstOff) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // Expand memset to a series of load/store ops if the size operand
+ // falls below a certain threshold.
+ std::vector<MVT::ValueType> MemOps;
+ if (!MeetsMaxMemopRequirement(MemOps, TLI.getMaxStoresPerMemset(),
+ Size, Align, TLI))
+ return SDOperand();
+
+ SmallVector<SDOperand, 8> OutChains;
+
+ unsigned NumMemOps = MemOps.size();
+ for (unsigned i = 0; i < NumMemOps; i++) {
+ MVT::ValueType VT = MemOps[i];
+ unsigned VTSize = MVT::getSizeInBits(VT) / 8;
+ SDOperand Value = getMemsetValue(Src, VT, DAG);
+ SDOperand Store = DAG.getStore(Chain, Value,
+ getMemBasePlusOffset(Dst, DstOff, DAG),
+ DstSV, DstOff);
+ OutChains.push_back(Store);
+ DstOff += VTSize;
+ }
+
+ return DAG.getNode(ISD::TokenFactor, MVT::Other,
+ &OutChains[0], OutChains.size());
+}
+
+SDOperand SelectionDAG::getMemcpy(SDOperand Chain, SDOperand Dst,
+ SDOperand Src, SDOperand Size,
+ unsigned Align, bool AlwaysInline,
+ Value *DstSV, uint64_t DstOff,
+ Value *SrcSV, uint64_t SrcOff) {
+
+ // Check to see if we should lower the memcpy to loads and stores first.
+ // For cases within the target-specified limits, this is the best choice.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (ConstantSize) {
+ // Memcpy with size zero? Just return the original chain.
+ if (ConstantSize->isNullValue())
+ return Chain;
+
+ SDOperand Result =
+ getMemcpyLoadsAndStores(*this, Chain, Dst, Src, ConstantSize->getValue(),
+ Align, false, DstSV, DstOff, SrcSV, SrcOff);
+ if (Result.Val)
+ return Result;
+ }
+
+ // Then check to see if we should lower the memcpy with target-specific
+ // code. If the target chooses to do this, this is the next best.
+ SDOperand Result =
+ TLI.EmitTargetCodeForMemcpy(*this, Chain, Dst, Src, Size, Align,
+ AlwaysInline,
+ DstSV, DstOff, SrcSV, SrcOff);
+ if (Result.Val)
+ return Result;
+
+ // If we really need inline code and the target declined to provide it,
+ // use a (potentially long) sequence of loads and stores.
+ if (AlwaysInline) {
+ assert(ConstantSize && "AlwaysInline requires a constant size!");
+ return getMemcpyLoadsAndStores(*this, Chain, Dst, Src,
+ ConstantSize->getValue(), Align, true,
+ DstSV, DstOff, SrcSV, SrcOff);
+ }
+
+ // Emit a library call.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = TLI.getTargetData()->getIntPtrType();
+ Entry.Node = Dst; Args.push_back(Entry);
+ Entry.Node = Src; Args.push_back(Entry);
+ Entry.Node = Size; Args.push_back(Entry);
+ std::pair<SDOperand,SDOperand> CallResult =
+ TLI.LowerCallTo(Chain, Type::VoidTy,
+ false, false, false, CallingConv::C, false,
+ getExternalSymbol("memcpy", TLI.getPointerTy()),
+ Args, *this);
+ return CallResult.second;
+}
+
+SDOperand SelectionDAG::getMemmove(SDOperand Chain, SDOperand Dst,
+ SDOperand Src, SDOperand Size,
+ unsigned Align,
+ Value *DstSV, uint64_t DstOff,
+ Value *SrcSV, uint64_t SrcOff) {
+
+ // TODO: Optimize small memmove cases with simple loads and stores,
+ // ensuring that all loads precede all stores. This can cause severe
+ // register pressure, so targets should be careful with the size limit.
+
+ // Then check to see if we should lower the memmove with target-specific
+ // code. If the target chooses to do this, this is the next best.
+ SDOperand Result =
+ TLI.EmitTargetCodeForMemmove(*this, Chain, Dst, Src, Size, Align,
+ DstSV, DstOff, SrcSV, SrcOff);
+ if (Result.Val)
+ return Result;
+
+ // Emit a library call.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = TLI.getTargetData()->getIntPtrType();
+ Entry.Node = Dst; Args.push_back(Entry);
+ Entry.Node = Src; Args.push_back(Entry);
+ Entry.Node = Size; Args.push_back(Entry);
+ std::pair<SDOperand,SDOperand> CallResult =
+ TLI.LowerCallTo(Chain, Type::VoidTy,
+ false, false, false, CallingConv::C, false,
+ getExternalSymbol("memmove", TLI.getPointerTy()),
+ Args, *this);
+ return CallResult.second;
+}
+
+SDOperand SelectionDAG::getMemset(SDOperand Chain, SDOperand Dst,
SDOperand Src, SDOperand Size,
- SDOperand Align,
- SDOperand AlwaysInline) {
- SDOperand Ops[] = { Chain, Dest, Src, Size, Align, AlwaysInline };
- return getNode(ISD::MEMSET, MVT::Other, Ops, 6);
+ unsigned Align,
+ Value *DstSV, uint64_t DstOff) {
+
+ // Check to see if we should lower the memset to stores first.
+ // For cases within the target-specified limits, this is the best choice.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (ConstantSize) {
+ // Memset with size zero? Just return the original chain.
+ if (ConstantSize->isNullValue())
+ return Chain;
+
+ SDOperand Result =
+ getMemsetStores(*this, Chain, Dst, Src, ConstantSize->getValue(), Align,
+ DstSV, DstOff);
+ if (Result.Val)
+ return Result;
+ }
+
+ // Then check to see if we should lower the memset with target-specific
+ // code. If the target chooses to do this, this is the next best.
+ SDOperand Result =
+ TLI.EmitTargetCodeForMemset(*this, Chain, Dst, Src, Size, Align,
+ DstSV, DstOff);
+ if (Result.Val)
+ return Result;
+
+ // Emit a library call.
+ const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType();
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = Dst; Entry.Ty = IntPtrTy;
+ Args.push_back(Entry);
+ // Extend or truncate the argument to be an i32 value for the call.
+ if (Src.getValueType() > MVT::i32)
+ Src = getNode(ISD::TRUNCATE, MVT::i32, Src);
+ else
+ Src = getNode(ISD::ZERO_EXTEND, MVT::i32, Src);
+ Entry.Node = Src; Entry.Ty = Type::Int32Ty; Entry.isSExt = true;
+ Args.push_back(Entry);
+ Entry.Node = Size; Entry.Ty = IntPtrTy; Entry.isSExt = false;
+ Args.push_back(Entry);
+ std::pair<SDOperand,SDOperand> CallResult =
+ TLI.LowerCallTo(Chain, Type::VoidTy,
+ false, false, false, CallingConv::C, false,
+ getExternalSymbol("memset", TLI.getPointerTy()),
+ Args, *this);
+ return CallResult.second;
}
SDOperand SelectionDAG::getAtomic(unsigned Opcode, SDOperand Chain,
@@ -4009,11 +4339,6 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::STACKRESTORE: return "stackrestore";
case ISD::TRAP: return "trap";
- // Block memory operations.
- case ISD::MEMSET: return "memset";
- case ISD::MEMCPY: return "memcpy";
- case ISD::MEMMOVE: return "memmove";
-
// Bit manipulation
case ISD::BSWAP: return "bswap";
case ISD::CTPOP: return "ctpop";
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index cfef9acd4f..ac5cfd2e91 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -647,8 +647,6 @@ public:
void visitVAEnd(CallInst &I);
void visitVACopy(CallInst &I);
- void visitMemIntrinsic(CallInst &I, unsigned Op);
-
void visitGetResult(GetResultInst &I);
void visitUserOp1(Instruction &I) {
@@ -2737,18 +2735,48 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
return "_longjmp"+!TLI.usesUnderscoreLongJmp();
break;
case Intrinsic::memcpy_i32:
- case Intrinsic::memcpy_i64:
- visitMemIntrinsic(I, ISD::MEMCPY);
+ case Intrinsic::memcpy_i64: {
+ SDOperand Op1 = getValue(I.getOperand(1));
+ SDOperand Op2 = getValue(I.getOperand(2));
+ SDOperand Op3 = getValue(I.getOperand(3));
+ unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue();
+ DAG.setRoot(DAG.getMemcpy(getRoot(), Op1, Op2, Op3, Align, false,
+ I.getOperand(1), 0, I.getOperand(2), 0));
return 0;
+ }
case Intrinsic::memset_i32:
- case Intrinsic::memset_i64:
- visitMemIntrinsic(I, ISD::MEMSET);
+ case Intrinsic::memset_i64: {
+ SDOperand Op1 = getValue(I.getOperand(1));
+ SDOperand Op2 = getValue(I.getOperand(2));
+ SDOperand Op3 = getValue(I.getOperand(3));
+ unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue();
+ DAG.setRoot(DAG.getMemset(getRoot(), Op1, Op2, Op3, Align,
+ I.getOperand(1), 0));
return 0;
+ }
case Intrinsic::memmove_i32:
- case Intrinsic::memmove_i64:
- visitMemIntrinsic(I, ISD::MEMMOVE);
+ case Intrinsic::memmove_i64: {
+ SDOperand Op1 = getValue(I.getOperand(1));
+ SDOperand Op2 = getValue(I.getOperand(2));
+ SDOperand Op3 = getValue(I.getOperand(3));
+ unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue();
+
+ // If the source and destination are known to not be aliases, we can
+ // lower memmove as memcpy.
+ uint64_t Size = -1ULL;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op3))
+ Size = C->getValue();
+ if (AA.alias(I.getOperand(1), Size, I.getOperand(2), Size) ==
+ AliasAnalysis::NoAlias) {
+ DAG.setRoot(DAG.getMemcpy(getRoot(), Op1, Op2, Op3, Align, false,
+ I.getOperand(1), 0, I.getOperand(2), 0));
+ return 0;
+ }
+
+ DAG.setRoot(DAG.getMemmove(getRoot(), Op1, Op2, Op3, Align,
+ I.getOperand(1), 0, I.getOperand(2), 0));
return 0;
-
+ }
case Intrinsic::dbg_stoppoint: {
MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
DbgStopPointInst &SPI = cast<DbgStopPointInst>(I);
@@ -4342,242 +4370,6 @@ SDOperand TargetLowering::CustomPromoteOperation(SDOperand Op,
return SDOperand();
}
-/// getMemsetValue - Vectorized representation of the memset value
-/// operand.
-static SDOperand getMemsetValue(SDOperand Value, MVT::ValueType VT,
- SelectionDAG &DAG) {
- MVT::ValueType CurVT = VT;
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
- uint64_t Val = C->getValue() & 255;
- unsigned Shift = 8;
- while (CurVT != MVT::i8) {
- Val = (Val << Shift) | Val;
- Shift <<= 1;
- CurVT = (MVT::ValueType)((unsigned)CurVT - 1);
- }
- return DAG.getConstant(Val, VT);
- } else {
- Value = DAG.getNode(ISD::ZERO_EXTEND, VT, Value);
- unsigned Shift = 8;
- while (CurVT != MVT::i8) {
- Value =
- DAG.getNode(ISD::OR, VT,
- DAG.getNode(ISD::SHL, VT, Value,
- DAG.getConstant(Shift, MVT::i8)), Value);
- Shift <<= 1;
- CurVT = (MVT::ValueType)((unsigned)CurVT - 1);
- }
-
- return Value;
- }
-}
-
-/// getMemsetStringVal - Similar to getMemsetValue. Except this is only
-/// used when a memcpy is turned into a memset when the source is a constant
-/// string ptr.
-static SDOperand getMemsetStringVal(MVT::ValueType VT,
- SelectionDAG &DAG, TargetLowering &TLI,
- std::string &Str, unsigned Offset) {
- uint64_t Val = 0;
- unsigned MSB = MVT::getSizeInBits(VT) / 8;
- if (TLI.isLittleEndian())
- Offset = Offset + MSB - 1;
- for (unsigned i = 0; i != MSB; ++i) {
- Val = (Val << 8) | (unsigned char)Str[Offset];
- Offset += TLI.isLittleEndian() ? -1 : 1;
- }
- return DAG.getConstant(Val, VT);
-}
-
-/// getMemBasePlusOffset - Returns base and offset node for the
-static SDOperand getMemBasePlusOffset(SDOperand Base, unsigned Offset,
- SelectionDAG &DAG, TargetLowering &TLI) {
- MVT::ValueType VT = Base.getValueType();
- return DAG.getNode(ISD::ADD, VT, Base, DAG.getConstant(Offset, VT));
-}
-
-/// MeetsMaxMemopRequirement - Determines if the number of memory ops required
-/// to replace the memset / memcpy is below the threshold. It also returns the
-/// types of the sequence of memory ops to perform memset / memcpy.
-static bool MeetsMaxMemopRequirement(std::vector<MVT::ValueType> &MemOps,
- unsigned Limit, uint64_t Size,
- unsigned Align, TargetLowering &TLI) {
- MVT::ValueType VT;
-
- if (TLI.allowsUnalignedMemoryAccesses()) {
- VT = MVT::i64;
- } else {
- switch (Align & 7) {
- case 0:
- VT = MVT::i64;
- break;
- case 4:
- VT = MVT::i32;
- break;
- case 2:
- VT = MVT::i16;
- break;
- default:
- VT = MVT::i8;
- break;
- }
- }
-
- MVT::ValueType LVT = MVT::i64;
- while (!TLI.isTypeLegal(LVT))
- LVT = (MVT::ValueType)((unsigned)LVT - 1);
- assert(MVT::isInteger(LVT));
-
- if (VT > LVT)
- VT = LVT;
-
- unsigned NumMemOps = 0;
- while (Size != 0) {
- unsigned VTSize = MVT::getSizeInBits(VT) / 8;
- while (VTSize > Size) {
- VT = (MVT::ValueType)((unsigned)VT - 1);
- VTSize >>= 1;
- }
- assert(MVT::isInteger(VT));
-
- if (++NumMemOps > Limit)
- return false;
- MemOps.push_back(VT);
- Size -= VTSize;
- }
-
- return true;
-}
-
-void SelectionDAGLowering::visitMemIntrinsic(CallInst &I, unsigned Op) {
- SDOperand Op1 = getValue(I.getOperand(1));
- SDOperand Op2 = getValue(I.getOperand(2));
- SDOperand Op3 = getValue(I.getOperand(3));
- SDOperand Op4 = getValue(I.getOperand(4));
- unsigned Align = (unsigned)cast<ConstantSDNode>(Op4)->getValue();
- if (Align == 0) Align = 1;
-
- // If the source and destination are known to not be aliases, we can
- // lower memmove as memcpy.
- if (Op == ISD::MEMMOVE) {
- uint64_t Size = -1ULL;
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op3))
- Size = C->getValue();
- if (AA.alias(I.getOperand(1), Size, I.getOperand(2), Size) ==
- AliasAnalysis::NoAlias)
- Op = ISD::MEMCPY;
- }
-
- if (ConstantSDNode *Size = dyn_cast<ConstantSDNode>(Op3)) {
- std::vector<MVT::ValueType> MemOps;
-
- // Expand memset / memcpy to a series of load / store ops
- // if the size operand falls below a certain threshold.
- SmallVector<SDOperand, 8> OutChains;
- switch (Op) {
- default: break; // Do nothing for now.
- case ISD::MEMSET: {
- if (MeetsMaxMemopRequirement(MemOps, TLI.getMaxStoresPerMemset(),
- Size->getValue(), Align, TLI)) {
- unsigned NumMemOps = MemOps.size();
- unsigned Offset = 0;
- for (unsigned i = 0; i < NumMemOps; i++) {
- MVT::ValueType VT = MemOps[i];
- unsigned VTSize = MVT::getSizeInBits(VT) / 8;
- SDOperand Value = getMemsetValue(Op2, VT, DAG);
- SDOperand Store = DAG.getStore(getRoot(), Value,
- getMemBasePlusOffset(Op1, Offset, DAG, TLI),
- I.getOperand(1), Offset);
- OutChains.push_back(Store);
- Offset += VTSize;
- }
- }
- break;
- }
- case ISD::MEMCPY: {
- if (MeetsMaxMemopRequirement(MemOps, TLI.getMaxStoresPerMemcpy(),
- Size->getValue(), Align, TLI)) {
- unsigned NumMemOps = MemOps.size();
- unsigned SrcOff = 0, DstOff = 0, SrcDelta = 0;