diff options
Diffstat (limited to 'lib/CodeGen/SelectionDAG/SelectionDAG.cpp')
-rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 124 |
1 files changed, 85 insertions, 39 deletions
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index f000ce38d3..29339405aa 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -12,42 +12,42 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/SelectionDAG.h" -#include "SDNodeOrdering.h" #include "SDNodeDbgValue.h" +#include "SDNodeOrdering.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Assembly/Writer.h" #include "llvm/CallingConv.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/Constants.h" +#include "llvm/DataLayout.h" #include "llvm/DebugInfo.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/GlobalAlias.h" #include "llvm/GlobalVariable.h" #include "llvm/Intrinsics.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/Assembly/Writer.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetSelectionDAGInfo.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetIntrinsicInfo.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/Mutex.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetIntrinsicInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSelectionDAGInfo.h" #include <algorithm> #include <cmath> using namespace llvm; @@ -3373,7 +3373,7 @@ static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG, unsigned NumVTBytes = VT.getSizeInBits() / 8; unsigned NumBytes = std::min(NumVTBytes, unsigned(Str.size())); - uint64_t Val = 0; + APInt Val(NumBytes*8, 0); if (TLI.isLittleEndian()) { for (unsigned i = 0; i != NumBytes; ++i) Val |= (uint64_t)(unsigned char)Str[i] << i*8; @@ -3382,7 +3382,9 @@ static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG, Val |= (uint64_t)(unsigned char)Str[i] << (NumVTBytes-i-1)*8; } - return DAG.getConstant(Val, VT); + if (TLI.isIntImmLegal(Val, VT)) + return DAG.getConstant(Val, VT); + return SDValue(0, 0); } /// getMemBasePlusOffset - Returns base and offset node for the @@ -3422,6 +3424,7 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, unsigned DstAlign, unsigned SrcAlign, bool IsZeroVal, bool MemcpyStrSrc, + bool AllowOverlap, SelectionDAG &DAG, const TargetLowering &TLI) { assert((SrcAlign == 0 || SrcAlign >= DstAlign) && @@ -3461,24 +3464,47 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, unsigned NumMemOps = 0; while (Size != 0) { + if (++NumMemOps > Limit) + return false; + unsigned VTSize = VT.getSizeInBits() / 8; while (VTSize > Size) { // For now, only use non-vector load / store's for the left-over pieces. + EVT NewVT; + unsigned NewVTSize; if (VT.isVector() || VT.isFloatingPoint()) { - VT = MVT::i64; - while (!TLI.isTypeLegal(VT)) - VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1); - VTSize = VT.getSizeInBits() / 8; + NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32; + while (!TLI.isOperationLegalOrCustom(ISD::STORE, NewVT)) { + if (NewVT == MVT::i64 && + TLI.isOperationLegalOrCustom(ISD::STORE, MVT::f64)) { + // i64 is usually not legal on 32-bit targets, but f64 may be. + NewVT = MVT::f64; + break; + } + NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1); + } + NewVTSize = NewVT.getSizeInBits() / 8; } else { // This can result in a type that is not legal on the target, e.g. // 1 or 2 bytes on PPC. - VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1); - VTSize >>= 1; + NewVT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1); + NewVTSize = VTSize >> 1; + } + + // If the new VT cannot cover all of the remaining bits, then consider + // issuing a (or a pair of) unaligned and overlapping load / store. + // FIXME: Only does this for 64-bit or more since we don't have proper + // cost model for unaligned load / store. + bool Fast; + if (AllowOverlap && VTSize >= 8 && NewVTSize < Size && + TLI.allowsUnalignedMemoryAccesses(VT, &Fast) && Fast) + VTSize = Size; + else { + VT = NewVT; + VTSize = NewVTSize; } } - if (++NumMemOps > Limit) - return false; MemOps.push_back(VT); Size -= VTSize; } @@ -3523,7 +3549,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, if (!FindOptimalMemOpLowering(MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), (isZeroStr ? 0 : SrcAlign), - true, CopyFromStr, DAG, TLI)) + true, CopyFromStr, true, DAG, TLI)) return SDValue(); if (DstAlignCanChange) { @@ -3545,6 +3571,14 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, unsigned VTSize = VT.getSizeInBits() / 8; SDValue Value, Store; + if (VTSize > Size) { + // Issuing an unaligned load / store pair that overlaps with the previous + // pair. Adjust the offset accordingly. + assert(i == NumMemOps-1 && i != 0); + SrcOff -= VTSize - Size; + DstOff -= VTSize - Size; + } + if (CopyFromStr && (isZeroStr || (VT.isInteger() && !VT.isVector()))) { // It's unlikely a store of a vector immediate can be done in a single @@ -3553,11 +3587,14 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, // FIXME: Handle other cases where store of vector immediate is done in // a single instruction. Value = getMemsetStringVal(VT, dl, DAG, TLI, Str.substr(SrcOff)); - Store = DAG.getStore(Chain, dl, Value, - getMemBasePlusOffset(Dst, DstOff, DAG), - DstPtrInfo.getWithOffset(DstOff), isVol, - false, Align); - } else { + if (Value.getNode()) + Store = DAG.getStore(Chain, dl, Value, + getMemBasePlusOffset(Dst, DstOff, DAG), + DstPtrInfo.getWithOffset(DstOff), isVol, + false, Align); + } + + if (!Store.getNode()) { // The type might not be legal for the target. This should only happen // if the type is smaller than a legal type, as on PPC, so the right // thing to do is generate a LoadExt/StoreTrunc pair. These simplify @@ -3577,6 +3614,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, OutChains.push_back(Store); SrcOff += VTSize; DstOff += VTSize; + Size -= VTSize; } return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, @@ -3613,7 +3651,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, if (!FindOptimalMemOpLowering(MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), - SrcAlign, true, false, DAG, TLI)) + SrcAlign, true, false, false, DAG, TLI)) return SDValue(); if (DstAlignCanChange) { @@ -3689,7 +3727,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue(); if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize), Size, (DstAlignCanChange ? 0 : Align), 0, - IsZeroVal, false, DAG, TLI)) + IsZeroVal, false, true, DAG, TLI)) return SDValue(); if (DstAlignCanChange) { @@ -3716,6 +3754,13 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, for (unsigned i = 0; i < NumMemOps; i++) { EVT VT = MemOps[i]; + unsigned VTSize = VT.getSizeInBits() / 8; + if (VTSize > Size) { + // Issuing an unaligned load / store pair that overlaps with the previous + // pair. Adjust the offset accordingly. + assert(i == NumMemOps-1 && i != 0); + DstOff -= VTSize - Size; + } // If this store is smaller than the largest store see whether we can get // the smaller value for free with a truncate. @@ -3734,6 +3779,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, isVol, false, Align); OutChains.push_back(Store); DstOff += VT.getSizeInBits() / 8; + Size -= VTSize; } return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, |