diff options
author | Derek Schuff <dschuff@chromium.org> | 2012-11-14 16:47:45 -0800 |
---|---|---|
committer | Derek Schuff <dschuff@chromium.org> | 2012-11-15 10:12:39 -0800 |
commit | 923f52fb3f6670e843ffe0b8da2f2bad898d752c (patch) | |
tree | a279587ac525162817217dbbe0f01a91893a9d0b /lib | |
parent | f4099a3c92570a80cd9a3850cda598c5ea446b96 (diff) | |
parent | 08e9cb46feb0c8e08e3d309a0f9fd75a04ca54fb (diff) |
Merge commit '08e9cb46feb0c8e08e3d309a0f9fd75a04ca54fb'
(svn r167699, also the 3.2 branch point)
Conflicts:
lib/Target/X86/X86Subtarget.cpp
Diffstat (limited to 'lib')
25 files changed, 895 insertions, 625 deletions
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 5cac8ca3ba..91a5b84e8a 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -292,7 +292,10 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, unsigned IntBytes = unsigned(CI->getBitWidth()/8); for (unsigned i = 0; i != BytesLeft && ByteOffset != IntBytes; ++i) { - CurPtr[i] = (unsigned char)(Val >> (ByteOffset * 8)); + int n = ByteOffset; + if (!TD.isLittleEndian()) + n = IntBytes - n - 1; + CurPtr[i] = (unsigned char)(Val >> (n * 8)); ++ByteOffset; } return true; @@ -442,10 +445,19 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, BytesLoaded, TD)) return 0; - APInt ResultVal = APInt(IntType->getBitWidth(), RawBytes[BytesLoaded-1]); - for (unsigned i = 1; i != BytesLoaded; ++i) { - ResultVal <<= 8; - ResultVal |= RawBytes[BytesLoaded-1-i]; + APInt ResultVal = APInt(IntType->getBitWidth(), 0); + if (TD.isLittleEndian()) { + ResultVal = RawBytes[BytesLoaded - 1]; + for (unsigned i = 1; i != BytesLoaded; ++i) { + ResultVal <<= 8; + ResultVal |= RawBytes[BytesLoaded-1-i]; + } + } else { + ResultVal = RawBytes[0]; + for (unsigned i = 1; i != BytesLoaded; ++i) { + ResultVal <<= 8; + ResultVal |= RawBytes[i]; + } } return ConstantInt::get(IntType->getContext(), ResultVal); @@ -521,10 +533,8 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, } } - // Try hard to fold loads from bitcasted strange and non-type-safe things. We - // currently don't do any of this for big endian systems. It can be - // generalized in the future if someone is interested. - if (TD && TD->isLittleEndian()) + // Try hard to fold loads from bitcasted strange and non-type-safe things. + if (TD) return FoldReinterpretLoadFromConstPtr(CE, *TD); return 0; } diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 65bc4af99e..4e75d892e5 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -146,6 +146,11 @@ void LiveIntervals::print(raw_ostream &OS, const Module* ) const { OS << PrintReg(Reg) << " = " << getInterval(Reg) << '\n'; } + OS << "RegMasks:"; + for (unsigned i = 0, e = RegMaskSlots.size(); i != e; ++i) + OS << ' ' << RegMaskSlots[i]; + OS << '\n'; + printInstrs(OS); } @@ -1257,10 +1262,15 @@ private: SmallVectorImpl<SlotIndex>::iterator RI = std::lower_bound(LIS.RegMaskSlots.begin(), LIS.RegMaskSlots.end(), OldIdx); - assert(*RI == OldIdx && "No RegMask at OldIdx."); - *RI = NewIdx; - assert(*prior(RI) < *RI && *RI < *next(RI) && - "RegSlots out of order. Did you move one call across another?"); + assert(RI != LIS.RegMaskSlots.end() && *RI == OldIdx.getRegSlot() && + "No RegMask at OldIdx."); + *RI = NewIdx.getRegSlot(); + assert((RI == LIS.RegMaskSlots.begin() || + SlotIndex::isEarlierInstr(*llvm::prior(RI), *RI)) && + "Cannot move regmask instruction above another call"); + assert((llvm::next(RI) == LIS.RegMaskSlots.end() || + SlotIndex::isEarlierInstr(*RI, *llvm::next(RI))) && + "Cannot move regmask instruction below another call"); } // Return the last use of reg between NewIdx and OldIdx. diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index de16932c06..a4817d09c0 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -1583,6 +1583,7 @@ const char *ConvergingScheduler::getReasonStr( case NextDefUse: return "DEF-USE "; case NodeOrder: return "ORDER "; }; + llvm_unreachable("Unknown reason!"); } void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand, diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp index ffa79761f2..e0336342d6 100644 --- a/lib/MC/MCExpr.cpp +++ b/lib/MC/MCExpr.cpp @@ -60,7 +60,8 @@ void MCExpr::print(raw_ostream &OS) const { SRE.getKind() == MCSymbolRefExpr::VK_ARM_GOTOFF || SRE.getKind() == MCSymbolRefExpr::VK_ARM_TPOFF || SRE.getKind() == MCSymbolRefExpr::VK_ARM_GOTTPOFF || - SRE.getKind() == MCSymbolRefExpr::VK_ARM_TARGET1) + SRE.getKind() == MCSymbolRefExpr::VK_ARM_TARGET1 || + SRE.getKind() == MCSymbolRefExpr::VK_ARM_TARGET2) OS << MCSymbolRefExpr::getVariantKindName(SRE.getKind()); else if (SRE.getKind() != MCSymbolRefExpr::VK_None && SRE.getKind() != MCSymbolRefExpr::VK_PPC_DARWIN_HA16 && @@ -199,6 +200,7 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) { case VK_ARM_GOTTPOFF: return "(gottpoff)"; case VK_ARM_TLSGD: return "(tlsgd)"; case VK_ARM_TARGET1: return "(target1)"; + case VK_ARM_TARGET2: return "(target2)"; case VK_PPC_TOC: return "tocbase"; case VK_PPC_TOC_ENTRY: return "toc"; case VK_PPC_DARWIN_HA16: return "ha16"; diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index 43c68f4d1d..7e8b4a3d0d 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -3553,11 +3553,6 @@ void APFloat::toString(SmallVectorImpl<char> &Str, } bool APFloat::getExactInverse(APFloat *inv) const { - // We can only guarantee the existence of an exact inverse for IEEE floats. - if (semantics != &IEEEhalf && semantics != &IEEEsingle && - semantics != &IEEEdouble && semantics != &IEEEquad) - return false; - // Special floats and denormals have no exact inverse. if (category != fcNormal) return false; diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 8d1a301a67..f67decc550 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -853,13 +853,28 @@ void ARMAsmPrinter::emitAttributes() { AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use, ARMBuildAttrs::Allowed); } else if (CPUString == "generic") { - // FIXME: Why these defaults? - AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v4T); + // For a generic CPU, we assume a standard v7a architecture in Subtarget. + AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v7); + AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch_profile, + ARMBuildAttrs::ApplicationProfile); AttrEmitter->EmitAttribute(ARMBuildAttrs::ARM_ISA_use, ARMBuildAttrs::Allowed); AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use, - ARMBuildAttrs::Allowed); - } + ARMBuildAttrs::AllowThumb32); + } else if (Subtarget->hasV7Ops()) { + AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v7); + AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use, + ARMBuildAttrs::AllowThumb32); + } else if (Subtarget->hasV6T2Ops()) + AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v6T2); + else if (Subtarget->hasV6Ops()) + AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v6); + else if (Subtarget->hasV5TEOps()) + AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v5TE); + else if (Subtarget->hasV5TOps()) + AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v5T); + else if (Subtarget->hasV4TOps()) + AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v4T); if (Subtarget->hasNEON() && emitFPU) { /* NEON is not exactly a VFP architecture, but GAS emit one of @@ -1515,31 +1530,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { } return; } - case ARM::t2BMOVPCB_CALL: { - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::tMOVr); - TmpInst.addOperand(MCOperand::CreateReg(ARM::LR)); - TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); - // Add predicate operands. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::t2B); - const GlobalValue *GV = MI->getOperand(0).getGlobal(); - MCSymbol *GVSym = Mang->getSymbol(GV); - const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext); - TmpInst.addOperand(MCOperand::CreateExpr(GVSymExpr)); - // Add predicate operands. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } - return; - } case ARM::MOVi16_ga_pcrel: case ARM::t2MOVi16_ga_pcrel: { MCInst TmpInst; diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index dec498a4f7..0893826427 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -1639,18 +1639,12 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (Subtarget->isThumb()) { if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps()) CallOpc = ARMISD::CALL_NOLINK; - else if (doesNotRet && isDirect && !isARMFunc && - Subtarget->hasRAS() && !Subtarget->isThumb1Only() && - // Emit regular call when code size is the priority - !HasMinSizeAttr) - // "mov lr, pc; b _foo" to avoid confusing the RSP - CallOpc = ARMISD::CALL_NOLINK; else CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL; } else { - if (!isDirect && !Subtarget->hasV5TOps()) { + if (!isDirect && !Subtarget->hasV5TOps()) CallOpc = ARMISD::CALL_NOLINK; - } else if (doesNotRet && isDirect && Subtarget->hasRAS() && + else if (doesNotRet && isDirect && Subtarget->hasRAS() && // Emit regular call when code size is the priority !HasMinSizeAttr) // "mov lr, pc; b _foo" to avoid confusing the RSP diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 248bab6b12..c2800acccd 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -3331,20 +3331,6 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { Requires<[IsThumb2, IsIOS]>; } -let isCall = 1, Defs = [LR], Uses = [SP] in { - // mov lr, pc; b if callee is marked noreturn to avoid confusing the - // return stack predictor. - def t2BMOVPCB_CALL : tPseudoInst<(outs), - (ins t_bltarget:$func), - 6, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>, - Requires<[IsThumb]>; -} - -// Direct calls -def : T2Pat<(ARMcall_nolink texternalsym:$func), - (t2BMOVPCB_CALL texternalsym:$func)>, - Requires<[IsThumb]>; - // IT block let Defs = [ITSTATE] in def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask), diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp index 7a7ce27d48..253d1fa2ab 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ -247,6 +247,9 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_ARM_TARGET1: Type = ELF::R_ARM_TARGET1; break; + case MCSymbolRefExpr::VK_ARM_TARGET2: + Type = ELF::R_ARM_TARGET2; + break; } break; case ARM::fixup_arm_ldst_pcrel_12: diff --git a/lib/Target/NVPTX/NVPTX.td b/lib/Target/NVPTX/NVPTX.td index ae7710e54f..7aee3595c6 100644 --- a/lib/Target/NVPTX/NVPTX.td +++ b/lib/Target/NVPTX/NVPTX.td @@ -24,7 +24,30 @@ include "NVPTXInstrInfo.td" // - Need at least one feature to avoid generating zero sized array by // TableGen in NVPTXGenSubtarget.inc. //===----------------------------------------------------------------------===// -def FeatureDummy : SubtargetFeature<"dummy", "dummy", "true", "">; + +// SM Versions +def SM10 : SubtargetFeature<"sm_10", "SmVersion", "10", + "Target SM 1.0">; +def SM11 : SubtargetFeature<"sm_11", "SmVersion", "11", + "Target SM 1.1">; +def SM12 : SubtargetFeature<"sm_12", "SmVersion", "12", + "Target SM 1.2">; +def SM13 : SubtargetFeature<"sm_13", "SmVersion", "13", + "Target SM 1.3">; +def SM20 : SubtargetFeature<"sm_20", "SmVersion", "20", + "Target SM 2.0">; +def SM21 : SubtargetFeature<"sm_21", "SmVersion", "21", + "Target SM 2.1">; +def SM30 : SubtargetFeature<"sm_30", "SmVersion", "30", + "Target SM 3.0">; +def SM35 : SubtargetFeature<"sm_35", "SmVersion", "35", + "Target SM 3.5">; + +// PTX Versions +def PTX30 : SubtargetFeature<"ptx30", "PTXVersion", "30", + "Use PTX version 3.0">; +def PTX31 : SubtargetFeature<"ptx31", "PTXVersion", "31", + "Use PTX version 3.1">; //===----------------------------------------------------------------------===// // NVPTX supported processors. @@ -33,7 +56,14 @@ def FeatureDummy : SubtargetFeature<"dummy", "dummy", "true", "">; class Proc<string Name, list<SubtargetFeature> Features> : Processor<Name, NoItineraries, Features>; -def : Proc<"sm_10", [FeatureDummy]>; +def : Proc<"sm_10", [SM10]>; +def : Proc<"sm_11", [SM11]>; +def : Proc<"sm_12", [SM12]>; +def : Proc<"sm_13", [SM13]>; +def : Proc<"sm_20", [SM20]>; +def : Proc<"sm_21", [SM21]>; +def : Proc<"sm_30", [SM30]>; +def : Proc<"sm_35", [SM35]>; def NVPTXInstrInfo : InstrInfo { diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index d3dfb35e26..3dd9bf5613 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -910,7 +910,8 @@ void NVPTXAsmPrinter::emitHeader (Module &M, raw_ostream &O) { O << "//\n"; O << "\n"; - O << ".version 3.0\n"; + unsigned PTXVersion = nvptxSubtarget.getPTXVersion(); + O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n"; O << ".target "; O << nvptxSubtarget.getTargetName(); @@ -1525,6 +1526,9 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, // <a> = PAL.getparamalignment // size = typeallocsize of element type unsigned align = PAL.getParamAlignment(paramIndex+1); + if (align == 0) + align = TD->getABITypeAlignment(ETy); + unsigned sz = TD->getTypeAllocSize(ETy); O << "\t.param .align " << align << " .b8 "; diff --git a/lib/Target/NVPTX/NVPTXSubtarget.cpp b/lib/Target/NVPTX/NVPTXSubtarget.cpp index 6aadd43e94..7b62cce2c6 100644 --- a/lib/Target/NVPTX/NVPTXSubtarget.cpp +++ b/lib/Target/NVPTX/NVPTXSubtarget.cpp @@ -34,16 +34,18 @@ DriverInterface(cl::desc("Choose driver interface:"), NVPTXSubtarget::NVPTXSubtarget(const std::string &TT, const std::string &CPU, const std::string &FS, bool is64Bit) -:NVPTXGenSubtargetInfo(TT, "", FS), // Don't pass CPU to subtarget, - // because we don't register all - // nvptx targets. - Is64Bit(is64Bit) { +: NVPTXGenSubtargetInfo(TT, CPU, FS), + Is64Bit(is64Bit), + PTXVersion(0), + SmVersion(10) { drvInterface = DriverInterface; // Provide the default CPU if none std::string defCPU = "sm_10"; + ParseSubtargetFeatures((CPU.empty() ? defCPU : CPU), FS); + // Get the TargetName from the FS if available if (FS.empty() && CPU.empty()) TargetName = defCPU; @@ -52,6 +54,12 @@ NVPTXSubtarget::NVPTXSubtarget(const std::string &TT, const std::string &CPU, else llvm_unreachable("we are not using FeatureStr"); - // Set up the SmVersion - SmVersion = atoi(TargetName.c_str()+3); + // We default to PTX 3.1, but we cannot just default to it in the initializer + // since the attribute parser checks if the given option is >= the default. + // So if we set ptx31 as the default, the ptx30 attribute would never match. + // Instead, we use 0 as the default and manually set 31 if the default is + // used. + if (PTXVersion == 0) { + PTXVersion = 31; + } } diff --git a/lib/Target/NVPTX/NVPTXSubtarget.h b/lib/Target/NVPTX/NVPTXSubtarget.h index 8f2a629d22..c3a683a2c6 100644 --- a/lib/Target/NVPTX/NVPTXSubtarget.h +++ b/lib/Target/NVPTX/NVPTXSubtarget.h @@ -25,13 +25,18 @@ namespace llvm { class NVPTXSubtarget : public NVPTXGenSubtargetInfo { - - unsigned int SmVersion; + std::string TargetName; NVPTX::DrvInterface drvInterface; bool dummy; // For the 'dummy' feature, see NVPTX.td bool Is64Bit; + // PTX version x.y is represented as 10*x+y, e.g. 3.1 == 31 + unsigned PTXVersion; + + // SM version x.y is represented as 10*x+y, e.g. 3.1 == 31 + unsigned int SmVersion; + public: /// This constructor initializes the data members to match that /// of the specified module. @@ -69,6 +74,8 @@ public: NVPTX::DrvInterface getDrvInterface() const { return drvInterface; } std::string getTargetName() const { return TargetName; } + unsigned getPTXVersion() const { return PTXVersion; } + void ParseSubtargetFeatures(StringRef CPU, StringRef FS); std::string getDataLayout() const { diff --git a/lib/Target/TargetLibraryInfo.cpp b/lib/Target/TargetLibraryInfo.cpp index 62f973e658..6d4eab1204 100644 --- a/lib/Target/TargetLibraryInfo.cpp +++ b/lib/Target/TargetLibraryInfo.cpp @@ -152,9 +152,12 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = "sqrt", "sqrtf", "sqrtl", + "stpcpy", "strcat", "strchr", + "strcmp", "strcpy", + "strcspn", "strdup", "strlen", "strncat", @@ -162,6 +165,17 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = "strncpy", "strndup", "strnlen", + "strpbrk", + "strrchr", + "strspn", + "strstr", + "strtod", + "strtof", + "strtol", + "strtold", + "strtoll", + "strtoul", + "strtoull", "tan", "tanf", "tanh", @@ -309,6 +323,10 @@ static void initialize(TargetLibraryInfo &TLI, const Triple &T, TLI.setUnavailable(LibFunc::tanf); TLI.setUnavailable(LibFunc::tanhf); } + + // Win32 does *not* provide stpcpy. It is provided on POSIX systems: + // http://pubs.opengroup.org/onlinepubs/9699919799/functions/stpcpy.html + TLI.setUnavailable(LibFunc::stpcpy); } } diff --git a/lib/Target/TargetTransformImpl.cpp b/lib/Target/TargetTransformImpl.cpp index c07332de32..b36e6f858f 100644 --- a/lib/Target/TargetTransformImpl.cpp +++ b/lib/Target/TargetTransformImpl.cpp @@ -214,8 +214,16 @@ unsigned VectorTargetTransformImpl::getCastInstrCost(unsigned Opcode, Type *Dst, // Handle scalar conversions. if (!Src->isVectorTy() && !Dst->isVectorTy()) { - // Scalar bitcasts and truncs are usually free. - if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc) + // Scalar bitcasts are usually free. + if (Opcode == Instruction::BitCast) + return 0; + + if (Opcode == Instruction::Trunc && + TLI->isTruncateFree(SrcLT.second, DstLT.second)) + return 0; + + if (Opcode == Instruction::ZExt && + TLI->isZExtFree(SrcLT.second, DstLT.second)) return 0; // Just check the op cost. If the operation is legal then assume it costs 1. diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index e86c1000f1..42134256e3 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -2892,85 +2892,6 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { return Result; } - - // FIXME: Custom handling because TableGen doesn't support multiple implicit - // defs in an instruction pattern - case X86ISD::PCMPESTRI: { - SDValue N0 = Node->getOperand(0); - SDValue N1 = Node->getOperand(1); - SDValue N2 = Node->getOperand(2); - SDValue N3 = Node->getOperand(3); - SDValue N4 = Node->getOperand(4); - - // Make sure last argument is a constant - ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N4); - if (!Cst) - break; - - uint64_t Imm = Cst->getZExtValue(); - - SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, - X86::EAX, N1, SDValue()).getValue(1); - InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EDX, - N3, InFlag).getValue(1); - - SDValue Ops[] = { N0, N2, getI8Imm(Imm), InFlag }; - unsigned Opc = Subtarget->hasAVX() ? X86::VPCMPESTRIrr : - X86::PCMPESTRIrr; - InFlag = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Ops, - array_lengthof(Ops)), 0); - - if (!SDValue(Node, 0).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - X86::ECX, NVT, InFlag); - InFlag = Result.getValue(2); - ReplaceUses(SDValue(Node, 0), Result); - } - if (!SDValue(Node, 1).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - X86::EFLAGS, NVT, InFlag); - InFlag = Result.getValue(2); - ReplaceUses(SDValue(Node, 1), Result); - } - - return NULL; - } - - // FIXME: Custom handling because TableGen doesn't support multiple implicit - // defs in an instruction pattern - case X86ISD::PCMPISTRI: { - SDValue N0 = Node->getOperand(0); - SDValue N1 = Node->getOperand(1); - SDValue N2 = Node->getOperand(2); - - // Make sure last argument is a constant - ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N2); - if (!Cst) - break; - - uint64_t Imm = Cst->getZExtValue(); - - SDValue Ops[] = { N0, N1, getI8Imm(Imm) }; - unsigned Opc = Subtarget->hasAVX() ? X86::VPCMPISTRIrr : - X86::PCMPISTRIrr; - SDValue InFlag = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Ops, - array_lengthof(Ops)), 0); - - if (!SDValue(Node, 0).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - X86::ECX, NVT, InFlag); - InFlag = Result.getValue(2); - ReplaceUses(SDValue(Node, 0), Result); - } - if (!SDValue(Node, 1).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - X86::EFLAGS, NVT, InFlag); - InFlag = Result.getValue(2); - ReplaceUses(SDValue(Node, 1), Result); - } - - return NULL; - } } SDNode *ResNode = SelectCode(Node); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index fd0a8a27d6..5610bb5ba3 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -12240,6 +12240,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::FNMSUB: return "X86ISD::FNMSUB"; case X86ISD::FMADDSUB: return "X86ISD::FMADDSUB"; case X86ISD::FMSUBADD: return "X86ISD::FMSUBADD"; + case X86ISD::PCMPESTRI: return "X86ISD::PCMPESTRI"; + case X86ISD::PCMPISTRI: return "X86ISD::PCMPISTRI"; } } @@ -12388,13 +12390,10 @@ X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask, // X86 Scheduler Hooks //===----------------------------------------------------------------------===// -// private utility function - /// Utility function to emit xbegin specifying the start of an RTM region. -MachineBasicBlock * -X86TargetLowering::EmitXBegin(MachineInstr *MI, MachineBasicBlock *MBB) const { +static MachineBasicBlock *EmitXBegin(MachineInstr *MI, MachineBasicBlock *MBB, + const TargetInstrInfo *TII) { DebugLoc DL = MI->getDebugLoc(); - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); const BasicBlock *BB = MBB->getBasicBlock(); MachineFunction::iterator I = MBB; @@ -13033,45 +13032,82 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI, // FIXME: When we get size specific XMM0 registers, i.e. XMM0_V16I8 // or XMM0_V32I8 in AVX all of this code can be replaced with that // in the .td file. -MachineBasicBlock * -X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB, - unsigned numArgs, bool memArg) const { - assert(Subtarget->hasSSE42() && - "Target must have SSE4.2 or AVX features enabled"); +static MachineBasicBlock *EmitPCMPSTRM(MachineInstr *MI, MachineBasicBlock *BB, + const TargetInstrInfo *TII) { + unsigned Opc; + switch (MI->getOpcode()) { + default: llvm_unreachable("illegal opcode!"); + case X86::PCMPISTRM128REG: Opc = X86::PCMPISTRM128rr; break; + case X86::VPCMPISTRM128REG: Opc = X86::VPCMPISTRM128rr; break; + case X86::PCMPISTRM128MEM: Opc = X86::PCMPISTRM128rm; break; + case X86::VPCMPISTRM128MEM: Opc = X86::VPCMPISTRM128rm; break; + case X86::PCMPESTRM128REG: Opc = X86::PCMPESTRM128rr; break; + case X86::VPCMPESTRM128REG: Opc = X86::VPCMPESTRM128rr; break; + case X86::PCMPESTRM128MEM: Opc = X86::PCMPESTRM128rm; break; + case X86::VPCMPESTRM128MEM: Opc = X86::VPCMPESTRM128rm; break; + } DebugLoc dl = MI->getDebugLoc(); - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(Opc)); + + unsigned NumArgs = MI->getNumOperands(); + for (unsigned i = 1; i < NumArgs; ++i) { + MachineOperand &Op = MI->getOperand(i); + if (!(Op.isReg() && Op.isImplicit())) + MIB.addOperand(Op); + } + if (MI->hasOneMemOperand()) + MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + + BuildMI(*BB, MI, dl, + TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg()) + .addReg(X86::XMM0); + + MI->eraseFromParent(); + return BB; +} + +// FIXME: Custom handling because TableGen doesn't support multiple implicit +// defs in an instruction pattern +static MachineBasicBlock *EmitPCMPSTRI(MachineInstr *MI, MachineBasicBlock *BB, + const TargetInstrInfo *TII) { unsigned Opc; - if (!Subtarget->hasAVX()) { - if (memArg) - Opc = numArgs == 3 ? X86::PCMPISTRM128rm : X86::PCMPESTRM128rm; - else - Opc = numArgs == 3 ? X86::PCMPISTRM128rr : X86::PCMPESTRM128rr; - } else { - if (memArg) - Opc = numArgs == 3 ? X86::VPCMPISTRM128rm : X86::VPCMPESTRM128rm; - else - Opc = numArgs == 3 ? X86::VPCMPISTRM128rr : X86::VPCMPESTRM128rr; + switch (MI->getOpcode()) { + default: llvm_unreachable("illegal opcode!"); + case X86::PCMPISTRIREG: Opc = X86::PCMPISTRIrr; break; + case X86::VPCMPISTRIREG: Opc = X86::VPCMPISTRIrr; break; + case X86::PCMPISTRIMEM: Opc = X86::PCMPISTRIrm; break; + case X86::VPCMPISTRIMEM: Opc = X86::VPCMPISTRIrm; break; + case X86::PCMPESTRIREG: Opc = X86::PCMPESTRIrr; break; + case X86::VPCMPESTRIREG: Opc = X86::VPCMPESTRIrr; break; + case X86::PCMPESTRIMEM: Opc = X86::PCMPESTRIrm; break; + case X86::VPCMPESTRIMEM: Opc = X86::VPCMPESTRIrm; break; } + DebugLoc dl = MI->getDebugLoc(); MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(Opc)); - for (unsigned i = 0; i < numArgs; ++i) { - MachineOperand &Op = MI->getOperand(i+1); + + unsigned NumArgs = MI->getNumOperands(); // remove the results + for (unsigned i = 1; i < NumArgs; ++i) { + MachineOperand &Op = MI->getOperand(i); if (!(Op.isReg() && Op.isImplicit())) MIB.addOperand(Op); } + if (MI->hasOneMemOperand()) + MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg()) - .addReg(X86::XMM0); + .addReg(X86::ECX); MI->eraseFromParent(); return BB; } -MachineBasicBlock * -X86TargetLowering::EmitMonitor(MachineInstr *MI, MachineBasicBlock *BB) const { +static MachineBasicBlock * EmitMonitor(MachineInstr *MI, MachineBasicBlock *BB, + const |