author     Derek Schuff <dschuff@chromium.org>      2012-11-14 16:47:45 -0800
committer  Derek Schuff <dschuff@chromium.org>      2012-11-15 10:12:39 -0800
commit     923f52fb3f6670e843ffe0b8da2f2bad898d752c (patch)
tree       a279587ac525162817217dbbe0f01a91893a9d0b
parent     f4099a3c92570a80cd9a3850cda598c5ea446b96 (diff)
parent     08e9cb46feb0c8e08e3d309a0f9fd75a04ca54fb (diff)
Merge commit '08e9cb46feb0c8e08e3d309a0f9fd75a04ca54fb'
(svn r167699, also the 3.2 branch point)
Conflicts:
lib/Target/X86/X86Subtarget.cpp
77 files changed, 3419 insertions, 934 deletions
diff --git a/docs/ReleaseNotes.html b/docs/ReleaseNotes.html
index 45a9cc5dec..fc3a8b71bd 100644
--- a/docs/ReleaseNotes.html
+++ b/docs/ReleaseNotes.html
@@ -473,15 +473,31 @@ Release Notes</a>.</h1>
 <b>-mllvm -force-vector-width=4</b>.
 The default value is <b>0</b> which means auto-select.
 <br/>
- We can now vectorize this code:
+ We can now vectorize this function:
 <pre class="doc_code">
- for (i=0; i<n; i++) {
-   a[i] = b[i+1] + c[i+3] + i;
-   sum += d[i];
+ unsigned sum_arrays(int *A, int *B, int start, int end) {
+   unsigned sum = 0;
+   for (int i = start; i < end; ++i)
+     sum += A[i] + B[i] + i;
+
+   return sum;
 }
 </pre>
+ We vectorize loops under the following conditions:
+ <ul>
+ <li>The innermost loops must have a single basic block.</li>
+ <li>The number of iterations is known before the loop starts to execute.</li>
+ <li>The loop counter needs to be incremented by one.</li>
+ <li>The loop trip count <b>can</b> be a variable.</li>
+ <li>Loops do <b>not</b> need to start at zero.</li>
+ <li>The induction variable can be used inside the loop.</li>
+ <li>Loop reductions are supported.</li>
+ <li>Arrays with affine access patterns do <b>not</b> need to be marked as 'noalias' and are checked at runtime.</li>
+ <li>...</li>
+ </ul>
+ </p>
 <p>SROA - We've re-written SROA to be significantly more powerful.
diff --git a/include/llvm/MC/MCExpr.h b/include/llvm/MC/MCExpr.h
index 4c10e5114a..00eef270d6 100644
--- a/include/llvm/MC/MCExpr.h
+++ b/include/llvm/MC/MCExpr.h
@@ -167,6 +167,7 @@ public:
   VK_ARM_TPOFF,
   VK_ARM_GOTTPOFF,
   VK_ARM_TARGET1,
+  VK_ARM_TARGET2,
   VK_PPC_TOC, // TOC base
   VK_PPC_TOC_ENTRY, // TOC entry
diff --git a/include/llvm/Target/TargetLibraryInfo.h b/include/llvm/Target/TargetLibraryInfo.h
index 2a0a43229f..a2c97d782e 100644
--- a/include/llvm/Target/TargetLibraryInfo.h
+++ b/include/llvm/Target/TargetLibraryInfo.h
@@ -276,12 +276,18 @@ namespace llvm {
   sqrtf,
   /// long double sqrtl(long double x);
   sqrtl,
+  /// char *stpcpy(char *s1, const char *s2);
+  stpcpy,
   /// char *strcat(char *s1, const char *s2);
   strcat,
   /// char *strchr(const char *s, int c);
   strchr,
+  /// int strcmp(const char *s1, const char *s2);
+  strcmp,
   /// char *strcpy(char *s1, const char *s2);
   strcpy,
+  /// size_t strcspn(const char *s1, const char *s2);
+  strcspn,
   /// char *strdup(const char *s1);
   strdup,
   /// size_t strlen(const char *s);
@@ -296,6 +302,29 @@ namespace llvm {
   strndup,
   /// size_t strnlen(const char *s, size_t maxlen);
   strnlen,
+  /// char *strpbrk(const char *s1, const char *s2);
+  strpbrk,
+  /// char *strrchr(const char *s, int c);
+  strrchr,
+  /// size_t strspn(const char *s1, const char *s2);
+  strspn,
+  /// char *strstr(const char *s1, const char *s2);
+  strstr,
+  /// double strtod(const char *nptr, char **endptr);
+  strtod,
+  /// float strtof(const char *nptr, char **endptr);
+  strtof,
+  /// long int strtol(const char *nptr, char **endptr, int base);
+  strtol,
+  /// long double strtold(const char *nptr, char **endptr);
+  strtold,
+  /// long long int strtoll(const char *nptr, char **endptr, int base);
+  strtoll,
+  /// unsigned long int strtoul(const char *nptr, char **endptr, int base);
+  strtoul,
+  /// unsigned long long int strtoull(const char *nptr, char **endptr,
+  ///                                 int base);
+  strtoull,
   /// double tan(double x);
   tan,
   /// float tanf(float x);
diff --git a/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/include/llvm/Transforms/Utils/SimplifyLibCalls.h
index 5db2d00181..fde452bca2 100644
--- a/include/llvm/Transforms/Utils/SimplifyLibCalls.h
+++
b/include/llvm/Transforms/Utils/SimplifyLibCalls.h @@ -19,6 +19,7 @@ namespace llvm { class Value; class CallInst; class DataLayout; + class Instruction; class TargetLibraryInfo; class LibCallSimplifierImpl; @@ -35,8 +36,16 @@ namespace llvm { /// optimizeCall - Take the given call instruction and return a more /// optimal value to replace the instruction with or 0 if a more - /// optimal form can't be found. + /// optimal form can't be found. Note that the returned value may + /// be equal to the instruction being optimized. In this case all + /// other instructions that use the given instruction were modified + /// and the given instruction is dead. Value *optimizeCall(CallInst *CI); + + /// replaceAllUsesWith - This method is used when the library call + /// simplifier needs to replace instructions other than the library + /// call being modified. + virtual void replaceAllUsesWith(Instruction *I, Value *With) const; }; } // End llvm namespace diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 5cac8ca3ba..91a5b84e8a 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -292,7 +292,10 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, unsigned IntBytes = unsigned(CI->getBitWidth()/8); for (unsigned i = 0; i != BytesLeft && ByteOffset != IntBytes; ++i) { - CurPtr[i] = (unsigned char)(Val >> (ByteOffset * 8)); + int n = ByteOffset; + if (!TD.isLittleEndian()) + n = IntBytes - n - 1; + CurPtr[i] = (unsigned char)(Val >> (n * 8)); ++ByteOffset; } return true; @@ -442,10 +445,19 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, BytesLoaded, TD)) return 0; - APInt ResultVal = APInt(IntType->getBitWidth(), RawBytes[BytesLoaded-1]); - for (unsigned i = 1; i != BytesLoaded; ++i) { - ResultVal <<= 8; - ResultVal |= RawBytes[BytesLoaded-1-i]; + APInt ResultVal = APInt(IntType->getBitWidth(), 0); + if (TD.isLittleEndian()) { + ResultVal = RawBytes[BytesLoaded - 1]; + for (unsigned i = 1; i != BytesLoaded; ++i) { + ResultVal <<= 8; + ResultVal |= RawBytes[BytesLoaded-1-i]; + } + } else { + ResultVal = RawBytes[0]; + for (unsigned i = 1; i != BytesLoaded; ++i) { + ResultVal <<= 8; + ResultVal |= RawBytes[i]; + } } return ConstantInt::get(IntType->getContext(), ResultVal); @@ -521,10 +533,8 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, } } - // Try hard to fold loads from bitcasted strange and non-type-safe things. We - // currently don't do any of this for big endian systems. It can be - // generalized in the future if someone is interested. - if (TD && TD->isLittleEndian()) + // Try hard to fold loads from bitcasted strange and non-type-safe things. 
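// ---- [editorial aside, not part of the patch] ------------------------------
// The ConstantFolding hunks above generalize reinterpret-load folding to
// big-endian targets: byte n of an in-memory integer covers bits n*8 on a
// little-endian target but counts from the most-significant end on a
// big-endian one. A minimal standalone sketch of that indexing (the helper
// name is hypothetical, not an LLVM API):
#include <cstdint>
#include <cstdio>

// Byte `offset` of a `numBytes`-wide integer as laid out in target memory.
static unsigned char byteAt(uint64_t val, unsigned offset, unsigned numBytes,
                            bool littleEndian) {
  unsigned n = littleEndian ? offset : numBytes - offset - 1;
  return static_cast<unsigned char>(val >> (n * 8));
}

int main() {
  for (unsigned i = 0; i != 4; ++i)              // little-endian: 44 33 22 11
    std::printf("%02x ", byteAt(0x11223344u, i, 4, true));
  std::printf("| ");
  for (unsigned i = 0; i != 4; ++i)              // big-endian:    11 22 33 44
    std::printf("%02x ", byteAt(0x11223344u, i, 4, false));
  std::printf("\n");
  return 0;
}
// ----------------------------------------------------------------------------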
+ if (TD) return FoldReinterpretLoadFromConstPtr(CE, *TD); return 0; } diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 65bc4af99e..4e75d892e5 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -146,6 +146,11 @@ void LiveIntervals::print(raw_ostream &OS, const Module* ) const { OS << PrintReg(Reg) << " = " << getInterval(Reg) << '\n'; } + OS << "RegMasks:"; + for (unsigned i = 0, e = RegMaskSlots.size(); i != e; ++i) + OS << ' ' << RegMaskSlots[i]; + OS << '\n'; + printInstrs(OS); } @@ -1257,10 +1262,15 @@ private: SmallVectorImpl<SlotIndex>::iterator RI = std::lower_bound(LIS.RegMaskSlots.begin(), LIS.RegMaskSlots.end(), OldIdx); - assert(*RI == OldIdx && "No RegMask at OldIdx."); - *RI = NewIdx; - assert(*prior(RI) < *RI && *RI < *next(RI) && - "RegSlots out of order. Did you move one call across another?"); + assert(RI != LIS.RegMaskSlots.end() && *RI == OldIdx.getRegSlot() && + "No RegMask at OldIdx."); + *RI = NewIdx.getRegSlot(); + assert((RI == LIS.RegMaskSlots.begin() || + SlotIndex::isEarlierInstr(*llvm::prior(RI), *RI)) && + "Cannot move regmask instruction above another call"); + assert((llvm::next(RI) == LIS.RegMaskSlots.end() || + SlotIndex::isEarlierInstr(*RI, *llvm::next(RI))) && + "Cannot move regmask instruction below another call"); } // Return the last use of reg between NewIdx and OldIdx. diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index de16932c06..a4817d09c0 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -1583,6 +1583,7 @@ const char *ConvergingScheduler::getReasonStr( case NextDefUse: return "DEF-USE "; case NodeOrder: return "ORDER "; }; + llvm_unreachable("Unknown reason!"); } void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand, diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp index ffa79761f2..e0336342d6 100644 --- a/lib/MC/MCExpr.cpp +++ b/lib/MC/MCExpr.cpp @@ -60,7 +60,8 @@ void MCExpr::print(raw_ostream &OS) const { SRE.getKind() == MCSymbolRefExpr::VK_ARM_GOTOFF || SRE.getKind() == MCSymbolRefExpr::VK_ARM_TPOFF || SRE.getKind() == MCSymbolRefExpr::VK_ARM_GOTTPOFF || - SRE.getKind() == MCSymbolRefExpr::VK_ARM_TARGET1) + SRE.getKind() == MCSymbolRefExpr::VK_ARM_TARGET1 || + SRE.getKind() == MCSymbolRefExpr::VK_ARM_TARGET2) OS << MCSymbolRefExpr::getVariantKindName(SRE.getKind()); else if (SRE.getKind() != MCSymbolRefExpr::VK_None && SRE.getKind() != MCSymbolRefExpr::VK_PPC_DARWIN_HA16 && @@ -199,6 +200,7 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) { case VK_ARM_GOTTPOFF: return "(gottpoff)"; case VK_ARM_TLSGD: return "(tlsgd)"; case VK_ARM_TARGET1: return "(target1)"; + case VK_ARM_TARGET2: return "(target2)"; case VK_PPC_TOC: return "tocbase"; case VK_PPC_TOC_ENTRY: return "toc"; case VK_PPC_DARWIN_HA16: return "ha16"; diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index 43c68f4d1d..7e8b4a3d0d 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -3553,11 +3553,6 @@ void APFloat::toString(SmallVectorImpl<char> &Str, } bool APFloat::getExactInverse(APFloat *inv) const { - // We can only guarantee the existence of an exact inverse for IEEE floats. - if (semantics != &IEEEhalf && semantics != &IEEEsingle && - semantics != &IEEEdouble && semantics != &IEEEquad) - return false; - // Special floats and denormals have no exact inverse. 
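// ---- [editorial aside, not part of the patch] ------------------------------
// APFloat::getExactInverse() above now runs for every float semantics rather
// than just the four IEEE ones; the category check below remains the guard.
// The property it computes, sketched for plain positive doubles: a reciprocal
// is exact precisely when the value is a (normal) power of two.
#include <cmath>
#include <cstdio>

static bool hasExactInverse(double x) {       // positive x, for illustration
  int exp;
  double m = std::frexp(x, &exp);             // x = m * 2^exp, 0.5 <= m < 1
  return std::isnormal(x) && m == 0.5 && std::isnormal(1.0 / x);
}

int main() {
  std::printf("%d %d\n", hasExactInverse(4.0), hasExactInverse(3.0)); // 1 0
  return 0;
}
// ----------------------------------------------------------------------------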
if (category != fcNormal) return false; diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 8d1a301a67..f67decc550 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -853,13 +853,28 @@ void ARMAsmPrinter::emitAttributes() { AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use, ARMBuildAttrs::Allowed); } else if (CPUString == "generic") { - // FIXME: Why these defaults? - AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v4T); + // For a generic CPU, we assume a standard v7a architecture in Subtarget. + AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v7); + AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch_profile, + ARMBuildAttrs::ApplicationProfile); AttrEmitter->EmitAttribute(ARMBuildAttrs::ARM_ISA_use, ARMBuildAttrs::Allowed); AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use, - ARMBuildAttrs::Allowed); - } + ARMBuildAttrs::AllowThumb32); + } else if (Subtarget->hasV7Ops()) { + AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v7); + AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use, + ARMBuildAttrs::AllowThumb32); + } else if (Subtarget->hasV6T2Ops()) + AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v6T2); + else if (Subtarget->hasV6Ops()) + AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v6); + else if (Subtarget->hasV5TEOps()) + AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v5TE); + else if (Subtarget->hasV5TOps()) + AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v5T); + else if (Subtarget->hasV4TOps()) + AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v4T); if (Subtarget->hasNEON() && emitFPU) { /* NEON is not exactly a VFP architecture, but GAS emit one of @@ -1515,31 +1530,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { } return; } - case ARM::t2BMOVPCB_CALL: { - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::tMOVr); - TmpInst.addOperand(MCOperand::CreateReg(ARM::LR)); - TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); - // Add predicate operands. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::t2B); - const GlobalValue *GV = MI->getOperand(0).getGlobal(); - MCSymbol *GVSym = Mang->getSymbol(GV); - const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext); - TmpInst.addOperand(MCOperand::CreateExpr(GVSymExpr)); - // Add predicate operands. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } - return; - } case ARM::MOVi16_ga_pcrel: case ARM::t2MOVi16_ga_pcrel: { MCInst TmpInst; diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index dec498a4f7..0893826427 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -1639,18 +1639,12 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (Subtarget->isThumb()) { if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps()) CallOpc = ARMISD::CALL_NOLINK; - else if (doesNotRet && isDirect && !isARMFunc && - Subtarget->hasRAS() && !Subtarget->isThumb1Only() && - // Emit regular call when code size is the priority - !HasMinSizeAttr) - // "mov lr, pc; b _foo" to avoid confusing the RSP - CallOpc = ARMISD::CALL_NOLINK; else CallOpc = isARMFunc ? 
ARMISD::CALL : ARMISD::tCALL; } else { - if (!isDirect && !Subtarget->hasV5TOps()) { + if (!isDirect && !Subtarget->hasV5TOps()) CallOpc = ARMISD::CALL_NOLINK; - } else if (doesNotRet && isDirect && Subtarget->hasRAS() && + else if (doesNotRet && isDirect && Subtarget->hasRAS() && // Emit regular call when code size is the priority !HasMinSizeAttr) // "mov lr, pc; b _foo" to avoid confusing the RSP diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 248bab6b12..c2800acccd 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -3331,20 +3331,6 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { Requires<[IsThumb2, IsIOS]>; } -let isCall = 1, Defs = [LR], Uses = [SP] in { - // mov lr, pc; b if callee is marked noreturn to avoid confusing the - // return stack predictor. - def t2BMOVPCB_CALL : tPseudoInst<(outs), - (ins t_bltarget:$func), - 6, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>, - Requires<[IsThumb]>; -} - -// Direct calls -def : T2Pat<(ARMcall_nolink texternalsym:$func), - (t2BMOVPCB_CALL texternalsym:$func)>, - Requires<[IsThumb]>; - // IT block let Defs = [ITSTATE] in def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask), diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp index 7a7ce27d48..253d1fa2ab 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ -247,6 +247,9 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_ARM_TARGET1: Type = ELF::R_ARM_TARGET1; break; + case MCSymbolRefExpr::VK_ARM_TARGET2: + Type = ELF::R_ARM_TARGET2; + break; } break; case ARM::fixup_arm_ldst_pcrel_12: diff --git a/lib/Target/NVPTX/NVPTX.td b/lib/Target/NVPTX/NVPTX.td index ae7710e54f..7aee3595c6 100644 --- a/lib/Target/NVPTX/NVPTX.td +++ b/lib/Target/NVPTX/NVPTX.td @@ -24,7 +24,30 @@ include "NVPTXInstrInfo.td" // - Need at least one feature to avoid generating zero sized array by // TableGen in NVPTXGenSubtarget.inc. //===----------------------------------------------------------------------===// -def FeatureDummy : SubtargetFeature<"dummy", "dummy", "true", "">; + +// SM Versions +def SM10 : SubtargetFeature<"sm_10", "SmVersion", "10", + "Target SM 1.0">; +def SM11 : SubtargetFeature<"sm_11", "SmVersion", "11", + "Target SM 1.1">; +def SM12 : SubtargetFeature<"sm_12", "SmVersion", "12", + "Target SM 1.2">; +def SM13 : SubtargetFeature<"sm_13", "SmVersion", "13", + "Target SM 1.3">; +def SM20 : SubtargetFeature<"sm_20", "SmVersion", "20", + "Target SM 2.0">; +def SM21 : SubtargetFeature<"sm_21", "SmVersion", "21", + "Target SM 2.1">; +def SM30 : SubtargetFeature<"sm_30", "SmVersion", "30", + "Target SM 3.0">; +def SM35 : SubtargetFeature<"sm_35", "SmVersion", "35", + "Target SM 3.5">; + +// PTX Versions +def PTX30 : SubtargetFeature<"ptx30", "PTXVersion", "30", + "Use PTX version 3.0">; +def PTX31 : SubtargetFeature<"ptx31", "PTXVersion", "31", + "Use PTX version 3.1">; //===----------------------------------------------------------------------===// // NVPTX supported processors. 
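// ---- [editorial aside, not part of the patch] ------------------------------
// With the SubtargetFeatures defined above, an SM/PTX combination should be
// selectable on the llc command line as, e.g.,
//   llc -march=nvptx -mcpu=sm_20 -mattr=+ptx31 ...
// Versions are packed as 10*major + minor and unpacked with /10 and %10 when
// the ".version" directive is printed (see the NVPTXAsmPrinter hunk below):
#include <cstdio>
int main() {
  unsigned PTXVersion = 31;                      // 3.1, encoded as 10*3 + 1
  std::printf(".version %u.%u\n", PTXVersion / 10, PTXVersion % 10);
  return 0;                                      // prints ".version 3.1"
}
// ----------------------------------------------------------------------------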
@@ -33,7 +56,14 @@ def FeatureDummy : SubtargetFeature<"dummy", "dummy", "true", "">; class Proc<string Name, list<SubtargetFeature> Features> : Processor<Name, NoItineraries, Features>; -def : Proc<"sm_10", [FeatureDummy]>; +def : Proc<"sm_10", [SM10]>; +def : Proc<"sm_11", [SM11]>; +def : Proc<"sm_12", [SM12]>; +def : Proc<"sm_13", [SM13]>; +def : Proc<"sm_20", [SM20]>; +def : Proc<"sm_21", [SM21]>; +def : Proc<"sm_30", [SM30]>; +def : Proc<"sm_35", [SM35]>; def NVPTXInstrInfo : InstrInfo { diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index d3dfb35e26..3dd9bf5613 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -910,7 +910,8 @@ void NVPTXAsmPrinter::emitHeader (Module &M, raw_ostream &O) { O << "//\n"; O << "\n"; - O << ".version 3.0\n"; + unsigned PTXVersion = nvptxSubtarget.getPTXVersion(); + O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n"; O << ".target "; O << nvptxSubtarget.getTargetName(); @@ -1525,6 +1526,9 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, // <a> = PAL.getparamalignment // size = typeallocsize of element type unsigned align = PAL.getParamAlignment(paramIndex+1); + if (align == 0) + align = TD->getABITypeAlignment(ETy); + unsigned sz = TD->getTypeAllocSize(ETy); O << "\t.param .align " << align << " .b8 "; diff --git a/lib/Target/NVPTX/NVPTXSubtarget.cpp b/lib/Target/NVPTX/NVPTXSubtarget.cpp index 6aadd43e94..7b62cce2c6 100644 --- a/lib/Target/NVPTX/NVPTXSubtarget.cpp +++ b/lib/Target/NVPTX/NVPTXSubtarget.cpp @@ -34,16 +34,18 @@ DriverInterface(cl::desc("Choose driver interface:"), NVPTXSubtarget::NVPTXSubtarget(const std::string &TT, const std::string &CPU, const std::string &FS, bool is64Bit) -:NVPTXGenSubtargetInfo(TT, "", FS), // Don't pass CPU to subtarget, - // because we don't register all - // nvptx targets. - Is64Bit(is64Bit) { +: NVPTXGenSubtargetInfo(TT, CPU, FS), + Is64Bit(is64Bit), + PTXVersion(0), + SmVersion(10) { drvInterface = DriverInterface; // Provide the default CPU if none std::string defCPU = "sm_10"; + ParseSubtargetFeatures((CPU.empty() ? defCPU : CPU), FS); + // Get the TargetName from the FS if available if (FS.empty() && CPU.empty()) TargetName = defCPU; @@ -52,6 +54,12 @@ NVPTXSubtarget::NVPTXSubtarget(const std::string &TT, const std::string &CPU, else llvm_unreachable("we are not using FeatureStr"); - // Set up the SmVersion - SmVersion = atoi(TargetName.c_str()+3); + // We default to PTX 3.1, but we cannot just default to it in the initializer + // since the attribute parser checks if the given option is >= the default. + // So if we set ptx31 as the default, the ptx30 attribute would never match. + // Instead, we use 0 as the default and manually set 31 if the default is + // used. + if (PTXVersion == 0) { + PTXVersion = 31; + } } diff --git a/lib/Target/NVPTX/NVPTXSubtarget.h b/lib/Target/NVPTX/NVPTXSubtarget.h index 8f2a629d22..c3a683a2c6 100644 --- a/lib/Target/NVPTX/NVPTXSubtarget.h +++ b/lib/Target/NVPTX/NVPTXSubtarget.h @@ -25,13 +25,18 @@ namespace llvm { class NVPTXSubtarget : public NVPTXGenSubtargetInfo { - - unsigned int SmVersion; + std::string TargetName; NVPTX::DrvInterface drvInterface; bool dummy; // For the 'dummy' feature, see NVPTX.td bool Is64Bit; + // PTX version x.y is represented as 10*x+y, e.g. 3.1 == 31 + unsigned PTXVersion; + + // SM version x.y is represented as 10*x+y, e.g. 
3.1 == 31 + unsigned int SmVersion; + public: /// This constructor initializes the data members to match that /// of the specified module. @@ -69,6 +74,8 @@ public: NVPTX::DrvInterface getDrvInterface() const { return drvInterface; } std::string getTargetName() const { return TargetName; } + unsigned getPTXVersion() const { return PTXVersion; } + void ParseSubtargetFeatures(StringRef CPU, StringRef FS); std::string getDataLayout() const { diff --git a/lib/Target/TargetLibraryInfo.cpp b/lib/Target/TargetLibraryInfo.cpp index 62f973e658..6d4eab1204 100644 --- a/lib/Target/TargetLibraryInfo.cpp +++ b/lib/Target/TargetLibraryInfo.cpp @@ -152,9 +152,12 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = "sqrt", "sqrtf", "sqrtl", + "stpcpy", "strcat", "strchr", + "strcmp", "strcpy", + "strcspn", "strdup", "strlen", "strncat", @@ -162,6 +165,17 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = "strncpy", "strndup", "strnlen", + "strpbrk", + "strrchr", + "strspn", + "strstr", + "strtod", + "strtof", + "strtol", + "strtold", + "strtoll", + "strtoul", + "strtoull", "tan", "tanf", "tanh", @@ -309,6 +323,10 @@ static void initialize(TargetLibraryInfo &TLI, const Triple &T, TLI.setUnavailable(LibFunc::tanf); TLI.setUnavailable(LibFunc::tanhf); } + + // Win32 does *not* provide stpcpy. It is provided on POSIX systems: + // http://pubs.opengroup.org/onlinepubs/9699919799/functions/stpcpy.html + TLI.setUnavailable(LibFunc::stpcpy); } } diff --git a/lib/Target/TargetTransformImpl.cpp b/lib/Target/TargetTransformImpl.cpp index c07332de32..b36e6f858f 100644 --- a/lib/Target/TargetTransformImpl.cpp +++ b/lib/Target/TargetTransformImpl.cpp @@ -214,8 +214,16 @@ unsigned VectorTargetTransformImpl::getCastInstrCost(unsigned Opcode, Type *Dst, // Handle scalar conversions. if (!Src->isVectorTy() && !Dst->isVectorTy()) { - // Scalar bitcasts and truncs are usually free. - if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc) + // Scalar bitcasts are usually free. + if (Opcode == Instruction::BitCast) + return 0; + + if (Opcode == Instruction::Trunc && + TLI->isTruncateFree(SrcLT.second, DstLT.second)) + return 0; + + if (Opcode == Instruction::ZExt && + TLI->isZExtFree(SrcLT.second, DstLT.second)) return 0; // Just check the op cost. If the operation is legal then assume it costs 1. diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index e86c1000f1..42134256e3 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -2892,85 +2892,6 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { return Result; } - - // FIXME: Custom handling because TableGen doesn't support multiple implicit - // defs in an instruction pattern - case X86ISD::PCMPESTRI: { - SDValue N0 = Node->getOperand(0); - SDValue N1 = Node->getOperand(1); - SDValue N2 = Node->getOperand(2); - SDValue N3 = Node->getOperand(3); - SDValue N4 = Node->getOperand(4); - - // Make sure last argument is a constant - ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N4); - if (!Cst) - break; - - uint64_t Imm = Cst->getZExtValue(); - - SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, - X86::EAX, N1, SDValue()).getValue(1); - InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EDX, - N3, InFlag).getValue(1); - - SDValue Ops[] = { N0, N2, getI8Imm(Imm), InFlag }; - unsigned Opc = Subtarget->hasAVX() ? 
X86::VPCMPESTRIrr : - X86::PCMPESTRIrr; - InFlag = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Ops, - array_lengthof(Ops)), 0); - - if (!SDValue(Node, 0).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - X86::ECX, NVT, InFlag); - InFlag = Result.getValue(2); - ReplaceUses(SDValue(Node, 0), Result); - } - if (!SDValue(Node, 1).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - X86::EFLAGS, NVT, InFlag); - InFlag = Result.getValue(2); - ReplaceUses(SDValue(Node, 1), Result); - } - - return NULL; - } - - // FIXME: Custom handling because TableGen doesn't support multiple implicit - // defs in an instruction pattern - case X86ISD::PCMPISTRI: { - SDValue N0 = Node->getOperand(0); - SDValue N1 = Node->getOperand(1); - SDValue N2 = Node->getOperand(2); - - // Make sure last argument is a constant - ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N2); - if (!Cst) - break; - - uint64_t Imm = Cst->getZExtValue(); - - SDValue Ops[] = { N0, N1, getI8Imm(Imm) }; - unsigned Opc = Subtarget->hasAVX() ? X86::VPCMPISTRIrr : - X86::PCMPISTRIrr; - SDValue InFlag = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Ops, - array_lengthof(Ops)), 0); - - if (!SDValue(Node, 0).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - X86::ECX, NVT, InFlag); - InFlag = Result.getValue(2); - ReplaceUses(SDValue(Node, 0), Result); - } - if (!SDValue(Node, 1).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - X86::EFLAGS, NVT, InFlag); - InFlag = Result.getValue(2); - ReplaceUses(SDValue(Node, 1), Result); - } - - return NULL; - } } SDNode *ResNode = SelectCode(Node); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index fd0a8a27d6..5610bb5ba3 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -12240,6 +12240,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::FNMSUB: return "X86ISD::FNMSUB"; case X86ISD::FMADDSUB: return "X86ISD::FMADDSUB"; case X86ISD::FMSUBADD: return "X86ISD::FMSUBADD"; + case X86ISD::PCMPESTRI: return "X86ISD::PCMPESTRI"; + case X86ISD::PCMPISTRI: return "X86ISD::PCMPISTRI"; } } @@ -12388,13 +12390,10 @@ X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask, // X86 Scheduler Hooks //===----------------------------------------------------------------------===// -// private utility function - /// Utility function to emit xbegin specifying the start of an RTM region. -MachineBasicBlock * -X86TargetLowering::EmitXBegin(MachineInstr *MI, MachineBasicBlock *MBB) const { +static MachineBasicBlock *EmitXBegin(MachineInstr *MI, MachineBasicBlock *MBB, + const TargetInstrInfo *TII) { DebugLoc DL = MI->getDebugLoc(); - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); const BasicBlock *BB = MBB->getBasicBlock(); MachineFunction::iterator I = MBB; @@ -13033,45 +13032,82 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI, // FIXME: When we get size specific XMM0 registers, i.e. XMM0_V16I8 // or XMM0_V32I8 in AVX all of this code can be replaced with that // in the .td file. 
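// ---- [editorial aside, not part of the patch] ------------------------------
// Context for the custom-inserter rewrite below: these pseudo-instructions
// back the SSE4.2 string instructions reached through intrinsics such as
// _mm_cmpistri, whose result comes back in ECX (hence the COPY from
// X86::ECX added below). A hedged usage sketch (compile with -msse4.2):
#include <nmmintrin.h>
#include <cstdio>

int main() {
  alignas(16) char hay[16] = "needle in hays";  // implicit-length, NUL-ended
  alignas(16) char pin[16] = "in";
  __m128i h = _mm_load_si128(reinterpret_cast<const __m128i *>(hay));
  __m128i p = _mm_load_si128(reinterpret_cast<const __m128i *>(pin));
  int idx = _mm_cmpistri(p, h, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ORDERED);
  std::printf("%d\n", idx);                     // 7: "in" starts at hay[7]
  return 0;
}
// ----------------------------------------------------------------------------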
-MachineBasicBlock * -X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB, - unsigned numArgs, bool memArg) const { - assert(Subtarget->hasSSE42() && - "Target must have SSE4.2 or AVX features enabled"); +static MachineBasicBlock *EmitPCMPSTRM(MachineInstr *MI, MachineBasicBlock *BB, + const TargetInstrInfo *TII) { + unsigned Opc; + switch (MI->getOpcode()) { + default: llvm_unreachable("illegal opcode!"); + case X86::PCMPISTRM128REG: Opc = X86::PCMPISTRM128rr; break; + case X86::VPCMPISTRM128REG: Opc = X86::VPCMPISTRM128rr; break; + case X86::PCMPISTRM128MEM: Opc = X86::PCMPISTRM128rm; break; + case X86::VPCMPISTRM128MEM: Opc = X86::VPCMPISTRM128rm; break; + case X86::PCMPESTRM128REG: Opc = X86::PCMPESTRM128rr; break; + case X86::VPCMPESTRM128REG: Opc = X86::VPCMPESTRM128rr; break; + case X86::PCMPESTRM128MEM: Opc = X86::PCMPESTRM128rm; break; + case X86::VPCMPESTRM128MEM: Opc = X86::VPCMPESTRM128rm; break; + } DebugLoc dl = MI->getDebugLoc(); - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(Opc)); + + unsigned NumArgs = MI->getNumOperands(); + for (unsigned i = 1; i < NumArgs; ++i) { + MachineOperand &Op = MI->getOperand(i); + if (!(Op.isReg() && Op.isImplicit())) + MIB.addOperand(Op); + } + if (MI->hasOneMemOperand()) + MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + + BuildMI(*BB, MI, dl, + TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg()) + .addReg(X86::XMM0); + + MI->eraseFromParent(); + return BB; +} + +// FIXME: Custom handling because TableGen doesn't support multiple implicit +// defs in an instruction pattern +static MachineBasicBlock *EmitPCMPSTRI(MachineInstr *MI, MachineBasicBlock *BB, + const TargetInstrInfo *TII) { unsigned Opc; - if (!Subtarget->hasAVX()) { - if (memArg) - Opc = numArgs == 3 ? X86::PCMPISTRM128rm : X86::PCMPESTRM128rm; - else - Opc = numArgs == 3 ? X86::PCMPISTRM128rr : X86::PCMPESTRM128rr; - } else { - if (memArg) - Opc = numArgs == 3 ? X86::VPCMPISTRM128rm : X86::VPCMPESTRM128rm; - else - Opc = numArgs == 3 ? 
X86::VPCMPISTRM128rr : X86::VPCMPESTRM128rr; + switch (MI->getOpcode()) { + default: llvm_unreachable("illegal opcode!"); + case X86::PCMPISTRIREG: Opc = X86::PCMPISTRIrr; break; + case X86::VPCMPISTRIREG: Opc = X86::VPCMPISTRIrr; break; + case X86::PCMPISTRIMEM: Opc = X86::PCMPISTRIrm; break; + case X86::VPCMPISTRIMEM: Opc = X86::VPCMPISTRIrm; break; + case X86::PCMPESTRIREG: Opc = X86::PCMPESTRIrr; break; + case X86::VPCMPESTRIREG: Opc = X86::VPCMPESTRIrr; break; + case X86::PCMPESTRIMEM: Opc = X86::PCMPESTRIrm; break; + case X86::VPCMPESTRIMEM: Opc = X86::VPCMPESTRIrm; break; } + DebugLoc dl = MI->getDebugLoc(); MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(Opc)); - for (unsigned i = 0; i < numArgs; ++i) { - MachineOperand &Op = MI->getOperand(i+1); + + unsigned NumArgs = MI->getNumOperands(); // remove the results + for (unsigned i = 1; i < NumArgs; ++i) { + MachineOperand &Op = MI->getOperand(i); if (!(Op.isReg() && Op.isImplicit())) MIB.addOperand(Op); } + if (MI->hasOneMemOperand()) + MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg()) - .addReg(X86::XMM0); + .addReg(X86::ECX); MI->eraseFromParent(); return BB; } -MachineBasicBlock * -X86TargetLowering::EmitMonitor(MachineInstr *MI, MachineBasicBlock *BB) const { +static MachineBasicBlock * EmitMonitor(MachineInstr *MI, MachineBasicBlock *BB, + const TargetInstrInfo *TII, + const X86Subtarget* Subtarget) { DebugLoc dl = MI->getDebugLoc(); - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); // Address into RAX/EAX, other two args into ECX, EDX. unsigned MemOpc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r; @@ -14125,36 +14161,33 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case X86::PCMPESTRM128REG: case X86::VPCMPESTRM128REG: case X86::PCMPESTRM128MEM: - case X86::VPCMPESTRM128MEM: { - unsigned NumArgs; - bool MemArg; - switch (MI->getOpcode()) { - default: llvm_unreachable("illegal opcode!"); - case X86::PCMPISTRM128REG: - case X86::VPCMPISTRM128REG: - NumArgs = 3; MemArg = false; break; - case X86::PCMPISTRM128MEM: - case X86::VPCMPISTRM128MEM: - NumArgs = 3; MemArg = true; break; - case X86::PCMPESTRM128REG: - case X86::VPCMPESTRM128REG: - NumArgs = 5; MemArg = false; break; - case X86::PCMPESTRM128MEM: - case X86::VPCMPESTRM128MEM: - NumArgs = 5; MemArg = true; break; - } - return EmitPCMP(MI, BB, NumArgs, MemArg); - } - - // Thread synchronization. + case X86::VPCMPESTRM128MEM: + assert(Subtarget->hasSSE42() && + "Target must have SSE4.2 or AVX features enabled"); + return EmitPCMPSTRM(MI, BB, getTargetMachine().getInstrInfo()); + + // String/text processing lowering. + case X86::PCMPISTRIREG: + case X86::VPCMPISTRIREG: + case X86::PCMPISTRIMEM: + case X86::VPCMPISTRIMEM: + case X86::PCMPESTRIREG: + case X86::VPCMPESTRIREG: + case X86::PCMPESTRIMEM: + case X86::VPCMPESTRIMEM: + assert(Subtarget->hasSSE42() && + "Target must have SSE4.2 or AVX features enabled"); + return EmitPCMPSTRI(MI, BB, getTargetMachine().getInstrInfo()); + + // Thread synchronization. case X86::MONITOR: - return EmitMonitor(MI, BB); + return EmitMonitor(MI, BB, getTargetMachine().getInstrInfo(), Subtarget); // xbegin case X86::XBEGIN: - return EmitXBegin(MI, BB); + return EmitXBegin(MI, BB, getTargetMachine().getInstrInfo()); - // Atomic Lowering. + // Atomic Lowering. 
case X86::ATOMAND8: case X86::ATOMAND16: case X86::ATOMAND32: @@ -17993,8 +18026,8 @@ unsigned X86VectorTargetTransformInfo::getCastInstrCost(unsigned Opcode, { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 }, { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 }, { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 }, - { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 1 }, - { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 }, + { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 1 }, + { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 }, { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 6 }, { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 9 }, { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 }, diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 09f175db53..b6e8960f76 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -893,21 +893,6 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const; - /// Utility function to emit string processing sse4.2 instructions - /// that return in xmm0. - /// This takes the instruction to expand, the associated machine basic - /// block, the number of args, and whether or not the second arg is - /// in memory or not. - MachineBasicBlock *EmitPCMP(MachineInstr *BInstr, MachineBasicBlock *BB, - unsigned argNum, bool inMem) const; - - /// Utility functions to emit monitor and mwait instructions. These - /// need to make sure that the arguments to the intrinsic are in the - /// correct registers. - MachineBasicBlock *EmitMonitor(MachineInstr *MI, - MachineBasicBlock *BB) const; - MachineBasicBlock *EmitMwait(MachineInstr *MI, MachineBasicBlock *BB) const; - /// Utility function to emit atomic-load-arith operations (and, or, xor, /// nand, max, min, umax, umin). It takes the corresponding instruction to /// expand, the associated machine basic block, and the associated X86 @@ -920,10 +905,6 @@ namespace llvm { MachineBasicBlock *EmitAtomicLoadArith6432(MachineInstr *MI, MachineBasicBlock *MBB) const; - /// Utility function to emit xbegin specifying the start of an RTM region. - MachineBasicBlock *EmitXBegin(MachineInstr *MI, - MachineBasicBlock *MBB) const; - // Utility function to emit the low-level va_arg code for X86-64. 
MachineBasicBlock *EmitVAARG64WithCustomInserter( MachineInstr *MI, diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 28dfbe7a1f..6f48d7ed7f 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -7002,8 +7002,8 @@ multiclass pseudo_pcmpistrm<string asm> { imm:$src3))]>; def MEM : PseudoI<(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2, i8imm:$src3), - [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 - VR128:$src1, (load addr:$src2), imm:$src3))]>; + [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, + (bc_v16i8 (memopv2i64 addr:$src2)), imm:$src3))]>; } let Defs = [EFLAGS], usesCustomInserter = 1 in { @@ -7011,24 +7011,22 @@ let Defs = [EFLAGS], usesCustomInserter = 1 in { defm PCMPISTRM128 : pseudo_pcmpistrm<"#PCMPISTRM128">, Requires<[UseSSE42]>; } -let Defs = [XMM0, EFLAGS], neverHasSideEffects = 1, Predicates = [HasAVX] in { - def VPCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs), - (ins VR128:$src1, VR128:$src2, i8imm:$src3), - "vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX; +multiclass pcmpistrm_SS42AI<string asm> { + def rr : SS42AI<0x62, MRMSrcReg, (outs), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), + !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), + []>, OpSize; let mayLoad = 1 in - def VPCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs), - (ins VR128:$src1, i128mem:$src2, i8imm:$src3), - "vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX; + def rm :SS42AI<0x62, MRMSrcMem, (outs), + (ins VR128:$src1, i128mem:$src2, i8imm:$src3), + !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), + []>, OpSize; } let Defs = [XMM0, EFLAGS], neverHasSideEffects = 1 in { - def PCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs), - (ins VR128:$src1, VR128:$src2, i8imm:$src3), - "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize; - let mayLoad = 1 in - def PCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs), - (ins VR128:$src1, i128mem:$src2, i8imm:$src3), - "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize; + let Predicates = [HasAVX] in + defm VPCMPISTRM128 : pcmpistrm_SS42AI<"vpcmpistrm">, VEX; + defm PCMPISTRM128 : pcmpistrm_SS42AI<"pcmpistrm"> ; } // Packed Compare Explicit Length Strings, Return Mask @@ -7039,8 +7037,8 @@ multiclass pseudo_pcmpestrm<string asm> { VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>; def MEM : PseudoI<(outs VR128:$dst), (ins VR128:$src1, i128mem:$src3, i8imm:$src5), - [(set VR128:$dst, (int_x86_sse42_pcmpestrm128 - VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>; + [(set VR128:$dst, (int_x86_sse42_pcmpestrm128 VR128:$src1, EAX, + (bc_v16i8 (memopv2i64 addr:$src3)), EDX, imm:$src5))]>; } let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in { @@ -7048,64 +7046,94 @@ let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in { defm PCMPESTRM128 : pseudo_pcmpestrm<"#PCMPESTRM128">, Requires<[UseSSE42]>; } -let Predicates = [HasAVX], - Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in { - def VPCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs), - (ins VR128:$src1, VR128:$src3, i8imm:$src5), - "vpcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize, VEX; +multiclass SS42AI_pcmpestrm<string asm> { + def rr : SS42AI<0x60, MRMSrcReg, (outs), + (ins VR128:$src1, VR128:$src3, i8imm:$src5), + !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), + []>, OpSize; let mayLoad = 1 in - def VPCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs), - (ins 
VR128:$src1, i128mem:$src3, i8imm:$src5), - "vpcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize, VEX; + def rm : SS42AI<0x60, MRMSrcMem, (outs), + (ins VR128:$src1, i128mem:$src3, i8imm:$src5), + !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), + []>, OpSize; } let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in { - def PCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs), - (ins VR128:$src1, VR128:$src3, i8imm:$src5), - "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize; - let mayLoad = 1 in - def PCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs), - (ins VR128:$src1, i128mem:$src3, i8imm:$src5), - "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize; + let Predicates = [HasAVX] in + defm VPCMPESTRM128 : SS42AI_pcmpestrm<"vpcmpestrm">, VEX; + defm PCMPESTRM128 : SS42AI_pcmpestrm<"pcmpestrm">; } // Packed Compare Implicit Length Strings, Return Index -let Defs = [ECX, EFLAGS], neverHasSideEffects = 1 in { - multiclass SS42AI_pcmpistri<string asm> { - def rr : SS42AI<0x63, MRMSrcReg, (outs), - (ins VR128:$src1, VR128:$src2, i8imm:$src3), - !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), - []>, OpSize; - let mayLoad = 1 in - def rm : SS42AI<0x63, MRMSrcMem, (outs), - (ins VR128:$src1, i128mem:$src2, i8imm:$src3), - !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), - []>, OpSize; - } +multiclass pseudo_pcmpistri<string asm> { + def REG : PseudoI<(outs GR32:$dst), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), + [(set GR32:$dst, EFLAGS, + (X86pcmpistri VR128:$src1, VR128:$src2, imm:$src3))]>; + def MEM : PseudoI<(outs GR32:$dst), + (ins VR128:$src1, i128mem:$src2, i8imm:$src3), + [(set GR32:$dst, EFLAGS, (X86pcmpistri VR128:$src1, + (bc_v16i8 (memopv2i64 addr:$src2)), imm:$src3))]>; } -let Predicates = [HasAVX] in -defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX; -defm PCMPISTRI : SS42AI_pcmpistri<"pcmpistri">; +let Defs = [EFLAGS], usesCustomInserter = 1 in { + defm VPCMPISTRI : pseudo_pcmpistri<"#VPCMPISTRI">, Requires<[HasAVX]>; + defm PCMPISTRI : pseudo_pcmpistri<"#PCMPISTRI">, Requires<[UseSSE42]>; +} + +multiclass SS42AI_pcmpistri<string asm> { + def rr : SS42AI<0x63, MRMSrcReg, (outs), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), + !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), + []>, OpSize; + let mayLoad = 1 in + def rm : SS42AI<0x63, MRMSrcMem, (outs), + (ins VR128:$src1, i128mem:$src2, i8imm:$src3), + !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), + []>, OpSize; +} + +let Defs = [ECX, EFLAGS], neverHasSideEffects = 1 in { + let Predicates = [HasAVX] in + defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX; + defm PCMPISTRI : SS42AI_pcmpistri<"pcmpistri">; +} // Packed Compare Explicit Length Strings, Return Index -let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in { - multiclass SS42AI_pcmpestri<string asm> { - def rr : SS42AI<0x61, MRMSrcReg, (outs), - (ins VR128:$src1, VR128:$src3, i8imm:$src5), - !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), - []>, OpSize; - let mayLoad = 1 in - def rm : SS42AI<0x61, MRMSrcMem, (outs), - (ins VR128:$src1, i128mem:$src3, i8imm:$src5), - !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), - []>, OpSize; - } +multiclass pseudo_pcmpestri<string asm> { + def REG : PseudoI<(outs GR32:$dst), + (ins VR128:$src1, VR128:$src3, i8imm:$src5), + [(set GR32:$dst, EFLAGS, + (X86pcmpestri VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>; + def MEM : PseudoI<(outs 
GR32:$dst), + (ins VR128:$src1, i128mem:$src3, i8imm:$src5), + [(set GR32:$dst, EFLAGS, + (X86pcmpestri VR128:$src1, EAX, (bc_v16i8 (memopv2i64 addr:$src3)), EDX, + imm:$src5))]>; } -let Predicates = [HasAVX] in -defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX; -defm PCMPESTRI : SS42AI_pcmpestri<"pcmpestri">; +let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in { + defm VPCMPESTRI : pseudo_pcmpestri<"#VPCMPESTRI">, Requires<[HasAVX]>; + defm PCMPESTRI : pseudo_pcmpestri<"#PCMPESTRI">, Requires<[UseSSE42]>; +} + +multiclass SS42AI_pcmpestri<string asm> { + def rr : SS42AI<0x61, MRMSrcReg, (outs), + (ins VR128:$src1, VR128:$src3, i8imm:$src5), + !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), + []>, OpSize; + let mayLoad = 1 in + def rm : SS42AI<0x61, MRMSrcMem, (outs), + (ins VR128:$src1, i128mem:$src3, i8imm:$src5), + !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), + []>, OpSize; +} + +let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in { + let Predicates = [HasAVX] in + defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX; + defm PCMPESTRI : SS42AI_pcmpestri<"pcmpestri">; +} //===----------------------------------------------------------------------===// // SSE4.2 - CRC Instructions diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 31be6b7a7b..0132f81410 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -423,12 +423,11 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, assert((!In64BitMode || HasX86_64) && "64-bit code requested on a subtarget that doesn't support it!"); - // Stack alignment is 16 bytes on Darwin, FreeBSD, Linux, Solaris (both + // Stack alignment is 16 bytes on Darwin, Linux and Solaris (both // 32 and 64 bit), NaCl and for all 64-bit targets. if (StackAlignOverride) stackAlignment = StackAlignOverride; - else if (isTargetDarwin() || isTargetFreeBSD() || isTargetLinux() || - isTargetSolaris() || + else if (isTargetDarwin() || isTargetLinux() || isTargetSolaris() || isTargetNaCl() || // @LOCALMOD In64BitMode) stackAlignment = 16; diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index ccf75bca2b..9a46f25e66 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2367,6 +2367,24 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { return MadeIRChange; } +namespace { +class InstCombinerLibCallSimplifier : public LibCallSimplifier { + InstCombiner *IC; +public: + InstCombinerLibCallSimplifier(const DataLayout *TD, + const TargetLibraryInfo *TLI, + InstCombiner *IC) + : LibCallSimplifier(TD, TLI) { + this->IC = IC; + } + + /// replaceAllUsesWith - override so that instruction replacement + /// can be defined in terms of the instruction combiner framework. 
+ virtual void replaceAllUsesWith(Instruction *I, Value *With) const { + IC->ReplaceInstUsesWith(*I, With); + } +}; +} bool InstCombiner::runOnFunction(Function &F) { TD = getAnalysisIfAvailable<DataLayout>(); @@ -2379,7 +2397,7 @@ bool InstCombiner::runOnFunction(Function &F) { InstCombineIRInserter(Worklist)); Builder = &TheBuilder; - LibCallSimplifier TheSimplifier(TD, TLI); + InstCombinerLibCallSimplifier TheSimplifier(TD, TLI, this); Simplifier = &TheSimplifier; bool EverMadeChange = false; diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index c6244a55c9..9e10fc4416 100644 --- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -97,6 +97,10 @@ struct ThreadSanitizer : public FunctionPass { Function *TsanWrite[kNumberOfAccessSizes]; Function *TsanAtomicLoad[kNumberOfAccessSizes]; Function *TsanAtomicStore[kNumberOfAccessSizes]; + Function *TsanAtomicRMW[AtomicRMWInst::LAST_BINOP + 1][kNumberOfAccessSizes]; + Function *TsanAtomicCAS[kNumberOfAccessSizes]; + Function *TsanAtomicThreadFence; + Function *TsanAtomicSignalFence; Function *TsanVptrUpdate; }; } // namespace @@ -167,10 +171,42 @@ bool ThreadSanitizer::doInitialization(Module &M) { TsanAtomicStore[i] = checkInterfaceFunction(M.getOrInsertFunction( AtomicStoreName, IRB.getVoidTy(), PtrTy, Ty, OrdTy, NULL)); + + for (int op = AtomicRMWInst::FIRST_BINOP; + op <= AtomicRMWInst::LAST_BINOP; ++op) { + TsanAtomicRMW[op][i] = NULL; + const char *NamePart = NULL; + if (op == AtomicRMWInst::Xchg) + NamePart = "_exchange"; + else if (op == AtomicRMWInst::Add) + NamePart = "_fetch_add"; + else if (op == AtomicRMWInst::Sub) + NamePart = "_fetch_sub"; + else if (op == AtomicRMWInst::And) + NamePart = "_fetch_and"; + else if (op == AtomicRMWInst::Or) + NamePart = "_fetch_or"; + else if (op == AtomicRMWInst::Xor) + NamePart = "_fetch_xor"; + else + continue; + SmallString<32> RMWName("__tsan_atomic" + itostr(BitSize) + NamePart); + TsanAtomicRMW[op][i] = checkInterfaceFunction(M.getOrInsertFunction( + RMWName, Ty, PtrTy, Ty, OrdTy, NULL)); + } + + SmallString<32> AtomicCASName("__tsan_atomic" + itostr(BitSize) + + "_compare_exchange_val"); + TsanAtomicCAS[i] = checkInterfaceFunction(M.getOrInsertFunction( + AtomicCASName, Ty, PtrTy, Ty, Ty, OrdTy, NULL)); } TsanVptrUpdate = checkInterfaceFunction(M.getOrInsertFunction( "__tsan_vptr_update", IRB.getVoidTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), NULL)); + TsanAtomicThreadFence = checkInterfaceFunction(M.getOrInsertFunction( + "__tsan_atomic_thread_fence", IRB.getVoidTy(), OrdTy, NULL)); + TsanAtomicSignalFence = checkInterfaceFunction(M.getOrInsertFunction( + "__tsan_atomic_signal_fence", IRB.getVoidTy(), OrdTy, NULL)); return true; } @@ -253,8 +289,8 @@ static bool isAtomic(Instruction *I) { return true; if (isa<AtomicCmpXchgInst>(I)) return true; - if (FenceInst *FI = dyn_cast<FenceInst>(I)) - return FI->getSynchScope() == CrossThread; + if (isa<FenceInst>(I)) + return true; return false; } @@ -354,15 +390,14 @@ static ConstantInt *createOrdering(IRBuilder<> *IRB, AtomicOrdering ord) { switch (ord) { case NotAtomic: assert(false); case Unordered: // Fall-through. - case Monotonic: v = 1 << 0; break; - // case Consume: v = 1 << 1; break; // Not specified yet. 
- case Acquire: v = 1 << 2; break; - case Release: v = 1 << 3; break; - case AcquireRelease: v = 1 << 4; break; - case SequentiallyConsistent: v = 1 << 5; break; + case Monotonic: v = 0; break; + // case Consume: v = 1; break; // Not specified yet. + case Acquire: v = 2; break; + case Release: v = 3; break; + case AcquireRelease: v = 4; break; + case SequentiallyConsistent: v = 5; break; } - // +100500 is temporal to migrate to new enum values. - return IRB->getInt32(v + 100500); + return IRB->getInt32(v); } bool ThreadSanitizer::instrumentAtomic(Instruction *I) { @@ -397,12 +432,44 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I) { CallInst *C = CallInst::Create(TsanAtomicStore[Idx], ArrayRef<Value*>(Args)); ReplaceInstWithInst(I, C); - } else if (isa<AtomicRMWInst>(I)) { - // FIXME: Not yet supported. - } else if (isa<AtomicCmpXchgInst>(I)) { - // FIXME: Not yet supported. - } else if (isa<FenceInst>(I)) { - // FIXME: Not yet supported. + } else if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I)) { + Value *Addr = RMWI->getPointerOperand(); + int Idx = getMemoryAccessFuncIndex(Addr); + if (Idx < 0) + return false; + Function *F = TsanAtomicRMW[RMWI->getOperation()][Idx]; + if (F == NULL) + return false; + const size_t ByteSize = 1 << Idx; + const size_t BitSize = ByteSize * 8; + Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize); + Type *PtrTy = Ty->getPointerTo(); + Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy), + IRB.CreateIntCast(RMWI->getValOperand(), Ty, false), + createOrdering(&IRB, RMWI->getOrdering())}; + CallInst *C = CallInst::Create(F, ArrayRef<Value*>(Args)); + ReplaceInstWithInst(I, C); + } else if (AtomicCmpXchgInst *CASI = dyn_cast<AtomicCmpXchgInst>(I)) { + Value *Addr = CASI->getPointerOperand(); + int Idx = getMemoryAccessFuncIndex(Addr); + if (Idx < 0) + return false; + const size_t ByteSize = 1 << Idx; + const size_t BitSize = ByteSize * 8; + Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize); + Type *PtrTy = Ty->getPointerTo(); + Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy), + IRB.CreateIntCast(CASI->getCompareOperand(), Ty, false), + IRB.CreateIntCast(CASI->getNewValOperand(), Ty, false), + createOrdering(&IRB, CASI->getOrdering())}; + CallInst *C = CallInst::Create(TsanAtomicCAS[Idx], ArrayRef<Value*>(Args)); + ReplaceInstWithInst(I, C); + } else if (FenceInst *FI = dyn_cast<FenceInst>(I)) { + Value *Args[] = {createOrdering(&IRB, FI->getOrdering())}; + Function *F = FI->getSynchScope() == SingleThread ? + TsanAtomicSignalFence : TsanAtomicThreadFence; + CallInst *C = CallInst::Create(F, ArrayRef<Value*>(Args)); + ReplaceInstWithInst(I, C); } return true; } diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index 7d652dea48..17d07cdb2d 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -99,243 +99,7 @@ static bool CallHasFloatingPointArgument(const CallInst *CI) { return false; } -/// IsOnlyUsedInEqualityComparison - Return true if it is only used in equality -/// comparisons with With. -static bool IsOnlyUsedInEqualityComparison(Value *V, Value *With) { - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); - UI != E; ++UI) { - if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI)) - if (IC->isEquality() && IC->getOperand(1) == With) - continue; - // Unknown instruction. 
- return false; - } - return true; -} - -//===----------------------------------------------------------------------===// -// String and Memory LibCall Optimizations -//===----------------------------------------------------------------------===// - namespace { -//===---------------------------------------===// -// 'strcspn' Optimizations - -struct StrCSpnOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 2 || - FT->getParamType(0) != B.getInt8PtrTy() || - FT->getParamType(1) != FT->getParamType(0) || - !FT->getReturnType()->isIntegerTy()) - return 0; - - StringRef S1, S2; - bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1); - bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2); - - // strcspn("", s) -> 0 - if (HasS1 && S1.empty()) - return Constant::getNullValue(CI->getType()); - - // Constant folding. - if (HasS1 && HasS2) { - size_t Pos = S1.find_first_of(S2); - if (Pos == StringRef::npos) Pos = S1.size(); - return ConstantInt::get(CI->getType(), Pos); - } - - // strcspn(s, "") -> strlen(s) - if (TD && HasS2 && S2.empty()) - return EmitStrLen(CI->getArgOperand(0), B, TD, TLI); - - return 0; - } -}; - -//===---------------------------------------===// -// 'strstr' Optimizations - -struct StrStrOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 2 || - !FT->getParamType(0)->isPointerTy() || - !FT->getParamType(1)->isPointerTy() || - !FT->getReturnType()->isPointerTy()) - return 0; - - // fold strstr(x, x) -> x. - if (CI->getArgOperand(0) == CI->getArgOperand(1)) - return B.CreateBitCast(CI->getArgOperand(0), CI->getType()); - - // fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0 - if (TD && IsOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) { - Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, TD, TLI); - if (!StrLen) - return 0; - Value *StrNCmp = EmitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1), - StrLen, B, TD, TLI); - if (!StrNCmp) - return 0; - for (Value::use_iterator UI = CI->use_begin(), UE = CI->use_end(); - UI != UE; ) { - ICmpInst *Old = cast<ICmpInst>(*UI++); - Value *Cmp = B.CreateICmp(Old->getPredicate(), StrNCmp, - ConstantInt::getNullValue(StrNCmp->getType()), - "cmp"); - Old->replaceAllUsesWith(Cmp); - Old->eraseFromParent(); - } - return CI; - } - - // See if either input string is a constant string. - StringRef SearchStr, ToFindStr; - bool HasStr1 = getConstantStringInfo(CI->getArgOperand(0), SearchStr); - bool HasStr2 = getConstantStringInfo(CI->getArgOperand(1), ToFindStr); - - // fold strstr(x, "") -> x. - if (HasStr2 && ToFindStr.empty()) - return B.CreateBitCast(CI->getArgOperand(0), CI->getType()); - - // If both strings are known, constant fold it. - if (HasStr1 && HasStr2) { - std::string::size_type Offset = SearchStr.find(ToFindStr); - - if (Offset == StringRef::npos) // strstr("foo", "bar") -> null - return Constant::getNullValue(CI->getType()); - - // strstr("abcd", "bc") -> gep((char*)"abcd", 1) - Value *Result = CastToCStr(CI->getArgOperand(0), B); - Result = B.CreateConstInBoundsGEP1_64(Result, Offset, "strstr"); - return B.CreateBitCast(Result, CI->getType()); - } - - // fold strstr(x, "y") -> strchr(x, 'y'). 
- if (HasStr2 && ToFindStr.size() == 1) { - Value *StrChr= EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TD, TLI); - return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : 0; - } - return 0; - } -}; - - -//===---------------------------------------===// -// 'memcmp' Optimizations - -struct MemCmpOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 3 || !FT->getParamType(0)->isPointerTy() || - !FT->getParamType(1)->isPointerTy() || - !FT->getReturnType()->isIntegerTy(32)) - return 0; - - Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1); - - if (LHS == RHS) // memcmp(s,s,x) -> 0 - return Constant::getNullValue(CI->getType()); - - // Make sure we have a constant length. - ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2)); - if (!LenC) return 0; - uint64_t Len = LenC->getZExtValue(); - - if (Len == 0) // memcmp(s1,s2,0) -> 0 - return Constant::getNullValue(CI->getType()); - - // memcmp(S1,S2,1) -> *(unsigned char*)LHS - *(unsigned char*)RHS - if (Len == 1) { - Value *LHSV = B.CreateZExt(B.CreateLoad(CastToCStr(LHS, B), "lhsc"), - CI->getType(), "lhsv"); - Value *RHSV = B.CreateZExt(B.CreateLoad(CastToCStr(RHS, B), "rhsc"), - CI->getType(), "rhsv"); - return B.CreateSub(LHSV, RHSV, "chardiff"); - } - - // Constant folding: memcmp(x, y, l) -> cnst (all arguments are constant) - StringRef LHSStr, RHSStr; - if (getConstantStringInfo(LHS, LHSStr) && - getConstantStringInfo(RHS, RHSStr)) { - // Make sure we're not reading out-of-bounds memory. - if (Len > LHSStr.size() || Len > RHSStr.size()) - return 0; - uint64_t Ret = memcmp(LHSStr.data(), RHSStr.data(), Len); - return ConstantInt::get(CI->getType(), Ret); - } - - return 0; - } -}; - -//===---------------------------------------===// -// 'memcpy' Optimizations - -struct MemCpyOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - // These optimizations require DataLayout. - if (!TD) return 0; - - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || - !FT->getParamType(0)->isPointerTy() || - !FT->getParamType(1)->isPointerTy() || - FT->getParamType(2) != TD->getIntPtrType(*Context)) - return 0; - - // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1) - B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), 1); - return CI->getArgOperand(0); - } -}; - -//===---------------------------------------===// -// 'memmove' Optimizations - -struct MemMoveOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - // These optimizations require DataLayout. - if (!TD) return 0; - - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || - !FT->getParamType(0)->isPointerTy() || - !FT->getParamType(1)->isPointerTy() || - FT->getParamType(2) != TD->getIntPtrType(*Context)) - return 0; - - // memmove(x, y, n) -> llvm.memmove(x, y, n, 1) - B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), 1); - return CI->getArgOperand(0); - } -}; - -//===---------------------------------------===// -// 'memset' Optimizations - -struct MemSetOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - // These optimizations require DataLayout. 
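// ---- [editorial aside, not part of the patch] ------------------------------
// The string/memory folds being removed here (and re-added under
// lib/Transforms/Utils below) implement source-level identities such as:
#include <cassert>
#include <cstring>

int main() {
  const char *s = "abcd";
  assert(strstr(s, "") == s);            // strstr(x, "") -> x
  assert(strstr(s, "bc") == s + 1);      // strstr const-folds to gep(s, 1)
  assert(strcspn("", s) == 0);           // strcspn("", s) -> 0
  assert(strcspn(s, "") == strlen(s));   // strcspn(s, "") -> strlen(s)
  assert(memcmp(s, s, 4) == 0);          // memcmp(s, s, n) -> 0
  return 0;
}
// ----------------------------------------------------------------------------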
- if (!TD) return 0; - - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || - !FT->getParamType(0)->isPointerTy() || - !FT->getParamType(1)->isIntegerTy() || - FT->getParamType(2) != TD->getIntPtrType(*Context)) - return 0; - - // memset(p, v, n) -> llvm.memset(p, v, n, 1) - Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false); - B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1); - return CI->getArgOperand(0); - } -}; - //===----------------------------------------------------------------------===// // Math Library Optimizations //===----------------------------------------------------------------------===// @@ -1004,9 +768,6 @@ namespace { TargetLibraryInfo *TLI; StringMap<LibCallOptimization*> Optimizations; - // String and Memory LibCall Optimizations - StrCSpnOpt StrCSpn; StrStrOpt StrStr; - MemCmpOpt MemCmp; MemCpyOpt MemCpy; MemMoveOpt MemMove; MemSetOpt MemSet; // Math Library Optimizations CosOpt Cos; PowOpt Pow; Exp2Opt Exp2; UnaryDoubleFPOpt UnaryDoubleFP, UnsafeUnaryDoubleFP; @@ -1072,14 +833,6 @@ void SimplifyLibCalls::AddOpt(LibFunc::Func F1, LibFunc::Func F2, /// Optimizations - Populate the Optimizations map with all the optimizations /// we know. void SimplifyLibCalls::InitOptimizations() { - // String and Memory LibCall Optimizations - Optimizations["strcspn"] = &StrCSpn; - Optimizations["strstr"] = &StrStr; - Optimizations["memcmp"] = &MemCmp; - AddOpt(LibFunc::memcpy, &MemCpy); - Optimizations["memmove"] = &MemMove; - AddOpt(LibFunc::memset, &MemSet); - // Math Library Optimizations Optimizations["cosf"] = &Cos; Optimizations["cos"] = &Cos; diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index 64c7011660..c3ea63852f 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -34,6 +34,7 @@ protected: Function *Caller; const DataLayout *TD; const TargetLibraryInfo *TLI; + const LibCallSimplifier *LCS; LLVMContext* Context; public: LibCallOptimization() { } @@ -48,10 +49,12 @@ public: =0; Value *optimizeCall(CallInst *CI, const DataLayout *TD, - const TargetLibraryInfo *TLI, IRBuilder<> &B) { + const TargetLibraryInfo *TLI, + const LibCallSimplifier *LCS, IRBuilder<> &B) { Caller = CI->getParent()->getParent(); this->TD = TD; this->TLI = TLI; + this->LCS = LCS; if (CI->getCalledFunction()) Context = &CI->getCalledFunction()->getContext(); @@ -83,6 +86,20 @@ static bool isOnlyUsedInZeroEqualityComparison(Value *V) { return true; } +/// isOnlyUsedInEqualityComparison - Return true if it is only used in equality +/// comparisons with With. +static bool isOnlyUsedInEqualityComparison(Value *V, Value *With) { + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); + UI != E; ++UI) { + if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI)) + if (IC->isEquality() && IC->getOperand(1) == With) + continue; + // Unknown instruction. 
+ return false; + } + return true; +} + //===----------------------------------------------------------------------===// // Fortified Library Call Optimizations //===----------------------------------------------------------------------===// @@ -801,6 +818,204 @@ struct StrSpnOpt : public LibCallOptimization { } }; +struct StrCSpnOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + FT->getParamType(0) != B.getInt8PtrTy() || + FT->getParamType(1) != FT->getParamType(0) || + !FT->getReturnType()->isIntegerTy()) + return 0; + + StringRef S1, S2; + bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1); + bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2); + + // strcspn("", s) -> 0 + if (HasS1 && S1.empty()) + return Constant::getNullValue(CI->getType()); + + // Constant folding. + if (HasS1 && HasS2) { + size_t Pos = S1.find_first_of(S2); + if (Pos == StringRef::npos) Pos = S1.size(); + return ConstantInt::get(CI->getType(), Pos); + } + + // strcspn(s, "") -> strlen(s) + if (TD && HasS2 && S2.empty()) + return EmitStrLen(CI->getArgOperand(0), B, TD, TLI); + + return 0; + } +}; + +struct StrStrOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + !FT->getReturnType()->isPointerTy()) + return 0; + + // fold strstr(x, x) -> x. + if (CI->getArgOperand(0) == CI->getArgOperand(1)) + return B.CreateBitCast(CI->getArgOperand(0), CI->getType()); + + // fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0 + if (TD && isOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) { + Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, TD, TLI); + if (!StrLen) + return 0; + Value *StrNCmp = EmitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1), + StrLen, B, TD, TLI); + if (!StrNCmp) + return 0; + for (Value::use_iterator UI = CI->use_begin(), UE = CI->use_end(); + UI != UE; ) { + ICmpInst *Old = cast<ICmpInst>(*UI++); + Value *Cmp = B.CreateICmp(Old->getPredicate(), StrNCmp, + ConstantInt::getNullValue(StrNCmp->getType()), + "cmp"); + LCS->replaceAllUsesWith(Old, Cmp); + } + return CI; + } + + // See if either input string is a constant string. + StringRef SearchStr, ToFindStr; + bool HasStr1 = getConstantStringInfo(CI->getArgOperand(0), SearchStr); + bool HasStr2 = getConstantStringInfo(CI->getArgOperand(1), ToFindStr); + + // fold strstr(x, "") -> x. + if (HasStr2 && ToFindStr.empty()) + return B.CreateBitCast(CI->getArgOperand(0), CI->getType()); + + // If both strings are known, constant fold it. + if (HasStr1 && HasStr2) { + std::string::size_type Offset = SearchStr.find(ToFindStr); + + if (Offset == StringRef::npos) // strstr("foo", "bar") -> null + return Constant::getNullValue(CI->getType()); + + // strstr("abcd", "bc") -> gep((char*)"abcd", 1) + Value *Result = CastToCStr(CI->getArgOperand(0), B); + Result = B.CreateConstInBoundsGEP1_64(Result, Offset, "strstr"); + return B.CreateBitCast(Result, CI->getType()); + } + + // fold strstr(x, "y") -> strchr(x, 'y'). + if (HasStr2 && ToFindStr.size() == 1) { + Value *StrChr= EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TD, TLI); + return StrChr ? 
B.CreateBitCast(StrChr, CI->getType()) : 0; + } + return 0; + } +}; + +struct MemCmpOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 3 || !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + !FT->getReturnType()->isIntegerTy(32)) + return 0; + + Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1); + + if (LHS == RHS) // memcmp(s,s,x) -> 0 + return Constant::getNullValue(CI->getType()); + + // Make sure we have a constant length. + ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2)); + if (!LenC) return 0; + uint64_t Len = LenC->getZExtValue(); + + if (Len == 0) // memcmp(s1,s2,0) -> 0 + return Constant::getNullValue(CI->getType()); + + // memcmp(S1,S2,1) -> *(unsigned char*)LHS - *(unsigned char*)RHS + if (Len == 1) { + Value *LHSV = B.CreateZExt(B.CreateLoad(CastToCStr(LHS, B), "lhsc"), + CI->getType(), "lhsv"); + Value *RHSV = B.CreateZExt(B.CreateLoad(CastToCStr(RHS, B), "rhsc"), + CI->getType(), "rhsv"); + return B.CreateSub(LHSV, RHSV, "chardiff"); + } + + // Constant folding: memcmp(x, y, l) -> cnst (all arguments are constant) + StringRef LHSStr, RHSStr; + if (getConstantStringInfo(LHS, LHSStr) && + getConstantStringInfo(RHS, RHSStr)) { + // Make sure we're not reading out-of-bounds memory. + if (Len > LHSStr.size() || Len > RHSStr.size()) + return 0; + uint64_t Ret = memcmp(LHSStr.data(), RHSStr.data(), Len); + return ConstantInt::get(CI->getType(), Ret); + } + + return 0; + } +}; + +struct MemCpyOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // These optimizations require DataLayout. + if (!TD) return 0; + + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + FT->getParamType(2) != TD->getIntPtrType(*Context)) + return 0; + + // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1) + B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), 1); + return CI->getArgOperand(0); + } +}; + +struct MemMoveOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // These optimizations require DataLayout. + if (!TD) return 0; + + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + FT->getParamType(2) != TD->getIntPtrType(*Context)) + return 0; + + // memmove(x, y, n) -> llvm.memmove(x, y, n, 1) + B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), 1); + return CI->getArgOperand(0); + } +}; + +struct MemSetOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // These optimizations require DataLayout. 
+ if (!TD) return 0; + + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isIntegerTy() || + FT->getParamType(2) != TD->getIntPtrType(*Context)) + return 0; + + // memset(p, v, n) -> llvm.memset(p, v, n, 1) + Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false); + B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1); + return CI->getArgOperand(0); + } +}; + } // End anonymous namespace. namespace llvm { @@ -808,6 +1023,7 @@ namespace llvm { class LibCallSimplifierImpl { const DataLayout *TD; const TargetLibraryInfo *TLI; + const LibCallSimplifier *LCS; StringMap<LibCallOptimization*> Optimizations; // Fortified library call optimizations. @@ -818,7 +1034,7 @@ class LibCallSimplifierImpl { StpCpyChkOpt StpCpyChk; StrNCpyChkOpt StrNCpyChk; - // String and memory library call optimizations. + // String library call optimizations. StrCatOpt StrCat; StrNCatOpt StrNCat; StrChrOpt StrChr; @@ -832,12 +1048,23 @@ class LibCallSimplifierImpl { StrPBrkOpt StrPBrk; StrToOpt StrTo; StrSpnOpt StrSpn; + StrCSpnOpt StrCSpn; + StrStrOpt StrStr; + + // Memory library call optimizations. + MemCmpOpt MemCmp; + MemCpyOpt MemCpy; + MemMoveOpt MemMove; + MemSetOpt MemSet; void initOptimizations(); + void addOpt(LibFunc::Func F, LibCallOptimization* Opt); public: - LibCallSimplifierImpl(const DataLayout *TD, const TargetLibraryInfo *TLI) { + LibCallSimplifierImpl(const DataLayout *TD, const TargetLibraryInfo *TLI, + const LibCallSimplifier *LCS) { this->TD = TD; this->TLI = TLI; + this->LCS = LCS; } Value *optimizeCall(CallInst *CI); @@ -853,26 +1080,34 @@ void LibCallSimplifierImpl::initOptimizations() { Optimizations["__strncpy_chk"] = &StrNCpyChk; Optimizations["__stpncpy_chk"] = &StrNCpyChk; - // String and memory library call optimizations. - Optimizations["strcat"] = &StrCat; - Optimizations["strncat"] = &StrNCat; - Optimizations["strchr"] = &StrChr; - Optimizations["strrchr"] = &StrRChr; - Optimizations["strcmp"] = &StrCmp; - Optimizations["strncmp"] = &StrNCmp; - Optimizations["strcpy"] = &StrCpy; - Optimizations["stpcpy"] = &StpCpy; - Optimizations["strncpy"] = &StrNCpy; - Optimizations["strlen"] = &StrLen; - Optimizations["strpbrk"] = &StrPBrk; - Optimizations["strtol"] = &StrTo; - Optimizations["strtod"] = &StrTo; - Optimizations["strtof"] = &StrTo; - Optimizations["strtoul"] = &StrTo; - Optimizations["strtoll"] = &StrTo; - Optimizations["strtold"] = &StrTo; - Optimizations["strtoull"] = &StrTo; - Optimizations["strspn"] = &StrSpn; + // String library call optimizations. + addOpt(LibFunc::strcat, &StrCat); + addOpt(LibFunc::strncat, &StrNCat); + addOpt(LibFunc::strchr, &StrChr); + addOpt(LibFunc::strrchr, &StrRChr); + addOpt(LibFunc::strcmp, &StrCmp); + addOpt(LibFunc::strncmp, &StrNCmp); + addOpt(LibFunc::strcpy, &StrCpy); + addOpt(LibFunc::stpcpy, &StpCpy); + addOpt(LibFunc::strncpy, &StrNCpy); + addOpt(LibFunc::strlen, &StrLen); + addOpt(LibFunc::strpbrk, &StrPBrk); + addOpt(LibFunc::strtol, &StrTo); + addOpt(LibFunc::strtod, &StrTo); + addOpt(LibFunc::strtof, &StrTo); + addOpt(LibFunc::strtoul, &StrTo); + addOpt(LibFunc::strtoll, &StrTo); + addOpt(LibFunc::strtold, &StrTo); + addOpt(LibFunc::strtoull, &StrTo); + addOpt(LibFunc::strspn, &StrSpn); + addOpt(LibFunc::strcspn, &StrCSpn); + addOpt(LibFunc::strstr, &StrStr); + + // Memory library call optimizations. 
+  addOpt(LibFunc::memcmp, &MemCmp);
+  addOpt(LibFunc::memcpy, &MemCpy);
+  addOpt(LibFunc::memmove, &MemMove);
+  addOpt(LibFunc::memset, &MemSet);
 }
 
 Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) {
@@ -883,14 +1118,19 @@ Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) {
   LibCallOptimization *LCO = Optimizations.lookup(Callee->getName());
   if (LCO) {
     IRBuilder<> Builder(CI);
-    return LCO->optimizeCall(CI, TD, TLI, Builder);
+    return LCO->optimizeCall(CI, TD, TLI, LCS, Builder);
   }
   return 0;
 }
 
+void LibCallSimplifierImpl::addOpt(LibFunc::Func F, LibCallOptimization* Opt) {
+  if (TLI->has(F))
+    Optimizations[TLI->getName(F)] = Opt;
+}
+
 LibCallSimplifier::LibCallSimplifier(const DataLayout *TD,
                                      const TargetLibraryInfo *TLI) {
-  Impl = new LibCallSimplifierImpl(TD, TLI);
+  Impl = new LibCallSimplifierImpl(TD, TLI, this);
 }
 
 LibCallSimplifier::~LibCallSimplifier() {
@@ -901,4 +1141,9 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
   return Impl->optimizeCall(CI);
 }
 
+void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) const {
+  I->replaceAllUsesWith(With);
+  I->eraseFromParent();
+}
+
 }
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 892808760f..a7ef248e6e 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -78,6 +78,10 @@ VectorizationFactor("force-vector-width", cl::init(0), cl::Hidden,
 /// We don't vectorize loops with a known constant trip count below this number.
 const unsigned TinyTripCountThreshold = 16;
 
+/// When performing a runtime memory check, do not check more than this
+/// number of pointers. Notice that the check is quadratic!
+const unsigned RuntimeMemoryCheckThreshold = 2;
+
 namespace {
 
 // Forward declarations.
@@ -114,7 +118,7 @@ public:
     /// Widen each instruction in the old loop to a new one in the new loop.
     /// Use the Legality module to find the induction and reduction variables.
     vectorizeLoop(Legal);
-    // register the new loop.
+    // Register the new loop and update the analysis passes.
     updateAnalysis();
   }
 
@@ -123,7 +127,8 @@ private:
   void createEmptyLoop(LoopVectorizationLegality *Legal);
   /// Copy and widen the instructions from the old loop.
   void vectorizeLoop(LoopVectorizationLegality *Legal);
-  /// Insert the new loop to the loop hierarchy and pass manager.
+  /// Insert the new loop to the loop hierarchy and pass manager
+  /// and update the analysis passes.
   void updateAnalysis();
 
   /// This instruction is un-vectorizable. Implement it as a sequence
@@ -242,6 +247,15 @@ public:
     ReductionKind Kind;
   };
 
+  // This POD struct holds information for the runtime legality check
+  // that verifies that a group of pointers do not overlap.
+  struct RuntimePointerCheck {
+    /// This flag indicates if we need to add the runtime check.
+    bool Need;
+    /// Holds the pointers that we need to check.
+    SmallVector<Value*, 2> Pointers;
+  };
+
   /// ReductionList contains the reduction descriptors for all
   /// of the reductions that were found in the loop.
   typedef DenseMap<PHINode*, ReductionDescriptor> ReductionList;
@@ -263,9 +277,14 @@ public:
   /// This check allows us to vectorize A[idx] into a wide load/store.
   bool isConsecutiveGep(Value *Ptr);
 
+  /// Returns true if the value V is uniform within the loop.
+  bool isUniform(Value *V);
+
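A note on the RuntimePointerCheck machinery introduced above: the overlap test is pairwise and therefore quadratic in the number of tracked pointers, which is why RuntimeMemoryCheckThreshold is kept as low as 2. A minimal sketch of the cost (the function name is illustrative, not part of the patch):

```c++
#include <cstdio>

// For N pointers the vectorizer must emit one overlap test per unordered
// pair, i.e. N*(N-1)/2 comparisons in the loop preheader.
unsigned numRuntimeChecks(unsigned NumPointers) {
  return NumPointers * (NumPointers - 1) / 2;
}

int main() {
  std::printf("%u\n", numRuntimeChecks(2)); // 1 check at the current threshold
  std::printf("%u\n", numRuntimeChecks(8)); // already 28 checks
  return 0;
}
```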
   /// Returns true if this instruction will remain scalar after vectorization.
   bool isUniformAfterVectorization(Instruction* I) {return Uniforms.count(I);}
+
+  /// Returns the information that we collected about the runtime memory check.
+  RuntimePointerCheck *getRuntimePointerCheck() { return &PtrRtCheck; }
 private:
   /// Check if a single basic block loop is vectorizable.
   /// At this point we know that this is a loop with a constant trip count
@@ -286,6 +305,8 @@ private:
   bool isReductionInstr(Instruction *I, ReductionKind Kind);
   /// Returns True, if 'Phi' is an induction variable.
   bool isInductionVariable(PHINode *Phi);
+  /// Return true if we can compute the address bounds of Ptr within the loop.
+  bool hasComputableBounds(Value *Ptr);
 
   /// The loop that we evaluate.
   Loop *TheLoop;
@@ -306,6 +327,9 @@ private:
   /// This set holds the variables which are known to be uniform after
   /// vectorization.
   SmallPtrSet<Instruction*, 4> Uniforms;
+  /// We need to check that all of the pointers in this list are disjoint
+  /// at runtime.
+  RuntimePointerCheck PtrRtCheck;
 };
 
 /// LoopVectorizationCostModel - estimates the expected speedups due to
@@ -506,6 +530,10 @@ bool LoopVectorizationLegality::isConsecutiveGep(Value *Ptr) {
   return false;
 }
 
+bool LoopVectorizationLegality::isUniform(Value *V) {
+  return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop));
+}
+
 Value *SingleBlockLoopVectorizer::getVectorValue(Value *V) {
   assert(!V->getType()->isVectorTy() && "Can't widen a vector");
   // If we saved a vectorized copy of V, use it.
@@ -631,13 +659,29 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
   ...
  */
 
+  OldInduction = Legal->getInduction();
+  assert(OldInduction && "We must have a single phi node.");
+  Type *IdxTy = OldInduction->getType();
+
+  // Find the loop boundaries.
+  const SCEV *ExitCount = SE->getExitCount(OrigLoop, OrigLoop->getHeader());
+  assert(ExitCount != SE->getCouldNotCompute() && "Invalid loop count");
+
+  // Get the total trip count from the count by adding 1.
+  ExitCount = SE->getAddExpr(ExitCount,
+                             SE->getConstant(ExitCount->getType(), 1));
+
+  // We may need to extend the index in case there is a type mismatch.
+  // We know that the count starts at zero and does not overflow.
+  // We are using Zext because it should be less expensive.
+  if (ExitCount->getType() != IdxTy)
+    ExitCount = SE->getZeroExtendExpr(ExitCount, IdxTy);
+
   // This is the original scalar-loop preheader.
   BasicBlock *BypassBlock = OrigLoop->getLoopPreheader();
   BasicBlock *ExitBlock = OrigLoop->getExitBlock();
   assert(ExitBlock && "Must have an exit block");
 
   // The loop index does not have to start at Zero. It starts with this value.
-  OldInduction = Legal->getInduction();
   Value *StartIdx = OldInduction->getIncomingValueForBlock(BypassBlock);
 
   assert(OrigLoop->getNumBlocks() == 1 && "Invalid loop");
@@ -655,8 +699,6 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
                            "scalar.preheader");
   // Find the induction variable.
   BasicBlock *OldBasicBlock = OrigLoop->getHeader();
-  assert(OldInduction && "We must have a single phi node.");
-  Type *IdxTy = OldInduction->getType();
 
   // Use this IR builder to create the loop instructions (Phi, Br, Cmp)
   // inside the loop.
@@ -666,25 +708,11 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
   Induction = Builder.CreatePHI(IdxTy, 2, "index");
   Constant *Step = ConstantInt::get(IdxTy, VF);
 
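The boundary computation hoisted above feeds the preheader arithmetic in the next hunk: the SCEV backedge-taken count is incremented to get the trip count N, zero-extended if the types mismatch, and then rounded down to a multiple of VF so the vector body only runs whole vector iterations. A scalar sketch of that arithmetic, assuming a zero start index (the real code also handles non-zero starts):

```c++
#include <cassert>
#include <cstdint>

// Scalar model of the preheader index math: the vector loop covers
// [0, roundedEnd) in steps of VF; the scalar loop finishes the rest.
uint64_t roundedEnd(uint64_t BackedgeCount, uint64_t VF) {
  uint64_t N = BackedgeCount + 1; // trip count = backedge-taken count + 1
  return N - (N % VF);            // largest multiple of VF not exceeding N
}

int main() {
  assert(roundedEnd(11, 4) == 12); // 12 iterations: fully vectorized
  assert(roundedEnd(12, 4) == 12); // 13 iterations: one left for the scalar loop
  return 0;
}
```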
-  // Find the loop boundaries.
-  const SCEV *ExitCount = SE->getExitCount(OrigLoop, OrigLoop->getHeader());
-  assert(ExitCount != SE->getCouldNotCompute() && "Invalid loop count");
-
-  // Get the total trip count from the count by adding 1.
-  ExitCount = SE->getAddExpr(ExitCount,
-                             SE->getConstant(ExitCount->getType(), 1));
-
   // Expand the trip count and place the new instructions in the preheader.
   // Notice that the pre-header does not change, only the loop body.
   SCEVExpander Exp(*SE, "induction");
   Instruction *Loc = BypassBlock->getTerminator();
 
-  // We may need to extend the index in case there is a type mismatch.
-  // We know that the count starts at zero and does not overflow.
-  // We are using Zext because it should be less expensive.
-  if (ExitCount->getType() != Induction->getType())
-    ExitCount = SE->getZeroExtendExpr(ExitCount, IdxTy);
-
   // Count holds the overall loop count (N).
   Value *Count = Exp.expandCodeFor(ExitCount, Induction->getType(), Loc);
@@ -704,15 +732,85 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
                                IdxEndRoundDown,
                                StartIdx,
                                "cmp.zero", Loc);
+
+  LoopVectorizationLegality::RuntimePointerCheck *PtrRtCheck =
+    Legal->getRuntimePointerCheck();
+  Value *MemoryRuntimeCheck = 0;
+  if (PtrRtCheck->Need) {
+    unsigned NumPointers = PtrRtCheck->Pointers.size();
+    SmallVector<Value*, 2> Starts;
+    SmallVector<Value*, 2> Ends;
+
+    // Use this type for pointer arithmetic.
+    Type* PtrArithTy = PtrRtCheck->Pointers[0]->getType();
+
+    for (unsigned i = 0; i < NumPointers; ++i) {
+      Value *Ptr = PtrRtCheck->Pointers[i];
+      const SCEV *Sc = SE->getSCEV(Ptr);
+
+      if (SE->isLoopInvariant(Sc, OrigLoop)) {
+        DEBUG(dbgs() << "LV: Adding RT check for a loop invariant ptr:" <<
+              *Ptr << "\n");
+        Starts.push_back(Ptr);
+        Ends.push_back(Ptr);
+      } else {
+        DEBUG(dbgs() << "LV: Adding RT check for range:" << *Ptr << "\n");
+        const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
+        Value *Start = Exp.expandCodeFor(AR->getStart(), PtrArithTy, Loc);
+        const SCEV *Ex = SE->getExitCount(OrigLoop, OrigLoop->getHeader());
+        const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE);
+        assert(!isa<SCEVCouldNotCompute>(ScEnd) && "Invalid scev range.");
+        Value *End = Exp.expandCodeFor(ScEnd, PtrArithTy, Loc);
+        Starts.push_back(Start);
+        Ends.push_back(End);
+      }
+    }
+
+    for (unsigned i = 0; i < NumPointers; ++i) {
+      for (unsigned j = i+1; j < NumPointers; ++j) {
+        Value *Cmp0 = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULE,
+                                      Starts[i], Ends[j], "bound0", Loc);
+        Value *Cmp1 = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULE,
+                                      Starts[j], Ends[i], "bound1", Loc);
+        Value *IsConflict = BinaryOperator::Create(Instruction::And, Cmp0, Cmp1,
+                                                   "found.conflict", Loc);
+        if (MemoryRuntimeCheck) {
+          MemoryRuntimeCheck = BinaryOperator::Create(Instruction::Or,
+                                                      MemoryRuntimeCheck,
+                                                      IsConflict,
+                                                      "conflict.rdx", Loc);
+        } else {
+          MemoryRuntimeCheck = IsConflict;
+        }
+      }
+    }
+  } // end of need-runtime-check code.
+
+  // If we are using memory runtime checks, include them in the bypass
+  // condition.
+  if (MemoryRuntimeCheck) {
+    Cmp = BinaryOperator::Create(Instruction::Or, Cmp, MemoryRuntimeCheck,
+                                 "CntOrMem", Loc);
+  }
+
   BranchInst::Create(MiddleBlock, VectorPH, Cmp, Loc);
 
   // Remove the old terminator.
   Loc->eraseFromParent();
 
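The bound0/bound1/found.conflict sequence emitted above is the usual interval-overlap test: two accessed ranges conflict exactly when each one starts no later than the other ends, with everything compared unsigned (ICMP_ULE). A C++ sketch of the predicate, with pointers modeled as integers (names mirror the IR value names):

```c++
#include <cstdint>

// Overlap test for two address ranges [StartA, EndA] and [StartB, EndB].
// All comparisons are unsigned, matching the ICMP_ULE instructions.
bool foundConflict(uint64_t StartA, uint64_t EndA,
                   uint64_t StartB, uint64_t EndB) {
  bool Bound0 = StartA <= EndB; // "bound0" in the emitted IR
  bool Bound1 = StartB <= EndA; // "bound1" in the emitted IR
  return Bound0 && Bound1;      // "found.conflict"
}
```

The per-pair results are OR-ed together into conflict.rdx and folded into the cmp.zero bypass condition, so any possible overlap sends execution to the unvectorized scalar loop.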
+  // We are going to resume the execution of the scalar loop.
+  // This PHI decides on what number to start. If we come from the
+  // vector loop then we need to start with the end index minus the
+  // index modulo VF. If we come from a bypass edge then we need to start
+  // from the real start.
+  PHINode* ResumeIndex = PHINode::Create(IdxTy, 2, "resume.idx",
+                                         MiddleBlock->getTerminator());
+  ResumeIndex->addIncoming(StartIdx, BypassBlock);
+  ResumeIndex->addIncoming(IdxEndRoundDown, VecBody);
+
   // Add a check in the middle block to see if we have completed
   // all of the iterations in the first vector loop.
   // If (N - N%VF) == N, then we *don't* need to run the remainder.
   Value *CmpN = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, IdxEnd,
-                                IdxEndRoundDown, "cmp.n",
+                                ResumeIndex, "cmp.n",
                                 MiddleBlock->getTerminator());
 
   BranchInst::Create(ExitBlock, ScalarPH, CmpN, MiddleBlock->getTerminator());
@@ -732,7 +830,7 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
 
   // Fix the scalar body iteration count.
   unsigned BlockIdx = OldInduction->getBasicBlockIndex(ScalarPH);
-  OldInduction->setIncomingValue(BlockIdx, IdxEndRoundDown);
+  OldInduction->setIncomingValue(BlockIdx, ResumeIndex);
 
   // Get ready to start creating new instructions into the vectorized body.
   Builder.SetInsertPoint(VecBody->getFirstInsertionPt());
@@ -905,7 +1003,12 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
       Type *StTy = VectorType::get(SI->getValueOperand()->getType(), VF);
       Value *Ptr = SI->getPointerOperand();
       unsigned Alignment = SI->getAlignment();
+
+      assert(!Legal->isUniform(Ptr) &&
+             "We do not allow storing to uniform addresses");
+
       GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
+
       // This store does not use GEPs.
       if (!Legal->isConsecutiveGep(Gep)) {
         scalarizeInstruction(Inst);
@@ -935,8 +1038,9 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
       unsigned Alignment = LI->getAlignment();
       GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
 
-      // We don't have a gep. Scalarize the load.
-      if (!Legal->isConsecutiveGep(Gep)) {
+      // If we don't have a gep, or if the pointer is loop invariant,
+      // scalarize the load.
+      if (!Gep || Legal->isUniform(Gep) || !Legal->isConsecutiveGep(Gep)) {
         scalarizeInstruction(Inst);
         break;
       }
@@ -1146,12 +1250,6 @@ bool LoopVectorizationLegality::canVectorize() {
   BasicBlock *BB = TheLoop->getHeader();
   DEBUG(dbgs() << "LV: Found a loop: " << BB->getName() << "\n");
 
-  // Go over each instruction and look at memory deps.
-  if (!canVectorizeBlock(*BB)) {
-    DEBUG(dbgs() << "LV: Can't vectorize this loop header\n");
-    return false;
-  }
-
   // ScalarEvolution needs to be able to find the exit count.
   const SCEV *ExitCount = SE->getExitCount(TheLoop, BB);
   if (ExitCount == SE->getCouldNotCompute()) {
@@ -1167,7 +1265,15 @@ bool LoopVectorizationLegality::canVectorize() {
     return false;
   }
 
-  DEBUG(dbgs() << "LV: We can vectorize this loop!\n");
+  // Go over each instruction and look at memory deps.
+  if (!canVectorizeBlock(*BB)) {
+    DEBUG(dbgs() << "LV: Can't vectorize this loop header\n");
+    return false;
+  }
+
+  DEBUG(dbgs() << "LV: We can vectorize this loop" <<
+        (PtrRtCheck.Need ? " (with a runtime bound check)" : "") <<
+        "!\n");
 
   // Okay! We can vectorize. At this point we don't have any other mem analysis
   // which may limit our maximum vectorization factor, so just return true with
@@ -1304,6 +1410,8 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {
   // Holds the Load and Store *instructions*.
   ValueVector Loads;
   ValueVector Stores;
+  PtrRtCheck.Pointers.clear();
+  PtrRtCheck.Need = false;
 
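The block scan that follows ends in a three-way outcome that is easy to lose in the diff: memory safety proven statically (vectorize with no checks), safety deferred to the runtime check (every pointer has computable affine bounds and the count stays within the threshold), or failure. A condensed sketch of that decision; this is a simplified model, not the actual function:

```c++
// Condensed model of canVectorizeMemory's result after this patch.
enum MemCheckResult { NoCheckNeeded, NeedsRuntimeCheck, CannotVectorize };

MemCheckResult classify(bool StaticallyDisjoint, bool RT) {
  if (StaticallyDisjoint)
    return NoCheckNeeded;     // PtrRtCheck.Need stays false.
  if (RT)
    return NeedsRuntimeCheck; // bounds computable and few enough pointers
  return CannotVectorize;     // unidentified object or too many pointers
}
```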
   // Scan the BB and collect legal loads and stores.
   for (BasicBlock::iterator it = BB.begin(), e = BB.end(); it != e; ++it) {
@@ -1361,6 +1469,12 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {
     StoreInst *ST = dyn_cast<StoreInst>(*I);
     assert(ST && "Bad StoreInst");
     Value* Ptr = ST->getPointerOperand();
+
+    if (isUniform(Ptr)) {
+      DEBUG(dbgs() << "LV: We don't allow storing to uniform addresses\n");
+      return false;
+    }
+
     // If we did *not* see this pointer before, insert it to
     // the read-write list. At this phase it is only a 'write' list.
     if (Seen.insert(Ptr))
@@ -1390,6 +1504,39 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {
     return true;
   }
 
+  // Find pointers with computable bounds. We are going to use this information
+  // to place a runtime bound check.
+  bool RT = true;
+  for (I = ReadWrites.begin(), IE = ReadWrites.end(); I != IE; ++I)
+    if (hasComputableBounds(*I)) {
+      PtrRtCheck.Pointers.push_back(*I);
+      DEBUG(dbgs() << "LV: Found a runtime check ptr:" << **I << "\n");
+    } else {
+      RT = false;
+      break;
+    }
+  for (I = Reads.begin(), IE = Reads.end(); I != IE; ++I)
+    if (hasComputableBounds(*I)) {
+      PtrRtCheck.Pointers.push_back(*I);
+      DEBUG(dbgs() << "LV: Found a runtime check ptr:" << **I << "\n");
+    } else {
+      RT = false;
+      break;
+    }
+
+  // Check that we did not collect too many pointers or find an
+  // unsizeable pointer.
+  if (!RT || PtrRtCheck.Pointers.size() > RuntimeMemoryCheckThreshold) {
+    PtrRtCheck.Pointers.clear();
+    RT = false;
+  }
+
+  PtrRtCheck.Need = RT;
+
+  if (RT) {
+    DEBUG(dbgs() << "LV: We can perform a memory runtime check if needed.\n");
+  }
+
   // Now that the pointers are in two lists (Reads and ReadWrites), we
   // can check that there are no conflicts between each of the writes and
   // between the writes to the reads.
@@ -1404,12 +1551,12 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {
        it != e; ++it) {
     if (!isIdentifiedObject(*it)) {
       DEBUG(dbgs() << "LV: Found an unidentified write ptr:" << **it << "\n");
-      return false;
+      return RT;
     }
     if (!WriteObjects.insert(*it)) {
       DEBUG(dbgs() << "LV: Found a possible write-write reorder:" << **it << "\n");
-      return false;
+      return RT;
     }
   }
   TempObjects.clear();
@@ -1422,18 +1569,21 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {
        it != e; ++it) {
     if (!isIdentifiedObject(*it)) {
       DEBUG(dbgs() << "LV: Found an unidentified read ptr:" << **it << "\n");
-      return false;
+      return RT;
     }
     if (WriteObjects.count(*it)) {
       DEBUG(dbgs() << "LV: Found a possible read/write reorder:" << **it << "\n");
-      return false;
+      return RT;
     }
   }
   TempObjects.clear();
   }
 
-  // All is okay.
+  // It is safe to vectorize and we don't need any runtime checks.
+ DEBUG(dbgs() << "LV: We don't need a runtime memory check.\n"); + PtrRtCheck.Pointers.clear(); + PtrRtCheck.Need = false; return true; } @@ -1556,6 +1706,15 @@ bool LoopVectorizationLegality::isInductionVariable(PHINode *Phi) { return true; } +bool LoopVectorizationLegality::hasComputableBounds(Value *Ptr) { + const SCEV *PhiScev = SE->getSCEV(Ptr); + const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev); + if (!AR) + return false; + + return AR->isAffine(); +} + unsigned LoopVectorizationCostModel::findBestVectorizationFactor(unsigned VF) { if (!VTTI) { diff --git a/test/CodeGen/ARM/2010-10-19-mc-elf-objheader.ll b/test/CodeGen/ARM/2010-10-19-mc-elf-objheader.ll index 99db63713d..36d15757c3 100644 --- a/test/CodeGen/ARM/2010-10-19-mc-elf-objheader.ll +++ b/test/CodeGen/ARM/2010-10-19-mc-elf-objheader.ll @@ -13,12 +13,12 @@ ; BASIC-NEXT: 0x00000000 ; BASIC-NEXT: 0x00000000 ; BASIC-NEXT: 0x0000003c -; BASIC-NEXT: 0x00000020 +; BASIC-NEXT: 0x00000022 ; BASIC-NEXT: 0x00000000 ; BASIC-NEXT: 0x00000000 ; BASIC-NEXT: 0x00000001 ; BASIC-NEXT: 0x00000000 -; BASIC-NEXT: '411f0000 00616561 62690001 15000000 06020801 09011401 15011703 18011901' +; BASIC-NEXT: '41210000 00616561 62690001 17000000 060a0741 08010902 14011501 17031801 1901' ; CORTEXA8: .ARM.attributes ; CORTEXA8-NEXT: 0x70000003 diff --git a/test/CodeGen/ARM/call-noret-minsize.ll b/test/CodeGen/ARM/call-noret-minsize.ll index 35490ac69b..df3c19eca6 100644 --- a/test/CodeGen/ARM/call-noret-minsize.ll +++ b/test/CodeGen/ARM/call-noret-minsize.ll @@ -1,6 +1,5 @@ ; RUN: llc < %s -mtriple=armv7-apple-ios -mcpu=cortex-a8 | FileCheck %s -check-prefix=ARM ; RUN: llc < %s -mtriple=armv7-apple-ios -mcpu=swift | FileCheck %s -check-prefix=SWIFT -; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 | FileCheck %s -check-prefix=T2 ; rdar://12348580 define void @t1() noreturn minsize nounwind ssp { @@ -10,9 +9,6 @@ entry: ; SWIFT: t1: ; SWIFT: bl _bar - -; T2: t1: -; T2: blx _bar tail call void @bar() noreturn nounwind unreachable } @@ -24,9 +20,6 @@ entry: ; SWIFT: t2: ; SWIFT: bl _t1 - -; T2: t2: -; T2: bl _t1 tail call void @t1() noreturn nounwind unreachable } diff --git a/test/CodeGen/ARM/call-noret.ll b/test/CodeGen/ARM/call-noret.ll index d294f2cf1a..27062dca38 100644 --- a/test/CodeGen/ARM/call-noret.ll +++ b/test/CodeGen/ARM/call-noret.ll @@ -1,6 +1,5 @@ ; RUN: llc < %s -mtriple=armv7-apple-ios -mcpu=cortex-a8 | FileCheck %s -check-prefix=ARM ; RUN: llc < %s -mtriple=armv7-apple-ios -mcpu=swift | FileCheck %s -check-prefix=SWIFT -; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 | FileCheck %s -check-prefix=T2 ; rdar://8979299 define void @t1() noreturn nounwind ssp { @@ -12,9 +11,6 @@ entry: ; SWIFT: t1: ; SWIFT: mov lr, pc ; SWIFT: b _bar - -; T2: t1: -; T2: blx _bar tail call void @bar() noreturn nounwind unreachable } @@ -28,10 +24,6 @@ entry: ; SWIFT: t2: ; SWIFT: mov lr, pc ; SWIFT: b _t1 - -; T2: t2: -; T2: mov lr, pc -; T2: b.w _t1 tail call void @t1() noreturn nounwind unreachable } diff --git a/test/CodeGen/NVPTX/param-align.ll b/test/CodeGen/NVPTX/param-align.ll new file mode 100644 index 0000000000..84ccb650d4 --- /dev/null +++ b/test/CodeGen/NVPTX/param-align.ll @@ -0,0 +1,25 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s + +;;; Need 4-byte alignment on float* passed byval +define ptx_device void @t1(float* byval %x) { +; CHECK: .func t1 +; CHECK: .param .align 4 .b8 t1_param_0[4] + ret void +} + + +;;; Need 8-byte alignment on double* passed byval +define ptx_device void @t2(double* byval %x) 
{ +; CHECK: .func t2 +; CHECK: .param .align 8 .b8 t2_param_0[8] + ret void +} + + +;;; Need 4-byte alignment on float2* passed byval +%struct.float2 = type { float, float } +define ptx_device void @t3(%struct.float2* byval %x) { +; CHECK: .func t3 +; CHECK: .param .align 4 .b8 t3_param_0[8] + ret void +} diff --git a/test/CodeGen/NVPTX/ptx-version-30.ll b/test/CodeGen/NVPTX/ptx-version-30.ll new file mode 100644 index 0000000000..0422b01f4e --- /dev/null +++ b/test/CodeGen/NVPTX/ptx-version-30.ll @@ -0,0 +1,6 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 -mattr=ptx30 | FileCheck %s +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -mattr=ptx30 | FileCheck %s + + +; CHECK: .version 3.0 + diff --git a/test/CodeGen/NVPTX/ptx-version-31.ll b/test/CodeGen/NVPTX/ptx-version-31.ll new file mode 100644 index 0000000000..d6e57301a3 --- /dev/null +++ b/test/CodeGen/NVPTX/ptx-version-31.ll @@ -0,0 +1,6 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 -mattr=ptx31 | FileCheck %s +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -mattr=ptx31 | FileCheck %s + + +; CHECK: .version 3.1 + diff --git a/test/CodeGen/NVPTX/sm-version-10.ll b/test/CodeGen/NVPTX/sm-version-10.ll new file mode 100644 index 0000000000..9324a37809 --- /dev/null +++ b/test/CodeGen/NVPTX/sm-version-10.ll @@ -0,0 +1,6 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s +; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s + + +; CHECK: .target sm_10 + diff --git a/test/CodeGen/NVPTX/sm-version-11.ll b/test/CodeGen/NVPTX/sm-version-11.ll new file mode 100644 index 0000000000..9033a4eba5 --- /dev/null +++ b/test/CodeGen/NVPTX/sm-version-11.ll @@ -0,0 +1,6 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_11 | FileCheck %s +; RUN: llc < %s -march=nvptx64 -mcpu=sm_11 | FileCheck %s + + +; CHECK: .target sm_11 + diff --git a/test/CodeGen/NVPTX/sm-version-12.ll b/test/CodeGen/NVPTX/sm-version-12.ll new file mode 100644 index 0000000000..d8ee85c901 --- /dev/null +++ b/test/CodeGen/NVPTX/sm-version-12.ll @@ -0,0 +1,6 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_12 | FileCheck %s +; RUN: llc < %s -march=nvptx64 -mcpu=sm_12 | FileCheck %s + + +; CHECK: .target sm_12 + diff --git a/test/CodeGen/NVPTX/sm-version-13.ll b/test/CodeGen/NVPTX/sm-version-13.ll new file mode 100644 index 0000000000..ad67d642ce --- /dev/null +++ b/test/CodeGen/NVPTX/sm-version-13.ll @@ -0,0 +1,6 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_13 | FileCheck %s +; RUN: llc < %s -march=nvptx64 -mcpu=sm_13 | FileCheck %s + + +; CHECK: .target sm_13 + diff --git a/test/CodeGen/NVPTX/sm-version-20.ll b/test/CodeGen/NVPTX/sm-version-20.ll new file mode 100644 index 0000000000..c21f49e6ae --- /dev/null +++ b/test/CodeGen/NVPTX/sm-version-20.ll @@ -0,0 +1,6 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s + + +; CHECK: .target sm_20 + diff --git a/test/CodeGen/NVPTX/sm-version-21.ll b/test/CodeGen/NVPTX/sm-version-21.ll new file mode 100644 index 0000000000..4fb6de3e63 --- /dev/null +++ b/test/CodeGen/NVPTX/sm-version-21.ll @@ -0,0 +1,6 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_21 | FileCheck %s +; RUN: llc < %s -march=nvptx64 -mcpu=sm_21 | FileCheck %s + + +; CHECK: .target sm_21 + diff --git a/test/CodeGen/NVPTX/sm-version-30.ll b/test/CodeGen/NVPTX/sm-version-30.ll new file mode 100644 index 0000000000..692b49a0d6 --- /dev/null +++ b/test/CodeGen/NVPTX/sm-version-30.ll @@ -0,0 +1,6 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_30 | FileCheck %s +; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 | FileCheck %s + + +; CHECK: .target 
sm_30 + diff --git a/test/CodeGen/NVPTX/sm-version-35.ll b/test/CodeGen/NVPTX/sm-version-35.ll new file mode 100644 index 0000000000..25368a0133 --- /dev/null +++ b/test/CodeGen/NVPTX/sm-version-35.ll @@ -0,0 +1,6 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_35 | FileCheck %s +; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s + + +; CHECK: .target sm_35 + diff --git a/test/CodeGen/PowerPC/misched.ll b/test/CodeGen/PowerPC/misched.ll new file mode 100644 index 0000000000..d6fb3b3046 --- /dev/null +++ b/test/CodeGen/PowerPC/misched.ll @@ -0,0 +1,45 @@ +; RUN: llc < %s -enable-misched -verify-machineinstrs +; PR14302 +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-bgq-linux" + +@b = external global [16000 x double], align 32 + +define void @pr14302() nounwind { +entry: + tail call void @putchar() nounwind + br label %for.body + +for.body: ; preds = %for.body, %entry + br i1 undef, label %for.body, label %for.body24.i + +for.body24.i: ; preds = %for.body24.i, %for.body + store double 1.000000e+00, double* undef, align 8 + br i1 undef, label %for.body24.i58, label %for.body24.i + +for.body24.i58: ; preds = %for.body24.i58, %for.body24.i + %arrayidx26.i55.1 = getelementptr inbounds [16000 x double]* @b, i64 0, i64 undef + store double 1.000000e+00, double* %arrayidx26.i55.1, align 8 + br i1 undef, label %for.body24.i64, label %for.body24.i58 + +for.body24.i64: ; preds = %for.body24.i64, %for.body24.i58 + %exitcond.2489 = icmp eq i32 0, 16000 + br i1 %exitcond.2489, label %for.body24.i70, label %for.body24.i64 + +for.body24.i70: ; preds = %for.body24.i70, %for.body24.i64 + br i1 undef, label %for.body24.i76, label %for.body24.i70 + +for.body24.i76: ; preds = %for.body24.i76, %for.body24.i70 + br i1 undef, label %set1d.exit77, label %for.body24.i76 + +set1d.exit77: ; preds = %for.body24.i76 + br label %for.body29 + +for.body29: ; preds = %for.body29, %set1d.exit77 + br i1 undef, label %for.end35, label %for.body29 + +for.end35: ; preds = %for.body29 + ret void +} + +declare void @putchar() diff --git a/test/CodeGen/Thumb/thumb_jump24_fixup.ll b/test/CodeGen/Thumb/thumb_jump24_fixup.ll deleted file mode 100644 index e6a6b25ca1..0000000000 --- a/test/CodeGen/Thumb/thumb_jump24_fixup.ll +++ /dev/null @@ -1,23 +0,0 @@ -; RUN: llc -mtriple thumbv7-none-linux-gnueabi -mcpu=cortex-a8 -march=thumb -mattr=thumb2 -filetype=obj -o - < %s | llvm-objdump -r - | FileCheck %s - -target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:32-n32-S64" -target triple = "thumbv7-none-linux-gnueabi" - -define i32 @test_fixup_t2_uncondbranch() { -b0: - invoke void @__cxa_throw(i8* null, i8* null, i8* null) noreturn - to label %unreachable unwind label %lpad - -; CHECK: {{[0-9]+}} R_ARM_THM_JUMP24 __cxa_throw - -lpad: - %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) cleanup - ret i32 0 - -unreachable: - unreachable -} - -declare i32 @__gxx_personality_v0(...) 
- -declare void @__cxa_throw(i8*, i8*, i8*) diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll index c44beb4bc2..88ecd5a5d3 100644 --- a/test/CodeGen/X86/avx-intrinsics-x86.ll +++ b/test/CodeGen/X86/avx-intrinsics-x86.ll @@ -1140,9 +1140,9 @@ declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) noun define i32 @test_x86_sse42_pcmpestri128(<16 x i8> %a0, <16 x i8> %a2) { - ; CHECK: movl - ; CHECK: movl - ; CHECK: vpcmpestri + ; CHECK: movl $7 + ; CHECK: movl $7 + ; CHECK: vpcmpestri $7 ; CHECK: movl %res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] ret i32 %res @@ -1150,6 +1150,18 @@ define i32 @test_x86_sse42_pcmpestri128(<16 x i8> %a0, <16 x i8> %a2) { declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone +define i32 @test_x86_sse42_pcmpestri128_load(<16 x i8>* %a0, <16 x i8>* %a2) { + ; CHECK: movl $7 + ; CHECK: movl $7 + ; CHECK: vpcmpestri $7, ( + ; CHECK: movl + %1 = load <16 x i8>* %a0 + %2 = load <16 x i8>* %a2 + %res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %1, i32 7, <16 x i8> %2, i32 7, i8 7) ; <i32> [#uses=1] + ret i32 %res +} + + define i32 @test_x86_sse42_pcmpestria128(<16 x i8> %a0, <16 x i8> %a2) { ; CHECK: movl ; CHECK: movl @@ -1216,8 +1228,19 @@ define <16 x i8> @test_x86_sse42_pcmpestrm128(<16 x i8> %a0, <16 x i8> %a2) { declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone +define <16 x i8> @test_x86_sse42_pcmpestrm128_load(<16 x i8> %a0, <16 x i8>* %a2) { + ; CHECK: movl $7 + ; CHECK: movl $7 + ; CHECK: vpcmpestrm $7, + ; CHECK-NOT: vmov + %1 = load <16 x i8>* %a2 + %res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %1, i32 7, i8 7) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} + + define i32 @test_x86_sse42_pcmpistri128(<16 x i8> %a0, <16 x i8> %a1) { - ; CHECK: vpcmpistri + ; CHECK: vpcmpistri $7 ; CHECK: movl %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] ret i32 %res @@ -1225,6 +1248,16 @@ define i32 @test_x86_sse42_pcmpistri128(<16 x i8> %a0, <16 x i8> %a1) { declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind readnone +define i32 @test_x86_sse42_pcmpistri128_load(<16 x i8>* %a0, <16 x i8>* %a1) { + ; CHECK: vpcmpistri $7, ( + ; CHECK: movl + %1 = load <16 x i8>* %a0 + %2 = load <16 x i8>* %a1 + %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %1, <16 x i8> %2, i8 7) ; <i32> [#uses=1] + ret i32 %res +} + + define i32 @test_x86_sse42_pcmpistria128(<16 x i8> %a0, <16 x i8> %a1) { ; CHECK: vpcmpistri ; CHECK: seta @@ -1271,7 +1304,7 @@ declare i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8>, <16 x i8>, i8) nounwind rea define <16 x i8> @test_x86_sse42_pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1) { - ; CHECK: vpcmpistrm + ; CHECK: vpcmpistrm $7 ; CHECK-NOT: vmov %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<16 x i8>> [#uses=1] ret <16 x i8> %res @@ -1279,6 +1312,15 @@ define <16 x i8> @test_x86_sse42_pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1) { declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwind readnone +define <16 x i8> @test_x86_sse42_pcmpistrm128_load(<16 x i8> %a0, <16 x i8>* %a1) { + ; CHECK: vpcmpistrm $7, ( + ; CHECK-NOT: vmov + %1 = load <16 x i8>* %a1 + %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %1, i8 7) ; <<16 x i8>> 
[#uses=1] + ret <16 x i8> %res +} + + define <4 x float> @test_x86_sse_add_ss(<4 x float> %a0, <4 x float> %a1) { ; CHECK: vaddss %res = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] diff --git a/test/Instrumentation/ThreadSanitizer/atomic.ll b/test/Instrumentation/ThreadSanitizer/atomic.ll index ed3c821205..107dbdc0f2 100644 --- a/test/Instrumentation/ThreadSanitizer/atomic.ll +++ b/test/Instrumentation/ThreadSanitizer/atomic.ll @@ -8,7 +8,7 @@ entry: ret i8 %0 } ; CHECK: atomic8_load_unordered -; CHECK: call i8 @__tsan_atomic8_load(i8* %a, i32 100501) +; CHECK: call i8 @__tsan_atomic8_load(i8* %a, i32 0) define i8 @atomic8_load_monotonic(i8* %a) nounwind uwtable { entry: @@ -16,7 +16,7 @@ entry: ret i8 %0 } ; CHECK: atomic8_load_monotonic -; CHECK: call i8 @__tsan_atomic8_load(i8* %a, i32 100501) +; CHECK: call i8 @__tsan_atomic8_load(i8* %a, i32 0) define i8 @atomic8_load_acquire(i8* %a) nounwind uwtable { entry: @@ -24,7 +24,7 @@ entry: ret i8 %0 } ; CHECK: atomic8_load_acquire -; CHECK: call i8 @__tsan_atomic8_load(i8* %a, i32 100504) +; CHECK: call i8 @__tsan_atomic8_load(i8* %a, i32 2) define i8 @atomic8_load_seq_cst(i8* %a) nounwind uwtable { entry: @@ -32,7 +32,7 @@ entry: ret i8 %0 } ; CHECK: atomic8_load_seq_cst -; CHECK: call i8 @__tsan_atomic8_load(i8* %a, i32 100532) +; CHECK: call i8 @__tsan_atomic8_load(i8* %a, i32 5) define void @atomic8_store_unordered(i8* %a) nounwind uwtable { entry: @@ -40,7 +40,7 @@ entry: ret void } ; CHECK: atomic8_store_unordered -; CHECK: call void @__tsan_atomic8_store(i8* %a, i8 0, i32 100501) +; CHECK: call void @__tsan_atomic8_store(i8* %a, i8 0, i32 0) define void @atomic8_store_monotonic(i8* %a) nounwind uwtable { entry: @@ -48,7 +48,7 @@ entry: ret void } ; CHECK: atomic8_store_monotonic -; CHECK: call void @__tsan_atomic8_store(i8* %a, i8 0, i32 100501) +; CHECK: call void @__tsan_atomic8_store(i8* %a, i8 0, i32 0) define void @atomic8_store_release(i8* %a) nounwind uwtable { entry: @@ -56,7 +56,7 @@ entry: ret void } ; CHECK: atomic8_store_release -; CHECK: call void @__tsan_atomic8_store(i8* %a, i8 0, i32 100508) +; CHECK: call void @__tsan_atomic8_store(i8* %a, i8 0, i32 3) define void @atomic8_store_seq_cst(i8* %a) nounwind uwtable { entry: @@ -64,7 +64,287 @@ entry: ret void } ; CHECK: atomic8_store_seq_cst -; CHECK: call void @__tsan_atomic8_store(i8* %a, i8 0, i32 100532) +; CHECK: call void @__tsan_atomic8_store(i8* %a, i8 0, i32 5) + +define void @atomic8_xchg_monotonic(i8* %a) nounwind uwtable { +entry: + atomicrmw xchg i8* %a, i8 0 monotonic + ret void +} +; CHECK: atomic8_xchg_monotonic +; CHECK: call i8 @__tsan_atomic8_exchange(i8* %a, i8 0, i32 0) + +define void @atomic8_add_monotonic(i8* %a) nounwind uwtable { +entry: + atomicrmw add i8* %a, i8 0 monotonic + ret void +} +; CHECK: atomic8_add_monotonic +; CHECK: call i8 @__tsan_atomic8_fetch_add(i8* %a, i8 0, i32 0) + +define void @atomic8_sub_monotonic(i8* %a) nounwind uwtable { +entry: + atomicrmw sub i8* %a, i8 0 monotonic + ret void +} +; CHECK: atomic8_sub_monotonic +; CHECK: call i8 @__tsan_atomic8_fetch_sub(i8* %a, i8 0, i32 0) + +define void @atomic8_and_monotonic(i8* %a) nounwind uwtable { +entry: + atomicrmw and i8* %a, i8 0 monotonic + ret void +} +; CHECK: atomic8_and_monotonic +; CHECK: call i8 @__tsan_atomic8_fetch_and(i8* %a, i8 0, i32 0) + +define void @atomic8_or_monotonic(i8* %a) nounwind uwtable { +entry: + atomicrmw or i8* %a, i8 0 monotonic + ret void +} +; CHECK: atomic8_or_monotonic +; CHECK: call i8 
@__tsan_atomic8_fetch_or(i8* %a, i8 0, i32 0) + +define void @atomic8_xor_monotonic(i8* %a) nounwind uwtable { +entry: + atomicrmw xor i8* %a, i8 0 monotonic + ret void +} +; CHECK: atomic8_xor_monotonic +; CHECK: call i8 @__tsan_atomic8_fetch_xor(i8* %a, i8 0, i32 0) + +define void @atomic8_xchg_acquire(i8* %a) nounwind uwtable { +entry: + atomicrmw xchg i8* %a, i8 0 acquire + ret void +} +; CHECK: atomic8_xchg_acquire +; CHECK: call i8 @__tsan_atomic8_exchange(i8* %a, i8 0, i32 2) + +define void @atomic8_add_acquire(i8* %a) nounwind uwtable { +entry: + atomicrmw add i8* %a, i8 0 acquire + ret void +} +; CHECK: atomic8_add_acquire +; CHECK: call i8 @__tsan_atomic8_fetch_add(i8* %a, i8 0, i32 2) + +define void @atomic8_sub_acquire(i8* %a) nounwind uwtable { +entry: + atomicrmw sub i8* %a, i8 0 acquire + ret void +} +; CHECK: atomic8_sub_acquire +; CHECK: call i8 @__tsan_atomic8_fetch_sub(i8* %a, i8 0, i32 2) + +define void @atomic8_and_acquire(i8* %a) nounwind uwtable { +entry: + atomicrmw and i8* %a, i8 0 acquire + ret void +} +; CHECK: atomic8_and_acquire +; CHECK: call i8 @__tsan_atomic8_fetch_and(i8* %a, i8 0, i32 2) + +define void @atomic8_or_acquire(i8* %a) nounwind uwtable { +entry: + atomicrmw or i8* %a, i8 0 acquire + ret void +} +; CHECK: atomic8_or_acquire +; CHECK: call i8 @__tsan_atomic8_fetch_or(i8* %a, i8 0, i32 2) + +define void @atomic8_xor_acquire(i8* %a) nounwind uwtable { +entry: + atomicrmw xor i8* %a, i8 0 acquire + ret void +} +; CHECK: atomic8_xor_acquire +; CHECK: call i8 @__tsan_atomic8_fetch_xor(i8* %a, i8 0, i32 2) + +define void @atomic8_xchg_release(i8* %a) nounwind uwtable { +entry: + atomicrmw xchg i8* %a, i8 0 release + ret void +} +; CHECK: atomic8_xchg_release +; CHECK: call i8 @__tsan_atomic8_exchange(i8* %a, i8 0, i32 3) + +define void @atomic8_add_release(i8* %a) nounwind uwtable { +entry: + atomicrmw add i8* %a, i8 0 release + ret void +} +; CHECK: atomic8_add_release +; CHECK: call i8 @__tsan_atomic8_fetch_add(i8* %a, i8 0, i32 3) + +define void @atomic8_sub_release(i8* %a) nounwind uwtable { +entry: + atomicrmw sub i8* %a, i8 0 release + ret void +} +; CHECK: atomic8_sub_release +; CHECK: call i8 @__tsan_atomic8_fetch_sub(i8* %a, i8 0, i32 3) + +define void @atomic8_and_release(i8* %a) nounwind uwtable { +entry: + atomicrmw and i8* %a, i8 0 release + ret void +} +; CHECK: atomic8_and_release +; CHECK: call i8 @__tsan_atomic8_fetch_and(i8* %a, i8 0, i32 3) + +define void @atomic8_or_release(i8* %a) nounwind uwtable { +entry: + atomicrmw or i8* %a, i8 0 release + ret void +} +; CHECK: atomic8_or_release +; CHECK: call i8 @__tsan_atomic8_fetch_or(i8* %a, i8 0, i32 3) + +define void @atomic8_xor_release(i8* %a) nounwind uwtable { +entry: + atomicrmw xor i8* %a, i8 0 release + ret void +} +; CHECK: atomic8_xor_release +; CHECK: call i8 @__tsan_atomic8_fetch_xor(i8* %a, i8 0, i32 3) + +define void @atomic8_xchg_acq_rel(i8* %a) nounwind uwtable { +entry: + atomicrmw xchg i8* %a, i8 0 acq_rel + ret void +} +; CHECK: atomic8_xchg_acq_rel +; CHECK: call i8 @__tsan_atomic8_exchange(i8* %a, i8 0, i32 4) + +define void @atomic8_add_acq_rel(i8* %a) nounwind uwtable { +entry: + atomicrmw add i8* %a, i8 0 acq_rel + ret void +} +; CHECK: atomic8_add_acq_rel +; CHECK: call i8 @__tsan_atomic8_fetch_add(i8* %a, i8 0, i32 4) + +define void @atomic8_sub_acq_rel(i8* %a) nounwind uwtable { +entry: + atomicrmw sub i8* %a, i8 0 acq_rel + ret void +} +; CHECK: atomic8_sub_acq_rel +; CHECK: call i8 @__tsan_atomic8_fetch_sub(i8* %a, i8 0, i32 4) + +define void 
@atomic8_and_acq_rel(i8* %a) nounwind uwtable { +entry: + atomicrmw and i8* %a, i8 0 acq_rel + ret void +} +; CHECK: atomic8_and_acq_rel +; CHECK: call i8 @__tsan_atomic8_fetch_and(i8* %a, i8 0, i32 4) + +define void @atomic8_or_acq_rel(i8* %a) nounwind uwtable { +entry: + atomicrmw or i8* %a, i8 0 acq_rel + ret void +} +; CHECK: atomic8_or_acq_rel +; CHECK: call i8 @__tsan_atomic8_fetch_or(i8* %a, i8 0, i32 4) + +define void @atomic8_xor_acq_rel(i8* %a) nounwind uwtable { +entry: + atomicrmw xor i8* %a, i8 0 acq_rel + ret void +} +; CHECK: atomic8_xor_acq_rel +; CHECK: call i8 @__tsan_atomic8_fetch_xor(i8* %a, i8 0, i32 4) + +define void @atomic8_xchg_seq_cst(i8* %a) nounwind uwtable { +entry: + atomicrmw xchg i8* %a, i8 0 seq_cst + ret void +} +; CHECK: atomic8_xchg_seq_cst +; CHECK: call i8 @__tsan_atomic8_exchange(i8* %a, i8 0, i32 5) + +define void @atomic8_add_seq_cst(i8* %a) nounwind uwtable { +entry: + atomicrmw add i8* %a, i8 0 seq_cst + ret void +} +; CHECK: atomic8_add_seq_cst +; CHECK: call i8 @__tsan_atomic8_fetch_add(i8* %a, i8 0, i32 5) + +define void @atomic8_sub_seq_cst(i8* %a) nounwind uwtable { +entry: + atomicrmw sub i8* %a, i8 0 seq_cst + ret void +} +; CHECK: atomic8_sub_seq_cst +; CHECK: call i8 @__tsan_atomic8_fetch_sub(i8* %a, i8 0, i32 5) + +define void @atomic8_and_seq_cst(i8* %a) nounwind uwtable { +entry: + atomicrmw and i8* %a, i8 0 seq_cst + ret void +} +; CHECK: atomic8_and_seq_cst +; CHECK: call i8 @__tsan_atomic8_fetch_and(i8* %a, i8 0, i32 5) + +define void @atomic8_or_seq_cst(i8* %a) nounwind uwtable { +entry: + atomicrmw or i8* %a, i8 0 seq_cst + ret void +} +; CHECK: atomic8_or_seq_cst +; CHECK: call i8 @__tsan_atomic8_fetch_or(i8* %a, i8 0, i32 5) + +define void @atomic8_xor_seq_cst(i8* %a) nounwind uwtable { +entry: + atomicrmw xor i8* %a, i8 0 seq_cst + ret void +} +; CHECK: atomic8_xor_seq_cst +; CHECK: call i8 @__tsan_atomic8_fetch_xor(i8* %a, i8 0, i32 5) + +define void @atomic8_cas_monotonic(i8* %a) nounwind uwtable { +entry: + cmpxchg i8* %a, i8 0, i8 1 monotonic + ret void +} +; CHECK: atomic8_cas_monotonic +; CHECK: call i8 @__tsan_atomic8_compare_exchange_val(i8* %a, i8 0, i8 1, i32 0) + +define void @atomic8_cas_acquire(i8* %a) nounwind uwtable { +entry: + cmpxchg i8* %a, i8 0, i8 1 acquire + ret void +} +; CHECK: atomic8_cas_acquire +; CHECK: call i8 @__tsan_atomic8_compare_exchange_val(i8* %a, i8 0, i8 1, i32 2) + +define void @atomic8_cas_release(i8* %a) nounwind uwtable { +entry: + cmpxchg i8* %a, i8 0, i8 1 release + ret void +} +; CHECK: atomic8_cas_release +; CHECK: call i8 @__tsan_atomic8_compare_exchange_val(i8* %a, i8 0, i8 1, i32 3) + +define void @atomic8_cas_acq_rel(i8* %a) nounwind uwtable { +entry: + cmpxchg i8* %a, i8 0, i8 1 acq_rel + ret void +} +; CHECK: atomic8_cas_acq_rel +; CHECK: call i8 @__tsan_atomic8_compare_exchange_val(i8* %a, i8 0, i8 1, i32 4) + +define void @atomic8_cas_seq_cst(i8* %a) nounwind uwtable { +entry: + cmpxchg i8* %a, i8 0, i8 1 seq_cst + ret void +} +; CHECK: atomic8_cas_seq_cst +; CHECK: call i8 @__tsan_atomic8_compare_exchange_val(i8* %a, i8 0, i8 1, i32 5) define i16 @atomic16_load_unordered(i16* %a) nounwind uwtable { entry: @@ -72,7 +352,7 @@ entry: ret i16 %0 } ; CHECK: atomic16_load_unordered -; CHECK: call i16 @__tsan_atomic16_load(i16* %a, i32 100501) +; CHECK: call i16 @__tsan_atomic16_load(i16* %a, i32 0) define i16 @atomic16_load_monotonic(i16* %a) nounwind uwtable { entry: @@ -80,7 +360,7 @@ entry: ret i16 %0 } ; CHECK: atomic16_load_monotonic -; CHECK: call i16 
@__tsan_atomic16_load(i16* %a, i32 100501) +; CHECK: call i16 @__tsan_atomic16_load(i16* %a, i32 0) define i16 @atomic16_load_acquire(i16* %a) nounwind uwtable { entry: @@ -88,7 +368,7 @@ entry: ret i16 %0 } ; CHECK: atomic16_load_acquire -; CHECK: call i16 @__tsan_atomic16_load(i16* %a, i32 100504) +; CHECK: call i16 @__tsan_atomic16_load(i16* %a, i32 2) define i16 @atomic16_load_seq_cst(i16* %a) nounwind uwtable { entry: @@ -96,7 +376,7 @@ entry: ret i16 %0 } ; CHECK: atomic16_load_seq_cst -; CHECK: call i16 @__tsan_atomic16_load(i16* %a, i32 100532) +; CHECK: call i16 @__tsan_atomic16_load(i16* %a, i32 5) define void @atomic16_store_unordered(i16* %a) nounwind uwtable { entry: @@ -104,7 +384,7 @@ entry: ret void } ; CHECK: atomic16_store_unordered -; CHECK: call void @__tsan_atomic16_store(i16* %a, i16 0, i32 100501) +; CHECK: call void @__tsan_atomic16_store(i16* %a, i16 0, i32 0) define void @atomic16_store_monotonic(i16* %a) nounwind uwtable { entry: @@ -112,7 +392,7 @@ entry: ret void } ; CHECK: atomic16_store_monotonic -; CHECK: call void @__tsan_atomic16_store(i16* %a, i16 0, i32 100501) +; CHECK: call void @__tsan_atomic16_store(i16* %a, i16 0, i32 0) define void @atomic16_store_release(i16* %a) nounwind uwtable { entry: @@ -120,7 +400,7 @@ entry: ret void } ; CHECK: atomic16_store_release -; CHECK: call void @__tsan_atomic16_store(i16* %a, i16 0, i32 100508) +; CHECK: call void @__tsan_atomic16_store(i16* %a, i16 0, i32 3) define void @atomic16_store_seq_cst(i16* %a) nounwind uwtable { entry: @@ -128,7 +408,287 @@ entry: ret void } ; CHECK: atomic16_store_seq_cst -; CHECK: call void @__tsan_atomic16_store(i16* %a, i16 0, i32 100532) +; CHECK: call void @__tsan_atomic16_store(i16* %a, i16 0, i32 5) + +define void @atomic16_xchg_monotonic(i16* %a) nounwind uwtable { +entry: + atomicrmw xchg i16* %a, i16 0 monotonic + ret void +} +; CHECK: atomic16_xchg_monotonic +; CHECK: call i16 @__tsan_atomic16_exchange(i16* %a, i16 0, i32 0) + +define void @atomic16_add_monotonic(i16* %a) nounwind uwtable { +entry: + atomicrmw add i16* %a, i16 0 monotonic + ret void +} +; CHECK: atomic16_add_monotonic +; CHECK: call i16 @__tsan_atomic16_fetch_add(i16* %a, i16 0, i32 0) + +define void @atomic16_sub_monotonic(i16* %a) nounwind uwtable { +entry: + atomicrmw sub i16* %a, i16 0 monotonic + ret void +} +; CHECK: atomic16_sub_monotonic +; CHECK: call i16 @__tsan_atomic16_fetch_sub(i16* %a, i16 0, i32 0) + +define void @atomic16_and_monotonic(i16* %a) nounwind uwtable { +entry: + atomicrmw and i16* %a, i16 0 monotonic + ret void +} +; CHECK: atomic16_and_monotonic +; CHECK: call i16 @__tsan_atomic16_fetch_and(i16* %a, i16 0, i32 0) + +define void @atomic16_or_monotonic(i16* %a) nounwind uwtable { +entry: + atomicrmw or i16* %a, i16 0 monotonic + ret void +} +; CHECK: atomic16_or_monotonic +; CHECK: call i16 @__tsan_atomic16_fetch_or(i16* %a, i16 0, i32 0) + +define void @atomic16_xor_monotonic(i16* %a) nounwind uwtable { +entry: + atomicrmw xor i16* %a, i16 0 monotonic + ret void +} +; CHECK: atomic16_xor_monotonic +; CHECK: call i16 @__tsan_atomic16_fetch_xor(i16* %a, i16 0, i32 0) + +define void @atomic16_xchg_acquire(i16* %a) nounwind uwtable { +entry: + atomicrmw xchg i16* %a, i16 0 acquire + ret void +} +; CHECK: atomic16_xchg_acquire +; CHECK: call i16 @__tsan_atomic16_exchange(i16* %a, i16 0, i32 2) + +define void @atomic16_add_acquire(i16* %a) nounwind uwtable { +entry: + atomicrmw add i16* %a, i16 0 acquire + ret void +} +; CHECK: atomic16_add_acquire +; CHECK: call i16 
@__tsan_atomic16_fetch_add(i16* %a, i16 0, i32 2) + +define void @atomic16_sub_acquire(i16* %a) nounwind uwtable { +entry: + atomicrmw sub i16* %a, i16 0 acquire + ret void +} +; CHECK: atomic16_sub_acquire +; CHECK: call i16 @__tsan_atomic16_fetch_sub(i16* %a, i16 0, i32 2) + +define void @atomic16_and_acquire(i16* %a) nounwind uwtable { +entry: + atomicrmw and i16* %a, i16 0 acquire + ret void +} +; CHECK: atomic16_and_acquire +; CHECK: call i16 @__tsan_atomic16_fetch_and(i16* %a, i16 0, i32 2) + +define void @atomic16_or_acquire(i16* %a) nounwind uwtable { +entry: + atomicrmw or i16* %a, i16 0 acquire + ret void +} +; CHECK: atomic16_or_acquire +; CHECK: call i16 @__tsan_atomic16_fetch_or(i16* %a, i16 0, i32 2) + +define void @atomic16_xor_acquire(i16* %a) nounwind uwtable { +entry: + atomicrmw xor i16* %a, i16 0 acquire + ret void +} +; CHECK: atomic16_xor_acquire +; CHECK: call i16 @__tsan_atomic16_fetch_xor(i16* %a, i16 0, i32 2) + +define void @atomic16_xchg_release(i16* %a) nounwind uwtable { +entry: + atomicrmw xchg i16* %a, i16 0 release + ret void +} +; CHECK: atomic16_xchg_release +; CHECK: call i16 @__tsan_atomic16_exchange(i16* %a, i16 0, i32 3) + +define void @atomic16_add_release(i16* %a) nounwind uwtable { +entry: + atomicrmw add i16* %a, i16 0 release + ret void +} +; CHECK: atomic16_add_release +; CHECK: call i16 @__tsan_atomic16_fetch_add(i16* %a, i16 0, i32 3) + +define void @atomic16_sub_release(i16* %a) nounwind uwtable { +entry: + atomicrmw sub i16* %a, i16 0 release + ret void +} +; CHECK: atomic16_sub_release +; CHECK: call i16 @__tsan_atomic16_fetch_sub(i16* %a, i16 0, i32 3) + +define void @atomic16_and_release(i16* %a) nounwind uwtable { +entry: + atomicrmw and i16* %a, i16 0 release + ret void +} +; CHECK: atomic16_and_release +; CHECK: call i16 @__tsan_atomic16_fetch_and(i16* %a, i16 0, i32 3) + +define void @atomic16_or_release(i16* %a) nounwind uwtable { +entry: + atomicrmw or i16* %a, i16 0 release + ret void +} +; CHECK: atomic16_or_release +; CHECK: call i16 @__tsan_atomic16_fetch_or(i16* %a, i16 0, i32 3) + +define void @atomic16_xor_release(i16* %a) nounwind uwtable { +entry: + atomicrmw xor i16* %a, i16 0 release + ret void +} +; CHECK: atomic16_xor_release +; CHECK: call i16 @__tsan_atomic16_fetch_xor(i16* %a, i16 0, i32 3) + +define void @atomic16_xchg_acq_rel(i16* %a) nounwind uwtable { +entry: + atomicrmw xchg i16* %a, i16 0 acq_rel + ret void +} +; CHECK: atomic16_xchg_acq_rel +; CHECK: call i16 @__tsan_atomic16_exchange(i16* %a, i16 0, i32 4) + +define void @atomic16_add_acq_rel(i16* %a) nounwind uwtable { +entry: + atomicrmw add i16* %a, i16 0 acq_rel + ret void +} +; CHECK: atomic16_add_acq_rel +; CHECK: call i16 @__tsan_atomic16_fetch_add(i16* %a, i16 0, i32 4) + +define void @atomic16_sub_acq_rel(i16* %a) nounwind uwtable { +entry: + atomicrmw sub i16* %a, i16 0 acq_rel + ret void +} +; CHECK: atomic16_sub_acq_rel +; CHECK: call i16 @__tsan_atomic16_fetch_sub(i16* %a, i16 0, i32 4) + +define void @atomic16_and_acq_rel(i16* %a) nounwind uwtable { +entry: + atomicrmw and i16* %a, i16 0 acq_rel + ret void +} +; CHECK: atomic16_and_acq_rel +; CHECK: call i16 @__tsan_atomic16_fetch_and(i16* %a, i16 0, i32 4) + +define void @atomic16_or_acq_rel(i16* %a) nounwind uwtable { +entry: + atomicrmw or i16* %a, i16 0 acq_rel + ret void +} +; CHECK: atomic16_or_acq_rel +; CHECK: call i16 @__tsan_atomic16_fetch_or(i16* %a, i16 0, i32 4) + +define void @atomic16_xor_acq_rel(i16* %a) nounwind uwtable { +entry: + atomicrmw xor i16* %a, i16 0 acq_rel + ret void 
+} +; CHECK: atomic16_xor_acq_rel +; CHECK: call i16 @__tsan_atomic16_fetch_xor(i16* %a, i16 0, i32 4) + +define void @atomic16_xchg_seq_cst(i16* %a) nounwind uwtable { +entry: + atomicrmw xchg i16* %a, i16 0 seq_cst + ret void +} +; CHECK: atomic16_xchg_seq_cst +; CHECK: call i16 @__tsan_atomic16_exchange(i16* %a, i16 0, i32 5) + +define void @atomic16_add_seq_cst(i16* %a) nounwind uwtable { +entry: + atomicrmw add i16* %a, i16 0 seq_cst + ret void +} +; CHECK: atomic16_add_seq_cst +; CHECK: call i16 @__tsan_atomic16_fetch_add(i16* %a, i16 0, i32 5) + +define void @atomic16_sub_seq_cst(i16* %a) nounwind uwtable { +entry: + atomicrmw sub i16* %a, i16 0 seq_cst + ret void +} +; CHECK: atomic16_sub_seq_cst +; CHECK: call i16 @__tsan_atomic16_fetch_sub(i16* %a, i16 0, i32 5) + +define void @atomic16_and_seq_cst(i16* %a) nounwind uwtable { +entry: + atomicrmw and i16* %a, i16 0 seq_cst + ret void +} +; CHECK: atomic16_and_seq_cst +; CHECK: call i16 @__tsan_atomic16_fetch_and(i16* %a, i16 0, i32 5) + +define void @atomic16_or_seq_cst(i16* %a) nounwind uwtable { +entry: + atomicrmw or i16* %a, i16 0 seq_cst + ret void +} +; CHECK: atomic16_or_seq_cst +; CHECK: call i16 @__tsan_atomic16_fetch_or(i16* %a, i16 0, i32 5) + +define void @atomic16_xor_seq_cst(i16* %a) nounwind uwtable { +entry: + atomicrmw xor i16* %a, i16 0 seq_cst + ret void +} +; CHECK: atomic16_xor_seq_cst +; CHECK: call i16 @__tsan_atomic16_fetch_xor(i16* %a, i16 0, i32 5) + +define void @atomic16_cas_monotonic(i16* %a) nounwind uwtable { +entry: + cmpxchg i16* %a, i16 0, i16 1 monotonic + ret void +} +; CHECK: atomic16_cas_monotonic +; CHECK: call i16 @__tsan_atomic16_compare_exchange_val(i16* %a, i16 0, i16 1, i32 0) + +define void @atomic16_cas_acquire(i16* %a) nounwind uwtable { +entry: + cmpxchg i16* %a, i16 0, i16 1 acquire + ret void +} +; CHECK: atomic16_cas_acquire +; CHECK: call i16 @__tsan_atomic16_compare_exchange_val(i16* %a, i16 0, i16 1, i32 2) + +define void @atomic16_cas_release(i16* %a) nounwind uwtable { +entry: + cmpxchg i16* %a, i16 0, i16 1 release + ret void +} +; CHECK: atomic16_cas_release +; CHECK: call i16 @__tsan_atomic16_compare_exchange_val(i16* %a, i16 0, i16 1, i32 3) + +define void @atomic16_cas_acq_rel(i16* %a) nounwind uwtable { +entry: + cmpxchg i16* %a, i16 0, i16 1 acq_rel + ret void +} +; CHECK: atomic16_cas_acq_rel +; CHECK: call i16 @__tsan_atomic16_compare_exchange_val(i16* %a, i16 0, i16 1, i32 4) + +define void @atomic16_cas_seq_cst(i16* %a) nounwind uwtable { +entry: + cmpxchg i16* %a, i16 0, i16 1 seq_cst + ret void +} +; CHECK: atomic16_cas_seq_cst +; CHECK: call i16 @__tsan_atomic16_compare_exchange_val(i16* %a, i16 0, i16 1, i32 5) define i32 @atomic32_load_unordered(i32* %a) nounwind uwtable { entry: @@ -136,7 +696,7 @@ entry: ret i32 %0 } ; CHECK: atomic32_load_unordered -; CHECK: call i32 @__tsan_atomic32_load(i32* %a, i32 100501) +; CHECK: call i32 @__tsan_atomic32_load(i32* %a, i32 0) define i32 @atomic32_load_monotonic(i32* %a) nounwind uwtable { entry: @@ -144,7 +704,7 @@ entry: ret i32 %0 } ; CHECK: atomic32_load_monotonic -; CHECK: call i32 @__tsan_atomic32_load(i32* %a, i32 100501) +; CHECK: call i32 @__tsan_atomic32_load(i32* %a, i32 0) define i32 @atomic32_load_acquire(i32* %a) nounwind uwtable { entry: @@ -152,7 +712,7 @@ entry: ret i32 %0 } ; CHECK: atomic32_load_acquire -; CHECK: call i32 @__tsan_atomic32_load(i32* %a, i32 100504) +; CHECK: call i32 @__tsan_atomic32_load(i32* %a, i32 2) define i32 @atomic32_load_seq_cst(i32* %a) nounwind uwtable { entry: @@ -160,7 
+720,7 @@ entry: ret i32 %0 } ; CHECK: atomic32_load_seq_cst -; CHECK: call i32 @__tsan_atomic32_load(i32* %a, i32 100532) +; CHECK: call i32 @__tsan_atomic32_load(i32* %a, i32 5) define void @atomic32_store_unordered(i32* %a) nounwind uwtable { entry: @@ -168,7 +728,7 @@ entry: ret void } ; CHECK: atomic32_store_unordered -; CHECK: call void @__tsan_atomic32_store(i32* %a, i32 0, i32 100501) +; CHECK: call void @__tsan_atomic32_store(i32* %a, i32 0, i32 0) define void @atomic32_store_monotonic(i32* %a) nounwind uwtable { entry: @@ -176,7 +736,7 @@ entry: ret void } ; CHECK: atomic32_store_monotonic -; CHECK: call void @__tsan_atomic32_store(i32* %a, i32 0, i32 100501) +; CHECK: call void @__tsan_atomic32_store(i32* %a, i32 0, i32 0) define void @atomic32_store_release(i32* %a) nounwind uwtable { entry: @@ -184,7 +744,7 @@ entry: ret void } ; CHECK: atomic32_store_release -; CHECK: call void @__tsan_atomic32_store(i32* %a, i32 0, i32 100508) +; CHECK: call void @__tsan_atomic32_store(i32* %a, i32 0, i32 3) define void @atomic32_store_seq_cst(i32* %a) nounwind uwtable { entry: @@ -192,7 +752,287 @@ entry: ret void } ; CHECK: atomic32_store_seq_cst -; CHECK: call void @__tsan_atomic32_store(i32* %a, i32 0, i32 100532) +; CHECK: call void @__tsan_atomic32_store(i32* %a, i32 0, i32 5) + +define void @atomic32_xchg_monotonic(i32* %a) nounwind uwtable { +entry: + atomicrmw xchg i32* %a, i32 0 monotonic + ret void +} +; CHECK: atomic32_xchg_monotonic +; CHECK: call i32 @__tsan_atomic32_exchange(i32* %a, i32 0, i32 0) + +define void @atomic32_add_monotonic(i32* %a) nounwind uwtable { +entry: + atomicrmw add i32* %a, i32 0 monotonic + ret void +} +; CHECK: atomic32_add_monotonic +; CHECK: call i32 @__tsan_atomic32_fetch_add(i32* %a, i32 0, i32 0) + +define void @atomic32_sub_monotonic(i32* %a) nounwind uwtable { +entry: + atomicrmw sub i32* %a, i32 0 monotonic + ret void +} +; CHECK: atomic32_sub_monotonic +; CHECK: call i32 @__tsan_atomic32_fetch_sub(i32* %a, i32 0, i32 0) + +define void @atomic32_and_monotonic(i32* %a) nounwind uwtable { +entry: + atomicrmw and i32* %a, i32 0 monotonic + ret void +} +; CHECK: atomic32_and_monotonic +; CHECK: call i32 @__tsan_atomic32_fetch_and(i32* %a, i32 0, i32 0) + +define void @atomic32_or_monotonic(i32* %a) nounwind uwtable { +entry: + atomicrmw or i32* %a, i32 0 monotonic + ret void +} +; CHECK: atomic32_or_monotonic +; CHECK: call i32 @__tsan_atomic32_fetch_or(i32* %a, i32 0, i32 0) + +define void @atomic32_xor_monotonic(i32* %a) nounwind uwtable { +entry: + atomicrmw xor i32* %a, i32 0 monotonic + ret void +} +; CHECK: atomic32_xor_monotonic +; CHECK: call i32 @__tsan_atomic32_fetch_xor(i32* %a, i32 0, i32 0) + +define void @atomic32_xchg_acquire(i32* %a) nounwind uwtable { +entry: + atomicrmw xchg i32* %a, i32 0 acquire + ret void +} +; CHECK: atomic32_xchg_acquire +; CHECK: call i32 @__tsan_atomic32_exchange(i32* %a, i32 0, i32 2) + +define void @atomic32_add_acquire(i32* %a) nounwind uwtable { +entry: + atomicrmw add i32* %a, i32 0 acquire + ret void +} +; CHECK: atomic32_add_acquire +; CHECK: call i32 @__tsan_atomic32_fetch_add(i32* %a, i32 0, i32 2) + +define void @atomic32_sub_acquire(i32* %a) nounwind uwtable { +entry: + atomicrmw sub i32* %a, i32 0 acquire + ret void +} +; CHECK: atomic32_sub_acquire +; CHECK: call i32 @__tsan_atomic32_fetch_sub(i32* %a, i32 0, i32 2) + +define void @atomic32_and_acquire(i32* %a) nounwind uwtable { +entry: + atomicrmw and i32* %a, i32 0 acquire + ret void +} +; CHECK: atomic32_and_acquire +; CHECK: call i32 
@__tsan_atomic32_fetch_and(i32* %a, i32 0, i32 2) + +define void @atomic32_or_acquire(i32* %a) nounwind uwtable { +entry: + atomicrmw or i32* %a, i32 0 acquire + ret void +} +; CHECK: atomic32_or_acquire +; CHECK: call i32 @__tsan_atomic32_fetch_or(i32* %a, i32 0, i32 2) + +define void @atomic32_xor_acquire(i32* %a) nounwind uwtable { +entry: + atomicrmw xor i32* %a, i32 0 acquire + ret void +} +; CHECK: atomic32_xor_acquire +; CHECK: call i32 @__tsan_atomic32_fetch_xor(i32* %a, i32 0, i32 2) + +define void @atomic32_xchg_release(i32* %a) nounwind uwtable { +entry: + atomicrmw xchg i32* %a, i32 0 release + ret void +} +; CHECK: atomic32_xchg_release +; CHECK: call i32 @__tsan_atomic32_exchange(i32* %a, i32 0, i32 3) + +define void @atomic32_add_release(i32* %a) nounwind uwtable { +entry: + atomicrmw add i32* %a, i32 0 release + ret void +} +; CHECK: atomic32_add_release +; CHECK: call i32 @__tsan_atomic32_fetch_add(i32* %a, i32 0, i32 3) + +define void @atomic32_sub_release(i32* %a) nounwind uwtable { +entry: + atomicrmw sub i32* %a, i32 0 release + ret void +} +; CHECK: atomic32_sub_release +; CHECK: call i32 @__tsan_atomic32_fetch_sub(i32* %a, i32 0, i32 3) + +define void @atomic32_and_release(i32* %a) nounwind uwtable { +entry: + atomicrmw and i32* %a, i32 0 release + ret void +} +; CHECK: atomic32_and_release +; CHECK: call i32 @__tsan_atomic32_fetch_and(i32* %a, i32 0, i32 3) + +define void @atomic32_or_release(i32* %a) nounwind uwtable { +entry: + atomicrmw or i32* %a, i32 0 release + ret void +} +; CHECK: atomic32_or_release +; CHECK: call i32 @__tsan_atomic32_fetch_or(i32* %a, i32 0, i32 3) + +define void @atomic32_xor_release(i32* %a) nounwind uwtable { +entry: + atomicrmw xor i32* %a, i32 0 release + ret void +} +; CHECK: atomic32_xor_release +; CHECK: call i32 @__tsan_atomic32_fetch_xor(i32* %a, i32 0, i32 3) + +define void @atomic32_xchg_acq_rel(i32* %a) nounwind uwtable { +entry: + atomicrmw xchg i32* %a, i32 0 acq_rel + ret void +} +; CHECK: atomic32_xchg_acq_rel +; CHECK: call i32 @__tsan_atomic32_exchange(i32* %a, i32 0, i32 4) + +define void @atomic32_add_acq_rel(i32* %a) nounwind uwtable { +entry: + atomicrmw add i32* %a, i32 0 acq_rel + ret void +} +; CHECK: atomic32_add_acq_rel +; CHECK: call i32 @__tsan_atomic32_fetch_add(i32* %a, i32 0, i32 4) + +define void @atomic32_sub_acq_rel(i32* %a) nounwind uwtable { +entry: + atomicrmw sub i32* %a, i32 0 acq_rel + ret void +} +; CHECK: atomic32_sub_acq_rel +; CHECK: call i32 @__tsan_atomic32_fetch_sub(i32* %a, i32 0, i32 4) + +define void @atomic32_and_acq_rel(i32* %a) nounwind uwtable { +entry: + atomicrmw and i32* %a, i32 0 acq_rel + ret void +} +; CHECK: atomic32_and_acq_rel +; CHECK: call i32 @__tsan_atomic32_fetch_and(i32* %a, i32 0, i32 4) + +define void @atomic32_or_acq_rel(i32* %a) nounwind uwtable { +entry: + atomicrmw or i32* %a, i32 0 acq_rel + ret void +} +; CHECK: atomic32_or_acq_rel +; CHECK: call i32 @__tsan_atomic32_fetch_or(i32* %a, i32 0, i32 4) + +define void @atomic32_xor_acq_rel(i32* %a) nounwind uwtable { +entry: + atomicrmw xor i32* %a, i32 0 acq_rel + ret void +} +; CHECK: atomic32_xor_acq_rel +; CHECK: call i32 @__tsan_atomic32_fetch_xor(i32* %a, i32 0, i32 4) + +define void @atomic32_xchg_seq_cst(i32* %a) nounwind uwtable { +entry: + atomicrmw xchg i32* %a, i32 0 seq_cst + ret void +} +; CHECK: atomic32_xchg_seq_cst +; CHECK: call i32 @__tsan_atomic32_exchange(i32* %a, i32 0, i32 5) + +define void @atomic32_add_seq_cst(i32* %a) nounwind uwtable { +entry: + atomicrmw add i32* %a, i32 0 seq_cst + ret 
void +} +; CHECK: atomic32_add_seq_cst +; CHECK: call i32 @__tsan_atomic32_fetch_add(i32* %a, i32 0, i32 5) + +define void @atomic32_sub_seq_cst(i32* %a) nounwind uwtable { +entry: + atomicrmw sub i32* %a, i32 0 seq_cst + ret void +} +; CHECK: atomic32_sub_seq_cst +; CHECK: call i32 @__tsan_atomic32_fetch_sub(i32* %a, i32 0, i32 5) + +define void @atomic32_and_seq_cst(i32* %a) nounwind uwtable { +entry: + atomicrmw and i32* %a, i32 0 seq_cst + ret void +} +; CHECK: atomic32_and_seq_cst +; CHECK: call i32 @__tsan_atomic32_fetch_and(i32* %a, i32 0, i32 5) + +define void @atomic32_or_seq_cst(i32* %a) nounwind uwtable { +entry: + atomicrmw or i32* %a, i32 0 seq_cst + ret void +} +; CHECK: atomic32_or_seq_cst +; CHECK: call i32 @__tsan_atomic32_fetch_or(i32* %a, i32 0, i32 5) + +define void @atomic32_xor_seq_cst(i32* %a) nounwind uwtable { +entry: + atomicrmw xor i32* %a, i32 0 seq_cst + ret void +} +; CHECK: atomic32_xor_seq_cst +; CHECK: call i32 @__tsan_atomic32_fetch_xor(i32* %a, i32 0, i32 5) + +define void @atomic32_cas_monotonic(i32* %a) nounwind uwtable { +entry: + cmpxchg i32* %a, i32 0, i32 1 monotonic + ret void +} +; CHECK: atomic32_cas_monotonic +; CHECK: call i32 @__tsan_atomic32_compare_exchange_val(i32* %a, i32 0, i32 1, i32 0) + +define void @atomic32_cas_acquire(i32* %a) nounwind uwtable { +entry: + cmpxchg i32* %a, i32 0, i32 1 acquire + ret void +} +; CHECK: atomic32_cas_acquire +; CHECK: call i32 @__tsan_atomic32_compare_exchange_val(i32* %a, i32 0, i32 1, i32 2) + +define void @atomic32_cas_release(i32* %a) nounwind uwtable { +entry: + cmpxchg i32* %a, i32 0, i32 1 release + ret void +} +; CHECK: atomic32_cas_release +; CHECK: call i32 @__tsan_atomic32_compare_exchange_val(i32* %a, i32 0, i32 1, i32 3) + +define void @atomic32_cas_acq_rel(i32* %a) nounwind uwtable { +entry: + cmpxchg i32* %a, i32 0, i32 1 acq_rel + ret void +} +; CHECK: atomic32_cas_acq_rel +; CHECK: call i32 @__tsan_atomic32_compare_exchange_val(i32* %a, i32 0, i32 1, i32 4) + +define void @atomic32_cas_seq_cst(i32* %a) nounwind uwtable { +entry: + cmpxchg i32* %a, i32 0, i32 1 seq_cst + ret void +} +; CHECK: atomic32_cas_seq_cst +; CHECK: call i32 @__tsan_atomic32_compare_exchange_val(i32* %a, i32 0, i32 1, i32 5) define i64 @atomic64_load_unordered(i64* %a) nounwind uwtable { entry: @@ -200,7 +1040,7 @@ entry: ret i64 %0 } ; CHECK: atomic64_load_unordered -; CHECK: call i64 @__tsan_atomic64_load(i64* %a, i32 100501) +; CHECK: call i64 @__tsan_atomic64_load(i64* %a, i32 0) define i64 @atomic64_load_monotonic(i64* %a) nounwind uwtable { entry: @@ -208,7 +1048,7 @@ entry: ret i64 %0 } ; CHECK: atomic64_load_monotonic -; CHECK: call i64 @__tsan_atomic64_load(i64* %a, i32 100501) +; CHECK: call i64 @__tsan_atomic64_load(i64* %a, i32 0) define i64 @atomic64_load_acquire(i64* %a) nounwind uwtable { entry: @@ -216,7 +1056,7 @@ entry: ret i64 %0 } ; CHECK: atomic64_load_acquire -; CHECK: call i64 @__tsan_atomic64_load(i64* %a, i32 100504) +; CHECK: call i64 @__tsan_atomic64_load(i64* %a, i32 2) define i64 @atomic64_load_seq_cst(i64* %a) nounwind uwtable { entry: @@ -224,7 +1064,7 @@ entry: ret i64 %0 } ; CHECK: atomic64_load_seq_cst -; CHECK: call i64 @__tsan_atomic64_load(i64* %a, i32 100532) +; CHECK: call i64 @__tsan_atomic64_load(i64* %a, i32 5) define void @atomic64_store_unordered(i64* %a) nounwind uwtable { entry: @@ -232,7 +1072,7 @@ entry: ret void } ; CHECK: atomic64_store_unordered -; CHECK: call void @__tsan_atomic64_store(i64* %a, i64 0, i32 100501) +; CHECK: call void @__tsan_atomic64_store(i64* 
%a, i64 0, i32 0) define void @atomic64_store_monotonic(i64* %a) nounwind uwtable { entry: @@ -240,7 +1080,7 @@ entry: ret void } ; CHECK: atomic64_store_monotonic -; CHECK: call void @__tsan_atomic64_store(i64* %a, i64 0, i32 100501) +; CHECK: call void @__tsan_atomic64_store(i64* %a, i64 0, i32 0) define void @atomic64_store_release(i64* %a) nounwind uwtable { entry: @@ -248,7 +1088,7 @@ entry: ret void } ; CHECK: atomic64_store_release -; CHECK: call void @__tsan_atomic64_store(i64* %a, i64 0, i32 100508) +; CHECK: call void @__tsan_atomic64_store(i64* %a, i64 0, i32 3) define void @atomic64_store_seq_cst(i64* %a) nounwind uwtable { entry: @@ -256,7 +1096,287 @@ entry: ret void } ; CHECK: atomic64_store_seq_cst -; CHECK: call void @__tsan_atomic64_store(i64* %a, i64 0, i32 100532) +; CHECK: call void @__tsan_atomic64_store(i64* %a, i64 0, i32 5) + +define void @atomic64_xchg_monotonic(i64* %a) nounwind uwtable { +entry: + atomicrmw xchg i64* %a, i64 0 monotonic + ret void +} +; CHECK: atomic64_xchg_monotonic +; CHECK: call i64 @__tsan_atomic64_exchange(i64* %a, i64 0, i32 0) + +define void @atomic64_add_monotonic(i64* %a) nounwind uwtable { +entry: + atomicrmw add i64* %a, i64 0 monotonic + ret void +} +; CHECK: atomic64_add_monotonic +; CHECK: call i64 @__tsan_atomic64_fetch_add(i64* %a, i64 0, i32 0) + +define void @atomic64_sub_monotonic(i64* %a) nounwind uwtable { +entry: + atomicrmw sub i64* %a, i64 0 monotonic + ret void +} +; CHECK: atomic64_sub_monotonic +; CHECK: call i64 @__tsan_atomic64_fetch_sub(i64* %a, i64 0, i32 0) + +define void @atomic64_and_monotonic(i64* %a) nounwind uwtable { +entry: + atomicrmw and i64* %a, i64 0 monotonic + ret void +} +; CHECK: atomic64_and_monotonic +; CHECK: call i64 @__tsan_atomic64_fetch_and(i64* %a, i64 0, i32 0) + +define void @atomic64_or_monotonic(i64* %a) nounwind uwtable { +entry: + atomicrmw or i64* %a, i64 0 monotonic + ret void +} +; CHECK: atomic64_or_monotonic +; CHECK: call i64 @__tsan_atomic64_fetch_or(i64* %a, i64 0, i32 0) + +define void @atomic64_xor_monotonic(i64* %a) nounwind uwtable { +entry: + atomicrmw xor i64* %a, i64 0 monotonic + ret void +} +; CHECK: atomic64_xor_monotonic +; CHECK: call i64 @__tsan_atomic64_fetch_xor(i64* %a, i64 0, i32 0) + +define void @atomic64_xchg_acquire(i64* %a) nounwind uwtable { +entry: + atomicrmw xchg i64* %a, i64 0 acquire + ret void +} +; CHECK: atomic64_xchg_acquire +; CHECK: call i64 @__tsan_atomic64_exchange(i64* %a, i64 0, i32 2) + +define void @atomic64_add_acquire(i64* %a) nounwind uwtable { +entry: + atomicrmw add i64* %a, i64 0 acquire + ret void +} +; CHECK: atomic64_add_acquire +; CHECK: call i64 @__tsan_atomic64_fetch_add(i64* %a, i64 0, i32 2) + +define void @atomic64_sub_acquire(i64* %a) nounwind uwtable { +entry: + atomicrmw sub i64* %a, i64 0 acquire + ret void +} +; CHECK: atomic64_sub_acquire +; CHECK: call i64 @__tsan_atomic64_fetch_sub(i64* %a, i64 0, i32 2) + +define void @atomic64_and_acquire(i64* %a) nounwind uwtable { +entry: + atomicrmw and i64* %a, i64 0 acquire + ret void +} +; CHECK: atomic64_and_acquire +; CHECK: call i64 @__tsan_atomic64_fetch_and(i64* %a, i64 0, i32 2) + +define void @atomic64_or_acquire(i64* %a) nounwind uwtable { +entry: + atomicrmw or i64* %a, i64 0 acquire + ret void +} +; CHECK: atomic64_or_acquire +; CHECK: call i64 @__tsan_atomic64_fetch_or(i64* %a, i64 0, i32 2) + +define void @atomic64_xor_acquire(i64* %a) nounwind uwtable { +entry: + atomicrmw xor i64* %a, i64 0 acquire + ret void +} +; CHECK: atomic64_xor_acquire +; CHECK: call i64 
@__tsan_atomic64_fetch_xor(i64* %a, i64 0, i32 2) + +define void @atomic64_xchg_release(i64* %a) nounwind uwtable { +entry: + atomicrmw xchg i64* %a, i64 0 release + ret void +} +; CHECK: atomic64_xchg_release +; CHECK: call i64 @__tsan_atomic64_exchange(i64* %a, i64 0, i32 3) + +define void @atomic64_add_release(i64* %a) nounwind uwtable { +entry: + atomicrmw add i64* %a, i64 0 release + ret void +} +; CHECK: atomic64_add_release +; CHECK: call i64 @__tsan_atomic64_fetch_add(i64* %a, i64 0, i32 3) + +define void @atomic64_sub_release(i64* %a) nounwind uwtable { +entry: + atomicrmw sub i64* %a, i64 0 release + ret void +} +; CHECK: atomic64_sub_release +; CHECK: call i64 @__tsan_atomic64_fetch_sub(i64* %a, i64 0, i32 3) + +define void @atomic64_and_release(i64* %a) nounwind uwtable { +entry: + atomicrmw and i64* %a, i64 0 release + ret void +} +; CHECK: atomic64_and_release +; CHECK: call i64 @__tsan_atomic64_fetch_and(i64* %a, i64 0, i32 3) + +define void @atomic64_or_release(i64* %a) nounwind uwtable { +entry: + atomicrmw or i64* %a, i64 0 release + ret void +} +; CHECK: atomic64_or_release +; CHECK: call i64 @__tsan_atomic64_fetch_or(i64* %a, i64 0, i32 3) + +define void @atomic64_xor_release(i64* %a) nounwind uwtable { +entry: + atomicrmw xor i64* %a, i64 0 release + ret void +} +; CHECK: atomic64_xor_release +; CHECK: call i64 @__tsan_atomic64_fetch_xor(i64* %a, i64 0, i32 3) + +define void @atomic64_xchg_acq_rel(i64* %a) nounwind uwtable { +entry: + atomicrmw xchg i64* %a, i64 0 acq_rel + ret void +} +; CHECK: atomic64_xchg_acq_rel +; CHECK: call i64 @__tsan_atomic64_exchange(i64* %a, i64 0, i32 4) + +define void @atomic64_add_acq_rel(i64* %a) nounwind uwtable { +entry: + atomicrmw add i64* %a, i64 0 acq_rel + ret void +} +; CHECK: atomic64_add_acq_rel +; CHECK: call i64 @__tsan_atomic64_fetch_add(i64* %a, i64 0, i32 4) + +define void @atomic64_sub_acq_rel(i64* %a) nounwind uwtable { +entry: + atomicrmw sub i64* %a, i64 0 acq_rel + ret void +} +; CHECK: atomic64_sub_acq_rel +; CHECK: call i64 @__tsan_atomic64_fetch_sub(i64* %a, i64 0, i32 4) + +define void @atomic64_and_acq_rel(i64* %a) nounwind uwtable { +entry: + atomicrmw and i64* %a, i64 0 acq_rel + ret void +} +; CHECK: atomic64_and_acq_rel +; CHECK: call i64 @__tsan_atomic64_fetch_and(i64* %a, i64 0, i32 4) + +define void @atomic64_or_acq_rel(i64* %a) nounwind uwtable { +entry: + atomicrmw or i64* %a, i64 0 acq_rel + ret void +} +; CHECK: atomic64_or_acq_rel +; CHECK: call i64 @__tsan_atomic64_fetch_or(i64* %a, i64 0, i32 4) + +define void @atomic64_xor_acq_rel(i64* %a) nounwind uwtable { +entry: + atomicrmw xor i64* %a, i64 0 acq_rel + ret void +} +; CHECK: atomic64_xor_acq_rel +; CHECK: call i64 @__tsan_atomic64_fetch_xor(i64* %a, i64 0, i32 4) + +define void @atomic64_xchg_seq_cst(i64* %a) nounwind uwtable { +entry: + atomicrmw xchg i64* %a, i64 0 seq_cst + ret void +} +; CHECK: atomic64_xchg_seq_cst +; CHECK: call i64 @__tsan_atomic64_exchange(i64* %a, i64 0, i32 5) + +define void @atomic64_add_seq_cst(i64* %a) nounwind uwtable { +entry: + atomicrmw add i64* %a, i64 0 seq_cst + ret void +} +; CHECK: atomic64_add_seq_cst +; CHECK: call i64 @__tsan_atomic64_fetch_add(i64* %a, i64 0, i32 5) + +define void @atomic64_sub_seq_cst(i64* %a) nounwind uwtable { +entry: + atomicrmw sub i64* %a, i64 0 seq_cst + ret void +} +; CHECK: atomic64_sub_seq_cst +; CHECK: call i64 @__tsan_atomic64_fetch_sub(i64* %a, i64 0, i32 5) + +define void @atomic64_and_seq_cst(i64* %a) nounwind uwtable { +entry: + atomicrmw and i64* %a, i64 0 seq_cst + 
ret void +} +; CHECK: atomic64_and_seq_cst +; CHECK: call i64 @__tsan_atomic64_fetch_and(i64* %a, i64 0, i32 5) + +define void @atomic64_or_seq_cst(i64* %a) nounwind uwtable { +entry: + atomicrmw or i64* %a, i64 0 seq_cst + ret void +} +; CHECK: atomic64_or_seq_cst +; CHECK: call i64 @__tsan_atomic64_fetch_or(i64* %a, i64 0, i32 5) + +define void @atomic64_xor_seq_cst(i64* %a) nounwind uwtable { +entry: + atomicrmw xor i64* %a, i64 0 seq_cst + ret void +} +; CHECK: atomic64_xor_seq_cst +; CHECK: call i64 @__tsan_atomic64_fetch_xor(i64* %a, i64 0, i32 5) + +define void @atomic64_cas_monotonic(i64* %a) nounwind uwtable { +entry: + cmpxchg i64* %a, i64 0, i64 1 monotonic + ret void +} +; CHECK: atomic64_cas_monotonic +; CHECK: call i64 @__tsan_atomic64_compare_exchange_val(i64* %a, i64 0, i64 1, i32 0) + +define void @atomic64_cas_acquire(i64* %a) nounwind uwtable { +entry: + cmpxchg i64* %a, i64 0, i64 1 acquire + ret void +} +; CHECK: atomic64_cas_acquire +; CHECK: call i64 @__tsan_atomic64_compare_exchange_val(i64* %a, i64 0, i64 1, i32 2) + +define void @atomic64_cas_release(i64* %a) nounwind uwtable { +entry: + cmpxchg i64* %a, i64 0, i64 1 release + ret void +} +; CHECK: atomic64_cas_release +; CHECK: call i64 @__tsan_atomic64_compare_exchange_val(i64* %a, i64 0, i64 1, i32 3) + +define void @atomic64_cas_acq_rel(i64* %a) nounwind uwtable { +entry: + cmpxchg i64* %a, i64 0, i64 1 acq_rel + ret void +} +; CHECK: atomic64_cas_acq_rel +; CHECK: call i64 @__tsan_atomic64_compare_exchange_val(i64* %a, i64 0, i64 1, i32 4) + +define void @atomic64_cas_seq_cst(i64* %a) nounwind uwtable { +entry: + cmpxchg i64* %a, i64 0, i64 1 seq_cst + ret void +} +; CHECK: atomic64_cas_seq_cst +; CHECK: call i64 @__tsan_atomic64_compare_exchange_val(i64* %a, i64 0, i64 1, i32 5) define i128 @atomic128_load_unordered(i128* %a) nounwind uwtable { entry: @@ -264,7 +1384,7 @@ entry: ret i128 %0 } ; CHECK: atomic128_load_unordered -; CHECK: call i128 @__tsan_atomic128_load(i128* %a, i32 100501) +; CHECK: call i128 @__tsan_atomic128_load(i128* %a, i32 0) define i128 @atomic128_load_monotonic(i128* %a) nounwind uwtable { entry: @@ -272,7 +1392,7 @@ entry: ret i128 %0 } ; CHECK: atomic128_load_monotonic -; CHECK: call i128 @__tsan_atomic128_load(i128* %a, i32 100501) +; CHECK: call i128 @__tsan_atomic128_load(i128* %a, i32 0) define i128 @atomic128_load_acquire(i128* %a) nounwind uwtable { entry: @@ -280,7 +1400,7 @@ entry: ret i128 %0 } ; CHECK: atomic128_load_acquire -; CHECK: call i128 @__tsan_atomic128_load(i128* %a, i32 100504) +; CHECK: call i128 @__tsan_atomic128_load(i128* %a, i32 2) define i128 @atomic128_load_seq_cst(i128* %a) nounwind uwtable { entry: @@ -288,7 +1408,7 @@ entry: ret i128 %0 } ; CHECK: atomic128_load_seq_cst -; CHECK: call i128 @__tsan_atomic128_load(i128* %a, i32 100532) +; CHECK: call i128 @__tsan_atomic128_load(i128* %a, i32 5) define void @atomic128_store_unordered(i128* %a) nounwind uwtable { entry: @@ -296,7 +1416,7 @@ entry: ret void } ; CHECK: atomic128_store_unordered -; CHECK: call void @__tsan_atomic128_store(i128* %a, i128 0, i32 100501) +; CHECK: call void @__tsan_atomic128_store(i128* %a, i128 0, i32 0) define void @atomic128_store_monotonic(i128* %a) nounwind uwtable { entry: @@ -304,7 +1424,7 @@ entry: ret void } ; CHECK: atomic128_store_monotonic -; CHECK: call void @__tsan_atomic128_store(i128* %a, i128 0, i32 100501) +; CHECK: call void @__tsan_atomic128_store(i128* %a, i128 0, i32 0) define void @atomic128_store_release(i128* %a) nounwind uwtable { entry: @@ -312,7 
+1432,7 @@ entry: ret void } ; CHECK: atomic128_store_release -; CHECK: call void @__tsan_atomic128_store(i128* %a, i128 0, i32 100508) +; CHECK: call void @__tsan_atomic128_store(i128* %a, i128 0, i32 3) define void @atomic128_store_seq_cst(i128* %a) nounwind uwtable { entry: @@ -320,4 +1440,348 @@ entry: ret void } ; CHECK: atomic128_store_seq_cst -; CHECK: call void @__tsan_atomic128_store(i128* %a, i128 0, i32 100532) +; CHECK: call void @__tsan_atomic128_store(i128* %a, i128 0, i32 5) + +define void @atomic128_xchg_monotonic(i128* %a) nounwind uwtable { +entry: + atomicrmw xchg i128* %a, i128 0 monotonic + ret void +} +; CHECK: atomic128_xchg_monotonic +; CHECK: call i128 @__tsan_atomic128_exchange(i128* %a, i128 0, i32 0) + +define void @atomic128_add_monotonic(i128* %a) nounwind uwtable { +entry: + atomicrmw add i128* %a, i128 0 monotonic + ret void +} +; CHECK: atomic128_add_monotonic +; CHECK: call i128 @__tsan_atomic128_fetch_add(i128* %a, i128 0, i32 0) + +define void @atomic128_sub_monotonic(i128* %a) nounwind uwtable { +entry: + atomicrmw sub i128* %a, i128 0 monotonic + ret void +} +; CHECK: atomic128_sub_monotonic +; CHECK: call i128 @__tsan_atomic128_fetch_sub(i128* %a, i128 0, i32 0) + +define void @atomic128_and_monotonic(i128* %a) nounwind uwtable { +entry: + atomicrmw and i128* %a, i128 0 monotonic + ret void +} +; CHECK: atomic128_and_monotonic +; CHECK: call i128 @__tsan_atomic128_fetch_and(i128* %a, i128 0, i32 0) + +define void @atomic128_or_monotonic(i128* %a) nounwind uwtable { +entry: + atomicrmw or i128* %a, i128 0 monotonic + ret void +} +; CHECK: atomic128_or_monotonic +; CHECK: call i128 @__tsan_atomic128_fetch_or(i128* %a, i128 0, i32 0) + +define void @atomic128_xor_monotonic(i128* %a) nounwind uwtable { +entry: + atomicrmw xor i128* %a, i128 0 monotonic + ret void +} +; CHECK: atomic128_xor_monotonic +; CHECK: call i128 @__tsan_atomic128_fetch_xor(i128* %a, i128 0, i32 0) + +define void @atomic128_xchg_acquire(i128* %a) nounwind uwtable { +entry: + atomicrmw xchg i128* %a, i128 0 acquire + ret void +} +; CHECK: atomic128_xchg_acquire +; CHECK: call i128 @__tsan_atomic128_exchange(i128* %a, i128 0, i32 2) + +define void @atomic128_add_acquire(i128* %a) nounwind uwtable { +entry: + atomicrmw add i128* %a, i128 0 acquire + ret void +} +; CHECK: atomic128_add_acquire +; CHECK: call i128 @__tsan_atomic128_fetch_add(i128* %a, i128 0, i32 2) + +define void @atomic128_sub_acquire(i128* %a) nounwind uwtable { +entry: + atomicrmw sub i128* %a, i128 0 acquire + ret void +} +; CHECK: atomic128_sub_acquire +; CHECK: call i128 @__tsan_atomic128_fetch_sub(i128* %a, i128 0, i32 2) + +define void @atomic128_and_acquire(i128* %a) nounwind uwtable { +entry: + atomicrmw and i128* %a, i128 0 acquire + ret void +} +; CHECK: atomic128_and_acquire +; CHECK: call i128 @__tsan_atomic128_fetch_and(i128* %a, i128 0, i32 2) + +define void @atomic128_or_acquire(i128* %a) nounwind uwtable { +entry: + atomicrmw or i128* %a, i128 0 acquire + ret void +} +; CHECK: atomic128_or_acquire +; CHECK: call i128 @__tsan_atomic128_fetch_or(i128* %a, i128 0, i32 2) + +define void @atomic128_xor_acquire(i128* %a) nounwind uwtable { +entry: + atomicrmw xor i128* %a, i128 0 acquire + ret void +} +; CHECK: atomic128_xor_acquire +; CHECK: call i128 @__tsan_atomic128_fetch_xor(i128* %a, i128 0, i32 2) + +define void @atomic128_xchg_release(i128* %a) nounwind uwtable { +entry: + atomicrmw xchg i128* %a, i128 0 release + ret void +} +; CHECK: atomic128_xchg_release +; CHECK: call i128 
@__tsan_atomic128_exchange(i128* %a, i128 0, i32 3) + +define void @atomic128_add_release(i128* %a) nounwind uwtable { +entry: + atomicrmw add i128* %a, i128 0 release + ret void +} +; CHECK: atomic128_add_release +; CHECK: call i128 @__tsan_atomic128_fetch_add(i128* %a, i128 0, i32 3) + +define void @atomic128_sub_release(i128* %a) nounwind uwtable { +entry: + atomicrmw sub i128* %a, i128 0 release + ret void +} +; CHECK: atomic128_sub_release +; CHECK: call i128 @__tsan_atomic128_fetch_sub(i128* %a, i128 0, i32 3) + +define void @atomic128_and_release(i128* %a) nounwind uwtable { +entry: + atomicrmw and i128* %a, i128 0 release + ret void +} +; CHECK: atomic128_and_release +; CHECK: call i128 @__tsan_atomic128_fetch_and(i128* %a, i128 0, i32 3) + +define void @atomic128_or_release(i128* %a) nounwind uwtable { +entry: + atomicrmw or i128* %a, i128 0 release + ret void +} +; CHECK: atomic128_or_release +; CHECK: call i128 @__tsan_atomic128_fetch_or(i128* %a, i128 0, i32 3) + +define void @atomic128_xor_release(i128* %a) nounwind uwtable { +entry: + atomicrmw xor i128* %a, i128 0 release + ret void +} +; CHECK: atomic128_xor_release +; CHECK: call i128 @__tsan_atomic128_fetch_xor(i128* %a, i128 0, i32 3) + +define void @atomic128_xchg_acq_rel(i128* %a) nounwind uwtable { +entry: + atomicrmw xchg i128* %a, i128 0 acq_rel + ret void +} +; CHECK: atomic128_xchg_acq_rel +; CHECK: call i128 @__tsan_atomic128_exchange(i128* %a, i128 0, i32 4) + +define void @atomic128_add_acq_rel(i128* %a) nounwind uwtable { +entry: + atomicrmw add i128* %a, i128 0 acq_rel + ret void +} +; CHECK: atomic128_add_acq_rel +; CHECK: call i128 @__tsan_atomic128_fetch_add(i128* %a, i128 0, i32 4) + +define void @atomic128_sub_acq_rel(i128* %a) nounwind uwtable { +entry: + atomicrmw sub i128* %a, i128 0 acq_rel + ret void +} +; CHECK: atomic128_sub_acq_rel +; CHECK: call i128 @__tsan_atomic128_fetch_sub(i128* %a, i128 0, i32 4) + +define void @atomic128_and_acq_rel(i128* %a) nounwind uwtable { +entry: + atomicrmw and i128* %a, i128 0 acq_rel + ret void +} +; CHECK: atomic128_and_acq_rel +; CHECK: call i128 @__tsan_atomic128_fetch_and(i128* %a, i128 0, i32 4) + +define void @atomic128_or_acq_rel(i128* %a) nounwind uwtable { +entry: + atomicrmw or i128* %a, i128 0 acq_rel + ret void +} +; CHECK: atomic128_or_acq_rel +; CHECK: call i128 @__tsan_atomic128_fetch_or(i128* %a, i128 0, i32 4) + +define void @atomic128_xor_acq_rel(i128* %a) nounwind uwtable { +entry: + atomicrmw xor i128* %a, i128 0 acq_rel + ret void +} +; CHECK: atomic128_xor_acq_rel +; CHECK: call i128 @__tsan_atomic128_fetch_xor(i128* %a, i128 0, i32 4) + +define void @atomic128_xchg_seq_cst(i128* %a) nounwind uwtable { +entry: + atomicrmw xchg i128* %a, i128 0 seq_cst + ret void +} +; CHECK: atomic128_xchg_seq_cst +; CHECK: call i128 @__tsan_atomic128_exchange(i128* %a, i128 0, i32 5) + +define void @atomic128_add_seq_cst(i128* %a) nounwind uwtable { +entry: + atomicrmw add i128* %a, i128 0 seq_cst + ret void +} +; CHECK: atomic128_add_seq_cst +; CHECK: call i128 @__tsan_atomic128_fetch_add(i128* %a, i128 0, i32 5) + +define void @atomic128_sub_seq_cst(i128* %a) nounwind uwtable { +entry: + atomicrmw sub i128* %a, i128 0 seq_cst + ret void +} +; CHECK: atomic128_sub_seq_cst +; CHECK: call i128 @__tsan_atomic128_fetch_sub(i128* %a, i128 0, i32 5) + +define void @atomic128_and_seq_cst(i128* %a) nounwind uwtable { +entry: + atomicrmw and i128* %a, i128 0 seq_cst + ret void +} +; CHECK: atomic128_and_seq_cst +; CHECK: call i128 @__tsan_atomic128_fetch_and(i128* 
%a, i128 0, i32 5) + +define void @atomic128_or_seq_cst(i128* %a) nounwind uwtable { +entry: + atomicrmw or i128* %a, i128 0 seq_cst + ret void +} +; CHECK: atomic128_or_seq_cst +; CHECK: call i128 @__tsan_atomic128_fetch_or(i128* %a, i128 0, i32 5) + +define void @atomic128_xor_seq_cst(i128* %a) nounwind uwtable { +entry: + atomicrmw xor i128* %a, i128 0 seq_cst + ret void +} +; CHECK: atomic128_xor_seq_cst +; CHECK: call i128 @__tsan_atomic128_fetch_xor(i128* %a, i128 0, i32 5) + +define void @atomic128_cas_monotonic(i128* %a) nounwind uwtable { +entry: + cmpxchg i128* %a, i128 0, i128 1 monotonic + ret void +} +; CHECK: atomic128_cas_monotonic +; CHECK: call i128 @__tsan_atomic128_compare_exchange_val(i128* %a, i128 0, i128 1, i32 0) + +define void @atomic128_cas_acquire(i128* %a) nounwind uwtable { +entry: + cmpxchg i128* %a, i128 0, i128 1 acquire + ret void +} +; CHECK: atomic128_cas_acquire +; CHECK: call i128 @__tsan_atomic128_compare_exchange_val(i128* %a, i128 0, i128 1, i32 2) + +define void @atomic128_cas_release(i128* %a) nounwind uwtable { +entry: + cmpxchg i128* %a, i128 0, i128 1 release + ret void +} +; CHECK: atomic128_cas_release +; CHECK: call i128 @__tsan_atomic128_compare_exchange_val(i128* %a, i128 0, i128 1, i32 3) + +define void @atomic128_cas_acq_rel(i128* %a) nounwind uwtable { +entry: + cmpxchg i128* %a, i128 0, i128 1 acq_rel + ret void +} +; CHECK: atomic128_cas_acq_rel +; CHECK: call i128 @__tsan_atomic128_compare_exchange_val(i128* %a, i128 0, i128 1, i32 4) + +define void @atomic128_cas_seq_cst(i128* %a) nounwind uwtable { +entry: + cmpxchg i128* %a, i128 0, i128 1 seq_cst + ret void +} +; CHECK: atomic128_cas_seq_cst +; CHECK: call i128 @__tsan_atomic128_compare_exchange_val(i128* %a, i128 0, i128 1, i32 5) + +define void @atomic_signal_fence_acquire() nounwind uwtable { +entry: + fence singlethread acquire + ret void +} +; CHECK: atomic_signal_fence_acquire +; CHECK: call void @__tsan_atomic_signal_fence(i32 2) + +define void @atomic_thread_fence_acquire() nounwind uwtable { +entry: + fence acquire + ret void +} +; CHECK: atomic_thread_fence_acquire +; CHECK: call void @__tsan_atomic_thread_fence(i32 2) + +define void @atomic_signal_fence_release() nounwind uwtable { +entry: + fence singlethread release + ret void +} +; CHECK: atomic_signal_fence_release +; CHECK: call void @__tsan_atomic_signal_fence(i32 3) + +define void @atomic_thread_fence_release() nounwind uwtable { +entry: + fence release + ret void +} +; CHECK: atomic_thread_fence_release +; CHECK: call void @__tsan_atomic_thread_fence(i32 3) + +define void @atomic_signal_fence_acq_rel() nounwind uwtable { +entry: + fence singlethread acq_rel + ret void +} +; CHECK: atomic_signal_fence_acq_rel +; CHECK: call void @__tsan_atomic_signal_fence(i32 4) + +define void @atomic_thread_fence_acq_rel() nounwind uwtable { +entry: + fence acq_rel + ret void +} +; CHECK: atomic_thread_fence_acq_rel +; CHECK: call void @__tsan_atomic_thread_fence(i32 4) + +define void @atomic_signal_fence_seq_cst() nounwind uwtable { +entry: + fence singlethread seq_cst + ret void +} +; CHECK: atomic_signal_fence_seq_cst +; CHECK: call void @__tsan_atomic_signal_fence(i32 5) + +define void @atomic_thread_fence_seq_cst() nounwind uwtable { +entry: + fence seq_cst + ret void +} +; CHECK: atomic_thread_fence_seq_cst +; CHECK: call void @__tsan_atomic_thread_fence(i32 5) diff --git a/test/MC/ARM/elf-jump24-fixup.s b/test/MC/ARM/elf-jump24-fixup.s new file mode 100644 index 0000000000..75a4b869dc --- /dev/null +++ 
b/test/MC/ARM/elf-jump24-fixup.s @@ -0,0 +1,9 @@ +@ RUN: llvm-mc %s -triple=thumbv7-linux-gnueabi -filetype=obj -o - < %s | llvm-objdump -r - | FileCheck %s + .syntax unified + .text + .code 16 + .thumb_func +foo: + b.w bar + +@ CHECK: {{[0-9]+}} R_ARM_THM_JUMP24 bar diff --git a/test/Transforms/ConstProp/loads.ll b/test/Transforms/ConstProp/loads.ll index 74d80aa187..6794288a0e 100644 --- a/test/Transforms/ConstProp/loads.ll +++ b/test/Transforms/ConstProp/loads.ll @@ -1,17 +1,24 @@ -; RUN: opt < %s -instcombine -S | FileCheck %s - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +; RUN: opt < %s -default-data-layout="e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64" -instcombine -S | FileCheck %s --check-prefix=LE +; RUN: opt < %s -default-data-layout="E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64" -instcombine -S | FileCheck %s --check-prefix=BE +; {{ 0xDEADBEEF, 0xBA }, 0xCAFEBABE} @g1 = constant {{i32,i8},i32} {{i32,i8} { i32 -559038737, i8 186 }, i32 -889275714 } @g2 = constant double 1.0 +; { 0x7B, 0x06B1BFF8 } @g3 = constant {i64, i64} { i64 123, i64 112312312 } ; Simple load define i32 @test1() { %r = load i32* getelementptr ({{i32,i8},i32}* @g1, i32 0, i32 0, i32 0) ret i32 %r -; CHECK: @test1 -; CHECK: ret i32 -559038737 + +; 0xDEADBEEF +; LE: @test1 +; LE: ret i32 -559038737 + +; 0xDEADBEEF +; BE: @test1 +; BE: ret i32 -559038737 } ; PR3152 @@ -20,8 +27,13 @@ define i16 @test2() { %r = load i16* bitcast(i32* getelementptr ({{i32,i8},i32}* @g1, i32 0, i32 0, i32 0) to i16*) ret i16 %r -; CHECK: @test2 -; CHECK: ret i16 -16657 +; 0xBEEF +; LE: @test2 +; LE: ret i16 -16657 + +; 0xDEAD +; BE: @test2 +; BE: ret i16 -8531 } ; Load of second 16 bits of 32-bit value. @@ -29,16 +41,27 @@ define i16 @test3() { %r = load i16* getelementptr(i16* bitcast(i32* getelementptr ({{i32,i8},i32}* @g1, i32 0, i32 0, i32 0) to i16*), i32 1) ret i16 %r -; CHECK: @test3 -; CHECK: ret i16 -8531 +; 0xDEAD +; LE: @test3 +; LE: ret i16 -8531 + +; 0xBEEF +; BE: @test3 +; BE: ret i16 -16657 } ; Load of 8 bit field + tail padding. define i16 @test4() { %r = load i16* getelementptr(i16* bitcast(i32* getelementptr ({{i32,i8},i32}* @g1, i32 0, i32 0, i32 0) to i16*), i32 2) ret i16 %r -; CHECK: @test4 -; CHECK: ret i16 186 + +; 0x00BA +; LE: @test4 +; LE: ret i16 186 + +; 0xBA00 +; BE: @test4 +; BE: ret i16 -17920 } ; Load of double bits. @@ -46,8 +69,13 @@ define i64 @test6() { %r = load i64* bitcast(double* @g2 to i64*) ret i64 %r -; CHECK: @test6 -; CHECK: ret i64 4607182418800017408 +; 0x3FF_0000000000000 +; LE: @test6 +; LE: ret i64 4607182418800017408 + +; 0x3FF_0000000000000 +; BE: @test6 +; BE: ret i64 4607182418800017408 } ; Load of double bits. @@ -55,8 +83,13 @@ define i16 @test7() { %r = load i16* bitcast(double* @g2 to i16*) ret i16 %r -; CHECK: @test7 -; CHECK: ret i16 0 +; 0x0000 +; LE: @test7 +; LE: ret i16 0 + +; 0x3FF0 +; BE: @test7 +; BE: ret i16 16368 } ; Double load. 
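; Byte-level view of the change below (a sketch, assuming the constant
; folder zero-fills @g1's struct tail padding): little-endian memory for
; @g1 begins EF BE AD DE BA 00 00 00, so an 8-byte load folds to
; 0x000000BADEADBEEF, printed as the double bit pattern 0xBADEADBEEF;
; big-endian memory begins DE AD BE EF BA 00 00 00, folding to
; 0xDEADBEEFBA000000.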
@@ -64,8 +97,11 @@ define double @test8() { %r = load double* bitcast({{i32,i8},i32}* @g1 to double*) ret double %r -; CHECK: @test8 -; CHECK: ret double 0xBADEADBEEF +; LE: @test8 +; LE: ret double 0xBADEADBEEF + +; BE: @test8 +; BE: ret double 0xDEADBEEFBA000000 } @@ -74,8 +110,13 @@ define i128 @test9() { %r = load i128* bitcast({i64, i64}* @g3 to i128*) ret i128 %r -; CHECK: @test9 -; CHECK: ret i128 2071796475790618158476296315 +; 0x00000000_06B1BFF8_00000000_0000007B +; LE: @test9 +; LE: ret i128 2071796475790618158476296315 + +; 0x00000000_0000007B_00000000_06B1BFF8 +; BE: @test9 +; BE: ret i128 2268949521066387161080 } ; vector load. @@ -83,21 +124,30 @@ define <2 x i64> @test10() { %r = load <2 x i64>* bitcast({i64, i64}* @g3 to <2 x i64>*) ret <2 x i64> %r -; CHECK: @test10 -; CHECK: ret <2 x i64> <i64 123, i64 112312312> +; LE: @test10 +; LE: ret <2 x i64> <i64 123, i64 112312312> + +; BE: @test10 +; BE: ret <2 x i64> <i64 123, i64 112312312> } ; PR5287 +; { 0xA1, 0x08 } @g4 = internal constant { i8, i8 } { i8 -95, i8 8 } define i16 @test11() nounwind { entry: %a = load i16* bitcast ({ i8, i8 }* @g4 to i16*) ret i16 %a - -; CHECK: @test11 -; CHECK: ret i16 2209 + +; 0x08A1 +; LE: @test11 +; LE: ret i16 2209 + +; 0xA108 +; BE: @test11 +; BE: ret i16 -24312 } @@ -107,8 +157,14 @@ entry: define i16 @test12() { %a = load i16* getelementptr inbounds ([3 x i16]* bitcast ([6 x i8]* @test12g to [3 x i16]*), i32 0, i64 1) ret i16 %a -; CHECK: @test12 -; CHECK: ret i16 98 + +; 0x0062 +; LE: @test12 +; LE: ret i16 98 + +; 0x6200 +; BE: @test12 +; BE: ret i16 25088 } @@ -117,8 +173,12 @@ define i16 @test12() { define i1 @test13() { %A = load i1* bitcast (i8* @g5 to i1*) ret i1 %A -; CHECK: @test13 -; CHECK: ret i1 false + +; LE: @test13 +; LE: ret i1 false + +; BE: @test13 +; BE: ret i1 false } @g6 = constant [2 x i8*] [i8* inttoptr (i64 1 to i8*), i8* inttoptr (i64 2 to i8*)] @@ -126,14 +186,22 @@ define i64 @test14() nounwind { entry: %tmp = load i64* bitcast ([2 x i8*]* @g6 to i64*) ret i64 %tmp -; CHECK: @test14 -; CHECK: ret i64 1 + +; LE: @test14 +; LE: ret i64 1 + +; BE: @test14 +; BE: ret i64 1 } define i64 @test15() nounwind { entry: %tmp = load i64* bitcast (i8** getelementptr inbounds ([2 x i8*]* @g6, i32 0, i64 1) to i64*) ret i64 %tmp -; CHECK: @test15 -; CHECK: ret i64 2 + +; LE: @test15 +; LE: ret i64 2 + +; BE: @test15 +; BE: ret i64 2 } diff --git a/test/Transforms/GVN/rle.ll b/test/Transforms/GVN/rle.ll index e764169126..72fa819d1c 100644 --- a/test/Transforms/GVN/rle.ll +++ b/test/Transforms/GVN/rle.ll @@ -1,7 +1,5 @@ -; RUN: opt < %s -basicaa -gvn -S -die | FileCheck %s - -; 32-bit little endian target. -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" +; RUN: opt < %s -default-data-layout="e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-n8:16:32" -basicaa -gvn -S -die | FileCheck %s +; RUN: opt < %s -default-data-layout="E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-n32" -basicaa -gvn -S -die | FileCheck %s ;; Trivial RLE test. 
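;; A minimal sketch of the redundant-load-elimination pattern exercised
;; here (the %V/%P names are from the test's signature; the transformed
;; form is an assumption about what GVN is expected to produce):
;;
;;   store i32 %V, i32* %P
;;   %A = load i32* %P      ; redundant with the store just above
;;   ret i32 %A             ; GVN forwards %V: becomes "ret i32 %V"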
define i32 @test0(i32 %V, i32* %P) { @@ -318,7 +316,7 @@ define i8 @coerce_offset_nonlocal0(i32* %P, i1 %cond) { %P4 = getelementptr i8* %P3, i32 2 br i1 %cond, label %T, label %F T: - store i32 42, i32* %P + store i32 57005, i32* %P br label %Cont F: diff --git a/test/Transforms/InstCombine/2012-07-25-LoadPart.ll b/test/Transforms/InstCombine/2012-07-25-LoadPart.ll index 73e5a6653e..18aab7f27e 100644 --- a/test/Transforms/InstCombine/2012-07-25-LoadPart.ll +++ b/test/Transforms/InstCombine/2012-07-25-LoadPart.ll @@ -1,12 +1,14 @@ -; RUN: opt < %s -instcombine -S | FileCheck %s +; RUN: opt < %s -default-data-layout="e-p:32:32:32" -instcombine -S | FileCheck %s --check-prefix=LE +; RUN: opt < %s -default-data-layout="E-p:32:32:32" -instcombine -S | FileCheck %s --check-prefix=BE ; PR13442 -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128" - @test = constant [4 x i32] [i32 1, i32 2, i32 3, i32 4] define i64 @foo() { %ret = load i64* bitcast (i8* getelementptr (i8* bitcast ([4 x i32]* @test to i8*), i64 2) to i64*), align 1 ret i64 %ret - ; CHECK: ret i64 844424930263040 + ; 0x00030000_00020000 in [01 00/00 00 02 00 00 00 03 00/00 00 04 00 00 00] + ; LE: ret i64 844424930263040 + ; 0x00010000_00020000 in [00 00/00 01 00 00 00 02 00 00/00 03 00 00 00 04] + ; BE: ret i64 281474976841728 } diff --git a/test/Transforms/InstCombine/disable-simplify-libcalls.ll b/test/Transforms/InstCombine/disable-simplify-libcalls.ll new file mode 100644 index 0000000000..d81e9ae5bd --- /dev/null +++ b/test/Transforms/InstCombine/disable-simplify-libcalls.ll @@ -0,0 +1,236 @@ +; Test that -disable-simplify-libcalls is wired up correctly. +; +; RUN: opt < %s -instcombine -disable-simplify-libcalls -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +@.str = constant [1 x i8] zeroinitializer, align 1 +@.str1 = constant [13 x i8] c"hello, world\00", align 1 +@.str2 = constant [4 x i8] c"foo\00", align 1 +@.str3 = constant [4 x i8] c"bar\00", align 1 +@.str4 = constant [6 x i8] c"123.4\00", align 1 +@.str5 = constant [5 x i8] c"1234\00", align 1 +@empty = constant [1 x i8] c"\00", align 1 + +declare double @ceil(double) +declare double @copysign(double, double) +declare double @cos(double) +declare double @fabs(double) +declare double @floor(double) +declare i8* @strcat(i8*, i8*) +declare i8* @strncat(i8*, i8*, i32) +declare i8* @strchr(i8*, i32) +declare i8* @strrchr(i8*, i32) +declare i32 @strcmp(i8*, i8*) +declare i32 @strncmp(i8*, i8*, i64) +declare i8* @strcpy(i8*, i8*) +declare i8* @stpcpy(i8*, i8*) +declare i8* @strncpy(i8*, i8*, i64) +declare i64 @strlen(i8*) +declare i8* @strpbrk(i8*, i8*) +declare i64 @strspn(i8*, i8*) +declare double @strtod(i8*, i8**) +declare float @strtof(i8*, i8**) +declare x86_fp80 @strtold(i8*, i8**) +declare i64 @strtol(i8*, i8**, i32) +declare i64 @strtoll(i8*, i8**, i32) +declare i64 @strtoul(i8*, i8**, i32) +declare i64 @strtoull(i8*, i8**, i32) +declare i64 @strcspn(i8*, i8*) + +define double @t1(double %x) { +; CHECK: @t1 + %ret = call double @ceil(double %x) + ret double %ret +; CHECK: call double @ceil +} + +define double @t2(double %x, double %y) { +; CHECK: @t2 + %ret = call double @copysign(double %x, double %y) + ret double %ret +; CHECK: call double @copysign +} + +define double @t3(double %x) { +; CHECK: @t3 + %call = call double
@cos(double %x) + ret double %call +; CHECK: call double @cos +} + +define double @t4(double %x) { +; CHECK: @t4 + %ret = call double @fabs(double %x) + ret double %ret +; CHECK: call double @fabs +} + +define double @t5(double %x) { +; CHECK: @t5 + %ret = call double @floor(double %x) + ret double %ret +; CHECK: call double @floor +} + +define i8* @t6(i8* %x) { +; CHECK: @t6 + %empty = getelementptr [1 x i8]* @empty, i32 0, i32 0 + %ret = call i8* @strcat(i8* %x, i8* %empty) + ret i8* %ret +; CHECK: call i8* @strcat +} + +define i8* @t7(i8* %x) { +; CHECK: @t7 + %empty = getelementptr [1 x i8]* @empty, i32 0, i32 0 + %ret = call i8* @strncat(i8* %x, i8* %empty, i32 1) + ret i8* %ret +; CHECK: call i8* @strncat +} + +define i8* @t8() { +; CHECK: @t8 + %x = getelementptr inbounds [13 x i8]* @.str1, i32 0, i32 0 + %ret = call i8* @strchr(i8* %x, i32 119) + ret i8* %ret +; CHECK: call i8* @strchr +} + +define i8* @t9() { +; CHECK: @t9 + %x = getelementptr inbounds [13 x i8]* @.str1, i32 0, i32 0 + %ret = call i8* @strrchr(i8* %x, i32 119) + ret i8* %ret +; CHECK: call i8* @strrchr +} + +define i32 @t10() { +; CHECK: @t10 + %x = getelementptr inbounds [4 x i8]* @.str2, i32 0, i32 0 + %y = getelementptr inbounds [4 x i8]* @.str3, i32 0, i32 0 + %ret = call i32 @strcmp(i8* %x, i8* %y) + ret i32 %ret +; CHECK: call i32 @strcmp +} + +define i32 @t11() { +; CHECK: @t11 + %x = getelementptr inbounds [4 x i8]* @.str2, i32 0, i32 0 + %y = getelementptr inbounds [4 x i8]* @.str3, i32 0, i32 0 + %ret = call i32 @strncmp(i8* %x, i8* %y, i64 3) + ret i32 %ret +; CHECK: call i32 @strncmp +} + +define i8* @t12(i8* %x) { +; CHECK: @t12 + %y = getelementptr inbounds [4 x i8]* @.str2, i32 0, i32 0 + %ret = call i8* @strcpy(i8* %x, i8* %y) + ret i8* %ret +; CHECK: call i8* @strcpy +} + +define i8* @t13(i8* %x) { +; CHECK: @t13 + %y = getelementptr inbounds [4 x i8]* @.str2, i32 0, i32 0 + %ret = call i8* @stpcpy(i8* %x, i8* %y) + ret i8* %ret +; CHECK: call i8* @stpcpy +} + +define i8* @t14(i8* %x) { +; CHECK: @t14 + %y = getelementptr inbounds [4 x i8]* @.str2, i32 0, i32 0 + %ret = call i8* @strncpy(i8* %x, i8* %y, i64 3) + ret i8* %ret +; CHECK: call i8* @strncpy +} + +define i64 @t15() { +; CHECK: @t15 + %x = getelementptr inbounds [4 x i8]* @.str2, i32 0, i32 0 + %ret = call i64 @strlen(i8* %x) + ret i64 %ret +; CHECK: call i64 @strlen +} + +define i8* @t16(i8* %x) { +; CHECK: @t16 + %y = getelementptr inbounds [1 x i8]* @.str, i32 0, i32 0 + %ret = call i8* @strpbrk(i8* %x, i8* %y) + ret i8* %ret +; CHECK: call i8* @strpbrk +} + +define i64 @t17(i8* %x) { +; CHECK: @t17 + %y = getelementptr inbounds [1 x i8]* @.str, i32 0, i32 0 + %ret = call i64 @strspn(i8* %x, i8* %y) + ret i64 %ret +; CHECK: call i64 @strspn +} + +define double @t18(i8** %y) { +; CHECK: @t18 + %x = getelementptr inbounds [6 x i8]* @.str4, i64 0, i64 0 + %ret = call double @strtod(i8* %x, i8** %y) + ret double %ret +; CHECK: call double @strtod +} + +define float @t19(i8** %y) { +; CHECK: @t19 + %x = getelementptr inbounds [6 x i8]* @.str4, i64 0, i64 0 + %ret = call float @strtof(i8* %x, i8** %y) + ret float %ret +; CHECK: call float @strtof +} + +define x86_fp80 @t20(i8** %y) { +; CHECK: @t20 + %x = getelementptr inbounds [6 x i8]* @.str4, i64 0, i64 0 + %ret = call x86_fp80 @strtold(i8* %x, i8** %y) + ret x86_fp80 %ret +; CHECK: call x86_fp80 @strtold +} + +define i64 @t21(i8** %y) { +; CHECK: @t21 + %x = getelementptr inbounds [5 x i8]* @.str5, i64 0, i64 0 + %ret = call i64 @strtol(i8* %x, i8** %y, i32 10) + ret i64 %ret +; CHECK: 
call i64 @strtol +} + +define i64 @t22(i8** %y) { +; CHECK: @t22 + %x = getelementptr inbounds [5 x i8]* @.str5, i64 0, i64 0 + %ret = call i64 @strtoll(i8* %x, i8** %y, i32 10) + ret i64 %ret +; CHECK: call i64 @strtoll +} + +define i64 @t23(i8** %y) { +; CHECK: @t23 + %x = getelementptr inbounds [5 x i8]* @.str5, i64 0, i64 0 + %ret = call i64 @strtoul(i8* %x, i8** %y, i32 10) + ret i64 %ret +; CHECK: call i64 @strtoul +} + +define i64 @t24(i8** %y) { +; CHECK: @t24 + %x = getelementptr inbounds [5 x i8]* @.str5, i64 0, i64 0 + %ret = call i64 @strtoull(i8* %x, i8** %y, i32 10) + ret i64 %ret +; CHECK: call i64 @strtoull +} + +define i64 @t25(i8* %y) { +; CHECK: @t25 + %x = getelementptr [1 x i8]* @empty, i32 0, i32 0 + %ret = call i64 @strcspn(i8* %x, i8* %y) + ret i64 %ret +; CHECK: call i64 @strcspn +} diff --git a/test/Transforms/InstCombine/memcmp-1.ll b/test/Transforms/InstCombine/memcmp-1.ll new file mode 100644 index 0000000000..4238c5f8fb --- /dev/null +++ b/test/Transforms/InstCombine/memcmp-1.ll @@ -0,0 +1,72 @@ +; Test that the memcmp library call simplifier works correctly. +; +; RUN: opt < %s -instcombine -S | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" + +@foo = constant [4 x i8] c"foo\00" +@hel = constant [4 x i8] c"hel\00" +@hello_u = constant [8 x i8] c"hello_u\00" + +declare i32 @memcmp(i8*, i8*, i32) + +; Check memcmp(mem, mem, size) -> 0. + +define i32 @test_simplify1(i8* %mem, i32 %size) { +; CHECK: @test_simplify1 + %ret = call i32 @memcmp(i8* %mem, i8* %mem, i32 %size) + ret i32 %ret +; CHECK: ret i32 0 +} + +; Check memcmp(mem1, mem2, 0) -> 0. + +define i32 @test_simplify2(i8* %mem1, i8* %mem2) { +; CHECK: @test_simplify2 + %ret = call i32 @memcmp(i8* %mem1, i8* %mem2, i32 0) + ret i32 %ret +; CHECK: ret i32 0 +} + +;; Check memcmp(mem1, mem2, 1) -> *(unsigned char*)mem1 - *(unsigned char*)mem2. + +define i32 @test_simplify3(i8* %mem1, i8* %mem2) { +; CHECK: @test_simplify3 + %ret = call i32 @memcmp(i8* %mem1, i8* %mem2, i32 1) +; CHECK: [[LOAD1:%[a-z]+]] = load i8* %mem1, align 1 +; CHECK: [[ZEXT1:%[a-z]+]] = zext i8 [[LOAD1]] to i32 +; CHECK: [[LOAD2:%[a-z]+]] = load i8* %mem2, align 1 +; CHECK: [[ZEXT2:%[a-z]+]] = zext i8 [[LOAD2]] to i32 +; CHECK: [[RET:%[a-z]+]] = sub i32 [[ZEXT1]], [[ZEXT2]] + ret i32 %ret +; CHECK: ret i32 [[RET]] +} + +; Check memcmp(mem1, mem2, size) -> cnst, where all arguments are constants. 
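; Worked instance of the constant fold (using this file's globals): @hel is
; "hel\00" and @hello_u is "hello_u\00", so a memcmp of their first three
; bytes folds to 0; for @hel vs. @foo the first differing byte ('h' vs. 'f')
; determines the sign of the folded constant, as checked below.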
+ +define i32 @test_simplify4() { +; CHECK: @test_simplify4 + %mem1 = getelementptr [4 x i8]* @hel, i32 0, i32 0 + %mem2 = getelementptr [8 x i8]* @hello_u, i32 0, i32 0 + %ret = call i32 @memcmp(i8* %mem1, i8* %mem2, i32 3) + ret i32 %ret +; CHECK: ret i32 0 +} + +define i32 @test_simplify5() { +; CHECK: @test_simplify5 + %mem1 = getelementptr [4 x i8]* @hel, i32 0, i32 0 + %mem2 = getelementptr [4 x i8]* @foo, i32 0, i32 0 + %ret = call i32 @memcmp(i8* %mem1, i8* %mem2, i32 3) + ret i32 %ret +; CHECK: ret i32 {{[0-9]+}} +} + +define i32 @test_simplify6() { +; CHECK: @test_simplify6 + %mem1 = getelementptr [4 x i8]* @foo, i32 0, i32 0 + %mem2 = getelementptr [4 x i8]* @hel, i32 0, i32 0 + %ret = call i32 @memcmp(i8* %mem1, i8* %mem2, i32 3) + ret i32 %ret +; CHECK: ret i32 {{-[0-9]+}} +} diff --git a/test/Transforms/InstCombine/memcmp-2.ll b/test/Transforms/InstCombine/memcmp-2.ll new file mode 100644 index 0000000000..3796117bc2 --- /dev/null +++ b/test/Transforms/InstCombine/memcmp-2.ll @@ -0,0 +1,17 @@ +; Test that the memcmp library call simplifier works correctly. +; +; RUN: opt < %s -instcombine -S | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" + +declare i32* @memcmp(i8*, i8*, i32) + +; Check that memcmp functions with the wrong prototype aren't simplified. + +define i32* @test_no_simplify1(i8* %mem, i32 %size) { +; CHECK: @test_no_simplify1 + %ret = call i32* @memcmp(i8* %mem, i8* %mem, i32 %size) +; CHECK-NEXT: call i32* @memcmp + ret i32* %ret +; CHECK-NEXT: ret i32* %ret +} diff --git a/test/Transforms/InstCombine/memcpy-1.ll b/test/Transforms/InstCombine/memcpy-1.ll new file mode 100644 index 0000000000..65b79ad03d --- /dev/null +++ b/test/Transforms/InstCombine/memcpy-1.ll @@ -0,0 +1,17 @@ +; Test that the memcpy library call simplifier works correctly. +; +; RUN: opt < %s -instcombine -S | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" + +declare i8* @memcpy(i8*, i8*, i32) + +; Check memcpy(mem1, mem2, size) -> llvm.memcpy(mem1, mem2, size, 1). + +define i8* @test_simplify1(i8* %mem1, i8* %mem2, i32 %size) { +; CHECK: @test_simplify1 + %ret = call i8* @memcpy(i8* %mem1, i8* %mem2, i32 %size) +; CHECK: call void @llvm.memcpy + ret i8* %ret +; CHECK: ret i8* %mem1 +} diff --git a/test/Transforms/InstCombine/memcpy-2.ll b/test/Transforms/InstCombine/memcpy-2.ll new file mode 100644 index 0000000000..4a8a02018f --- /dev/null +++ b/test/Transforms/InstCombine/memcpy-2.ll @@ -0,0 +1,17 @@ +; Test that the memcpy library call simplifier works correctly. +; +; RUN: opt < %s -instcombine -S | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" + +declare i8 @memcpy(i8*, i8*, i32) + +; Check that memcpy functions with the wrong prototype aren't simplified. + +define i8 @test_no_simplify1(i8* %mem1, i8* %mem2, i32 %size) { +; CHECK: @test_no_simplify1 + %ret = call i8 @memcpy(i8* %mem1, i8* %mem2, i32 %size) +; CHECK: call i8 @memcpy + ret i8 %ret +; CHECK: ret i8 %ret +} diff --git a/test/Transforms/InstCombine/memmove-1.ll b/test/Transforms/InstCombine/memmove-1.ll new file mode 100644 index 0000000000..53f2f116c7 --- /dev/null +++ b/test/Transforms/InstCombine/memmove-1.ll @@ -0,0 +1,17 @@ +; Test that the memmove library call simplifier works correctly. 
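; As with the memcpy pair above, this "-1" file checks the positive
; transform (a correctly prototyped memmove lowers to the llvm.memmove
; intrinsic and the call's result simplifies to its first argument),
; while the companion "-2" file checks that a mismatched prototype
; blocks the transform.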
+; +; RUN: opt < %s -instcombine -S | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" + +declare i8* @memmove(i8*, i8*, i32) + +; Check memmove(mem1, mem2, size) -> llvm.memmove(mem1, mem2, size, 1). + +define i8* @test_simplify1(i8* %mem1, i8* %mem2, i32 %size) { +; CHECK: @test_simplify1 + %ret = call i8* @memmove(i8* %mem1, i8* %mem2, i32 %size) +; CHECK: call void @llvm.memmove + ret i8* %ret +; CHECK: ret i8* %mem1 +} diff --git a/test/Transforms/InstCombine/memmove-2.ll b/test/Transforms/InstCombine/memmove-2.ll new file mode 100644 index 0000000000..23887bce31 --- /dev/null +++ b/test/Transforms/InstCombine/memmove-2.ll @@ -0,0 +1,17 @@ +; Test that the memmove library call simplifier works correctly. +; +; RUN: opt < %s -instcombine -S | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" + +declare i8 @memmove(i8*, i8*, i32) + +; Check that memmove functions with the wrong prototype aren't simplified. + +define i8 @test_no_simplify1(i8* %mem1, i8* %mem2, i32 %size) { +; CHECK: @test_no_simplify1 + %ret = call i8 @memmove(i8* %mem1, i8* %mem2, i32 %size) +; CHECK: call i8 @memmove + ret i8 %ret +; CHECK: ret i8 %ret +} diff --git a/test/Transforms/InstCombine/memset-1.ll b/test/Transforms/InstCombine/memset-1.ll new file mode 100644 index 0000000000..48b433e137 --- /dev/null +++ b/test/Transforms/InstCombine/memset-1.ll @@ -0,0 +1,17 @@ +; Test that the memset library call simplifier works correctly. +; +; RUN: opt < %s -instcombine -S | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" + +declare i8* @memset(i8*, i32, i32) + +; Check memset(mem1, val, size) -> llvm.memset(mem1, val, size, 1). + +define i8* @test_simplify1(i8* %mem, i32 %val, i32 %size) { +; CHECK: @test_simplify1 + %ret = call i8* @memset(i8* %mem, i32 %val, i32 %size) +; CHECK: call void @llvm.memset + ret i8* %ret +; CHECK: ret i8* %mem +} diff --git a/test/Transforms/InstCombine/memset-2.ll b/test/Transforms/InstCombine/memset-2.ll new file mode 100644 index 0000000000..8a9033302d --- /dev/null +++ b/test/Transforms/InstCombine/memset-2.ll @@ -0,0 +1,17 @@ +; Test that the memset library call simplifier works correctly. +; +; RUN: opt < %s -instcombine -S | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" + +declare i8 @memset(i8*, i32, i32) + +; Check that memset functions with the wrong prototype aren't simplified. + +define i8 @test_no_simplify1(i8* %mem, i32 %val, i32 %size) { +; CHECK: @test_no_simplify1 + %ret = call i8 @memset(i8* %mem, i32 %val, i32 %size) +; CHECK: call i8 @memset + ret i8 %ret +; CHECK: ret i8 %ret +} diff --git a/test/Transforms/InstCombine/strcspn-1.ll b/test/Transforms/InstCombine/strcspn-1.ll new file mode 100644 index 0000000000..60fad897b2 --- /dev/null +++ b/test/Transforms/InstCombine/strcspn-1.ll @@ -0,0 +1,57 @@ +; Test that the strcspn library call simplifier works correctly. 
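+; Recall that strcspn(s1, s2) returns the length of the longest initial
+; segment of s1 containing no character from s2; for example
+;   strcspn("abcba", "abc") == 0
+; because the first character of "abcba" already appears in the reject set,
+; which is exactly what test_simplify3 below expects.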
+; +; RUN: opt < %s -instcombine -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +@abcba = constant [6 x i8] c"abcba\00" +@abc = constant [4 x i8] c"abc\00" +@null = constant [1 x i8] zeroinitializer + +declare i64 @strcspn(i8*, i8*) + +; Check strcspn(s, "") -> strlen(s). + +define i64 @test_simplify1(i8* %str) { +; CHECK: @test_simplify1 + %pat = getelementptr [1 x i8]* @null, i32 0, i32 0 + + %ret = call i64 @strcspn(i8* %str, i8* %pat) +; CHECK-NEXT: [[VAR:%[a-z]+]] = call i64 @strlen(i8* %str) + ret i64 %ret +; CHECK-NEXT: ret i64 [[VAR]] +} + +; Check strcspn("", s) -> 0. + +define i64 @test_simplify2(i8* %pat) { +; CHECK: @test_simplify2 + %str = getelementptr [1 x i8]* @null, i32 0, i32 0 + + %ret = call i64 @strcspn(i8* %str, i8* %pat) + ret i64 %ret +; CHECK-NEXT: ret i64 0 +} + +; Check strcspn(s1, s2), where s1 and s2 are constants. + +define i64 @test_simplify3() { +; CHECK: @test_simplify3 + %str = getelementptr [6 x i8]* @abcba, i32 0, i32 0 + %pat = getelementptr [4 x i8]* @abc, i32 0, i32 0 + + %ret = call i64 @strcspn(i8* %str, i8* %pat) + ret i64 %ret +; CHECK-NEXT: ret i64 0 +} + +; Check cases that shouldn't be simplified. + +define i64 @test_no_simplify1(i8* %str, i8* %pat) { +; CHECK: @test_no_simplify1 + + %ret = call i64 @strcspn(i8* %str, i8* %pat) +; CHECK-NEXT: %ret = call i64 @strcspn(i8* %str, i8* %pat) + ret i64 %ret +; CHECK-NEXT: ret i64 %ret +} diff --git a/test/Transforms/InstCombine/strcspn-2.ll b/test/Transforms/InstCombine/strcspn-2.ll new file mode 100644 index 0000000000..4e2393686c --- /dev/null +++ b/test/Transforms/InstCombine/strcspn-2.ll @@ -0,0 +1,21 @@ +; Test that the strcspn library call simplifier works correctly. +; +; RUN: opt < %s -instcombine -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +@null = constant [1 x i8] zeroinitializer + +declare double @strcspn(i8*, i8*) + +; Check that strcspn functions with the wrong prototype aren't simplified. + +define double @test_no_simplify1(i8* %pat) { +; CHECK: @test_no_simplify1 + %str = getelementptr [1 x i8]* @null, i32 0, i32 0 + + %ret = call double @strcspn(i8* %str, i8* %pat) +; CHECK-NEXT: call double @strcspn + ret double %ret +; CHECK-NEXT: ret double %ret +} diff --git a/test/Transforms/InstCombine/strncmp-1.ll b/test/Transforms/InstCombine/strncmp-1.ll index 48b26d1a5f..187c2fa50e 100644 --- a/test/Transforms/InstCombine/strncmp-1.ll +++ b/test/Transforms/InstCombine/strncmp-1.ll @@ -67,12 +67,14 @@ define i32 @test5() { } ; strncmp(x,y,1) -> memcmp(x,y,1) -; TODO: Once the memcmp simplifier gets moved into the instcombine pass -; the following memcmp will be folded into two loads and a subtract. 
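; The memcmp simplifier now lives in the instcombine pass (see
; test/Transforms/InstCombine/memcmp-1.ll added in this commit), so the
; strncmp(x,y,1) call is folded all the way down to two loads, two zexts and
; a subtract, as the updated CHECK lines verify.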
define i32 @test6(i8* %str1, i8* %str2) { ; CHECK: @test6 -; CHECK: call i32 @memcmp -; CHECK: ret i32 %memcmp +; CHECK: [[LOAD1:%[a-z]+]] = load i8* %str1, align 1 +; CHECK: [[ZEXT1:%[a-z]+]] = zext i8 [[LOAD1]] to i32 +; CHECK: [[LOAD2:%[a-z]+]] = load i8* %str2, align 1 +; CHECK: [[ZEXT2:%[a-z]+]] = zext i8 [[LOAD2]] to i32 +; CHECK: [[RET:%[a-z]+]] = sub i32 [[ZEXT1]], [[ZEXT2]] +; CHECK: ret i32 [[RET]] %temp1 = call i32 @strncmp(i8* %str1, i8* %str2, i32 1) ret i32 %temp1 diff --git a/test/Transforms/InstCombine/strstr-1.ll b/test/Transforms/InstCombine/strstr-1.ll new file mode 100644 index 0000000000..81f5271874 --- /dev/null +++ b/test/Transforms/InstCombine/strstr-1.ll @@ -0,0 +1,65 @@ +; Test that the strstr library call simplifier works correctly. +; +; RUN: opt < %s -instcombine -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +@.str = private constant [1 x i8] zeroinitializer +@.str1 = private constant [2 x i8] c"a\00" +@.str2 = private constant [6 x i8] c"abcde\00" +@.str3 = private constant [4 x i8] c"bcd\00" + +declare i8* @strstr(i8*, i8*) + +; Check strstr(str, "") -> str. + +define i8* @test_simplify1(i8* %str) { +; CHECK: @test_simplify1 + %pat = getelementptr inbounds [1 x i8]* @.str, i32 0, i32 0 + %ret = call i8* @strstr(i8* %str, i8* %pat) + ret i8* %ret +; CHECK-NEXT: ret i8* %str +} + +; Check strstr(str, "a") -> strchr(str, 'a'). + +define i8* @test_simplify2(i8* %str) { +; CHECK: @test_simplify2 + %pat = getelementptr inbounds [2 x i8]* @.str1, i32 0, i32 0 + %ret = call i8* @strstr(i8* %str, i8* %pat) + ret i8* %ret +; CHECK-NEXT: @strchr(i8* %str, i32 97) +} + +; Check strstr("abcde", "bcd") -> "abcde" + 1. + +define i8* @test_simplify3() { +; CHECK: @test_simplify3 + %str = getelementptr inbounds [6 x i8]* @.str2, i32 0, i32 0 + %pat = getelementptr inbounds [4 x i8]* @.str3, i32 0, i32 0 + %ret = call i8* @strstr(i8* %str, i8* %pat) + ret i8* %ret +; CHECK-NEXT: getelementptr inbounds ([6 x i8]* @.str2, i64 0, i64 1) +} + +; Check strstr(str, str) -> str. + +define i8* @test_simplify4(i8* %str) { +; CHECK: @test_simplify4 + %ret = call i8* @strstr(i8* %str, i8* %str) + ret i8* %ret +; CHECK-NEXT: ret i8* %str +} + +; Check strstr(str, pat) == str -> strncmp(str, pat, strlen(str)) == 0. + +define i1 @test_simplify5(i8* %str, i8* %pat) { +; CHECK: @test_simplify5 + %ret = call i8* @strstr(i8* %str, i8* %pat) + %cmp = icmp eq i8* %ret, %str + ret i1 %cmp +; CHECK: [[LEN:%[a-z]+]] = call {{i[0-9]+}} @strlen(i8* %pat) +; CHECK: [[NCMP:%[a-z]+]] = call {{i[0-9]+}} @strncmp(i8* %str, i8* %pat, {{i[0-9]+}} [[LEN]]) +; CHECK: icmp eq {{i[0-9]+}} [[NCMP]], 0 +; CHECK: ret i1 +} diff --git a/test/Transforms/InstCombine/strstr-2.ll b/test/Transforms/InstCombine/strstr-2.ll new file mode 100644 index 0000000000..5092f9b4f8 --- /dev/null +++ b/test/Transforms/InstCombine/strstr-2.ll @@ -0,0 +1,18 @@ +; Test that the strstr library call simplifier works correctly. 
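+; strstr is declared below with the wrong prototype (returning i8 rather
+; than i8*), so the simplifier must leave the call alone; the correct C
+; prototype, for reference, is char *strstr(const char *, const char *).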
+; +; RUN: opt < %s -instcombine -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +@null = private constant [1 x i8] zeroinitializer + +declare i8 @strstr(i8*, i8*) + +define i8 @test_no_simplify1(i8* %str) { +; CHECK: @test_no_simplify1 + %pat = getelementptr inbounds [1 x i8]* @null, i32 0, i32 0 + %ret = call i8 @strstr(i8* %str, i8* %pat) +; CHECK-NEXT: call i8 @strstr + ret i8 %ret +; CHECK-NEXT: ret i8 %ret +} diff --git a/test/Transforms/LoopVectorize/runtime-check.ll b/test/Transforms/LoopVectorize/runtime-check.ll new file mode 100644 index 0000000000..23933cf7c7 --- /dev/null +++ b/test/Transforms/LoopVectorize/runtime-check.ll @@ -0,0 +1,36 @@ +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.9.0" + +; Make sure we vectorize this loop: +; int foo(float *a, float *b, int n) { +; for (int i=0; i<n; ++i) +; a[i] = b[i] * 3; +; } + +;CHECK: load <4 x float> +define i32 @foo(float* nocapture %a, float* nocapture %b, i32 %n) nounwind uwtable ssp { +entry: + %cmp6 = icmp sgt i32 %n, 0 + br i1 %cmp6, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds float* %b, i64 %indvars.iv + %0 = load float* %arrayidx, align 4, !tbaa !0 + %mul = fmul float %0, 3.000000e+00 + %arrayidx2 = getelementptr inbounds float* %a, i64 %indvars.iv + store float %mul, float* %arrayidx2, align 4, !tbaa !0 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret i32 undef +} + +!0 = metadata !{metadata !"float", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/Transforms/SCCP/loadtest.ll b/test/Transforms/SCCP/loadtest.ll index add2af483f..dd1dba6914 100644 --- a/test/Transforms/SCCP/loadtest.ll +++ b/test/Transforms/SCCP/loadtest.ll @@ -1,8 +1,9 @@ ; This test makes sure that these instructions are properly constant propagated. 
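; The two RUN lines below drive SCCP under both a little-endian ("e-p:32:32")
; and a big-endian ("E-p:32:32") data layout, so the folded loads must be
; byte-order independent.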
-target datalayout = "e-p:32:32" +; RUN: opt < %s -default-data-layout="e-p:32:32" -sccp -S | FileCheck %s +; RUN: opt < %s -default-data-layout="E-p:32:32" -sccp -S | FileCheck %s -; RUN: opt < %s -sccp -S | not grep load +; CHECK-NOT: load @X = constant i32 42 ; <i32*> [#uses=1] diff --git a/test/Transforms/SimplifyLibCalls/StrSpn.ll b/test/Transforms/SimplifyLibCalls/StrSpn.ll deleted file mode 100644 index 2660ee9800..0000000000 --- a/test/Transforms/SimplifyLibCalls/StrSpn.ll +++ /dev/null @@ -1,25 +0,0 @@ -; RUN: opt < %s -simplify-libcalls -S | FileCheck %s - -target datalayout = "-p:64:64:64" - -@abcba = constant [6 x i8] c"abcba\00" -@abc = constant [4 x i8] c"abc\00" -@null = constant [1 x i8] zeroinitializer - -declare i64 @strcspn(i8*, i8*) - -define i64 @testcspn(i8* %s1, i8* %s2) { - %abcba_p = getelementptr [6 x i8]* @abcba, i32 0, i32 0 - %abc_p = getelementptr [4 x i8]* @abc, i32 0, i32 0 - %null_p = getelementptr [1 x i8]* @null, i32 0, i32 0 - %test1 = call i64 @strcspn(i8* %s1, i8* %null_p) -; CHECK: call i64 @strlen(i8* %s1) - %test2 = call i64 @strcspn(i8* %null_p, i8* %s2) - %test3 = call i64 @strcspn(i8* %abcba_p, i8* %abc_p) -; CHECK-NOT: call i64 @strcspn - %test4 = call i64 @strcspn(i8* %s1, i8* %s2) -; CHECK: call i64 @strcspn(i8* %s1, i8* %s2) - %add0 = add i64 %test1, %test3 -; CHECK: add i64 %{{.+}}, 0 - ret i64 %add0 -} diff --git a/test/Transforms/SimplifyLibCalls/StrStr.ll b/test/Transforms/SimplifyLibCalls/StrStr.ll deleted file mode 100644 index eefd2e8006..0000000000 --- a/test/Transforms/SimplifyLibCalls/StrStr.ll +++ /dev/null @@ -1,60 +0,0 @@ -; RUN: opt < %s -simplify-libcalls -S | FileCheck %s -; PR5783 - -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" -target triple = "i386-apple-darwin9.0" - -@.str = private constant [1 x i8] zeroinitializer ; <[1 x i8]*> [#uses=1] -@.str1 = private constant [2 x i8] c"a\00" ; <[2 x i8]*> [#uses=1] -@.str2 = private constant [6 x i8] c"abcde\00" ; <[6 x i8]*> [#uses=1] -@.str3 = private constant [4 x i8] c"bcd\00" ; <[4 x i8]*> [#uses=1] - -define i8* @test1(i8* %P) nounwind readonly { -entry: - %call = tail call i8* @strstr(i8* %P, i8* getelementptr inbounds ([1 x i8]* @.str, i32 0, i32 0)) nounwind ; <i8*> [#uses=1] - ret i8* %call -; strstr(P, "") -> P -; CHECK: @test1 -; CHECK: ret i8* %P -} - -declare i8* @strstr(i8*, i8* nocapture) nounwind readonly - -define i8* @test2(i8* %P) nounwind readonly { -entry: - %call = tail call i8* @strstr(i8* %P, i8* getelementptr inbounds ([2 x i8]* @.str1, i32 0, i32 0)) nounwind ; <i8*> [#uses=1] - ret i8* %call -; strstr(P, "a") -> strchr(P, 'a') -; CHECK: @test2 -; CHECK: @strchr(i8* %P, i32 97) -} - -define i8* @test3(i8* nocapture %P) nounwind readonly { -entry: - %call = tail call i8* @strstr(i8* getelementptr inbounds ([6 x i8]* @.str2, i32 0, i32 0), i8* getelementptr inbounds ([4 x i8]* @.str3, i32 0, i32 0)) nounwind ; <i8*> [#uses=1] - ret i8* %call -; strstr("abcde", "bcd") -> "abcde"+1 -; CHECK: @test3 -; CHECK: getelementptr inbounds ([6 x i8]* @.str2, i32 0, i64 1) -} - -define i8* @test4(i8* %P) nounwind readonly { -entry: - %call = tail call i8* @strstr(i8* %P, i8* %P) nounwind ; <i8*> [#uses=1] - ret i8* %call -; strstr(P, P) -> P -; CHECK: @test4 -; CHECK: ret i8* %P -} - -define i1 @test5(i8* %P, i8* %Q) nounwind readonly { -entry: - %call = tail call i8* @strstr(i8* %P, i8* %Q) nounwind ; <i8*> [#uses=1] - %cmp = icmp eq i8* %call, %P - ret i1 %cmp -; 
CHECK: @test5 -; CHECK: [[LEN:%[a-z]+]] = call {{i[0-9]+}} @strlen(i8* %Q) -; CHECK: [[NCMP:%[a-z]+]] = call {{i[0-9]+}} @strncmp(i8* %P, i8* %Q, {{i[0-9]+}} [[LEN]]) -; CHECK: icmp eq {{i[0-9]+}} [[NCMP]], 0 -; CHECK: ret i1 -} diff --git a/test/Transforms/SimplifyLibCalls/memcmp.ll b/test/Transforms/SimplifyLibCalls/memcmp.ll deleted file mode 100644 index 6ca4dc97a1..0000000000 --- a/test/Transforms/SimplifyLibCalls/memcmp.ll +++ /dev/null @@ -1,35 +0,0 @@ -; Test that the memcmpOptimizer works correctly -; RUN: opt < %s -simplify-libcalls -S | FileCheck %s - -@h = constant [2 x i8] c"h\00" ; <[2 x i8]*> [#uses=0] -@hel = constant [4 x i8] c"hel\00" ; <[4 x i8]*> [#uses=0] -@hello_u = constant [8 x i8] c"hello_u\00" ; <[8 x i8]*> [#uses=0] - -declare i32 @memcmp(i8*, i8*, i32) - -define void @test(i8* %P, i8* %Q, i32 %N, i32* %IP, i1* %BP) { - %A = call i32 @memcmp( i8* %P, i8* %P, i32 %N ) ; <i32> [#uses=1] -; CHECK-NOT: call {{.*}} memcmp -; CHECK: store volatile - store volatile i32 %A, i32* %IP - %B = call i32 @memcmp( i8* %P, i8* %Q, i32 0 ) ; <i32> [#uses=1] -; CHECK-NOT: call {{.*}} memcmp -; CHECK: store volatile - store volatile i32 %B, i32* %IP - %C = call i32 @memcmp( i8* %P, i8* %Q, i32 1 ) ; <i32> [#uses=1] -; CHECK: load -; CHECK: zext -; CHECK: load -; CHECK: zext -; CHECK: sub -; CHECK: store volatile - store volatile i32 %C, i32* %IP - %F = call i32 @memcmp(i8* getelementptr ([4 x i8]* @hel, i32 0, i32 0), - i8* getelementptr ([8 x i8]* @hello_u, i32 0, i32 0), - i32 3) -; CHECK-NOT: call {{.*}} memcmp -; CHECK: store volatile - store volatile i32 %F, i32* %IP - ret void -} - diff --git a/test/Transforms/SimplifyLibCalls/memmove.ll b/test/Transforms/SimplifyLibCalls/memmove.ll deleted file mode 100644 index 5aaeeeb024..0000000000 --- a/test/Transforms/SimplifyLibCalls/memmove.ll +++ /dev/null @@ -1,12 +0,0 @@ -; RUN: opt < %s -simplify-libcalls -S | grep "llvm.memmove" -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" -target triple = "i686-pc-linux-gnu" - -define i8* @test(i8* %a, i8* %b, i32 %x) { -entry: - %call = call i8* @memmove(i8* %a, i8* %b, i32 %x ) - ret i8* %call -} - -declare i8* @memmove(i8*,i8*,i32) - diff --git a/test/Transforms/SimplifyLibCalls/memset-64.ll b/test/Transforms/SimplifyLibCalls/memset-64.ll deleted file mode 100644 index 92412dee71..0000000000 --- a/test/Transforms/SimplifyLibCalls/memset-64.ll +++ /dev/null @@ -1,12 +0,0 @@ -; RUN: opt < %s -simplify-libcalls -S | grep "llvm.memset" -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" -target triple = "x86_64-pc-linux-gnu" - -define void @a(i8* %x) nounwind { -entry: - %call = call i8* @memset(i8* %x, i32 1, i64 100) ; <i8*> [#uses=0] - ret void -} - -declare i8* @memset(i8*, i32, i64) - diff --git a/test/Transforms/SimplifyLibCalls/memset.ll b/test/Transforms/SimplifyLibCalls/memset.ll deleted file mode 100644 index 853215a4d2..0000000000 --- a/test/Transforms/SimplifyLibCalls/memset.ll +++ /dev/null @@ -1,12 +0,0 @@ -; RUN: opt < %s -simplify-libcalls -S | grep "llvm.memset" -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" -target triple = "i686-pc-linux-gnu" - -define i8* @test(i8* %a, i32 %b, i32 %x) { -entry: - %call = call i8* @memset(i8* %a, i32 %b, i32 %x ) - ret i8* %call -} - -declare i8* 
@memset(i8*,i32,i32) - diff --git a/unittests/ADT/APFloatTest.cpp b/unittests/ADT/APFloatTest.cpp index 48d5d83019..117b8204b9 100644 --- a/unittests/ADT/APFloatTest.cpp +++ b/unittests/ADT/APFloatTest.cpp @@ -635,6 +635,12 @@ TEST(APFloatTest, exactInverse) { EXPECT_TRUE(inv.bitwiseIsEqual(APFloat(0.5))); EXPECT_TRUE(APFloat(2.0f).getExactInverse(&inv)); EXPECT_TRUE(inv.bitwiseIsEqual(APFloat(0.5f))); + EXPECT_TRUE(APFloat(APFloat::IEEEquad, "2.0").getExactInverse(&inv)); + EXPECT_TRUE(inv.bitwiseIsEqual(APFloat(APFloat::IEEEquad, "0.5"))); + EXPECT_TRUE(APFloat(APFloat::PPCDoubleDouble, "2.0").getExactInverse(&inv)); + EXPECT_TRUE(inv.bitwiseIsEqual(APFloat(APFloat::PPCDoubleDouble, "0.5"))); + EXPECT_TRUE(APFloat(APFloat::x87DoubleExtended, "2.0").getExactInverse(&inv)); + EXPECT_TRUE(inv.bitwiseIsEqual(APFloat(APFloat::x87DoubleExtended, "0.5"))); // FLT_MIN EXPECT_TRUE(APFloat(1.17549435e-38f).getExactInverse(&inv)); diff --git a/unittests/Support/AlignOfTest.cpp b/unittests/Support/AlignOfTest.cpp index f2d11708a2..f01e660939 100644 --- a/unittests/Support/AlignOfTest.cpp +++ b/unittests/Support/AlignOfTest.cpp @@ -66,6 +66,17 @@ struct V6 : S1 { virtual ~V6(); }; struct V7 : virtual V2, virtual V6 { virtual ~V7(); }; struct V8 : V5, virtual V6, V7 { double zz; virtual ~V8(); }; +double S6::f() { return 0.0; } +float D2::g() { return 0.0f; } +V1::~V1() {} +V2::~V2() {} +V3::~V3() {} +V4::~V4() {} +V5::~V5() {} +V6::~V6() {} +V7::~V7() {} +V8::~V8() {} + // Ensure alignment is a compile-time constant. char LLVM_ATTRIBUTE_UNUSED test_arr1 [AlignOf<char>::Alignment > 0] diff --git a/utils/TableGen/CodeEmitterGen.cpp b/utils/TableGen/CodeEmitterGen.cpp index e60aec9568..3e4f626d48 100644 --- a/utils/TableGen/CodeEmitterGen.cpp +++ b/utils/TableGen/CodeEmitterGen.cpp @@ -134,10 +134,13 @@ AddCodeToMergeInOperand(Record *R, BitsInit *BI, const std::string &VarName, assert(!CGI.Operands.isFlatOperandNotEmitted(OpIdx) && "Explicitly used operand also marked as not emitted!"); } else { + unsigned NumberOps = CGI.Operands.size(); /// If this operand is not supposed to be emitted by the /// generated emitter, skip it. - while (CGI.Operands.isFlatOperandNotEmitted(NumberedOp)) + while (NumberedOp < NumberOps && + CGI.Operands.isFlatOperandNotEmitted(NumberedOp)) ++NumberedOp; + OpIdx = NumberedOp++; } diff --git a/utils/lldbDataFormatters.py b/utils/lldbDataFormatters.py index 18b407a02a..1baf398aa5 100644 --- a/utils/lldbDataFormatters.py +++ b/utils/lldbDataFormatters.py @@ -2,6 +2,7 @@ Load into LLDB with: script import lldbDataFormatters type synthetic add -x "^llvm::SmallVectorImpl<.+>$" -l lldbDataFormatters.SmallVectorSynthProvider +type synthetic add -x "^llvm::SmallVector<.+,.+>$" -l lldbDataFormatters.SmallVectorSynthProvider """ # Pretty printer for llvm::SmallVector/llvm::SmallVectorImpl @@ -32,22 +33,15 @@ class SmallVectorSynthProvider: return self.begin.CreateChildAtOffset('['+str(index)+']', offset, self.data_type) - def get_type_from_name(self): - import re - name = self.valobj.GetType().GetName() - # This class works with both SmallVectors and SmallVectorImpls. - res = re.match("^(llvm::)?SmallVectorImpl<(.+)>$", name) - if res: - return res.group(2) - res = re.match("^(llvm::)?SmallVector<(.+), \d+>$", name) - if res: - return res.group(2) - return None - def update(self): self.begin = self.valobj.GetChildMemberWithName('BeginX') self.end = self.valobj.GetChildMemberWithName('EndX') - data_type = self.get_type_from_name() - # FIXME: this sometimes returns an invalid type. 
-        self.data_type = self.valobj.GetTarget().FindFirstType(data_type)
+        the_type = self.valobj.GetType()
+        # If this is a reference type we have to dereference it to get to the
+        # template parameter.
+        if the_type.IsReferenceType():
+            the_type = the_type.GetDereferencedType()
+
+        self.data_type = the_type.GetTemplateArgumentType(0)
         self.type_size = self.data_type.GetByteSize()
+        assert self.type_size != 0
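For readers following the lldbDataFormatters.py hunk above, here is a minimal
standalone sketch of the new element-type lookup, assuming only the stock
lldb Python module; the function name and surrounding scaffolding are
illustrative, not part of the patch:

# Sketch of the SmallVector element-type resolution used by update() above.
# Assumes an lldb.SBValue 'valobj' wrapping a SmallVector/SmallVectorImpl;
# 'element_type' is a hypothetical helper, not a name from the patch.
def element_type(valobj):
    the_type = valobj.GetType()
    # A reference type must be dereferenced first, otherwise the template
    # argument query would be made against the reference itself.
    if the_type.IsReferenceType():
        the_type = the_type.GetDereferencedType()
    # For llvm::SmallVector<T, N> and llvm::SmallVectorImpl<T>, T is
    # template argument 0; this replaces the old regex-based name parsing,
    # which could hand an unresolvable name to FindFirstType.
    return the_type.GetTemplateArgumentType(0)

Querying the template argument directly also explains the second docstring
line added above: one registration per spelling (SmallVector and
SmallVectorImpl) now shares a single, name-independent code path.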