diff options
77 files changed, 3419 insertions, 934 deletions
diff --git a/docs/ReleaseNotes.html b/docs/ReleaseNotes.html index 45a9cc5dec..fc3a8b71bd 100644 --- a/docs/ReleaseNotes.html +++ b/docs/ReleaseNotes.html @@ -473,15 +473,31 @@ Release Notes</a>.</h1> <b>-mllvm -force-vector-width=4</b>. The default value is <b>0</b> which means auto-select. <br/> - We can now vectorize this code: + We can now vectorize this function: <pre class="doc_code"> - for (i=0; i<n; i++) { - a[i] = b[i+1] + c[i+3] + i; - sum += d[i]; + unsigned sum_arrays(int *A, int *B, int start, int end) { + unsigned sum = 0; + for (int i = start; i < end; ++i) + sum += A[i] + B[i] + i; + + return sum; } </pre> + We vectorize loops under the following conditions: + <ul> + <li>The innermost loops must have a single basic block.</li> + <li>The number of iterations is known before the loop starts to execute.</li> + <li>The loop counter needs to be incremented by one.</li> + <li>The loop trip count <b>can</b> be a variable.</li> + <li>Loops do <b>not</b> need to start at zero.</li> + <li>The induction variable can be used inside the loop.</li> + <li>Loop reductions are supported.</li> + <li>Arrays with affine access pattern do <b>not</b> need to be marked as 'noalias' and are checked at runtime.</li> + <li>...</li> + </ul> + </p> <p>SROA - We've re-written SROA to be significantly more powerful. 
diff --git a/include/llvm/MC/MCExpr.h b/include/llvm/MC/MCExpr.h index 4c10e5114a..00eef270d6 100644 --- a/include/llvm/MC/MCExpr.h +++ b/include/llvm/MC/MCExpr.h @@ -167,6 +167,7 @@ public: VK_ARM_TPOFF, VK_ARM_GOTTPOFF, VK_ARM_TARGET1, + VK_ARM_TARGET2, VK_PPC_TOC, // TOC base VK_PPC_TOC_ENTRY, // TOC entry diff --git a/include/llvm/Target/TargetLibraryInfo.h b/include/llvm/Target/TargetLibraryInfo.h index 2a0a43229f..a2c97d782e 100644 --- a/include/llvm/Target/TargetLibraryInfo.h +++ b/include/llvm/Target/TargetLibraryInfo.h @@ -276,12 +276,18 @@ namespace llvm { sqrtf, /// long double sqrtl(long double x); sqrtl, + /// char *stpcpy(char *s1, const char *s2); + stpcpy, /// char *strcat(char *s1, const char *s2); strcat, /// char *strchr(const char *s, int c); strchr, + /// int strcmp(const char *s1, const char *s2); + strcmp, /// char *strcpy(char *s1, const char *s2); strcpy, + /// size_t strcspn(const char *s1, const char *s2); + strcspn, /// char *strdup(const char *s1); strdup, /// size_t strlen(const char *s); @@ -296,6 +302,29 @@ namespace llvm { strndup, /// size_t strnlen(const char *s, size_t maxlen); strnlen, + /// char *strpbrk(const char *s1, const char *s2); + strpbrk, + /// char *strrchr(const char *s, int c); + strrchr, + /// size_t strspn(const char *s1, const char *s2); + strspn, + /// char *strstr(const char *s1, const char *s2); + strstr, + /// double strtod(const char *nptr, char **endptr); + strtod, + /// float strtof(const char *nptr, char **endptr); + strtof, + /// long int strtol(const char *nptr, char **endptr, int base); + strtol, + /// long double strtold(const char *nptr, char **endptr); + strtold, + /// long long int strtoll(const char *nptr, char **endptr, int base); + strtoll, + /// unsigned long int strtoul(const char *nptr, char **endptr, int base); + strtoul, + /// unsigned long long int strtoull(const char *nptr, char **endptr, + /// int base); + strtoull, /// double tan(double x); tan, /// float tanf(float x); diff --git 
a/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/include/llvm/Transforms/Utils/SimplifyLibCalls.h index 5db2d00181..fde452bca2 100644 --- a/include/llvm/Transforms/Utils/SimplifyLibCalls.h +++ b/include/llvm/Transforms/Utils/SimplifyLibCalls.h @@ -19,6 +19,7 @@ namespace llvm { class Value; class CallInst; class DataLayout; + class Instruction; class TargetLibraryInfo; class LibCallSimplifierImpl; @@ -35,8 +36,16 @@ namespace llvm { /// optimizeCall - Take the given call instruction and return a more /// optimal value to replace the instruction with or 0 if a more - /// optimal form can't be found. + /// optimal form can't be found. Note that the returned value may + /// be equal to the instruction being optimized. In this case all + /// other instructions that use the given instruction were modified + /// and the given instruction is dead. Value *optimizeCall(CallInst *CI); + + /// replaceAllUsesWith - This method is used when the library call + /// simplifier needs to replace instructions other than the library + /// call being modified. 
+ virtual void replaceAllUsesWith(Instruction *I, Value *With) const; }; } // End llvm namespace diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 5cac8ca3ba..91a5b84e8a 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -292,7 +292,10 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, unsigned IntBytes = unsigned(CI->getBitWidth()/8); for (unsigned i = 0; i != BytesLeft && ByteOffset != IntBytes; ++i) { - CurPtr[i] = (unsigned char)(Val >> (ByteOffset * 8)); + int n = ByteOffset; + if (!TD.isLittleEndian()) + n = IntBytes - n - 1; + CurPtr[i] = (unsigned char)(Val >> (n * 8)); ++ByteOffset; } return true; @@ -442,10 +445,19 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, BytesLoaded, TD)) return 0; - APInt ResultVal = APInt(IntType->getBitWidth(), RawBytes[BytesLoaded-1]); - for (unsigned i = 1; i != BytesLoaded; ++i) { - ResultVal <<= 8; - ResultVal |= RawBytes[BytesLoaded-1-i]; + APInt ResultVal = APInt(IntType->getBitWidth(), 0); + if (TD.isLittleEndian()) { + ResultVal = RawBytes[BytesLoaded - 1]; + for (unsigned i = 1; i != BytesLoaded; ++i) { + ResultVal <<= 8; + ResultVal |= RawBytes[BytesLoaded-1-i]; + } + } else { + ResultVal = RawBytes[0]; + for (unsigned i = 1; i != BytesLoaded; ++i) { + ResultVal <<= 8; + ResultVal |= RawBytes[i]; + } } return ConstantInt::get(IntType->getContext(), ResultVal); @@ -521,10 +533,8 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, } } - // Try hard to fold loads from bitcasted strange and non-type-safe things. We - // currently don't do any of this for big endian systems. It can be - // generalized in the future if someone is interested. - if (TD && TD->isLittleEndian()) + // Try hard to fold loads from bitcasted strange and non-type-safe things. 
+ if (TD) return FoldReinterpretLoadFromConstPtr(CE, *TD); return 0; } diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 65bc4af99e..4e75d892e5 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -146,6 +146,11 @@ void LiveIntervals::print(raw_ostream &OS, const Module* ) const { OS << PrintReg(Reg) << " = " << getInterval(Reg) << '\n'; } + OS << "RegMasks:"; + for (unsigned i = 0, e = RegMaskSlots.size(); i != e; ++i) + OS << ' ' << RegMaskSlots[i]; + OS << '\n'; + printInstrs(OS); } @@ -1257,10 +1262,15 @@ private: SmallVectorImpl<SlotIndex>::iterator RI = std::lower_bound(LIS.RegMaskSlots.begin(), LIS.RegMaskSlots.end(), OldIdx); - assert(*RI == OldIdx && "No RegMask at OldIdx."); - *RI = NewIdx; - assert(*prior(RI) < *RI && *RI < *next(RI) && - "RegSlots out of order. Did you move one call across another?"); + assert(RI != LIS.RegMaskSlots.end() && *RI == OldIdx.getRegSlot() && + "No RegMask at OldIdx."); + *RI = NewIdx.getRegSlot(); + assert((RI == LIS.RegMaskSlots.begin() || + SlotIndex::isEarlierInstr(*llvm::prior(RI), *RI)) && + "Cannot move regmask instruction above another call"); + assert((llvm::next(RI) == LIS.RegMaskSlots.end() || + SlotIndex::isEarlierInstr(*RI, *llvm::next(RI))) && + "Cannot move regmask instruction below another call"); } // Return the last use of reg between NewIdx and OldIdx. 
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index de16932c06..a4817d09c0 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -1583,6 +1583,7 @@ const char *ConvergingScheduler::getReasonStr( case NextDefUse: return "DEF-USE "; case NodeOrder: return "ORDER "; }; + llvm_unreachable("Unknown reason!"); } void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand, diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp index ffa79761f2..e0336342d6 100644 --- a/lib/MC/MCExpr.cpp +++ b/lib/MC/MCExpr.cpp @@ -60,7 +60,8 @@ void MCExpr::print(raw_ostream &OS) const { SRE.getKind() == MCSymbolRefExpr::VK_ARM_GOTOFF || SRE.getKind() == MCSymbolRefExpr::VK_ARM_TPOFF || SRE.getKind() == MCSymbolRefExpr::VK_ARM_GOTTPOFF || - SRE.getKind() == MCSymbolRefExpr::VK_ARM_TARGET1) + SRE.getKind() == MCSymbolRefExpr::VK_ARM_TARGET1 || + SRE.getKind() == MCSymbolRefExpr::VK_ARM_TARGET2) OS << MCSymbolRefExpr::getVariantKindName(SRE.getKind()); else if (SRE.getKind() != MCSymbolRefExpr::VK_None && SRE.getKind() != MCSymbolRefExpr::VK_PPC_DARWIN_HA16 && @@ -199,6 +200,7 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) { case VK_ARM_GOTTPOFF: return "(gottpoff)"; case VK_ARM_TLSGD: return "(tlsgd)"; case VK_ARM_TARGET1: return "(target1)"; + case VK_ARM_TARGET2: return "(target2)"; case VK_PPC_TOC: return "tocbase"; case VK_PPC_TOC_ENTRY: return "toc"; case VK_PPC_DARWIN_HA16: return "ha16"; diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index 43c68f4d1d..7e8b4a3d0d 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -3553,11 +3553,6 @@ void APFloat::toString(SmallVectorImpl<char> &Str, } bool APFloat::getExactInverse(APFloat *inv) const { - // We can only guarantee the existence of an exact inverse for IEEE floats. 
- if (semantics != &IEEEhalf && semantics != &IEEEsingle && - semantics != &IEEEdouble && semantics != &IEEEquad) - return false; - // Special floats and denormals have no exact inverse. if (category != fcNormal) return false; diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 8d1a301a67..f67decc550 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -853,13 +853,28 @@ void ARMAsmPrinter::emitAttributes() { AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use, ARMBuildAttrs::Allowed); } else if (CPUString == "generic") { - // FIXME: Why these defaults? - AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v4T); + // For a generic CPU, we assume a standard v7a architecture in Subtarget. + AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v7); + AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch_profile, + ARMBuildAttrs::ApplicationProfile); AttrEmitter->EmitAttribute(ARMBuildAttrs::ARM_ISA_use, ARMBuildAttrs::Allowed); AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use, - ARMBuildAttrs::Allowed); - } + ARMBuildAttrs::AllowThumb32); + } else if (Subtarget->hasV7Ops()) { + AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v7); + AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use, + ARMBuildAttrs::AllowThumb32); + } else if (Subtarget->hasV6T2Ops()) + AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v6T2); + else if (Subtarget->hasV6Ops()) + AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v6); + else if (Subtarget->hasV5TEOps()) + AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v5TE); + else if (Subtarget->hasV5TOps()) + AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v5T); + else if (Subtarget->hasV4TOps()) + AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v4T); if (Subtarget->hasNEON() && emitFPU) { /* NEON is not exactly a VFP architecture, 
but GAS emit one of @@ -1515,31 +1530,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { } return; } - case ARM::t2BMOVPCB_CALL: { - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::tMOVr); - TmpInst.addOperand(MCOperand::CreateReg(ARM::LR)); - TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); - // Add predicate operands. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::t2B); - const GlobalValue *GV = MI->getOperand(0).getGlobal(); - MCSymbol *GVSym = Mang->getSymbol(GV); - const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext); - TmpInst.addOperand(MCOperand::CreateExpr(GVSymExpr)); - // Add predicate operands. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } - return; - } case ARM::MOVi16_ga_pcrel: case ARM::t2MOVi16_ga_pcrel: { MCInst TmpInst; diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index dec498a4f7..0893826427 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -1639,18 +1639,12 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (Subtarget->isThumb()) { if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps()) CallOpc = ARMISD::CALL_NOLINK; - else if (doesNotRet && isDirect && !isARMFunc && - Subtarget->hasRAS() && !Subtarget->isThumb1Only() && - // Emit regular call when code size is the priority - !HasMinSizeAttr) - // "mov lr, pc; b _foo" to avoid confusing the RSP - CallOpc = ARMISD::CALL_NOLINK; else CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL; } else { - if (!isDirect && !Subtarget->hasV5TOps()) { + if (!isDirect && !Subtarget->hasV5TOps()) CallOpc = ARMISD::CALL_NOLINK; - } else if (doesNotRet && isDirect && Subtarget->hasRAS() && + else if (doesNotRet && isDirect |