aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--docs/ReleaseNotes.html24
-rw-r--r--include/llvm/MC/MCExpr.h1
-rw-r--r--include/llvm/Target/TargetLibraryInfo.h29
-rw-r--r--include/llvm/Transforms/Utils/SimplifyLibCalls.h11
-rw-r--r--lib/Analysis/ConstantFolding.cpp28
-rw-r--r--lib/CodeGen/LiveIntervalAnalysis.cpp18
-rw-r--r--lib/CodeGen/MachineScheduler.cpp1
-rw-r--r--lib/MC/MCExpr.cpp4
-rw-r--r--lib/Support/APFloat.cpp5
-rw-r--r--lib/Target/ARM/ARMAsmPrinter.cpp48
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp10
-rw-r--r--lib/Target/ARM/ARMInstrThumb2.td14
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp3
-rw-r--r--lib/Target/NVPTX/NVPTX.td34
-rw-r--r--lib/Target/NVPTX/NVPTXAsmPrinter.cpp6
-rw-r--r--lib/Target/NVPTX/NVPTXSubtarget.cpp20
-rw-r--r--lib/Target/NVPTX/NVPTXSubtarget.h11
-rw-r--r--lib/Target/TargetLibraryInfo.cpp18
-rw-r--r--lib/Target/TargetTransformImpl.cpp12
-rw-r--r--lib/Target/X86/X86ISelDAGToDAG.cpp79
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp141
-rw-r--r--lib/Target/X86/X86ISelLowering.h19
-rw-r--r--lib/Target/X86/X86InstrSSE.td154
-rw-r--r--lib/Target/X86/X86Subtarget.cpp5
-rw-r--r--lib/Transforms/InstCombine/InstructionCombining.cpp20
-rw-r--r--lib/Transforms/Instrumentation/ThreadSanitizer.cpp99
-rw-r--r--lib/Transforms/Scalar/SimplifyLibCalls.cpp247
-rw-r--r--lib/Transforms/Utils/SimplifyLibCalls.cpp295
-rw-r--r--lib/Transforms/Vectorize/LoopVectorize.cpp229
-rw-r--r--test/CodeGen/ARM/2010-10-19-mc-elf-objheader.ll4
-rw-r--r--test/CodeGen/ARM/call-noret-minsize.ll7
-rw-r--r--test/CodeGen/ARM/call-noret.ll8
-rw-r--r--test/CodeGen/NVPTX/param-align.ll25
-rw-r--r--test/CodeGen/NVPTX/ptx-version-30.ll6
-rw-r--r--test/CodeGen/NVPTX/ptx-version-31.ll6
-rw-r--r--test/CodeGen/NVPTX/sm-version-10.ll6
-rw-r--r--test/CodeGen/NVPTX/sm-version-11.ll6
-rw-r--r--test/CodeGen/NVPTX/sm-version-12.ll6
-rw-r--r--test/CodeGen/NVPTX/sm-version-13.ll6
-rw-r--r--test/CodeGen/NVPTX/sm-version-20.ll6
-rw-r--r--test/CodeGen/NVPTX/sm-version-21.ll6
-rw-r--r--test/CodeGen/NVPTX/sm-version-30.ll6
-rw-r--r--test/CodeGen/NVPTX/sm-version-35.ll6
-rw-r--r--test/CodeGen/PowerPC/misched.ll45
-rw-r--r--test/CodeGen/Thumb/thumb_jump24_fixup.ll23
-rw-r--r--test/CodeGen/X86/avx-intrinsics-x86.ll52
-rw-r--r--test/Instrumentation/ThreadSanitizer/atomic.ll1544
-rw-r--r--test/MC/ARM/elf-jump24-fixup.s9
-rw-r--r--test/Transforms/ConstProp/loads.ll132
-rw-r--r--test/Transforms/GVN/rle.ll8
-rw-r--r--test/Transforms/InstCombine/2012-07-25-LoadPart.ll10
-rw-r--r--test/Transforms/InstCombine/disable-simplify-libcalls.ll236
-rw-r--r--test/Transforms/InstCombine/memcmp-1.ll72
-rw-r--r--test/Transforms/InstCombine/memcmp-2.ll17
-rw-r--r--test/Transforms/InstCombine/memcpy-1.ll17
-rw-r--r--test/Transforms/InstCombine/memcpy-2.ll17
-rw-r--r--test/Transforms/InstCombine/memmove-1.ll17
-rw-r--r--test/Transforms/InstCombine/memmove-2.ll17
-rw-r--r--test/Transforms/InstCombine/memset-1.ll17
-rw-r--r--test/Transforms/InstCombine/memset-2.ll17
-rw-r--r--test/Transforms/InstCombine/strcspn-1.ll57
-rw-r--r--test/Transforms/InstCombine/strcspn-2.ll21
-rw-r--r--test/Transforms/InstCombine/strncmp-1.ll10
-rw-r--r--test/Transforms/InstCombine/strstr-1.ll65
-rw-r--r--test/Transforms/InstCombine/strstr-2.ll18
-rw-r--r--test/Transforms/LoopVectorize/runtime-check.ll36
-rw-r--r--test/Transforms/SCCP/loadtest.ll5
-rw-r--r--test/Transforms/SimplifyLibCalls/StrSpn.ll25
-rw-r--r--test/Transforms/SimplifyLibCalls/StrStr.ll60
-rw-r--r--test/Transforms/SimplifyLibCalls/memcmp.ll35
-rw-r--r--test/Transforms/SimplifyLibCalls/memmove.ll12
-rw-r--r--test/Transforms/SimplifyLibCalls/memset-64.ll12
-rw-r--r--test/Transforms/SimplifyLibCalls/memset.ll12
-rw-r--r--unittests/ADT/APFloatTest.cpp6
-rw-r--r--unittests/Support/AlignOfTest.cpp11
-rw-r--r--utils/TableGen/CodeEmitterGen.cpp5
-rw-r--r--utils/lldbDataFormatters.py24
77 files changed, 3419 insertions, 934 deletions
diff --git a/docs/ReleaseNotes.html b/docs/ReleaseNotes.html
index 45a9cc5dec..fc3a8b71bd 100644
--- a/docs/ReleaseNotes.html
+++ b/docs/ReleaseNotes.html
@@ -473,15 +473,31 @@ Release Notes</a>.</h1>
<b>-mllvm -force-vector-width=4</b>.
The default value is <b>0</b> which means auto-select.
<br/>
- We can now vectorize this code:
+ We can now vectorize this function:
<pre class="doc_code">
- for (i=0; i&lt;n; i++) {
- a[i] = b[i+1] + c[i+3] + i;
- sum += d[i];
+ unsigned sum_arrays(int *A, int *B, int start, int end) {
+ unsigned sum = 0;
+ for (int i = start; i &lt; end; ++i)
+ sum += A[i] + B[i] + i;
+
+ return sum;
}
</pre>
+ We vectorize loops under the following conditions:
+ <ul>
+ <li>The innermost loops must have a single basic block.</li>
+ <li>The number of iterations is known before the loop starts to execute.</li>
+ <li>The loop counter needs to be incremented by one.</li>
+ <li>The loop trip count <b>can</b> be a variable.</li>
+ <li>Loops do <b>not</b> need to start at zero.</li>
+ <li>The induction variable can be used inside the loop.</li>
+ <li>Loop reductions are supported.</li>
+ <li>Arrays with affine access pattern do <b>not</b> need to be marked as 'noalias' and are checked at runtime.</li>
+ <li>...</li>
+ </ul>
+
</p>
<p>SROA - We've re-written SROA to be significantly more powerful.
diff --git a/include/llvm/MC/MCExpr.h b/include/llvm/MC/MCExpr.h
index 4c10e5114a..00eef270d6 100644
--- a/include/llvm/MC/MCExpr.h
+++ b/include/llvm/MC/MCExpr.h
@@ -167,6 +167,7 @@ public:
VK_ARM_TPOFF,
VK_ARM_GOTTPOFF,
VK_ARM_TARGET1,
+ VK_ARM_TARGET2,
VK_PPC_TOC, // TOC base
VK_PPC_TOC_ENTRY, // TOC entry
diff --git a/include/llvm/Target/TargetLibraryInfo.h b/include/llvm/Target/TargetLibraryInfo.h
index 2a0a43229f..a2c97d782e 100644
--- a/include/llvm/Target/TargetLibraryInfo.h
+++ b/include/llvm/Target/TargetLibraryInfo.h
@@ -276,12 +276,18 @@ namespace llvm {
sqrtf,
/// long double sqrtl(long double x);
sqrtl,
+ /// char *stpcpy(char *s1, const char *s2);
+ stpcpy,
/// char *strcat(char *s1, const char *s2);
strcat,
/// char *strchr(const char *s, int c);
strchr,
+ /// int strcmp(const char *s1, const char *s2);
+ strcmp,
/// char *strcpy(char *s1, const char *s2);
strcpy,
+ /// size_t strcspn(const char *s1, const char *s2);
+ strcspn,
/// char *strdup(const char *s1);
strdup,
/// size_t strlen(const char *s);
@@ -296,6 +302,29 @@ namespace llvm {
strndup,
/// size_t strnlen(const char *s, size_t maxlen);
strnlen,
+ /// char *strpbrk(const char *s1, const char *s2);
+ strpbrk,
+ /// char *strrchr(const char *s, int c);
+ strrchr,
+ /// size_t strspn(const char *s1, const char *s2);
+ strspn,
+ /// char *strstr(const char *s1, const char *s2);
+ strstr,
+ /// double strtod(const char *nptr, char **endptr);
+ strtod,
+ /// float strtof(const char *nptr, char **endptr);
+ strtof,
+ /// long int strtol(const char *nptr, char **endptr, int base);
+ strtol,
+ /// long double strtold(const char *nptr, char **endptr);
+ strtold,
+ /// long long int strtoll(const char *nptr, char **endptr, int base);
+ strtoll,
+ /// unsigned long int strtoul(const char *nptr, char **endptr, int base);
+ strtoul,
+ /// unsigned long long int strtoull(const char *nptr, char **endptr,
+ /// int base);
+ strtoull,
/// double tan(double x);
tan,
/// float tanf(float x);
diff --git a/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/include/llvm/Transforms/Utils/SimplifyLibCalls.h
index 5db2d00181..fde452bca2 100644
--- a/include/llvm/Transforms/Utils/SimplifyLibCalls.h
+++ b/include/llvm/Transforms/Utils/SimplifyLibCalls.h
@@ -19,6 +19,7 @@ namespace llvm {
class Value;
class CallInst;
class DataLayout;
+ class Instruction;
class TargetLibraryInfo;
class LibCallSimplifierImpl;
@@ -35,8 +36,16 @@ namespace llvm {
/// optimizeCall - Take the given call instruction and return a more
/// optimal value to replace the instruction with or 0 if a more
- /// optimal form can't be found.
+ /// optimal form can't be found. Note that the returned value may
+ /// be equal to the instruction being optimized. In this case all
+ /// other instructions that use the given instruction were modified
+ /// and the given instruction is dead.
Value *optimizeCall(CallInst *CI);
+
+ /// replaceAllUsesWith - This method is used when the library call
+ /// simplifier needs to replace instructions other than the library
+ /// call being modified.
+ virtual void replaceAllUsesWith(Instruction *I, Value *With) const;
};
} // End llvm namespace
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 5cac8ca3ba..91a5b84e8a 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -292,7 +292,10 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
unsigned IntBytes = unsigned(CI->getBitWidth()/8);
for (unsigned i = 0; i != BytesLeft && ByteOffset != IntBytes; ++i) {
- CurPtr[i] = (unsigned char)(Val >> (ByteOffset * 8));
+ int n = ByteOffset;
+ if (!TD.isLittleEndian())
+ n = IntBytes - n - 1;
+ CurPtr[i] = (unsigned char)(Val >> (n * 8));
++ByteOffset;
}
return true;
@@ -442,10 +445,19 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
BytesLoaded, TD))
return 0;
- APInt ResultVal = APInt(IntType->getBitWidth(), RawBytes[BytesLoaded-1]);
- for (unsigned i = 1; i != BytesLoaded; ++i) {
- ResultVal <<= 8;
- ResultVal |= RawBytes[BytesLoaded-1-i];
+ APInt ResultVal = APInt(IntType->getBitWidth(), 0);
+ if (TD.isLittleEndian()) {
+ ResultVal = RawBytes[BytesLoaded - 1];
+ for (unsigned i = 1; i != BytesLoaded; ++i) {
+ ResultVal <<= 8;
+ ResultVal |= RawBytes[BytesLoaded-1-i];
+ }
+ } else {
+ ResultVal = RawBytes[0];
+ for (unsigned i = 1; i != BytesLoaded; ++i) {
+ ResultVal <<= 8;
+ ResultVal |= RawBytes[i];
+ }
}
return ConstantInt::get(IntType->getContext(), ResultVal);
@@ -521,10 +533,8 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
}
}
- // Try hard to fold loads from bitcasted strange and non-type-safe things. We
- // currently don't do any of this for big endian systems. It can be
- // generalized in the future if someone is interested.
- if (TD && TD->isLittleEndian())
+ // Try hard to fold loads from bitcasted strange and non-type-safe things.
+ if (TD)
return FoldReinterpretLoadFromConstPtr(CE, *TD);
return 0;
}
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 65bc4af99e..4e75d892e5 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -146,6 +146,11 @@ void LiveIntervals::print(raw_ostream &OS, const Module* ) const {
OS << PrintReg(Reg) << " = " << getInterval(Reg) << '\n';
}
+ OS << "RegMasks:";
+ for (unsigned i = 0, e = RegMaskSlots.size(); i != e; ++i)
+ OS << ' ' << RegMaskSlots[i];
+ OS << '\n';
+
printInstrs(OS);
}
@@ -1257,10 +1262,15 @@ private:
SmallVectorImpl<SlotIndex>::iterator RI =
std::lower_bound(LIS.RegMaskSlots.begin(), LIS.RegMaskSlots.end(),
OldIdx);
- assert(*RI == OldIdx && "No RegMask at OldIdx.");
- *RI = NewIdx;
- assert(*prior(RI) < *RI && *RI < *next(RI) &&
- "RegSlots out of order. Did you move one call across another?");
+ assert(RI != LIS.RegMaskSlots.end() && *RI == OldIdx.getRegSlot() &&
+ "No RegMask at OldIdx.");
+ *RI = NewIdx.getRegSlot();
+ assert((RI == LIS.RegMaskSlots.begin() ||
+ SlotIndex::isEarlierInstr(*llvm::prior(RI), *RI)) &&
+ "Cannot move regmask instruction above another call");
+ assert((llvm::next(RI) == LIS.RegMaskSlots.end() ||
+ SlotIndex::isEarlierInstr(*RI, *llvm::next(RI))) &&
+ "Cannot move regmask instruction below another call");
}
// Return the last use of reg between NewIdx and OldIdx.
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index de16932c06..a4817d09c0 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -1583,6 +1583,7 @@ const char *ConvergingScheduler::getReasonStr(
case NextDefUse: return "DEF-USE ";
case NodeOrder: return "ORDER ";
};
+ llvm_unreachable("Unknown reason!");
}
void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand,
diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp
index ffa79761f2..e0336342d6 100644
--- a/lib/MC/MCExpr.cpp
+++ b/lib/MC/MCExpr.cpp
@@ -60,7 +60,8 @@ void MCExpr::print(raw_ostream &OS) const {
SRE.getKind() == MCSymbolRefExpr::VK_ARM_GOTOFF ||
SRE.getKind() == MCSymbolRefExpr::VK_ARM_TPOFF ||
SRE.getKind() == MCSymbolRefExpr::VK_ARM_GOTTPOFF ||
- SRE.getKind() == MCSymbolRefExpr::VK_ARM_TARGET1)
+ SRE.getKind() == MCSymbolRefExpr::VK_ARM_TARGET1 ||
+ SRE.getKind() == MCSymbolRefExpr::VK_ARM_TARGET2)
OS << MCSymbolRefExpr::getVariantKindName(SRE.getKind());
else if (SRE.getKind() != MCSymbolRefExpr::VK_None &&
SRE.getKind() != MCSymbolRefExpr::VK_PPC_DARWIN_HA16 &&
@@ -199,6 +200,7 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
case VK_ARM_GOTTPOFF: return "(gottpoff)";
case VK_ARM_TLSGD: return "(tlsgd)";
case VK_ARM_TARGET1: return "(target1)";
+ case VK_ARM_TARGET2: return "(target2)";
case VK_PPC_TOC: return "tocbase";
case VK_PPC_TOC_ENTRY: return "toc";
case VK_PPC_DARWIN_HA16: return "ha16";
diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp
index 43c68f4d1d..7e8b4a3d0d 100644
--- a/lib/Support/APFloat.cpp
+++ b/lib/Support/APFloat.cpp
@@ -3553,11 +3553,6 @@ void APFloat::toString(SmallVectorImpl<char> &Str,
}
bool APFloat::getExactInverse(APFloat *inv) const {
- // We can only guarantee the existence of an exact inverse for IEEE floats.
- if (semantics != &IEEEhalf && semantics != &IEEEsingle &&
- semantics != &IEEEdouble && semantics != &IEEEquad)
- return false;
-
// Special floats and denormals have no exact inverse.
if (category != fcNormal)
return false;
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 8d1a301a67..f67decc550 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -853,13 +853,28 @@ void ARMAsmPrinter::emitAttributes() {
AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use,
ARMBuildAttrs::Allowed);
} else if (CPUString == "generic") {
- // FIXME: Why these defaults?
- AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v4T);
+ // For a generic CPU, we assume a standard v7a architecture in Subtarget.
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v7);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch_profile,
+ ARMBuildAttrs::ApplicationProfile);
AttrEmitter->EmitAttribute(ARMBuildAttrs::ARM_ISA_use,
ARMBuildAttrs::Allowed);
AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use,
- ARMBuildAttrs::Allowed);
- }
+ ARMBuildAttrs::AllowThumb32);
+ } else if (Subtarget->hasV7Ops()) {
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v7);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use,
+ ARMBuildAttrs::AllowThumb32);
+ } else if (Subtarget->hasV6T2Ops())
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v6T2);
+ else if (Subtarget->hasV6Ops())
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v6);
+ else if (Subtarget->hasV5TEOps())
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v5TE);
+ else if (Subtarget->hasV5TOps())
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v5T);
+ else if (Subtarget->hasV4TOps())
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v4T);
if (Subtarget->hasNEON() && emitFPU) {
/* NEON is not exactly a VFP architecture, but GAS emit one of
@@ -1515,31 +1530,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
return;
}
- case ARM::t2BMOVPCB_CALL: {
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tMOVr);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::LR));
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
- // Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::t2B);
- const GlobalValue *GV = MI->getOperand(0).getGlobal();
- MCSymbol *GVSym = Mang->getSymbol(GV);
- const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext);
- TmpInst.addOperand(MCOperand::CreateExpr(GVSymExpr));
- // Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- return;
- }
case ARM::MOVi16_ga_pcrel:
case ARM::t2MOVi16_ga_pcrel: {
MCInst TmpInst;
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index dec498a4f7..0893826427 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -1639,18 +1639,12 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (Subtarget->isThumb()) {
if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
CallOpc = ARMISD::CALL_NOLINK;
- else if (doesNotRet && isDirect && !isARMFunc &&
- Subtarget->hasRAS() && !Subtarget->isThumb1Only() &&
- // Emit regular call when code size is the priority
- !HasMinSizeAttr)
- // "mov lr, pc; b _foo" to avoid confusing the RSP
- CallOpc = ARMISD::CALL_NOLINK;
else
CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
} else {
- if (!isDirect && !Subtarget->hasV5TOps()) {
+ if (!isDirect && !Subtarget->hasV5TOps())
CallOpc = ARMISD::CALL_NOLINK;
- } else if (doesNotRet && isDirect && Subtarget->hasRAS() &&
+ else if (doesNotRet && isDirect