-rw-r--r--  docs/LangRef.html | 14
-rw-r--r--  docs/README.txt | 2
-rw-r--r--  docs/ReleaseNotes.html | 19
-rw-r--r--  docs/subsystems.rst | 7
-rw-r--r--  include/llvm/Analysis/ScalarEvolutionExpander.h | 4
-rw-r--r--  include/llvm/CodeGen/CommandFlags.h | 228
-rw-r--r--  include/llvm/CodeGen/MachineFrameInfo.h | 9
-rw-r--r--  include/llvm/CodeGen/MachineRegisterInfo.h | 45
-rw-r--r--  include/llvm/CodeGen/SchedulerRegistry.h | 5
-rw-r--r--  include/llvm/ExecutionEngine/RuntimeDyld.h | 2
-rw-r--r--  include/llvm/InitializePasses.h | 3
-rw-r--r--  include/llvm/LinkAllPasses.h | 1
-rw-r--r--  include/llvm/MC/MCParser/MCAsmParser.h | 35
-rw-r--r--  include/llvm/MC/MCSchedule.h | 8
-rw-r--r--  include/llvm/Operator.h | 32
-rw-r--r--  include/llvm/Target/TargetMachine.h | 6
-rw-r--r--  include/llvm/Target/TargetTransformImpl.h | 54
-rw-r--r--  include/llvm/TargetTransformInfo.h | 128
-rw-r--r--  include/llvm/Transforms/IPO.h | 5
-rw-r--r--  include/llvm/Transforms/Scalar.h | 7
-rw-r--r--  include/llvm/Transforms/Vectorize.h | 6
-rw-r--r--  lib/Analysis/ScalarEvolutionExpander.cpp | 12
-rw-r--r--  lib/CodeGen/ExecutionDepsFix.cpp | 2
-rw-r--r--  lib/CodeGen/MachineRegisterInfo.cpp | 4
-rw-r--r--  lib/CodeGen/Passes.cpp | 4
-rw-r--r--  lib/CodeGen/PrologEpilogInserter.cpp | 2
-rw-r--r--  lib/CodeGen/RegAllocFast.cpp | 53
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 27
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 157
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 3
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 3
-rw-r--r--  lib/CodeGen/StackColoring.cpp | 20
-rw-r--r--  lib/CodeGen/TargetSchedule.cpp | 12
-rw-r--r--  lib/DebugInfo/DWARFContext.cpp | 37
-rw-r--r--  lib/MC/MCParser/AsmParser.cpp | 210
-rw-r--r--  lib/Support/Unix/Signals.inc | 30
-rw-r--r--  lib/Target/ARM/ARMFrameLowering.cpp | 4
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.cpp | 6
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.h | 18
-rw-r--r--  lib/Target/CMakeLists.txt | 1
-rw-r--r--  lib/Target/CellSPU/SPUTargetMachine.cpp | 3
-rw-r--r--  lib/Target/CellSPU/SPUTargetMachine.h | 9
-rw-r--r--  lib/Target/Hexagon/HexagonTargetMachine.cpp | 5
-rw-r--r--  lib/Target/Hexagon/HexagonTargetMachine.h | 11
-rw-r--r--  lib/Target/MBlaze/MBlazeTargetMachine.cpp | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeTargetMachine.h | 7
-rw-r--r--  lib/Target/MSP430/MSP430FrameLowering.cpp | 14
-rw-r--r--  lib/Target/MSP430/MSP430FrameLowering.h | 1
-rw-r--r--  lib/Target/MSP430/MSP430RegisterInfo.cpp | 14
-rw-r--r--  lib/Target/MSP430/MSP430RegisterInfo.h | 2
-rw-r--r--  lib/Target/MSP430/MSP430TargetMachine.cpp | 2
-rw-r--r--  lib/Target/MSP430/MSP430TargetMachine.h | 10
-rw-r--r--  lib/Target/Mips/Mips16InstrInfo.cpp | 31
-rw-r--r--  lib/Target/Mips/Mips16InstrInfo.td | 267
-rw-r--r--  lib/Target/Mips/MipsTargetMachine.cpp | 2
-rw-r--r--  lib/Target/Mips/MipsTargetMachine.h | 9
-rw-r--r--  lib/Target/NVPTX/NVPTXTargetMachine.cpp | 3
-rw-r--r--  lib/Target/NVPTX/NVPTXTargetMachine.h | 10
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp | 47
-rw-r--r--  lib/Target/PowerPC/PPCTargetMachine.cpp | 3
-rw-r--r--  lib/Target/PowerPC/PPCTargetMachine.h | 9
-rw-r--r--  lib/Target/Sparc/SparcTargetMachine.cpp | 2
-rw-r--r--  lib/Target/Sparc/SparcTargetMachine.h | 9
-rw-r--r--  lib/Target/Target.cpp | 1
-rw-r--r--  lib/Target/TargetTransformImpl.cpp | 43
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 58
-rw-r--r--  lib/Target/X86/X86MCInstLower.cpp | 18
-rw-r--r--  lib/Target/X86/X86TargetMachine.cpp | 6
-rw-r--r--  lib/Target/X86/X86TargetMachine.h | 17
-rw-r--r--  lib/Target/X86/X86VZeroUpper.cpp | 2
-rw-r--r--  lib/Target/XCore/XCoreTargetMachine.cpp | 2
-rw-r--r--  lib/Target/XCore/XCoreTargetMachine.h | 9
-rw-r--r--  lib/Transforms/IPO/BarrierNoopPass.cpp | 47
-rw-r--r--  lib/Transforms/IPO/CMakeLists.txt | 1
-rw-r--r--  lib/Transforms/IPO/PassManagerBuilder.cpp | 14
-rw-r--r--  lib/Transforms/Instrumentation/AddressSanitizer.cpp | 8
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp | 134
-rw-r--r--  lib/Transforms/Scalar/SROA.cpp | 167
-rw-r--r--  lib/Transforms/Scalar/SimplifyLibCalls.cpp | 47
-rw-r--r--  lib/Transforms/Utils/LowerInvoke.cpp | 32
-rw-r--r--  lib/Transforms/Utils/SimplifyLibCalls.cpp | 55
-rw-r--r--  lib/Transforms/Vectorize/CMakeLists.txt | 1
-rw-r--r--  lib/Transforms/Vectorize/LoopVectorize.cpp | 885
-rw-r--r--  lib/Transforms/Vectorize/Vectorize.cpp | 8
-rw-r--r--  lib/VMCore/CMakeLists.txt | 1
-rw-r--r--  lib/VMCore/TargetTransformInfo.cpp | 27
-rw-r--r--  test/CodeGen/MSP430/fp.ll | 17
-rw-r--r--  test/CodeGen/Mips/brconeq.ll | 38
-rw-r--r--  test/CodeGen/Mips/brconeqk.ll | 22
-rw-r--r--  test/CodeGen/Mips/brconeqz.ll | 20
-rw-r--r--  test/CodeGen/Mips/brconge.ll | 37
-rw-r--r--  test/CodeGen/Mips/brcongt.ll | 25
-rw-r--r--  test/CodeGen/Mips/brconle.ll | 37
-rw-r--r--  test/CodeGen/Mips/brconlt.ll | 27
-rw-r--r--  test/CodeGen/Mips/brconne.ll | 26
-rw-r--r--  test/CodeGen/Mips/brconnek.ll | 25
-rw-r--r--  test/CodeGen/Mips/brconnez.ll | 24
-rw-r--r--  test/CodeGen/PowerPC/i64_fp_round.ll | 27
-rw-r--r--  test/CodeGen/X86/2012-10-18-crash-dagco.ll | 61
-rw-r--r--  test/CodeGen/X86/extract-concat.ll | 17
-rw-r--r--  test/CodeGen/X86/pr14090.ll | 76
-rw-r--r--  test/CodeGen/X86/sjlj.ll | 18
-rw-r--r--  test/CodeGen/X86/trunc-fp2int.ll | 18
-rw-r--r--  test/Transforms/InstCombine/strcpy-1.ll | 45
-rw-r--r--  test/Transforms/InstCombine/strcpy-2.ll | 22
-rw-r--r--  test/Transforms/InstCombine/strcpy_chk-1.ll | 54
-rw-r--r--  test/Transforms/LoopVectorize/gcc-examples.ll | 649
-rw-r--r--  test/Transforms/LoopVectorize/lit.local.cfg | 1
-rw-r--r--  test/Transforms/LoopVectorize/non-const-n.ll | 38
-rw-r--r--  test/Transforms/SROA/basictest.ll | 20
-rw-r--r--  test/Transforms/SimplifyLibCalls/StrCpy.ll | 37
-rw-r--r--  test/lit.cfg | 5
-rw-r--r--  tools/bugpoint-passes/bugpoint.exports | 1
-rw-r--r--  tools/llc/llc.cpp | 244
-rw-r--r--  tools/llvm-dwarfdump/llvm-dwarfdump.cpp | 6
-rw-r--r--  tools/lto/LTOCodeGenerator.cpp | 2
-rw-r--r--  tools/opt/CMakeLists.txt | 2
-rw-r--r--  tools/opt/LLVMBuild.txt | 2
-rw-r--r--  tools/opt/Makefile | 2
-rw-r--r--  tools/opt/opt.cpp | 78
-rw-r--r--  unittests/ExecutionEngine/JIT/JITTest.cpp | 2
-rw-r--r--  unittests/ExecutionEngine/JIT/Makefile | 7
122 files changed, 3757 insertions, 1255 deletions
diff --git a/docs/LangRef.html b/docs/LangRef.html
index 167397ff53..874e12fa44 100644
--- a/docs/LangRef.html
+++ b/docs/LangRef.html
@@ -1364,11 +1364,13 @@ target datalayout = "<i>layout specification</i>"
8-bits. If omitted, the natural stack alignment defaults to "unspecified",
which does not prevent any alignment promotions.</dd>
- <dt><tt>p:<i>size</i>:<i>abi</i>:<i>pref</i></tt></dt>
+ <dt><tt>p[n]:<i>size</i>:<i>abi</i>:<i>pref</i></tt></dt>
<dd>This specifies the <i>size</i> of a pointer and its <i>abi</i> and
- <i>preferred</i> alignments. All sizes are in bits. Specifying
- the <i>pref</i> alignment is optional. If omitted, the
- preceding <tt>:</tt> should be omitted too.</dd>
+ <i>preferred</i> alignments for address space <i>n</i>. All sizes are in
+ bits. Specifying the <i>pref</i> alignment is optional. If omitted, the
+ preceding <tt>:</tt> should be omitted too. The address space,
+ <i>n</i> is optional, and if not specified, denotes the default address
+ space 0. The value of <i>n</i> must be in the range [1,2^23).</dd>
<dt><tt>i<i>size</i>:<i>abi</i>:<i>pref</i></tt></dt>
<dd>This specifies the alignment for an integer type of a given bit
@@ -1409,6 +1411,10 @@ target datalayout = "<i>layout specification</i>"
<ul>
<li><tt>E</tt> - big endian</li>
<li><tt>p:64:64:64</tt> - 64-bit pointers with 64-bit alignment</li>
+ <li><tt>p1:32:32:32</tt> - 32-bit pointers with 32-bit alignment for
+ address space 1</li>
+ <li><tt>p2:16:32:32</tt> - 16-bit pointers with 32-bit alignment for
+ address space 2</li>
<li><tt>i1:8:8</tt> - i1 is 8-bit (byte) aligned</li>
<li><tt>i8:8:8</tt> - i8 is 8-bit (byte) aligned</li>
<li><tt>i16:16:16</tt> - i16 is 16-bit aligned</li>
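[Editor's note: the LangRef hunk above adds an optional address-space index to the pointer entry of the datalayout string. A minimal sketch of what this enables, assuming the 3.2-era DataLayout string constructor and the address-space-aware getPointerSizeInBits overload that landed around this time; the function and variable names here are illustrative, not part of this patch.]

    #include "llvm/DataLayout.h"
    using namespace llvm;

    void datalayoutExample() {
      // 64-bit pointers in the default address space, 32-bit pointers in
      // address space 1, matching the "p" and "p1" entries described above.
      DataLayout DL("e-p:64:64:64-p1:32:32:32-i32:32:32");
      unsigned DefaultPtrBits = DL.getPointerSizeInBits(0); // expected 64
      unsigned AS1PtrBits     = DL.getPointerSizeInBits(1); // expected 32
      (void)DefaultPtrBits;
      (void)AS1PtrBits;
    }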
diff --git a/docs/README.txt b/docs/README.txt
index 2fbbf98740..5ddd599d8a 100644
--- a/docs/README.txt
+++ b/docs/README.txt
@@ -6,7 +6,7 @@ The LLVM documentation is currently written in two formats:
* Plain HTML documentation.
* reStructured Text documentation using the Sphinx documentation generator. It
- is currently tested with Sphinx 1.1.3.
+ is currently tested with Sphinx 1.1.3.
For more information, see the "Sphinx Introduction for LLVM Developers"
document.
diff --git a/docs/ReleaseNotes.html b/docs/ReleaseNotes.html
index 75a6fd1ca1..26c5213b12 100644
--- a/docs/ReleaseNotes.html
+++ b/docs/ReleaseNotes.html
@@ -466,6 +466,18 @@ Release Notes</a>.</h1>
<p>In addition to many minor performance tweaks and bug fixes, this release
includes a few major enhancements and additions to the optimizers:</p>
+<p> Loop Vectorizer - We've added a basic loop vectorizer and we are now able
+ to vectorize small loops. The loop vectorizer is disabled by default and
+ can be enabled using the -mllvm -vectorize flags. We can vectorize this code:
+
+ <pre class="doc_code">
+ for (i=0; i&lt;n; i++) {
+ a[i] = b[i+1] + c[i+3] + i;
+ }
+ </pre>
+
+ </p>
+
<ul>
<li>...</li>
</ul>
@@ -506,6 +518,8 @@ Release Notes</a>.</h1>
We use the lifetime markers to tell the codegen that a certain alloca
is used within a region.</p>
+<p> We now merge consecutive loads and stores. </p>
+
<p>We have put a significant amount of work into the code generator
infrastructure, which allows us to implement more aggressive algorithms and
make it run faster:</p>
@@ -645,6 +659,11 @@ Release Notes</a>.</h1>
<p>In addition, many APIs have changed in this release. Some of the major
LLVM API changes are:</p>
+<p> We've added a new interface for allowing IR-level passes to access
+ target-specific information. A new IR-level pass, called
+ "TargetTransformInfo" provides a number of low-level interfaces.
+ LSR and LowerInvoke already use the new interface. </p>
+
<ul>
<li>...</li>
</ul>
diff --git a/docs/subsystems.rst b/docs/subsystems.rst
index 8c3cdf2417..6f77b79fbe 100644
--- a/docs/subsystems.rst
+++ b/docs/subsystems.rst
@@ -91,3 +91,10 @@ Subsystem Documentation
* :ref:`segmented_stacks`
This document describes segmented stacks and how they are used in LLVM.
+
+* `Howto: Implementing LLVM Integrated Assembler`_
+
+ A simple guide for how to implement an LLVM integrated assembler for an
+ architecture.
+
+.. _`Howto: Implementing LLVM Integrated Assembler`: http://www.embecosm.com/download/ean10.html
diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h
index 3ab9c8256b..3f8f149cb4 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpander.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpander.h
@@ -22,7 +22,7 @@
#include <set>
namespace llvm {
- class ScalarTargetTransformInfo;
+ class TargetLowering;
/// Return true if the given expression is safe to expand in the sense that
/// all materialized values are safe to speculate.
@@ -129,7 +129,7 @@ namespace llvm {
/// representative. Return the number of phis eliminated.
unsigned replaceCongruentIVs(Loop *L, const DominatorTree *DT,
SmallVectorImpl<WeakVH> &DeadInsts,
- const ScalarTargetTransformInfo *STTI = NULL);
+ const TargetLowering *TLI = NULL);
/// expandCodeFor - Insert code to directly compute the specified SCEV
/// expression into the program. The inserted code is inserted into the
diff --git a/include/llvm/CodeGen/CommandFlags.h b/include/llvm/CodeGen/CommandFlags.h
deleted file mode 100644
index 90ee234244..0000000000
--- a/include/llvm/CodeGen/CommandFlags.h
+++ /dev/null
@@ -1,228 +0,0 @@
-//===-- CommandFlags.h - Register Coalescing Interface ----------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains codegen-specific flags that are shared between different
-// command line tools. The tools "llc" and "opt" both use this file to prevent
-// flag duplication.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_COMMAND_LINE_FLAGS_H
-#define LLVM_CODEGEN_COMMAND_LINE_FLAGS_H
-
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/CodeGen.h"
-#include "llvm/Target/TargetMachine.h"
-
-#include <string>
-using namespace llvm;
-
-cl::opt<std::string>
-MArch("march", cl::desc("Architecture to generate code for (see --version)"));
-
-cl::opt<std::string>
-MCPU("mcpu",
- cl::desc("Target a specific cpu type (-mcpu=help for details)"),
- cl::value_desc("cpu-name"),
- cl::init(""));
-
-cl::list<std::string>
-MAttrs("mattr",
- cl::CommaSeparated,
- cl::desc("Target specific attributes (-mattr=help for details)"),
- cl::value_desc("a1,+a2,-a3,..."));
-
-cl::opt<Reloc::Model>
-RelocModel("relocation-model",
- cl::desc("Choose relocation model"),
- cl::init(Reloc::Default),
- cl::values(
- clEnumValN(Reloc::Default, "default",
- "Target default relocation model"),
- clEnumValN(Reloc::Static, "static",
- "Non-relocatable code"),
- clEnumValN(Reloc::PIC_, "pic",
- "Fully relocatable, position independent code"),
- clEnumValN(Reloc::DynamicNoPIC, "dynamic-no-pic",
- "Relocatable external references, non-relocatable code"),
- clEnumValEnd));
-
-cl::opt<llvm::CodeModel::Model>
-CMModel("code-model",
- cl::desc("Choose code model"),
- cl::init(CodeModel::Default),
- cl::values(clEnumValN(CodeModel::Default, "default",
- "Target default code model"),
- clEnumValN(CodeModel::Small, "small",
- "Small code model"),
- clEnumValN(CodeModel::Kernel, "kernel",
- "Kernel code model"),
- clEnumValN(CodeModel::Medium, "medium",
- "Medium code model"),
- clEnumValN(CodeModel::Large, "large",
- "Large code model"),
- clEnumValEnd));
-
-cl::opt<bool>
-RelaxAll("mc-relax-all",
- cl::desc("When used with filetype=obj, "
- "relax all fixups in the emitted object file"));
-
-cl::opt<TargetMachine::CodeGenFileType>
-FileType("filetype", cl::init(TargetMachine::CGFT_AssemblyFile),
- cl::desc("Choose a file type (not all types are supported by all targets):"),
- cl::values(
- clEnumValN(TargetMachine::CGFT_AssemblyFile, "asm",
- "Emit an assembly ('.s') file"),
- clEnumValN(TargetMachine::CGFT_ObjectFile, "obj",
- "Emit a native object ('.o') file"),
- clEnumValN(TargetMachine::CGFT_Null, "null",
- "Emit nothing, for performance testing"),
- clEnumValEnd));
-
-cl::opt<bool> DisableDotLoc("disable-dot-loc", cl::Hidden,
- cl::desc("Do not use .loc entries"));
-
-cl::opt<bool> DisableCFI("disable-cfi", cl::Hidden,
- cl::desc("Do not use .cfi_* directives"));
-
-cl::opt<bool> EnableDwarfDirectory("enable-dwarf-directory", cl::Hidden,
- cl::desc("Use .file directives with an explicit directory."));
-
-cl::opt<bool>
-DisableRedZone("disable-red-zone",
- cl::desc("Do not emit code that uses the red zone."),
- cl::init(false));
-
-cl::opt<bool>
-EnableFPMAD("enable-fp-mad",
- cl::desc("Enable less precise MAD instructions to be generated"),
- cl::init(false));
-
-cl::opt<bool>
-DisableFPElim("disable-fp-elim",
- cl::desc("Disable frame pointer elimination optimization"),
- cl::init(false));
-
-cl::opt<bool>
-DisableFPElimNonLeaf("disable-non-leaf-fp-elim",
- cl::desc("Disable frame pointer elimination optimization for non-leaf funcs"),
- cl::init(false));
-
-cl::opt<bool>
-EnableUnsafeFPMath("enable-unsafe-fp-math",
- cl::desc("Enable optimizations that may decrease FP precision"),
- cl::init(false));
-
-cl::opt<bool>
-EnableNoInfsFPMath("enable-no-infs-fp-math",
- cl::desc("Enable FP math optimizations that assume no +-Infs"),
- cl::init(false));
-
-cl::opt<bool>
-EnableNoNaNsFPMath("enable-no-nans-fp-math",
- cl::desc("Enable FP math optimizations that assume no NaNs"),
- cl::init(false));
-
-cl::opt<bool>
-EnableHonorSignDependentRoundingFPMath("enable-sign-dependent-rounding-fp-math",
- cl::Hidden,
- cl::desc("Force codegen to assume rounding mode can change dynamically"),
- cl::init(false));
-
-cl::opt<bool>
-GenerateSoftFloatCalls("soft-float",
- cl::desc("Generate software floating point library calls"),
- cl::init(false));
-
-cl::opt<llvm::FloatABI::ABIType>
-FloatABIForCalls("float-abi",
- cl::desc("Choose float ABI type"),
- cl::init(FloatABI::Default),
- cl::values(
- clEnumValN(FloatABI::Default, "default",
- "Target default float ABI type"),
- clEnumValN(FloatABI::Soft, "soft",
- "Soft float ABI (implied by -soft-float)"),
- clEnumValN(FloatABI::Hard, "hard",
- "Hard float ABI (uses FP registers)"),
- clEnumValEnd));
-
-cl::opt<llvm::FPOpFusion::FPOpFusionMode>
-FuseFPOps("fp-contract",
- cl::desc("Enable aggresive formation of fused FP ops"),
- cl::init(FPOpFusion::Standard),
- cl::values(
- clEnumValN(FPOpFusion::Fast, "fast",
- "Fuse FP ops whenever profitable"),
- clEnumValN(FPOpFusion::Standard, "on",
- "Only fuse 'blessed' FP ops."),
- clEnumValN(FPOpFusion::Strict, "off",
- "Only fuse FP ops when the result won't be effected."),
- clEnumValEnd));
-
-cl::opt<bool>
-DontPlaceZerosInBSS("nozero-initialized-in-bss",
- cl::desc("Don't place zero-initialized symbols into bss section"),
- cl::init(false));
-
-cl::opt<bool>
-EnableGuaranteedTailCallOpt("tailcallopt",
- cl::desc("Turn fastcc calls into tail calls by (potentially) changing ABI."),
- cl::init(false));
-
-cl::opt<bool>
-DisableTailCalls("disable-tail-calls",
- cl::desc("Never emit tail calls"),
- cl::init(false));
-
-cl::opt<unsigned>
-OverrideStackAlignment("stack-alignment",
- cl::desc("Override default stack alignment"),
- cl::init(0));
-
-cl::opt<bool>
-EnableRealignStack("realign-stack",
- cl::desc("Realign stack if needed"),
- cl::init(true));
-
-cl::opt<std::string>
-TrapFuncName("trap-func", cl::Hidden,
- cl::desc("Emit a call to trap function rather than a trap instruction"),
- cl::init(""));
-
-cl::opt<bool>
-EnablePIE("enable-pie",
- cl::desc("Assume the creation of a position independent executable."),
- cl::init(false));
-
-cl::opt<bool>
-SegmentedStacks("segmented-stacks",
- cl::desc("Use segmented stacks if possible."),
- cl::init(false));
-
-cl::opt<bool>
-UseInitArray("use-init-array",
- cl::desc("Use .init_array instead of .ctors."),
- cl::init(false));
-
-cl::opt<std::string> StopAfter("stop-after",
- cl::desc("Stop compilation after a specific pass"),
- cl::value_desc("pass-name"),
- cl::init(""));
-cl::opt<std::string> StartAfter("start-after",
- cl::desc("Resume compilation after a specific pass"),
- cl::value_desc("pass-name"),
- cl::init(""));
-
-cl::opt<unsigned>
-SSPBufferSize("stack-protector-buffer-size", cl::init(8),
- cl::desc("Lower bound for a buffer to be considered for "
- "stack protection"));
-#endif
diff --git a/include/llvm/CodeGen/MachineFrameInfo.h b/include/llvm/CodeGen/MachineFrameInfo.h
index 7188b1abbd..0e4e132e40 100644
--- a/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/include/llvm/CodeGen/MachineFrameInfo.h
@@ -29,6 +29,7 @@ class MachineBasicBlock;
class TargetFrameLowering;
class BitVector;
class Value;
+class AllocaInst;
/// The CalleeSavedInfo class tracks the information need to locate where a
/// callee saved register is in the current frame.
@@ -106,14 +107,14 @@ class MachineFrameInfo {
/// Alloca - If this stack object is originated from an Alloca instruction
/// this value saves the original IR allocation. Can be NULL.
- const Value *Alloca;
+ const AllocaInst *Alloca;
// PreAllocated - If true, the object was mapped into the local frame
// block and doesn't need additional handling for allocation beyond that.
bool PreAllocated;
StackObject(uint64_t Sz, unsigned Al, int64_t SP, bool IM,
- bool isSS, bool NSP, const Value *Val)
+ bool isSS, bool NSP, const AllocaInst *Val)
: SPOffset(SP), Size(Sz), Alignment(Al), isImmutable(IM),
isSpillSlot(isSS), MayNeedSP(NSP), Alloca(Val), PreAllocated(false) {}
};
@@ -369,7 +370,7 @@ public:
/// getObjectAllocation - Return the underlying Alloca of the specified
/// stack object if it exists. Returns 0 if none exists.
- const Value* getObjectAllocation(int ObjectIdx) const {
+ const AllocaInst* getObjectAllocation(int ObjectIdx) const {
assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() &&
"Invalid Object Idx!");
return Objects[ObjectIdx+NumFixedObjects].Alloca;
@@ -495,7 +496,7 @@ public:
/// a nonnegative identifier to represent it.
///
int CreateStackObject(uint64_t Size, unsigned Alignment, bool isSS,
- bool MayNeedSP = false, const Value *Alloca = 0) {
+ bool MayNeedSP = false, const AllocaInst *Alloca = 0) {
assert(Size != 0 && "Cannot allocate zero size stack objects!");
Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, MayNeedSP,
Alloca));
diff --git a/include/llvm/CodeGen/MachineRegisterInfo.h b/include/llvm/CodeGen/MachineRegisterInfo.h
index a5bc7f7d39..4e86363f07 100644
--- a/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -77,16 +77,20 @@ class MachineRegisterInfo {
return MO->Contents.Reg.Next;
}
- /// UsedPhysRegs - This is a bit vector that is computed and set by the
+ /// UsedRegUnits - This is a bit vector that is computed and set by the
/// register allocator, and must be kept up to date by passes that run after
/// register allocation (though most don't modify this). This is used
/// so that the code generator knows which callee save registers to save and
/// for other target specific uses.
- /// This vector only has bits set for registers explicitly used, not their
- /// aliases.
- BitVector UsedPhysRegs;
-
- /// UsedPhysRegMask - Additional used physregs, but including aliases.
+ /// This vector has bits set for register units that are modified in the
+ /// current function. It doesn't include registers clobbered by function
+ /// calls with register mask operands.
+ BitVector UsedRegUnits;
+
+ /// UsedPhysRegMask - Additional used physregs including aliases.
+ /// This bit vector represents all the registers clobbered by function calls.
+ /// It can model things that UsedRegUnits can't, such as function calls that
+ /// clobber ymm7 but preserve the low half in xmm7.
BitVector UsedPhysRegMask;
/// ReservedRegs - This is a bit vector of reserved registers. The target
@@ -357,29 +361,27 @@ public:
//===--------------------------------------------------------------------===//
/// isPhysRegUsed - Return true if the specified register is used in this
- /// function. This only works after register allocation.
+ /// function. Also check for clobbered aliases and registers clobbered by
+ /// function calls with register mask operands.
+ ///
+ /// This only works after register allocation. It is primarily used by
+ /// PrologEpilogInserter to determine which callee-saved registers need
+ /// spilling.
bool isPhysRegUsed(unsigned Reg) const {
- return UsedPhysRegs.test(Reg) || UsedPhysRegMask.test(Reg);
- }
-
- /// isPhysRegOrOverlapUsed - Return true if Reg or any overlapping register
- /// is used in this function.
- bool isPhysRegOrOverlapUsed(unsigned Reg) const {
if (UsedPhysRegMask.test(Reg))
return true;
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
- if (UsedPhysRegs.test(*AI))
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
+ if (UsedRegUnits.test(*Units))
return true;
return false;
}
/// setPhysRegUsed - Mark the specified register used in this function.
/// This should only be called during and after register allocation.
- void setPhysRegUsed(unsigned Reg) { UsedPhysRegs.set(Reg); }
-
- /// addPhysRegsUsed - Mark the specified registers used in this function.
- /// This should only be called during and after register allocation.
- void addPhysRegsUsed(const BitVector &Regs) { UsedPhysRegs |= Regs; }
+ void setPhysRegUsed(unsigned Reg) {
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
+ UsedRegUnits.set(*Units);
+ }
/// addPhysRegsUsedFromRegMask - Mark any registers not in RegMask as used.
/// This corresponds to the bit mask attached to register mask operands.
@@ -390,8 +392,9 @@ public:
/// setPhysRegUnused - Mark the specified register unused in this function.
/// This should only be called during and after register allocation.
void setPhysRegUnused(unsigned Reg) {
- UsedPhysRegs.reset(Reg);
UsedPhysRegMask.reset(Reg);
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
+ UsedRegUnits.reset(*Units);
}
diff --git a/include/llvm/CodeGen/SchedulerRegistry.h b/include/llvm/CodeGen/SchedulerRegistry.h
index a582b0c40c..836b73a15a 100644
--- a/include/llvm/CodeGen/SchedulerRegistry.h
+++ b/include/llvm/CodeGen/SchedulerRegistry.h
@@ -102,6 +102,11 @@ ScheduleDAGSDNodes *createVLIWDAGScheduler(SelectionDAGISel *IS,
ScheduleDAGSDNodes *createDefaultScheduler(SelectionDAGISel *IS,
CodeGenOpt::Level OptLevel);
+/// createDAGLinearizer - This creates a "no-scheduling" scheduler which
+/// linearize the DAG using topological order.
+ScheduleDAGSDNodes *createDAGLinearizer(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel);
+
} // end namespace llvm
#endif
diff --git a/include/llvm/ExecutionEngine/RuntimeDyld.h b/include/llvm/ExecutionEngine/RuntimeDyld.h
index 7da4a4e09a..a71b1411c8 100644
--- a/include/llvm/ExecutionEngine/RuntimeDyld.h
+++ b/include/llvm/ExecutionEngine/RuntimeDyld.h
@@ -63,7 +63,7 @@ protected:
// Any relocations already associated with the symbol will be re-resolved.
void reassignSectionAddress(unsigned SectionID, uint64_t Addr);
public:
- RuntimeDyld(RTDyldMemoryManager*);
+ RuntimeDyld(RTDyldMemoryManager *);
~RuntimeDyld();
/// loadObject - prepare the object contained in the input buffer for
diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h
index b5eeb7bac4..8e0d2c4503 100644
--- a/include/llvm/InitializePasses.h
+++ b/include/llvm/InitializePasses.h
@@ -66,6 +66,7 @@ void initializeAliasDebuggerPass(PassRegistry&);
void initializeAliasSetPrinterPass(PassRegistry&);
void initializeAlwaysInlinerPass(PassRegistry&);
void initializeArgPromotionPass(PassRegistry&);
+void initializeBarrierNoopPass(PassRegistry&);
void initializeBasicAliasAnalysisPass(PassRegistry&);
void initializeBasicCallGraphPass(PassRegistry&);
void initializeBlockExtractorPassPass(PassRegistry&);
@@ -248,7 +249,6 @@ void initializeTailCallElimPass(PassRegistry&);
void initializeTailDuplicatePassPass(PassRegistry&);
void initializeTargetPassConfigPass(PassRegistry&);
void initializeDataLayoutPass(PassRegistry&);
-void initializeTargetTransformInfoPass(PassRegistry&);
void initializeTargetLibraryInfoPass(PassRegistry&);
void initializeTwoAddressInstructionPassPass(PassRegistry&);
void initializeTypeBasedAliasAnalysisPass(PassRegistry&);
@@ -261,6 +261,7 @@ void initializeVirtRegRewriterPass(PassRegistry&);
void initializeInstSimplifierPass(PassRegistry&);
void initializeUnpackMachineBundlesPass(PassRegistry&);
void initializeFinalizeMachineBundlesPass(PassRegistry&);
+void initializeLoopVectorizePass(PassRegistry&);
void initializeBBVectorizePass(PassRegistry&);
void initializeMachineFunctionPrinterPassPass(PassRegistry&);
void initializeExpandCtorsPass(PassRegistry&); // @LOCALMOD
diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h
index 4b10d0e541..8652acd941 100644
--- a/include/llvm/LinkAllPasses.h
+++ b/include/llvm/LinkAllPasses.h
@@ -156,6 +156,7 @@ namespace {
(void) llvm::createCorrelatedValuePropagationPass();
(void) llvm::createMemDepPrinter();
(void) llvm::createInstructionSimplifierPass();
+ (void) llvm::createLoopVectorizePass();
(void) llvm::createBBVectorizePass();
(void)new llvm::IntervalPartition();
diff --git a/include/llvm/MC/MCParser/MCAsmParser.h b/include/llvm/MC/MCParser/MCAsmParser.h
index 52948f749d..554cdfabf6 100644
--- a/include/llvm/MC/MCParser/MCAsmParser.h
+++ b/include/llvm/MC/MCParser/MCAsmParser.h
@@ -20,6 +20,8 @@ class MCAsmLexer;
class MCAsmParserExtension;
class MCContext;
class MCExpr;
+class MCInstPrinter;
+class MCInstrInfo;
class MCParsedAsmOperand;
class MCStreamer;
class MCTargetAsmParser;
@@ -29,6 +31,13 @@ class SourceMgr;
class StringRef;
class Twine;
+/// MCAsmParserSemaCallback - Generic Sema callback for assembly parser.
+class MCAsmParserSemaCallback {
+public:
+ virtual void *LookupInlineAsmIdentifier(StringRef Name, void *Loc,
+ unsigned &Size) = 0;
+};
+
/// MCAsmParser - Generic assembler parser interface, for use by target specific
/// assembly parsers.
class MCAsmParser {
@@ -77,25 +86,19 @@ public:
virtual void setParsingInlineAsm(bool V) = 0;
virtual bool isParsingInlineAsm() = 0;
+ /// ParseMSInlineAsm - Parse ms-style inline assembly.
+ virtual bool ParseMSInlineAsm(void *AsmLoc, std::string &AsmString,
+ unsigned &NumOutputs, unsigned &NumInputs,
+ SmallVectorImpl<void *> &OpDecls,
+ SmallVectorImpl<std::string> &Constraints,
+ SmallVectorImpl<std::string> &Clobbers,
+ const MCInstrInfo *MII,
+ const MCInstPrinter *IP,
+ MCAsmParserSemaCallback &SI) = 0;
+
/// ParseStatement - Parse the next statement.
virtual bool ParseStatement() = 0;
- /// getNumParsedOperands - Returns the number of MCAsmParsedOperands from the
- /// previously parsed statement.
- virtual unsigned getNumParsedOperands() = 0;
-
- /// getParsedOperand - Get a MCAsmParsedOperand.
- virtual MCParsedAsmOperand &getParsedOperand(unsigned OpNum) = 0;
-
- /// freeParsedOperands - Free the MCAsmParsedOperands.
- virtual void freeParsedOperands() = 0;
-
- /// isInstruction - Was the previously parsed statement an instruction?
- virtual bool isInstruction() = 0;
-
- /// getOpcode - Get the opcode from the previously parsed instruction.
- virtual unsigned getOpcode() = 0;
-
/// Warning - Emit a warning at the location \p L, with the message \p Msg.
///
/// \return The return value is true, if warnings are fatal.
diff --git a/include/llvm/MC/MCSchedule.h b/include/llvm/MC/MCSchedule.h
index 0504dc13c8..c9a060c79b 100644
--- a/include/llvm/MC/MCSchedule.h
+++ b/include/llvm/MC/MCSchedule.h
@@ -54,10 +54,12 @@ struct MCWriteProcResEntry {
};
/// Specify the latency in cpu cycles for a particular scheduling class and def
-/// index. Also identify the WriteResources of this def. When the operand
-/// expands to a sequence of writes, this ID is the last write in the sequence.
+/// index. -1 indicates an invalid latency. Heuristics would typically consider
+/// an instruction with invalid latency to have infinite latency. Also identify
+/// the WriteResources of this def. When the operand expands to a sequence of
+/// writes, this ID is the last write in the sequence.
struct MCWriteLatencyEntry {
- unsigned Cycles;
+ int Cycles;
unsigned WriteResourceID;
bool operator==(const MCWriteLatencyEntry &Other) const {
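[Editor's note: with Cycles now signed, consumers of MCWriteLatencyEntry have to account for the -1 "invalid latency" sentinel. A hedged sketch of how a heuristic might map it to the effectively infinite latency the new comment describes; this helper is illustrative only and not code from this commit.]

    #include <climits>
    #include "llvm/MC/MCSchedule.h"

    // Treat an invalid (negative) latency as "unknown, assume very large".
    static unsigned effectiveLatency(const llvm::MCWriteLatencyEntry &Entry) {
      return Entry.Cycles < 0 ? UINT_MAX : static_cast<unsigned>(Entry.Cycles);
    }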
diff --git a/include/llvm/Operator.h b/include/llvm/Operator.h
index bc5da8e8aa..462324a669 100644
--- a/include/llvm/Operator.h
+++ b/include/llvm/Operator.h
@@ -36,8 +36,8 @@ private:
void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
void *operator new(size_t s) LLVM_DELETED_FUNCTION;
Operator() LLVM_DELETED_FUNCTION;
- // NOTE: cannot use LLVM_DELETED_FUNCTION because gcc errors when deleting
- // an override of a non-deleted function.
+ // NOTE: cannot use LLVM_DELETED_FUNCTION because it's not legal to delete
+ // an overridden method that's not deleted in the base class.
~Operator();
public:
@@ -191,7 +191,7 @@ public:
/// opcodes.
template<typename SuperClass, unsigned Opc>
class ConcreteOperator : public SuperClass {
- ~ConcreteOperator() LLVM_DELETED_FUNCTION;
+ ~ConcreteOperator(); // DO NOT IMPLEMENT
public:
static inline bool classof(const Instruction *I) {
return I->getOpcode() == Opc;
@@ -207,44 +207,44 @@ public:
class AddOperator
: public ConcreteOperator<OverflowingBinaryOperator, Instruction::Add> {
- ~AddOperator() LLVM_DELETED_FUNCTION;
+ ~AddOperator(); // DO NOT IMPLEMENT
};
class SubOperator
: public ConcreteOperator<OverflowingBinaryOperator, Instruction::Sub> {
- ~SubOperator() LLVM_DELETED_FUNCTION;
+ ~SubOperator(); // DO NOT IMPLEMENT
};
class MulOperator
: public ConcreteOperator<OverflowingBinaryOperator, Instruction::Mul> {
- ~MulOperator() LLVM_DELETED_FUNCTION;
+ ~MulOperator(); // DO NOT IMPLEMENT
};
class ShlOperator
: public ConcreteOperator<OverflowingBinaryOperator, Instruction::Shl> {
- ~ShlOperator() LLVM_DELETED_FUNCTION;
+ ~ShlOperator(); // DO NOT IMPLEMENT
};
-
+
class SDivOperator
: public ConcreteOperator<PossiblyExactOperator, Instruction::SDiv> {
- ~SDivOperator() LLVM_DELETED_FUNCTION;
+ ~SDivOperator(); // DO NOT IMPLEMENT
};
class UDivOperator
: public ConcreteOperator<PossiblyExactOperator, Instruction::UDiv> {
- ~UDivOperator() LLVM_DELETED_FUNCTION;
+ ~UDivOperator(); // DO NOT IMPLEMENT
};
class AShrOperator
: public ConcreteOperator<PossiblyExactOperator, Instruction::AShr> {
- ~AShrOperator() LLVM_DELETED_FUNCTION;
+ ~AShrOperator(); // DO NOT IMPLEMENT
};
class LShrOperator
: public ConcreteOperator<PossiblyExactOperator, Instruction::LShr> {
- ~LShrOperator() LLVM_DELETED_FUNCTION;
+ ~LShrOperator(); // DO NOT IMPLEMENT
};
-
-
-
+
+
+
class GEPOperator
: public ConcreteOperator<Operator, Instruction::GetElementPtr> {
- ~GEPOperator() LLVM_DELETED_FUNCTION;
+ ~GEPOperator(); // DO NOT IMPLEMENT
enum {
IsInBounds = (1 << 0)
diff --git a/include/llvm/Target/TargetMachine.h b/include/llvm/Target/TargetMachine.h
index 18e589e2bc..988916f9d9 100644
--- a/include/llvm/Target/TargetMachine.h
+++ b/include/llvm/Target/TargetMachine.h
@@ -17,8 +17,6 @@
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/TargetTransformInfo.h"
-#include "llvm/Target/TargetTransformImpl.h"
#include "llvm/ADT/StringRef.h"
#include <cassert>
#include <string>
@@ -109,10 +107,6 @@ public:
virtual const TargetLowering *getTargetLowering() const { return 0; }
virtual const TargetSelectionDAGInfo *getSelectionDAGInfo() const{ return 0; }
virtual const DataLayout *getDataLayout() const { return 0; }
- virtual const ScalarTargetTransformInfo*
- getScalarTargetTransformInfo() const { return 0; }
- virtual const VectorTargetTransformInfo*
- getVectorTargetTransformInfo() const { return 0; }
/// getMCAsmInfo - Return target specific asm information.
///
diff --git a/include/llvm/Target/TargetTransformImpl.h b/include/llvm/Target/TargetTransformImpl.h
deleted file mode 100644
index 7648f4f935..0000000000
--- a/include/llvm/Target/TargetTransformImpl.h
+++ /dev/null
@@ -1,54 +0,0 @@
-//=- llvm/Target/TargetTransformImpl.h - Target Loop Trans Info----*- C++ -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the target-specific implementations of the
-// TargetTransform interfaces.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TARGET_TARGET_TRANSFORMATION_IMPL_H
-#define LLVM_TARGET_TARGET_TRANSFORMATION_IMPL_H
-
-#include "llvm/TargetTransformInfo.h"
-
-namespace llvm {
-
-class TargetLowering;
-
-/// ScalarTargetTransformInfo - This is a default implementation for the
-/// ScalarTargetTransformInfo interface. Different targets can implement
-/// this interface differently.
-class ScalarTargetTransformImpl : public ScalarTargetTransformInfo {
-private:
- const TargetLowering *TLI;
-
-public:
- /// Ctor
- explicit ScalarTargetTransformImpl(const TargetLowering *TL) : TLI(TL) {}
-
- virtual bool isLegalAddImmediate(int64_t imm) const;
-
- virtual bool isLegalICmpImmediate(int64_t imm) const;
-
- virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const;
-
- virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const;
-
- virtual bool isTypeLegal(Type *Ty) const;
-
- virtual unsigned getJumpBufAlignment() const;
-
- virtual unsigned getJumpBufSize() const;
-};
-
-class VectorTargetTransformImpl : public VectorTargetTransformInfo { };
-
-} // end llvm namespace
-
-#endif
diff --git a/include/llvm/TargetTransformInfo.h b/include/llvm/TargetTransformInfo.h
deleted file mode 100644
index 82fc14dbd7..0000000000
--- a/include/llvm/TargetTransformInfo.h
+++ /dev/null
@@ -1,128 +0,0 @@
-//===- llvm/Transforms/TargetTransformInfo.h --------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass exposes codegen information to IR-level passes. Every
-// transformation that uses codegen information is broken into three parts:
-// 1. The IR-level analysis pass.
-// 2. The IR-level transformation interface which provides the needed
-// information.
-// 3. Codegen-level implementation which uses target-specific hooks.
-//
-// This file defines #2, which is the interface that IR-level transformations
-// use for querying the codegen.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TRANSFORMS_TARGET_TRANSFORM_INTERFACE
-#define LLVM_TRANSFORMS_TARGET_TRANSFORM_INTERFACE
-
-#include "llvm/Pass.h"
-#include "llvm/AddressingMode.h"
-#include "llvm/Support/DataTypes.h"
-#include "llvm/Type.h"
-
-namespace llvm {
-
-class ScalarTargetTransformInfo;
-class VectorTargetTransformInfo;
-
-/// TargetTransformInfo - This pass provides access to the codegen
-/// interfaces that are needed for IR-level transformations.
-class TargetTransformInfo : public ImmutablePass {
-private:
- const ScalarTargetTransformInfo *STTI;
- const VectorTargetTransformInfo *VTTI;
-public:
- /// Default ctor.
- ///
- /// @note This has to exist, because this is a pass, but it should never be
- /// used.
- TargetTransformInfo();
-
- explicit TargetTransformInfo(const ScalarTargetTransformInfo* S,
- const VectorTargetTransformInfo *V)
- : ImmutablePass(ID), STTI(S), VTTI(V) {
- initializeTargetTransformInfoPass(*PassRegistry::getPassRegistry());
- }
-
- TargetTransformInfo(const TargetTransformInfo &T) :
- ImmutablePass(ID), STTI(T.STTI), VTTI(T.VTTI) { }
-
- const ScalarTargetTransformInfo* getScalarTargetTransformInfo() {
- return STTI;
- }
- const VectorTargetTransformInfo* getVectorTargetTransformInfo() {
- return VTTI;
- }
-
- /// Pass identification, replacement for typeid.
- static char ID;
-};
-
-// ---------------------------------------------------------------------------//
-// The classes below are inherited and implemented by target-specific classes
-// in the codegen.
-// ---------------------------------------------------------------------------//
-
-/// ScalarTargetTransformInfo - This interface is used by IR-level passes
-/// that need target-dependent information for generic scalar transformations.
-/// LSR, and LowerInvoke use this interface.
-class ScalarTargetTransformInfo {
-public:
- virtual ~ScalarTargetTransformInfo() {}
-
- /// isLegalAddImmediate - Return true if the specified immediate is legal
- /// add immediate, that is the target has add instructions which can add
- /// a register with the immediate without having to materialize the
- /// immediate into a register.
- virtual bool isLegalAddImmediate(int64_t) const {
- return false;
- }
- /// isLegalICmpImmediate - Return true if the specified immediate is legal
- /// icmp immediate, that is the target has icmp instructions which can compare
- /// a register against the immediate without having to materialize the
- /// immediate into a register.
- virtual bool isLegalICmpImmediate(int64_t) const {
- return false;
- }
- /// isLegalAddressingMode - Return true if the addressing mode represented by
- /// AM is legal for this target, for a load/store of the specified type.
- /// The type may be VoidTy, in which case only return true if the addressing
- /// mode is legal for a load/store of any legal type.
- /// TODO: Handle pre/postinc as well.
- virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const {
- return false;
- }
- /// isTruncateFree - Return true if it's free to truncate a value of
- /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
- /// register EAX to i16 by referencing its sub-register AX.
- virtual bool isTruncateFree(Type * /*Ty1*/, Type * /*Ty2*/) const {
- return false;
- }
- /// Is this type legal.
- virtual bool isTypeLegal(Type *Ty) const {
- return false;
- }
- /// getJumpBufAlignment - returns the target's jmp_buf alignment in bytes
- virtual unsigned getJumpBufAlignment() const {
- return 0;
- }
- /// getJumpBufSize - returns the target's jmp_buf size in bytes.
- virtual unsigned getJumpBufSize() const {
- return 0;
- }
-};
-
-class VectorTargetTransformInfo {
- // TODO: define an interface for VectorTargetTransformInfo.
-};
-
-} // End llvm namespace
-
-#endif
diff --git a/include/llvm/Transforms/IPO.h b/include/llvm/Transforms/IPO.h
index 962cb63758..08d3bbd941 100644
--- a/include/llvm/Transforms/IPO.h
+++ b/include/llvm/Transforms/IPO.h
@@ -198,6 +198,11 @@ ModulePass *createPartialInliningPass();
//
ModulePass *createMetaRenamerPass();
+//===----------------------------------------------------------------------===//
+/// createBarrierNoopPass - This pass is purely a module pass barrier in a pass
+/// manager.
+ModulePass *createBarrierNoopPass();
+
} // End llvm namespace
#endif
diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h
index 12239adb63..1ddca844c9 100644
--- a/include/llvm/Transforms/Scalar.h
+++ b/include/llvm/Transforms/Scalar.h
@@ -119,7 +119,7 @@ Pass *createLICMPass();
// optional parameter used to consult the target machine whether certain
// transformations are profitable.
//
-Pass *createLoopStrengthReducePass();
+Pass *createLoopStrengthReducePass(const TargetLowering *TLI = 0);
Pass *createGlobalMergePass(const TargetLowering *TLI = 0);
@@ -249,8 +249,9 @@ extern char &LowerSwitchID;
// purpose "my LLVM-to-LLVM pass doesn't support the invoke instruction yet"
// lowering pass.
//
-FunctionPass *createLowerInvokePass();
-FunctionPass *createLowerInvokePass(bool useExpensiveEHSupport);
+FunctionPass *createLowerInvokePass(const TargetLowering *TLI = 0);
+FunctionPass *createLowerInvokePass(const TargetLowering *TLI,
+ bool useExpensiveEHSupport);
extern char &LowerInvokePassID;
//===----------------------------------------------------------------------===//
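[Editor's note: these signatures mirror the lib/CodeGen/Passes.cpp hunk later in this patch, where the passes receive the TargetLowering directly instead of going through the removed TargetTransformInfo pass. A minimal sketch of wiring them up with a target machine in hand; the function name is illustrative, and the TLI argument defaults to null if no target information is available.]

    #include "llvm/PassManager.h"
    #include "llvm/Target/TargetMachine.h"
    #include "llvm/Transforms/Scalar.h"
    using namespace llvm;

    void addCodegenIRPasses(PassManagerBase &PM, TargetMachine *TM) {
      // Hand the target's lowering info to both passes so they can make
      // target-aware decisions (addressing modes for LSR, jmp_buf layout
      // for LowerInvoke).
      PM.add(createLoopStrengthReducePass(TM->getTargetLowering()));
      PM.add(createLowerInvokePass(TM->getTargetLowering()));
    }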
diff --git a/include/llvm/Transforms/Vectorize.h b/include/llvm/Transforms/Vectorize.h
index 1e49a9c01e..41e53a83e2 100644
--- a/include/llvm/Transforms/Vectorize.h
+++ b/include/llvm/Transforms/Vectorize.h
@@ -107,6 +107,12 @@ BasicBlockPass *
createBBVectorizePass(const VectorizeConfig &C = VectorizeConfig());
//===----------------------------------------------------------------------===//
+//
+// LoopVectorize - Create a loop vectorization pass.
+//
+Pass * createLoopVectorizePass();
+
+//===----------------------------------------------------------------------===//
/// @brief Vectorize the BasicBlock.
///
/// @param BB The BasicBlock to be vectorized
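[Editor's note: the new createLoopVectorizePass entry point above can be dropped into an existing pass pipeline. A short sketch, assuming the 3.2-era legacy PassManager headers; only createLoopVectorizePass comes from this patch, the surrounding setup is illustrative. Required analyses such as loop info and scalar evolution are scheduled automatically by the pass manager.]

    #include "llvm/Module.h"
    #include "llvm/PassManager.h"
    #include "llvm/Transforms/Vectorize.h"
    using namespace llvm;

    void vectorizeLoops(Module &M) {
      PassManager PM;
      PM.add(createLoopVectorizePass()); // pass added by this patch
      PM.run(M);
    }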
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 5c2a49e767..5e05f4c8ca 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -19,8 +19,8 @@
#include "llvm/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/DataLayout.h"
+#include "llvm/Target/TargetLowering.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/TargetTransformInfo.h"
using namespace llvm;
@@ -1599,15 +1599,15 @@ static bool width_descending(Value *lhs, Value *rhs) {
/// This does not depend on any SCEVExpander state but should be used in
/// the same context that SCEVExpander is used.
unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
- SmallVectorImpl<WeakVH> &DeadInsts,
- const ScalarTargetTransformInfo *STTI) {
+ SmallVectorImpl<WeakVH> &DeadInsts,
+ const TargetLowering *TLI) {
// Find integer phis in order of increasing width.
SmallVector<PHINode*, 8> Phis;
for (BasicBlock::iterator I = L->getHeader()->begin();
PHINode *Phi = dyn_cast<PHINode>(I); ++I) {
Phis.push_back(Phi);
}
- if (STTI)
+ if (TLI)
std::sort(Phis.begin(), Phis.end(), width_descending);
unsigned NumElim = 0;
@@ -1624,8 +1624,8 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(Phi)];
if (!OrigPhiRef) {
OrigPhiRef = Phi;
- if (Phi->getType()->isIntegerTy() && STTI &&
- STTI->isTruncateFree(Phi->getType(), Phis.back()->getType())) {
+ if (Phi->getType()->isIntegerTy() && TLI
+ && TLI->isTruncateFree(Phi->getType(), Phis.back()->getType())) {
// This phi can be freely truncated to the narrowest phi type. Map the
// truncated expression to it so it will be reused for narrow types.
const SCEV *TruncExpr =
diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp
index 2311842671..ed78f19421 100644
--- a/lib/CodeGen/ExecutionDepsFix.cpp
+++ b/lib/CodeGen/ExecutionDepsFix.cpp
@@ -657,7 +657,7 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
bool anyregs = false;
for (TargetRegisterClass::const_iterator I = RC->begin(), E = RC->end();
I != E; ++I)
- if (MF->getRegInfo().isPhysRegOrOverlapUsed(*I)) {
+ if (MF->getRegInfo().isPhysRegUsed(*I)) {
anyregs = true;
break;
}
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index ae7c15be15..95d7a7dd68 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -21,7 +21,7 @@ MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI)
: TRI(&TRI), IsSSA(true), TracksLiveness(true) {
VRegInfo.reserve(256);
RegAllocHints.reserve(256);
- UsedPhysRegs.resize(TRI.getNumRegs());
+ UsedRegUnits.resize(TRI.getNumRegUnits());
UsedPhysRegMask.resize(TRI.getNumRegs());
// Create the physreg use/def lists.
@@ -32,7 +32,7 @@ MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI)
MachineRegisterInfo::~MachineRegisterInfo() {
#ifndef NDEBUG
clearVirtRegs();
- for (unsigned i = 0, e = UsedPhysRegs.size(); i != e; ++i)
+ for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i)
assert(!PhysRegUseDefLists[i] &&
"PhysRegUseDefLists has entries after all instructions are deleted");
#endif
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
index deca31263c..7c7d2c8045 100644
--- a/lib/CodeGen/Passes.cpp
+++ b/lib/CodeGen/Passes.cpp
@@ -369,7 +369,7 @@ void TargetPassConfig::addIRPasses() {
// Run loop strength reduction before anything else.
if (getOptLevel() != CodeGenOpt::None && !DisableLSR) {
- addPass(createLoopStrengthReducePass());
+ addPass(createLoopStrengthReducePass(getTargetLowering()));
if (PrintLSR)
addPass(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs()));
}
@@ -399,7 +399,7 @@ void TargetPassConfig::addPassesToHandleExceptions() {
addPass(createDwarfEHPass(TM));
break;
case ExceptionHandling::None:
- addPass(createLowerInvokePass());
+ addPass(createLowerInvokePass(TM->getTargetLowering()));
// The lower invoke pass may create unreachable code. Remove it.
addPass(createUnreachableBlockEliminationPass());
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index 86df0a127b..77554d691c 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -227,7 +227,7 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
std::vector<CalleeSavedInfo> CSI;
for (unsigned i = 0; CSRegs[i]; ++i) {
unsigned Reg = CSRegs[i];
- if (Fn.getRegInfo().isPhysRegOrOverlapUsed(Reg)) {
+ if (Fn.getRegInfo().isPhysRegUsed(Reg)) {
// If the reg is modified, save it!
CSI.push_back(CalleeSavedInfo(Reg));
}
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index e096240e04..d6ed36ef95 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -113,9 +113,11 @@ namespace {
// PhysRegState - One of the RegState enums, or a virtreg.
std::vector<unsigned> PhysRegState;
- // UsedInInstr - BitVector of physregs that are used in the current
- // instruction, and so cannot be allocated.
- BitVector UsedInInstr;
+ typedef SparseSet<unsigned> UsedInInstrSet;
+
+ // UsedInInstr - Set of physregs that are used in the current instruction,
+ // and so cannot be allocated.
+ UsedInInstrSet UsedInInstr;
// SkippedInstrs - Descriptors of instructions whose clobber list was
// ignored because all registers were spilled. It is still necessary to
@@ -340,7 +342,7 @@ void RAFast::usePhysReg(MachineOperand &MO) {
PhysRegState[PhysReg] = regFree;
// Fall through
case regFree:
- UsedInInstr.set(PhysReg);
+ UsedInInstr.insert(PhysReg);
MO.setIsKill();
return;
default:
@@ -360,13 +362,13 @@ void RAFast::usePhysReg(MachineOperand &MO) {
"Instruction is not using a subregister of a reserved register");
// Leave the superregister in the working set.
PhysRegState[Alias] = regFree;
- UsedInInstr.set(Alias);
+ UsedInInstr.insert(Alias);
MO.getParent()->addRegisterKilled(Alias, TRI, true);
return;
case regFree:
if (TRI->isSuperRegister(PhysReg, Alias)) {
// Leave the superregister in the working set.
- UsedInInstr.set(Alias);
+ UsedInInstr.insert(Alias);
MO.getParent()->addRegisterKilled(Alias, TRI, true);
return;
}
@@ -380,7 +382,7 @@ void RAFast::usePhysReg(MachineOperand &MO) {
// All aliases are disabled, bring register into working set.
PhysRegState[PhysReg] = regFree;
- UsedInInstr.set(PhysReg);
+ UsedInInstr.insert(PhysReg);
MO.setIsKill();
}
@@ -389,7 +391,7 @@ void RAFast::usePhysReg(MachineOperand &MO) {
/// reserved instead of allocated.
void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg,
RegState NewState) {
- UsedInInstr.set(PhysReg);
+ UsedInInstr.insert(PhysReg);
switch (unsigned VirtReg = PhysRegState[PhysReg]) {
case regDisabled:
break;
@@ -429,7 +431,7 @@ void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg,
// can be allocated directly.
// Returns spillImpossible when PhysReg or an alias can't be spilled.
unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
- if (UsedInInstr.test(PhysReg)) {
+ if (UsedInInstr.count(PhysReg)) {
DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is already used in instr.\n");
return spillImpossible;
}
@@ -454,7 +456,7 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
unsigned Cost = 0;
for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
unsigned Alias = *AI;
- if (UsedInInstr.test(Alias))
+ if (UsedInInstr.count(Alias))
return spillImpossible;
switch (unsigned VirtReg = PhysRegState[Alias]) {
case regDisabled:
@@ -530,7 +532,7 @@ RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr *MI,
// First try to find a completely free register.
for (ArrayRef<unsigned>::iterator I = AO.begin(), E = AO.end(); I != E; ++I) {
unsigned PhysReg = *I;
- if (PhysRegState[PhysReg] == regFree && !UsedInInstr.test(PhysReg)) {
+ if (PhysRegState[PhysReg] == regFree && !UsedInInstr.count(PhysReg)) {
assignVirtToPhysReg(*LRI, PhysReg);
return LRI;
}
@@ -596,7 +598,7 @@ RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum,
LRI->LastUse = MI;
LRI->LastOpNum = OpNum;
LRI->Dirty = true;
- UsedInInstr.set(LRI->PhysReg);
+ UsedInInstr.insert(LRI->PhysReg);
return LRI;
}
@@ -646,7 +648,7 @@ RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum,
assert(LRI->PhysReg && "Register not assigned");
LRI->LastUse = MI;
LRI->LastOpNum = OpNum;
- UsedInInstr.set(LRI->PhysReg);
+ UsedInInstr.insert(LRI->PhysReg);
return LRI;
}
@@ -708,7 +710,7 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
unsigned Reg = MO.getReg();
if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
- UsedInInstr.set(*AI);
+ UsedInInstr.insert(*AI);
if (ThroughRegs.count(PhysRegState[*AI]))
definePhysReg(MI, *AI, regFree);
}
@@ -756,7 +758,7 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
}
// Restore UsedInInstr to a state usable for allocating normal virtual uses.
- UsedInInstr.reset();
+ UsedInInstr.clear();
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue;
@@ -764,12 +766,12 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
DEBUG(dbgs() << "\tSetting " << PrintReg(Reg, TRI)
<< " as used in instr\n");
- UsedInInstr.set(Reg);
+ UsedInInstr.insert(Reg);
}
// Also mark PartialDefs as used to avoid reallocation.
for (unsigned i = 0, e = PartialDefs.size(); i != e; ++i)
- UsedInInstr.set(PartialDefs[i]);
+ UsedInInstr.insert(PartialDefs[i]);
}
/// addRetOperand - ensure that a return instruction has an operand for each
@@ -942,7 +944,7 @@ void RAFast::AllocateBasicBlock() {
}
// Track registers used by instruction.
- UsedInInstr.reset();
+ UsedInInstr.clear();
// First scan.
// Mark physreg uses and early clobbers as used.
@@ -1016,11 +1018,13 @@ void RAFast::AllocateBasicBlock() {
}
}
- MRI->addPhysRegsUsed(UsedInInstr);
+ for (UsedInInstrSet::iterator
+ I = UsedInInstr.begin(), E = UsedInInstr.end(); I != E; ++I)
+ MRI->setPhysRegUsed(*I);
// Track registers defined by instruction - early clobbers and tied uses at
// this point.
- UsedInInstr.reset();
+ UsedInInstr.clear();
if (hasEarlyClobbers) {
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
@@ -1030,7 +1034,7 @@ void RAFast::AllocateBasicBlock() {
// Look for physreg defs and tied uses.
if (!MO.isDef() && !MI->isRegTiedToDefOperand(i)) continue;
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
- UsedInInstr.set(*AI);
+ UsedInInstr.insert(*AI);
}
}
@@ -1080,7 +1084,9 @@ void RAFast::AllocateBasicBlock() {
killVirtReg(VirtDead[i]);
VirtDead.clear();
- MRI->addPhysRegsUsed(UsedInInstr);
+ for (UsedInInstrSet::iterator
+ I = UsedInInstr.begin(), E = UsedInInstr.end(); I != E; ++I)
+ MRI->setPhysRegUsed(*I);
if (CopyDst && CopyDst == CopySrc && CopyDstSub == CopySrcSub) {
DEBUG(dbgs() << "-- coalescing: " << *MI);
@@ -1118,7 +1124,8 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) {
TII = TM->getInstrInfo();
MRI->freezeReservedRegs(Fn);
RegClassInfo.runOnMachineFunction(Fn);
- UsedInInstr.resize(TRI->getNumRegs());
+ UsedInInstr.clear();
+ UsedInInstr.setUniverse(TRI->getNumRegs());
assert(!MRI->isSSA() && "regalloc requires leaving SSA");
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index f46d09bc86..5b5c0bdf5b 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5308,10 +5308,6 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
if (Reduced.getNode())
return Reduced;
}
- // fold (trunc (fptoXi x)) -> (smaller fptoXi x)
- if ((N0.getOpcode() == ISD::FP_TO_UINT ||
- N0.getOpcode() == ISD::FP_TO_SINT) && !LegalTypes)
- return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, N0.getOperand(0));
// fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
// where ... are all 'undef'.
if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
@@ -8610,8 +8606,8 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
return SDValue();
// Only handle cases where both indexes are constants with the same type.
- ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
- ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
+ ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
if (InsIdx && ExtIdx &&
InsIdx->getValueType(0).getSizeInBits() <= 64 &&
@@ -8628,6 +8624,21 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
}
}
+ if (V->getOpcode() == ISD::CONCAT_VECTORS) {
+ // Combine:
+ // (extract_subvec (concat V1, V2, ...), i)
+ // Into:
+ // Vi if possible
+ for (unsigned i = 0, e = V->getNumOperands(); i != e; ++i)
+ if (V->getOperand(i).getValueType() != NVT)
+ return SDValue();
+ unsigned Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ unsigned NumElems = NVT.getVectorNumElements();
+ assert((Idx % NumElems) == 0 &&
+ "IDX in concat is not a multiple of the result vector length.");
+ return V->getOperand(Idx / NumElems);
+ }
+
return SDValue();
}
@@ -9064,6 +9075,10 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
(RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
return false;
+ // The loads must not depend on one another.
+ if (LLD->isPredecessorOf(RLD) ||
+ RLD->isPredecessorOf(LLD))
+ return false;
Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(),
LLD->getBasePtr().getValueType(),
TheSelect->getOperand(0), LLD->getBasePtr(),
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 515eff3b25..81e3ff6afe 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -13,6 +13,7 @@
#define DEBUG_TYPE "pre-RA-sched"
#include "ScheduleDAGSDNodes.h"
+#include "InstrEmitter.h"
#include "llvm/InlineAsm.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
@@ -34,6 +35,10 @@ STATISTIC(NumPRCopies, "Number of physical copies");
static RegisterScheduler
fastDAGScheduler("fast", "Fast suboptimal list scheduling",
createFastDAGScheduler);
+static RegisterScheduler
+ linearizeDAGScheduler("linearize", "Linearize DAG, no scheduling",
+ createDAGLinearizer);
+
namespace {
/// FastPriorityQueue - A degenerate priority queue that considers
@@ -629,6 +634,153 @@ void ScheduleDAGFast::ListScheduleBottomUp() {
#endif
}
+
+//===----------------------------------------------------------------------===//
+// ScheduleDAGLinearize - A no-scheduling scheduler; it simply linearizes the
+// DAG in topological order.
+// IMPORTANT: this may not work for targets with physreg dependencies.
+//
+class ScheduleDAGLinearize : public ScheduleDAGSDNodes {
+public:
+ ScheduleDAGLinearize(MachineFunction &mf) : ScheduleDAGSDNodes(mf) {}
+
+ void Schedule();
+
+ MachineBasicBlock *EmitSchedule(MachineBasicBlock::iterator &InsertPos);
+
+private:
+ std::vector<SDNode*> Sequence;
+ DenseMap<SDNode*, SDNode*> GluedMap; // Cache glue to its user
+
+ void ScheduleNode(SDNode *N);
+};
+
+void ScheduleDAGLinearize::ScheduleNode(SDNode *N) {
+ if (N->getNodeId() != 0)
+ llvm_unreachable(0);
+
+ if (!N->isMachineOpcode() &&
+ (N->getOpcode() == ISD::EntryToken || isPassiveNode(N)))
+ // These nodes do not need to be translated into MIs.
+ return;
+
+ DEBUG(dbgs() << "\n*** Scheduling: ");
+ DEBUG(N->dump(DAG));
+ Sequence.push_back(N);
+
+ unsigned NumOps = N->getNumOperands();
+ if (unsigned NumLeft = NumOps) {
+ SDNode *GluedOpN = 0;
+ do {
+ const SDValue &Op = N->getOperand(NumLeft-1);
+ SDNode *OpN = Op.getNode();
+
+ if (NumLeft == NumOps && Op.getValueType() == MVT::Glue) {
+ // Schedule glue operand right above N.
+ GluedOpN = OpN;
+ assert(OpN->getNodeId() != 0 && "Glue operand not ready?");
+ OpN->setNodeId(0);
+ ScheduleNode(OpN);
+ continue;
+ }
+
+ if (OpN == GluedOpN)
+ // Glue operand is already scheduled.
+ continue;
+
+ DenseMap<SDNode*, SDNode*>::iterator DI = GluedMap.find(OpN);
+ if (DI != GluedMap.end() && DI->second != N)
+ // Users of glues are counted against the glued users.
+ OpN = DI->second;
+
+ unsigned Degree = OpN->getNodeId();
+ assert(Degree > 0 && "Predecessor over-released!");
+ OpN->setNodeId(--Degree);
+ if (Degree == 0)
+ ScheduleNode(OpN);
+ } while (--NumLeft);
+ }
+}
+
+/// findGluedUser - Find the representative use of a glue value by walking
+/// the use chain.
+static SDNode *findGluedUser(SDNode *N) {
+ while (SDNode *Glued = N->getGluedUser())
+ N = Glued;
+ return N;
+}
+
+void ScheduleDAGLinearize::Schedule() {
+ DEBUG(dbgs() << "********** DAG Linearization **********\n");
+
+ SmallVector<SDNode*, 8> Glues;
+ unsigned DAGSize = 0;
+ for (SelectionDAG::allnodes_iterator I = DAG->allnodes_begin(),
+ E = DAG->allnodes_end(); I != E; ++I) {
+ SDNode *N = I;
+
+ // Use node id to record degree.
+ unsigned Degree = N->use_size();
+ N->setNodeId(Degree);
+ unsigned NumVals = N->getNumValues();
+ if (NumVals && N->getValueType(NumVals-1) == MVT::Glue &&
+ N->hasAnyUseOfValue(NumVals-1)) {
+ SDNode *User = findGluedUser(N);
+ if (User) {
+ Glues.push_back(N);
+ GluedMap.insert(std::make_pair(N, User));
+ }
+ }
+
+ if (N->isMachineOpcode() ||
+ (N->getOpcode() != ISD::EntryToken && !isPassiveNode(N)))
+ ++DAGSize;
+ }
+
+ for (unsigned i = 0, e = Glues.size(); i != e; ++i) {
+ SDNode *Glue = Glues[i];
+ SDNode *GUser = GluedMap[Glue];
+ unsigned Degree = Glue->getNodeId();
+ unsigned UDegree = GUser->getNodeId();
+
+ // Glue user must be scheduled together with the glue operand. So other
+ // users of the glue operand must be treated as its users.
+ SDNode *ImmGUser = Glue->getGluedUser();
+ for (SDNode::use_iterator ui = Glue->use_begin(), ue = Glue->use_end();
+ ui != ue; ++ui)
+ if (*ui == ImmGUser)
+ --Degree;
+ GUser->setNodeId(UDegree + Degree);
+ Glue->setNodeId(1);
+ }
+
+ Sequence.reserve(DAGSize);
+ ScheduleNode(DAG->getRoot().getNode());
+}
+
+MachineBasicBlock*
+ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
+ InstrEmitter Emitter(BB, InsertPos);
+ DenseMap<SDValue, unsigned> VRBaseMap;
+
+ DEBUG({
+ dbgs() << "\n*** Final schedule ***\n";
+ });
+
+ // FIXME: Handle dbg_values.
+ unsigned NumNodes = Sequence.size();
+ for (unsigned i = 0; i != NumNodes; ++i) {
+ SDNode *N = Sequence[NumNodes-i-1];
+ DEBUG(N->dump(DAG));
+ Emitter.EmitNode(N, false, false, VRBaseMap);
+ }
+
+ DEBUG(dbgs() << '\n');
+
+ InsertPos = Emitter.getInsertPos();
+ return Emitter.getBlock();
+}
+
//===----------------------------------------------------------------------===//
// Public Constructor Functions
//===----------------------------------------------------------------------===//
@@ -637,3 +789,8 @@ llvm::ScheduleDAGSDNodes *
llvm::createFastDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
return new ScheduleDAGFast(*IS->MF);
}
+
+llvm::ScheduleDAGSDNodes *
+llvm::createDAGLinearizer(SelectionDAGISel *IS, CodeGenOpt::Level) {
+ return new ScheduleDAGLinearize(*IS->MF);
+}
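A note on usage (not part of the patch itself): RegisterScheduler entries are normally selected through the existing -pre-RA-sched flag, so assuming that mechanism is unchanged the new scheduler should be reachable as -pre-RA-sched=linearize when invoking llc. As the comment above warns, it performs no latency-driven reordering and may mishandle targets with physical-register dependencies.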
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 660223a505..714471f559 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -831,8 +831,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
}
SmallVector<SDNode *, 4> GluedNodes;
- for (SDNode *N = SU->getNode()->getGluedNode(); N;
- N = N->getGluedNode())
+ for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode())
GluedNodes.push_back(N);
while (!GluedNodes.empty()) {
SDNode *N = GluedNodes.back();
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 8e7bd82201..907356fd21 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -114,7 +114,8 @@ namespace llvm {
/// EmitSchedule - Insert MachineInstrs into the MachineBasicBlock
/// according to the order specified in Sequence.
///
- MachineBasicBlock *EmitSchedule(MachineBasicBlock::iterator &InsertPos);
+ virtual MachineBasicBlock*
+ EmitSchedule(MachineBasicBlock::iterator &InsertPos);
virtual void dumpNode(const SUnit *SU) const;
diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp
index 54d8c8cde7..1cbee843a1 100644
--- a/lib/CodeGen/StackColoring.cpp
+++ b/lib/CodeGen/StackColoring.cpp
@@ -48,6 +48,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/DebugInfo.h"
+#include "llvm/Instructions.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -260,7 +261,7 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) {
MarkersFound++;
- const Value *Allocation = MFI->getObjectAllocation(Slot);
+ const AllocaInst *Allocation = MFI->getObjectAllocation(Slot);
if (Allocation) {
DEBUG(dbgs()<<"Found a lifetime marker for slot #"<<Slot<<
" with allocation: "<< Allocation->getName()<<"\n");
@@ -480,11 +481,11 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
}
// Keep a list of *allocas* which need to be remapped.
- DenseMap<const Value*, const Value*> Allocas;
+ DenseMap<const AllocaInst*, const AllocaInst*> Allocas;
for (DenseMap<int, int>::iterator it = SlotRemap.begin(),
e = SlotRemap.end(); it != e; ++it) {
- const Value *From = MFI->getObjectAllocation(it->first);
- const Value *To = MFI->getObjectAllocation(it->second);
+ const AllocaInst *From = MFI->getObjectAllocation(it->first);
+ const AllocaInst *To = MFI->getObjectAllocation(it->second);
assert(To && From && "Invalid allocation object");
Allocas[From] = To;
}
@@ -514,10 +515,17 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
V = GetUnderlyingObject(V);
// If we did not find one, or if the one that we found is not in our
// map, then move on.
- if (!V || !Allocas.count(V))
+ if (!V || !isa<AllocaInst>(V)) {
+ // Clear mem operand since we don't know for sure that it doesn't
+ // alias a merged alloca.
+ MMO->setValue(0);
+ continue;
+ }
+ const AllocaInst *AI = cast<AllocaInst>(V);
+ if (!Allocas.count(AI))
continue;
- MMO->setValue(Allocas[V]);
+ MMO->setValue(Allocas[AI]);
FixedMemOp++;
}
diff --git a/lib/CodeGen/TargetSchedule.cpp b/lib/CodeGen/TargetSchedule.cpp
index 7a6e2604d7..6a096a16c4 100644
--- a/lib/CodeGen/TargetSchedule.cpp
+++ b/lib/CodeGen/TargetSchedule.cpp
@@ -58,6 +58,14 @@ unsigned TargetSchedModel::getNumMicroOps(MachineInstr *MI) const {
return MI->isTransient() ? 0 : 1;
}
+// The machine model may explicitly specify an invalid latency, which
+// effectively means infinite latency. Since users of the TargetSchedule API
+// don't know how to handle this, we convert it to a very large latency that is
+// easy to distinguish when debugging the DAG but won't induce overflow.
+static unsigned convertLatency(int Cycles) {
+ return Cycles >= 0 ? Cycles : 1000;
+}
+
/// If we can determine the operand latency from the def only, without machine
/// model or itinerary lookup, do so. Otherwise return -1.
int TargetSchedModel::getDefLatency(const MachineInstr *DefMI,
@@ -178,7 +186,7 @@ unsigned TargetSchedModel::computeOperandLatency(
const MCWriteLatencyEntry *WLEntry =
STI->getWriteLatencyEntry(SCDesc, DefIdx);
unsigned WriteID = WLEntry->WriteResourceID;
- unsigned Latency = WLEntry->Cycles;
+ unsigned Latency = convertLatency(WLEntry->Cycles);
if (!UseMI)
return Latency;
@@ -219,7 +227,7 @@ unsigned TargetSchedModel::computeInstrLatency(const MachineInstr *MI) const {
// Lookup the definition's write latency in SubtargetInfo.
const MCWriteLatencyEntry *WLEntry =
STI->getWriteLatencyEntry(SCDesc, DefIdx);
- Latency = std::max(Latency, WLEntry->Cycles);
+ Latency = std::max(Latency, convertLatency(WLEntry->Cycles));
}
return Latency;
}
diff --git a/lib/DebugInfo/DWARFContext.cpp b/lib/DebugInfo/DWARFContext.cpp
index 241f55eaed..afd614cc35 100644
--- a/lib/DebugInfo/DWARFContext.cpp
+++ b/lib/DebugInfo/DWARFContext.cpp
@@ -17,6 +17,8 @@
using namespace llvm;
using namespace dwarf;
+typedef DWARFDebugLine::LineTable DWARFLineTable;
+
void DWARFContext::dump(raw_ostream &OS) {
OS << ".debug_abbrev contents:\n";
getDebugAbbrev()->dump(OS);
@@ -94,7 +96,7 @@ const DWARFDebugAranges *DWARFContext::getDebugAranges() {
return Aranges.get();
}
-const DWARFDebugLine::LineTable *
+const DWARFLineTable *
DWARFContext::getLineTableForCompileUnit(DWARFCompileUnit *cu) {
if (!Line)
Line.reset(new DWARFDebugLine());
@@ -106,7 +108,7 @@ DWARFContext::getLineTableForCompileUnit(DWARFCompileUnit *cu) {
return 0; // No line table for this compile unit.
// See if the line table is cached.
- if (const DWARFDebugLine::LineTable *lt = Line->getLineTable(stmtOffset))
+ if (const DWARFLineTable *lt = Line->getLineTable(stmtOffset))
return lt;
// We have to parse it first.
@@ -117,11 +119,11 @@ DWARFContext::getLineTableForCompileUnit(DWARFCompileUnit *cu) {
void DWARFContext::parseCompileUnits() {
uint32_t offset = 0;
- const DataExtractor &debug_info_data = DataExtractor(getInfoSection(),
- isLittleEndian(), 0);
- while (debug_info_data.isValidOffset(offset)) {
+ const DataExtractor &DIData = DataExtractor(getInfoSection(),
+ isLittleEndian(), 0);
+ while (DIData.isValidOffset(offset)) {
CUs.push_back(DWARFCompileUnit(*this));
- if (!CUs.back().extract(debug_info_data, &offset)) {
+ if (!CUs.back().extract(DIData, &offset)) {
CUs.pop_back();
break;
}
@@ -163,9 +165,11 @@ DWARFCompileUnit *DWARFContext::getCompileUnitForAddress(uint64_t Address) {
return getCompileUnitForOffset(CUOffset);
}
-static bool getFileNameForCompileUnit(
- DWARFCompileUnit *CU, const DWARFDebugLine::LineTable *LineTable,
- uint64_t FileIndex, bool NeedsAbsoluteFilePath, std::string &FileName) {
+static bool getFileNameForCompileUnit(DWARFCompileUnit *CU,
+ const DWARFLineTable *LineTable,
+ uint64_t FileIndex,
+ bool NeedsAbsoluteFilePath,
+ std::string &FileName) {
if (CU == 0 ||
LineTable == 0 ||
!LineTable->getFileNameByIndex(FileIndex, NeedsAbsoluteFilePath,
@@ -183,10 +187,12 @@ static bool getFileNameForCompileUnit(
return true;
}
-static bool getFileLineInfoForCompileUnit(
- DWARFCompileUnit *CU, const DWARFDebugLine::LineTable *LineTable,
- uint64_t Address, bool NeedsAbsoluteFilePath, std::string &FileName,
- uint32_t &Line, uint32_t &Column) {
+static bool getFileLineInfoForCompileUnit(DWARFCompileUnit *CU,
+ const DWARFLineTable *LineTable,
+ uint64_t Address,
+ bool NeedsAbsoluteFilePath,
+ std::string &FileName,
+ uint32_t &Line, uint32_t &Column) {
if (CU == 0 || LineTable == 0)
return false;
// Get the index of row we're looking for in the line table.
@@ -225,8 +231,7 @@ DILineInfo DWARFContext::getLineInfoForAddress(uint64_t Address,
}
}
if (Specifier.needs(DILineInfoSpecifier::FileLineInfo)) {
- const DWARFDebugLine::LineTable *LineTable =
- getLineTableForCompileUnit(CU);
+ const DWARFLineTable *LineTable = getLineTableForCompileUnit(CU);
const bool NeedsAbsoluteFilePath =
Specifier.needs(DILineInfoSpecifier::AbsoluteFilePath);
getFileLineInfoForCompileUnit(CU, LineTable, Address,
@@ -250,7 +255,7 @@ DIInliningInfo DWARFContext::getInliningInfoForAddress(uint64_t Address,
DIInliningInfo InliningInfo;
uint32_t CallFile = 0, CallLine = 0, CallColumn = 0;
- const DWARFDebugLine::LineTable *LineTable = 0;
+ const DWARFLineTable *LineTable = 0;
for (uint32_t i = 0, n = InlinedChain.size(); i != n; i++) {
const DWARFDebugInfoEntryMinimal &FunctionDIE = InlinedChain[i];
std::string FileName = "<invalid>";
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index f58f75bf0e..f5b0d08337 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -19,6 +19,8 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/AsmCond.h"
#include "llvm/MC/MCParser/AsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
@@ -35,6 +37,8 @@
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include <cctype>
+#include <set>
+#include <string>
#include <vector>
using namespace llvm;
@@ -139,7 +143,8 @@ private:
/// ParsedOperands - The parsed operands from the last parsed statement.
SmallVector<MCParsedAsmOperand*, 8> ParsedOperands;
- /// Opcode - The opcode from the last parsed instruction.
+ /// Opcode - The opcode from the last parsed instruction. This is MS-style
+ /// inline asm specific.
unsigned Opcode;
public:
@@ -180,21 +185,17 @@ public:
virtual const AsmToken &Lex();
- bool ParseStatement();
void setParsingInlineAsm(bool V) { ParsingInlineAsm = V; }
bool isParsingInlineAsm() { return ParsingInlineAsm; }
- unsigned getNumParsedOperands() { return ParsedOperands.size(); }
- MCParsedAsmOperand &getParsedOperand(unsigned OpNum) {
- assert (ParsedOperands.size() > OpNum);
- return *ParsedOperands[OpNum];
- }
- void freeParsedOperands() {
- for (unsigned i = 0, e = ParsedOperands.size(); i != e; ++i)
- delete ParsedOperands[i];
- ParsedOperands.clear();
- }
- bool isInstruction() { return Opcode != (unsigned)~0x0; }
- unsigned getOpcode() { return Opcode; }
+
+ bool ParseMSInlineAsm(void *AsmLoc, std::string &AsmString,
+ unsigned &NumOutputs, unsigned &NumInputs,
+ SmallVectorImpl<void *> &OpDecls,
+ SmallVectorImpl<std::string> &Constraints,
+ SmallVectorImpl<std::string> &Clobbers,
+ const MCInstrInfo *MII,
+ const MCInstPrinter *IP,
+ MCAsmParserSemaCallback &SI);
bool ParseExpression(const MCExpr *&Res);
virtual bool ParseExpression(const MCExpr *&Res, SMLoc &EndLoc);
@@ -206,6 +207,7 @@ public:
private:
void CheckForValidSection();
+ bool ParseStatement();
void EatToEndOfLine();
bool ParseCppHashLineFilenameComment(const SMLoc &L);
@@ -318,6 +320,10 @@ private:
bool ParseDirectiveIrp(SMLoc DirectiveLoc); // ".irp"
bool ParseDirectiveIrpc(SMLoc DirectiveLoc); // ".irpc"
bool ParseDirectiveEndr(SMLoc DirectiveLoc); // ".endr"
+
+ // MS-style inline assembly parsing.
+ bool isInstruction() { return Opcode != (unsigned)~0x0; }
+ unsigned getOpcode() { return Opcode; }
};
/// \brief Generic implementations of directive handling, etc. which is shared
@@ -1403,10 +1409,13 @@ bool AsmParser::ParseStatement() {
ParsingInlineAsm);
}
- // Free any parsed operands. If parsing ms-style inline assembly it is the
- // responsibility of the caller (i.e., clang) to free the parsed operands.
- if (!ParsingInlineAsm)
- freeParsedOperands();
+ // Free any parsed operands. If parsing ms-style inline assembly the operands
+ // will be freed by the ParseMSInlineAsm() function.
+ if (!ParsingInlineAsm) {
+ for (unsigned i = 0, e = ParsedOperands.size(); i != e; ++i)
+ delete ParsedOperands[i];
+ ParsedOperands.clear();
+ }
// Don't skip the rest of the line, the instruction parser is responsible for
// that.
@@ -3631,6 +3640,171 @@ bool AsmParser::ParseDirectiveEndr(SMLoc DirectiveLoc) {
return false;
}
+namespace {
+enum AsmOpRewriteKind {
+ AOK_Imm,
+ AOK_Input,
+ AOK_Output
+};
+
+struct AsmOpRewrite {
+ AsmOpRewriteKind Kind;
+ SMLoc Loc;
+ unsigned Len;
+
+public:
+ AsmOpRewrite(AsmOpRewriteKind kind, SMLoc loc, unsigned len)
+ : Kind(kind), Loc(loc), Len(len) { }
+};
+}
+
+bool AsmParser::ParseMSInlineAsm(void *AsmLoc, std::string &AsmString,
+ unsigned &NumOutputs, unsigned &NumInputs,
+ SmallVectorImpl<void *> &OpDecls,
+ SmallVectorImpl<std::string> &Constraints,
+ SmallVectorImpl<std::string> &Clobbers,
+ const MCInstrInfo *MII,
+ const MCInstPrinter *IP,
+ MCAsmParserSemaCallback &SI) {
+ SmallVector<void*, 4> InputDecls;
+ SmallVector<void*, 4> OutputDecls;
+ SmallVector<std::string, 4> InputConstraints;
+ SmallVector<std::string, 4> OutputConstraints;
+ std::set<std::string> ClobberRegs;
+
+ SmallVector<struct AsmOpRewrite, 4> AsmStrRewrites;
+
+ // Prime the lexer.
+ Lex();
+
+ // While we have input, parse each statement.
+ unsigned InputIdx = 0;
+ unsigned OutputIdx = 0;
+ while (getLexer().isNot(AsmToken::Eof)) {
+ if (ParseStatement()) return true;
+
+ if (isInstruction()) {
+ const MCInstrDesc &Desc = MII->get(getOpcode());
+
+ // Build the list of clobbers, outputs and inputs.
+ for (unsigned i = 1, e = ParsedOperands.size(); i != e; ++i) {
+ MCParsedAsmOperand *Operand = ParsedOperands[i];
+
+ // Immediate.
+ if (Operand->isImm()) {
+ AsmStrRewrites.push_back(AsmOpRewrite(AOK_Imm,
+ Operand->getStartLoc(),
+ Operand->getNameLen()));
+ continue;
+ }
+
+ // Register operand.
+ if (Operand->isReg()) {
+ unsigned NumDefs = Desc.getNumDefs();
+ // Clobber.
+ if (NumDefs && Operand->getMCOperandNum() < NumDefs) {
+ std::string Reg;
+ raw_string_ostream OS(Reg);
+ IP->printRegName(OS, Operand->getReg());
+ ClobberRegs.insert(StringRef(OS.str()));
+ }
+ continue;
+ }
+
+ // Expr/Input or Output.
+ unsigned Size;
+ void *OpDecl = SI.LookupInlineAsmIdentifier(Operand->getName(), AsmLoc,
+ Size);
+ if (OpDecl) {
+ bool isOutput = (i == 1) && Desc.mayStore();
+ if (isOutput) {
+ std::string Constraint = "=";
+ ++InputIdx;
+ OutputDecls.push_back(OpDecl);
+ Constraint += Operand->getConstraint().str();
+ OutputConstraints.push_back(Constraint);
+ AsmStrRewrites.push_back(AsmOpRewrite(AOK_Output,
+ Operand->getStartLoc(),
+ Operand->getNameLen()));
+ } else {
+ InputDecls.push_back(OpDecl);
+ InputConstraints.push_back(Operand->getConstraint().str());
+ AsmStrRewrites.push_back(AsmOpRewrite(AOK_Input,
+ Operand->getStartLoc(),
+ Operand->getNameLen()));
+ }
+ }
+ }
+ // Free any parsed operands.
+ for (unsigned i = 0, e = ParsedOperands.size(); i != e; ++i)
+ delete ParsedOperands[i];
+ ParsedOperands.clear();
+ }
+ }
+
+ // Set the number of Outputs and Inputs.
+ NumOutputs = OutputDecls.size();
+ NumInputs = InputDecls.size();
+
+ // Set the unique clobbers.
+ for (std::set<std::string>::iterator I = ClobberRegs.begin(),
+ E = ClobberRegs.end(); I != E; ++I)
+ Clobbers.push_back(*I);
+
+ // Merge the various outputs and inputs. Outputs are expected first.
+ if (NumOutputs || NumInputs) {
+ unsigned NumExprs = NumOutputs + NumInputs;
+ OpDecls.resize(NumExprs);
+ Constraints.resize(NumExprs);
+ for (unsigned i = 0; i < NumOutputs; ++i) {
+ OpDecls[i] = OutputDecls[i];
+ Constraints[i] = OutputConstraints[i];
+ }
+ for (unsigned i = 0, j = NumOutputs; i < NumInputs; ++i, ++j) {
+ OpDecls[j] = InputDecls[i];
+ Constraints[j] = InputConstraints[i];
+ }
+ }
+
+ // Build the IR assembly string.
+ std::string AsmStringIR;
+ raw_string_ostream OS(AsmStringIR);
+ const char *Start = SrcMgr.getMemoryBuffer(0)->getBufferStart();
+ for (SmallVectorImpl<struct AsmOpRewrite>::iterator
+ I = AsmStrRewrites.begin(), E = AsmStrRewrites.end(); I != E; ++I) {
+ const char *Loc = (*I).Loc.getPointer();
+
+ // Emit everything up to the immediate/expression.
+ OS << StringRef(Start, Loc - Start);
+
+ // Rewrite expressions in $N notation.
+ switch ((*I).Kind) {
+ case AOK_Imm:
+ OS << Twine("$$") + StringRef(Loc, (*I).Len);
+ break;
+ case AOK_Input:
+ OS << '$';
+ OS << InputIdx++;
+ break;
+ case AOK_Output:
+ OS << '$';
+ OS << OutputIdx++;
+ break;
+ }
+
+ // Skip the original expression.
+ Start = Loc + (*I).Len;
+ }
+
+ // Emit the remainder of the asm string.
+ const char *AsmEnd = SrcMgr.getMemoryBuffer(0)->getBufferEnd();
+ if (Start != AsmEnd)
+ OS << StringRef(Start, AsmEnd - Start);
+
+ AsmString = OS.str();
+ return false;
+}
+
/// \brief Create an MCAsmParser instance.
MCAsmParser *llvm::createMCAsmParser(SourceMgr &SM,
MCContext &C, MCStreamer &Out,
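A small worked example of the rewrite performed by ParseMSInlineAsm (the identifier Result is hypothetical, and this assumes LookupInlineAsmIdentifier resolves it and that the matched mov form reports mayStore()): for the single statement

  mov Result, 5

the loop above records an AOK_Output rewrite for Result and an AOK_Imm rewrite for the immediate, so the string emitted into AsmStringIR becomes

  mov $0, $$5

with Result's declaration appended to OpDecls and a '='-prefixed constraint added to Constraints.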
diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc
index 6d874ea0d0..264fa5dbde 100644
--- a/lib/Support/Unix/Signals.inc
+++ b/lib/Support/Unix/Signals.inc
@@ -125,17 +125,29 @@ static void UnregisterHandlers() {
/// NB: This must be an async signal safe function. It cannot allocate or free
/// memory, even in debug builds.
static void RemoveFilesToRemove() {
- // Note: avoid iterators in case of debug iterators that allocate or release
+ // We avoid iterators in case of debug iterators that allocate or release
// memory.
for (unsigned i = 0, e = FilesToRemove.size(); i != e; ++i) {
- // Note that we don't want to use any external code here, and we don't care
- // about errors. We're going to try as hard as we can as often as we need
- // to to make these files go away. If these aren't files, too bad.
- //
- // We do however rely on a std::string implementation for which repeated
- // calls to 'c_str()' don't allocate memory. We pre-call 'c_str()' on all
- // of these strings to try to ensure this is safe.
- unlink(FilesToRemove[i].c_str());
+ // We rely on a std::string implementation for which repeated calls to
+ // 'c_str()' don't allocate memory. We pre-call 'c_str()' on all of these
+ // strings to try to ensure this is safe.
+ const char *path = FilesToRemove[i].c_str();
+
+ // Get the status so we can determine if it's a file or directory. If we
+ // can't stat the file, ignore it.
+ struct stat buf;
+ if (stat(path, &buf) != 0)
+ continue;
+
+ // If this is not a regular file, ignore it. We want to prevent removal of
+ // special files like /dev/null, even if the compiler is being run with
+ // super-user permissions.
+ if (!S_ISREG(buf.st_mode))
+ continue;
+
+ // Otherwise, remove the file. We ignore any errors here as there is nothing
+ // else we can do.
+ unlink(path);
}
}
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 6e991032d0..2379c425aa 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -1258,7 +1258,7 @@ static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) {
MachineRegisterInfo &MRI = MF.getRegInfo();
unsigned NumSpills = 0;
for (; NumSpills < 8; ++NumSpills)
- if (!MRI.isPhysRegOrOverlapUsed(ARM::D8 + NumSpills))
+ if (!MRI.isPhysRegUsed(ARM::D8 + NumSpills))
break;
// Don't do this for just one d-register. It's not worth it.
@@ -1331,7 +1331,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
for (unsigned i = 0; CSRegs[i]; ++i) {
unsigned Reg = CSRegs[i];
bool Spilled = false;
- if (MF.getRegInfo().isPhysRegOrOverlapUsed(Reg)) {
+ if (MF.getRegInfo().isPhysRegUsed(Reg)) {
Spilled = true;
CanEliminateFrame = false;
}
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 30fa6bc2c7..d968dc9f3d 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -78,8 +78,7 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT,
ELFWriterInfo(*this),
TLInfo(*this),
TSInfo(*this),
- FrameLowering(Subtarget),
- STTI(&TLInfo) {
+ FrameLowering(Subtarget) {
if (!Subtarget.hasARMOps())
report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not "
"support ARM mode execution!");
@@ -112,8 +111,7 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT,
TSInfo(*this),
FrameLowering(Subtarget.hasThumb2()
? new ARMFrameLowering(Subtarget)
- : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)),
- STTI(&TLInfo){
+ : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)) {
}
namespace {
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index 25ab8295f0..076ad01fee 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -25,7 +25,6 @@
#include "Thumb1FrameLowering.h"
#include "Thumb2InstrInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetTransformImpl.h"
#include "llvm/DataLayout.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/ADT/OwningPtr.h"
@@ -75,8 +74,6 @@ class ARMTargetMachine : public ARMBaseTargetMachine {
ARMTargetLowering TLInfo;
ARMSelectionDAGInfo TSInfo;
ARMFrameLowering FrameLowering;
- ScalarTargetTransformImpl STTI;
- VectorTargetTransformImpl VTTI;
public:
ARMTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
@@ -98,12 +95,7 @@ class ARMTargetMachine : public ARMBaseTargetMachine {
virtual const ARMFrameLowering *getFrameLowering() const {
return &FrameLowering;
}
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
- return &STTI;
- }
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
- return &VTTI;
- }
+
virtual const ARMInstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const DataLayout *getDataLayout() const { return &DL; }
virtual const ARMELFWriterInfo *getELFWriterInfo() const {
@@ -125,8 +117,6 @@ class ThumbTargetMachine : public ARMBaseTargetMachine {
ARMSelectionDAGInfo TSInfo;
// Either Thumb1FrameLowering or ARMFrameLowering.
OwningPtr<ARMFrameLowering> FrameLowering;
- ScalarTargetTransformImpl STTI;
- VectorTargetTransformImpl VTTI;
public:
ThumbTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
@@ -155,12 +145,6 @@ public:
virtual const ARMFrameLowering *getFrameLowering() const {
return FrameLowering.get();
}
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
- return &STTI;
- }
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
- return &VTTI;
- }
virtual const DataLayout *getDataLayout() const { return &DL; }
virtual const ARMELFWriterInfo *getELFWriterInfo() const {
return Subtarget.isTargetELF() ? &ELFWriterInfo : 0;
diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt
index 48df199437..096ef001ed 100644
--- a/lib/Target/CMakeLists.txt
+++ b/lib/Target/CMakeLists.txt
@@ -11,7 +11,6 @@ add_llvm_library(LLVMTarget
TargetMachineC.cpp
TargetRegisterInfo.cpp
TargetSubtargetInfo.cpp
- TargetTransformImpl.cpp
)
foreach(t ${LLVM_TARGETS_TO_BUILD})
diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp
index e92ad01e1d..a37ad7f85a 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.cpp
+++ b/lib/Target/CellSPU/SPUTargetMachine.cpp
@@ -43,8 +43,7 @@ SPUTargetMachine::SPUTargetMachine(const Target &T, StringRef TT,
FrameLowering(Subtarget),
TLInfo(*this),
TSInfo(*this),
- InstrItins(Subtarget.getInstrItineraryData()),
- STTI(&TLInfo){
+ InstrItins(Subtarget.getInstrItineraryData()) {
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h
index 7f53ea6fbe..58699a30d2 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.h
+++ b/lib/Target/CellSPU/SPUTargetMachine.h
@@ -20,7 +20,6 @@
#include "SPUSelectionDAGInfo.h"
#include "SPUFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetTransformImpl.h"
#include "llvm/DataLayout.h"
namespace llvm {
@@ -35,8 +34,6 @@ class SPUTargetMachine : public LLVMTargetMachine {
SPUTargetLowering TLInfo;
SPUSelectionDAGInfo TSInfo;
InstrItineraryData InstrItins;
- ScalarTargetTransformImpl STTI;
- VectorTargetTransformImpl VTTI;
public:
SPUTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
@@ -80,12 +77,6 @@ public:
virtual const InstrItineraryData *getInstrItineraryData() const {
return &InstrItins;
}
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
- return &STTI;
- }
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
- return &VTTI;
- }
// Pass Pipeline Configuration
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 353542a809..d198a3f45b 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -74,8 +74,7 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT,
Subtarget(TT, CPU, FS), InstrInfo(Subtarget), TLInfo(*this),
TSInfo(*this),
FrameLowering(Subtarget),
- InstrItins(&Subtarget.getInstrItineraryData()),
- STTI(&TLInfo) {
+ InstrItins(&Subtarget.getInstrItineraryData()) {
setMCUseCFI(false);
}
@@ -88,7 +87,7 @@ bool HexagonTargetMachine::addPassesForOptimizations(PassManagerBase &PM) {
PM.add(createDeadCodeEliminationPass());
PM.add(createConstantPropagationPass());
PM.add(createLoopUnrollPass());
- PM.add(createLoopStrengthReducePass());
+ PM.add(createLoopStrengthReducePass(getTargetLowering()));
return true;
}
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.h b/lib/Target/Hexagon/HexagonTargetMachine.h
index 7a4215c119..ade5b3e9c1 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.h
+++ b/lib/Target/Hexagon/HexagonTargetMachine.h
@@ -21,7 +21,6 @@
#include "HexagonFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetTransformImpl.h"
namespace llvm {
@@ -35,8 +34,6 @@ class HexagonTargetMachine : public LLVMTargetMachine {
HexagonSelectionDAGInfo TSInfo;
HexagonFrameLowering FrameLowering;
const InstrItineraryData* InstrItins;
- ScalarTargetTransformImpl STTI;
- VectorTargetTransformImpl VTTI;
public:
HexagonTargetMachine(const Target &T, StringRef TT,StringRef CPU,
@@ -71,14 +68,6 @@ public:
return &TSInfo;
}
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
- return &STTI;
- }
-
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
- return &VTTI;
- }
-
virtual const DataLayout *getDataLayout() const { return &DL; }
static unsigned getModuleMatchQuality(const Module &M);
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
index cb5f46062d..1f2cf6d9d2 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
@@ -42,7 +42,7 @@ MBlazeTargetMachine(const Target &T, StringRef TT,
InstrInfo(*this),
FrameLowering(Subtarget),
TLInfo(*this), TSInfo(*this), ELFWriterInfo(*this),
- InstrItins(Subtarget.getInstrItineraryData()), STTI(&TLInfo) {
+ InstrItins(Subtarget.getInstrItineraryData()) {
}
namespace {
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.h b/lib/Target/MBlaze/MBlazeTargetMachine.h
index 34648b9b9a..d949e54f0d 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.h
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.h
@@ -25,7 +25,6 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetTransformImpl.h"
namespace llvm {
class formatted_raw_ostream;
@@ -40,8 +39,6 @@ namespace llvm {
MBlazeIntrinsicInfo IntrinsicInfo;
MBlazeELFWriterInfo ELFWriterInfo;
InstrItineraryData InstrItins;
- ScalarTargetTransformImpl STTI;
- VectorTargetTransformImpl VTTI;
public:
MBlazeTargetMachine(const Target &T, StringRef TT,
@@ -80,10 +77,6 @@ namespace llvm {
virtual const MBlazeELFWriterInfo *getELFWriterInfo() const {
return &ELFWriterInfo;
}
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const
- { return &STTI; }
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const
- { return &VTTI; }
// Pass Pipeline Configuration
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
diff --git a/lib/Target/MSP430/MSP430FrameLowering.cpp b/lib/Target/MSP430/MSP430FrameLowering.cpp
index a312c8d5b2..2e170f17bf 100644
--- a/lib/Target/MSP430/MSP430FrameLowering.cpp
+++ b/lib/Target/MSP430/MSP430FrameLowering.cpp
@@ -221,3 +221,17 @@ MSP430FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
return true;
}
+
+void
+MSP430FrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
+ const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ // Create a frame entry for the FPW register that must be saved.
+ if (TFI->hasFP(MF)) {
+ int FrameIdx = MF.getFrameInfo()->CreateFixedObject(2, -4, true);
+ (void)FrameIdx;
+ assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() &&
+ "Slot for FPW register must be last in order to be found!");
+ }
+}
diff --git a/lib/Target/MSP430/MSP430FrameLowering.h b/lib/Target/MSP430/MSP430FrameLowering.h
index b636827da7..cb02545852 100644
--- a/lib/Target/MSP430/MSP430FrameLowering.h
+++ b/lib/Target/MSP430/MSP430FrameLowering.h
@@ -46,6 +46,7 @@ public:
bool hasFP(const MachineFunction &MF) const;
bool hasReservedCallFrame(const MachineFunction &MF) const;
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
};
} // End llvm namespace
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp
index aed46a2ec5..9ae238f66f 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.cpp
+++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -220,20 +220,6 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.getOperand(i+1).ChangeToImmediate(Offset);
}
-void
-MSP430RegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF)
- const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
- // Create a frame entry for the FPW register that must be saved.
- if (TFI->hasFP(MF)) {
- int FrameIdx = MF.getFrameInfo()->CreateFixedObject(2, -4, true);
- (void)FrameIdx;
- assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() &&
- "Slot for FPW register must be last in order to be found!");
- }
-}
-
unsigned MSP430RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.h b/lib/Target/MSP430/MSP430RegisterInfo.h
index 9ee0a03f63..64a43bcafb 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.h
+++ b/lib/Target/MSP430/MSP430RegisterInfo.h
@@ -49,8 +49,6 @@ public:
void eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, RegScavenger *RS = NULL) const;
- void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
-
// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const;
};
diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp
index 29ea681216..da5899b86d 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -36,7 +36,7 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T,
// FIXME: Check DataLayout string.
DL("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"),
InstrInfo(*this), TLInfo(*this), TSInfo(*this),
- FrameLowering(Subtarget), STTI(&TLInfo) { }
+ FrameLowering(Subtarget) { }
namespace {
/// MSP430 Code Generator Pass Configuration Options.
diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h
index 186172ede4..ba3cef1f2a 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.h
+++ b/lib/Target/MSP430/MSP430TargetMachine.h
@@ -24,7 +24,6 @@
#include "llvm/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetTransformImpl.h"
namespace llvm {
@@ -37,8 +36,6 @@ class MSP430TargetMachine : public LLVMTargetMachine {
MSP430TargetLowering TLInfo;
MSP430SelectionDAGInfo TSInfo;
MSP430FrameLowering FrameLowering;
- ScalarTargetTransformImpl STTI;
- VectorTargetTransformImpl VTTI;
public:
MSP430TargetMachine(const Target &T, StringRef TT,
@@ -64,12 +61,7 @@ public:
virtual const MSP430SelectionDAGInfo* getSelectionDAGInfo() const {
return &TSInfo;
}
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
- return &STTI;
- }
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
- return &VTTI;
- }
+
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
}; // MSP430TargetMachine.
diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp
index 8991433005..5e33fed0cc 100644
--- a/lib/Target/Mips/Mips16InstrInfo.cpp
+++ b/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -25,7 +25,7 @@
using namespace llvm;
Mips16InstrInfo::Mips16InstrInfo(MipsTargetMachine &tm)
- : MipsInstrInfo(tm, /* FIXME: set mips16 unconditional br */ 0),
+ : MipsInstrInfo(tm, Mips::BimmX16),
RI(*tm.getSubtargetImpl()) {}
const MipsRegisterInfo &Mips16InstrInfo::getRegisterInfo() const {
@@ -137,12 +137,39 @@ bool Mips16InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
/// GetOppositeBranchOpc - Return the inverse of the specified
/// opcode, e.g. turning BEQ to BNE.
unsigned Mips16InstrInfo::GetOppositeBranchOpc(unsigned Opc) const {
+ switch (Opc) {
+ default: llvm_unreachable("Illegal opcode!");
+ case Mips::BeqzRxImmX16: return Mips::BnezRxImmX16;
+ case Mips::BnezRxImmX16: return Mips::BeqzRxImmX16;
+ case Mips::BteqzT8CmpX16: return Mips::BtnezT8CmpX16;
+ case Mips::BteqzT8SltX16: return Mips::BtnezT8SltX16;
+ case Mips::BteqzT8SltiX16: return Mips::BtnezT8SltiX16;
+ case Mips::BtnezX16: return Mips::BteqzX16;
+ case Mips::BtnezT8CmpiX16: return Mips::BteqzT8CmpiX16;
+ case Mips::BtnezT8SltuX16: return Mips::BteqzT8SltuX16;
+ case Mips::BtnezT8SltiuX16: return Mips::BteqzT8SltiuX16;
+ case Mips::BteqzX16: return Mips::BtnezX16;
+ case Mips::BteqzT8CmpiX16: return Mips::BtnezT8CmpiX16;
+ case Mips::BteqzT8SltuX16: return Mips::BtnezT8SltuX16;
+ case Mips::BteqzT8SltiuX16: return Mips::BtnezT8SltiuX16;
+ case Mips::BtnezT8CmpX16: return Mips::BteqzT8CmpX16;
+ case Mips::BtnezT8SltX16: return Mips::BteqzT8SltX16;
+ case Mips::BtnezT8SltiX16: return Mips::BteqzT8SltiX16;
+ }
assert(false && "Implement this function.");
return 0;
}
unsigned Mips16InstrInfo::GetAnalyzableBrOpc(unsigned Opc) const {
- return 0;
+ return (Opc == Mips::BeqzRxImmX16 || Opc == Mips::BimmX16 ||
+ Opc == Mips::BnezRxImmX16 || Opc == Mips::BteqzX16 ||
+ Opc == Mips::BteqzT8CmpX16 || Opc == Mips::BteqzT8CmpiX16 ||
+ Opc == Mips::BteqzT8SltX16 || Opc == Mips::BteqzT8SltuX16 ||
+ Opc == Mips::BteqzT8SltiX16 || Opc == Mips::BteqzT8SltiuX16 ||
+ Opc == Mips::BtnezX16 || Opc == Mips::BtnezT8CmpX16 ||
+ Opc == Mips::BtnezT8CmpiX16 || Opc == Mips::BtnezT8SltX16 ||
+ Opc == Mips::BtnezT8SltuX16 || Opc == Mips::BtnezT8SltiX16 ||
+ Opc == Mips::BtnezT8SltiuX16 ) ? Opc : 0;
}
void Mips16InstrInfo::ExpandRetRA16(MachineBasicBlock &MBB,
diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td
index eba201a0ea..2694b09206 100644
--- a/lib/Target/Mips/Mips16InstrInfo.td
+++ b/lib/Target/Mips/Mips16InstrInfo.td
@@ -21,6 +21,26 @@ def mem16 : Operand<i32> {
}
//
+// EXT-I instruction format
+//
+class FEXT_I16_ins<bits<5> eop, string asmstr, InstrItinClass itin> :
+ FEXT_I16<eop, (outs), (ins brtarget:$imm16),
+ !strconcat(asmstr, "\t$imm16"),[], itin>;
+
+//
+// EXT-I8 instruction format
+//
+
+class FEXT_I816_ins_base<bits<3> _func, string asmstr,
+ string asmstr2, InstrItinClass itin>:
+ FEXT_I816<_func, (outs), (ins uimm16:$imm), !strconcat(asmstr, asmstr2),
+ [], itin>;
+
+class FEXT_I816_ins<bits<3> _func, string asmstr,
+ InstrItinClass itin>:
+ FEXT_I816_ins_base<_func, asmstr, "\t$imm", itin>;
+
+//
// Assembler formats in alphabetical order.
// Natural and pseudos are mixed together.
//
@@ -40,6 +60,11 @@ class FEXT_RI16_ins<bits<5> _op, string asmstr,
class FEXT_RI16_PC_ins<bits<5> _op, string asmstr, InstrItinClass itin>:
FEXT_RI16_ins_base<_op, asmstr, "\t$rx, $$pc, $imm", itin>;
+class FEXT_RI16_B_ins<bits<5> _op, string asmstr,
+ InstrItinClass itin>:
+ FEXT_RI16<_op, (outs), (ins CPU16Regs:$rx, brtarget:$imm),
+ !strconcat(asmstr, "\t$rx, $imm"), [], itin>;
+
class FEXT_2RI16_ins<bits<5> _op, string asmstr,
InstrItinClass itin>:
FEXT_RI16<_op, (outs CPU16Regs:$rx), (ins CPU16Regs:$rx_, simm16:$imm),
@@ -47,6 +72,7 @@ class FEXT_2RI16_ins<bits<5> _op, string asmstr,
let Constraints = "$rx_ = $rx";
}
+
// this has an explicit sp argument that we ignore to work around a problem
// in the compiler
class FEXT_RI16_SP_explicit_ins<bits<5> _op, string asmstr,
@@ -75,6 +101,31 @@ class FEXT_SHIFT16_ins<bits<2> _f, string asmstr, InstrItinClass itin>:
FEXT_SHIFT16<_f, (outs CPU16Regs:$rx), (ins CPU16Regs:$ry, shamt:$sa),
!strconcat(asmstr, "\t$rx, $ry, $sa"), [], itin>;
+//
+// EXT-T8I8
+//
+class FEXT_T8I816_ins<bits<3> _func, string asmstr, string asmstr2,
+ InstrItinClass itin>:
+ FEXT_I816<_func, (outs),
+ (ins CPU16Regs:$rx, CPU16Regs:$ry, brtarget:$imm),
+ !strconcat(asmstr2, !strconcat("\t$rx, $ry\n\t",
+ !strconcat(asmstr, "\t$imm"))),[], itin> {
+ let isCodeGenOnly=1;
+}
+
+//
+// EXT-T8I8I
+//
+class FEXT_T8I8I16_ins<bits<3> _func, string asmstr, string asmstr2,
+ InstrItinClass itin>:
+ FEXT_I816<_func, (outs),
+ (ins CPU16Regs:$rx, simm16:$imm, brtarget:$targ),
+ !strconcat(asmstr2, !strconcat("\t$rx, $imm\n\t",
+ !strconcat(asmstr, "\t$targ"))), [], itin> {
+ let isCodeGenOnly=1;
+}
+//
+
//
// I8_MOVR32 instruction format (used only by the MOVR32 instruction
@@ -165,6 +216,17 @@ class ArithLogic16Defs<bit isCom=0> {
bit neverHasSideEffects = 1;
}
+class branch16 {
+ bit isBranch = 1;
+ bit isTerminator = 1;
+ bit isBarrier = 1;
+}
+
+class cbranch16 {
+ bit isBranch = 1;
+ bit isTerminator = 1;
+}
+
class MayLoad {
bit mayLoad = 1;
}
@@ -204,6 +266,69 @@ def AdduRxRyRz16: FRRR16_ins<01, "addu", IIAlu>, ArithLogic16Defs<1>;
// To do a bitwise logical AND.
def AndRxRxRy16: FRxRxRy16_ins<0b01100, "and", IIAlu>, ArithLogic16Defs<1>;
+
+
+//
+// Format: BEQZ rx, offset MIPS16e
+// Purpose: Branch on Equal to Zero (Extended)
+// To test a GPR then do a PC-relative conditional branch.
+//
+def BeqzRxImmX16: FEXT_RI16_B_ins<0b00100, "beqz", IIAlu>, cbranch16;
+
+// Format: B offset MIPS16e
+// Purpose: Unconditional Branch
+// To do an unconditional PC-relative branch.
+//
+def BimmX16: FEXT_I16_ins<0b00010, "b", IIAlu>, branch16;
+
+//
+// Format: BNEZ rx, offset MIPS16e
+// Purpose: Branch on Not Equal to Zero (Extended)
+// To test a GPR then do a PC-relative conditional branch.
+//
+def BnezRxImmX16: FEXT_RI16_B_ins<0b00101, "bnez", IIAlu>, cbranch16;
+
+//
+// Format: BTEQZ offset MIPS16e
+// Purpose: Branch on T Equal to Zero (Extended)
+// To test special register T then do a PC-relative conditional branch.
+//
+def BteqzX16: FEXT_I816_ins<0b000, "bteqz", IIAlu>, cbranch16;
+
+def BteqzT8CmpX16: FEXT_T8I816_ins<0b000, "bteqz", "cmp", IIAlu>, cbranch16;
+
+def BteqzT8CmpiX16: FEXT_T8I8I16_ins<0b000, "bteqz", "cmpi", IIAlu>,
+ cbranch16;
+
+def BteqzT8SltX16: FEXT_T8I816_ins<0b000, "bteqz", "slt", IIAlu>, cbranch16;
+
+def BteqzT8SltuX16: FEXT_T8I816_ins<0b000, "bteqz", "sltu", IIAlu>, cbranch16;
+
+def BteqzT8SltiX16: FEXT_T8I8I16_ins<0b000, "bteqz", "slti", IIAlu>, cbranch16;
+
+def BteqzT8SltiuX16: FEXT_T8I8I16_ins<0b000, "bteqz", "sltiu", IIAlu>,
+ cbranch16;
+
+//
+// Format: BTNEZ offset MIPS16e
+// Purpose: Branch on T Not Equal to Zero (Extended)
+// To test special register T then do a PC-relative conditional branch.
+//
+def BtnezX16: FEXT_I816_ins<0b001, "btnez", IIAlu>, cbranch16;
+
+def BtnezT8CmpX16: FEXT_T8I816_ins<0b000, "btnez", "cmp", IIAlu>, cbranch16;
+
+def BtnezT8CmpiX16: FEXT_T8I8I16_ins<0b000, "btnez", "cmpi", IIAlu>, cbranch16;
+
+def BtnezT8SltX16: FEXT_T8I816_ins<0b000, "btnez", "slt", IIAlu>, cbranch16;
+
+def BtnezT8SltuX16: FEXT_T8I816_ins<0b000, "btnez", "sltu", IIAlu>, cbranch16;
+
+def BtnezT8SltiX16: FEXT_T8I8I16_ins<0b000, "btnez", "slti", IIAlu>, cbranch16;
+
+def BtnezT8SltiuX16: FEXT_T8I8I16_ins<0b000, "btnez", "sltiu", IIAlu>,
+ cbranch16;
+
//
// Format: DIV rx, ry MIPS16e
// Purpose: Divide Word
@@ -562,6 +687,11 @@ def: StoreM16_pat<truncstorei8, SbRxRyOffMemX16>;
def: StoreM16_pat<truncstorei16, ShRxRyOffMemX16>;
def: StoreM16_pat<store, SwRxRyOffMemX16>;
+// Unconditional branch
+class UncondBranch16_pat<SDNode OpNode, Instruction I>:
+ Mips16Pat<(OpNode bb:$imm16), (I bb:$imm16)> {
+ let Predicates = [RelocPIC, InMips16Mode];
+ }
// Jump and Link (Call)
let isCall=1, hasDelaySlot=1 in
@@ -574,7 +704,144 @@ let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, hasCtrlDep=1,
hasExtraSrcRegAllocReq = 1 in
def RetRA16 : MipsPseudo16<(outs), (ins), "", [(MipsRet)]>;
+
+//
+// Some conditional-branch patterns are not generated by llvm at this time.
+// Some are unused for seemingly arbitrary reasons: e.g. they are used for
+// signed comparisons, while a different pattern is selected for unsigned ones.
+// I am pushing these upstream from the full mips16 port, where they appeared
+// to be needed, and the mips32 port has them, but I cannot currently create
+// test cases that exercise them. While I sort this out I will leave the extra
+// patterns commented out, and if I can confirm they are really never used, I
+// will delete the code. I don't want to check the code in uncommented without
+// a valid test case. In some cases the compiler generates setcc patterns
+// instead, and since I implemented setcc first that may have masked the
+// problem. The setcc variants are suboptimal for mips16, so I may want to
+// figure out how to enable the brcond patterns, or possibly new combinations
+// of brcond and setcc.
+//
+//
+// bcond-seteq
+//
+def: Mips16Pat
+ <(brcond (i32 (seteq CPU16Regs:$rx, CPU16Regs:$ry)), bb:$imm16),
+ (BteqzT8CmpX16 CPU16Regs:$rx, CPU16Regs:$ry, bb:$imm16)
+ >;
+
+
+def: Mips16Pat
+ <(brcond (i32 (seteq CPU16Regs:$rx, immZExt16:$imm)), bb:$targ16),
+ (BteqzT8CmpiX16 CPU16Regs:$rx, immSExt16:$imm, bb:$targ16)
+ >;
+
+def: Mips16Pat
+ <(brcond (i32 (seteq CPU16Regs:$rx, 0)), bb:$targ16),
+ (BeqzRxImmX16 CPU16Regs:$rx, bb:$targ16)
+ >;
+
+//
+// bcond-setgt (do we need to have this pair of setlt, setgt??)
+//
+def: Mips16Pat
+ <(brcond (i32 (setgt CPU16Regs:$rx, CPU16Regs:$ry)), bb:$imm16),
+ (BtnezT8SltX16 CPU16Regs:$ry, CPU16Regs:$rx, bb:$imm16)
+ >;
+
+//
+// bcond-setge
+//
+def: Mips16Pat
+ <(brcond (i32 (setge CPU16Regs:$rx, CPU16Regs:$ry)), bb:$imm16),
+ (BteqzT8SltX16 CPU16Regs:$rx, CPU16Regs:$ry, bb:$imm16)
+ >;
+
+//
+// never called because compiler transforms a >= k to a > (k-1)
+//def: Mips16Pat
+// <(brcond (i32 (setge CPU16Regs:$rx, immSExt16:$imm)), bb:$imm16),
+// (BteqzT8SltiX16 CPU16Regs:$rx, immSExt16:$imm, bb:$imm16)
+// >;
+
+//
+// bcond-setlt
+//
+def: Mips16Pat
+ <(brcond (i32 (setlt CPU16Regs:$rx, CPU16Regs:$ry)), bb:$imm16),
+ (BtnezT8SltX16 CPU16Regs:$rx, CPU16Regs:$ry, bb:$imm16)
+ >;
+
+def: Mips16Pat
+ <(brcond (i32 (setlt CPU16Regs:$rx, immSExt16:$imm)), bb:$imm16),
+ (BtnezT8SltiX16 CPU16Regs:$rx, immSExt16:$imm, bb:$imm16)
+ >;
+
+//
+// bcond-setle
+//
+def: Mips16Pat
+ <(brcond (i32 (setle CPU16Regs:$rx, CPU16Regs:$ry)), bb:$imm16),
+ (BteqzT8SltX16 CPU16Regs:$ry, CPU16Regs:$rx, bb:$imm16)
+ >;
+
+//
+// bcond-setne
+//
+def: Mips16Pat
+ <(brcond (i32 (setne CPU16Regs:$rx, CPU16Regs:$ry)), bb:$imm16),
+ (BtnezT8CmpX16 CPU16Regs:$rx, CPU16Regs:$ry, bb:$imm16)
+ >;
+
+def: Mips16Pat
+ <(brcond (i32 (setne CPU16Regs:$rx, immZExt16:$imm)), bb:$targ16),
+ (BtnezT8CmpiX16 CPU16Regs:$rx, immSExt16:$imm, bb:$targ16)
+ >;
+
+def: Mips16Pat
+ <(brcond (i32 (setne CPU16Regs:$rx, 0)), bb:$targ16),
+ (BnezRxImmX16 CPU16Regs:$rx, bb:$targ16)
+ >;
+
+//
+// This needs to be there, but I forget which code generates it.
+//
+def: Mips16Pat
+ <(brcond CPU16Regs:$rx, bb:$targ16),
+ (BnezRxImmX16 CPU16Regs:$rx, bb:$targ16)
+ >;
+
+//
+
+//
+// bcond-setugt
+//
+//def: Mips16Pat
+// <(brcond (i32 (setugt CPU16Regs:$rx, CPU16Regs:$ry)), bb:$imm16),
+// (BtnezT8SltuX16 CPU16Regs:$ry, CPU16Regs:$rx, bb:$imm16)
+// >;
+
+//
+// bcond-setuge
+//
+//def: Mips16Pat
+// <(brcond (i32 (setuge CPU16Regs:$rx, CPU16Regs:$ry)), bb:$imm16),
+// (BteqzT8SltuX16 CPU16Regs:$rx, CPU16Regs:$ry, bb:$imm16)
+// >;
+
+
+//
+// bcond-setult
+//
+//def: Mips16Pat
+// <(brcond (i32 (setult CPU16Regs:$rx, CPU16Regs:$ry)), bb:$imm16),
+// (BtnezT8SltuX16 CPU16Regs:$rx, CPU16Regs:$ry, bb:$imm16)
+// >;
+
+def: UncondBranch16_pat<br, BimmX16>;
+
// Small immediates
+def: Mips16Pat<(i32 immSExt16:$in),
+ (AddiuRxRxImmX16 (Move32R16 ZERO), immSExt16:$in)>;
+
def: Mips16Pat<(i32 immZExt16:$in), (LiRxImmX16 immZExt16:$in)>;
//
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index 5e8062373f..199fe5f0d3 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -53,7 +53,7 @@ MipsTargetMachine(const Target &T, StringRef TT,
InstrInfo(MipsInstrInfo::create(*this)),
FrameLowering(MipsFrameLowering::create(*this, Subtarget)),
TLInfo(*this), TSInfo(*this), JITInfo(),
- ELFWriterInfo(false, isLittle), STTI(&TLInfo) {
+ ELFWriterInfo(false, isLittle) {
}
void MipsebTargetMachine::anchor() { }
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index 60822d0c05..3a01828dd1 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -24,7 +24,6 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetTransformImpl.h"
namespace llvm {
class formatted_raw_ostream;
@@ -39,8 +38,6 @@ class MipsTargetMachine : public LLVMTargetMachine {
MipsSelectionDAGInfo TSInfo;
MipsJITInfo JITInfo;
MipsELFWriterInfo ELFWriterInfo;
- ScalarTargetTransformImpl STTI;
- VectorTargetTransformInfo VTTI;
public:
MipsTargetMachine(const Target &T, StringRef TT,
@@ -77,12 +74,6 @@ public:
virtual const MipsELFWriterInfo *getELFWriterInfo() const {
return &ELFWriterInfo;
}
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
- return &STTI;
- }
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
- return &VTTI;
- }
// Pass Pipeline Configuration
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 7519b4a083..dbfc660687 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -72,8 +72,7 @@ NVPTXTargetMachine::NVPTXTargetMachine(const Target &T,
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
Subtarget(TT, CPU, FS, is64bit),
DL(Subtarget.getDataLayout()),
- InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering(*this,is64bit),
- STTI(&TLInfo)
+ InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering(*this,is64bit)
/*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ {
}
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h
index 11bc9d4fa6..d58a076858 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.h
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -25,7 +25,6 @@
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetSelectionDAGInfo.h"
-#include "llvm/Target/TargetTransformImpl.h"
namespace llvm {
@@ -45,9 +44,6 @@ class NVPTXTargetMachine : public LLVMTargetMachine {
// Hold Strings that can be free'd all together with NVPTXTargetMachine
ManagedStringPool ManagedStrPool;
- ScalarTargetTransformImpl STTI;
- VectorTargetTransformImpl VTTI;
-
//bool addCommonCodeGenPasses(PassManagerBase &, CodeGenOpt::Level,
// bool DisableVerify, MCContext *&OutCtx);
@@ -76,12 +72,6 @@ public:
virtual const TargetSelectionDAGInfo *getSelectionDAGInfo() const {
return &TSInfo;
}
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
- return &STTI;
- }
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
- return &VTTI;
- }
//virtual bool addInstSelector(PassManagerBase &PM,
// CodeGenOpt::Level OptLevel);
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index c18250a78f..36db4b5179 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -4224,7 +4224,52 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op,
return SDValue();
if (Op.getOperand(0).getValueType() == MVT::i64) {
- SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op.getOperand(0));
+ SDValue SINT = Op.getOperand(0);
+ // When converting to single-precision, we actually need to convert
+ // to double-precision first and then round to single-precision.
+ // To avoid double-rounding effects during that operation, we have
+ // to prepare the input operand. Bits that might be truncated when
+ // converting to double-precision are replaced by a bit that won't
+ // be lost at this stage, but is below the single-precision rounding
+ // position.
+ //
+ // However, if -enable-unsafe-fp-math is in effect, accept double
+ // rounding to avoid the extra overhead.
+ if (Op.getValueType() == MVT::f32 &&
+ !DAG.getTarget().Options.UnsafeFPMath) {
+
+ // Twiddle input to make sure the low 11 bits are zero. (If this
+ // is the case, we are guaranteed the value will fit into the 53 bit
+ // mantissa of an IEEE double-precision value without rounding.)
+ // If any of those low 11 bits were not zero originally, make sure
+ // bit 12 (value 2048) is set instead, so that the final rounding
+ // to single-precision gets the correct result.
+ SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
+ SINT, DAG.getConstant(2047, MVT::i64));
+ Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
+ Round, DAG.getConstant(2047, MVT::i64));
+ Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
+ Round = DAG.getNode(ISD::AND, dl, MVT::i64,
+ Round, DAG.getConstant(-2048, MVT::i64));
+
+ // However, we cannot use that value unconditionally: if the magnitude
+ // of the input value is small, the bit-twiddling we did above might
+ // end up visibly changing the output. Fortunately, in that case, we
+ // don't need to twiddle bits since the original input will convert
+ // exactly to double-precision floating-point already. Therefore,
+ // construct a conditional to use the original value if the top 11
+ // bits are all sign-bit copies, and use the rounded value computed
+ // above otherwise.
+ SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
+ SINT, DAG.getConstant(53, MVT::i32));
+ Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
+ Cond, DAG.getConstant(1, MVT::i64));
+ Cond = DAG.getSetCC(dl, MVT::i32,
+ Cond, DAG.getConstant(1, MVT::i64), ISD::SETUGT);
+
+ SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
+ }
+ SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Bits);
if (Op.getValueType() == MVT::f32)
FP = DAG.getNode(ISD::FP_ROUND, dl,
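For reference, here is a scalar C++ sketch of the same preparation, useful for sanity-checking the constants above. The helper name and standalone form are illustrative only (the patch builds these operations as DAG nodes), and it assumes an arithmetic right shift on int64_t, matching the ISD::SRA node used above.

#include <cstdint>

// Mirrors the f32 path: force bit 11 whenever any of the low 11 bits are set,
// then clear the low 11 bits, so the i64 -> f64 -> f32 sequence cannot
// double-round.
static int64_t PrepareSIntForSinglePrecision(int64_t X) {
  int64_t Round = X & 2047;          // keep only the low 11 bits
  Round += 2047;                     // carries into bit 11 iff any low bit was set
  Round |= X;
  Round &= ~INT64_C(2047);           // clear the low 11 bits again
  // If the top 11 bits of X are all sign-bit copies, X already converts exactly
  // to double precision, so keep the original value.
  uint64_t Cond = (uint64_t)((X >> 53) + 1);
  return Cond > 1 ? Round : X;
}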
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index b861383475..5f39b8d2c2 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -43,8 +43,7 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT,
DL(Subtarget.getDataLayoutString()), InstrInfo(*this),
FrameLowering(Subtarget), JITInfo(*this, is64Bit),
TLInfo(*this), TSInfo(*this),
- InstrItins(Subtarget.getInstrItineraryData()),
- STTI(&TLInfo){
+ InstrItins(Subtarget.getInstrItineraryData()) {
// The binutils for the BG/P are too old for CFI.
if (Subtarget.isBGP())
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index c168433a71..02d69fd15d 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -21,7 +21,6 @@
#include "PPCISelLowering.h"
#include "PPCSelectionDAGInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetTransformImpl.h"
#include "llvm/DataLayout.h"
namespace llvm {
@@ -37,8 +36,6 @@ class PPCTargetMachine : public LLVMTargetMachine {
PPCTargetLowering TLInfo;
PPCSelectionDAGInfo TSInfo;
InstrItineraryData InstrItins;
- ScalarTargetTransformImpl STTI;
- VectorTargetTransformImpl VTTI;
public:
PPCTargetMachine(const Target &T, StringRef TT,
@@ -66,12 +63,6 @@ public:
virtual const InstrItineraryData *getInstrItineraryData() const {
return &InstrItins;
}
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
- return &STTI;
- }
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
- return &VTTI;
- }
// Pass Pipeline Configuration
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index 1d8cc771dd..8b7559c2f9 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -36,7 +36,7 @@ SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT,
DL(Subtarget.getDataLayout()),
InstrInfo(Subtarget),
TLInfo(*this), TSInfo(*this),
- FrameLowering(Subtarget),STTI(&TLInfo) {
+ FrameLowering(Subtarget) {
}
namespace {
diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h
index 0fbe2d7cda..c9f2d68eb1 100644
--- a/lib/Target/Sparc/SparcTargetMachine.h
+++ b/lib/Target/Sparc/SparcTargetMachine.h
@@ -22,7 +22,6 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetTransformImpl.h"
namespace llvm {
@@ -33,8 +32,6 @@ class SparcTargetMachine : public LLVMTargetMachine {
SparcTargetLowering TLInfo;
SparcSelectionDAGInfo TSInfo;
SparcFrameLowering FrameLowering;
- ScalarTargetTransformImpl STTI;
- VectorTargetTransformImpl VTTI;
public:
SparcTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
@@ -55,12 +52,6 @@ public:
virtual const SparcSelectionDAGInfo* getSelectionDAGInfo() const {
return &TSInfo;
}
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
- return &STTI;
- }
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
- return &VTTI;
- }
virtual const DataLayout *getDataLayout() const { return &DL; }
// Pass Pipeline Configuration
diff --git a/lib/Target/Target.cpp b/lib/Target/Target.cpp
index 393178a469..96c30a1847 100644
--- a/lib/Target/Target.cpp
+++ b/lib/Target/Target.cpp
@@ -26,7 +26,6 @@ using namespace llvm;
void llvm::initializeTarget(PassRegistry &Registry) {
initializeDataLayoutPass(Registry);
initializeTargetLibraryInfoPass(Registry);
- initializeTargetTransformInfoPass(Registry);
}
void LLVMInitializeTarget(LLVMPassRegistryRef R) {
diff --git a/lib/Target/TargetTransformImpl.cpp b/lib/Target/TargetTransformImpl.cpp
deleted file mode 100644
index 1cb5edab9d..0000000000
--- a/lib/Target/TargetTransformImpl.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-// llvm/Target/TargetTransformImpl.cpp - Target Loop Trans Info ---*- C++ -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Target/TargetTransformImpl.h"
-#include "llvm/Target/TargetLowering.h"
-
-using namespace llvm;
-
-bool ScalarTargetTransformImpl::isLegalAddImmediate(int64_t imm) const {
- return TLI->isLegalAddImmediate(imm);
-}
-
-bool ScalarTargetTransformImpl::isLegalICmpImmediate(int64_t imm) const {
- return TLI->isLegalICmpImmediate(imm);
-}
-
-bool ScalarTargetTransformImpl::isLegalAddressingMode(const AddrMode &AM,
- Type *Ty) const {
- return TLI->isLegalAddressingMode(AM, Ty);
-}
-
-bool ScalarTargetTransformImpl::isTruncateFree(Type *Ty1, Type *Ty2) const {
- return TLI->isTruncateFree(Ty1, Ty2);
-}
-
-bool ScalarTargetTransformImpl::isTypeLegal(Type *Ty) const {
- EVT T = TLI->getValueType(Ty);
- return TLI->isTypeLegal(T);
-}
-
-unsigned ScalarTargetTransformImpl::getJumpBufAlignment() const {
- return TLI->getJumpBufAlignment();
-}
-
-unsigned ScalarTargetTransformImpl::getJumpBufSize() const {
- return TLI->getJumpBufSize();
-}
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 488a832785..813f753032 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -467,7 +467,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
// NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
// SjLj exception handling but a light-weight setjmp/longjmp replacement to
- // support continuation, user-level threading, and etc.. As a result, not
+ // support continuation, user-level threading, and etc.. As a result, no
// other SjLj exception interfaces are implemented and please don't build
// your own exception handling based on them.
// LLVM/Clang supports zero-cost DWARF exception handling.
@@ -13503,7 +13503,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
// For v = setjmp(buf), we generate
//
// thisMBB:
- // buf[Label_Offset] = ljMBB
+ // buf[LabelOffset] = restoreMBB
// SjLjSetup restoreMBB
//
// mainMBB:
@@ -13531,18 +13531,48 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
// thisMBB:
- unsigned PtrImmStoreOpc = (PVT == MVT::i64) ? X86::MOV64mi32 : X86::MOV32mi;
- const int64_t Label_Offset = 1 * PVT.getStoreSize();
-
+ unsigned PtrStoreOpc = 0;
+ unsigned LabelReg = 0;
+ const int64_t LabelOffset = 1 * PVT.getStoreSize();
+ Reloc::Model RM = getTargetMachine().getRelocationModel();
+ bool UseImmLabel = (getTargetMachine().getCodeModel() == CodeModel::Small) &&
+ (RM == Reloc::Static || RM == Reloc::DynamicNoPIC);
+
+ // Prepare IP either in reg or imm.
+ if (!UseImmLabel) {
+ PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mr : X86::MOV32mr;
+ const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
+ LabelReg = MRI.createVirtualRegister(PtrRC);
+ if (Subtarget->is64Bit()) {
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::LEA64r), LabelReg)
+ .addReg(X86::RIP)
+ .addImm(0)
+ .addReg(0)
+ .addMBB(restoreMBB)
+ .addReg(0);
+ } else {
+ const X86InstrInfo *XII = static_cast<const X86InstrInfo*>(TII);
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::LEA32r), LabelReg)
+ .addReg(XII->getGlobalBaseReg(MF))
+ .addImm(0)
+ .addReg(0)
+ .addMBB(restoreMBB, Subtarget->ClassifyBlockAddressReference())
+ .addReg(0);
+ }
+ } else
+ PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mi32 : X86::MOV32mi;
// Store IP
- MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrImmStoreOpc));
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrStoreOpc));
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
if (i == X86::AddrDisp)
- MIB.addDisp(MI->getOperand(MemOpndSlot + i), Label_Offset);
+ MIB.addDisp(MI->getOperand(MemOpndSlot + i), LabelOffset);
else
MIB.addOperand(MI->getOperand(MemOpndSlot + i));
}
- MIB.addMBB(restoreMBB);
+ if (!UseImmLabel)
+ MIB.addReg(LabelReg);
+ else
+ MIB.addMBB(restoreMBB);
MIB.setMemRefs(MMOBegin, MMOEnd);
// Setup
MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::EH_SjLj_Setup))
@@ -13597,8 +13627,8 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
MachineInstrBuilder MIB;
- const int64_t Label_Offset = 1 * PVT.getStoreSize();
- const int64_t SP_Offset = 2 * PVT.getStoreSize();
+ const int64_t LabelOffset = 1 * PVT.getStoreSize();
+ const int64_t SPOffset = 2 * PVT.getStoreSize();
unsigned PtrLoadOpc = (PVT == MVT::i64) ? X86::MOV64rm : X86::MOV32rm;
unsigned IJmpOpc = (PVT == MVT::i64) ? X86::JMP64r : X86::JMP32r;
@@ -13612,7 +13642,7 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), Tmp);
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
if (i == X86::AddrDisp)
- MIB.addDisp(MI->getOperand(i), Label_Offset);
+ MIB.addDisp(MI->getOperand(i), LabelOffset);
else
MIB.addOperand(MI->getOperand(i));
}
@@ -13621,7 +13651,7 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), SP);
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
if (i == X86::AddrDisp)
- MIB.addDisp(MI->getOperand(i), SP_Offset);
+ MIB.addDisp(MI->getOperand(i), SPOffset);
else
MIB.addOperand(MI->getOperand(i));
}
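
The setjmp and longjmp rewrites above share one fixed layout of pointer-sized slots in the buffer; a rough sketch of that layout (field names are illustrative, not LLVM API; each slot is PVT.getStoreSize() bytes, i.e. 4 or 8):

    // Slots as used by emitEHSjLjSetJmp / emitEHSjLjLongJmp above.
    struct SjLjBuf {
      void *FramePointer;  // slot 0, by convention; not touched by these hunks
      void *ResumeIP;      // slot 1 = LabelOffset: address of restoreMBB,
                           //   stored from LabelReg (LEA) or as an immediate
      void *StackPointer;  // slot 2 = SPOffset: reloaded before the indirect jump
    };
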
@@ -15645,11 +15675,11 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
ISD::LoadExtType Ext = Ld->getExtensionType();
// If this is a vector EXT Load then attempt to optimize it using a
- // shuffle. We need SSE4 for the shuffles.
+ // shuffle. We need SSSE3 for the shuffles.
// TODO: It is possible to support ZExt by zeroing the undef values
// during the shuffle phase or after the shuffle.
if (RegVT.isVector() && RegVT.isInteger() &&
- Ext == ISD::EXTLOAD && Subtarget->hasSSE41()) {
+ Ext == ISD::EXTLOAD && Subtarget->hasSSSE3()) {
assert(MemVT != RegVT && "Cannot extend to the same type");
assert(MemVT.isVector() && "Must load a vector from memory");
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index 1fe8976b60..20bc85e65f 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -67,15 +67,11 @@ MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const {
/// operand to an MCSymbol.
MCSymbol *X86MCInstLower::
GetSymbolFromOperand(const MachineOperand &MO) const {
- assert((MO.isGlobal() || MO.isSymbol()) && "Isn't a symbol reference");
+ assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) && "Isn't a symbol reference");
SmallString<128> Name;
- if (!MO.isGlobal()) {
- assert(MO.isSymbol());
- Name += MAI.getGlobalPrefix();
- Name += MO.getSymbolName();
- } else {
+ if (MO.isGlobal()) {
const GlobalValue *GV = MO.getGlobal();
bool isImplicitlyPrivate = false;
if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB ||
@@ -85,6 +81,11 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
isImplicitlyPrivate = true;
Mang->getNameWithPrefix(Name, GV, isImplicitlyPrivate);
+ } else if (MO.isSymbol()) {
+ Name += MAI.getGlobalPrefix();
+ Name += MO.getSymbolName();
+ } else if (MO.isMBB()) {
+ Name += MO.getMBB()->getSymbol()->getName();
}
// If the target flags on the operand changes the name of the symbol, do that
@@ -215,7 +216,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
if (Expr == 0)
Expr = MCSymbolRefExpr::Create(Sym, RefKind, Ctx);
- if (!MO.isJTI() && MO.getOffset())
+ if (!MO.isJTI() && !MO.isMBB() && MO.getOffset())
Expr = MCBinaryExpr::CreateAdd(Expr,
MCConstantExpr::Create(MO.getOffset(), Ctx),
Ctx);
@@ -348,9 +349,6 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
MCOp = MCOperand::CreateImm(MO.getImm());
break;
case MachineOperand::MO_MachineBasicBlock:
- MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
- MO.getMBB()->getSymbol(), Ctx));
- break;
case MachineOperand::MO_GlobalAddress:
case MachineOperand::MO_ExternalSymbol:
MCOp = LowerSymbolOperand(MO, GetSymbolFromOperand(MO));
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index c804195f27..ed5e6f5227 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -50,8 +50,7 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT,
InstrInfo(*this),
TSInfo(*this),
TLInfo(*this),
- JITInfo(*this),
- STTI(&TLInfo) {
+ JITInfo(*this) {
}
void X86_64TargetMachine::anchor() { }
@@ -70,8 +69,7 @@ X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT,
InstrInfo(*this),
TSInfo(*this),
TLInfo(*this),
- JITInfo(*this),
- STTI(&TLInfo) {
+ JITInfo(*this) {
}
/// X86TargetMachine ctor - Create an X86 target.
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index def028f191..5c625ac953 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -28,7 +28,6 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetTransformImpl.h"
namespace llvm {
@@ -93,8 +92,6 @@ class X86_32TargetMachine : public X86TargetMachine {
#else
X86JITInfo JITInfo;
#endif
- ScalarTargetTransformImpl STTI;
- VectorTargetTransformImpl VTTI;
public:
X86_32TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
@@ -113,12 +110,6 @@ public:
virtual X86JITInfo *getJITInfo() {
return &JITInfo;
}
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
- return &STTI;
- }
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
- return &VTTI;
- }
};
/// X86_64TargetMachine - X86 64-bit target machine.
@@ -130,8 +121,6 @@ class X86_64TargetMachine : public X86TargetMachine {
X86SelectionDAGInfo TSInfo;
X86TargetLowering TLInfo;
X86JITInfo JITInfo;
- ScalarTargetTransformImpl STTI;
- VectorTargetTransformImpl VTTI;
public:
X86_64TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
@@ -150,12 +139,6 @@ public:
virtual X86JITInfo *getJITInfo() {
return &JITInfo;
}
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
- return &STTI;
- }
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
- return &VTTI;
- }
};
} // End llvm namespace
diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp
index 449eed3d8d..c4a58874a4 100644
--- a/lib/Target/X86/X86VZeroUpper.cpp
+++ b/lib/Target/X86/X86VZeroUpper.cpp
@@ -147,7 +147,7 @@ bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
const TargetRegisterClass *RC = &X86::VR256RegClass;
for (TargetRegisterClass::iterator i = RC->begin(), e = RC->end();
i != e; i++) {
- if (MRI.isPhysRegUsed(*i)) {
+ if (!MRI.reg_nodbg_empty(*i)) {
YMMUsed = true;
break;
}
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index 0b7e3e10d4..c71d978ad8 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -32,7 +32,7 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT,
InstrInfo(),
FrameLowering(Subtarget),
TLInfo(*this),
- TSInfo(*this), STTI(&TLInfo) {
+ TSInfo(*this) {
}
namespace {
diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h
index c60c6a37f9..f7fec29f54 100644
--- a/lib/Target/XCore/XCoreTargetMachine.h
+++ b/lib/Target/XCore/XCoreTargetMachine.h
@@ -20,7 +20,6 @@
#include "XCoreISelLowering.h"
#include "XCoreSelectionDAGInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetTransformImpl.h"
#include "llvm/DataLayout.h"
namespace llvm {
@@ -32,8 +31,6 @@ class XCoreTargetMachine : public LLVMTargetMachine {
XCoreFrameLowering FrameLowering;
XCoreTargetLowering TLInfo;
XCoreSelectionDAGInfo TSInfo;
- ScalarTargetTransformImpl STTI;
- VectorTargetTransformImpl VTTI;
public:
XCoreTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
@@ -56,12 +53,6 @@ public:
virtual const TargetRegisterInfo *getRegisterInfo() const {
return &InstrInfo.getRegisterInfo();
}
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
- return &STTI;
- }
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
- return &VTTI;
- }
virtual const DataLayout *getDataLayout() const { return &DL; }
// Pass Pipeline Configuration
diff --git a/lib/Transforms/IPO/BarrierNoopPass.cpp b/lib/Transforms/IPO/BarrierNoopPass.cpp
new file mode 100644
index 0000000000..2e32240621
--- /dev/null
+++ b/lib/Transforms/IPO/BarrierNoopPass.cpp
@@ -0,0 +1,47 @@
+//===- BarrierNoopPass.cpp - A barrier pass for the pass manager ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// NOTE: DO NOT USE THIS IF AVOIDABLE
+//
+// This pass is a nonce pass intended to allow manipulation of the implicitly
+// nesting pass manager. For example, it can be used to cause a CGSCC pass
+// manager to be closed prior to running a new collection of function passes.
+//
+// FIXME: This is a huge HACK. This should be removed when the pass manager's
+// nesting is made explicit instead of implicit.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Pass.h"
+#include "llvm/Transforms/IPO.h"
+using namespace llvm;
+
+namespace {
+/// \brief A nonce module pass used to place a barrier in a pass manager.
+///
+/// There is no mechanism for ending a CGSCC pass manager once one is started.
+/// This prevents extension points from having clear deterministic ordering
+/// when they are phrased as non-module passes.
+class BarrierNoop : public ModulePass {
+public:
+ static char ID; // Pass identification.
+
+ BarrierNoop() : ModulePass(ID) {
+ initializeBarrierNoopPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) { return false; }
+};
+}
+
+ModulePass *llvm::createBarrierNoopPass() { return new BarrierNoop(); }
+
+char BarrierNoop::ID = 0;
+INITIALIZE_PASS(BarrierNoop, "barrier", "A No-Op Barrier Pass",
+ false, false)
diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt
index 3f6b1de614..90c1c33e6d 100644
--- a/lib/Transforms/IPO/CMakeLists.txt
+++ b/lib/Transforms/IPO/CMakeLists.txt
@@ -1,5 +1,6 @@
add_llvm_library(LLVMipo
ArgumentPromotion.cpp
+ BarrierNoopPass.cpp
ConstantMerge.cpp
DeadArgumentElimination.cpp
ExtractGV.cpp
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 9e328b9ac9..04163f751f 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -119,6 +119,14 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(Inliner);
Inliner = 0;
}
+
+ // FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC
+ // pass manager, but we don't want to add extensions into that pass manager.
+ // To prevent this we must insert a no-op module pass to reset the pass
+ // manager to get the same behavior as EP_OptimizerLast in non-O0 builds.
+ if (!GlobalExtensions->empty() || !Extensions.empty())
+ MPM.add(createBarrierNoopPass());
+
addExtensionsToPM(EP_EnabledOnOptLevel0, MPM);
return;
}
@@ -176,6 +184,12 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars
MPM.add(createLoopIdiomPass()); // Recognize idioms like memset.
MPM.add(createLoopDeletionPass()); // Delete dead loops
+
+ if (Vectorize) {
+ MPM.add(createLoopVectorizePass());
+ MPM.add(createLICMPass());
+ }
+
if (!DisableUnrollLoops)
MPM.add(createLoopUnrollPass()); // Unroll small loops
addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index b566994edf..75f42f30fa 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -534,7 +534,7 @@ void AddressSanitizer::createInitializerPoisonCalls(Module &M,
bool AddressSanitizer::ShouldInstrumentGlobal(GlobalVariable *G) {
Type *Ty = cast<PointerType>(G->getType())->getElementType();
- DEBUG(dbgs() << "GLOBAL: " << *G);
+ DEBUG(dbgs() << "GLOBAL: " << *G << "\n");
if (BL->isIn(*G)) return false;
if (!Ty->isSized()) return false;
@@ -682,7 +682,7 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) {
FirstDynamic = LastDynamic;
}
- DEBUG(dbgs() << "NEW GLOBAL:\n" << *NewGlobal);
+ DEBUG(dbgs() << "NEW GLOBAL: " << *NewGlobal << "\n");
}
ArrayType *ArrayOfGlobalStructTy = ArrayType::get(GlobalStructTy, n);
@@ -851,6 +851,7 @@ bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) {
bool AddressSanitizer::runOnFunction(Function &F) {
if (BL->isIn(F)) return false;
if (&F == AsanCtorFunction) return false;
+ DEBUG(dbgs() << "ASAN instrumenting:\n" << F << "\n");
// If needed, insert __asan_init before checking for AddressSafety attr.
maybeInsertAsanInitAtFunctionEntry(F);
@@ -914,8 +915,6 @@ bool AddressSanitizer::runOnFunction(Function &F) {
NumInstrumented++;
}
- DEBUG(dbgs() << F);
-
bool ChangedStack = poisonStackInFunction(F);
// We must unpoison the stack before every NoReturn call (throw, _exit, etc).
@@ -925,6 +924,7 @@ bool AddressSanitizer::runOnFunction(Function &F) {
IRBuilder<> IRB(CI);
IRB.CreateCall(AsanHandleNoReturnFunc);
}
+ DEBUG(dbgs() << "ASAN done instrumenting:\n" << F << "\n");
return NumInstrumented > 0 || ChangedStack || !NoReturnCalls.empty();
}
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 99a62dbe62..958348d9fa 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -37,7 +37,7 @@
//
// TODO: Handle multiple loops at a time.
//
-// TODO: Should AddrMode::BaseGV be changed to a ConstantExpr
+// TODO: Should TargetLowering::AddrMode::BaseGV be changed to a ConstantExpr
// instead of a GlobalValue?
//
// TODO: When truncation is free, truncate ICmp users' operands to make it a
@@ -67,7 +67,6 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/TargetTransformInfo.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/DenseSet.h"
@@ -75,6 +74,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
#include <algorithm>
using namespace llvm;
@@ -1118,7 +1118,7 @@ public:
enum KindType {
Basic, ///< A normal use, with no folding.
Special, ///< A special case of basic, allowing -1 scales.
- Address, ///< An address use; folding according to ScalarTargetTransformInfo.
+ Address, ///< An address use; folding according to TargetLowering.
ICmpZero ///< An equality icmp with both operands folded into one.
// TODO: Add a generic icmp too?
};
@@ -1272,12 +1272,12 @@ void LSRUse::dump() const {
/// address-mode folding and special icmp tricks.
static bool isLegalUse(const AddrMode &AM,
LSRUse::KindType Kind, Type *AccessTy,
- const ScalarTargetTransformInfo *STTI) {
+ const TargetLowering *TLI) {
switch (Kind) {
case LSRUse::Address:
// If we have low-level target information, ask the target if it can
// completely fold this address.
- if (STTI) return STTI->isLegalAddressingMode(AM, AccessTy);
+ if (TLI) return TLI->isLegalAddressingMode(AM, AccessTy);
// Otherwise, just guess that reg+reg addressing is legal.
return !AM.BaseGV && AM.BaseOffs == 0 && AM.Scale <= 1;
@@ -1300,7 +1300,7 @@ static bool isLegalUse(const AddrMode &AM,
// If we have low-level target information, ask the target if it can fold an
// integer immediate on an icmp.
if (AM.BaseOffs != 0) {
- if (!STTI)
+ if (!TLI)
return false;
// We have one of:
// ICmpZero BaseReg + Offset => ICmp BaseReg, -Offset
@@ -1309,7 +1309,7 @@ static bool isLegalUse(const AddrMode &AM,
int64_t Offs = AM.BaseOffs;
if (AM.Scale == 0)
Offs = -(uint64_t)Offs; // The cast does the right thing with INT64_MIN.
- return STTI->isLegalICmpImmediate(Offs);
+ return TLI->isLegalICmpImmediate(Offs);
}
// ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg
@@ -1330,20 +1330,20 @@ static bool isLegalUse(const AddrMode &AM,
static bool isLegalUse(AddrMode AM,
int64_t MinOffset, int64_t MaxOffset,
LSRUse::KindType Kind, Type *AccessTy,
- const ScalarTargetTransformInfo *LTTI) {
+ const TargetLowering *TLI) {
// Check for overflow.
if (((int64_t)((uint64_t)AM.BaseOffs + MinOffset) > AM.BaseOffs) !=
(MinOffset > 0))
return false;
AM.BaseOffs = (uint64_t)AM.BaseOffs + MinOffset;
- if (isLegalUse(AM, Kind, AccessTy, LTTI)) {
+ if (isLegalUse(AM, Kind, AccessTy, TLI)) {
AM.BaseOffs = (uint64_t)AM.BaseOffs - MinOffset;
// Check for overflow.
if (((int64_t)((uint64_t)AM.BaseOffs + MaxOffset) > AM.BaseOffs) !=
(MaxOffset > 0))
return false;
AM.BaseOffs = (uint64_t)AM.BaseOffs + MaxOffset;
- return isLegalUse(AM, Kind, AccessTy, LTTI);
+ return isLegalUse(AM, Kind, AccessTy, TLI);
}
return false;
}
@@ -1352,7 +1352,7 @@ static bool isAlwaysFoldable(int64_t BaseOffs,
GlobalValue *BaseGV,
bool HasBaseReg,
LSRUse::KindType Kind, Type *AccessTy,
- const ScalarTargetTransformInfo *LTTI) {
+ const TargetLowering *TLI) {
// Fast-path: zero is always foldable.
if (BaseOffs == 0 && !BaseGV) return true;
@@ -1371,14 +1371,14 @@ static bool isAlwaysFoldable(int64_t BaseOffs,
AM.HasBaseReg = true;
}
- return isLegalUse(AM, Kind, AccessTy, LTTI);
+ return isLegalUse(AM, Kind, AccessTy, TLI);
}
static bool isAlwaysFoldable(const SCEV *S,
int64_t MinOffset, int64_t MaxOffset,
bool HasBaseReg,
LSRUse::KindType Kind, Type *AccessTy,
- const ScalarTargetTransformInfo *LTTI,
+ const TargetLowering *TLI,
ScalarEvolution &SE) {
// Fast-path: zero is always foldable.
if (S->isZero()) return true;
@@ -1402,7 +1402,7 @@ static bool isAlwaysFoldable(const SCEV *S,
AM.HasBaseReg = HasBaseReg;
AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
- return isLegalUse(AM, MinOffset, MaxOffset, Kind, AccessTy, LTTI);
+ return isLegalUse(AM, MinOffset, MaxOffset, Kind, AccessTy, TLI);
}
namespace {
@@ -1502,7 +1502,7 @@ class LSRInstance {
ScalarEvolution &SE;
DominatorTree &DT;
LoopInfo &LI;
- const ScalarTargetTransformInfo *const STTI;
+ const TargetLowering *const TLI;
Loop *const L;
bool Changed;
@@ -1638,7 +1638,7 @@ class LSRInstance {
Pass *P);
public:
- LSRInstance(const ScalarTargetTransformInfo *ltti, Loop *l, Pass *P);
+ LSRInstance(const TargetLowering *tli, Loop *l, Pass *P);
bool getChanged() const { return Changed; }
@@ -1688,10 +1688,11 @@ void LSRInstance::OptimizeShadowIV() {
}
if (!DestTy) continue;
- if (STTI) {
+ if (TLI) {
// If target does not support DestTy natively then do not apply
// this transformation.
- if (!STTI->isTypeLegal(DestTy)) continue;
+ EVT DVT = TLI->getValueType(DestTy);
+ if (!TLI->isTypeLegal(DVT)) continue;
}
PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0));
@@ -2014,18 +2015,18 @@ LSRInstance::OptimizeLoopTermCond() {
if (C->getValue().getMinSignedBits() >= 64 ||
C->getValue().isMinSignedValue())
goto decline_post_inc;
- // Without STTI, assume that any stride might be valid, and so any
+ // Without TLI, assume that any stride might be valid, and so any
// use might be shared.
- if (!STTI)
+ if (!TLI)
goto decline_post_inc;
// Check for possible scaled-address reuse.
Type *AccessTy = getAccessType(UI->getUser());
AddrMode AM;
AM.Scale = C->getSExtValue();
- if (STTI->isLegalAddressingMode(AM, AccessTy))
+ if (TLI->isLegalAddressingMode(AM, AccessTy))
goto decline_post_inc;
AM.Scale = -AM.Scale;
- if (STTI->isLegalAddressingMode(AM, AccessTy))
+ if (TLI->isLegalAddressingMode(AM, AccessTy))
goto decline_post_inc;
}
}
@@ -2096,12 +2097,12 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
// Conservatively assume HasBaseReg is true for now.
if (NewOffset < LU.MinOffset) {
if (!isAlwaysFoldable(LU.MaxOffset - NewOffset, 0, HasBaseReg,
- Kind, AccessTy, STTI))
+ Kind, AccessTy, TLI))
return false;
NewMinOffset = NewOffset;
} else if (NewOffset > LU.MaxOffset) {
if (!isAlwaysFoldable(NewOffset - LU.MinOffset, 0, HasBaseReg,
- Kind, AccessTy, STTI))
+ Kind, AccessTy, TLI))
return false;
NewMaxOffset = NewOffset;
}
@@ -2130,7 +2131,7 @@ LSRInstance::getUse(const SCEV *&Expr,
int64_t Offset = ExtractImmediate(Expr, SE);
// Basic uses can't accept any offset, for example.
- if (!isAlwaysFoldable(Offset, 0, /*HasBaseReg=*/true, Kind, AccessTy, STTI)) {
+ if (!isAlwaysFoldable(Offset, 0, /*HasBaseReg=*/true, Kind, AccessTy, TLI)) {
Expr = Copy;
Offset = 0;
}
@@ -2395,7 +2396,7 @@ bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
/// TODO: Consider IVInc free if it's already used in another chains.
static bool
isProfitableChain(IVChain &Chain, SmallPtrSet<Instruction*, 4> &Users,
- ScalarEvolution &SE, const ScalarTargetTransformInfo *STTI) {
+ ScalarEvolution &SE, const TargetLowering *TLI) {
if (StressIVChain)
return true;
@@ -2653,7 +2654,7 @@ void LSRInstance::CollectChains() {
for (unsigned UsersIdx = 0, NChains = IVChainVec.size();
UsersIdx < NChains; ++UsersIdx) {
if (!isProfitableChain(IVChainVec[UsersIdx],
- ChainUsersVec[UsersIdx].FarUsers, SE, STTI))
+ ChainUsersVec[UsersIdx].FarUsers, SE, TLI))
continue;
// Preserve the chain at UsesIdx.
if (ChainIdx != UsersIdx)
@@ -2680,8 +2681,7 @@ void LSRInstance::FinalizeChain(IVChain &Chain) {
/// Return true if the IVInc can be folded into an addressing mode.
static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
- Value *Operand,
- const ScalarTargetTransformInfo *STTI) {
+ Value *Operand, const TargetLowering *TLI) {
const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
if (!IncConst || !isAddressUse(UserInst, Operand))
return false;
@@ -2691,7 +2691,7 @@ static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
int64_t IncOffset = IncConst->getValue()->getSExtValue();
if (!isAlwaysFoldable(IncOffset, /*BaseGV=*/0, /*HasBaseReg=*/false,
- LSRUse::Address, getAccessType(UserInst), STTI))
+ LSRUse::Address, getAccessType(UserInst), TLI))
return false;
return true;
@@ -2762,7 +2762,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
// If an IV increment can't be folded, use it as the next IV value.
if (!canFoldIVIncExpr(LeftOverExpr, IncI->UserInst, IncI->IVOperand,
- STTI)) {
+ TLI)) {
assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
IVSrc = IVOper;
LeftOverExpr = 0;
@@ -3108,7 +3108,7 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
// into an immediate field.
if (isAlwaysFoldable(*J, LU.MinOffset, LU.MaxOffset,
Base.getNumRegs() > 1,
- LU.Kind, LU.AccessTy, STTI, SE))
+ LU.Kind, LU.AccessTy, TLI, SE))
continue;
// Collect all operands except *J.
@@ -3122,7 +3122,7 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
if (InnerAddOps.size() == 1 &&
isAlwaysFoldable(InnerAddOps[0], LU.MinOffset, LU.MaxOffset,
Base.getNumRegs() > 1,
- LU.Kind, LU.AccessTy, STTI, SE))
+ LU.Kind, LU.AccessTy, TLI, SE))
continue;
const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
@@ -3132,9 +3132,9 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
// Add the remaining pieces of the add back into the new formula.
const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
- if (STTI && InnerSumSC &&
+ if (TLI && InnerSumSC &&
SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&
- STTI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
+ TLI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
InnerSumSC->getValue()->getZExtValue())) {
F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
InnerSumSC->getValue()->getZExtValue();
@@ -3144,8 +3144,8 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
// Add J as its own register, or an unfolded immediate.
const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J);
- if (STTI && SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
- STTI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
+ if (TLI && SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
+ TLI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
SC->getValue()->getZExtValue()))
F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
SC->getValue()->getZExtValue();
@@ -3205,7 +3205,7 @@ void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
Formula F = Base;
F.AM.BaseGV = GV;
if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset,
- LU.Kind, LU.AccessTy, STTI))
+ LU.Kind, LU.AccessTy, TLI))
continue;
F.BaseRegs[i] = G;
(void)InsertFormula(LU, LUIdx, F);
@@ -3230,7 +3230,7 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
Formula F = Base;
F.AM.BaseOffs = (uint64_t)Base.AM.BaseOffs - *I;
if (isLegalUse(F.AM, LU.MinOffset - *I, LU.MaxOffset - *I,
- LU.Kind, LU.AccessTy, STTI)) {
+ LU.Kind, LU.AccessTy, TLI)) {
// Add the offset to the base register.
const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G);
// If it cancelled out, drop the base register, otherwise update it.
@@ -3250,7 +3250,7 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
Formula F = Base;
F.AM.BaseOffs = (uint64_t)F.AM.BaseOffs + Imm;
if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset,
- LU.Kind, LU.AccessTy, STTI))
+ LU.Kind, LU.AccessTy, TLI))
continue;
F.BaseRegs[i] = G;
(void)InsertFormula(LU, LUIdx, F);
@@ -3297,7 +3297,7 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
F.AM.BaseOffs = NewBaseOffs;
// Check that this scale is legal.
- if (!isLegalUse(F.AM, Offset, Offset, LU.Kind, LU.AccessTy, STTI))
+ if (!isLegalUse(F.AM, Offset, Offset, LU.Kind, LU.AccessTy, TLI))
continue;
// Compensate for the use having MinOffset built into it.
@@ -3353,12 +3353,12 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
Base.AM.HasBaseReg = Base.BaseRegs.size() > 1;
// Check whether this scale is going to be legal.
if (!isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset,
- LU.Kind, LU.AccessTy, STTI)) {
+ LU.Kind, LU.AccessTy, TLI)) {
// As a special-case, handle special out-of-loop Basic users specially.
// TODO: Reconsider this special case.
if (LU.Kind == LSRUse::Basic &&
isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset,
- LSRUse::Special, LU.AccessTy, STTI) &&
+ LSRUse::Special, LU.AccessTy, TLI) &&
LU.AllFixupsOutsideLoop)
LU.Kind = LSRUse::Special;
else
@@ -3391,8 +3391,8 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
/// GenerateTruncates - Generate reuse formulae from different IV types.
void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
- // This requires ScalarTargetTransformInfo to tell us which truncates are free.
- if (!STTI) return;
+ // This requires TargetLowering to tell us which truncates are free.
+ if (!TLI) return;
// Don't bother truncating symbolic values.
if (Base.AM.BaseGV) return;
@@ -3405,7 +3405,7 @@ void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
for (SmallSetVector<Type *, 4>::const_iterator
I = Types.begin(), E = Types.end(); I != E; ++I) {
Type *SrcTy = *I;
- if (SrcTy != DstTy && STTI->isTruncateFree(SrcTy, DstTy)) {
+ if (SrcTy != DstTy && TLI->isTruncateFree(SrcTy, DstTy)) {
Formula F = Base;
if (F.ScaledReg) F.ScaledReg = SE.getAnyExtendExpr(F.ScaledReg, *I);
@@ -3561,7 +3561,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
Formula NewF = F;
NewF.AM.BaseOffs = Offs;
if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset,
- LU.Kind, LU.AccessTy, STTI))
+ LU.Kind, LU.AccessTy, TLI))
continue;
NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);
@@ -3586,9 +3586,9 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
Formula NewF = F;
NewF.AM.BaseOffs = (uint64_t)NewF.AM.BaseOffs + Imm;
if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset,
- LU.Kind, LU.AccessTy, STTI)) {
- if (!STTI ||
- !STTI->isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm))
+ LU.Kind, LU.AccessTy, TLI)) {
+ if (!TLI ||
+ !TLI->isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm))
continue;
NewF = F;
NewF.UnfoldedOffset = (uint64_t)NewF.UnfoldedOffset + Imm;
@@ -3900,7 +3900,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
Formula &F = LUThatHas->Formulae[i];
if (!isLegalUse(F.AM,
LUThatHas->MinOffset, LUThatHas->MaxOffset,
- LUThatHas->Kind, LUThatHas->AccessTy, STTI)) {
+ LUThatHas->Kind, LUThatHas->AccessTy, TLI)) {
DEBUG(dbgs() << " Deleting "; F.print(dbgs());
dbgs() << '\n');
LUThatHas->DeleteFormula(F);
@@ -4589,12 +4589,12 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
Changed |= DeleteTriviallyDeadInstructions(DeadInsts);
}
-LSRInstance::LSRInstance(const ScalarTargetTransformInfo *stti, Loop *l, Pass *P)
+LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
: IU(P->getAnalysis<IVUsers>()),
SE(P->getAnalysis<ScalarEvolution>()),
DT(P->getAnalysis<DominatorTree>()),
LI(P->getAnalysis<LoopInfo>()),
- STTI(stti), L(l), Changed(false), IVIncInsertPos(0) {
+ TLI(tli), L(l), Changed(false), IVIncInsertPos(0) {
// If LoopSimplify form is not available, stay out of trouble.
if (!L->isLoopSimplifyForm())
@@ -4684,7 +4684,7 @@ LSRInstance::LSRInstance(const ScalarTargetTransformInfo *stti, Loop *l, Pass *P
for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
JE = LU.Formulae.end(); J != JE; ++J)
assert(isLegalUse(J->AM, LU.MinOffset, LU.MaxOffset,
- LU.Kind, LU.AccessTy, STTI) &&
+ LU.Kind, LU.AccessTy, TLI) &&
"Illegal formula generated!");
};
#endif
@@ -4757,13 +4757,13 @@ void LSRInstance::dump() const {
namespace {
class LoopStrengthReduce : public LoopPass {
- /// ScalarTargetTransformInfo provides target information that is needed
- /// for strength reducing loops.
- const ScalarTargetTransformInfo *STTI;
+ /// TLI - Keep a pointer to a TargetLowering to consult for determining
+ /// transformation profitability.
+ const TargetLowering *const TLI;
public:
static char ID; // Pass ID, replacement for typeid
- LoopStrengthReduce();
+ explicit LoopStrengthReduce(const TargetLowering *tli = 0);
private:
bool runOnLoop(Loop *L, LPPassManager &LPM);
@@ -4783,12 +4783,13 @@ INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",
"Loop Strength Reduction", false, false)
-Pass *llvm::createLoopStrengthReducePass() {
- return new LoopStrengthReduce();
+
+Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
+ return new LoopStrengthReduce(TLI);
}
-LoopStrengthReduce::LoopStrengthReduce()
- : LoopPass(ID), STTI(0) {
+LoopStrengthReduce::LoopStrengthReduce(const TargetLowering *tli)
+ : LoopPass(ID), TLI(tli) {
initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
}
@@ -4814,13 +4815,8 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
bool Changed = false;
- TargetTransformInfo *TTI = getAnalysisIfAvailable<TargetTransformInfo>();
-
- if (TTI)
- STTI = TTI->getScalarTargetTransformInfo();
-
// Run the main LSR transformation.
- Changed |= LSRInstance(STTI, L, this).getChanged();
+ Changed |= LSRInstance(TLI, L, this).getChanged();
// Remove any extra phis created by processing inner loops.
Changed |= DeleteDeadPHIs(L->getHeader());
@@ -4831,7 +4827,7 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
Rewriter.setDebugType(DEBUG_TYPE);
#endif
unsigned numFolded = Rewriter.
- replaceCongruentIVs(L, &getAnalysis<DominatorTree>(), DeadInsts, STTI);
+ replaceCongruentIVs(L, &getAnalysis<DominatorTree>(), DeadInsts, TLI);
if (numFolded) {
Changed = true;
DeleteTriviallyDeadInstructions(DeadInsts);
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index 3e84a91c1d..377a6250de 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -132,9 +132,6 @@ public:
///
/// We flag partitions as splittable when they are formed entirely due to
/// accesses by trivially splittable operations such as memset and memcpy.
- ///
- /// FIXME: At some point we should consider loads and stores of FCAs to be
- /// splittable and eagerly split them into scalar values.
bool IsSplittable;
/// \brief Test whether a partition has been marked as dead.
@@ -1785,9 +1782,9 @@ static Value *getNaturalGEPWithType(IRBuilder<> &IRB, const DataLayout &TD,
break;
if (SequentialType *SeqTy = dyn_cast<SequentialType>(ElementTy)) {
ElementTy = SeqTy->getElementType();
- Indices.push_back(IRB.getInt(APInt(TD.getPointerSizeInBits(
- ElementTy->isPointerTy() ?
- cast<PointerType>(ElementTy)->getAddressSpace(): 0), 0)));
+ // Note that we use the default address space as this index is over an
+ // array or a vector, not a pointer.
+ Indices.push_back(IRB.getInt(APInt(TD.getPointerSizeInBits(0), 0)));
} else if (StructType *STy = dyn_cast<StructType>(ElementTy)) {
if (STy->element_begin() == STy->element_end())
break; // Nothing left to descend into.
@@ -1828,7 +1825,7 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD,
if (ElementSizeInBits % 8)
return 0; // GEPs over non-multiple of 8 size vector elements are invalid.
APInt ElementSize(Offset.getBitWidth(), ElementSizeInBits / 8);
- APInt NumSkippedElements = Offset.udiv(ElementSize);
+ APInt NumSkippedElements = Offset.sdiv(ElementSize);
if (NumSkippedElements.ugt(VecTy->getNumElements()))
return 0;
Offset -= NumSkippedElements * ElementSize;
@@ -1840,7 +1837,7 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD,
if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
Type *ElementTy = ArrTy->getElementType();
APInt ElementSize(Offset.getBitWidth(), TD.getTypeAllocSize(ElementTy));
- APInt NumSkippedElements = Offset.udiv(ElementSize);
+ APInt NumSkippedElements = Offset.sdiv(ElementSize);
if (NumSkippedElements.ugt(ArrTy->getNumElements()))
return 0;
@@ -1896,7 +1893,7 @@ static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const DataLayout &TD,
APInt ElementSize(Offset.getBitWidth(), TD.getTypeAllocSize(ElementTy));
if (ElementSize == 0)
return 0; // Zero-length arrays can't help us build a natural GEP.
- APInt NumSkippedElements = Offset.udiv(ElementSize);
+ APInt NumSkippedElements = Offset.sdiv(ElementSize);
Offset -= NumSkippedElements * ElementSize;
Indices.push_back(IRB.getInt(NumSkippedElements));
@@ -2211,6 +2208,48 @@ static bool isIntegerWideningViable(const DataLayout &TD,
return WholeAllocaOp;
}
+static Value *extractInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *V,
+ IntegerType *Ty, uint64_t Offset,
+ const Twine &Name) {
+ IntegerType *IntTy = cast<IntegerType>(V->getType());
+ assert(DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) &&
+ "Element extends past full value");
+ uint64_t ShAmt = 8*Offset;
+ if (DL.isBigEndian())
+ ShAmt = 8*(DL.getTypeStoreSize(IntTy) - DL.getTypeStoreSize(Ty) - Offset);
+ if (ShAmt)
+ V = IRB.CreateLShr(V, ShAmt, Name + ".shift");
+ assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
+ "Cannot extract to a larger integer!");
+ if (Ty != IntTy)
+ V = IRB.CreateTrunc(V, Ty, Name + ".trunc");
+ return V;
+}
+
+static Value *insertInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *Old,
+ Value *V, uint64_t Offset, const Twine &Name) {
+ IntegerType *IntTy = cast<IntegerType>(Old->getType());
+ IntegerType *Ty = cast<IntegerType>(V->getType());
+ assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
+ "Cannot insert a larger integer!");
+ if (Ty != IntTy)
+ V = IRB.CreateZExt(V, IntTy, Name + ".ext");
+ assert(DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) &&
+ "Element store outside of alloca store");
+ uint64_t ShAmt = 8*Offset;
+ if (DL.isBigEndian())
+ ShAmt = 8*(DL.getTypeStoreSize(IntTy) - DL.getTypeStoreSize(Ty) - Offset);
+ if (ShAmt)
+ V = IRB.CreateShl(V, ShAmt, Name + ".shift");
+
+ if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) {
+ APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt);
+ Old = IRB.CreateAnd(Old, Mask, Name + ".mask");
+ V = IRB.CreateOr(Old, V, Name + ".insert");
+ }
+ return V;
+}
+
namespace {
/// \brief Visitor to rewrite instructions using a partition of an alloca to
/// use a new alloca.
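
The new file-scope extractInteger/insertInteger helpers reduce to a shift-and-mask splice. A standalone sketch over ordinary integers (little-endian case only; names and values are hypothetical, not part of the patch):

    #include <cassert>
    #include <cstdint>

    uint64_t insertU16(uint64_t Old, uint16_t V, unsigned ByteOffset) {
      uint64_t ShAmt = 8 * ByteOffset;                // bit position of the element
      uint64_t Mask  = ~(uint64_t(0xFFFF) << ShAmt);  // clear the destination lane
      return (Old & Mask) | (uint64_t(V) << ShAmt);   // zero-extend, shift, OR in
    }

    uint16_t extractU16(uint64_t V, unsigned ByteOffset) {
      return uint16_t(V >> (8 * ByteOffset));         // shift down, then truncate
    }

    int main() {
      uint64_t Old = 0x1122334455667788ULL;
      assert(insertU16(Old, 0xABCD, 2) == 0x11223344ABCD7788ULL);
      assert(extractU16(Old, 2) == 0x5566);
      return 0;
    }

On big-endian targets the byte offset is measured from the other end of the value, which is what the DL.isBigEndian() branches above recompute.
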
@@ -2371,60 +2410,6 @@ private:
return IRB.getInt32(Index);
}
- Value *extractInteger(IRBuilder<> &IRB, IntegerType *TargetTy,
- uint64_t Offset) {
- assert(IntTy && "We cannot extract an integer from the alloca");
- Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".load"));
- V = convertValue(TD, IRB, V, IntTy);
- assert(Offset >= NewAllocaBeginOffset && "Out of bounds offset");
- uint64_t RelOffset = Offset - NewAllocaBeginOffset;
- assert(TD.getTypeStoreSize(TargetTy) + RelOffset <=
- TD.getTypeStoreSize(IntTy) &&
- "Element load outside of alloca store");
- uint64_t ShAmt = 8*RelOffset;
- if (TD.isBigEndian())
- ShAmt = 8*(TD.getTypeStoreSize(IntTy) -
- TD.getTypeStoreSize(TargetTy) - RelOffset);
- if (ShAmt)
- V = IRB.CreateLShr(V, ShAmt, getName(".shift"));
- assert(TargetTy->getBitWidth() <= IntTy->getBitWidth() &&
- "Cannot extract to a larger integer!");
- if (TargetTy != IntTy)
- V = IRB.CreateTrunc(V, TargetTy, getName(".trunc"));
- return V;
- }
-
- StoreInst *insertInteger(IRBuilder<> &IRB, Value *V, uint64_t Offset) {
- IntegerType *Ty = cast<IntegerType>(V->getType());
- assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
- "Cannot insert a larger integer!");
- if (Ty != IntTy)
- V = IRB.CreateZExt(V, IntTy, getName(".ext"));
- assert(Offset >= NewAllocaBeginOffset && "Out of bounds offset");
- uint64_t RelOffset = Offset - NewAllocaBeginOffset;
- assert(TD.getTypeStoreSize(Ty) + RelOffset <=
- TD.getTypeStoreSize(IntTy) &&
- "Element store outside of alloca store");
- uint64_t ShAmt = 8*RelOffset;
- if (TD.isBigEndian())
- ShAmt = 8*(TD.getTypeStoreSize(IntTy) - TD.getTypeStoreSize(Ty)
- - RelOffset);
- if (ShAmt)
- V = IRB.CreateShl(V, ShAmt, getName(".shift"));
-
- if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) {
- APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt);
- Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".oldload"));
- Old = convertValue(TD, IRB, Old, IntTy);
- Old = IRB.CreateAnd(Old, Mask, getName(".mask"));
- V = IRB.CreateOr(Old, V, getName(".insert"));
- }
- V = convertValue(TD, IRB, V, NewAllocaTy);
- return IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
- }
-
void deleteIfTriviallyDead(Value *V) {
Instruction *I = cast<Instruction>(V);
if (isInstructionTriviallyDead(I))
@@ -2452,12 +2437,18 @@ private:
}
bool rewriteIntegerLoad(IRBuilder<> &IRB, LoadInst &LI) {
+ assert(IntTy && "We cannot insert an integer to the alloca");
assert(!LI.isVolatile());
- Value *Result = extractInteger(IRB, cast<IntegerType>(LI.getType()),
- BeginOffset);
- LI.replaceAllUsesWith(Result);
+ Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ getName(".load"));
+ V = convertValue(TD, IRB, V, IntTy);
+ assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
+ uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
+ V = extractInteger(TD, IRB, V, cast<IntegerType>(LI.getType()), Offset,
+ getName(".extract"));
+ LI.replaceAllUsesWith(V);
Pass.DeadInsts.push_back(&LI);
- DEBUG(dbgs() << " to: " << *Result << "\n");
+ DEBUG(dbgs() << " to: " << *V << "\n");
return true;
}
@@ -2519,8 +2510,20 @@ private:
}
bool rewriteIntegerStore(IRBuilder<> &IRB, StoreInst &SI) {
+ assert(IntTy && "We cannot extract an integer from the alloca");
assert(!SI.isVolatile());
- StoreInst *Store = insertInteger(IRB, SI.getValueOperand(), BeginOffset);
+ Value *V = SI.getValueOperand();
+ if (TD.getTypeSizeInBits(V->getType()) != IntTy->getBitWidth()) {
+ Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ getName(".oldload"));
+ Old = convertValue(TD, IRB, Old, IntTy);
+ assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
+ uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
+ V = insertInteger(TD, IRB, Old, SI.getValueOperand(), Offset,
+ getName(".insert"));
+ }
+ V = convertValue(TD, IRB, V, NewAllocaTy);
+ StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
Pass.DeadInsts.push_back(&SI);
(void)Store;
DEBUG(dbgs() << " to: " << *Store << "\n");
@@ -2652,10 +2655,12 @@ private:
if (IntTy && (BeginOffset > NewAllocaBeginOffset ||
EndOffset < NewAllocaEndOffset)) {
assert(!II.isVolatile());
- StoreInst *Store = insertInteger(IRB, V, BeginOffset);
- (void)Store;
- DEBUG(dbgs() << " to: " << *Store << "\n");
- return true;
+ Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ getName(".oldload"));
+ Old = convertValue(TD, IRB, Old, IntTy);
+ assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
+ uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
+ V = insertInteger(TD, IRB, Old, V, Offset, getName(".insert"));
}
if (V->getType() != AllocaTy)
@@ -2811,17 +2816,25 @@ private:
getIndex(IRB, BeginOffset),
getName(".copyextract"));
} else if (IntTy && !IsWholeAlloca && !IsDest) {
- Src = extractInteger(IRB, SubIntTy, BeginOffset);
+ Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ getName(".load"));
+ Src = convertValue(TD, IRB, Src, IntTy);
+ assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
+ uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
+ Src = extractInteger(TD, IRB, Src, SubIntTy, Offset, getName(".extract"));
} else {
Src = IRB.CreateAlignedLoad(SrcPtr, Align, II.isVolatile(),
getName(".copyload"));
}
if (IntTy && !IsWholeAlloca && IsDest) {
- StoreInst *Store = insertInteger(IRB, Src, BeginOffset);
- (void)Store;
- DEBUG(dbgs() << " to: " << *Store << "\n");
- return true;
+ Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ getName(".oldload"));
+ Old = convertValue(TD, IRB, Old, IntTy);
+ assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
+ uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
+ Src = insertInteger(TD, IRB, Old, Src, Offset, getName(".insert"));
+ Src = convertValue(TD, IRB, Src, NewAllocaTy);
}
if (IsVectorElement && IsDest) {
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index d86c4cbc9f..90efa8ae0e 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -135,47 +135,6 @@ static bool IsOnlyUsedInEqualityComparison(Value *V, Value *With) {
namespace {
//===---------------------------------------===//
-// 'strcpy' Optimizations
-
-struct StrCpyOpt : public LibCallOptimization {
- bool OptChkCall; // True if it's optimizing a __strcpy_chk libcall.
-
- StrCpyOpt(bool c) : OptChkCall(c) {}
-
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- // Verify the "strcpy" function prototype.
- unsigned NumParams = OptChkCall ? 3 : 2;
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != NumParams ||
- FT->getReturnType() != FT->getParamType(0) ||
- FT->getParamType(0) != FT->getParamType(1) ||
- FT->getParamType(0) != B.getInt8PtrTy())
- return 0;
-
- Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
- if (Dst == Src) // strcpy(x,x) -> x
- return Src;
-
- // These optimizations require DataLayout.
- if (!TD) return 0;
-
- // See if we can get the length of the input string.
- uint64_t Len = GetStringLength(Src);
- if (Len == 0) return 0;
-
- // We have enough information to now generate the memcpy call to do the
- // concatenation for us. Make a memcpy to copy the nul byte with align = 1.
- if (!OptChkCall ||
- !EmitMemCpyChk(Dst, Src,
- ConstantInt::get(TD->getIntPtrType(*Context), Len),
- CI->getArgOperand(2), B, TD, TLI))
- B.CreateMemCpy(Dst, Src,
- ConstantInt::get(TD->getIntPtrType(*Context), Len), 1);
- return Dst;
- }
-};
-
-//===---------------------------------------===//
// 'stpcpy' Optimizations
struct StpCpyOpt: public LibCallOptimization {
@@ -1275,7 +1234,6 @@ namespace {
StringMap<LibCallOptimization*> Optimizations;
// String and Memory LibCall Optimizations
- StrCpyOpt StrCpy; StrCpyOpt StrCpyChk;
StpCpyOpt StpCpy; StpCpyOpt StpCpyChk;
StrNCpyOpt StrNCpy;
StrLenOpt StrLen; StrPBrkOpt StrPBrk;
@@ -1295,8 +1253,7 @@ namespace {
bool Modified; // This is only used by doInitialization.
public:
static char ID; // Pass identification
- SimplifyLibCalls() : FunctionPass(ID), StrCpy(false), StrCpyChk(true),
- StpCpy(false), StpCpyChk(true),
+ SimplifyLibCalls() : FunctionPass(ID), StpCpy(false), StpCpyChk(true),
UnaryDoubleFP(false), UnsafeUnaryDoubleFP(true) {
initializeSimplifyLibCallsPass(*PassRegistry::getPassRegistry());
}
@@ -1348,7 +1305,6 @@ void SimplifyLibCalls::AddOpt(LibFunc::Func F1, LibFunc::Func F2,
/// we know.
void SimplifyLibCalls::InitOptimizations() {
// String and Memory LibCall Optimizations
- Optimizations["strcpy"] = &StrCpy;
Optimizations["strncpy"] = &StrNCpy;
Optimizations["stpcpy"] = &StpCpy;
Optimizations["strlen"] = &StrLen;
@@ -1369,7 +1325,6 @@ void SimplifyLibCalls::InitOptimizations() {
AddOpt(LibFunc::memset, &MemSet);
// _chk variants of String and Memory LibCall Optimizations.
- Optimizations["__strcpy_chk"] = &StrCpyChk;
Optimizations["__stpcpy_chk"] = &StpCpyChk;
// Math Library Optimizations
diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp
index f35cbbdde5..930555424d 100644
--- a/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/lib/Transforms/Utils/LowerInvoke.cpp
@@ -45,10 +45,10 @@
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/TargetTransformInfo.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetLowering.h"
#include <csetjmp>
#include <set>
using namespace llvm;
@@ -70,14 +70,15 @@ namespace {
Constant *SetJmpFn, *LongJmpFn, *StackSaveFn, *StackRestoreFn;
bool useExpensiveEHSupport;
- // We peek in STTI to grab the target's jmp_buf size and alignment
- const ScalarTargetTransformInfo *STTI;
+ // We peek in TLI to grab the target's jmp_buf size and alignment
+ const TargetLowering *TLI;
public:
static char ID; // Pass identification, replacement for typeid
- explicit LowerInvoke(bool useExpensiveEHSupport = ExpensiveEHSupport)
+ explicit LowerInvoke(const TargetLowering *tli = NULL,
+ bool useExpensiveEHSupport = ExpensiveEHSupport)
: FunctionPass(ID), useExpensiveEHSupport(useExpensiveEHSupport),
- STTI(0) {
+ TLI(tli) {
initializeLowerInvokePass(*PassRegistry::getPassRegistry());
}
bool doInitialization(Module &M);
@@ -107,24 +108,21 @@ INITIALIZE_PASS(LowerInvoke, "lowerinvoke",
char &llvm::LowerInvokePassID = LowerInvoke::ID;
// Public Interface To the LowerInvoke pass.
-FunctionPass *llvm::createLowerInvokePass() {
- return new LowerInvoke(ExpensiveEHSupport);
+FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI) {
+ return new LowerInvoke(TLI, ExpensiveEHSupport);
}
-FunctionPass *llvm::createLowerInvokePass(bool useExpensiveEHSupport) {
- return new LowerInvoke(useExpensiveEHSupport);
+FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI,
+ bool useExpensiveEHSupport) {
+ return new LowerInvoke(TLI, useExpensiveEHSupport);
}
// doInitialization - Make sure that there is a prototype for abort in the
// current module.
bool LowerInvoke::doInitialization(Module &M) {
- TargetTransformInfo *TTI = getAnalysisIfAvailable<TargetTransformInfo>();
- if (TTI)
- STTI = TTI->getScalarTargetTransformInfo();
-
Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
if (useExpensiveEHSupport) {
// Insert a type for the linked list of jump buffers.
- unsigned JBSize = STTI ? STTI->getJumpBufSize() : 0;
+ unsigned JBSize = TLI ? TLI->getJumpBufSize() : 0;
JBSize = JBSize ? JBSize : 200;
Type *JmpBufTy = ArrayType::get(VoidPtrTy, JBSize);
@@ -432,7 +430,7 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
// Create an alloca for the incoming jump buffer ptr and the new jump buffer
// that needs to be restored on all exits from the function. This is an
// alloca because the value needs to be live across invokes.
- unsigned Align = STTI ? STTI->getJumpBufAlignment() : 0;
+ unsigned Align = TLI ? TLI->getJumpBufAlignment() : 0;
AllocaInst *JmpBuf =
new AllocaInst(JBLinkTy, 0, Align,
"jblink", F.begin()->begin());
@@ -577,10 +575,6 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
}
bool LowerInvoke::runOnFunction(Function &F) {
- TargetTransformInfo *TTI = getAnalysisIfAvailable<TargetTransformInfo>();
- if (TTI)
- STTI = TTI->getScalarTargetTransformInfo();
-
if (useExpensiveEHSupport)
return insertExpensiveEHSupport(F);
else
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index bd28ec3527..b15acdff63 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -183,14 +183,30 @@ struct StrCpyChkOpt : public InstFortifiedLibCallOptimization {
FT->getParamType(2) != TD->getIntPtrType(Context))
return 0;
+ Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
+ if (Dst == Src) // __strcpy_chk(x,x) -> x
+ return Src;
+
// If a) we don't have any length information, or b) we know this will
// fit then just lower to a plain st[rp]cpy. Otherwise we'll keep our
// st[rp]cpy_chk call which may fail at runtime if the size is too long.
// TODO: It might be nice to get a maximum length out of the possible
// string lengths for varying.
if (isFoldable(2, 1, true)) {
- Value *Ret = EmitStrCpy(CI->getArgOperand(0), CI->getArgOperand(1), B, TD,
- TLI, Name.substr(2, 6));
+ Value *Ret = EmitStrCpy(Dst, Src, B, TD, TLI, Name.substr(2, 6));
+ return Ret;
+ } else {
+      // Maybe we can still fold __strcpy_chk to __memcpy_chk.
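+      // For example, __strcpy_chk(dst, "hello", n) can be rewritten as
+      // __memcpy_chk(dst, "hello", 6, n).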
+ uint64_t Len = GetStringLength(Src);
+ if (Len == 0) return 0;
+
+      // This optimization requires DataLayout.
+ if (!TD) return 0;
+
+ Value *Ret =
+ EmitMemCpyChk(Dst, Src,
+ ConstantInt::get(TD->getIntPtrType(Context), Len),
+ CI->getArgOperand(2), B, TD, TLI);
return Ret;
}
return 0;
@@ -497,6 +513,35 @@ struct StrNCmpOpt : public LibCallOptimization {
}
};
+struct StrCpyOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strcpy" function prototype.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != B.getInt8PtrTy())
+ return 0;
+
+ Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
+ if (Dst == Src) // strcpy(x,x) -> x
+ return Src;
+
+ // These optimizations require DataLayout.
+ if (!TD) return 0;
+
+ // See if we can get the length of the input string.
+ uint64_t Len = GetStringLength(Src);
+ if (Len == 0) return 0;
+
+    // We have enough information to now generate the memcpy call to do the
+    // copy for us.  Make a memcpy to copy the string, including the nul
+    // terminator, with align = 1.
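+    // For example, strcpy(dst, "hello") becomes a 6-byte memcpy (five
+    // characters plus the nul terminator), and dst is used as the result.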
+ B.CreateMemCpy(Dst, Src,
+ ConstantInt::get(TD->getIntPtrType(*Context), Len), 1);
+ return Dst;
+ }
+};
+
} // End anonymous namespace.
namespace llvm {
@@ -520,6 +565,7 @@ class LibCallSimplifierImpl {
StrRChrOpt StrRChr;
StrCmpOpt StrCmp;
StrNCmpOpt StrNCmp;
+ StrCpyOpt StrCpy;
void initOptimizations();
public:
@@ -540,14 +586,15 @@ void LibCallSimplifierImpl::initOptimizations() {
Optimizations["__stpcpy_chk"] = &StrCpyChk;
Optimizations["__strncpy_chk"] = &StrNCpyChk;
Optimizations["__stpncpy_chk"] = &StrNCpyChk;
- Optimizations["strcmp"] = &StrCmp;
- Optimizations["strncmp"] = &StrNCmp;
// String and memory library call optimizations.
Optimizations["strcat"] = &StrCat;
Optimizations["strncat"] = &StrNCat;
Optimizations["strchr"] = &StrChr;
Optimizations["strrchr"] = &StrRChr;
+ Optimizations["strcmp"] = &StrCmp;
+ Optimizations["strncmp"] = &StrNCmp;
+ Optimizations["strcpy"] = &StrCpy;
}
Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) {
diff --git a/lib/Transforms/Vectorize/CMakeLists.txt b/lib/Transforms/Vectorize/CMakeLists.txt
index 06cf1e4e53..e64034ab26 100644
--- a/lib/Transforms/Vectorize/CMakeLists.txt
+++ b/lib/Transforms/Vectorize/CMakeLists.txt
@@ -1,6 +1,7 @@
add_llvm_library(LLVMVectorize
BBVectorize.cpp
Vectorize.cpp
+ LoopVectorize.cpp
)
add_dependencies(LLVMVectorize intrinsics_gen)
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
new file mode 100644
index 0000000000..9bbd9ab60b
--- /dev/null
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -0,0 +1,885 @@
+//===- LoopVectorize.cpp - A Loop Vectorizer ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a simple loop vectorizer. We currently only support single-block
+// loops. We have a very simple and restrictive legality check: all reads and
+// writes must go to disjoint memory locations. There is no cost model yet.
+// This pass has three parts:
+// 1. The main loop pass that drives the different parts.
+// 2. LoopVectorizationLegality - A helper class that checks for the legality
+// of the vectorization.
+// 3. SingleBlockLoopVectorizer - A helper class that performs the actual
+// widening of instructions.
+//
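+// For illustration, with these restrictions a loop such as
+//   for (i = 0; i < n; ++i)
+//     A[i] = B[i] + K;   // A and B disjoint, e.g. noalias arguments
+// is widened so that each vector iteration computes VF consecutive elements,
+// while the original scalar loop is kept to execute the remaining iterations.
+//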
+//===----------------------------------------------------------------------===//
+#define LV_NAME "loop-vectorize"
+#define DEBUG_TYPE LV_NAME
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Value.h"
+#include "llvm/Function.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/DataLayout.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <algorithm>
+using namespace llvm;
+
+static cl::opt<unsigned>
+DefaultVectorizationFactor("default-loop-vectorize-width",
+ cl::init(4), cl::Hidden,
+ cl::desc("Set the default loop vectorization width"));
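+// For example, "opt -loop-vectorize -default-loop-vectorize-width=8" requests
+// a vectorization width of 8 instead of the default of 4.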
+
+namespace {
+
+/// Vectorize a simple loop. This class performs the widening of simple
+/// single-basic-block loops into vectors. It does not perform any
+/// vectorization-legality checks; it assumes the loop is legal to vectorize
+/// and widens the instructions to a given vectorization factor (VF).
+class SingleBlockLoopVectorizer {
+public:
+ /// Ctor.
+ SingleBlockLoopVectorizer(Loop *OrigLoop, ScalarEvolution *Se, LoopInfo *Li,
+ LPPassManager *Lpm, unsigned VecWidth):
+ Orig(OrigLoop), SE(Se), LI(Li), LPM(Lpm), VF(VecWidth),
+ Builder(0), Induction(0), OldInduction(0) { }
+
+ ~SingleBlockLoopVectorizer() {
+ delete Builder;
+ }
+
+ // Perform the actual loop widening (vectorization).
+ void vectorize() {
+    // Create a new empty loop. Unlink the old loop and connect the new one.
+    createEmptyLoop();
+    // Widen each instruction in the old loop to a new one in the new loop.
+    vectorizeLoop();
+    // Tell ScalarEvolution to forget about the original loop.
+    cleanup();
+ }
+
+private:
+ /// Create an empty loop, based on the loop ranges of the old loop.
+ void createEmptyLoop();
+ /// Copy and widen the instructions from the old loop.
+ void vectorizeLoop();
+  /// Clean up: tell ScalarEvolution to forget the original loop.
+ void cleanup();
+
+ /// This instruction is un-vectorizable. Implement it as a sequence
+ /// of scalars.
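+  /// For example, a load or store whose pointer is not a consecutive GEP is
+  /// replaced by VF scalar clones whose operands are extracted lane by lane
+  /// from the already-widened values.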
+ void scalarizeInstruction(Instruction *Instr);
+
+ /// Create a broadcast instruction. This method generates a broadcast
+ /// instruction (shuffle) for loop invariant values and for the induction
+ /// value. If this is the induction variable then we extend it to N, N+1, ...
+ /// this is needed because each iteration in the loop corresponds to a SIMD
+ /// element.
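+  /// For example, with VF = 4 a loop-invariant value %x is broadcast to
+  /// <%x, %x, %x, %x>, while the induction variable %i becomes
+  /// <%i, %i+1, %i+2, %i+3>.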
+ Value *getBroadcastInstrs(Value *V);
+
+  /// This is a helper function used by getBroadcastInstrs. It adds 0, 1, 2, ...
+  /// to the elements of the vector, one index per lane, starting from zero.
+ Value *getConsecutiveVector(Value* Val);
+
+ /// Check that the GEP operands are all uniform except for the last index
+ /// which has to be the induction variable.
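+  /// For example, "getelementptr [32 x i8]* %A, i32 0, i32 %iv" is consecutive
+  /// when %iv is the loop's induction variable stepping by one.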
+ bool isConsecutiveGep(GetElementPtrInst *Gep);
+
+ /// When we go over instructions in the basic block we rely on previous
+ /// values within the current basic block or on loop invariant values.
+ /// When we widen (vectorize) values we place them in the map. If the values
+ /// are not within the map, they have to be loop invariant, so we simply
+ /// broadcast them into a vector.
+ Value *getVectorValue(Value *V);
+
+ typedef DenseMap<Value*, Value*> ValueMap;
+
+ /// The original loop.
+ Loop *Orig;
+ // Scev analysis to use.
+ ScalarEvolution *SE;
+ // Loop Info.
+ LoopInfo *LI;
+  // Loop Pass Manager.
+ LPPassManager *LPM;
+ // The vectorization factor to use.
+ unsigned VF;
+
+ // The builder that we use
+ IRBuilder<> *Builder;
+
+ // --- Vectorization state ---
+
+ /// The new Induction variable which was added to the new block.
+ PHINode *Induction;
+ /// The induction variable of the old basic block.
+ PHINode *OldInduction;
+ // Maps scalars to widened vectors.
+ ValueMap WidenMap;
+};
+
+/// Perform the vectorization legality check. This class does not look at the
+/// profitability of vectorization, only the legality. At the moment the checks
+/// are very simple and focus on single basic block loops with a constant
+/// iteration count and no reductions.
+class LoopVectorizationLegality {
+public:
+ LoopVectorizationLegality(Loop *Lp, ScalarEvolution *Se, DataLayout *Dl):
+ TheLoop(Lp), SE(Se), DL(Dl) { }
+
+ /// Returns the maximum vectorization factor that we *can* use to vectorize
+ /// this loop. This does not mean that it is profitable to vectorize this
+ /// loop, only that it is legal to do so. This may be a large number. We
+ /// can vectorize to any SIMD width below this number.
+ unsigned getLoopMaxVF();
+
+private:
+ /// Check if a single basic block loop is vectorizable.
+ /// At this point we know that this is a loop with a constant trip count
+ /// and we only need to check individual instructions.
+ bool canVectorizeBlock(BasicBlock &BB);
+
+ // Check if a pointer value is known to be disjoint.
+ // Example: Alloca, Global, NoAlias.
+ bool isidentifiedSafeObject(Value* Val);
+
+ /// The loop that we evaluate.
+ Loop *TheLoop;
+ /// Scev analysis.
+ ScalarEvolution *SE;
+ /// DataLayout analysis.
+ DataLayout *DL;
+};
+
+struct LoopVectorize : public LoopPass {
+ static char ID; // Pass identification, replacement for typeid
+
+ LoopVectorize() : LoopPass(ID) {
+ initializeLoopVectorizePass(*PassRegistry::getPassRegistry());
+ }
+
+ ScalarEvolution *SE;
+ DataLayout *DL;
+ LoopInfo *LI;
+
+ virtual bool runOnLoop(Loop *L, LPPassManager &LPM) {
+ // Only vectorize innermost loops.
+ if (!L->empty())
+ return false;
+
+ SE = &getAnalysis<ScalarEvolution>();
+ DL = getAnalysisIfAvailable<DataLayout>();
+ LI = &getAnalysis<LoopInfo>();
+
+ DEBUG(dbgs() << "LV: Checking a loop in \"" <<
+ L->getHeader()->getParent()->getName() << "\"\n");
+
+ // Check if it is legal to vectorize the loop.
+ LoopVectorizationLegality LVL(L, SE, DL);
+ unsigned MaxVF = LVL.getLoopMaxVF();
+
+ // Check that we can vectorize using the chosen vectorization width.
+ if (MaxVF < DefaultVectorizationFactor) {
+ DEBUG(dbgs() << "LV: non-vectorizable MaxVF ("<< MaxVF << ").\n");
+ return false;
+ }
+
+ DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< MaxVF << ").\n");
+
+    // If we decided that it is *legal* to vectorize the loop, then do it.
+ SingleBlockLoopVectorizer LB(L, SE, LI, &LPM, DefaultVectorizationFactor);
+ LB.vectorize();
+
+ DEBUG(verifyFunction(*L->getHeader()->getParent()));
+ return true;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ LoopPass::getAnalysisUsage(AU);
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequired<LoopInfo>();
+ AU.addRequired<ScalarEvolution>();
+ }
+
+};
+
+Value *SingleBlockLoopVectorizer::getBroadcastInstrs(Value *V) {
+ // Instructions that access the old induction variable
+ // actually want to get the new one.
+ if (V == OldInduction)
+ V = Induction;
+ // Create the types.
+ LLVMContext &C = V->getContext();
+ Type *VTy = VectorType::get(V->getType(), VF);
+ Type *I32 = IntegerType::getInt32Ty(C);
+ Constant *Zero = ConstantInt::get(I32, 0);
+ Value *Zeros = ConstantAggregateZero::get(VectorType::get(I32, VF));
+ Value *UndefVal = UndefValue::get(VTy);
+ // Insert the value into a new vector.
+ Value *SingleElem = Builder->CreateInsertElement(UndefVal, V, Zero);
+ // Broadcast the scalar into all locations in the vector.
+ Value *Shuf = Builder->CreateShuffleVector(SingleElem, UndefVal, Zeros,
+ "broadcast");
+ // We are accessing the induction variable. Make sure to promote the
+ // index for each consecutive SIMD lane. This adds 0,1,2 ... to all lanes.
+ if (V == Induction)
+ return getConsecutiveVector(Shuf);
+ return Shuf;
+}
+
+Value *SingleBlockLoopVectorizer::getConsecutiveVector(Value* Val) {
+ assert(Val->getType()->isVectorTy() && "Must be a vector");
+ assert(Val->getType()->getScalarType()->isIntegerTy() &&
+ "Elem must be an integer");
+ // Create the types.
+ Type *ITy = Val->getType()->getScalarType();
+ VectorType *Ty = cast<VectorType>(Val->getType());
+ unsigned VLen = Ty->getNumElements();
+ SmallVector<Constant*, 8> Indices;
+
+  // Create a vector of consecutive numbers from zero to VF-1.
+ for (unsigned i = 0; i < VLen; ++i)
+ Indices.push_back(ConstantInt::get(ITy, i));
+
+ // Add the consecutive indices to the vector value.
+ Constant *Cv = ConstantVector::get(Indices);
+ assert(Cv->getType() == Val->getType() && "Invalid consecutive vec");
+ return Builder->CreateAdd(Val, Cv, "induction");
+}
+
+
+bool SingleBlockLoopVectorizer::isConsecutiveGep(GetElementPtrInst *Gep) {
+ if (!Gep)
+ return false;
+
+ unsigned NumOperands = Gep->getNumOperands();
+ Value *LastIndex = Gep->getOperand(NumOperands - 1);
+
+ // Check that all of the gep indices are uniform except for the last.
+ for (unsigned i = 0; i < NumOperands - 1; ++i)
+ if (!SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), Orig))
+ return false;
+
+  // We can emit wide loads/stores only if the last index is the induction
+  // variable.
+ const SCEV *Last = SE->getSCEV(LastIndex);
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Last)) {
+ const SCEV *Step = AR->getStepRecurrence(*SE);
+
+ // The memory is consecutive because the last index is consecutive
+ // and all other indices are loop invariant.
+ if (Step->isOne())
+ return true;
+ }
+
+ return false;
+}
+
+Value *SingleBlockLoopVectorizer::getVectorValue(Value *V) {
+ // If we saved a vectorized copy of V, use it.
+ ValueMap::iterator it = WidenMap.find(V);
+ if (it != WidenMap.end())
+ return it->second;
+
+ // Broadcast V and save the value for future uses.
+ Value *B = getBroadcastInstrs(V);
+ WidenMap[V] = B;
+ return B;
+}
+
+void SingleBlockLoopVectorizer::scalarizeInstruction(Instruction *Instr) {
+ assert(!Instr->getType()->isAggregateType() && "Can't handle vectors");
+ // Holds vector parameters or scalars, in case of uniform vals.
+ SmallVector<Value*, 8> Params;
+
+ // Find all of the vectorized parameters.
+ for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) {
+ Value *SrcOp = Instr->getOperand(op);
+
+ // If we are accessing the old induction variable, use the new one.
+ if (SrcOp == OldInduction) {
+ Params.push_back(getBroadcastInstrs(Induction));
+ continue;
+ }
+
+ // Try using previously calculated values.
+ Instruction *SrcInst = dyn_cast<Instruction>(SrcOp);
+
+ // If the src is an instruction that appeared earlier in the basic block
+ // then it should already be vectorized.
+ if (SrcInst && SrcInst->getParent() == Instr->getParent()) {
+ assert(WidenMap.count(SrcInst) && "Source operand is unavailable");
+ // The parameter is a vector value from earlier.
+ Params.push_back(WidenMap[SrcInst]);
+ } else {
+ // The parameter is a scalar from outside the loop. Maybe even a constant.
+ Params.push_back(SrcOp);
+ }
+ }
+
+ assert(Params.size() == Instr->getNumOperands() &&
+ "Invalid number of operands");
+
+ // Does this instruction return a value ?
+ bool IsVoidRetTy = Instr->getType()->isVoidTy();
+ Value *VecResults = 0;
+
+ // If we have a return value, create an empty vector. We place the scalarized
+ // instructions in this vector.
+ if (!IsVoidRetTy)
+ VecResults = UndefValue::get(VectorType::get(Instr->getType(), VF));
+
+ // For each scalar that we create.
+ for (unsigned i = 0; i < VF; ++i) {
+ Instruction *Cloned = Instr->clone();
+ if (!IsVoidRetTy)
+ Cloned->setName(Instr->getName() + ".cloned");
+    // Replace the operands of the cloned instructions with extracted scalars.
+ for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) {
+ Value *Op = Params[op];
+ // Param is a vector. Need to extract the right lane.
+ if (Op->getType()->isVectorTy())
+ Op = Builder->CreateExtractElement(Op, Builder->getInt32(i));
+ Cloned->setOperand(op, Op);
+ }
+
+ // Place the cloned scalar in the new loop.
+ Builder->Insert(Cloned);
+
+ // If the original scalar returns a value we need to place it in a vector
+ // so that future users will be able to use it.
+ if (!IsVoidRetTy)
+ VecResults = Builder->CreateInsertElement(VecResults, Cloned,
+ Builder->getInt32(i));
+ }
+
+ if (!IsVoidRetTy)
+ WidenMap[Instr] = VecResults;
+}
+
+void SingleBlockLoopVectorizer::createEmptyLoop() {
+ /*
+ In this function we generate a new loop. The new loop will contain
+ the vectorized instructions while the old loop will continue to run the
+ scalar remainder.
+
+ [ ] <-- vector loop bypass.
+ / |
+ / v
+| [ ] <-- vector pre header.
+| |
+| v
+| [ ] \
+| [ ]_| <-- vector loop.
+| |
+ \ v
+ >[ ] <--- middle-block.
+ / |
+ / v
+| [ ] <--- new preheader.
+| |
+| v
+| [ ] \
+| [ ]_| <-- old scalar loop to handle remainder.
+ \ |
+ \ v
+ >[ ] <-- exit block.
+ ...
+ */
+
+ // This is the original scalar-loop preheader.
+ BasicBlock *BypassBlock = Orig->getLoopPreheader();
+ BasicBlock *ExitBlock = Orig->getExitBlock();
+ assert(ExitBlock && "Must have an exit block");
+
+ assert(Orig->getNumBlocks() == 1 && "Invalid loop");
+ assert(BypassBlock && "Invalid loop structure");
+
+ BasicBlock *VectorPH =
+ BypassBlock->splitBasicBlock(BypassBlock->getTerminator(), "vector.ph");
+ BasicBlock *VecBody = VectorPH->splitBasicBlock(VectorPH->getTerminator(),
+ "vector.body");
+
+ BasicBlock *MiddleBlock = VecBody->splitBasicBlock(VecBody->getTerminator(),
+ "middle.block");
+
+
+ BasicBlock *ScalarPH =
+ MiddleBlock->splitBasicBlock(MiddleBlock->getTerminator(),
+ "scalar.preheader");
+
+ // Find the induction variable.
+ BasicBlock *OldBasicBlock = Orig->getHeader();
+ OldInduction = dyn_cast<PHINode>(OldBasicBlock->begin());
+ assert(OldInduction && "We must have a single phi node.");
+ Type *IdxTy = OldInduction->getType();
+
+ // Use this IR builder to create the loop instructions (Phi, Br, Cmp)
+ // inside the loop.
+ Builder = new IRBuilder<>(VecBody);
+ Builder->SetInsertPoint(VecBody->getFirstInsertionPt());
+
+ // Generate the induction variable.
+ Induction = Builder->CreatePHI(IdxTy, 2, "index");
+ Constant *Zero = ConstantInt::get(IdxTy, 0);
+ Constant *Step = ConstantInt::get(IdxTy, VF);
+
+ // Find the loop boundaries.
+ const SCEV *ExitCount = SE->getExitCount(Orig, Orig->getHeader());
+ assert(ExitCount != SE->getCouldNotCompute() && "Invalid loop count");
+
+ // Get the total trip count from the count by adding 1.
+ ExitCount = SE->getAddExpr(ExitCount,
+ SE->getConstant(ExitCount->getType(), 1));
+
+ // Expand the trip count and place the new instructions in the preheader.
+ // Notice that the pre-header does not change, only the loop body.
+ SCEVExpander Exp(*SE, "induction");
+ Instruction *Loc = BypassBlock->getTerminator();
+
+ // We may need to extend the index in case there is a type mismatch.
+ // We know that the count starts at zero and does not overflow.
+ // We are using Zext because it should be less expensive.
+ if (ExitCount->getType() != Induction->getType())
+ ExitCount = SE->getZeroExtendExpr(ExitCount, IdxTy);
+
+ // Count holds the overall loop count (N).
+ Value *Count = Exp.expandCodeFor(ExitCount, Induction->getType(), Loc);
+ // Now we need to generate the expression for N - (N % VF), which is
+ // the part that the vectorized body will execute.
+ Constant *CIVF = ConstantInt::get(IdxTy, VF);
+ Value *R = BinaryOperator::CreateURem(Count, CIVF, "n.mod.vf", Loc);
+ Value *CountRoundDown = BinaryOperator::CreateSub(Count, R, "n.vec", Loc);
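+  // For example, with N = 10 and VF = 4, n.mod.vf is 2 and n.vec is 8: the
+  // vector body covers the first 8 iterations and the scalar loop the rest.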
+
+ // Now, compare the new count to zero. If it is zero, jump to the scalar part.
+ Value *Cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
+ CountRoundDown, ConstantInt::getNullValue(IdxTy),
+ "cmp.zero", Loc);
+ BranchInst::Create(MiddleBlock, VectorPH, Cmp, Loc);
+ // Remove the old terminator.
+ Loc->eraseFromParent();
+
+ // Add a check in the middle block to see if we have completed
+ // all of the iterations in the first vector loop.
+ // If (N - N%VF) == N, then we *don't* need to run the remainder.
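+  // (For example, if N = 8 and VF = 4 then N - N%VF == N, so the scalar
+  // remainder loop is skipped.)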
+ Value *CmpN = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, Count,
+ CountRoundDown, "cmp.n",
+ MiddleBlock->getTerminator());
+
+ BranchInst::Create(ExitBlock, ScalarPH, CmpN, MiddleBlock->getTerminator());
+ // Remove the old terminator.
+ MiddleBlock->getTerminator()->eraseFromParent();
+
+ // Create i+1 and fill the PHINode.
+ Value *NextIdx = Builder->CreateAdd(Induction, Step, "index.next");
+ Induction->addIncoming(Zero, VectorPH);
+ Induction->addIncoming(NextIdx, VecBody);
+ // Create the compare.
+ Value *ICmp = Builder->CreateICmpEQ(NextIdx, CountRoundDown);
+ Builder->CreateCondBr(ICmp, MiddleBlock, VecBody);
+
+ // Now we have two terminators. Remove the old one from the block.
+ VecBody->getTerminator()->eraseFromParent();
+
+ // Fix the scalar body iteration count.
+ unsigned BlockIdx = OldInduction->getBasicBlockIndex(ScalarPH);
+ OldInduction->setIncomingValue(BlockIdx, CountRoundDown);
+
+ // Get ready to start creating new instructions into the vectorized body.
+ Builder->SetInsertPoint(VecBody->getFirstInsertionPt());
+
+ // Register the new loop.
+ Loop* Lp = new Loop();
+ LPM->insertLoop(Lp, Orig->getParentLoop());
+
+ Lp->addBasicBlockToLoop(VecBody, LI->getBase());
+
+ Loop *ParentLoop = Orig->getParentLoop();
+ if (ParentLoop) {
+ ParentLoop->addBasicBlockToLoop(ScalarPH, LI->getBase());
+ ParentLoop->addBasicBlockToLoop(VectorPH, LI->getBase());
+ ParentLoop->addBasicBlockToLoop(MiddleBlock, LI->getBase());
+ }
+}
+
+void SingleBlockLoopVectorizer::vectorizeLoop() {
+ BasicBlock &BB = *Orig->getHeader();
+
+ // For each instruction in the old loop.
+ for (BasicBlock::iterator it = BB.begin(), e = BB.end(); it != e; ++it) {
+ Instruction *Inst = it;
+
+ switch (Inst->getOpcode()) {
+ case Instruction::PHI:
+ case Instruction::Br:
+ // Nothing to do for PHIs and BR, since we already took care of the
+ // loop control flow instructions.
+ continue;
+
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ // Just widen binops.
+ BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst);
+ Value *A = getVectorValue(Inst->getOperand(0));
+ Value *B = getVectorValue(Inst->getOperand(1));
+ // Use this vector value for all users of the original instruction.
+ WidenMap[Inst] = Builder->CreateBinOp(BinOp->getOpcode(), A, B);
+ break;
+ }
+ case Instruction::Select: {
+ // Widen selects.
+ Value *A = getVectorValue(Inst->getOperand(0));
+ Value *B = getVectorValue(Inst->getOperand(1));
+ Value *C = getVectorValue(Inst->getOperand(2));
+ WidenMap[Inst] = Builder->CreateSelect(A, B, C);
+ break;
+ }
+
+ case Instruction::ICmp:
+ case Instruction::FCmp: {
+ // Widen compares. Generate vector compares.
+ bool FCmp = (Inst->getOpcode() == Instruction::FCmp);
+ CmpInst *Cmp = dyn_cast<CmpInst>(Inst);
+ Value *A = getVectorValue(Inst->getOperand(0));
+ Value *B = getVectorValue(Inst->getOperand(1));
+ if (FCmp)
+ WidenMap[Inst] = Builder->CreateFCmp(Cmp->getPredicate(), A, B);
+ else
+ WidenMap[Inst] = Builder->CreateICmp(Cmp->getPredicate(), A, B);
+ break;
+ }
+
+ case Instruction::Store: {
+ // Attempt to issue a wide store.
+ StoreInst *SI = dyn_cast<StoreInst>(Inst);
+ Type *StTy = VectorType::get(SI->getValueOperand()->getType(), VF);
+ Value *Ptr = SI->getPointerOperand();
+ unsigned Alignment = SI->getAlignment();
+ GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
+      // Scalarize the store if the pointer is not a consecutive GEP.
+ if (!isConsecutiveGep(Gep)) {
+ scalarizeInstruction(Inst);
+ break;
+ }
+
+ // Create the new GEP with the new induction variable.
+ GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
+ unsigned NumOperands = Gep->getNumOperands();
+ Gep2->setOperand(NumOperands - 1, Induction);
+ Ptr = Builder->Insert(Gep2);
+ Ptr = Builder->CreateBitCast(Ptr, StTy->getPointerTo());
+ Value *Val = getVectorValue(SI->getValueOperand());
+ Builder->CreateStore(Val, Ptr)->setAlignment(Alignment);
+ break;
+ }
+ case Instruction::Load: {
+ // Attempt to issue a wide load.
+ LoadInst *LI = dyn_cast<LoadInst>(Inst);
+ Type *RetTy = VectorType::get(LI->getType(), VF);
+ Value *Ptr = LI->getPointerOperand();
+ unsigned Alignment = LI->getAlignment();
+ GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
+
+      // Scalarize the load if the pointer is not a consecutive GEP.
+ if (!isConsecutiveGep(Gep)) {
+ scalarizeInstruction(Inst);
+ break;
+ }
+
+ // Create the new GEP with the new induction variable.
+ GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
+ unsigned NumOperands = Gep->getNumOperands();
+ Gep2->setOperand(NumOperands - 1, Induction);
+ Ptr = Builder->Insert(Gep2);
+ Ptr = Builder->CreateBitCast(Ptr, RetTy->getPointerTo());
+ LI = Builder->CreateLoad(Ptr);
+ LI->setAlignment(Alignment);
+ // Use this vector value for all users of the load.
+ WidenMap[Inst] = LI;
+ break;
+ }
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::BitCast: {
+      /// Vectorize casts.
+ CastInst *CI = dyn_cast<CastInst>(Inst);
+ Value *A = getVectorValue(Inst->getOperand(0));
+ Type *DestTy = VectorType::get(CI->getType()->getScalarType(), VF);
+ WidenMap[Inst] = Builder->CreateCast(CI->getOpcode(), A, DestTy);
+ break;
+ }
+
+ default:
+ /// All other instructions are unsupported. Scalarize them.
+ scalarizeInstruction(Inst);
+ break;
+ }// end of switch.
+ }// end of for_each instr.
+}
+
+void SingleBlockLoopVectorizer::cleanup() {
+  // Tell ScalarEvolution to forget the original loop.
+ SE->forgetLoop(Orig);
+}
+
+unsigned LoopVectorizationLegality::getLoopMaxVF() {
+ if (!TheLoop->getLoopPreheader()) {
+ assert(false && "No preheader!!");
+ DEBUG(dbgs() << "LV: Loop not normalized." << "\n");
+ return 1;
+ }
+
+ // We can only vectorize single basic block loops.
+ unsigned NumBlocks = TheLoop->getNumBlocks();
+ if (NumBlocks != 1) {
+ DEBUG(dbgs() << "LV: Too many blocks:" << NumBlocks << "\n");
+ return 1;
+ }
+
+ // We need to have a loop header.
+ BasicBlock *BB = TheLoop->getHeader();
+ DEBUG(dbgs() << "LV: Found a loop: " << BB->getName() << "\n");
+
+ // Go over each instruction and look at memory deps.
+ if (!canVectorizeBlock(*BB)) {
+ DEBUG(dbgs() << "LV: Can't vectorize this loop header\n");
+ return 1;
+ }
+
+ // ScalarEvolution needs to be able to find the exit count.
+ const SCEV *ExitCount = SE->getExitCount(TheLoop, BB);
+ if (ExitCount == SE->getCouldNotCompute()) {
+ DEBUG(dbgs() << "LV: SCEV could not compute the loop exit count.\n");
+ return 1;
+ }
+
+ DEBUG(dbgs() << "LV: We can vectorize this loop!\n");
+
+ // Okay! We can vectorize. At this point we don't have any other mem analysis
+ // which may limit our maximum vectorization factor, so just return the
+ // maximum SIMD size.
+ return DefaultVectorizationFactor;
+}
+
+bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) {
+ // Holds the read and write pointers that we find.
+ typedef SmallVector<Value*, 10> ValueVector;
+ ValueVector Reads;
+ ValueVector Writes;
+
+ unsigned NumPhis = 0;
+ for (BasicBlock::iterator it = BB.begin(), e = BB.end(); it != e; ++it) {
+ Instruction *I = it;
+
+ PHINode *Phi = dyn_cast<PHINode>(I);
+ if (Phi) {
+ NumPhis++;
+ // We only look at integer phi nodes.
+ if (!Phi->getType()->isIntegerTy()) {
+        DEBUG(dbgs() << "LV: Found a non-int PHI.\n");
+ return false;
+ }
+
+      // Only a single PHI node (the induction variable) is allowed.
+ if (NumPhis > 1) {
+ DEBUG(dbgs() << "LV: Found more than one PHI.\n");
+ return false;
+ }
+
+ // This should not happen because the loop should be normalized.
+ if (Phi->getNumIncomingValues() != 2) {
+ DEBUG(dbgs() << "LV: Found an invalid PHI.\n");
+ return false;
+ }
+
+ // Check that the PHI is consecutive and starts at zero.
+ const SCEV *PhiScev = SE->getSCEV(Phi);
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
+ if (!AR) {
+ DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
+ return false;
+ }
+
+ const SCEV *Step = AR->getStepRecurrence(*SE);
+ const SCEV *Start = AR->getStart();
+
+ if (!Step->isOne() || !Start->isZero()) {
+ DEBUG(dbgs() << "LV: PHI does not start at zero or steps by one.\n");
+ return false;
+ }
+ }
+
+ // If this is a load, record its pointer. If it is not a load, abort.
+ // Notice that we don't handle function calls that read or write.
+ if (I->mayReadFromMemory()) {
+ LoadInst *Ld = dyn_cast<LoadInst>(I);
+ if (!Ld) return false;
+ if (!Ld->isSimple()) {
+ DEBUG(dbgs() << "LV: Found a non-simple load.\n");
+ return false;
+ }
+ GetUnderlyingObjects(Ld->getPointerOperand(), Reads, DL);
+ }
+
+ // Record store pointers. Abort on all other instructions that write to
+ // memory.
+ if (I->mayWriteToMemory()) {
+ StoreInst *St = dyn_cast<StoreInst>(I);
+ if (!St) return false;
+ if (!St->isSimple()) {
+ DEBUG(dbgs() << "LV: Found a non-simple store.\n");
+ return false;
+ }
+ GetUnderlyingObjects(St->getPointerOperand(), Writes, DL);
+ }
+
+ // We still don't handle functions.
+ CallInst *CI = dyn_cast<CallInst>(I);
+ if (CI) {
+ DEBUG(dbgs() << "LV: Found a call site:"<<
+ CI->getCalledFunction()->getName() << "\n");
+ return false;
+ }
+
+ // We do not re-vectorize vectors.
+ if (!VectorType::isValidElementType(I->getType()) &&
+ !I->getType()->isVoidTy()) {
+ DEBUG(dbgs() << "LV: Found unvectorizable type." << "\n");
+ return false;
+ }
+    // Check that all of the users of the instruction are inside this BB.
+ for (Value::use_iterator it = I->use_begin(), e = I->use_end();
+ it != e; ++it) {
+ Instruction *U = cast<Instruction>(*it);
+ BasicBlock *Parent = U->getParent();
+ if (Parent != &BB) {
+ DEBUG(dbgs() << "LV: Found an outside user for : "<< *U << "\n");
+ return false;
+ }
+ }
+ } // next instr.
+
+ if (NumPhis != 1) {
+ DEBUG(dbgs() << "LV: Did not find a Phi node.\n");
+ return false;
+ }
+
+  // Check that the underlying objects of the reads and writes are identified
+  // objects that cannot alias: globals, allocas, or no-alias arguments.
+ ValueVector::iterator r, re, w, we;
+ for (r = Reads.begin(), re = Reads.end(); r != re; ++r) {
+ if (!isidentifiedSafeObject(*r)) {
+ DEBUG(dbgs() << "LV: Found a bad read Ptr: "<< **r << "\n");
+ return false;
+ }
+ }
+
+ for (w = Writes.begin(), we = Writes.end(); w != we; ++w) {
+ if (!isidentifiedSafeObject(*w)) {
+ DEBUG(dbgs() << "LV: Found a bad write Ptr: "<< **w << "\n");
+ return false;
+ }
+ }
+
+ // Check that there are no multiple write locations to the same pointer.
+ SmallPtrSet<Value*, 8> WritePointerSet;
+ for (w = Writes.begin(), we = Writes.end(); w != we; ++w) {
+ if (!WritePointerSet.insert(*w)) {
+ DEBUG(dbgs() << "LV: Multiple writes to the same index :"<< **w << "\n");
+ return false;
+ }
+ }
+
+ // Check that the reads and the writes are disjoint.
+ for (r = Reads.begin(), re = Reads.end(); r != re; ++r) {
+ if (WritePointerSet.count(*r)) {
+      DEBUG(dbgs() << "LV: Found a read/write ptr:"<< **r << "\n");
+ return false;
+ }
+ }
+
+ // All is okay.
+ return true;
+}
+
+/// Checks if the value is a global variable, an alloca, or an argument
+/// marked with the NoAlias attribute.
+bool LoopVectorizationLegality::isidentifiedSafeObject(Value* Val) {
+ assert(Val && "Invalid value");
+ if (dyn_cast<GlobalValue>(Val))
+ return true;
+ if (dyn_cast<AllocaInst>(Val))
+ return true;
+ Argument *A = dyn_cast<Argument>(Val);
+ if (!A)
+ return false;
+ return A->hasNoAliasAttr();
+}
+
+} // namespace
+
+char LoopVectorize::ID = 0;
+static const char lv_name[] = "Loop Vectorization";
+INITIALIZE_PASS_BEGIN(LoopVectorize, LV_NAME, lv_name, false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false)
+
+namespace llvm {
+ Pass *createLoopVectorizePass() {
+ return new LoopVectorize();
+ }
+
+}
+
diff --git a/lib/Transforms/Vectorize/Vectorize.cpp b/lib/Transforms/Vectorize/Vectorize.cpp
index 1ef60029bc..d26973a7b3 100644
--- a/lib/Transforms/Vectorize/Vectorize.cpp
+++ b/lib/Transforms/Vectorize/Vectorize.cpp
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements common infrastructure for libLLVMVectorizeOpts.a, which
+// This file implements common infrastructure for libLLVMVectorizeOpts.a, which
// implements several vectorization transformations over the LLVM intermediate
// representation, including the C bindings for that library.
//
@@ -23,10 +23,11 @@
using namespace llvm;
-/// initializeVectorizationPasses - Initialize all passes linked into the
+/// initializeVectorizationPasses - Initialize all passes linked into the
/// Vectorization library.
void llvm::initializeVectorization(PassRegistry &Registry) {
initializeBBVectorizePass(Registry);
+ initializeLoopVectorizePass(Registry);
}
void LLVMInitializeVectorization(LLVMPassRegistryRef R) {
@@ -37,3 +38,6 @@ void LLVMAddBBVectorizePass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createBBVectorizePass());
}
+void LLVMAddLoopVectorizePass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLoopVectorizePass());
+}
diff --git a/lib/VMCore/CMakeLists.txt b/lib/VMCore/CMakeLists.txt
index ba807fcacc..6c30967974 100644
--- a/lib/VMCore/CMakeLists.txt
+++ b/lib/VMCore/CMakeLists.txt
@@ -33,7 +33,6 @@ add_llvm_library(LLVMCore
PrintModulePass.cpp
Type.cpp
TypeFinder.cpp
- TargetTransformInfo.cpp
Use.cpp
User.cpp
Value.cpp
diff --git a/lib/VMCore/TargetTransformInfo.cpp b/lib/VMCore/TargetTransformInfo.cpp
deleted file mode 100644
index 3af0222a21..0000000000
--- a/lib/VMCore/TargetTransformInfo.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-//===- llvm/VMCore/TargetTransformInfo.cpp ----------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/TargetTransformInfo.h"
-#include "llvm/Support/ErrorHandling.h"
-
-using namespace llvm;
-
-/// Default ctor.
-///
-/// @note This has to exist, because this is a pass, but it should never be
-/// used.
-TargetTransformInfo::TargetTransformInfo() : ImmutablePass(ID) {
- report_fatal_error("Bad TargetTransformInfo ctor used. "
- "Tool did not specify a TargetTransformInfo to use?");
-}
-
-INITIALIZE_PASS(TargetTransformInfo, "TargetTransformInfo",
- "Target Transform Info", false, true)
-char TargetTransformInfo::ID = 0;
-
diff --git a/test/CodeGen/MSP430/fp.ll b/test/CodeGen/MSP430/fp.ll
new file mode 100644
index 0000000000..c3273eff05
--- /dev/null
+++ b/test/CodeGen/MSP430/fp.ll
@@ -0,0 +1,17 @@
+; RUN: llc -O0 -disable-fp-elim < %s | FileCheck %s
+
+target datalayout = "e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"
+target triple = "msp430---elf"
+
+define void @fp() nounwind {
+entry:
+; CHECK: fp:
+; CHECK: push.w r4
+; CHECK: mov.w r1, r4
+; CHECK: sub.w #2, r1
+ %i = alloca i16, align 2
+; CHECK: mov.w #0, -2(r4)
+ store i16 0, i16* %i, align 2
+; CHECK: pop.w r4
+ ret void
+}
diff --git a/test/CodeGen/Mips/brconeq.ll b/test/CodeGen/Mips/brconeq.ll
new file mode 100644
index 0000000000..613391557e
--- /dev/null
+++ b/test/CodeGen/Mips/brconeq.ll
@@ -0,0 +1,38 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@i = global i32 5, align 4
+@j = global i32 10, align 4
+@result = global i32 0, align 4
+
+define void @test() nounwind {
+entry:
+ %0 = load i32* @i, align 4
+ %1 = load i32* @j, align 4
+ %cmp = icmp eq i32 %0, %1
+; 16: cmp ${{[0-9]+}}, ${{[0-9]+}}
+; 16: bteqz $[[LABEL:[0-9A-Ba-b_]+]]
+; 16: $[[LABEL]]:
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ store i32 1, i32* @result, align 4
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/test/CodeGen/Mips/brconeqk.ll b/test/CodeGen/Mips/brconeqk.ll
new file mode 100644
index 0000000000..2c0e72dabd
--- /dev/null
+++ b/test/CodeGen/Mips/brconeqk.ll
@@ -0,0 +1,22 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@i = global i32 5, align 4
+@result = global i32 0, align 4
+
+define void @test() nounwind {
+entry:
+ %0 = load i32* @i, align 4
+ %cmp = icmp eq i32 %0, 10
+ br i1 %cmp, label %if.end, label %if.then
+; 16: cmpi ${{[0-9]+}}, {{[0-9]+}}
+; 16: bteqz $[[LABEL:[0-9A-Ba-b_]+]]
+; 16: $[[LABEL]]:
+if.then: ; preds = %entry
+ store i32 1, i32* @result, align 4
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
+
diff --git a/test/CodeGen/Mips/brconeqz.ll b/test/CodeGen/Mips/brconeqz.ll
new file mode 100644
index 0000000000..5586e7b976
--- /dev/null
+++ b/test/CodeGen/Mips/brconeqz.ll
@@ -0,0 +1,20 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@i = global i32 5, align 4
+@result = global i32 0, align 4
+
+define void @test() nounwind {
+entry:
+ %0 = load i32* @i, align 4
+ %cmp = icmp eq i32 %0, 0
+ br i1 %cmp, label %if.end, label %if.then
+; 16: beqz ${{[0-9]+}}, $[[LABEL:[0-9A-Ba-b_]+]]
+; 16: $[[LABEL]]:
+if.then: ; preds = %entry
+ store i32 1, i32* @result, align 4
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
diff --git a/test/CodeGen/Mips/brconge.ll b/test/CodeGen/Mips/brconge.ll
new file mode 100644
index 0000000000..02f0a633b3
--- /dev/null
+++ b/test/CodeGen/Mips/brconge.ll
@@ -0,0 +1,37 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@i = global i32 5, align 4
+@j = global i32 10, align 4
+@k = global i32 5, align 4
+@result1 = global i32 0, align 4
+@result2 = global i32 1, align 4
+
+define void @test() nounwind {
+entry:
+ %0 = load i32* @i, align 4
+ %1 = load i32* @j, align 4
+ %cmp = icmp slt i32 %0, %1
+ br i1 %cmp, label %if.then, label %if.end
+
+; 16: slt ${{[0-9]+}}, ${{[0-9]+}}
+; 16: bteqz $[[LABEL:[0-9A-Ba-b_]+]]
+; 16: $[[LABEL]]:
+
+if.then: ; preds = %entry
+ store i32 1, i32* @result1, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ %2 = load i32* @k, align 4
+ %cmp1 = icmp slt i32 %0, %2
+ br i1 %cmp1, label %if.then2, label %if.end3
+
+if.then2: ; preds = %if.end
+ store i32 1, i32* @result1, align 4
+ br label %if.end3
+
+if.end3: ; preds = %if.then2, %if.end
+ ret void
+}
+
+
diff --git a/test/CodeGen/Mips/brcongt.ll b/test/CodeGen/Mips/brcongt.ll
new file mode 100644
index 0000000000..767b51b21b
--- /dev/null
+++ b/test/CodeGen/Mips/brcongt.ll
@@ -0,0 +1,25 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@i = global i32 5, align 4
+@j = global i32 10, align 4
+@k = global i32 5, align 4
+@result = global i32 0, align 4
+
+define void @test() nounwind {
+entry:
+ %0 = load i32* @i, align 4
+ %1 = load i32* @j, align 4
+ %cmp = icmp sgt i32 %0, %1
+ br i1 %cmp, label %if.end, label %if.then
+; 16: slt ${{[0-9]+}}, ${{[0-9]+}}
+; 16: btnez $[[LABEL:[0-9A-Ba-b_]+]]
+; 16: $[[LABEL]]:
+if.then: ; preds = %entry
+ store i32 1, i32* @result, align 4
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
+
diff --git a/test/CodeGen/Mips/brconle.ll b/test/CodeGen/Mips/brconle.ll
new file mode 100644
index 0000000000..854b2481c6
--- /dev/null
+++ b/test/CodeGen/Mips/brconle.ll
@@ -0,0 +1,37 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@i = global i32 -5, align 4
+@j = global i32 10, align 4
+@k = global i32 -5, align 4
+@result1 = global i32 0, align 4
+@result2 = global i32 1, align 4
+
+define void @test() nounwind {
+entry:
+ %0 = load i32* @j, align 4
+ %1 = load i32* @i, align 4
+ %cmp = icmp sgt i32 %0, %1
+ br i1 %cmp, label %if.then, label %if.end
+
+; 16: slt ${{[0-9]+}}, ${{[0-9]+}}
+; 16: bteqz $[[LABEL:[0-9A-Ba-b_]+]]
+; 16: $[[LABEL]]:
+
+if.then: ; preds = %entry
+ store i32 1, i32* @result1, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ %2 = load i32* @k, align 4
+ %cmp1 = icmp sgt i32 %1, %2
+ br i1 %cmp1, label %if.then2, label %if.end3
+
+if.then2: ; preds = %if.end
+ store i32 0, i32* @result1, align 4
+ br label %if.end3
+
+if.end3: ; preds = %if.then2, %if.end
+ ret void
+}
+
+
diff --git a/test/CodeGen/Mips/brconlt.ll b/test/CodeGen/Mips/brconlt.ll
new file mode 100644
index 0000000000..931a3e8c7b
--- /dev/null
+++ b/test/CodeGen/Mips/brconlt.ll
@@ -0,0 +1,27 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@i = global i32 5, align 4
+@j = global i32 10, align 4
+@k = global i32 5, align 4
+@result = global i32 0, align 4
+
+define void @test() nounwind {
+entry:
+ %0 = load i32* @j, align 4
+ %1 = load i32* @i, align 4
+ %cmp = icmp slt i32 %0, %1
+ br i1 %cmp, label %if.end, label %if.then
+
+; 16: slt ${{[0-9]+}}, ${{[0-9]+}}
+; 16: btnez $[[LABEL:[0-9A-Ba-b_]+]]
+; 16: $[[LABEL]]:
+
+if.then: ; preds = %entry
+ store i32 1, i32* @result, align 4
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
+
diff --git a/test/CodeGen/Mips/brconne.ll b/test/CodeGen/Mips/brconne.ll
new file mode 100644
index 0000000000..5d5bde3fcf
--- /dev/null
+++ b/test/CodeGen/Mips/brconne.ll
@@ -0,0 +1,26 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@i = global i32 5, align 4
+@j = global i32 5, align 4
+@result = global i32 0, align 4
+
+define void @test() nounwind {
+entry:
+ %0 = load i32* @j, align 4
+ %1 = load i32* @i, align 4
+ %cmp = icmp eq i32 %0, %1
+ br i1 %cmp, label %if.then, label %if.end
+; 16: cmp ${{[0-9]+}}, ${{[0-9]+}}
+; 16: btnez $[[LABEL:[0-9A-Ba-b_]+]]
+; 16: lw ${{[0-9]+}}, %got(result)(${{[0-9]+}})
+; 16: $[[LABEL]]:
+
+if.then: ; preds = %entry
+ store i32 1, i32* @result, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret void
+}
+
+
diff --git a/test/CodeGen/Mips/brconnek.ll b/test/CodeGen/Mips/brconnek.ll
new file mode 100644
index 0000000000..6208d7c5a0
--- /dev/null
+++ b/test/CodeGen/Mips/brconnek.ll
@@ -0,0 +1,25 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@j = global i32 5, align 4
+@result = global i32 0, align 4
+
+define void @test() nounwind {
+entry:
+ %0 = load i32* @j, align 4
+ %cmp = icmp eq i32 %0, 5
+ br i1 %cmp, label %if.then, label %if.end
+
+; 16: cmpi ${{[0-9]+}}, {{[0-9]+}}
+; 16: btnez $[[LABEL:[0-9A-Ba-b_]+]]
+; 16: lw ${{[0-9]+}}, %got(result)(${{[0-9]+}})
+; 16: $[[LABEL]]:
+
+if.then: ; preds = %entry
+ store i32 1, i32* @result, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret void
+}
+
+
diff --git a/test/CodeGen/Mips/brconnez.ll b/test/CodeGen/Mips/brconnez.ll
new file mode 100644
index 0000000000..47db7901b5
--- /dev/null
+++ b/test/CodeGen/Mips/brconnez.ll
@@ -0,0 +1,24 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@j = global i32 0, align 4
+@result = global i32 0, align 4
+
+define void @test() nounwind {
+entry:
+ %0 = load i32* @j, align 4
+ %cmp = icmp eq i32 %0, 0
+ br i1 %cmp, label %if.then, label %if.end
+
+; 16: bnez ${{[0-9]+}}, $[[LABEL:[0-9A-Ba-b_]+]]
+; 16: lw ${{[0-9]+}}, %got(result)(${{[0-9]+}})
+; 16: $[[LABEL]]:
+
+if.then: ; preds = %entry
+ store i32 1, i32* @result, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret void
+}
+
+
diff --git a/test/CodeGen/PowerPC/i64_fp_round.ll b/test/CodeGen/PowerPC/i64_fp_round.ll
new file mode 100644
index 0000000000..5a0c072c9c
--- /dev/null
+++ b/test/CodeGen/PowerPC/i64_fp_round.ll
@@ -0,0 +1,27 @@
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define float @test(i64 %x) nounwind readnone {
+entry:
+ %conv = sitofp i64 %x to float
+ ret float %conv
+}
+
+; Verify that we get the code sequence needed to avoid double-rounding.
+; Note that only parts of the sequence are checked for here, to allow
+; for minor code generation differences.
+
+; CHECK: sradi [[REGISTER:[0-9]+]], 3, 53
+; CHECK: addi [[REGISTER:[0-9]+]], [[REGISTER]], 1
+; CHECK: cmpldi 0, [[REGISTER]], 1
+; CHECK: isel [[REGISTER:[0-9]+]], {{[0-9]+}}, 3, 1
+; CHECK: std [[REGISTER]], -{{[0-9]+}}(1)
+
+
+; Also check that with -enable-unsafe-fp-math we do not get that extra
+; code sequence. Simply verify that there is no "isel" present.
+
+; RUN: llc -mcpu=pwr7 -enable-unsafe-fp-math < %s | FileCheck %s -check-prefix=UNSAFE
+; UNSAFE-NOT: isel
+
diff --git a/test/CodeGen/X86/2012-10-18-crash-dagco.ll b/test/CodeGen/X86/2012-10-18-crash-dagco.ll
new file mode 100644
index 0000000000..5b98624a37
--- /dev/null
+++ b/test/CodeGen/X86/2012-10-18-crash-dagco.ll
@@ -0,0 +1,61 @@
+; RUN: llc -march=x86-64 -mcpu=corei7 -disable-cgp-select2branch < %s
+
+; We should not crash on this test.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+target triple = "i386-apple-darwin9.0.0"
+
+@global = external constant [411 x i8], align 1
+
+define void @snork() nounwind {
+bb:
+ br i1 undef, label %bb26, label %bb27
+
+bb26: ; preds = %bb48, %bb26, %bb
+ switch i32 undef, label %bb26 [
+ i32 142771596, label %bb28
+ ]
+
+bb27: ; preds = %bb48, %bb
+ switch i32 undef, label %bb49 [
+ i32 142771596, label %bb28
+ ]
+
+bb28: ; preds = %bb27, %bb26
+ %tmp = load i32* null
+ %tmp29 = trunc i32 %tmp to i8
+ store i8* undef, i8** undef
+ %tmp30 = load i32* null
+ %tmp31 = icmp eq i32 %tmp30, 0
+ %tmp32 = getelementptr inbounds [411 x i8]* @global, i32 0, i32 undef
+ %tmp33 = load i8* %tmp32, align 1
+ %tmp34 = getelementptr inbounds [411 x i8]* @global, i32 0, i32 0
+ %tmp35 = load i8* %tmp34, align 1
+ %tmp36 = select i1 %tmp31, i8 %tmp35, i8 %tmp33
+ %tmp37 = select i1 undef, i8 %tmp29, i8 %tmp36
+ %tmp38 = zext i8 %tmp37 to i32
+ %tmp39 = select i1 undef, i32 0, i32 %tmp38
+ %tmp40 = getelementptr inbounds i32* null, i32 %tmp39
+ %tmp41 = load i32* %tmp40, align 4
+ %tmp42 = load i32* undef, align 4
+ %tmp43 = load i32* undef
+ %tmp44 = xor i32 %tmp42, %tmp43
+ %tmp45 = lshr i32 %tmp44, 8
+ %tmp46 = lshr i32 %tmp44, 7
+ call void @spam()
+ unreachable
+
+bb47: ; No predecessors!
+ ret void
+
+bb48: ; No predecessors!
+ br i1 undef, label %bb27, label %bb26
+
+bb49: ; preds = %bb49, %bb27
+ br label %bb49
+
+bb50: ; preds = %bb50
+ br label %bb50
+}
+
+declare void @spam() noreturn nounwind
diff --git a/test/CodeGen/X86/extract-concat.ll b/test/CodeGen/X86/extract-concat.ll
new file mode 100644
index 0000000000..704309eb65
--- /dev/null
+++ b/test/CodeGen/X86/extract-concat.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -mcpu=corei7 -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+
+define void @foo(<4 x float> %in, <4 x i8>* %out) {
+ %t0 = fptosi <4 x float> %in to <4 x i32>
+ %t1 = trunc <4 x i32> %t0 to <4 x i16>
+ %t2 = shufflevector <4 x i16> %t1, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %t3 = trunc <8 x i16> %t2 to <8 x i8>
+ %t4 = shufflevector <8 x i8> %t3, <8 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %t5 = insertelement <4 x i8> %t4, i8 -1, i32 3
+ store <4 x i8> %t5, <4 x i8>* %out
+ ret void
+; CHECK: foo
+; CHECK: cvttps2dq
+; CHECK-NOT: pextrd
+; CHECK: pshufb
+; CHECK: ret
+}
diff --git a/test/CodeGen/X86/pr14090.ll b/test/CodeGen/X86/pr14090.ll
new file mode 100644
index 0000000000..d76b912fd8
--- /dev/null
+++ b/test/CodeGen/X86/pr14090.ll
@@ -0,0 +1,76 @@
+; RUN: llc < %s -march=x86-64 -print-before=stack-coloring -print-after=stack-coloring >%t 2>&1 && FileCheck <%t %s
+
+define void @foo(i64* %retval.i, i32 %call, i32* %.ph.i80, i32 %fourteen, i32* %out.lo, i32* %out.hi) nounwind align 2 {
+entry:
+ %_Tmp.i39 = alloca i64, align 8
+ %retval.i33 = alloca i64, align 8
+ %_Tmp.i = alloca i64, align 8
+ %retval.i.i = alloca i64, align 8
+ %_First.i = alloca i64, align 8
+
+ %0 = load i64* %retval.i, align 8
+
+ %1 = load i64* %retval.i, align 8
+
+ %_Tmp.i39.0.cast73 = bitcast i64* %_Tmp.i39 to i8*
+ call void @llvm.lifetime.start(i64 8, i8* %_Tmp.i39.0.cast73)
+ store i64 %1, i64* %_Tmp.i39, align 8
+ %cmp.i.i.i40 = icmp slt i32 %call, 0
+ %2 = lshr i64 %1, 32
+ %3 = trunc i64 %2 to i32
+ %sub.i.i.i44 = sub i32 0, %call
+ %cmp2.i.i.i45 = icmp ult i32 %3, %sub.i.i.i44
+ %or.cond.i.i.i46 = and i1 %cmp.i.i.i40, %cmp2.i.i.i45
+ %add.i.i.i47 = add i32 %3, %call
+ %sub5.i.i.i48 = lshr i32 %add.i.i.i47, 5
+ %trunc.i50 = trunc i64 %1 to i32
+ %inttoptr.i51 = inttoptr i32 %trunc.i50 to i32*
+ %add61617.i.i.i52 = or i32 %sub5.i.i.i48, -134217728
+ %add61617.i.sub5.i.i.i53 = select i1 %or.cond.i.i.i46, i32 %add61617.i.i.i52, i32 %sub5.i.i.i48
+ %storemerge2.i.i54 = getelementptr inbounds i32* %inttoptr.i51, i32 %add61617.i.sub5.i.i.i53
+ %_Tmp.i39.0.cast74 = bitcast i64* %_Tmp.i39 to i32**
+ store i32* %storemerge2.i.i54, i32** %_Tmp.i39.0.cast74, align 8
+ %storemerge.i.i55 = and i32 %add.i.i.i47, 31
+ %_Tmp.i39.4.raw_idx = getelementptr inbounds i8* %_Tmp.i39.0.cast73, i32 4
+ %_Tmp.i39.4.cast = bitcast i8* %_Tmp.i39.4.raw_idx to i32*
+ store i32 %storemerge.i.i55, i32* %_Tmp.i39.4.cast, align 4
+ %srcval.i56 = load i64* %_Tmp.i39, align 8
+ call void @llvm.lifetime.end(i64 8, i8* %_Tmp.i39.0.cast73)
+
+; CHECK: Before Merge disjoint stack slots
+; CHECK: [[PREFIX15:MOV64mr.*<fi#]]{{[0-9]}}[[SUFFIX15:.*;]] mem:ST8[%fifteen]
+; CHECK: [[PREFIX87:MOV32mr.*;]] mem:ST4[%sunkaddr87]
+
+; CHECK: After Merge disjoint stack slots
+; CHECK: [[PREFIX15]]{{[0-9]}}[[SUFFIX15]] mem:ST8[%_Tmp.i39]
+; CHECK: [[PREFIX87]] mem:ST4[<unknown>]
+
+ %fifteen = bitcast i64* %retval.i.i to i32**
+ %sixteen = bitcast i64* %retval.i.i to i8*
+ call void @llvm.lifetime.start(i64 8, i8* %sixteen)
+ store i32* %.ph.i80, i32** %fifteen, align 8, !tbaa !0
+ %sunkaddr = ptrtoint i64* %retval.i.i to i32
+ %sunkaddr86 = add i32 %sunkaddr, 4
+ %sunkaddr87 = inttoptr i32 %sunkaddr86 to i32*
+ store i32 %fourteen, i32* %sunkaddr87, align 4, !tbaa !3
+ %seventeen = load i64* %retval.i.i, align 8
+ call void @llvm.lifetime.end(i64 8, i8* %sixteen)
+ %eighteen = lshr i64 %seventeen, 32
+ %nineteen = trunc i64 %eighteen to i32
+ %shl.i.i.i = shl i32 1, %nineteen
+
+ store i32 %shl.i.i.i, i32* %out.lo, align 8
+ store i32 %nineteen, i32* %out.hi, align 8
+
+ ret void
+}
+
+declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
+
+declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"any pointer", metadata !1}
+!4 = metadata !{metadata !"vtable pointer", metadata !2}
diff --git a/test/CodeGen/X86/sjlj.ll b/test/CodeGen/X86/sjlj.ll
index d594e98299..681db00943 100644
--- a/test/CodeGen/X86/sjlj.ll
+++ b/test/CodeGen/X86/sjlj.ll
@@ -1,5 +1,7 @@
; RUN: llc < %s -mtriple=i386-pc-linux -mcpu=corei7 -relocation-model=static | FileCheck --check-prefix=X86 %s
-; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=corei7 | FileCheck --check-prefix=X64 %s
+; RUN: llc < %s -mtriple=i386-pc-linux -mcpu=corei7 -relocation-model=pic | FileCheck --check-prefix=PIC86 %s
+; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=corei7 -relocation-model=static | FileCheck --check-prefix=X64 %s
+; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=corei7 -relocation-model=pic | FileCheck --check-prefix=PIC64 %s
@buf = internal global [5 x i8*] zeroinitializer
@@ -20,14 +22,26 @@ define i32 @sj0() nounwind {
ret i32 %r
; X86: sj0
; x86: movl %ebp, buf
-; x86: movl ${{.*LBB.*}}, buf+4
; X86: movl %esp, buf+8
+; x86: movl ${{.*LBB.*}}, buf+4
; X86: ret
+; PIC86: sj0
+; PIC86: movl %ebp, buf@GOTOFF(%[[GOT:.*]])
+; PIC86: movl %esp, buf@GOTOFF+8(%[[GOT]])
+; PIC86: leal {{.*LBB.*}}@GOTOFF(%[[GOT]]), %[[LREG:.*]]
+; PIC86: movl %[[LREG]], buf@GOTOFF+4
+; PIC86: ret
; X64: sj0
; x64: movq %rbp, buf(%rip)
; x64: movq ${{.*LBB.*}}, buf+8(%rip)
; X64: movq %rsp, buf+16(%rip)
; X64: ret
+; PIC64: sj0
+; PIC64: movq %rbp, buf(%rip)
+; PIC64: movq %rsp, buf+16(%rip)
+; PIC64: leaq {{.*LBB.*}}(%rip), %[[LREG:.*]]
+; PIC64: movq %[[LREG]], buf+8(%rip)
+; PIC64: ret
}
define void @lj0() nounwind {
diff --git a/test/CodeGen/X86/trunc-fp2int.ll b/test/CodeGen/X86/trunc-fp2int.ll
deleted file mode 100644
index 792af16c95..0000000000
--- a/test/CodeGen/X86/trunc-fp2int.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7 | FileCheck %s
-
-define <4 x i8> @bar(<4 x float> %in) nounwind readnone alwaysinline {
- %1 = fptoui <4 x float> %in to <4 x i8>
- ret <4 x i8> %1
-; CHECK: bar
-; CHECK: cvttps2dq
-}
-define <4 x i8> @foo(<4 x float> %in) nounwind readnone alwaysinline {
- %1 = fptoui <4 x float> %in to <4 x i32>
- %2 = trunc <4 x i32> %1 to <4 x i16>
- %3 = shufflevector <4 x i16> %2, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- %4 = trunc <8 x i16> %3 to <8 x i8>
- %5 = shufflevector <8 x i8> %4, <8 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- ret <4 x i8> %5
-; CHECK: foo
-; CHECK: cvttps2dq
-}
diff --git a/test/Transforms/InstCombine/strcpy-1.ll b/test/Transforms/InstCombine/strcpy-1.ll
new file mode 100644
index 0000000000..b6cf048b2a
--- /dev/null
+++ b/test/Transforms/InstCombine/strcpy-1.ll
@@ -0,0 +1,45 @@
+; Test that the strcpy library call simplifier works correctly.
+; rdar://6839935
+; RUN: opt < %s -instcombine -S | FileCheck %s
+;
+; This transformation requires the pointer size, as it assumes that size_t is
+; the size of a pointer.
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+
+@hello = constant [6 x i8] c"hello\00"
+@a = common global [32 x i8] zeroinitializer, align 1
+@b = common global [32 x i8] zeroinitializer, align 1
+
+declare i8* @strcpy(i8*, i8*)
+
+define void @test_simplify1() {
+; CHECK: @test_simplify1
+
+ %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [6 x i8]* @hello, i32 0, i32 0
+
+ call i8* @strcpy(i8* %dst, i8* %src)
+; CHECK: @llvm.memcpy.p0i8.p0i8.i32
+ ret void
+}
+
+define i8* @test_simplify2() {
+; CHECK: @test_simplify2
+
+ %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+
+ %ret = call i8* @strcpy(i8* %dst, i8* %dst)
+; CHECK: ret i8* getelementptr inbounds ([32 x i8]* @a, i32 0, i32 0)
+ ret i8* %ret
+}
+
+define i8* @test_no_simplify1() {
+; CHECK: @test_no_simplify1
+
+ %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [32 x i8]* @b, i32 0, i32 0
+
+ %ret = call i8* @strcpy(i8* %dst, i8* %src)
+; CHECK: call i8* @strcpy
+ ret i8* %ret
+}
diff --git a/test/Transforms/InstCombine/strcpy-2.ll b/test/Transforms/InstCombine/strcpy-2.ll
new file mode 100644
index 0000000000..779e9fdd95
--- /dev/null
+++ b/test/Transforms/InstCombine/strcpy-2.ll
@@ -0,0 +1,22 @@
+; Test that the strcpy library call simplifier works correctly.
+; RUN: opt < %s -instcombine -S | FileCheck %s
+;
+; This transformation requires the pointer size, as it assumes that size_t is
+; the size of a pointer.
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+
+@hello = constant [6 x i8] c"hello\00"
+@a = common global [32 x i8] zeroinitializer, align 1
+
+declare i16* @strcpy(i8*, i8*)
+
+define void @test_no_simplify1() {
+; CHECK: @test_no_simplify1
+
+ %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [6 x i8]* @hello, i32 0, i32 0
+
+ call i16* @strcpy(i8* %dst, i8* %src)
+; CHECK: call i16* @strcpy
+ ret void
+}
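
strcpy-2.ll deliberately declares strcpy with the wrong prototype (an i16* return type) and expects the call to survive untouched, so the simplifier must validate the callee's signature before rewriting anything. A stand-in for that guard using plain strings rather than LLVM's FunctionType (a hypothetical helper, not the real API):

    #include <string>
    #include <vector>

    struct Prototype {
      std::string Return;
      std::vector<std::string> Params;
    };

    // Only the canonical `i8* strcpy(i8*, i8*)` shape is eligible for folding;
    // the i16*-returning declaration in the test fails this check.
    bool isCanonicalStrcpy(const Prototype &P) {
      return P.Return == "i8*" &&
             P.Params == std::vector<std::string>{"i8*", "i8*"};
    }
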
diff --git a/test/Transforms/InstCombine/strcpy_chk-1.ll b/test/Transforms/InstCombine/strcpy_chk-1.ll
index c03e8a348b..3e48f4fd30 100644
--- a/test/Transforms/InstCombine/strcpy_chk-1.ll
+++ b/test/Transforms/InstCombine/strcpy_chk-1.ll
@@ -7,16 +7,16 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
@a = common global [60 x i8] zeroinitializer, align 1
@b = common global [60 x i8] zeroinitializer, align 1
-@.str = private constant [8 x i8] c"abcdefg\00"
+@.str = private constant [12 x i8] c"abcdefghijk\00"
; Check cases where slen >= strlen (src).
define void @test_simplify1() {
; CHECK: @test_simplify1
%dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
- %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0
+ %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0
-; CHECK-NEXT: call i8* @strcpy
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32
call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 60)
ret void
}
@@ -24,19 +24,19 @@ define void @test_simplify1() {
define void @test_simplify2() {
; CHECK: @test_simplify2
%dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
- %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0
+ %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0
-; CHECK-NEXT: call i8* @strcpy
- call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 8)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32
+ call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 12)
ret void
}
define void @test_simplify3() {
; CHECK: @test_simplify3
%dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
- %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0
+ %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0
-; CHECK-NEXT: call i8* @strcpy
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32
call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 -1)
ret void
}
@@ -53,36 +53,42 @@ define void @test_simplify4() {
ret void
}
-define void @test_no_simplify1() {
-; CHECK: @test_no_simplify1
+; Check case where the string length is not constant.
+
+define void @test_simplify5() {
+; CHECK: @test_simplify5
%dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
- %src = getelementptr inbounds [60 x i8]* @b, i32 0, i32 0
+ %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0
-; CHECK-NEXT: call i8* @__strcpy_chk
- call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 8)
+; CHECK: @__memcpy_chk
+ %len = call i32 @llvm.objectsize.i32(i8* %dst, i1 false)
+ call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 %len)
ret void
}
-; Check case were slen < strlen (src).
+; Check case where the source and destination are the same.
-define void @test_no_simplify2() {
-; CHECK: @test_no_simplify2
+define i8* @test_simplify6() {
+; CHECK: @test_simplify6
%dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
- %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0
-; CHECK-NEXT: call i8* @__strcpy_chk
- call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 3)
- ret void
+; CHECK: getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0)
+ %len = call i32 @llvm.objectsize.i32(i8* %dst, i1 false)
+ %ret = call i8* @__strcpy_chk(i8* %dst, i8* %dst, i32 %len)
+ ret i8* %ret
}
-define void @test_no_simplify3() {
-; CHECK: @test_no_simplify3
+; Check case where slen < strlen (src).
+
+define void @test_no_simplify1() {
+; CHECK: @test_no_simplify1
%dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
- %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0
+ %src = getelementptr inbounds [60 x i8]* @b, i32 0, i32 0
; CHECK-NEXT: call i8* @__strcpy_chk
- call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 0)
+ call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 8)
ret void
}
declare i8* @__strcpy_chk(i8*, i8*, i32) nounwind
+declare i32 @llvm.objectsize.i32(i8*, i1) nounwind readonly
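
The rewritten cases cover the fortified-copy folds: when the bound handed to __strcpy_chk provably covers strlen(src) + 1 (a bound of -1 counts as unlimited) the call becomes a plain memcpy, when the bound comes from llvm.objectsize it becomes __memcpy_chk, and a bound smaller than the source string blocks the fold. Roughly the kind of C++ source that lowers to this IR, using the GCC/Clang fortify builtins (a sketch, not the code these tests were generated from):

    char a[60];

    // __builtin_object_size supplies the third argument, just like the
    // llvm.objectsize.i32 call in test_simplify5 above.
    void fortified_copy() {
      __builtin___strcpy_chk(a, "abcdefghijk", __builtin_object_size(a, 0));
    }
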
diff --git a/test/Transforms/LoopVectorize/gcc-examples.ll b/test/Transforms/LoopVectorize/gcc-examples.ll
new file mode 100644
index 0000000000..4e9e6f940e
--- /dev/null
+++ b/test/Transforms/LoopVectorize/gcc-examples.ll
@@ -0,0 +1,649 @@
+; RUN: opt < %s -loop-vectorize -dce -instcombine -licm -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+@b = common global [2048 x i32] zeroinitializer, align 16
+@c = common global [2048 x i32] zeroinitializer, align 16
+@a = common global [2048 x i32] zeroinitializer, align 16
+@G = common global [32 x [1024 x i32]] zeroinitializer, align 16
+@ub = common global [1024 x i32] zeroinitializer, align 16
+@uc = common global [1024 x i32] zeroinitializer, align 16
+@d = common global [2048 x i32] zeroinitializer, align 16
+@fa = common global [1024 x float] zeroinitializer, align 16
+@fb = common global [1024 x float] zeroinitializer, align 16
+@ic = common global [1024 x i32] zeroinitializer, align 16
+@da = common global [1024 x float] zeroinitializer, align 16
+@db = common global [1024 x float] zeroinitializer, align 16
+@dc = common global [1024 x float] zeroinitializer, align 16
+@dd = common global [1024 x float] zeroinitializer, align 16
+@dj = common global [1024 x i32] zeroinitializer, align 16
+
+;CHECK: @example1
+;CHECK: load <4 x i32>
+;CHECK: add <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret void
+define void @example1() nounwind uwtable ssp {
+ br label %1
+
+; <label>:1 ; preds = %1, %0
+ %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+ %2 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv
+ %3 = load i32* %2, align 4
+ %4 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %indvars.iv
+ %5 = load i32* %4, align 4
+ %6 = add nsw i32 %5, %3
+ %7 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+ store i32 %6, i32* %7, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 256
+ br i1 %exitcond, label %8, label %1
+
+; <label>:8 ; preds = %1
+ ret void
+}
+
+;CHECK: @example2
+;CHECK: store <4 x i32>
+;CHECK: ret void
+define void @example2(i32 %n, i32 %x) nounwind uwtable ssp {
+ %1 = icmp sgt i32 %n, 0
+ br i1 %1, label %.lr.ph5, label %.preheader
+
+..preheader_crit_edge: ; preds = %.lr.ph5
+ %phitmp = sext i32 %n to i64
+ br label %.preheader
+
+.preheader: ; preds = %..preheader_crit_edge, %0
+ %i.0.lcssa = phi i64 [ %phitmp, %..preheader_crit_edge ], [ 0, %0 ]
+ %2 = icmp eq i32 %n, 0
+ br i1 %2, label %._crit_edge, label %.lr.ph
+
+.lr.ph5: ; preds = %0, %.lr.ph5
+ %indvars.iv6 = phi i64 [ %indvars.iv.next7, %.lr.ph5 ], [ 0, %0 ]
+ %3 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv6
+ store i32 %x, i32* %3, align 4
+ %indvars.iv.next7 = add i64 %indvars.iv6, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next7 to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %..preheader_crit_edge, label %.lr.ph5
+
+.lr.ph: ; preds = %.preheader, %.lr.ph
+ %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ %i.0.lcssa, %.preheader ]
+ %.02 = phi i32 [ %4, %.lr.ph ], [ %n, %.preheader ]
+ %4 = add nsw i32 %.02, -1
+ %5 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv
+ %6 = load i32* %5, align 4
+ %7 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %indvars.iv
+ %8 = load i32* %7, align 4
+ %9 = and i32 %8, %6
+ %10 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+ store i32 %9, i32* %10, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %11 = icmp eq i32 %4, 0
+ br i1 %11, label %._crit_edge, label %.lr.ph
+
+._crit_edge: ; preds = %.lr.ph, %.preheader
+ ret void
+}
+
+; We can't vectorize this loop because it has non-constant loop bounds.
+;CHECK: @example3
+;CHECK-NOT: <4 x i32>
+;CHECK: ret void
+define void @example3(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture %q) nounwind uwtable ssp {
+ %1 = icmp eq i32 %n, 0
+ br i1 %1, label %._crit_edge, label %.lr.ph
+
+.lr.ph: ; preds = %0, %.lr.ph
+ %.05 = phi i32 [ %2, %.lr.ph ], [ %n, %0 ]
+ %.014 = phi i32* [ %5, %.lr.ph ], [ %p, %0 ]
+ %.023 = phi i32* [ %3, %.lr.ph ], [ %q, %0 ]
+ %2 = add nsw i32 %.05, -1
+ %3 = getelementptr inbounds i32* %.023, i64 1
+ %4 = load i32* %.023, align 16
+ %5 = getelementptr inbounds i32* %.014, i64 1
+ store i32 %4, i32* %.014, align 16
+ %6 = icmp eq i32 %2, 0
+ br i1 %6, label %._crit_edge, label %.lr.ph
+
+._crit_edge: ; preds = %.lr.ph, %0
+ ret void
+}
+
+;CHECK: @example4
+;CHECK: load <4 x i32>
+;CHECK: ret void
+define void @example4(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture %q) nounwind uwtable ssp {
+ %1 = add nsw i32 %n, -1
+ %2 = icmp eq i32 %n, 0
+ br i1 %2, label %.preheader4, label %.lr.ph10
+
+.preheader4: ; preds = %0
+ %3 = icmp sgt i32 %1, 0
+ br i1 %3, label %.lr.ph6, label %._crit_edge
+
+.lr.ph10: ; preds = %0, %.lr.ph10
+ %4 = phi i32 [ %9, %.lr.ph10 ], [ %1, %0 ]
+ %.018 = phi i32* [ %8, %.lr.ph10 ], [ %p, %0 ]
+ %.027 = phi i32* [ %5, %.lr.ph10 ], [ %q, %0 ]
+ %5 = getelementptr inbounds i32* %.027, i64 1
+ %6 = load i32* %.027, align 16
+ %7 = add nsw i32 %6, 5
+ %8 = getelementptr inbounds i32* %.018, i64 1
+ store i32 %7, i32* %.018, align 16
+ %9 = add nsw i32 %4, -1
+ %10 = icmp eq i32 %4, 0
+ br i1 %10, label %._crit_edge, label %.lr.ph10
+
+.preheader: ; preds = %.lr.ph6
+ br i1 %3, label %.lr.ph, label %._crit_edge
+
+.lr.ph6: ; preds = %.preheader4, %.lr.ph6
+ %indvars.iv11 = phi i64 [ %indvars.iv.next12, %.lr.ph6 ], [ 0, %.preheader4 ]
+ %indvars.iv.next12 = add i64 %indvars.iv11, 1
+ %11 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv.next12
+ %12 = load i32* %11, align 4
+ %13 = add nsw i64 %indvars.iv11, 3
+ %14 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %13
+ %15 = load i32* %14, align 4
+ %16 = add nsw i32 %15, %12
+ %17 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv11
+ store i32 %16, i32* %17, align 4
+ %lftr.wideiv13 = trunc i64 %indvars.iv.next12 to i32
+ %exitcond14 = icmp eq i32 %lftr.wideiv13, %1
+ br i1 %exitcond14, label %.preheader, label %.lr.ph6
+
+.lr.ph: ; preds = %.preheader, %.lr.ph
+ %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %.preheader ]
+ %18 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+ %19 = load i32* %18, align 4
+ %20 = icmp sgt i32 %19, 4
+ %21 = select i1 %20, i32 4, i32 0
+ %22 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv
+ store i32 %21, i32* %22, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %1
+ br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge: ; preds = %.lr.ph10, %.preheader4, %.lr.ph, %.preheader
+ ret void
+}
+
+;CHECK: @example8
+;CHECK: store <4 x i32>
+;CHECK: ret void
+define void @example8(i32 %x) nounwind uwtable ssp {
+ br label %.preheader
+
+.preheader: ; preds = %3, %0
+ %indvars.iv3 = phi i64 [ 0, %0 ], [ %indvars.iv.next4, %3 ]
+ br label %1
+
+; <label>:1 ; preds = %1, %.preheader
+ %indvars.iv = phi i64 [ 0, %.preheader ], [ %indvars.iv.next, %1 ]
+ %2 = getelementptr inbounds [32 x [1024 x i32]]* @G, i64 0, i64 %indvars.iv3, i64 %indvars.iv
+ store i32 %x, i32* %2, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 1024
+ br i1 %exitcond, label %3, label %1
+
+; <label>:3 ; preds = %1
+ %indvars.iv.next4 = add i64 %indvars.iv3, 1
+ %lftr.wideiv5 = trunc i64 %indvars.iv.next4 to i32
+ %exitcond6 = icmp eq i32 %lftr.wideiv5, 32
+ br i1 %exitcond6, label %4, label %.preheader
+
+; <label>:4 ; preds = %3
+ ret void
+}
+
+; We can't vectorize because it has a reduction variable.
+;CHECK: @example9
+;CHECK-NOT: <4 x i32>
+;CHECK: ret i32
+define i32 @example9() nounwind uwtable readonly ssp {
+ br label %1
+
+; <label>:1 ; preds = %1, %0
+ %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+ %diff.01 = phi i32 [ 0, %0 ], [ %7, %1 ]
+ %2 = getelementptr inbounds [1024 x i32]* @ub, i64 0, i64 %indvars.iv
+ %3 = load i32* %2, align 4
+ %4 = getelementptr inbounds [1024 x i32]* @uc, i64 0, i64 %indvars.iv
+ %5 = load i32* %4, align 4
+ %6 = add i32 %3, %diff.01
+ %7 = sub i32 %6, %5
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 1024
+ br i1 %exitcond, label %8, label %1
+
+; <label>:8 ; preds = %1
+ ret i32 %7
+}
+
+;CHECK: @example10a
+;CHECK: load <4 x i16>
+;CHECK: add <4 x i16>
+;CHECK: store <4 x i16>
+;CHECK: ret void
+define void @example10a(i16* noalias nocapture %sa, i16* noalias nocapture %sb, i16* noalias nocapture %sc, i32* noalias nocapture %ia, i32* noalias nocapture %ib, i32* noalias nocapture %ic) nounwind uwtable ssp {
+ br label %1
+
+; <label>:1 ; preds = %1, %0
+ %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+ %2 = getelementptr inbounds i32* %ib, i64 %indvars.iv
+ %3 = load i32* %2, align 4
+ %4 = getelementptr inbounds i32* %ic, i64 %indvars.iv
+ %5 = load i32* %4, align 4
+ %6 = add nsw i32 %5, %3
+ %7 = getelementptr inbounds i32* %ia, i64 %indvars.iv
+ store i32 %6, i32* %7, align 4
+ %8 = getelementptr inbounds i16* %sb, i64 %indvars.iv
+ %9 = load i16* %8, align 2
+ %10 = getelementptr inbounds i16* %sc, i64 %indvars.iv
+ %11 = load i16* %10, align 2
+ %12 = add i16 %11, %9
+ %13 = getelementptr inbounds i16* %sa, i64 %indvars.iv
+ store i16 %12, i16* %13, align 2
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 1024
+ br i1 %exitcond, label %14, label %1
+
+; <label>:14 ; preds = %1
+ ret void
+}
+
+;CHECK: @example10b
+;CHECK: load <4 x i16>
+;CHECK: sext <4 x i16>
+;CHECK: store <4 x i32>
+;CHECK: ret void
+define void @example10b(i16* noalias nocapture %sa, i16* noalias nocapture %sb, i16* noalias nocapture %sc, i32* noalias nocapture %ia, i32* noalias nocapture %ib, i32* noalias nocapture %ic) nounwind uwtable ssp {
+ br label %1
+
+; <label>:1 ; preds = %1, %0
+ %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+ %2 = getelementptr inbounds i16* %sb, i64 %indvars.iv
+ %3 = load i16* %2, align 2
+ %4 = sext i16 %3 to i32
+ %5 = getelementptr inbounds i32* %ia, i64 %indvars.iv
+ store i32 %4, i32* %5, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 1024
+ br i1 %exitcond, label %6, label %1
+
+; <label>:6 ; preds = %1
+ ret void
+}
+
+;CHECK: @example11
+;CHECK: load i32
+;CHECK: load i32
+;CHECK: load i32
+;CHECK: load i32
+;CHECK: insertelement
+;CHECK: insertelement
+;CHECK: insertelement
+;CHECK: insertelement
+;CHECK: ret void
+define void @example11() nounwind uwtable ssp {
+ br label %1
+
+; <label>:1 ; preds = %1, %0
+ %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+ %2 = shl nsw i64 %indvars.iv, 1
+ %3 = or i64 %2, 1
+ %4 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %3
+ %5 = load i32* %4, align 4
+ %6 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %3
+ %7 = load i32* %6, align 4
+ %8 = mul nsw i32 %7, %5
+ %9 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %2
+ %10 = load i32* %9, align 8
+ %11 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %2
+ %12 = load i32* %11, align 8
+ %13 = mul nsw i32 %12, %10
+ %14 = sub nsw i32 %8, %13
+ %15 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+ store i32 %14, i32* %15, align 4
+ %16 = mul nsw i32 %7, %10
+ %17 = mul nsw i32 %12, %5
+ %18 = add nsw i32 %17, %16
+ %19 = getelementptr inbounds [2048 x i32]* @d, i64 0, i64 %indvars.iv
+ store i32 %18, i32* %19, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 512
+ br i1 %exitcond, label %20, label %1
+
+; <label>:20 ; preds = %1
+ ret void
+}
+
+;CHECK: @example12
+;CHECK: trunc <4 x i64>
+;CHECK: store <4 x i32>
+;CHECK: ret void
+define void @example12() nounwind uwtable ssp {
+ br label %1
+
+; <label>:1 ; preds = %1, %0
+ %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+ %2 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+ %3 = trunc i64 %indvars.iv to i32
+ store i32 %3, i32* %2, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 1024
+ br i1 %exitcond, label %4, label %1
+
+; <label>:4 ; preds = %1
+ ret void
+}
+
+; Can't vectorize because of reductions.
+;CHECK: @example13
+;CHECK-NOT: <4 x i32>
+;CHECK: ret void
+define void @example13(i32** nocapture %A, i32** nocapture %B, i32* nocapture %out) nounwind uwtable ssp {
+ br label %.preheader
+
+.preheader: ; preds = %14, %0
+ %indvars.iv4 = phi i64 [ 0, %0 ], [ %indvars.iv.next5, %14 ]
+ %1 = getelementptr inbounds i32** %A, i64 %indvars.iv4
+ %2 = load i32** %1, align 8
+ %3 = getelementptr inbounds i32** %B, i64 %indvars.iv4
+ %4 = load i32** %3, align 8
+ br label %5
+
+; <label>:5 ; preds = %.preheader, %5
+ %indvars.iv = phi i64 [ 0, %.preheader ], [ %indvars.iv.next, %5 ]
+ %diff.02 = phi i32 [ 0, %.preheader ], [ %11, %5 ]
+ %6 = getelementptr inbounds i32* %2, i64 %indvars.iv
+ %7 = load i32* %6, align 4
+ %8 = getelementptr inbounds i32* %4, i64 %indvars.iv
+ %9 = load i32* %8, align 4
+ %10 = add i32 %7, %diff.02
+ %11 = sub i32 %10, %9
+ %indvars.iv.next = add i64 %indvars.iv, 8
+ %12 = trunc i64 %indvars.iv.next to i32
+ %13 = icmp slt i32 %12, 1024
+ br i1 %13, label %5, label %14
+
+; <label>:14 ; preds = %5
+ %15 = getelementptr inbounds i32* %out, i64 %indvars.iv4
+ store i32 %11, i32* %15, align 4
+ %indvars.iv.next5 = add i64 %indvars.iv4, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next5 to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 32
+ br i1 %exitcond, label %16, label %.preheader
+
+; <label>:16 ; preds = %14
+ ret void
+}
+
+; Can't vectorize because of reductions.
+;CHECK: @example14
+;CHECK-NOT: <4 x i32>
+;CHECK: ret void
+define void @example14(i32** nocapture %in, i32** nocapture %coeff, i32* nocapture %out) nounwind uwtable ssp {
+.preheader3:
+ br label %.preheader
+
+.preheader: ; preds = %11, %.preheader3
+ %indvars.iv7 = phi i64 [ 0, %.preheader3 ], [ %indvars.iv.next8, %11 ]
+ %sum.05 = phi i32 [ 0, %.preheader3 ], [ %10, %11 ]
+ br label %0
+
+; <label>:0 ; preds = %0, %.preheader
+ %indvars.iv = phi i64 [ 0, %.preheader ], [ %indvars.iv.next, %0 ]
+ %sum.12 = phi i32 [ %sum.05, %.preheader ], [ %10, %0 ]
+ %1 = getelementptr inbounds i32** %in, i64 %indvars.iv
+ %2 = load i32** %1, align 8
+ %3 = getelementptr inbounds i32* %2, i64 %indvars.iv7
+ %4 = load i32* %3, align 4
+ %5 = getelementptr inbounds i32** %coeff, i64 %indvars.iv
+ %6 = load i32** %5, align 8
+ %7 = getelementptr inbounds i32* %6, i64 %indvars.iv7
+ %8 = load i32* %7, align 4
+ %9 = mul nsw i32 %8, %4
+ %10 = add nsw i32 %9, %sum.12
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 1024
+ br i1 %exitcond, label %11, label %0
+
+; <label>:11 ; preds = %0
+ %indvars.iv.next8 = add i64 %indvars.iv7, 1
+ %lftr.wideiv9 = trunc i64 %indvars.iv.next8 to i32
+ %exitcond10 = icmp eq i32 %lftr.wideiv9, 32
+ br i1 %exitcond10, label %.preheader3.1, label %.preheader
+
+.preheader3.1: ; preds = %11
+ store i32 %10, i32* %out, align 4
+ br label %.preheader.1
+
+.preheader.1: ; preds = %24, %.preheader3.1
+ %indvars.iv7.1 = phi i64 [ 0, %.preheader3.1 ], [ %indvars.iv.next8.1, %24 ]
+ %sum.05.1 = phi i32 [ 0, %.preheader3.1 ], [ %23, %24 ]
+ br label %12
+
+; <label>:12 ; preds = %12, %.preheader.1
+ %indvars.iv.1 = phi i64 [ 0, %.preheader.1 ], [ %13, %12 ]
+ %sum.12.1 = phi i32 [ %sum.05.1, %.preheader.1 ], [ %23, %12 ]
+ %13 = add nsw i64 %indvars.iv.1, 1
+ %14 = getelementptr inbounds i32** %in, i64 %13
+ %15 = load i32** %14, align 8
+ %16 = getelementptr inbounds i32* %15, i64 %indvars.iv7.1
+ %17 = load i32* %16, align 4
+ %18 = getelementptr inbounds i32** %coeff, i64 %indvars.iv.1
+ %19 = load i32** %18, align 8
+ %20 = getelementptr inbounds i32* %19, i64 %indvars.iv7.1
+ %21 = load i32* %20, align 4
+ %22 = mul nsw i32 %21, %17
+ %23 = add nsw i32 %22, %sum.12.1
+ %lftr.wideiv.1 = trunc i64 %13 to i32
+ %exitcond.1 = icmp eq i32 %lftr.wideiv.1, 1024
+ br i1 %exitcond.1, label %24, label %12
+
+; <label>:24 ; preds = %12
+ %indvars.iv.next8.1 = add i64 %indvars.iv7.1, 1
+ %lftr.wideiv9.1 = trunc i64 %indvars.iv.next8.1 to i32
+ %exitcond10.1 = icmp eq i32 %lftr.wideiv9.1, 32
+ br i1 %exitcond10.1, label %.preheader3.2, label %.preheader.1
+
+.preheader3.2: ; preds = %24
+ %25 = getelementptr inbounds i32* %out, i64 1
+ store i32 %23, i32* %25, align 4
+ br label %.preheader.2
+
+.preheader.2: ; preds = %38, %.preheader3.2
+ %indvars.iv7.2 = phi i64 [ 0, %.preheader3.2 ], [ %indvars.iv.next8.2, %38 ]
+ %sum.05.2 = phi i32 [ 0, %.preheader3.2 ], [ %37, %38 ]
+ br label %26
+
+; <label>:26 ; preds = %26, %.preheader.2
+ %indvars.iv.2 = phi i64 [ 0, %.preheader.2 ], [ %indvars.iv.next.2, %26 ]
+ %sum.12.2 = phi i32 [ %sum.05.2, %.preheader.2 ], [ %37, %26 ]
+ %27 = add nsw i64 %indvars.iv.2, 2
+ %28 = getelementptr inbounds i32** %in, i64 %27
+ %29 = load i32** %28, align 8
+ %30 = getelementptr inbounds i32* %29, i64 %indvars.iv7.2
+ %31 = load i32* %30, align 4
+ %32 = getelementptr inbounds i32** %coeff, i64 %indvars.iv.2
+ %33 = load i32** %32, align 8
+ %34 = getelementptr inbounds i32* %33, i64 %indvars.iv7.2
+ %35 = load i32* %34, align 4
+ %36 = mul nsw i32 %35, %31
+ %37 = add nsw i32 %36, %sum.12.2
+ %indvars.iv.next.2 = add i64 %indvars.iv.2, 1
+ %lftr.wideiv.2 = trunc i64 %indvars.iv.next.2 to i32
+ %exitcond.2 = icmp eq i32 %lftr.wideiv.2, 1024
+ br i1 %exitcond.2, label %38, label %26
+
+; <label>:38 ; preds = %26
+ %indvars.iv.next8.2 = add i64 %indvars.iv7.2, 1
+ %lftr.wideiv9.2 = trunc i64 %indvars.iv.next8.2 to i32
+ %exitcond10.2 = icmp eq i32 %lftr.wideiv9.2, 32
+ br i1 %exitcond10.2, label %.preheader3.3, label %.preheader.2
+
+.preheader3.3: ; preds = %38
+ %39 = getelementptr inbounds i32* %out, i64 2
+ store i32 %37, i32* %39, align 4
+ br label %.preheader.3
+
+.preheader.3: ; preds = %52, %.preheader3.3
+ %indvars.iv7.3 = phi i64 [ 0, %.preheader3.3 ], [ %indvars.iv.next8.3, %52 ]
+ %sum.05.3 = phi i32 [ 0, %.preheader3.3 ], [ %51, %52 ]
+ br label %40
+
+; <label>:40 ; preds = %40, %.preheader.3
+ %indvars.iv.3 = phi i64 [ 0, %.preheader.3 ], [ %indvars.iv.next.3, %40 ]
+ %sum.12.3 = phi i32 [ %sum.05.3, %.preheader.3 ], [ %51, %40 ]
+ %41 = add nsw i64 %indvars.iv.3, 3
+ %42 = getelementptr inbounds i32** %in, i64 %41
+ %43 = load i32** %42, align 8
+ %44 = getelementptr inbounds i32* %43, i64 %indvars.iv7.3
+ %45 = load i32* %44, align 4
+ %46 = getelementptr inbounds i32** %coeff, i64 %indvars.iv.3
+ %47 = load i32** %46, align 8
+ %48 = getelementptr inbounds i32* %47, i64 %indvars.iv7.3
+ %49 = load i32* %48, align 4
+ %50 = mul nsw i32 %49, %45
+ %51 = add nsw i32 %50, %sum.12.3
+ %indvars.iv.next.3 = add i64 %indvars.iv.3, 1
+ %lftr.wideiv.3 = trunc i64 %indvars.iv.next.3 to i32
+ %exitcond.3 = icmp eq i32 %lftr.wideiv.3, 1024
+ br i1 %exitcond.3, label %52, label %40
+
+; <label>:52 ; preds = %40
+ %indvars.iv.next8.3 = add i64 %indvars.iv7.3, 1
+ %lftr.wideiv9.3 = trunc i64 %indvars.iv.next8.3 to i32
+ %exitcond10.3 = icmp eq i32 %lftr.wideiv9.3, 32
+ br i1 %exitcond10.3, label %53, label %.preheader.3
+
+; <label>:53 ; preds = %52
+ %54 = getelementptr inbounds i32* %out, i64 3
+ store i32 %51, i32* %54, align 4
+ ret void
+}
+
+; Can't vectorize because the src and dst pointers are not disjoint.
+;CHECK: @example21
+;CHECK-NOT: <4 x i32>
+;CHECK: ret i32
+define i32 @example21(i32* nocapture %b, i32 %n) nounwind uwtable readonly ssp {
+ %1 = icmp sgt i32 %n, 0
+ br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph: ; preds = %0
+ %2 = sext i32 %n to i64
+ br label %3
+
+; <label>:3 ; preds = %.lr.ph, %3
+ %indvars.iv = phi i64 [ %2, %.lr.ph ], [ %indvars.iv.next, %3 ]
+ %a.02 = phi i32 [ 0, %.lr.ph ], [ %6, %3 ]
+ %indvars.iv.next = add i64 %indvars.iv, -1
+ %4 = getelementptr inbounds i32* %b, i64 %indvars.iv.next
+ %5 = load i32* %4, align 4
+ %6 = add nsw i32 %5, %a.02
+ %7 = trunc i64 %indvars.iv.next to i32
+ %8 = icmp sgt i32 %7, 0
+ br i1 %8, label %3, label %._crit_edge
+
+._crit_edge: ; preds = %3, %0
+ %a.0.lcssa = phi i32 [ 0, %0 ], [ %6, %3 ]
+ ret i32 %a.0.lcssa
+}
+
+; Can't vectorize because there are multiple PHIs.
+;CHECK: @example23
+;CHECK-NOT: <4 x i32>
+;CHECK: ret void
+define void @example23(i16* nocapture %src, i32* nocapture %dst) nounwind uwtable ssp {
+ br label %1
+
+; <label>:1 ; preds = %1, %0
+ %.04 = phi i16* [ %src, %0 ], [ %2, %1 ]
+ %.013 = phi i32* [ %dst, %0 ], [ %6, %1 ]
+ %i.02 = phi i32 [ 0, %0 ], [ %7, %1 ]
+ %2 = getelementptr inbounds i16* %.04, i64 1
+ %3 = load i16* %.04, align 2
+ %4 = zext i16 %3 to i32
+ %5 = shl nuw nsw i32 %4, 7
+ %6 = getelementptr inbounds i32* %.013, i64 1
+ store i32 %5, i32* %.013, align 4
+ %7 = add nsw i32 %i.02, 1
+ %exitcond = icmp eq i32 %7, 256
+ br i1 %exitcond, label %8, label %1
+
+; <label>:8 ; preds = %1
+ ret void
+}
+
+;CHECK: @example24
+;CHECK: shufflevector <4 x i16>
+;CHECK: ret void
+define void @example24(i16 signext %x, i16 signext %y) nounwind uwtable ssp {
+ br label %1
+
+; <label>:1 ; preds = %1, %0
+ %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+ %2 = getelementptr inbounds [1024 x float]* @fa, i64 0, i64 %indvars.iv
+ %3 = load float* %2, align 4
+ %4 = getelementptr inbounds [1024 x float]* @fb, i64 0, i64 %indvars.iv
+ %5 = load float* %4, align 4
+ %6 = fcmp olt float %3, %5
+ %x.y = select i1 %6, i16 %x, i16 %y
+ %7 = sext i16 %x.y to i32
+ %8 = getelementptr inbounds [1024 x i32]* @ic, i64 0, i64 %indvars.iv
+ store i32 %7, i32* %8, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 1024
+ br i1 %exitcond, label %9, label %1
+
+; <label>:9 ; preds = %1
+ ret void
+}
+
+;CHECK: @example25
+;CHECK: and <4 x i1>
+;CHECK: zext <4 x i1>
+;CHECK: ret void
+define void @example25() nounwind uwtable ssp {
+ br label %1
+
+; <label>:1 ; preds = %1, %0
+ %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+ %2 = getelementptr inbounds [1024 x float]* @da, i64 0, i64 %indvars.iv
+ %3 = load float* %2, align 4
+ %4 = getelementptr inbounds [1024 x float]* @db, i64 0, i64 %indvars.iv
+ %5 = load float* %4, align 4
+ %6 = fcmp olt float %3, %5
+ %7 = getelementptr inbounds [1024 x float]* @dc, i64 0, i64 %indvars.iv
+ %8 = load float* %7, align 4
+ %9 = getelementptr inbounds [1024 x float]* @dd, i64 0, i64 %indvars.iv
+ %10 = load float* %9, align 4
+ %11 = fcmp olt float %8, %10
+ %12 = and i1 %6, %11
+ %13 = zext i1 %12 to i32
+ %14 = getelementptr inbounds [1024 x i32]* @dj, i64 0, i64 %indvars.iv
+ store i32 %13, i32* %14, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 1024
+ br i1 %exitcond, label %15, label %1
+
+; <label>:15 ; preds = %1
+ ret void
+}
+
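These functions are IR renderings of the classic GCC vectorizer examples; the CHECK lines mark which loops the loop vectorizer is expected to widen (unit-stride, countable loops such as example1, example10a or example12) and which it must reject (reductions, non-constant bounds with pointer induction, non-disjoint pointers, multiple PHIs). For orientation, example1 corresponds roughly to the following source (a sketch, not the file this IR was generated from):

    int a[2048], b[2048], c[2048];

    // example1: a unit-stride loop with a constant trip count of 256; the
    // vectorizer turns it into <4 x i32> loads, an add and a store, exactly
    // what the CHECK lines at the top of the file require.
    void example1() {
      for (int i = 0; i < 256; ++i)
        a[i] = b[i] + c[i];
    }
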
diff --git a/test/Transforms/LoopVectorize/lit.local.cfg b/test/Transforms/LoopVectorize/lit.local.cfg
new file mode 100644
index 0000000000..19eebc0ac7
--- /dev/null
+++ b/test/Transforms/LoopVectorize/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LoopVectorize/non-const-n.ll b/test/Transforms/LoopVectorize/non-const-n.ll
new file mode 100644
index 0000000000..04c5c84a4f
--- /dev/null
+++ b/test/Transforms/LoopVectorize/non-const-n.ll
@@ -0,0 +1,38 @@
+; RUN: opt < %s -loop-vectorize -dce -instcombine -licm -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+@b = common global [2048 x i32] zeroinitializer, align 16
+@c = common global [2048 x i32] zeroinitializer, align 16
+@a = common global [2048 x i32] zeroinitializer, align 16
+
+;CHECK: @example1
+;CHECK: shl i32
+;CHECK: zext i32
+;CHECK: load <4 x i32>
+;CHECK: add <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret void
+define void @example1(i32 %n) nounwind uwtable ssp {
+ %n4 = shl i32 %n, 2
+ br label %1
+
+; <label>:1 ; preds = %1, %0
+ %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+ %2 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv
+ %3 = load i32* %2, align 4
+ %4 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %indvars.iv
+ %5 = load i32* %4, align 4
+ %6 = add nsw i32 %5, %3
+ %7 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+ store i32 %6, i32* %7, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n4
+ br i1 %exitcond, label %8, label %1
+
+; <label>:8 ; preds = %1
+ ret void
+}
+
diff --git a/test/Transforms/SROA/basictest.ll b/test/Transforms/SROA/basictest.ll
index 644fda167d..03120f7a32 100644
--- a/test/Transforms/SROA/basictest.ll
+++ b/test/Transforms/SROA/basictest.ll
@@ -1063,3 +1063,23 @@ entry:
call void @llvm.lifetime.end(i64 -1, i8* %0)
ret void
}
+
+define void @PR14105({ [16 x i8] }* %ptr) {
+; Ensure that when rewriting the GEP index '-1' for this alloca we preserve its
+; sign as negative. We use a volatile memcpy to ensure promotion never actually
+; occurs.
+; CHECK: @PR14105
+
+entry:
+ %a = alloca { [16 x i8] }, align 8
+; CHECK: alloca [16 x i8], align 8
+
+ %gep = getelementptr inbounds { [16 x i8] }* %ptr, i64 -1
+; CHECK-NEXT: getelementptr inbounds { [16 x i8] }* %ptr, i64 -1, i32 0, i64 0
+
+ %cast1 = bitcast { [16 x i8 ] }* %gep to i8*
+ %cast2 = bitcast { [16 x i8 ] }* %a to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %cast1, i8* %cast2, i32 16, i32 8, i1 true)
+ ret void
+; CHECK: ret
+}
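
The point of PR14105 is that the index -1 must keep its sign when SROA rewrites the GEP: widened as an unsigned value it would turn a one-element step backwards into an enormous forward offset. A tiny standalone illustration of the difference (plain C++, not SROA code):

    #include <cassert>
    #include <cstdint>

    int main() {
      int32_t Idx = -1;
      int64_t SExt = static_cast<int64_t>(Idx);                          // -1
      int64_t ZExt = static_cast<int64_t>(static_cast<uint32_t>(Idx));   // 4294967295
      // With a 16-byte element only the sign-extended index steps back one slot.
      assert(SExt * 16 == -16);
      assert(ZExt * 16 == 68719476720LL);
      return 0;
    }
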
diff --git a/test/Transforms/SimplifyLibCalls/StrCpy.ll b/test/Transforms/SimplifyLibCalls/StrCpy.ll
deleted file mode 100644
index 83406ff8f8..0000000000
--- a/test/Transforms/SimplifyLibCalls/StrCpy.ll
+++ /dev/null
@@ -1,37 +0,0 @@
-; Test that the StrCpyOptimizer works correctly
-; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
-
-; This transformation requires the pointer size, as it assumes that size_t is
-; the size of a pointer.
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
-
-@hello = constant [6 x i8] c"hello\00"
-
-declare i8* @strcpy(i8*, i8*)
-
-declare i8* @__strcpy_chk(i8*, i8*, i32) nounwind
-
-declare i32 @llvm.objectsize.i32(i8*, i1) nounwind readonly
-
-; rdar://6839935
-
-define i32 @t1() {
-; CHECK: @t1
- %target = alloca [1024 x i8]
- %arg1 = getelementptr [1024 x i8]* %target, i32 0, i32 0
- %arg2 = getelementptr [6 x i8]* @hello, i32 0, i32 0
- %rslt1 = call i8* @strcpy( i8* %arg1, i8* %arg2 )
-; CHECK: @llvm.memcpy.p0i8.p0i8.i32
- ret i32 0
-}
-
-define i32 @t2() {
-; CHECK: @t2
- %target = alloca [1024 x i8]
- %arg1 = getelementptr [1024 x i8]* %target, i32 0, i32 0
- %arg2 = getelementptr [6 x i8]* @hello, i32 0, i32 0
- %tmp1 = call i32 @llvm.objectsize.i32(i8* %arg1, i1 false)
- %rslt1 = call i8* @__strcpy_chk(i8* %arg1, i8* %arg2, i32 %tmp1)
-; CHECK: @__memcpy_chk
- ret i32 0
-}
diff --git a/test/lit.cfg b/test/lit.cfg
index dc37317ba9..f24a854dc7 100644
--- a/test/lit.cfg
+++ b/test/lit.cfg
@@ -152,7 +152,10 @@ config.target_triple += lit.valgrindTriple
# Provide a substitution for those tests that need to run the jit to obtain data
# but simply want to use the currently considered most reliable jit for the platform
-defaultIsMCJIT='true' if 'arm' in config.target_triple else 'false'
+if 'arm' in config.target_triple:
+ defaultIsMCJIT = 'true'
+else:
+ defaultIsMCJIT = 'false'
config.substitutions.append( ('%defaultjit', '-use-mcjit='+defaultIsMCJIT) )
# Process jit implementation option
diff --git a/tools/bugpoint-passes/bugpoint.exports b/tools/bugpoint-passes/bugpoint.exports
index d8fdd6a576..e69de29bb2 100644
--- a/tools/bugpoint-passes/bugpoint.exports
+++ b/tools/bugpoint-passes/bugpoint.exports
@@ -1 +0,0 @@
-_ZN4llvm14BasicBlockPass14doFinalizationERNS_6ModuleE
diff --git a/tools/llc/llc.cpp b/tools/llc/llc.cpp
index d586b22120..a6c670a7fc 100644
--- a/tools/llc/llc.cpp
+++ b/tools/llc/llc.cpp
@@ -22,7 +22,6 @@
#include "llvm/Assembly/PrintModulePass.h"
#include "llvm/Support/DataStream.h" // @LOCALMOD
#include "llvm/Support/IRReader.h"
-#include "llvm/CodeGen/CommandFlags.h"
#include "llvm/CodeGen/IntrinsicLowering.h" // @LOCALMOD
#include "llvm/CodeGen/LinkAllAsmWriterComponents.h"
#include "llvm/CodeGen/LinkAllCodegenComponents.h"
@@ -84,6 +83,26 @@ OutputFilename("o", cl::desc("Output filename"), cl::value_desc("filename"));
static cl::opt<std::string>
MetadataTextFilename("metadata-text", cl::desc("Metadata as text, out filename"),
cl::value_desc("filename"));
+
+// Using bitcode streaming has a couple of ramifications. Primarily it means
+// that the module in the file will be compiled one function at a time rather
+// than the whole module. This allows earlier functions to be compiled before
+// later functions are read from the bitcode but of course means no whole-module
+// optimizations. For now, streaming is only supported for files and stdin.
+static cl::opt<bool>
+LazyBitcode("streaming-bitcode",
+ cl::desc("Use lazy bitcode streaming for file inputs"),
+ cl::init(false));
+
+// The option below overlaps very much with bitcode streaming.
+// We keep it separate because it is still experimental and we want
+// to use it without changing the outside behavior, which is especially
+// relevant for the sandboxed case.
+static cl::opt<bool>
+ReduceMemoryFootprint("reduce-memory-footprint",
+ cl::desc("Aggressively reduce memory used by llc"),
+ cl::init(false));
+
// @LOCALMOD-END
// Determine optimization level.
@@ -98,34 +117,216 @@ OptLevel("O",
static cl::opt<std::string>
TargetTriple("mtriple", cl::desc("Override target triple for module"));
+static cl::opt<std::string>
+MArch("march", cl::desc("Architecture to generate code for (see --version)"));
+
+static cl::opt<std::string>
+MCPU("mcpu",
+ cl::desc("Target a specific cpu type (-mcpu=help for details)"),
+ cl::value_desc("cpu-name"),
+ cl::init(""));
+
+static cl::list<std::string>
+MAttrs("mattr",
+ cl::CommaSeparated,
+ cl::desc("Target specific attributes (-mattr=help for details)"),
+ cl::value_desc("a1,+a2,-a3,..."));
+
+static cl::opt<Reloc::Model>
+RelocModel("relocation-model",
+ cl::desc("Choose relocation model"),
+ cl::init(Reloc::Default),
+ cl::values(
+ clEnumValN(Reloc::Default, "default",
+ "Target default relocation model"),
+ clEnumValN(Reloc::Static, "static",
+ "Non-relocatable code"),
+ clEnumValN(Reloc::PIC_, "pic",
+ "Fully relocatable, position independent code"),
+ clEnumValN(Reloc::DynamicNoPIC, "dynamic-no-pic",
+ "Relocatable external references, non-relocatable code"),
+ clEnumValEnd));
+
+static cl::opt<llvm::CodeModel::Model>
+CMModel("code-model",
+ cl::desc("Choose code model"),
+ cl::init(CodeModel::Default),
+ cl::values(clEnumValN(CodeModel::Default, "default",
+ "Target default code model"),
+ clEnumValN(CodeModel::Small, "small",
+ "Small code model"),
+ clEnumValN(CodeModel::Kernel, "kernel",
+ "Kernel code model"),
+ clEnumValN(CodeModel::Medium, "medium",
+ "Medium code model"),
+ clEnumValN(CodeModel::Large, "large",
+ "Large code model"),
+ clEnumValEnd));
+
+static cl::opt<bool>
+RelaxAll("mc-relax-all",
+ cl::desc("When used with filetype=obj, "
+ "relax all fixups in the emitted object file"));
+
+cl::opt<TargetMachine::CodeGenFileType>
+FileType("filetype", cl::init(TargetMachine::CGFT_AssemblyFile),
+ cl::desc("Choose a file type (not all types are supported by all targets):"),
+ cl::values(
+ clEnumValN(TargetMachine::CGFT_AssemblyFile, "asm",
+ "Emit an assembly ('.s') file"),
+ clEnumValN(TargetMachine::CGFT_ObjectFile, "obj",
+ "Emit a native object ('.o') file"),
+ clEnumValN(TargetMachine::CGFT_Null, "null",
+ "Emit nothing, for performance testing"),
+ clEnumValEnd));
+
cl::opt<bool> NoVerify("disable-verify", cl::Hidden,
cl::desc("Do not verify input module"));
-cl::opt<bool>
+cl::opt<bool> DisableDotLoc("disable-dot-loc", cl::Hidden,
+ cl::desc("Do not use .loc entries"));
+
+cl::opt<bool> DisableCFI("disable-cfi", cl::Hidden,
+ cl::desc("Do not use .cfi_* directives"));
+
+cl::opt<bool> EnableDwarfDirectory("enable-dwarf-directory", cl::Hidden,
+ cl::desc("Use .file directives with an explicit directory."));
+
+static cl::opt<bool>
+DisableRedZone("disable-red-zone",
+ cl::desc("Do not emit code that uses the red zone."),
+ cl::init(false));
+
+static cl::opt<bool>
+EnableFPMAD("enable-fp-mad",
+ cl::desc("Enable less precise MAD instructions to be generated"),
+ cl::init(false));
+
+static cl::opt<bool>
+DisableFPElim("disable-fp-elim",
+ cl::desc("Disable frame pointer elimination optimization"),
+ cl::init(false));
+
+static cl::opt<bool>
+DisableFPElimNonLeaf("disable-non-leaf-fp-elim",
+ cl::desc("Disable frame pointer elimination optimization for non-leaf funcs"),
+ cl::init(false));
+
+static cl::opt<bool>
+EnableUnsafeFPMath("enable-unsafe-fp-math",
+ cl::desc("Enable optimizations that may decrease FP precision"),
+ cl::init(false));
+
+static cl::opt<bool>
+EnableNoInfsFPMath("enable-no-infs-fp-math",
+ cl::desc("Enable FP math optimizations that assume no +-Infs"),
+ cl::init(false));
+
+static cl::opt<bool>
+EnableNoNaNsFPMath("enable-no-nans-fp-math",
+ cl::desc("Enable FP math optimizations that assume no NaNs"),
+ cl::init(false));
+
+static cl::opt<bool>
+EnableHonorSignDependentRoundingFPMath("enable-sign-dependent-rounding-fp-math",
+ cl::Hidden,
+ cl::desc("Force codegen to assume rounding mode can change dynamically"),
+ cl::init(false));
+
+static cl::opt<bool>
+GenerateSoftFloatCalls("soft-float",
+ cl::desc("Generate software floating point library calls"),
+ cl::init(false));
+
+static cl::opt<llvm::FloatABI::ABIType>
+FloatABIForCalls("float-abi",
+ cl::desc("Choose float ABI type"),
+ cl::init(FloatABI::Default),
+ cl::values(
+ clEnumValN(FloatABI::Default, "default",
+ "Target default float ABI type"),
+ clEnumValN(FloatABI::Soft, "soft",
+ "Soft float ABI (implied by -soft-float)"),
+ clEnumValN(FloatABI::Hard, "hard",
+ "Hard float ABI (uses FP registers)"),
+ clEnumValEnd));
+
+static cl::opt<llvm::FPOpFusion::FPOpFusionMode>
+FuseFPOps("fp-contract",
+  cl::desc("Enable aggressive formation of fused FP ops"),
+ cl::init(FPOpFusion::Standard),
+ cl::values(
+ clEnumValN(FPOpFusion::Fast, "fast",
+ "Fuse FP ops whenever profitable"),
+ clEnumValN(FPOpFusion::Standard, "on",
+ "Only fuse 'blessed' FP ops."),
+ clEnumValN(FPOpFusion::Strict, "off",
+               "Only fuse FP ops when the result won't be affected."),
+ clEnumValEnd));
+
+static cl::opt<bool>
+DontPlaceZerosInBSS("nozero-initialized-in-bss",
+ cl::desc("Don't place zero-initialized symbols into bss section"),
+ cl::init(false));
+
+static cl::opt<bool>
DisableSimplifyLibCalls("disable-simplify-libcalls",
- cl::desc("Disable simplify-libcalls"),
- cl::init(false));
+ cl::desc("Disable simplify-libcalls"),
+ cl::init(false));
-// @LOCALMOD-BEGIN
-// Using bitcode streaming has a couple of ramifications. Primarily it means
-// that the module in the file will be compiled one function at a time rather
-// than the whole module. This allows earlier functions to be compiled before
-// later functions are read from the bitcode but of course means no whole-module
-// optimizations. For now, streaming is only supported for files and stdin.
static cl::opt<bool>
-LazyBitcode("streaming-bitcode",
- cl::desc("Use lazy bitcode streaming for file inputs"),
+EnableGuaranteedTailCallOpt("tailcallopt",
+ cl::desc("Turn fastcc calls into tail calls by (potentially) changing ABI."),
cl::init(false));
-// The option below overlaps very much with bitcode streaming.
-// We keep it separate because it is still experimental and we want
-// to use it without changing the outside behavior which is especially
-// relevant for the sandboxed case.
static cl::opt<bool>
-ReduceMemoryFootprint("reduce-memory-footprint",
- cl::desc("Aggressively reduce memory used by llc"),
+DisableTailCalls("disable-tail-calls",
+ cl::desc("Never emit tail calls"),
cl::init(false));
-// @LOCALMOD-END
+
+static cl::opt<unsigned>
+OverrideStackAlignment("stack-alignment",
+ cl::desc("Override default stack alignment"),
+ cl::init(0));
+
+static cl::opt<bool>
+EnableRealignStack("realign-stack",
+ cl::desc("Realign stack if needed"),
+ cl::init(true));
+
+static cl::opt<std::string>
+TrapFuncName("trap-func", cl::Hidden,
+ cl::desc("Emit a call to trap function rather than a trap instruction"),
+ cl::init(""));
+
+static cl::opt<bool>
+EnablePIE("enable-pie",
+ cl::desc("Assume the creation of a position independent executable."),
+ cl::init(false));
+
+static cl::opt<bool>
+SegmentedStacks("segmented-stacks",
+ cl::desc("Use segmented stacks if possible."),
+ cl::init(false));
+
+static cl::opt<bool>
+UseInitArray("use-init-array",
+ cl::desc("Use .init_array instead of .ctors."),
+ cl::init(false));
+
+static cl::opt<std::string> StopAfter("stop-after",
+ cl::desc("Stop compilation after a specific pass"),
+ cl::value_desc("pass-name"),
+ cl::init(""));
+static cl::opt<std::string> StartAfter("start-after",
+ cl::desc("Resume compilation after a specific pass"),
+ cl::value_desc("pass-name"),
+ cl::init(""));
+
+static cl::opt<unsigned>
+SSPBufferSize("stack-protector-buffer-size", cl::init(8),
+ cl::desc("Lower bound for a buffer to be considered for "
+ "stack protection"));
// GetFileNameRoot - Helper function to get the basename of a filename.
static inline std::string
@@ -474,11 +675,6 @@ int llc_main(int argc, char **argv) {
TLI->disableAllFunctions();
PM->add(TLI);
- if (target.get()) {
- PM->add(new TargetTransformInfo(target->getScalarTargetTransformInfo(),
- target->getVectorTargetTransformInfo()));
- }
-
// Add the target data from the target machine, if it exists, or the module.
if (const DataLayout *TD = Target.getDataLayout())
PM->add(new DataLayout(*TD));
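
Most of the llc.cpp change above declares the codegen command-line options directly in the tool, replacing the removed llvm/CodeGen/CommandFlags.h include. Every one of them follows the same cl::opt pattern: a file-scope global that registers itself at static-initialization time and is filled in by cl::ParseCommandLineOptions. A minimal sketch of that pattern (the option name here is made up):

    #include "llvm/Support/CommandLine.h"
    using namespace llvm;

    // Registered when the global is constructed; written during
    // cl::ParseCommandLineOptions and read like an ordinary bool afterwards.
    static cl::opt<bool>
    ExampleFlag("example-flag",
                cl::desc("Illustrative boolean flag (hypothetical)"),
                cl::init(false));

    int main(int argc, char **argv) {
      cl::ParseCommandLineOptions(argc, argv);
      return ExampleFlag ? 1 : 0;
    }
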
diff --git a/tools/llvm-dwarfdump/llvm-dwarfdump.cpp b/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
index 38c3a1e76f..309bc4ecd4 100644
--- a/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
+++ b/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
@@ -1,4 +1,4 @@
-//===-- llvm-dwarfdump.cpp - Debug info dumping utility for llvm -----------===//
+//===-- llvm-dwarfdump.cpp - Debug info dumping utility for llvm ----------===//
//
// The LLVM Compiler Infrastructure
//
@@ -118,8 +118,8 @@ static void DumpInput(const StringRef &Filename) {
if (PrintFunctions)
SpecFlags |= DILineInfoSpecifier::FunctionName;
if (PrintInlining) {
- DIInliningInfo InliningInfo = dictx->getInliningInfoForAddress(
- Address, SpecFlags);
+ DIInliningInfo InliningInfo =
+ dictx->getInliningInfoForAddress(Address, SpecFlags);
uint32_t n = InliningInfo.getNumberOfFrames();
if (n == 0) {
// Print one empty debug line info in any case.
diff --git a/tools/lto/LTOCodeGenerator.cpp b/tools/lto/LTOCodeGenerator.cpp
index 5d79fda5aa..f417f5f4fd 100644
--- a/tools/lto/LTOCodeGenerator.cpp
+++ b/tools/lto/LTOCodeGenerator.cpp
@@ -519,8 +519,6 @@ bool LTOCodeGenerator::generateObjectFile(raw_ostream &out,
// Add an appropriate DataLayout instance for this module...
passes.add(new DataLayout(*_target->getDataLayout()));
- passes.add(new TargetTransformInfo(_target->getScalarTargetTransformInfo(),
- _target->getVectorTargetTransformInfo()));
// Enabling internalize here would use its AllButMain variant. It
// keeps only main if it exists and does nothing for libraries. Instead
diff --git a/tools/opt/CMakeLists.txt b/tools/opt/CMakeLists.txt
index 32de6d4060..7daf22aa9e 100644
--- a/tools/opt/CMakeLists.txt
+++ b/tools/opt/CMakeLists.txt
@@ -1,4 +1,4 @@
-set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} bitreader asmparser bitwriter instrumentation scalaropts ipo vectorize)
+set(LLVM_LINK_COMPONENTS bitreader asmparser bitwriter instrumentation scalaropts ipo vectorize)
add_llvm_tool(opt
AnalysisWrappers.cpp
diff --git a/tools/opt/LLVMBuild.txt b/tools/opt/LLVMBuild.txt
index b174431e04..4de99f51c8 100644
--- a/tools/opt/LLVMBuild.txt
+++ b/tools/opt/LLVMBuild.txt
@@ -19,4 +19,4 @@
type = Tool
name = opt
parent = Tools
-required_libraries = AsmParser BitReader BitWriter IPO Instrumentation Scalar all-targets
+required_libraries = AsmParser BitReader BitWriter IPO Instrumentation Scalar
diff --git a/tools/opt/Makefile b/tools/opt/Makefile
index ee7e1cf796..16d116da5d 100644
--- a/tools/opt/Makefile
+++ b/tools/opt/Makefile
@@ -9,6 +9,6 @@
LEVEL := ../..
TOOLNAME := opt
-LINK_COMPONENTS := bitreader bitwriter asmparser instrumentation scalaropts ipo vectorize all-targets
+LINK_COMPONENTS := bitreader bitwriter asmparser instrumentation scalaropts ipo vectorize
include $(LEVEL)/Makefile.common
diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp
index 751c8e7da0..35ab2d50cb 100644
--- a/tools/opt/opt.cpp
+++ b/tools/opt/opt.cpp
@@ -18,7 +18,6 @@
#include "llvm/Module.h"
#include "llvm/PassManager.h"
#include "llvm/CallGraphSCCPass.h"
-#include "llvm/CodeGen/CommandFlags.h"
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/Assembly/PrintModulePass.h"
#include "llvm/Analysis/Verifier.h"
@@ -37,9 +36,7 @@
#include "llvm/Support/PluginLoader.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/SystemUtils.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/ToolOutputFile.h"
-#include "llvm/MC/SubtargetFeature.h"
#include "llvm/LinkAllPasses.h"
#include "llvm/LinkAllVMCore.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
@@ -481,75 +478,6 @@ static void AddStandardLinkPasses(PassManagerBase &PM) {
/*RunInliner=*/ !DisableInline);
}
-//===----------------------------------------------------------------------===//
-// CodeGen-related helper functions.
-//
-static TargetOptions GetTargetOptions() {
- TargetOptions Options;
- Options.LessPreciseFPMADOption = EnableFPMAD;
- Options.NoFramePointerElim = DisableFPElim;
- Options.NoFramePointerElimNonLeaf = DisableFPElimNonLeaf;
- Options.AllowFPOpFusion = FuseFPOps;
- Options.UnsafeFPMath = EnableUnsafeFPMath;
- Options.NoInfsFPMath = EnableNoInfsFPMath;
- Options.NoNaNsFPMath = EnableNoNaNsFPMath;
- Options.HonorSignDependentRoundingFPMathOption =
- EnableHonorSignDependentRoundingFPMath;
- Options.UseSoftFloat = GenerateSoftFloatCalls;
- if (FloatABIForCalls != FloatABI::Default)
- Options.FloatABIType = FloatABIForCalls;
- Options.NoZerosInBSS = DontPlaceZerosInBSS;
- Options.GuaranteedTailCallOpt = EnableGuaranteedTailCallOpt;
- Options.DisableTailCalls = DisableTailCalls;
- Options.StackAlignmentOverride = OverrideStackAlignment;
- Options.RealignStack = EnableRealignStack;
- Options.TrapFuncName = TrapFuncName;
- Options.PositionIndependentExecutable = EnablePIE;
- Options.EnableSegmentedStacks = SegmentedStacks;
- Options.UseInitArray = UseInitArray;
- Options.SSPBufferSize = SSPBufferSize;
- return Options;
-}
-
-CodeGenOpt::Level GetCodeGenOptLevel() {
- if (OptLevelO1)
- return CodeGenOpt::Less;
- if (OptLevelO2)
- return CodeGenOpt::Default;
- if (OptLevelO3)
- return CodeGenOpt::Aggressive;
- return CodeGenOpt::None;
-}
-
-// Returns the TargetMachine instance or zero if no triple is provided.
-static TargetMachine* GetTargetMachine(std::string TripleStr) {
- if (TripleStr.empty())
- return 0;
-
- // Get the target specific parser.
- std::string Error;
- Triple TheTriple(Triple::normalize(TargetTriple));
-
- const Target *TheTarget = TargetRegistry::lookupTarget(MArch, TheTriple,
- Error);
- if (!TheTarget) {
- return 0;
- }
-
- // Package up features to be passed to target/subtarget
- std::string FeaturesStr;
- if (MAttrs.size()) {
- SubtargetFeatures Features;
- for (unsigned i = 0; i != MAttrs.size(); ++i)
- Features.AddFeature(MAttrs[i]);
- FeaturesStr = Features.getString();
- }
-
- return TheTarget->createTargetMachine(TheTriple.getTriple(),
- MCPU, FeaturesStr, GetTargetOptions(),
- RelocModel, CMModel,
- GetCodeGenOptLevel());
-}
//===----------------------------------------------------------------------===//
// main for opt
@@ -652,12 +580,6 @@ int main(int argc, char **argv) {
if (TD)
Passes.add(TD);
- std::auto_ptr<TargetMachine> TM(GetTargetMachine(TargetTriple));
- if (TM.get()) {
- Passes.add(new TargetTransformInfo(TM->getScalarTargetTransformInfo(),
- TM->getVectorTargetTransformInfo()));
- }
-
OwningPtr<FunctionPassManager> FPasses;
if (OptLevelO1 || OptLevelO2 || OptLevelOs || OptLevelOz || OptLevelO3) {
FPasses.reset(new FunctionPassManager(M.get()));
diff --git a/unittests/ExecutionEngine/JIT/JITTest.cpp b/unittests/ExecutionEngine/JIT/JITTest.cpp
index ae6855e68b..6933091949 100644
--- a/unittests/ExecutionEngine/JIT/JITTest.cpp
+++ b/unittests/ExecutionEngine/JIT/JITTest.cpp
@@ -606,7 +606,7 @@ TEST_F(JITTest, FunctionIsRecompiledAndRelinked) {
// program from the IR input to the JIT to assert that the JIT doesn't use its
// definition.
extern "C" int32_t JITTest_AvailableExternallyGlobal;
-int32_t JITTest_AvailableExternallyGlobal = 42;
+int32_t JITTest_AvailableExternallyGlobal LLVM_ATTRIBUTE_USED = 42;
namespace {
// Tests on ARM disabled as we're running the old jit
diff --git a/unittests/ExecutionEngine/JIT/Makefile b/unittests/ExecutionEngine/JIT/Makefile
index b535a6b296..9e0bb9ea59 100644
--- a/unittests/ExecutionEngine/JIT/Makefile
+++ b/unittests/ExecutionEngine/JIT/Makefile
@@ -35,8 +35,15 @@ ifeq ($(USE_OPROFILE), 1)
LINK_COMPONENTS += oprofilejit
endif
+EXPORTED_SYMBOL_FILE = $(PROJ_OBJ_DIR)/JITTests.exports
include $(LLVM_SRC_ROOT)/unittests/Makefile.unittest
# Permit these tests to use the JIT's symbolic lookup.
LD.Flags += $(RDYNAMIC)
+
+# Symbol exports are necessary (at least for now) when building with LTO.
+$(LLVMUnitTestExe): $(NativeExportsFile)
+$(PROJ_OBJ_DIR)/JITTests.exports: $(PROJ_SRC_DIR)/JITTests.def $(PROJ_OBJ_DIR)/.dir
+ tail -n +2 $< > $@
+