122 files changed, 3757 insertions, 1255 deletions
diff --git a/docs/LangRef.html b/docs/LangRef.html index 167397ff53..874e12fa44 100644 --- a/docs/LangRef.html +++ b/docs/LangRef.html @@ -1364,11 +1364,13 @@ target datalayout = "<i>layout specification</i>" 8-bits. If omitted, the natural stack alignment defaults to "unspecified", which does not prevent any alignment promotions.</dd> - <dt><tt>p:<i>size</i>:<i>abi</i>:<i>pref</i></tt></dt> + <dt><tt>p[n]:<i>size</i>:<i>abi</i>:<i>pref</i></tt></dt> <dd>This specifies the <i>size</i> of a pointer and its <i>abi</i> and - <i>preferred</i> alignments. All sizes are in bits. Specifying - the <i>pref</i> alignment is optional. If omitted, the - preceding <tt>:</tt> should be omitted too.</dd> + <i>preferred</i> alignments for address space <i>n</i>. All sizes are in + bits. Specifying the <i>pref</i> alignment is optional. If omitted, the + preceding <tt>:</tt> should be omitted too. The address space, + <i>n</i> is optional, and if not specified, denotes the default address + space 0. The value of <i>n</i> must be in the range [1,2^23).</dd> <dt><tt>i<i>size</i>:<i>abi</i>:<i>pref</i></tt></dt> <dd>This specifies the alignment for an integer type of a given bit @@ -1409,6 +1411,10 @@ target datalayout = "<i>layout specification</i>" <ul> <li><tt>E</tt> - big endian</li> <li><tt>p:64:64:64</tt> - 64-bit pointers with 64-bit alignment</li> + <li><tt>p1:32:32:32</tt> - 32-bit pointers with 32-bit alignment for + address space 1</li> + <li><tt>p2:16:32:32</tt> - 16-bit pointers with 32-bit alignment for + address space 2</li> <li><tt>i1:8:8</tt> - i1 is 8-bit (byte) aligned</li> <li><tt>i8:8:8</tt> - i8 is 8-bit (byte) aligned</li> <li><tt>i16:16:16</tt> - i16 is 16-bit aligned</li> diff --git a/docs/README.txt b/docs/README.txt index 2fbbf98740..5ddd599d8a 100644 --- a/docs/README.txt +++ b/docs/README.txt @@ -6,7 +6,7 @@ The LLVM documentation is currently written in two formats: * Plain HTML documentation. * reStructured Text documentation using the Sphinx documentation generator. It - is currently tested with Sphinx 1.1.3. + is currently tested with Sphinx 1.1.3. For more information, see the "Sphinx Introduction for LLVM Developers" document. diff --git a/docs/ReleaseNotes.html b/docs/ReleaseNotes.html index 75a6fd1ca1..26c5213b12 100644 --- a/docs/ReleaseNotes.html +++ b/docs/ReleaseNotes.html @@ -466,6 +466,18 @@ Release Notes</a>.</h1> <p>In addition to many minor performance tweaks and bug fixes, this release includes a few major enhancements and additions to the optimizers:</p> +<p> Loop Vectorizer - We've added a basic loop vectorizer and we are now able + to vectorize small loops. The loop vectorizer is disabled by default and + can be enabled using the -mllvm -vectorize flags. We can vectorize this code: + + <pre class="doc_code"> + for (i=0; i<n; i++) { + a[i] = b[i+1] + c[i+3] + i; + } + </pre> + + </p> + <ul> <li>...</li> </ul> @@ -506,6 +518,8 @@ Release Notes</a>.</h1> We use the lifetime markers to tell the codegen that a certain alloca is used within a region.</p> +<p> We now merge consecutive loads and stores. </p> + <p>We have put a significant amount of work into the code generator infrastructure, which allows us to implement more aggressive algorithms and make it run faster:</p> @@ -645,6 +659,11 @@ Release Notes</a>.</h1> <p>In addition, many APIs have changed in this release. Some of the major LLVM API changes are:</p> +<p> We've added a new interface for allowing IR-level passes to access + target-specific information. 
A new IR-level pass, called + "TargetTransformInfo" provides a number of low-level interfaces. + LSR and LowerInvoke already use the new interface. </p> + <ul> <li>...</li> </ul> diff --git a/docs/subsystems.rst b/docs/subsystems.rst index 8c3cdf2417..6f77b79fbe 100644 --- a/docs/subsystems.rst +++ b/docs/subsystems.rst @@ -91,3 +91,10 @@ Subsystem Documentation * :ref:`segmented_stacks` This document describes segmented stacks and how they are used in LLVM. + +* `Howto: Implementing LLVM Integrated Assembler`_ + + A simple guide for how to implement an LLVM integrated assembler for an + architecture. + +.. _`Howto: Implementing LLVM Integrated Assembler`: http://www.embecosm.com/download/ean10.html diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h index 3ab9c8256b..3f8f149cb4 100644 --- a/include/llvm/Analysis/ScalarEvolutionExpander.h +++ b/include/llvm/Analysis/ScalarEvolutionExpander.h @@ -22,7 +22,7 @@ #include <set> namespace llvm { - class ScalarTargetTransformInfo; + class TargetLowering; /// Return true if the given expression is safe to expand in the sense that /// all materialized values are safe to speculate. @@ -129,7 +129,7 @@ namespace llvm { /// representative. Return the number of phis eliminated. unsigned replaceCongruentIVs(Loop *L, const DominatorTree *DT, SmallVectorImpl<WeakVH> &DeadInsts, - const ScalarTargetTransformInfo *STTI = NULL); + const TargetLowering *TLI = NULL); /// expandCodeFor - Insert code to directly compute the specified SCEV /// expression into the program. The inserted code is inserted into the diff --git a/include/llvm/CodeGen/CommandFlags.h b/include/llvm/CodeGen/CommandFlags.h deleted file mode 100644 index 90ee234244..0000000000 --- a/include/llvm/CodeGen/CommandFlags.h +++ /dev/null @@ -1,228 +0,0 @@ -//===-- CommandFlags.h - Register Coalescing Interface ----------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains codegen-specific flags that are shared between different -// command line tools. The tools "llc" and "opt" both use this file to prevent -// flag duplication. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_COMMAND_LINE_FLAGS_H -#define LLVM_CODEGEN_COMMAND_LINE_FLAGS_H - -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/CodeGen.h" -#include "llvm/Target/TargetMachine.h" - -#include <string> -using namespace llvm; - -cl::opt<std::string> -MArch("march", cl::desc("Architecture to generate code for (see --version)")); - -cl::opt<std::string> -MCPU("mcpu", - cl::desc("Target a specific cpu type (-mcpu=help for details)"), - cl::value_desc("cpu-name"), - cl::init("")); - -cl::list<std::string> -MAttrs("mattr", - cl::CommaSeparated, - cl::desc("Target specific attributes (-mattr=help for details)"), - cl::value_desc("a1,+a2,-a3,...")); - -cl::opt<Reloc::Model> -RelocModel("relocation-model", - cl::desc("Choose relocation model"), - cl::init(Reloc::Default), - cl::values( - clEnumValN(Reloc::Default, "default", - "Target default relocation model"), - clEnumValN(Reloc::Static, "static", - "Non-relocatable code"), - clEnumValN(Reloc::PIC_, "pic", - "Fully relocatable, position independent code"), - clEnumValN(Reloc::DynamicNoPIC, "dynamic-no-pic", - "Relocatable external references, non-relocatable code"), - clEnumValEnd)); - -cl::opt<llvm::CodeModel::Model> -CMModel("code-model", - cl::desc("Choose code model"), - cl::init(CodeModel::Default), - cl::values(clEnumValN(CodeModel::Default, "default", - "Target default code model"), - clEnumValN(CodeModel::Small, "small", - "Small code model"), - clEnumValN(CodeModel::Kernel, "kernel", - "Kernel code model"), - clEnumValN(CodeModel::Medium, "medium", - "Medium code model"), - clEnumValN(CodeModel::Large, "large", - "Large code model"), - clEnumValEnd)); - -cl::opt<bool> -RelaxAll("mc-relax-all", - cl::desc("When used with filetype=obj, " - "relax all fixups in the emitted object file")); - -cl::opt<TargetMachine::CodeGenFileType> -FileType("filetype", cl::init(TargetMachine::CGFT_AssemblyFile), - cl::desc("Choose a file type (not all types are supported by all targets):"), - cl::values( - clEnumValN(TargetMachine::CGFT_AssemblyFile, "asm", - "Emit an assembly ('.s') file"), - clEnumValN(TargetMachine::CGFT_ObjectFile, "obj", - "Emit a native object ('.o') file"), - clEnumValN(TargetMachine::CGFT_Null, "null", - "Emit nothing, for performance testing"), - clEnumValEnd)); - -cl::opt<bool> DisableDotLoc("disable-dot-loc", cl::Hidden, - cl::desc("Do not use .loc entries")); - -cl::opt<bool> DisableCFI("disable-cfi", cl::Hidden, - cl::desc("Do not use .cfi_* directives")); - -cl::opt<bool> EnableDwarfDirectory("enable-dwarf-directory", cl::Hidden, - cl::desc("Use .file directives with an explicit directory.")); - -cl::opt<bool> -DisableRedZone("disable-red-zone", - cl::desc("Do not emit code that uses the red zone."), - cl::init(false)); - -cl::opt<bool> -EnableFPMAD("enable-fp-mad", - cl::desc("Enable less precise MAD instructions to be generated"), - cl::init(false)); - -cl::opt<bool> -DisableFPElim("disable-fp-elim", - cl::desc("Disable frame pointer elimination optimization"), - cl::init(false)); - -cl::opt<bool> -DisableFPElimNonLeaf("disable-non-leaf-fp-elim", - cl::desc("Disable frame pointer elimination optimization for non-leaf funcs"), - cl::init(false)); - -cl::opt<bool> -EnableUnsafeFPMath("enable-unsafe-fp-math", - cl::desc("Enable optimizations that may decrease FP precision"), - cl::init(false)); - -cl::opt<bool> -EnableNoInfsFPMath("enable-no-infs-fp-math", - cl::desc("Enable FP math optimizations that assume no +-Infs"), 
- cl::init(false)); - -cl::opt<bool> -EnableNoNaNsFPMath("enable-no-nans-fp-math", - cl::desc("Enable FP math optimizations that assume no NaNs"), - cl::init(false)); - -cl::opt<bool> -EnableHonorSignDependentRoundingFPMath("enable-sign-dependent-rounding-fp-math", - cl::Hidden, - cl::desc("Force codegen to assume rounding mode can change dynamically"), - cl::init(false)); - -cl::opt<bool> -GenerateSoftFloatCalls("soft-float", - cl::desc("Generate software floating point library calls"), - cl::init(false)); - -cl::opt<llvm::FloatABI::ABIType> -FloatABIForCalls("float-abi", - cl::desc("Choose float ABI type"), - cl::init(FloatABI::Default), - cl::values( - clEnumValN(FloatABI::Default, "default", - "Target default float ABI type"), - clEnumValN(FloatABI::Soft, "soft", - "Soft float ABI (implied by -soft-float)"), - clEnumValN(FloatABI::Hard, "hard", - "Hard float ABI (uses FP registers)"), - clEnumValEnd)); - -cl::opt<llvm::FPOpFusion::FPOpFusionMode> -FuseFPOps("fp-contract", - cl::desc("Enable aggresive formation of fused FP ops"), - cl::init(FPOpFusion::Standard), - cl::values( - clEnumValN(FPOpFusion::Fast, "fast", - "Fuse FP ops whenever profitable"), - clEnumValN(FPOpFusion::Standard, "on", - "Only fuse 'blessed' FP ops."), - clEnumValN(FPOpFusion::Strict, "off", - "Only fuse FP ops when the result won't be effected."), - clEnumValEnd)); - -cl::opt<bool> -DontPlaceZerosInBSS("nozero-initialized-in-bss", - cl::desc("Don't place zero-initialized symbols into bss section"), - cl::init(false)); - -cl::opt<bool> -EnableGuaranteedTailCallOpt("tailcallopt", - cl::desc("Turn fastcc calls into tail calls by (potentially) changing ABI."), - cl::init(false)); - -cl::opt<bool> -DisableTailCalls("disable-tail-calls", - cl::desc("Never emit tail calls"), - cl::init(false)); - -cl::opt<unsigned> -OverrideStackAlignment("stack-alignment", - cl::desc("Override default stack alignment"), - cl::init(0)); - -cl::opt<bool> -EnableRealignStack("realign-stack", - cl::desc("Realign stack if needed"), - cl::init(true)); - -cl::opt<std::string> -TrapFuncName("trap-func", cl::Hidden, - cl::desc("Emit a call to trap function rather than a trap instruction"), - cl::init("")); - -cl::opt<bool> -EnablePIE("enable-pie", - cl::desc("Assume the creation of a position independent executable."), - cl::init(false)); - -cl::opt<bool> -SegmentedStacks("segmented-stacks", - cl::desc("Use segmented stacks if possible."), - cl::init(false)); - -cl::opt<bool> -UseInitArray("use-init-array", - cl::desc("Use .init_array instead of .ctors."), - cl::init(false)); - -cl::opt<std::string> StopAfter("stop-after", - cl::desc("Stop compilation after a specific pass"), - cl::value_desc("pass-name"), - cl::init("")); -cl::opt<std::string> StartAfter("start-after", - cl::desc("Resume compilation after a specific pass"), - cl::value_desc("pass-name"), - cl::init("")); - -cl::opt<unsigned> -SSPBufferSize("stack-protector-buffer-size", cl::init(8), - cl::desc("Lower bound for a buffer to be considered for " - "stack protection")); -#endif diff --git a/include/llvm/CodeGen/MachineFrameInfo.h b/include/llvm/CodeGen/MachineFrameInfo.h index 7188b1abbd..0e4e132e40 100644 --- a/include/llvm/CodeGen/MachineFrameInfo.h +++ b/include/llvm/CodeGen/MachineFrameInfo.h @@ -29,6 +29,7 @@ class MachineBasicBlock; class TargetFrameLowering; class BitVector; class Value; +class AllocaInst; /// The CalleeSavedInfo class tracks the information need to locate where a /// callee saved register is in the current frame. 
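// (An illustrative motivation, assumed rather than stated by the patch:
// storing the AllocaInst directly lets consumers such as StackColoring
// further below use alloca-specific API without casting from Value.)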
@@ -106,14 +107,14 @@ class MachineFrameInfo { /// Alloca - If this stack object is originated from an Alloca instruction /// this value saves the original IR allocation. Can be NULL. - const Value *Alloca; + const AllocaInst *Alloca; // PreAllocated - If true, the object was mapped into the local frame // block and doesn't need additional handling for allocation beyond that. bool PreAllocated; StackObject(uint64_t Sz, unsigned Al, int64_t SP, bool IM, - bool isSS, bool NSP, const Value *Val) + bool isSS, bool NSP, const AllocaInst *Val) : SPOffset(SP), Size(Sz), Alignment(Al), isImmutable(IM), isSpillSlot(isSS), MayNeedSP(NSP), Alloca(Val), PreAllocated(false) {} }; @@ -369,7 +370,7 @@ public: /// getObjectAllocation - Return the underlying Alloca of the specified /// stack object if it exists. Returns 0 if none exists. - const Value* getObjectAllocation(int ObjectIdx) const { + const AllocaInst* getObjectAllocation(int ObjectIdx) const { assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() && "Invalid Object Idx!"); return Objects[ObjectIdx+NumFixedObjects].Alloca; @@ -495,7 +496,7 @@ public: /// a nonnegative identifier to represent it. /// int CreateStackObject(uint64_t Size, unsigned Alignment, bool isSS, - bool MayNeedSP = false, const Value *Alloca = 0) { + bool MayNeedSP = false, const AllocaInst *Alloca = 0) { assert(Size != 0 && "Cannot allocate zero size stack objects!"); Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, MayNeedSP, Alloca)); diff --git a/include/llvm/CodeGen/MachineRegisterInfo.h b/include/llvm/CodeGen/MachineRegisterInfo.h index a5bc7f7d39..4e86363f07 100644 --- a/include/llvm/CodeGen/MachineRegisterInfo.h +++ b/include/llvm/CodeGen/MachineRegisterInfo.h @@ -77,16 +77,20 @@ class MachineRegisterInfo { return MO->Contents.Reg.Next; } - /// UsedPhysRegs - This is a bit vector that is computed and set by the + /// UsedRegUnits - This is a bit vector that is computed and set by the /// register allocator, and must be kept up to date by passes that run after /// register allocation (though most don't modify this). This is used /// so that the code generator knows which callee save registers to save and /// for other target specific uses. - /// This vector only has bits set for registers explicitly used, not their - /// aliases. - BitVector UsedPhysRegs; - - /// UsedPhysRegMask - Additional used physregs, but including aliases. + /// This vector has bits set for register units that are modified in the + /// current function. It doesn't include registers clobbered by function + /// calls with register mask operands. + BitVector UsedRegUnits; + + /// UsedPhysRegMask - Additional used physregs including aliases. + /// This bit vector represents all the registers clobbered by function calls. + /// It can model things that UsedRegUnits can't, such as function calls that + /// clobber ymm7 but preserve the low half in xmm7. BitVector UsedPhysRegMask; /// ReservedRegs - This is a bit vector of reserved registers. The target @@ -357,29 +361,27 @@ public: //===--------------------------------------------------------------------===// /// isPhysRegUsed - Return true if the specified register is used in this - /// function. This only works after register allocation. + /// function. Also check for clobbered aliases and registers clobbered by + /// function calls with register mask operands. + /// + /// This only works after register allocation. It is primarily used by + /// PrologEpilogInserter to determine which callee-saved registers need + /// spilling. 
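// An illustrative sketch, not part of this patch (the register names are
// examples only): with register units, marking a small register also
// answers overlap queries on the registers that contain it:
//   MRI.setPhysRegUsed(X86::AX);          // sets the units AX shares
//   bool C = MRI.isPhysRegUsed(X86::EAX); // true, since EAX overlaps AX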
bool isPhysRegUsed(unsigned Reg) const { - return UsedPhysRegs.test(Reg) || UsedPhysRegMask.test(Reg); - } - - /// isPhysRegOrOverlapUsed - Return true if Reg or any overlapping register - /// is used in this function. - bool isPhysRegOrOverlapUsed(unsigned Reg) const { if (UsedPhysRegMask.test(Reg)) return true; - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - if (UsedPhysRegs.test(*AI)) + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) + if (UsedRegUnits.test(*Units)) return true; return false; } /// setPhysRegUsed - Mark the specified register used in this function. /// This should only be called during and after register allocation. - void setPhysRegUsed(unsigned Reg) { UsedPhysRegs.set(Reg); } - - /// addPhysRegsUsed - Mark the specified registers used in this function. - /// This should only be called during and after register allocation. - void addPhysRegsUsed(const BitVector &Regs) { UsedPhysRegs |= Regs; } + void setPhysRegUsed(unsigned Reg) { + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) + UsedRegUnits.set(*Units); + } /// addPhysRegsUsedFromRegMask - Mark any registers not in RegMask as used. /// This corresponds to the bit mask attached to register mask operands. @@ -390,8 +392,9 @@ public: /// setPhysRegUnused - Mark the specified register unused in this function. /// This should only be called during and after register allocation. void setPhysRegUnused(unsigned Reg) { - UsedPhysRegs.reset(Reg); UsedPhysRegMask.reset(Reg); + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) + UsedRegUnits.reset(*Units); } diff --git a/include/llvm/CodeGen/SchedulerRegistry.h b/include/llvm/CodeGen/SchedulerRegistry.h index a582b0c40c..836b73a15a 100644 --- a/include/llvm/CodeGen/SchedulerRegistry.h +++ b/include/llvm/CodeGen/SchedulerRegistry.h @@ -102,6 +102,11 @@ ScheduleDAGSDNodes *createVLIWDAGScheduler(SelectionDAGISel *IS, ScheduleDAGSDNodes *createDefaultScheduler(SelectionDAGISel *IS, CodeGenOpt::Level OptLevel); +/// createDAGLinearizer - This creates a "no-scheduling" scheduler which +/// linearize the DAG using topological order. +ScheduleDAGSDNodes *createDAGLinearizer(SelectionDAGISel *IS, + CodeGenOpt::Level OptLevel); + } // end namespace llvm #endif diff --git a/include/llvm/ExecutionEngine/RuntimeDyld.h b/include/llvm/ExecutionEngine/RuntimeDyld.h index 7da4a4e09a..a71b1411c8 100644 --- a/include/llvm/ExecutionEngine/RuntimeDyld.h +++ b/include/llvm/ExecutionEngine/RuntimeDyld.h @@ -63,7 +63,7 @@ protected: // Any relocations already associated with the symbol will be re-resolved. 
void reassignSectionAddress(unsigned SectionID, uint64_t Addr); public: - RuntimeDyld(RTDyldMemoryManager*); + RuntimeDyld(RTDyldMemoryManager *); ~RuntimeDyld(); /// loadObject - prepare the object contained in the input buffer for diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h index b5eeb7bac4..8e0d2c4503 100644 --- a/include/llvm/InitializePasses.h +++ b/include/llvm/InitializePasses.h @@ -66,6 +66,7 @@ void initializeAliasDebuggerPass(PassRegistry&); void initializeAliasSetPrinterPass(PassRegistry&); void initializeAlwaysInlinerPass(PassRegistry&); void initializeArgPromotionPass(PassRegistry&); +void initializeBarrierNoopPass(PassRegistry&); void initializeBasicAliasAnalysisPass(PassRegistry&); void initializeBasicCallGraphPass(PassRegistry&); void initializeBlockExtractorPassPass(PassRegistry&); @@ -248,7 +249,6 @@ void initializeTailCallElimPass(PassRegistry&); void initializeTailDuplicatePassPass(PassRegistry&); void initializeTargetPassConfigPass(PassRegistry&); void initializeDataLayoutPass(PassRegistry&); -void initializeTargetTransformInfoPass(PassRegistry&); void initializeTargetLibraryInfoPass(PassRegistry&); void initializeTwoAddressInstructionPassPass(PassRegistry&); void initializeTypeBasedAliasAnalysisPass(PassRegistry&); @@ -261,6 +261,7 @@ void initializeVirtRegRewriterPass(PassRegistry&); void initializeInstSimplifierPass(PassRegistry&); void initializeUnpackMachineBundlesPass(PassRegistry&); void initializeFinalizeMachineBundlesPass(PassRegistry&); +void initializeLoopVectorizePass(PassRegistry&); void initializeBBVectorizePass(PassRegistry&); void initializeMachineFunctionPrinterPassPass(PassRegistry&); void initializeExpandCtorsPass(PassRegistry&); // @LOCALMOD diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h index 4b10d0e541..8652acd941 100644 --- a/include/llvm/LinkAllPasses.h +++ b/include/llvm/LinkAllPasses.h @@ -156,6 +156,7 @@ namespace { (void) llvm::createCorrelatedValuePropagationPass(); (void) llvm::createMemDepPrinter(); (void) llvm::createInstructionSimplifierPass(); + (void) llvm::createLoopVectorizePass(); (void) llvm::createBBVectorizePass(); (void)new llvm::IntervalPartition(); diff --git a/include/llvm/MC/MCParser/MCAsmParser.h b/include/llvm/MC/MCParser/MCAsmParser.h index 52948f749d..554cdfabf6 100644 --- a/include/llvm/MC/MCParser/MCAsmParser.h +++ b/include/llvm/MC/MCParser/MCAsmParser.h @@ -20,6 +20,8 @@ class MCAsmLexer; class MCAsmParserExtension; class MCContext; class MCExpr; +class MCInstPrinter; +class MCInstrInfo; class MCParsedAsmOperand; class MCStreamer; class MCTargetAsmParser; @@ -29,6 +31,13 @@ class SourceMgr; class StringRef; class Twine; +/// MCAsmParserSemaCallback - Generic Sema callback for assembly parser. +class MCAsmParserSemaCallback { +public: + virtual void *LookupInlineAsmIdentifier(StringRef Name, void *Loc, + unsigned &Size) = 0; +}; + /// MCAsmParser - Generic assembler parser interface, for use by target specific /// assembly parsers. class MCAsmParser { @@ -77,25 +86,19 @@ public: virtual void setParsingInlineAsm(bool V) = 0; virtual bool isParsingInlineAsm() = 0; + /// ParseMSInlineAsm - Parse ms-style inline assembly. 
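// For reference (an assumption, not from this patch), the MSVC-style
// input this entry point handles looks like:
//   __asm { mov eax, Count }
// where an identifier such as Count is resolved through the
// MCAsmParserSemaCallback::LookupInlineAsmIdentifier hook declared above.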
+ virtual bool ParseMSInlineAsm(void *AsmLoc, std::string &AsmString, + unsigned &NumOutputs, unsigned &NumInputs, + SmallVectorImpl<void *> &OpDecls, + SmallVectorImpl<std::string> &Constraints, + SmallVectorImpl<std::string> &Clobbers, + const MCInstrInfo *MII, + const MCInstPrinter *IP, + MCAsmParserSemaCallback &SI) = 0; + /// ParseStatement - Parse the next statement. virtual bool ParseStatement() = 0; - /// getNumParsedOperands - Returns the number of MCAsmParsedOperands from the - /// previously parsed statement. - virtual unsigned getNumParsedOperands() = 0; - - /// getParsedOperand - Get a MCAsmParsedOperand. - virtual MCParsedAsmOperand &getParsedOperand(unsigned OpNum) = 0; - - /// freeParsedOperands - Free the MCAsmParsedOperands. - virtual void freeParsedOperands() = 0; - - /// isInstruction - Was the previously parsed statement an instruction? - virtual bool isInstruction() = 0; - - /// getOpcode - Get the opcode from the previously parsed instruction. - virtual unsigned getOpcode() = 0; - /// Warning - Emit a warning at the location \p L, with the message \p Msg. /// /// \return The return value is true, if warnings are fatal. diff --git a/include/llvm/MC/MCSchedule.h b/include/llvm/MC/MCSchedule.h index 0504dc13c8..c9a060c79b 100644 --- a/include/llvm/MC/MCSchedule.h +++ b/include/llvm/MC/MCSchedule.h @@ -54,10 +54,12 @@ struct MCWriteProcResEntry { }; /// Specify the latency in cpu cycles for a particular scheduling class and def -/// index. Also identify the WriteResources of this def. When the operand -/// expands to a sequence of writes, this ID is the last write in the sequence. +/// index. -1 indicates an invalid latency. Heuristics would typically consider +/// an instruction with invalid latency to have infinite latency. Also identify +/// the WriteResources of this def. When the operand expands to a sequence of +/// writes, this ID is the last write in the sequence. struct MCWriteLatencyEntry { - unsigned Cycles; + int Cycles; unsigned WriteResourceID; bool operator==(const MCWriteLatencyEntry &Other) const { diff --git a/include/llvm/Operator.h b/include/llvm/Operator.h index bc5da8e8aa..462324a669 100644 --- a/include/llvm/Operator.h +++ b/include/llvm/Operator.h @@ -36,8 +36,8 @@ private: void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION; void *operator new(size_t s) LLVM_DELETED_FUNCTION; Operator() LLVM_DELETED_FUNCTION; - // NOTE: cannot use LLVM_DELETED_FUNCTION because gcc errors when deleting - // an override of a non-deleted function. + // NOTE: cannot use LLVM_DELETED_FUNCTION because it's not legal to delete + // an overridden method that's not deleted in the base class. ~Operator(); public: @@ -191,7 +191,7 @@ public: /// opcodes. 
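// (A contextual note, consistent with the class comment above: Operator
// subclasses are only reached by casting from Value and are never
// instantiated, which is why their destructors are declared but never
// defined.)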
template<typename SuperClass, unsigned Opc> class ConcreteOperator : public SuperClass { - ~ConcreteOperator() LLVM_DELETED_FUNCTION; + ~ConcreteOperator(); // DO NOT IMPLEMENT public: static inline bool classof(const Instruction *I) { return I->getOpcode() == Opc; @@ -207,44 +207,44 @@ public: class AddOperator : public ConcreteOperator<OverflowingBinaryOperator, Instruction::Add> { - ~AddOperator() LLVM_DELETED_FUNCTION; + ~AddOperator(); // DO NOT IMPLEMENT }; class SubOperator : public ConcreteOperator<OverflowingBinaryOperator, Instruction::Sub> { - ~SubOperator() LLVM_DELETED_FUNCTION; + ~SubOperator(); // DO NOT IMPLEMENT }; class MulOperator : public ConcreteOperator<OverflowingBinaryOperator, Instruction::Mul> { - ~MulOperator() LLVM_DELETED_FUNCTION; + ~MulOperator(); // DO NOT IMPLEMENT }; class ShlOperator : public ConcreteOperator<OverflowingBinaryOperator, Instruction::Shl> { - ~ShlOperator() LLVM_DELETED_FUNCTION; + ~ShlOperator(); // DO NOT IMPLEMENT }; - + class SDivOperator : public ConcreteOperator<PossiblyExactOperator, Instruction::SDiv> { - ~SDivOperator() LLVM_DELETED_FUNCTION; + ~SDivOperator(); // DO NOT IMPLEMENT }; class UDivOperator : public ConcreteOperator<PossiblyExactOperator, Instruction::UDiv> { - ~UDivOperator() LLVM_DELETED_FUNCTION; + ~UDivOperator(); // DO NOT IMPLEMENT }; class AShrOperator : public ConcreteOperator<PossiblyExactOperator, Instruction::AShr> { - ~AShrOperator() LLVM_DELETED_FUNCTION; + ~AShrOperator(); // DO NOT IMPLEMENT }; class LShrOperator : public ConcreteOperator<PossiblyExactOperator, Instruction::LShr> { - ~LShrOperator() LLVM_DELETED_FUNCTION; + ~LShrOperator(); // DO NOT IMPLEMENT }; - - - + + + class GEPOperator : public ConcreteOperator<Operator, Instruction::GetElementPtr> { - ~GEPOperator() LLVM_DELETED_FUNCTION; + ~GEPOperator(); // DO NOT IMPLEMENT enum { IsInBounds = (1 << 0) diff --git a/include/llvm/Target/TargetMachine.h b/include/llvm/Target/TargetMachine.h index 18e589e2bc..988916f9d9 100644 --- a/include/llvm/Target/TargetMachine.h +++ b/include/llvm/Target/TargetMachine.h @@ -17,8 +17,6 @@ #include "llvm/Pass.h" #include "llvm/Support/CodeGen.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/TargetTransformInfo.h" -#include "llvm/Target/TargetTransformImpl.h" #include "llvm/ADT/StringRef.h" #include <cassert> #include <string> @@ -109,10 +107,6 @@ public: virtual const TargetLowering *getTargetLowering() const { return 0; } virtual const TargetSelectionDAGInfo *getSelectionDAGInfo() const{ return 0; } virtual const DataLayout *getDataLayout() const { return 0; } - virtual const ScalarTargetTransformInfo* - getScalarTargetTransformInfo() const { return 0; } - virtual const VectorTargetTransformInfo* - getVectorTargetTransformInfo() const { return 0; } /// getMCAsmInfo - Return target specific asm information. /// diff --git a/include/llvm/Target/TargetTransformImpl.h b/include/llvm/Target/TargetTransformImpl.h deleted file mode 100644 index 7648f4f935..0000000000 --- a/include/llvm/Target/TargetTransformImpl.h +++ /dev/null @@ -1,54 +0,0 @@ -//=- llvm/Target/TargetTransformImpl.h - Target Loop Trans Info----*- C++ -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the target-specific implementations of the -// TargetTransform interfaces. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TARGET_TARGET_TRANSFORMATION_IMPL_H -#define LLVM_TARGET_TARGET_TRANSFORMATION_IMPL_H - -#include "llvm/TargetTransformInfo.h" - -namespace llvm { - -class TargetLowering; - -/// ScalarTargetTransformInfo - This is a default implementation for the -/// ScalarTargetTransformInfo interface. Different targets can implement -/// this interface differently. -class ScalarTargetTransformImpl : public ScalarTargetTransformInfo { -private: - const TargetLowering *TLI; - -public: - /// Ctor - explicit ScalarTargetTransformImpl(const TargetLowering *TL) : TLI(TL) {} - - virtual bool isLegalAddImmediate(int64_t imm) const; - - virtual bool isLegalICmpImmediate(int64_t imm) const; - - virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const; - - virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const; - - virtual bool isTypeLegal(Type *Ty) const; - - virtual unsigned getJumpBufAlignment() const; - - virtual unsigned getJumpBufSize() const; -}; - -class VectorTargetTransformImpl : public VectorTargetTransformInfo { }; - -} // end llvm namespace - -#endif diff --git a/include/llvm/TargetTransformInfo.h b/include/llvm/TargetTransformInfo.h deleted file mode 100644 index 82fc14dbd7..0000000000 --- a/include/llvm/TargetTransformInfo.h +++ /dev/null @@ -1,128 +0,0 @@ -//===- llvm/Transforms/TargetTransformInfo.h --------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass exposes codegen information to IR-level passes. Every -// transformation that uses codegen information is broken into three parts: -// 1. The IR-level analysis pass. -// 2. The IR-level transformation interface which provides the needed -// information. -// 3. Codegen-level implementation which uses target-specific hooks. -// -// This file defines #2, which is the interface that IR-level transformations -// use for querying the codegen. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TRANSFORMS_TARGET_TRANSFORM_INTERFACE -#define LLVM_TRANSFORMS_TARGET_TRANSFORM_INTERFACE - -#include "llvm/Pass.h" -#include "llvm/AddressingMode.h" -#include "llvm/Support/DataTypes.h" -#include "llvm/Type.h" - -namespace llvm { - -class ScalarTargetTransformInfo; -class VectorTargetTransformInfo; - -/// TargetTransformInfo - This pass provides access to the codegen -/// interfaces that are needed for IR-level transformations. -class TargetTransformInfo : public ImmutablePass { -private: - const ScalarTargetTransformInfo *STTI; - const VectorTargetTransformInfo *VTTI; -public: - /// Default ctor. - /// - /// @note This has to exist, because this is a pass, but it should never be - /// used. 
- TargetTransformInfo(); - - explicit TargetTransformInfo(const ScalarTargetTransformInfo* S, - const VectorTargetTransformInfo *V) - : ImmutablePass(ID), STTI(S), VTTI(V) { - initializeTargetTransformInfoPass(*PassRegistry::getPassRegistry()); - } - - TargetTransformInfo(const TargetTransformInfo &T) : - ImmutablePass(ID), STTI(T.STTI), VTTI(T.VTTI) { } - - const ScalarTargetTransformInfo* getScalarTargetTransformInfo() { - return STTI; - } - const VectorTargetTransformInfo* getVectorTargetTransformInfo() { - return VTTI; - } - - /// Pass identification, replacement for typeid. - static char ID; -}; - -// ---------------------------------------------------------------------------// -// The classes below are inherited and implemented by target-specific classes -// in the codegen. -// ---------------------------------------------------------------------------// - -/// ScalarTargetTransformInfo - This interface is used by IR-level passes -/// that need target-dependent information for generic scalar transformations. -/// LSR, and LowerInvoke use this interface. -class ScalarTargetTransformInfo { -public: - virtual ~ScalarTargetTransformInfo() {} - - /// isLegalAddImmediate - Return true if the specified immediate is legal - /// add immediate, that is the target has add instructions which can add - /// a register with the immediate without having to materialize the - /// immediate into a register. - virtual bool isLegalAddImmediate(int64_t) const { - return false; - } - /// isLegalICmpImmediate - Return true if the specified immediate is legal - /// icmp immediate, that is the target has icmp instructions which can compare - /// a register against the immediate without having to materialize the - /// immediate into a register. - virtual bool isLegalICmpImmediate(int64_t) const { - return false; - } - /// isLegalAddressingMode - Return true if the addressing mode represented by - /// AM is legal for this target, for a load/store of the specified type. - /// The type may be VoidTy, in which case only return true if the addressing - /// mode is legal for a load/store of any legal type. - /// TODO: Handle pre/postinc as well. - virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const { - return false; - } - /// isTruncateFree - Return true if it's free to truncate a value of - /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in - /// register EAX to i16 by referencing its sub-register AX. - virtual bool isTruncateFree(Type * /*Ty1*/, Type * /*Ty2*/) const { - return false; - } - /// Is this type legal. - virtual bool isTypeLegal(Type *Ty) const { - return false; - } - /// getJumpBufAlignment - returns the target's jmp_buf alignment in bytes - virtual unsigned getJumpBufAlignment() const { - return 0; - } - /// getJumpBufSize - returns the target's jmp_buf size in bytes. - virtual unsigned getJumpBufSize() const { - return 0; - } -}; - -class VectorTargetTransformInfo { - // TODO: define an interface for VectorTargetTransformInfo. -}; - -} // End llvm namespace - -#endif diff --git a/include/llvm/Transforms/IPO.h b/include/llvm/Transforms/IPO.h index 962cb63758..08d3bbd941 100644 --- a/include/llvm/Transforms/IPO.h +++ b/include/llvm/Transforms/IPO.h @@ -198,6 +198,11 @@ ModulePass *createPartialInliningPass(); // ModulePass *createMetaRenamerPass(); +//===----------------------------------------------------------------------===// +/// createBarrierNoopPass - This pass is purely a module pass barrier in a pass +/// manager. 
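// A minimal usage sketch (the surrounding pass names are hypothetical):
//   PassManager PM;
//   PM.add(createSomeFunctionPass());     // hypothetical
//   PM.add(createBarrierNoopPass());      // ends the current function-pass run
//   PM.add(createAnotherFunctionPass());  // hypothetical, runs in a new batch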
+ModulePass *createBarrierNoopPass(); + } // End llvm namespace #endif diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h index 12239adb63..1ddca844c9 100644 --- a/include/llvm/Transforms/Scalar.h +++ b/include/llvm/Transforms/Scalar.h @@ -119,7 +119,7 @@ Pass *createLICMPass(); // optional parameter used to consult the target machine whether certain // transformations are profitable. // -Pass *createLoopStrengthReducePass(); +Pass *createLoopStrengthReducePass(const TargetLowering *TLI = 0); Pass *createGlobalMergePass(const TargetLowering *TLI = 0); @@ -249,8 +249,9 @@ extern char &LowerSwitchID; // purpose "my LLVM-to-LLVM pass doesn't support the invoke instruction yet" // lowering pass. // -FunctionPass *createLowerInvokePass(); -FunctionPass *createLowerInvokePass(bool useExpensiveEHSupport); +FunctionPass *createLowerInvokePass(const TargetLowering *TLI = 0); +FunctionPass *createLowerInvokePass(const TargetLowering *TLI, + bool useExpensiveEHSupport); extern char &LowerInvokePassID; //===----------------------------------------------------------------------===// diff --git a/include/llvm/Transforms/Vectorize.h b/include/llvm/Transforms/Vectorize.h index 1e49a9c01e..41e53a83e2 100644 --- a/include/llvm/Transforms/Vectorize.h +++ b/include/llvm/Transforms/Vectorize.h @@ -107,6 +107,12 @@ BasicBlockPass * createBBVectorizePass(const VectorizeConfig &C = VectorizeConfig()); //===----------------------------------------------------------------------===// +// +// LoopVectorize - Create a loop vectorization pass. +// +Pass * createLoopVectorizePass(); + +//===----------------------------------------------------------------------===// /// @brief Vectorize the BasicBlock. /// /// @param BB The BasicBlock to be vectorized diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index 5c2a49e767..5e05f4c8ca 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -19,8 +19,8 @@ #include "llvm/LLVMContext.h" #include "llvm/Support/Debug.h" #include "llvm/DataLayout.h" +#include "llvm/Target/TargetLowering.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/TargetTransformInfo.h" using namespace llvm; @@ -1599,15 +1599,15 @@ static bool width_descending(Value *lhs, Value *rhs) { /// This does not depend on any SCEVExpander state but should be used in /// the same context that SCEVExpander is used. unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, - SmallVectorImpl<WeakVH> &DeadInsts, - const ScalarTargetTransformInfo *STTI) { + SmallVectorImpl<WeakVH> &DeadInsts, + const TargetLowering *TLI) { // Find integer phis in order of increasing width. SmallVector<PHINode*, 8> Phis; for (BasicBlock::iterator I = L->getHeader()->begin(); PHINode *Phi = dyn_cast<PHINode>(I); ++I) { Phis.push_back(Phi); } - if (STTI) + if (TLI) std::sort(Phis.begin(), Phis.end(), width_descending); unsigned NumElim = 0; @@ -1624,8 +1624,8 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(Phi)]; if (!OrigPhiRef) { OrigPhiRef = Phi; - if (Phi->getType()->isIntegerTy() && STTI && - STTI->isTruncateFree(Phi->getType(), Phis.back()->getType())) { + if (Phi->getType()->isIntegerTy() && TLI + && TLI->isTruncateFree(Phi->getType(), Phis.back()->getType())) { // This phi can be freely truncated to the narrowest phi type. Map the // truncated expression to it so it will be reused for narrow types. 
const SCEV *TruncExpr = diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp index 2311842671..ed78f19421 100644 --- a/lib/CodeGen/ExecutionDepsFix.cpp +++ b/lib/CodeGen/ExecutionDepsFix.cpp @@ -657,7 +657,7 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { bool anyregs = false; for (TargetRegisterClass::const_iterator I = RC->begin(), E = RC->end(); I != E; ++I) - if (MF->getRegInfo().isPhysRegOrOverlapUsed(*I)) { + if (MF->getRegInfo().isPhysRegUsed(*I)) { anyregs = true; break; } diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index ae7c15be15..95d7a7dd68 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -21,7 +21,7 @@ MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) : TRI(&TRI), IsSSA(true), TracksLiveness(true) { VRegInfo.reserve(256); RegAllocHints.reserve(256); - UsedPhysRegs.resize(TRI.getNumRegs()); + UsedRegUnits.resize(TRI.getNumRegUnits()); UsedPhysRegMask.resize(TRI.getNumRegs()); // Create the physreg use/def lists. @@ -32,7 +32,7 @@ MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) MachineRegisterInfo::~MachineRegisterInfo() { #ifndef NDEBUG clearVirtRegs(); - for (unsigned i = 0, e = UsedPhysRegs.size(); i != e; ++i) + for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) assert(!PhysRegUseDefLists[i] && "PhysRegUseDefLists has entries after all instructions are deleted"); #endif diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index deca31263c..7c7d2c8045 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -369,7 +369,7 @@ void TargetPassConfig::addIRPasses() { // Run loop strength reduction before anything else. if (getOptLevel() != CodeGenOpt::None && !DisableLSR) { - addPass(createLoopStrengthReducePass()); + addPass(createLoopStrengthReducePass(getTargetLowering())); if (PrintLSR) addPass(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs())); } @@ -399,7 +399,7 @@ void TargetPassConfig::addPassesToHandleExceptions() { addPass(createDwarfEHPass(TM)); break; case ExceptionHandling::None: - addPass(createLowerInvokePass()); + addPass(createLowerInvokePass(TM->getTargetLowering())); // The lower invoke pass may create unreachable code. Remove it. addPass(createUnreachableBlockEliminationPass()); diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 86df0a127b..77554d691c 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -227,7 +227,7 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { std::vector<CalleeSavedInfo> CSI; for (unsigned i = 0; CSRegs[i]; ++i) { unsigned Reg = CSRegs[i]; - if (Fn.getRegInfo().isPhysRegOrOverlapUsed(Reg)) { + if (Fn.getRegInfo().isPhysRegUsed(Reg)) { // If the reg is modified, save it! CSI.push_back(CalleeSavedInfo(Reg)); } diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index e096240e04..d6ed36ef95 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -113,9 +113,11 @@ namespace { // PhysRegState - One of the RegState enums, or a virtreg. std::vector<unsigned> PhysRegState; - // UsedInInstr - BitVector of physregs that are used in the current - // instruction, and so cannot be allocated. - BitVector UsedInInstr; + typedef SparseSet<unsigned> UsedInInstrSet; + + // UsedInInstr - Set of physregs that are used in the current instruction, + // and so cannot be allocated. 
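// Why a SparseSet (an illustrative note, assuming the semantics of
// llvm/ADT/SparseSet.h): clear() costs O(elements inserted) instead of
// the O(all physregs) of BitVector::reset(), and this set is cleared at
// least once per instruction:
//   UsedInInstr.setUniverse(TRI->getNumRegs()); // once per function
//   UsedInInstr.insert(Reg);                    // O(1) insert
//   if (UsedInInstr.count(Reg)) { /* in use */ }
//   UsedInInstr.clear();                        // O(inserted), not O(universe)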
+ UsedInInstrSet UsedInInstr; // SkippedInstrs - Descriptors of instructions whose clobber list was // ignored because all registers were spilled. It is still necessary to @@ -340,7 +342,7 @@ void RAFast::usePhysReg(MachineOperand &MO) { PhysRegState[PhysReg] = regFree; // Fall through case regFree: - UsedInInstr.set(PhysReg); + UsedInInstr.insert(PhysReg); MO.setIsKill(); return; default: @@ -360,13 +362,13 @@ void RAFast::usePhysReg(MachineOperand &MO) { "Instruction is not using a subregister of a reserved register"); // Leave the superregister in the working set. PhysRegState[Alias] = regFree; - UsedInInstr.set(Alias); + UsedInInstr.insert(Alias); MO.getParent()->addRegisterKilled(Alias, TRI, true); return; case regFree: if (TRI->isSuperRegister(PhysReg, Alias)) { // Leave the superregister in the working set. - UsedInInstr.set(Alias); + UsedInInstr.insert(Alias); MO.getParent()->addRegisterKilled(Alias, TRI, true); return; } @@ -380,7 +382,7 @@ void RAFast::usePhysReg(MachineOperand &MO) { // All aliases are disabled, bring register into working set. PhysRegState[PhysReg] = regFree; - UsedInInstr.set(PhysReg); + UsedInInstr.insert(PhysReg); MO.setIsKill(); } @@ -389,7 +391,7 @@ void RAFast::usePhysReg(MachineOperand &MO) { /// reserved instead of allocated. void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg, RegState NewState) { - UsedInInstr.set(PhysReg); + UsedInInstr.insert(PhysReg); switch (unsigned VirtReg = PhysRegState[PhysReg]) { case regDisabled: break; @@ -429,7 +431,7 @@ void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg, // can be allocated directly. // Returns spillImpossible when PhysReg or an alias can't be spilled. unsigned RAFast::calcSpillCost(unsigned PhysReg) const { - if (UsedInInstr.test(PhysReg)) { + if (UsedInInstr.count(PhysReg)) { DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is already used in instr.\n"); return spillImpossible; } @@ -454,7 +456,7 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const { unsigned Cost = 0; for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) { unsigned Alias = *AI; - if (UsedInInstr.test(Alias)) + if (UsedInInstr.count(Alias)) return spillImpossible; switch (unsigned VirtReg = PhysRegState[Alias]) { case regDisabled: @@ -530,7 +532,7 @@ RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr *MI, // First try to find a completely free register. 
for (ArrayRef<unsigned>::iterator I = AO.begin(), E = AO.end(); I != E; ++I) { unsigned PhysReg = *I; - if (PhysRegState[PhysReg] == regFree && !UsedInInstr.test(PhysReg)) { + if (PhysRegState[PhysReg] == regFree && !UsedInInstr.count(PhysReg)) { assignVirtToPhysReg(*LRI, PhysReg); return LRI; } @@ -596,7 +598,7 @@ RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum, LRI->LastUse = MI; LRI->LastOpNum = OpNum; LRI->Dirty = true; - UsedInInstr.set(LRI->PhysReg); + UsedInInstr.insert(LRI->PhysReg); return LRI; } @@ -646,7 +648,7 @@ RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum, assert(LRI->PhysReg && "Register not assigned"); LRI->LastUse = MI; LRI->LastOpNum = OpNum; - UsedInInstr.set(LRI->PhysReg); + UsedInInstr.insert(LRI->PhysReg); return LRI; } @@ -708,7 +710,7 @@ void RAFast::handleThroughOperands(MachineInstr *MI, unsigned Reg = MO.getReg(); if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { - UsedInInstr.set(*AI); + UsedInInstr.insert(*AI); if (ThroughRegs.count(PhysRegState[*AI])) definePhysReg(MI, *AI, regFree); } @@ -756,7 +758,7 @@ void RAFast::handleThroughOperands(MachineInstr *MI, } // Restore UsedInInstr to a state usable for allocating normal virtual uses. - UsedInInstr.reset(); + UsedInInstr.clear(); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue; @@ -764,12 +766,12 @@ void RAFast::handleThroughOperands(MachineInstr *MI, if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; DEBUG(dbgs() << "\tSetting " << PrintReg(Reg, TRI) << " as used in instr\n"); - UsedInInstr.set(Reg); + UsedInInstr.insert(Reg); } // Also mark PartialDefs as used to avoid reallocation. for (unsigned i = 0, e = PartialDefs.size(); i != e; ++i) - UsedInInstr.set(PartialDefs[i]); + UsedInInstr.insert(PartialDefs[i]); } /// addRetOperand - ensure that a return instruction has an operand for each @@ -942,7 +944,7 @@ void RAFast::AllocateBasicBlock() { } // Track registers used by instruction. - UsedInInstr.reset(); + UsedInInstr.clear(); // First scan. // Mark physreg uses and early clobbers as used. @@ -1016,11 +1018,13 @@ void RAFast::AllocateBasicBlock() { } } - MRI->addPhysRegsUsed(UsedInInstr); + for (UsedInInstrSet::iterator + I = UsedInInstr.begin(), E = UsedInInstr.end(); I != E; ++I) + MRI->setPhysRegUsed(*I); // Track registers defined by instruction - early clobbers and tied uses at // this point. - UsedInInstr.reset(); + UsedInInstr.clear(); if (hasEarlyClobbers) { for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); @@ -1030,7 +1034,7 @@ void RAFast::AllocateBasicBlock() { // Look for physreg defs and tied uses. 
if (!MO.isDef() && !MI->isRegTiedToDefOperand(i)) continue; for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - UsedInInstr.set(*AI); + UsedInInstr.insert(*AI); } } @@ -1080,7 +1084,9 @@ void RAFast::AllocateBasicBlock() { killVirtReg(VirtDead[i]); VirtDead.clear(); - MRI->addPhysRegsUsed(UsedInInstr); + for (UsedInInstrSet::iterator + I = UsedInInstr.begin(), E = UsedInInstr.end(); I != E; ++I) + MRI->setPhysRegUsed(*I); if (CopyDst && CopyDst == CopySrc && CopyDstSub == CopySrcSub) { DEBUG(dbgs() << "-- coalescing: " << *MI); @@ -1118,7 +1124,8 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) { TII = TM->getInstrInfo(); MRI->freezeReservedRegs(Fn); RegClassInfo.runOnMachineFunction(Fn); - UsedInInstr.resize(TRI->getNumRegs()); + UsedInInstr.clear(); + UsedInInstr.setUniverse(TRI->getNumRegs()); assert(!MRI->isSSA() && "regalloc requires leaving SSA"); diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index f46d09bc86..5b5c0bdf5b 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -5308,10 +5308,6 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { if (Reduced.getNode()) return Reduced; } - // fold (trunc (fptoXi x)) -> (smaller fptoXi x) - if ((N0.getOpcode() == ISD::FP_TO_UINT || - N0.getOpcode() == ISD::FP_TO_SINT) && !LegalTypes) - return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, N0.getOperand(0)); // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)), // where ... are all 'undef'. if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) { @@ -8610,8 +8606,8 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { return SDValue(); // Only handle cases where both indexes are constants with the same type. - ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(N->getOperand(1)); - ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(V->getOperand(2)); + ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1)); + ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2)); if (InsIdx && ExtIdx && InsIdx->getValueType(0).getSizeInBits() <= 64 && @@ -8628,6 +8624,21 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { } } + if (V->getOpcode() == ISD::CONCAT_VECTORS) { + // Combine: + // (extract_subvec (concat V1, V2, ...), i) + // Into: + // Vi if possible + for (unsigned i = 0, e = V->getNumOperands(); i != e; ++i) + if (V->getOperand(i).getValueType() != NVT) + return SDValue(); + unsigned Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); + unsigned NumElems = NVT.getVectorNumElements(); + assert((Idx % NumElems) == 0 && + "IDX in concat is not a multiple of the result vector length."); + return V->getOperand(Idx / NumElems); + } + return SDValue(); } @@ -9064,6 +9075,10 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) || (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode))) return false; + // The loads must not depend on one another. 
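// (An expanded rationale, an editorial assumption: this combine folds both
// loads into a single load of a selected address, so if either load can
// reach the other through its chain or address computation, the merged
// node would depend on a node it replaces and create a cycle in the DAG.)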
+ if (LLD->isPredecessorOf(RLD) || + RLD->isPredecessorOf(LLD)) + return false; Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(), LLD->getBasePtr().getValueType(), TheSelect->getOperand(0), LLD->getBasePtr(), diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 515eff3b25..81e3ff6afe 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -13,6 +13,7 @@ #define DEBUG_TYPE "pre-RA-sched" #include "ScheduleDAGSDNodes.h" +#include "InstrEmitter.h" #include "llvm/InlineAsm.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAGISel.h" @@ -34,6 +35,10 @@ STATISTIC(NumPRCopies, "Number of physical copies"); static RegisterScheduler fastDAGScheduler("fast", "Fast suboptimal list scheduling", createFastDAGScheduler); +static RegisterScheduler + linearizeDAGScheduler("linearize", "Linearize DAG, no scheduling", + createDAGLinearizer); + namespace { /// FastPriorityQueue - A degenerate priority queue that considers @@ -629,6 +634,153 @@ void ScheduleDAGFast::ListScheduleBottomUp() { #endif } + +//===----------------------------------------------------------------------===// +// ScheduleDAGLinearize - No scheduling scheduler, it simply linearize the +// DAG in topological order. +// IMPORTANT: this may not work for targets with phyreg dependency. +// +class ScheduleDAGLinearize : public ScheduleDAGSDNodes { +public: + ScheduleDAGLinearize(MachineFunction &mf) : ScheduleDAGSDNodes(mf) {} + + void Schedule(); + + MachineBasicBlock *EmitSchedule(MachineBasicBlock::iterator &InsertPos); + +private: + std::vector<SDNode*> Sequence; + DenseMap<SDNode*, SDNode*> GluedMap; // Cache glue to its user + + void ScheduleNode(SDNode *N); +}; + +void ScheduleDAGLinearize::ScheduleNode(SDNode *N) { + if (N->getNodeId() != 0) + llvm_unreachable(0); + + if (!N->isMachineOpcode() && + (N->getOpcode() == ISD::EntryToken || isPassiveNode(N))) + // These nodes do not need to be translated into MIs. + return; + + DEBUG(dbgs() << "\n*** Scheduling: "); + DEBUG(N->dump(DAG)); + Sequence.push_back(N); + + unsigned NumOps = N->getNumOperands(); + if (unsigned NumLeft = NumOps) { + SDNode *GluedOpN = 0; + do { + const SDValue &Op = N->getOperand(NumLeft-1); + SDNode *OpN = Op.getNode(); + + if (NumLeft == NumOps && Op.getValueType() == MVT::Glue) { + // Schedule glue operand right above N. + GluedOpN = OpN; + assert(OpN->getNodeId() != 0 && "Glue operand not ready?"); + OpN->setNodeId(0); + ScheduleNode(OpN); + continue; + } + + if (OpN == GluedOpN) + // Glue operand is already scheduled. + continue; + + DenseMap<SDNode*, SDNode*>::iterator DI = GluedMap.find(OpN); + if (DI != GluedMap.end() && DI->second != N) + // Users of glues are counted against the glued users. + OpN = DI->second; + + unsigned Degree = OpN->getNodeId(); + assert(Degree > 0 && "Predecessor over-released!"); + OpN->setNodeId(--Degree); + if (Degree == 0) + ScheduleNode(OpN); + } while (--NumLeft); + } +} + +/// findGluedUser - Find the representative use of a glue value by walking +/// the use chain. 
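// A minimal, self-contained sketch of the idea behind ScheduleNode above
// (plain C++, not the LLVM API; all names here are illustrative): a
// reference-counting topological sort where each node's id holds its
// count of not-yet-scheduled users.
#include <vector>
static void emitUsersFirst(unsigned N,
                           const std::vector<std::vector<unsigned>> &Ops,
                           std::vector<unsigned> &PendingUses,
                           std::vector<unsigned> &Out) {
  Out.push_back(N);             // a node is emitted before its operands
  for (unsigned Op : Ops[N])
    if (--PendingUses[Op] == 0) // its last user was emitted; Op is ready
      emitUsersFirst(Op, Ops, PendingUses, Out);
}
// Reading Out back-to-front then yields a def-before-use order, which is
// what EmitSchedule does with Sequence below.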
+static SDNode *findGluedUser(SDNode *N) { + while (SDNode *Glued = N->getGluedUser()) + N = Glued; + return N; +} + +void ScheduleDAGLinearize::Schedule() { + DEBUG(dbgs() << "********** DAG Linearization **********\n"); + + SmallVector<SDNode*, 8> Glues; + unsigned DAGSize = 0; + for (SelectionDAG::allnodes_iterator I = DAG->allnodes_begin(), + E = DAG->allnodes_end(); I != E; ++I) { + SDNode *N = I; + + // Use node id to record degree. + unsigned Degree = N->use_size(); + N->setNodeId(Degree); + unsigned NumVals = N->getNumValues(); + if (NumVals && N->getValueType(NumVals-1) == MVT::Glue && + N->hasAnyUseOfValue(NumVals-1)) { + SDNode *User = findGluedUser(N); + if (User) { + Glues.push_back(N); + GluedMap.insert(std::make_pair(N, User)); + } + } + + if (N->isMachineOpcode() || + (N->getOpcode() != ISD::EntryToken && !isPassiveNode(N))) + ++DAGSize; + } + + for (unsigned i = 0, e = Glues.size(); i != e; ++i) { + SDNode *Glue = Glues[i]; + SDNode *GUser = GluedMap[Glue]; + unsigned Degree = Glue->getNodeId(); + unsigned UDegree = GUser->getNodeId(); + + // Glue user must be scheduled together with the glue operand. So other + // users of the glue operand must be treated as its users. + SDNode *ImmGUser = Glue->getGluedUser(); + for (SDNode::use_iterator ui = Glue->use_begin(), ue = Glue->use_end(); + ui != ue; ++ui) + if (*ui == ImmGUser) + --Degree; + GUser->setNodeId(UDegree + Degree); + Glue->setNodeId(1); + } + + Sequence.reserve(DAGSize); + ScheduleNode(DAG->getRoot().getNode()); +} + +MachineBasicBlock* +ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) { + InstrEmitter Emitter(BB, InsertPos); + DenseMap<SDValue, unsigned> VRBaseMap; + + DEBUG({ + dbgs() << "\n*** Final schedule ***\n"; + }); + + // FIXME: Handle dbg_values. 
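// Sequence was filled root-first by ScheduleNode, so it is walked in
// reverse here to emit each node's operands before the node itself.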
+ unsigned NumNodes = Sequence.size(); + for (unsigned i = 0; i != NumNodes; ++i) { + SDNode *N = Sequence[NumNodes-i-1]; + DEBUG(N->dump(DAG)); + Emitter.EmitNode(N, false, false, VRBaseMap); + } + + DEBUG(dbgs() << '\n'); + + InsertPos = Emitter.getInsertPos(); + return Emitter.getBlock(); +} + //===----------------------------------------------------------------------===// // Public Constructor Functions //===----------------------------------------------------------------------===// @@ -637,3 +789,8 @@ llvm::ScheduleDAGSDNodes * llvm::createFastDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { return new ScheduleDAGFast(*IS->MF); } + +llvm::ScheduleDAGSDNodes * +llvm::createDAGLinearizer(SelectionDAGISel *IS, CodeGenOpt::Level) { + return new ScheduleDAGLinearize(*IS->MF); +} diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 660223a505..714471f559 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -831,8 +831,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { } SmallVector<SDNode *, 4> GluedNodes; - for (SDNode *N = SU->getNode()->getGluedNode(); N; - N = N->getGluedNode()) + for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode()) GluedNodes.push_back(N); while (!GluedNodes.empty()) { SDNode *N = GluedNodes.back(); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 8e7bd82201..907356fd21 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -114,7 +114,8 @@ namespace llvm { /// EmitSchedule - Insert MachineInstrs into the MachineBasicBlock /// according to the order specified in Sequence. /// - MachineBasicBlock *EmitSchedule(MachineBasicBlock::iterator &InsertPos); + virtual MachineBasicBlock* + EmitSchedule(MachineBasicBlock::iterator &InsertPos); virtual void dumpNode(const SUnit *SU) const; diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp index 54d8c8cde7..1cbee843a1 100644 --- a/lib/CodeGen/StackColoring.cpp +++ b/lib/CodeGen/StackColoring.cpp @@ -48,6 +48,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/DebugInfo.h" +#include "llvm/Instructions.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -260,7 +261,7 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) { MarkersFound++; - const Value *Allocation = MFI->getObjectAllocation(Slot); + const AllocaInst *Allocation = MFI->getObjectAllocation(Slot); if (Allocation) { DEBUG(dbgs()<<"Found a lifetime marker for slot #"<<Slot<< " with allocation: "<< Allocation->getName()<<"\n"); @@ -480,11 +481,11 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { } // Keep a list of *allocas* which need to be remapped. 
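// (That is, each alloca whose slot was merged away maps to the alloca
// that now backs the unified slot.)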
- DenseMap<const Value*, const Value*> Allocas; + DenseMap<const AllocaInst*, const AllocaInst*> Allocas; for (DenseMap<int, int>::iterator it = SlotRemap.begin(), e = SlotRemap.end(); it != e; ++it) { - const Value *From = MFI->getObjectAllocation(it->first); - const Value *To = MFI->getObjectAllocation(it->second); + const AllocaInst *From = MFI->getObjectAllocation(it->first); + const AllocaInst *To = MFI->getObjectAllocation(it->second); assert(To && From && "Invalid allocation object"); Allocas[From] = To; } @@ -514,10 +515,17 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { V = GetUnderlyingObject(V); // If we did not find one, or if the one that we found is not in our // map, then move on. - if (!V || !Allocas.count(V)) + if (!V || !isa<AllocaInst>(V)) { + // Clear mem operand since we don't know for sure that it doesn't + // alias a merged alloca. + MMO->setValue(0); + continue; + } + const AllocaInst *AI= cast<AllocaInst>(V); + if (!Allocas.count(AI)) continue; - MMO->setValue(Allocas[V]); + MMO->setValue(Allocas[AI]); FixedMemOp++; } diff --git a/lib/CodeGen/TargetSchedule.cpp b/lib/CodeGen/TargetSchedule.cpp index 7a6e2604d7..6a096a16c4 100644 --- a/lib/CodeGen/TargetSchedule.cpp +++ b/lib/CodeGen/TargetSchedule.cpp @@ -58,6 +58,14 @@ unsigned TargetSchedModel::getNumMicroOps(MachineInstr *MI) const { return MI->isTransient() ? 0 : 1; } +// The machine model may explicitly specify an invalid latency, which +// effectively means infinite latency. Since users of the TargetSchedule API +// don't know how to handle this, we convert it to a very large latency that is +// easy to distinguish when debugging the DAG but won't induce overflow. +static unsigned convertLatency(int Cycles) { + return Cycles >= 0 ? Cycles : 1000; +} + /// If we can determine the operand latency from the def only, without machine /// model or itinerary lookup, do so. Otherwise return -1. int TargetSchedModel::getDefLatency(const MachineInstr *DefMI, @@ -178,7 +186,7 @@ unsigned TargetSchedModel::computeOperandLatency( const MCWriteLatencyEntry *WLEntry = STI->getWriteLatencyEntry(SCDesc, DefIdx); unsigned WriteID = WLEntry->WriteResourceID; - unsigned Latency = WLEntry->Cycles; + unsigned Latency = convertLatency(WLEntry->Cycles); if (!UseMI) return Latency; @@ -219,7 +227,7 @@ unsigned TargetSchedModel::computeInstrLatency(const MachineInstr *MI) const { // Lookup the definition's write latency in SubtargetInfo. const MCWriteLatencyEntry *WLEntry = STI->getWriteLatencyEntry(SCDesc, DefIdx); - Latency = std::max(Latency, WLEntry->Cycles); + Latency = std::max(Latency, convertLatency(WLEntry->Cycles)); } return Latency; } diff --git a/lib/DebugInfo/DWARFContext.cpp b/lib/DebugInfo/DWARFContext.cpp index 241f55eaed..afd614cc35 100644 --- a/lib/DebugInfo/DWARFContext.cpp +++ b/lib/DebugInfo/DWARFContext.cpp @@ -17,6 +17,8 @@ using namespace llvm; using namespace dwarf; +typedef DWARFDebugLine::LineTable DWARFLineTable; + void DWARFContext::dump(raw_ostream &OS) { OS << ".debug_abbrev contents:\n"; getDebugAbbrev()->dump(OS); @@ -94,7 +96,7 @@ const DWARFDebugAranges *DWARFContext::getDebugAranges() { return Aranges.get(); } -const DWARFDebugLine::LineTable * +const DWARFLineTable * DWARFContext::getLineTableForCompileUnit(DWARFCompileUnit *cu) { if (!Line) Line.reset(new DWARFDebugLine()); @@ -106,7 +108,7 @@ DWARFContext::getLineTableForCompileUnit(DWARFCompileUnit *cu) { return 0; // No line table for this compile unit. // See if the line table is cached. 
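
The convertLatency() helper in TargetSchedule.cpp above gives the model's sentinel a concrete meaning; a tiny illustration of the intended behavior (an assumed check, not code from this patch):

    // Negative cycle counts are the machine model's "invalid/infinite"
    // marker; after conversion they compare as very large but still behave
    // safely under std::max() and addition in the schedulers.
    assert(convertLatency(4)  == 4);     // normal latencies pass through
    assert(convertLatency(-1) == 1000);  // sentinel becomes large but finite
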
- if (const DWARFDebugLine::LineTable *lt = Line->getLineTable(stmtOffset)) + if (const DWARFLineTable *lt = Line->getLineTable(stmtOffset)) return lt; // We have to parse it first. @@ -117,11 +119,11 @@ DWARFContext::getLineTableForCompileUnit(DWARFCompileUnit *cu) { void DWARFContext::parseCompileUnits() { uint32_t offset = 0; - const DataExtractor &debug_info_data = DataExtractor(getInfoSection(), - isLittleEndian(), 0); - while (debug_info_data.isValidOffset(offset)) { + const DataExtractor &DIData = DataExtractor(getInfoSection(), + isLittleEndian(), 0); + while (DIData.isValidOffset(offset)) { CUs.push_back(DWARFCompileUnit(*this)); - if (!CUs.back().extract(debug_info_data, &offset)) { + if (!CUs.back().extract(DIData, &offset)) { CUs.pop_back(); break; } @@ -163,9 +165,11 @@ DWARFCompileUnit *DWARFContext::getCompileUnitForAddress(uint64_t Address) { return getCompileUnitForOffset(CUOffset); } -static bool getFileNameForCompileUnit( - DWARFCompileUnit *CU, const DWARFDebugLine::LineTable *LineTable, - uint64_t FileIndex, bool NeedsAbsoluteFilePath, std::string &FileName) { +static bool getFileNameForCompileUnit(DWARFCompileUnit *CU, + const DWARFLineTable *LineTable, + uint64_t FileIndex, + bool NeedsAbsoluteFilePath, + std::string &FileName) { if (CU == 0 || LineTable == 0 || !LineTable->getFileNameByIndex(FileIndex, NeedsAbsoluteFilePath, @@ -183,10 +187,12 @@ static bool getFileNameForCompileUnit( return true; } -static bool getFileLineInfoForCompileUnit( - DWARFCompileUnit *CU, const DWARFDebugLine::LineTable *LineTable, - uint64_t Address, bool NeedsAbsoluteFilePath, std::string &FileName, - uint32_t &Line, uint32_t &Column) { +static bool getFileLineInfoForCompileUnit(DWARFCompileUnit *CU, + const DWARFLineTable *LineTable, + uint64_t Address, + bool NeedsAbsoluteFilePath, + std::string &FileName, + uint32_t &Line, uint32_t &Column) { if (CU == 0 || LineTable == 0) return false; // Get the index of row we're looking for in the line table. 
@@ -225,8 +231,7 @@ DILineInfo DWARFContext::getLineInfoForAddress(uint64_t Address, } } if (Specifier.needs(DILineInfoSpecifier::FileLineInfo)) { - const DWARFDebugLine::LineTable *LineTable = - getLineTableForCompileUnit(CU); + const DWARFLineTable *LineTable = getLineTableForCompileUnit(CU); const bool NeedsAbsoluteFilePath = Specifier.needs(DILineInfoSpecifier::AbsoluteFilePath); getFileLineInfoForCompileUnit(CU, LineTable, Address, @@ -250,7 +255,7 @@ DIInliningInfo DWARFContext::getInliningInfoForAddress(uint64_t Address, DIInliningInfo InliningInfo; uint32_t CallFile = 0, CallLine = 0, CallColumn = 0; - const DWARFDebugLine::LineTable *LineTable = 0; + const DWARFLineTable *LineTable = 0; for (uint32_t i = 0, n = InlinedChain.size(); i != n; i++) { const DWARFDebugInfoEntryMinimal &FunctionDIE = InlinedChain[i]; std::string FileName = "<invalid>"; diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index f58f75bf0e..f5b0d08337 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -19,6 +19,8 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCParser/AsmCond.h" #include "llvm/MC/MCParser/AsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" @@ -35,6 +37,8 @@ #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" #include <cctype> +#include <set> +#include <string> #include <vector> using namespace llvm; @@ -139,7 +143,8 @@ private: /// ParsedOperands - The parsed operands from the last parsed statement. SmallVector<MCParsedAsmOperand*, 8> ParsedOperands; - /// Opcode - The opcode from the last parsed instruction. + /// Opcode - The opcode from the last parsed instruction. This is MS-style + /// inline asm specific. unsigned Opcode; public: @@ -180,21 +185,17 @@ public: virtual const AsmToken &Lex(); - bool ParseStatement(); void setParsingInlineAsm(bool V) { ParsingInlineAsm = V; } bool isParsingInlineAsm() { return ParsingInlineAsm; } - unsigned getNumParsedOperands() { return ParsedOperands.size(); } - MCParsedAsmOperand &getParsedOperand(unsigned OpNum) { - assert (ParsedOperands.size() > OpNum); - return *ParsedOperands[OpNum]; - } - void freeParsedOperands() { - for (unsigned i = 0, e = ParsedOperands.size(); i != e; ++i) - delete ParsedOperands[i]; - ParsedOperands.clear(); - } - bool isInstruction() { return Opcode != (unsigned)~0x0; } - unsigned getOpcode() { return Opcode; } + + bool ParseMSInlineAsm(void *AsmLoc, std::string &AsmString, + unsigned &NumOutputs, unsigned &NumInputs, + SmallVectorImpl<void *> &OpDecls, + SmallVectorImpl<std::string> &Constraints, + SmallVectorImpl<std::string> &Clobbers, + const MCInstrInfo *MII, + const MCInstPrinter *IP, + MCAsmParserSemaCallback &SI); bool ParseExpression(const MCExpr *&Res); virtual bool ParseExpression(const MCExpr *&Res, SMLoc &EndLoc); @@ -206,6 +207,7 @@ public: private: void CheckForValidSection(); + bool ParseStatement(); void EatToEndOfLine(); bool ParseCppHashLineFilenameComment(const SMLoc &L); @@ -318,6 +320,10 @@ private: bool ParseDirectiveIrp(SMLoc DirectiveLoc); // ".irp" bool ParseDirectiveIrpc(SMLoc DirectiveLoc); // ".irpc" bool ParseDirectiveEndr(SMLoc DirectiveLoc); // ".endr" + + // MS-style inline assembly parsing. + bool isInstruction() { return Opcode != (unsigned)~0x0; } + unsigned getOpcode() { return Opcode; } }; /// \brief Generic implementations of directive handling, etc. 
which is shared @@ -1403,10 +1409,13 @@ bool AsmParser::ParseStatement() { ParsingInlineAsm); } - // Free any parsed operands. If parsing ms-style inline assembly it is the - // responsibility of the caller (i.e., clang) to free the parsed operands. - if (!ParsingInlineAsm) - freeParsedOperands(); + // Free any parsed operands. If parsing ms-style inline assembly the operands + // will be freed by the ParseMSInlineAsm() function. + if (!ParsingInlineAsm) { + for (unsigned i = 0, e = ParsedOperands.size(); i != e; ++i) + delete ParsedOperands[i]; + ParsedOperands.clear(); + } // Don't skip the rest of the line, the instruction parser is responsible for // that. @@ -3631,6 +3640,171 @@ bool AsmParser::ParseDirectiveEndr(SMLoc DirectiveLoc) { return false; } +namespace { +enum AsmOpRewriteKind { + AOK_Imm, + AOK_Input, + AOK_Output +}; + +struct AsmOpRewrite { + AsmOpRewriteKind Kind; + SMLoc Loc; + unsigned Len; + +public: + AsmOpRewrite(AsmOpRewriteKind kind, SMLoc loc, unsigned len) + : Kind(kind), Loc(loc), Len(len) { } +}; +} + +bool AsmParser::ParseMSInlineAsm(void *AsmLoc, std::string &AsmString, + unsigned &NumOutputs, unsigned &NumInputs, + SmallVectorImpl<void *> &OpDecls, + SmallVectorImpl<std::string> &Constraints, + SmallVectorImpl<std::string> &Clobbers, + const MCInstrInfo *MII, + const MCInstPrinter *IP, + MCAsmParserSemaCallback &SI) { + SmallVector<void*, 4> InputDecls; + SmallVector<void*, 4> OutputDecls; + SmallVector<std::string, 4> InputConstraints; + SmallVector<std::string, 4> OutputConstraints; + std::set<std::string> ClobberRegs; + + SmallVector<struct AsmOpRewrite, 4> AsmStrRewrites; + + // Prime the lexer. + Lex(); + + // While we have input, parse each statement. + unsigned InputIdx = 0; + unsigned OutputIdx = 0; + while (getLexer().isNot(AsmToken::Eof)) { + if (ParseStatement()) return true; + + if (isInstruction()) { + const MCInstrDesc &Desc = MII->get(getOpcode()); + + // Build the list of clobbers, outputs and inputs. + for (unsigned i = 1, e = ParsedOperands.size(); i != e; ++i) { + MCParsedAsmOperand *Operand = ParsedOperands[i]; + + // Immediate. + if (Operand->isImm()) { + AsmStrRewrites.push_back(AsmOpRewrite(AOK_Imm, + Operand->getStartLoc(), + Operand->getNameLen())); + continue; + } + + // Register operand. + if (Operand->isReg()) { + unsigned NumDefs = Desc.getNumDefs(); + // Clobber. + if (NumDefs && Operand->getMCOperandNum() < NumDefs) { + std::string Reg; + raw_string_ostream OS(Reg); + IP->printRegName(OS, Operand->getReg()); + ClobberRegs.insert(StringRef(OS.str())); + } + continue; + } + + // Expr/Input or Output. + unsigned Size; + void *OpDecl = SI.LookupInlineAsmIdentifier(Operand->getName(), AsmLoc, + Size); + if (OpDecl) { + bool isOutput = (i == 1) && Desc.mayStore(); + if (isOutput) { + std::string Constraint = "="; + ++InputIdx; + OutputDecls.push_back(OpDecl); + Constraint += Operand->getConstraint().str(); + OutputConstraints.push_back(Constraint); + AsmStrRewrites.push_back(AsmOpRewrite(AOK_Output, + Operand->getStartLoc(), + Operand->getNameLen())); + } else { + InputDecls.push_back(OpDecl); + InputConstraints.push_back(Operand->getConstraint().str()); + AsmStrRewrites.push_back(AsmOpRewrite(AOK_Input, + Operand->getStartLoc(), + Operand->getNameLen())); + } + } + } + // Free any parsed operands. + for (unsigned i = 0, e = ParsedOperands.size(); i != e; ++i) + delete ParsedOperands[i]; + ParsedOperands.clear(); + } + } + + // Set the number of Outputs and Inputs. 
+  NumOutputs = OutputDecls.size();
+  NumInputs = InputDecls.size();
+
+  // Set the unique clobbers.
+  for (std::set<std::string>::iterator I = ClobberRegs.begin(),
+         E = ClobberRegs.end(); I != E; ++I)
+    Clobbers.push_back(*I);
+
+  // Merge the various outputs and inputs. Outputs are expected first.
+  if (NumOutputs || NumInputs) {
+    unsigned NumExprs = NumOutputs + NumInputs;
+    OpDecls.resize(NumExprs);
+    Constraints.resize(NumExprs);
+    for (unsigned i = 0; i < NumOutputs; ++i) {
+      OpDecls[i] = OutputDecls[i];
+      Constraints[i] = OutputConstraints[i];
+    }
+    for (unsigned i = 0, j = NumOutputs; i < NumInputs; ++i, ++j) {
+      OpDecls[j] = InputDecls[i];
+      Constraints[j] = InputConstraints[i];
+    }
+  }
+
+  // Build the IR assembly string.
+  std::string AsmStringIR;
+  raw_string_ostream OS(AsmStringIR);
+  const char *Start = SrcMgr.getMemoryBuffer(0)->getBufferStart();
+  for (SmallVectorImpl<struct AsmOpRewrite>::iterator
+         I = AsmStrRewrites.begin(), E = AsmStrRewrites.end(); I != E; ++I) {
+    const char *Loc = (*I).Loc.getPointer();
+
+    // Emit everything up to the immediate/expression.
+    OS << StringRef(Start, Loc - Start);
+
+    // Rewrite expressions in $N notation.
+    switch ((*I).Kind) {
+    case AOK_Imm:
+      OS << Twine("$$") + StringRef(Loc, (*I).Len);
+      break;
+    case AOK_Input:
+      OS << '$';
+      OS << InputIdx++;
+      break;
+    case AOK_Output:
+      OS << '$';
+      OS << OutputIdx++;
+      break;
+    }
+
+    // Skip the original expression.
+    Start = Loc + (*I).Len;
+  }
+
+  // Emit the remainder of the asm string.
+  const char *AsmEnd = SrcMgr.getMemoryBuffer(0)->getBufferEnd();
+  if (Start != AsmEnd)
+    OS << StringRef(Start, AsmEnd - Start);
+
+  AsmString = OS.str();
+  return false;
+}
+
 /// \brief Create an MCAsmParser instance.
 MCAsmParser *llvm::createMCAsmParser(SourceMgr &SM, MCContext &C,
                                      MCStreamer &Out,
diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc
index 6d874ea0d0..264fa5dbde 100644
--- a/lib/Support/Unix/Signals.inc
+++ b/lib/Support/Unix/Signals.inc
@@ -125,17 +125,29 @@ static void UnregisterHandlers() {
 /// NB: This must be an async signal safe function. It cannot allocate or free
 /// memory, even in debug builds.
 static void RemoveFilesToRemove() {
-  // Note: avoid iterators in case of debug iterators that allocate or release
+  // We avoid iterators in case of debug iterators that allocate or release
   // memory.
   for (unsigned i = 0, e = FilesToRemove.size(); i != e; ++i) {
-    // Note that we don't want to use any external code here, and we don't care
-    // about errors. We're going to try as hard as we can as often as we need
-    // to to make these files go away. If these aren't files, too bad.
-    //
-    // We do however rely on a std::string implementation for which repeated
-    // calls to 'c_str()' don't allocate memory. We pre-call 'c_str()' on all
-    // of these strings to try to ensure this is safe.
-    unlink(FilesToRemove[i].c_str());
+    // We rely on a std::string implementation for which repeated calls to
+    // 'c_str()' don't allocate memory. We pre-call 'c_str()' on all of these
+    // strings to try to ensure this is safe.
+    const char *path = FilesToRemove[i].c_str();
+
+    // Get the status so we can determine if it's a file or directory. If we
+    // can't stat the file, ignore it.
+    struct stat buf;
+    if (stat(path, &buf) != 0)
+      continue;
+
+    // If this is not a regular file, ignore it. We want to prevent removal
+    // of special files like /dev/null, even if the compiler is being run
+    // with super-user permissions.
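
To make the rewriting machinery in ParseMSInlineAsm() above concrete, here is a hedged walk-through; the identifier, its constraint string, and the exact spelling of the clobbers are illustrative assumptions, not output captured from this code:

    // Given MS-style inline asm that reads a local variable Count:
    //     mov ecx, Count
    //     mov eax, 5
    // the parse loop records "ecx" and "eax" as clobbers (both are register
    // defs), an AOK_Input rewrite spanning "Count" (resolved to a frontend
    // declaration via LookupInlineAsmIdentifier()), and an AOK_Imm rewrite
    // spanning "5". Replaying the rewrites yields the IR-level string
    //     mov ecx, $0
    //     mov eax, $$5
    // Inputs are numbered after outputs; that is why the parse loop bumps
    // InputIdx once per output, so the first AOK_Input lands on $NumOutputs.
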
+ if (!S_ISREG(buf.st_mode)) + continue; + + // Otherwise, remove the file. We ignore any errors here as there is nothing + // else we can do. + unlink(path); } } diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 6e991032d0..2379c425aa 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -1258,7 +1258,7 @@ static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) { MachineRegisterInfo &MRI = MF.getRegInfo(); unsigned NumSpills = 0; for (; NumSpills < 8; ++NumSpills) - if (!MRI.isPhysRegOrOverlapUsed(ARM::D8 + NumSpills)) + if (!MRI.isPhysRegUsed(ARM::D8 + NumSpills)) break; // Don't do this for just one d-register. It's not worth it. @@ -1331,7 +1331,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, for (unsigned i = 0; CSRegs[i]; ++i) { unsigned Reg = CSRegs[i]; bool Spilled = false; - if (MF.getRegInfo().isPhysRegOrOverlapUsed(Reg)) { + if (MF.getRegInfo().isPhysRegUsed(Reg)) { Spilled = true; CanEliminateFrame = false; } diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 30fa6bc2c7..d968dc9f3d 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -78,8 +78,7 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT, ELFWriterInfo(*this), TLInfo(*this), TSInfo(*this), - FrameLowering(Subtarget), - STTI(&TLInfo) { + FrameLowering(Subtarget) { if (!Subtarget.hasARMOps()) report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not " "support ARM mode execution!"); @@ -112,8 +111,7 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT, TSInfo(*this), FrameLowering(Subtarget.hasThumb2() ? new ARMFrameLowering(Subtarget) - : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)), - STTI(&TLInfo){ + : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)) { } namespace { diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index 25ab8295f0..076ad01fee 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -25,7 +25,6 @@ #include "Thumb1FrameLowering.h" #include "Thumb2InstrInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetTransformImpl.h" #include "llvm/DataLayout.h" #include "llvm/MC/MCStreamer.h" #include "llvm/ADT/OwningPtr.h" @@ -75,8 +74,6 @@ class ARMTargetMachine : public ARMBaseTargetMachine { ARMTargetLowering TLInfo; ARMSelectionDAGInfo TSInfo; ARMFrameLowering FrameLowering; - ScalarTargetTransformImpl STTI; - VectorTargetTransformImpl VTTI; public: ARMTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, @@ -98,12 +95,7 @@ class ARMTargetMachine : public ARMBaseTargetMachine { virtual const ARMFrameLowering *getFrameLowering() const { return &FrameLowering; } - virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { - return &STTI; - } - virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { - return &VTTI; - } + virtual const ARMInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const DataLayout *getDataLayout() const { return &DL; } virtual const ARMELFWriterInfo *getELFWriterInfo() const { @@ -125,8 +117,6 @@ class ThumbTargetMachine : public ARMBaseTargetMachine { ARMSelectionDAGInfo TSInfo; // Either Thumb1FrameLowering or ARMFrameLowering. 
OwningPtr<ARMFrameLowering> FrameLowering; - ScalarTargetTransformImpl STTI; - VectorTargetTransformImpl VTTI; public: ThumbTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, @@ -155,12 +145,6 @@ public: virtual const ARMFrameLowering *getFrameLowering() const { return FrameLowering.get(); } - virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { - return &STTI; - } - virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { - return &VTTI; - } virtual const DataLayout *getDataLayout() const { return &DL; } virtual const ARMELFWriterInfo *getELFWriterInfo() const { return Subtarget.isTargetELF() ? &ELFWriterInfo : 0; diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt index 48df199437..096ef001ed 100644 --- a/lib/Target/CMakeLists.txt +++ b/lib/Target/CMakeLists.txt @@ -11,7 +11,6 @@ add_llvm_library(LLVMTarget TargetMachineC.cpp TargetRegisterInfo.cpp TargetSubtargetInfo.cpp - TargetTransformImpl.cpp ) foreach(t ${LLVM_TARGETS_TO_BUILD}) diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp index e92ad01e1d..a37ad7f85a 100644 --- a/lib/Target/CellSPU/SPUTargetMachine.cpp +++ b/lib/Target/CellSPU/SPUTargetMachine.cpp @@ -43,8 +43,7 @@ SPUTargetMachine::SPUTargetMachine(const Target &T, StringRef TT, FrameLowering(Subtarget), TLInfo(*this), TSInfo(*this), - InstrItins(Subtarget.getInstrItineraryData()), - STTI(&TLInfo){ + InstrItins(Subtarget.getInstrItineraryData()) { } //===----------------------------------------------------------------------===// diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h index 7f53ea6fbe..58699a30d2 100644 --- a/lib/Target/CellSPU/SPUTargetMachine.h +++ b/lib/Target/CellSPU/SPUTargetMachine.h @@ -20,7 +20,6 @@ #include "SPUSelectionDAGInfo.h" #include "SPUFrameLowering.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetTransformImpl.h" #include "llvm/DataLayout.h" namespace llvm { @@ -35,8 +34,6 @@ class SPUTargetMachine : public LLVMTargetMachine { SPUTargetLowering TLInfo; SPUSelectionDAGInfo TSInfo; InstrItineraryData InstrItins; - ScalarTargetTransformImpl STTI; - VectorTargetTransformImpl VTTI; public: SPUTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -80,12 +77,6 @@ public: virtual const InstrItineraryData *getInstrItineraryData() const { return &InstrItins; } - virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { - return &STTI; - } - virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { - return &VTTI; - } // Pass Pipeline Configuration virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp index 353542a809..d198a3f45b 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -74,8 +74,7 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT, Subtarget(TT, CPU, FS), InstrInfo(Subtarget), TLInfo(*this), TSInfo(*this), FrameLowering(Subtarget), - InstrItins(&Subtarget.getInstrItineraryData()), - STTI(&TLInfo) { + InstrItins(&Subtarget.getInstrItineraryData()) { setMCUseCFI(false); } @@ -88,7 +87,7 @@ bool HexagonTargetMachine::addPassesForOptimizations(PassManagerBase &PM) { PM.add(createDeadCodeEliminationPass()); PM.add(createConstantPropagationPass()); 
PM.add(createLoopUnrollPass()); - PM.add(createLoopStrengthReducePass()); + PM.add(createLoopStrengthReducePass(getTargetLowering())); return true; } diff --git a/lib/Target/Hexagon/HexagonTargetMachine.h b/lib/Target/Hexagon/HexagonTargetMachine.h index 7a4215c119..ade5b3e9c1 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.h +++ b/lib/Target/Hexagon/HexagonTargetMachine.h @@ -21,7 +21,6 @@ #include "HexagonFrameLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/DataLayout.h" -#include "llvm/Target/TargetTransformImpl.h" namespace llvm { @@ -35,8 +34,6 @@ class HexagonTargetMachine : public LLVMTargetMachine { HexagonSelectionDAGInfo TSInfo; HexagonFrameLowering FrameLowering; const InstrItineraryData* InstrItins; - ScalarTargetTransformImpl STTI; - VectorTargetTransformImpl VTTI; public: HexagonTargetMachine(const Target &T, StringRef TT,StringRef CPU, @@ -71,14 +68,6 @@ public: return &TSInfo; } - virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { - return &STTI; - } - - virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { - return &VTTI; - } - virtual const DataLayout *getDataLayout() const { return &DL; } static unsigned getModuleMatchQuality(const Module &M); diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp index cb5f46062d..1f2cf6d9d2 100644 --- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp +++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp @@ -42,7 +42,7 @@ MBlazeTargetMachine(const Target &T, StringRef TT, InstrInfo(*this), FrameLowering(Subtarget), TLInfo(*this), TSInfo(*this), ELFWriterInfo(*this), - InstrItins(Subtarget.getInstrItineraryData()), STTI(&TLInfo) { + InstrItins(Subtarget.getInstrItineraryData()) { } namespace { diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.h b/lib/Target/MBlaze/MBlazeTargetMachine.h index 34648b9b9a..d949e54f0d 100644 --- a/lib/Target/MBlaze/MBlazeTargetMachine.h +++ b/lib/Target/MBlaze/MBlazeTargetMachine.h @@ -25,7 +25,6 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetTransformImpl.h" namespace llvm { class formatted_raw_ostream; @@ -40,8 +39,6 @@ namespace llvm { MBlazeIntrinsicInfo IntrinsicInfo; MBlazeELFWriterInfo ELFWriterInfo; InstrItineraryData InstrItins; - ScalarTargetTransformImpl STTI; - VectorTargetTransformImpl VTTI; public: MBlazeTargetMachine(const Target &T, StringRef TT, @@ -80,10 +77,6 @@ namespace llvm { virtual const MBlazeELFWriterInfo *getELFWriterInfo() const { return &ELFWriterInfo; } - virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const - { return &STTI; } - virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const - { return &VTTI; } // Pass Pipeline Configuration virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); diff --git a/lib/Target/MSP430/MSP430FrameLowering.cpp b/lib/Target/MSP430/MSP430FrameLowering.cpp index a312c8d5b2..2e170f17bf 100644 --- a/lib/Target/MSP430/MSP430FrameLowering.cpp +++ b/lib/Target/MSP430/MSP430FrameLowering.cpp @@ -221,3 +221,17 @@ MSP430FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, return true; } + +void +MSP430FrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) + const { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + + // Create a frame entry for the FPW register that must be saved. 
+ if (TFI->hasFP(MF)) { + int FrameIdx = MF.getFrameInfo()->CreateFixedObject(2, -4, true); + (void)FrameIdx; + assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() && + "Slot for FPW register must be last in order to be found!"); + } +} diff --git a/lib/Target/MSP430/MSP430FrameLowering.h b/lib/Target/MSP430/MSP430FrameLowering.h index b636827da7..cb02545852 100644 --- a/lib/Target/MSP430/MSP430FrameLowering.h +++ b/lib/Target/MSP430/MSP430FrameLowering.h @@ -46,6 +46,7 @@ public: bool hasFP(const MachineFunction &MF) const; bool hasReservedCallFrame(const MachineFunction &MF) const; + void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; }; } // End llvm namespace diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp index aed46a2ec5..9ae238f66f 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.cpp +++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp @@ -220,20 +220,6 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.getOperand(i+1).ChangeToImmediate(Offset); } -void -MSP430RegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) - const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - - // Create a frame entry for the FPW register that must be saved. - if (TFI->hasFP(MF)) { - int FrameIdx = MF.getFrameInfo()->CreateFixedObject(2, -4, true); - (void)FrameIdx; - assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() && - "Slot for FPW register must be last in order to be found!"); - } -} - unsigned MSP430RegisterInfo::getFrameRegister(const MachineFunction &MF) const { const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); diff --git a/lib/Target/MSP430/MSP430RegisterInfo.h b/lib/Target/MSP430/MSP430RegisterInfo.h index 9ee0a03f63..64a43bcafb 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.h +++ b/lib/Target/MSP430/MSP430RegisterInfo.h @@ -49,8 +49,6 @@ public: void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, RegScavenger *RS = NULL) const; - void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; - // Debug information queries. unsigned getFrameRegister(const MachineFunction &MF) const; }; diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp index 29ea681216..da5899b86d 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.cpp +++ b/lib/Target/MSP430/MSP430TargetMachine.cpp @@ -36,7 +36,7 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T, // FIXME: Check DataLayout string. DL("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"), InstrInfo(*this), TLInfo(*this), TSInfo(*this), - FrameLowering(Subtarget), STTI(&TLInfo) { } + FrameLowering(Subtarget) { } namespace { /// MSP430 Code Generator Pass Configuration Options. 
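
For readers unfamiliar with fixed frame objects, the call above can be unpacked as follows (explanatory sketch; only the constants come from the code):

    // CreateFixedObject(/*Size=*/2, /*SPOffset=*/-4, /*Immutable=*/true)
    // reserves a 2-byte word at a fixed offset of -4 from the incoming stack
    // pointer instead of letting frame finalization place it. Fixed objects
    // receive negative frame indices, and the assert checks the slot got
    // MFI->getObjectIndexBegin(), the first such index, which (per the
    // assert's own message) is how the saved FPW is located later.
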
diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h index 186172ede4..ba3cef1f2a 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.h +++ b/lib/Target/MSP430/MSP430TargetMachine.h @@ -24,7 +24,6 @@ #include "llvm/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetTransformImpl.h" namespace llvm { @@ -37,8 +36,6 @@ class MSP430TargetMachine : public LLVMTargetMachine { MSP430TargetLowering TLInfo; MSP430SelectionDAGInfo TSInfo; MSP430FrameLowering FrameLowering; - ScalarTargetTransformImpl STTI; - VectorTargetTransformImpl VTTI; public: MSP430TargetMachine(const Target &T, StringRef TT, @@ -64,12 +61,7 @@ public: virtual const MSP430SelectionDAGInfo* getSelectionDAGInfo() const { return &TSInfo; } - virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { - return &STTI; - } - virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { - return &VTTI; - } + virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); }; // MSP430TargetMachine. diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp index 8991433005..5e33fed0cc 100644 --- a/lib/Target/Mips/Mips16InstrInfo.cpp +++ b/lib/Target/Mips/Mips16InstrInfo.cpp @@ -25,7 +25,7 @@ using namespace llvm; Mips16InstrInfo::Mips16InstrInfo(MipsTargetMachine &tm) - : MipsInstrInfo(tm, /* FIXME: set mips16 unconditional br */ 0), + : MipsInstrInfo(tm, Mips::BimmX16), RI(*tm.getSubtargetImpl()) {} const MipsRegisterInfo &Mips16InstrInfo::getRegisterInfo() const { @@ -137,12 +137,39 @@ bool Mips16InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { /// GetOppositeBranchOpc - Return the inverse of the specified /// opcode, e.g. turning BEQ to BNE. unsigned Mips16InstrInfo::GetOppositeBranchOpc(unsigned Opc) const { + switch (Opc) { + default: llvm_unreachable("Illegal opcode!"); + case Mips::BeqzRxImmX16: return Mips::BnezRxImmX16; + case Mips::BnezRxImmX16: return Mips::BeqzRxImmX16; + case Mips::BteqzT8CmpX16: return Mips::BtnezT8CmpX16; + case Mips::BteqzT8SltX16: return Mips::BtnezT8SltX16; + case Mips::BteqzT8SltiX16: return Mips::BtnezT8SltiX16; + case Mips::BtnezX16: return Mips::BteqzX16; + case Mips::BtnezT8CmpiX16: return Mips::BteqzT8CmpiX16; + case Mips::BtnezT8SltuX16: return Mips::BteqzT8SltuX16; + case Mips::BtnezT8SltiuX16: return Mips::BteqzT8SltiuX16; + case Mips::BteqzX16: return Mips::BtnezX16; + case Mips::BteqzT8CmpiX16: return Mips::BtnezT8CmpiX16; + case Mips::BteqzT8SltuX16: return Mips::BtnezT8SltuX16; + case Mips::BteqzT8SltiuX16: return Mips::BtnezT8SltiuX16; + case Mips::BtnezT8CmpX16: return Mips::BteqzT8CmpX16; + case Mips::BtnezT8SltX16: return Mips::BteqzT8SltX16; + case Mips::BtnezT8SltiX16: return Mips::BteqzT8SltiX16; + } assert(false && "Implement this function."); return 0; } unsigned Mips16InstrInfo::GetAnalyzableBrOpc(unsigned Opc) const { - return 0; + return (Opc == Mips::BeqzRxImmX16 || Opc == Mips::BimmX16 || + Opc == Mips::BnezRxImmX16 || Opc == Mips::BteqzX16 || + Opc == Mips::BteqzT8CmpX16 || Opc == Mips::BteqzT8CmpiX16 || + Opc == Mips::BteqzT8SltX16 || Opc == Mips::BteqzT8SltuX16 || + Opc == Mips::BteqzT8SltiX16 || Opc == Mips::BteqzT8SltiuX16 || + Opc == Mips::BtnezX16 || Opc == Mips::BtnezT8CmpX16 || + Opc == Mips::BtnezT8CmpiX16 || Opc == Mips::BtnezT8SltX16 || + Opc == Mips::BtnezT8SltuX16 || Opc == Mips::BtnezT8SltiX16 || + Opc == Mips::BtnezT8SltiuX16 ) ? 
Opc : 0; } void Mips16InstrInfo::ExpandRetRA16(MachineBasicBlock &MBB, diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td index eba201a0ea..2694b09206 100644 --- a/lib/Target/Mips/Mips16InstrInfo.td +++ b/lib/Target/Mips/Mips16InstrInfo.td @@ -21,6 +21,26 @@ def mem16 : Operand<i32> { } // +// EXT-I instruction format +// +class FEXT_I16_ins<bits<5> eop, string asmstr, InstrItinClass itin> : + FEXT_I16<eop, (outs), (ins brtarget:$imm16), + !strconcat(asmstr, "\t$imm16"),[], itin>; + +// +// EXT-I8 instruction format +// + +class FEXT_I816_ins_base<bits<3> _func, string asmstr, + string asmstr2, InstrItinClass itin>: + FEXT_I816<_func, (outs), (ins uimm16:$imm), !strconcat(asmstr, asmstr2), + [], itin>; + +class FEXT_I816_ins<bits<3> _func, string asmstr, + InstrItinClass itin>: + FEXT_I816_ins_base<_func, asmstr, "\t$imm", itin>; + +// // Assembler formats in alphabetical order. // Natural and pseudos are mixed together. // @@ -40,6 +60,11 @@ class FEXT_RI16_ins<bits<5> _op, string asmstr, class FEXT_RI16_PC_ins<bits<5> _op, string asmstr, InstrItinClass itin>: FEXT_RI16_ins_base<_op, asmstr, "\t$rx, $$pc, $imm", itin>; +class FEXT_RI16_B_ins<bits<5> _op, string asmstr, + InstrItinClass itin>: + FEXT_RI16<_op, (outs), (ins CPU16Regs:$rx, brtarget:$imm), + !strconcat(asmstr, "\t$rx, $imm"), [], itin>; + class FEXT_2RI16_ins<bits<5> _op, string asmstr, InstrItinClass itin>: FEXT_RI16<_op, (outs CPU16Regs:$rx), (ins CPU16Regs:$rx_, simm16:$imm), @@ -47,6 +72,7 @@ class FEXT_2RI16_ins<bits<5> _op, string asmstr, let Constraints = "$rx_ = $rx"; } + // this has an explicit sp argument that we ignore to work around a problem // in the compiler class FEXT_RI16_SP_explicit_ins<bits<5> _op, string asmstr, @@ -75,6 +101,31 @@ class FEXT_SHIFT16_ins<bits<2> _f, string asmstr, InstrItinClass itin>: FEXT_SHIFT16<_f, (outs CPU16Regs:$rx), (ins CPU16Regs:$ry, shamt:$sa), !strconcat(asmstr, "\t$rx, $ry, $sa"), [], itin>; +// +// EXT-T8I8 +// +class FEXT_T8I816_ins<bits<3> _func, string asmstr, string asmstr2, + InstrItinClass itin>: + FEXT_I816<_func, (outs), + (ins CPU16Regs:$rx, CPU16Regs:$ry, brtarget:$imm), + !strconcat(asmstr2, !strconcat("\t$rx, $ry\n\t", + !strconcat(asmstr, "\t$imm"))),[], itin> { + let isCodeGenOnly=1; +} + +// +// EXT-T8I8I +// +class FEXT_T8I8I16_ins<bits<3> _func, string asmstr, string asmstr2, + InstrItinClass itin>: + FEXT_I816<_func, (outs), + (ins CPU16Regs:$rx, simm16:$imm, brtarget:$targ), + !strconcat(asmstr2, !strconcat("\t$rx, $imm\n\t", + !strconcat(asmstr, "\t$targ"))), [], itin> { + let isCodeGenOnly=1; +} +// + // // I8_MOVR32 instruction format (used only by the MOVR32 instructio @@ -165,6 +216,17 @@ class ArithLogic16Defs<bit isCom=0> { bit neverHasSideEffects = 1; } +class branch16 { + bit isBranch = 1; + bit isTerminator = 1; + bit isBarrier = 1; +} + +class cbranch16 { + bit isBranch = 1; + bit isTerminator = 1; +} + class MayLoad { bit mayLoad = 1; } @@ -204,6 +266,69 @@ def AdduRxRyRz16: FRRR16_ins<01, "addu", IIAlu>, ArithLogic16Defs<1>; // To do a bitwise logical AND. def AndRxRxRy16: FRxRxRy16_ins<0b01100, "and", IIAlu>, ArithLogic16Defs<1>; + + +// +// Format: BEQZ rx, offset MIPS16e +// Purpose: Branch on Equal to Zero (Extended) +// To test a GPR then do a PC-relative conditional branch. +// +def BeqzRxImmX16: FEXT_RI16_B_ins<0b00100, "beqz", IIAlu>, cbranch16; + +// Format: B offset MIPS16e +// Purpose: Unconditional Branch +// To do an unconditional PC-relative branch. 
+//
+def BimmX16: FEXT_I16_ins<0b00010, "b", IIAlu>, branch16;
+
+//
+// Format: BNEZ rx, offset MIPS16e
+// Purpose: Branch on Not Equal to Zero (Extended)
+// To test a GPR then do a PC-relative conditional branch.
+//
+def BnezRxImmX16: FEXT_RI16_B_ins<0b00101, "bnez", IIAlu>, cbranch16;
+
+//
+// Format: BTEQZ offset MIPS16e
+// Purpose: Branch on T Equal to Zero (Extended)
+// To test special register T then do a PC-relative conditional branch.
+//
+def BteqzX16: FEXT_I816_ins<0b000, "bteqz", IIAlu>, cbranch16;
+
+def BteqzT8CmpX16: FEXT_T8I816_ins<0b000, "bteqz", "cmp", IIAlu>, cbranch16;
+
+def BteqzT8CmpiX16: FEXT_T8I8I16_ins<0b000, "bteqz", "cmpi", IIAlu>,
+  cbranch16;
+
+def BteqzT8SltX16: FEXT_T8I816_ins<0b000, "bteqz", "slt", IIAlu>, cbranch16;
+
+def BteqzT8SltuX16: FEXT_T8I816_ins<0b000, "bteqz", "sltu", IIAlu>, cbranch16;
+
+def BteqzT8SltiX16: FEXT_T8I8I16_ins<0b000, "bteqz", "slti", IIAlu>, cbranch16;
+
+def BteqzT8SltiuX16: FEXT_T8I8I16_ins<0b000, "bteqz", "sltiu", IIAlu>,
+  cbranch16;
+
+//
+// Format: BTNEZ offset MIPS16e
+// Purpose: Branch on T Not Equal to Zero (Extended)
+// To test special register T then do a PC-relative conditional branch.
+//
+def BtnezX16: FEXT_I816_ins<0b001, "btnez", IIAlu>, cbranch16;
+
+def BtnezT8CmpX16: FEXT_T8I816_ins<0b000, "btnez", "cmp", IIAlu>, cbranch16;
+
+def BtnezT8CmpiX16: FEXT_T8I8I16_ins<0b000, "btnez", "cmpi", IIAlu>, cbranch16;
+
+def BtnezT8SltX16: FEXT_T8I816_ins<0b000, "btnez", "slt", IIAlu>, cbranch16;
+
+def BtnezT8SltuX16: FEXT_T8I816_ins<0b000, "btnez", "sltu", IIAlu>, cbranch16;
+
+def BtnezT8SltiX16: FEXT_T8I8I16_ins<0b000, "btnez", "slti", IIAlu>, cbranch16;
+
+def BtnezT8SltiuX16: FEXT_T8I8I16_ins<0b000, "btnez", "sltiu", IIAlu>,
+  cbranch16;
+
 //
 // Format: DIV rx, ry MIPS16e
 // Purpose: Divide Word
@@ -562,6 +687,11 @@ def: StoreM16_pat<truncstorei8, SbRxRyOffMemX16>;
 def: StoreM16_pat<truncstorei16, ShRxRyOffMemX16>;
 def: StoreM16_pat<store, SwRxRyOffMemX16>;
 
+// Unconditional branch
+class UncondBranch16_pat<SDNode OpNode, Instruction I>:
+      Mips16Pat<(OpNode bb:$imm16), (I bb:$imm16)> {
+        let Predicates = [RelocPIC, InMips16Mode];
+  }
 
 // Jump and Link (Call)
 let isCall=1, hasDelaySlot=1 in
@@ -574,7 +704,144 @@ let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, hasCtrlDep=1,
   hasExtraSrcRegAllocReq = 1 in
 def RetRA16 : MipsPseudo16<(outs), (ins), "", [(MipsRet)]>;
 
+
+//
+// Some conditional branch patterns are not generated by llvm at this time.
+// Some are unused for seemingly arbitrary reasons: e.g. for signed number
+// comparison they are used, while for unsigned a different pattern is used.
+// I am pushing upstream from the full mips16 port and it seemed that I needed
+// these earlier, and the mips32 port has these, but now I cannot create test
+// cases that use these patterns. While I sort this all out I will leave these
+// extra patterns commented out, and if I can be sure they are really not
+// used, I will delete the code. I don't want to check the code in uncommented
+// without a valid test case. In some cases the compiler is generating setcc
+// patterns instead; since I had implemented setcc first, that may have masked
+// the problem. The setcc variants are suboptimal for mips16, so I may want to
+// figure out how to enable the brcond patterns, or else possibly new
+// combinations of brcond and setcc.
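
Before the patterns themselves, a concrete illustration of what the enabled brcond selections buy (the assembly is illustrative; register names and the branch target are placeholders):

    // Selecting (brcond (i32 (setlt $a, $b)), bb) with the BtnezT8SltX16
    // pattern below emits a fused compare-and-branch pair through the
    // implicit T register:
    //
    //     slt   $a, $b        # T <- ($a < $b)
    //     btnez .LBB0_2       # taken if T != 0
    //
    // The setcc-based fallback would first materialize the i1 into a GPR,
    // which is why these fused patterns are preferable on mips16.
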
+// +// +// bcond-seteq +// +def: Mips16Pat + <(brcond (i32 (seteq CPU16Regs:$rx, CPU16Regs:$ry)), bb:$imm16), + (BteqzT8CmpX16 CPU16Regs:$rx, CPU16Regs:$ry, bb:$imm16) + >; + + +def: Mips16Pat + <(brcond (i32 (seteq CPU16Regs:$rx, immZExt16:$imm)), bb:$targ16), + (BteqzT8CmpiX16 CPU16Regs:$rx, immSExt16:$imm, bb:$targ16) + >; + +def: Mips16Pat + <(brcond (i32 (seteq CPU16Regs:$rx, 0)), bb:$targ16), + (BeqzRxImmX16 CPU16Regs:$rx, bb:$targ16) + >; + +// +// bcond-setgt (do we need to have this pair of setlt, setgt??) +// +def: Mips16Pat + <(brcond (i32 (setgt CPU16Regs:$rx, CPU16Regs:$ry)), bb:$imm16), + (BtnezT8SltX16 CPU16Regs:$ry, CPU16Regs:$rx, bb:$imm16) + >; + +// +// bcond-setge +// +def: Mips16Pat + <(brcond (i32 (setge CPU16Regs:$rx, CPU16Regs:$ry)), bb:$imm16), + (BteqzT8SltX16 CPU16Regs:$rx, CPU16Regs:$ry, bb:$imm16) + >; + +// +// never called because compiler transforms a >= k to a > (k-1) +//def: Mips16Pat +// <(brcond (i32 (setge CPU16Regs:$rx, immSExt16:$imm)), bb:$imm16), +// (BteqzT8SltiX16 CPU16Regs:$rx, immSExt16:$imm, bb:$imm16) +// >; + +// +// bcond-setlt +// +def: Mips16Pat + <(brcond (i32 (setlt CPU16Regs:$rx, CPU16Regs:$ry)), bb:$imm16), + (BtnezT8SltX16 CPU16Regs:$rx, CPU16Regs:$ry, bb:$imm16) + >; + +def: Mips16Pat + <(brcond (i32 (setlt CPU16Regs:$rx, immSExt16:$imm)), bb:$imm16), + (BtnezT8SltiX16 CPU16Regs:$rx, immSExt16:$imm, bb:$imm16) + >; + +// +// bcond-setle +// +def: Mips16Pat + <(brcond (i32 (setle CPU16Regs:$rx, CPU16Regs:$ry)), bb:$imm16), + (BteqzT8SltX16 CPU16Regs:$ry, CPU16Regs:$rx, bb:$imm16) + >; + +// +// bcond-setne +// +def: Mips16Pat + <(brcond (i32 (setne CPU16Regs:$rx, CPU16Regs:$ry)), bb:$imm16), + (BtnezT8CmpX16 CPU16Regs:$rx, CPU16Regs:$ry, bb:$imm16) + >; + +def: Mips16Pat + <(brcond (i32 (setne CPU16Regs:$rx, immZExt16:$imm)), bb:$targ16), + (BtnezT8CmpiX16 CPU16Regs:$rx, immSExt16:$imm, bb:$targ16) + >; + +def: Mips16Pat + <(brcond (i32 (setne CPU16Regs:$rx, 0)), bb:$targ16), + (BnezRxImmX16 CPU16Regs:$rx, bb:$targ16) + >; + +// +// This needs to be there but I forget which code will generate it +// +def: Mips16Pat + <(brcond CPU16Regs:$rx, bb:$targ16), + (BnezRxImmX16 CPU16Regs:$rx, bb:$targ16) + >; + +// + +// +// bcond-setugt +// +//def: Mips16Pat +// <(brcond (i32 (setugt CPU16Regs:$rx, CPU16Regs:$ry)), bb:$imm16), +// (BtnezT8SltuX16 CPU16Regs:$ry, CPU16Regs:$rx, bb:$imm16) +// >; + +// +// bcond-setuge +// +//def: Mips16Pat +// <(brcond (i32 (setuge CPU16Regs:$rx, CPU16Regs:$ry)), bb:$imm16), +// (BteqzT8SltuX16 CPU16Regs:$rx, CPU16Regs:$ry, bb:$imm16) +// >; + + +// +// bcond-setult +// +//def: Mips16Pat +// <(brcond (i32 (setult CPU16Regs:$rx, CPU16Regs:$ry)), bb:$imm16), +// (BtnezT8SltuX16 CPU16Regs:$rx, CPU16Regs:$ry, bb:$imm16) +// >; + +def: UncondBranch16_pat<br, BimmX16>; + // Small immediates +def: Mips16Pat<(i32 immSExt16:$in), + (AddiuRxRxImmX16 (Move32R16 ZERO), immSExt16:$in)>; + def: Mips16Pat<(i32 immZExt16:$in), (LiRxImmX16 immZExt16:$in)>; // diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index 5e8062373f..199fe5f0d3 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -53,7 +53,7 @@ MipsTargetMachine(const Target &T, StringRef TT, InstrInfo(MipsInstrInfo::create(*this)), FrameLowering(MipsFrameLowering::create(*this, Subtarget)), TLInfo(*this), TSInfo(*this), JITInfo(), - ELFWriterInfo(false, isLittle), STTI(&TLInfo) { + ELFWriterInfo(false, isLittle) { } void MipsebTargetMachine::anchor() { } diff --git 
a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h index 60822d0c05..3a01828dd1 100644 --- a/lib/Target/Mips/MipsTargetMachine.h +++ b/lib/Target/Mips/MipsTargetMachine.h @@ -24,7 +24,6 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetTransformImpl.h" namespace llvm { class formatted_raw_ostream; @@ -39,8 +38,6 @@ class MipsTargetMachine : public LLVMTargetMachine { MipsSelectionDAGInfo TSInfo; MipsJITInfo JITInfo; MipsELFWriterInfo ELFWriterInfo; - ScalarTargetTransformImpl STTI; - VectorTargetTransformInfo VTTI; public: MipsTargetMachine(const Target &T, StringRef TT, @@ -77,12 +74,6 @@ public: virtual const MipsELFWriterInfo *getELFWriterInfo() const { return &ELFWriterInfo; } - virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { - return &STTI; - } - virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { - return &VTTI; - } // Pass Pipeline Configuration virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 7519b4a083..dbfc660687 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -72,8 +72,7 @@ NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), Subtarget(TT, CPU, FS, is64bit), DL(Subtarget.getDataLayout()), - InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering(*this,is64bit), - STTI(&TLInfo) + InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering(*this,is64bit) /*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ { } diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h index 11bc9d4fa6..d58a076858 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.h +++ b/lib/Target/NVPTX/NVPTXTargetMachine.h @@ -25,7 +25,6 @@ #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetSelectionDAGInfo.h" -#include "llvm/Target/TargetTransformImpl.h" namespace llvm { @@ -45,9 +44,6 @@ class NVPTXTargetMachine : public LLVMTargetMachine { // Hold Strings that can be free'd all together with NVPTXTargetMachine ManagedStringPool ManagedStrPool; - ScalarTargetTransformImpl STTI; - VectorTargetTransformImpl VTTI; - //bool addCommonCodeGenPasses(PassManagerBase &, CodeGenOpt::Level, // bool DisableVerify, MCContext *&OutCtx); @@ -76,12 +72,6 @@ public: virtual const TargetSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; } - virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { - return &STTI; - } - virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { - return &VTTI; - } //virtual bool addInstSelector(PassManagerBase &PM, // CodeGenOpt::Level OptLevel); diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index c18250a78f..36db4b5179 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -4224,7 +4224,52 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, return SDValue(); if (Op.getOperand(0).getValueType() == MVT::i64) { - SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op.getOperand(0)); + SDValue SINT = Op.getOperand(0); + // When converting to single-precision, we actually need to convert + // to double-precision first and then round to 
single-precision. + // To avoid double-rounding effects during that operation, we have + // to prepare the input operand. Bits that might be truncated when + // converting to double-precision are replaced by a bit that won't + // be lost at this stage, but is below the single-precision rounding + // position. + // + // However, if -enable-unsafe-fp-math is in effect, accept double + // rounding to avoid the extra overhead. + if (Op.getValueType() == MVT::f32 && + !DAG.getTarget().Options.UnsafeFPMath) { + + // Twiddle input to make sure the low 11 bits are zero. (If this + // is the case, we are guaranteed the value will fit into the 53 bit + // mantissa of an IEEE double-precision value without rounding.) + // If any of those low 11 bits were not zero originally, make sure + // bit 12 (value 2048) is set instead, so that the final rounding + // to single-precision gets the correct result. + SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64, + SINT, DAG.getConstant(2047, MVT::i64)); + Round = DAG.getNode(ISD::ADD, dl, MVT::i64, + Round, DAG.getConstant(2047, MVT::i64)); + Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT); + Round = DAG.getNode(ISD::AND, dl, MVT::i64, + Round, DAG.getConstant(-2048, MVT::i64)); + + // However, we cannot use that value unconditionally: if the magnitude + // of the input value is small, the bit-twiddling we did above might + // end up visibly changing the output. Fortunately, in that case, we + // don't need to twiddle bits since the original input will convert + // exactly to double-precision floating-point already. Therefore, + // construct a conditional to use the original value if the top 11 + // bits are all sign-bit copies, and use the rounded value computed + // above otherwise. + SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64, + SINT, DAG.getConstant(53, MVT::i32)); + Cond = DAG.getNode(ISD::ADD, dl, MVT::i64, + Cond, DAG.getConstant(1, MVT::i64)); + Cond = DAG.getSetCC(dl, MVT::i32, + Cond, DAG.getConstant(1, MVT::i64), ISD::SETUGT); + + SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT); + } + SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT); SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Bits); if (Op.getValueType() == MVT::f32) FP = DAG.getNode(ISD::FP_ROUND, dl, diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index b861383475..5f39b8d2c2 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -43,8 +43,7 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, DL(Subtarget.getDataLayoutString()), InstrInfo(*this), FrameLowering(Subtarget), JITInfo(*this, is64Bit), TLInfo(*this), TSInfo(*this), - InstrItins(Subtarget.getInstrItineraryData()), - STTI(&TLInfo){ + InstrItins(Subtarget.getInstrItineraryData()) { // The binutils for the BG/P are too old for CFI. 
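
The LowerSINT_TO_FP() comment above is easier to follow with numbers. A worked example (illustrative values, not from the patch):

    // Take SINT = 2^53 + 2^29 + 1 in a round-to-nearest-even environment.
    // Direct i64->f32: f32 values near 2^53 are spaced 2^30 apart, and SINT
    // lies just above the halfway point 2^53 + 2^29, so it rounds up to
    // 2^53 + 2^30.
    // Via f64 first: f64 values there are spaced 2 apart, SINT is an exact
    // tie, and ties-to-even gives 2^53 + 2^29; that in turn is an exact tie
    // for f32 and rounds the other way, down to 2^53. Double rounding has
    // moved the answer by a full 2^30.
    // With the twiddle: the nonzero low 11 bits are replaced by the single
    // sticky bit 2048, giving Round = 2^53 + 2^29 + 2048. That value has no
    // set bits below 2^11, so it converts to f64 exactly, and the one
    // remaining rounding goes up, matching the directly rounded result.
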
if (Subtarget.isBGP()) diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h index c168433a71..02d69fd15d 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.h +++ b/lib/Target/PowerPC/PPCTargetMachine.h @@ -21,7 +21,6 @@ #include "PPCISelLowering.h" #include "PPCSelectionDAGInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetTransformImpl.h" #include "llvm/DataLayout.h" namespace llvm { @@ -37,8 +36,6 @@ class PPCTargetMachine : public LLVMTargetMachine { PPCTargetLowering TLInfo; PPCSelectionDAGInfo TSInfo; InstrItineraryData InstrItins; - ScalarTargetTransformImpl STTI; - VectorTargetTransformImpl VTTI; public: PPCTargetMachine(const Target &T, StringRef TT, @@ -66,12 +63,6 @@ public: virtual const InstrItineraryData *getInstrItineraryData() const { return &InstrItins; } - virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { - return &STTI; - } - virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { - return &VTTI; - } // Pass Pipeline Configuration virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp index 1d8cc771dd..8b7559c2f9 100644 --- a/lib/Target/Sparc/SparcTargetMachine.cpp +++ b/lib/Target/Sparc/SparcTargetMachine.cpp @@ -36,7 +36,7 @@ SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT, DL(Subtarget.getDataLayout()), InstrInfo(Subtarget), TLInfo(*this), TSInfo(*this), - FrameLowering(Subtarget),STTI(&TLInfo) { + FrameLowering(Subtarget) { } namespace { diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h index 0fbe2d7cda..c9f2d68eb1 100644 --- a/lib/Target/Sparc/SparcTargetMachine.h +++ b/lib/Target/Sparc/SparcTargetMachine.h @@ -22,7 +22,6 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetTransformImpl.h" namespace llvm { @@ -33,8 +32,6 @@ class SparcTargetMachine : public LLVMTargetMachine { SparcTargetLowering TLInfo; SparcSelectionDAGInfo TSInfo; SparcFrameLowering FrameLowering; - ScalarTargetTransformImpl STTI; - VectorTargetTransformImpl VTTI; public: SparcTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -55,12 +52,6 @@ public: virtual const SparcSelectionDAGInfo* getSelectionDAGInfo() const { return &TSInfo; } - virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { - return &STTI; - } - virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { - return &VTTI; - } virtual const DataLayout *getDataLayout() const { return &DL; } // Pass Pipeline Configuration diff --git a/lib/Target/Target.cpp b/lib/Target/Target.cpp index 393178a469..96c30a1847 100644 --- a/lib/Target/Target.cpp +++ b/lib/Target/Target.cpp @@ -26,7 +26,6 @@ using namespace llvm; void llvm::initializeTarget(PassRegistry &Registry) { initializeDataLayoutPass(Registry); initializeTargetLibraryInfoPass(Registry); - initializeTargetTransformInfoPass(Registry); } void LLVMInitializeTarget(LLVMPassRegistryRef R) { diff --git a/lib/Target/TargetTransformImpl.cpp b/lib/Target/TargetTransformImpl.cpp deleted file mode 100644 index 1cb5edab9d..0000000000 --- a/lib/Target/TargetTransformImpl.cpp +++ /dev/null @@ -1,43 +0,0 @@ -// llvm/Target/TargetTransformImpl.cpp - Target Loop Trans Info ---*- C++ -*-=// -// -// The LLVM Compiler Infrastructure -// -// 
This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Target/TargetTransformImpl.h"
-#include "llvm/Target/TargetLowering.h"
-
-using namespace llvm;
-
-bool ScalarTargetTransformImpl::isLegalAddImmediate(int64_t imm) const {
-  return TLI->isLegalAddImmediate(imm);
-}
-
-bool ScalarTargetTransformImpl::isLegalICmpImmediate(int64_t imm) const {
-  return TLI->isLegalICmpImmediate(imm);
-}
-
-bool ScalarTargetTransformImpl::isLegalAddressingMode(const AddrMode &AM,
-                                                      Type *Ty) const {
-  return TLI->isLegalAddressingMode(AM, Ty);
-}
-
-bool ScalarTargetTransformImpl::isTruncateFree(Type *Ty1, Type *Ty2) const {
-  return TLI->isTruncateFree(Ty1, Ty2);
-}
-
-bool ScalarTargetTransformImpl::isTypeLegal(Type *Ty) const {
-  EVT T = TLI->getValueType(Ty);
-  return TLI->isTypeLegal(T);
-}
-
-unsigned ScalarTargetTransformImpl::getJumpBufAlignment() const {
-  return TLI->getJumpBufAlignment();
-}
-
-unsigned ScalarTargetTransformImpl::getJumpBufSize() const {
-  return TLI->getJumpBufSize();
-}
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 488a832785..813f753032 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -467,7 +467,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
   setOperationAction(ISD::EH_RETURN       , MVT::Other, Custom);
   // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
   // SjLj exception handling but a light-weight setjmp/longjmp replacement to
-  // support continuation, user-level threading, and etc.. As a result, not
+  // support continuation, user-level threading, etc. As a result, no
   // other SjLj exception interfaces are implemented and please don't build
   // your own exception handling based on them.
   // LLVM/Clang supports zero-cost DWARF exception handling.
@@ -13503,7 +13503,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
   // For v = setjmp(buf), we generate
   //
   // thisMBB:
-  //  buf[Label_Offset] = ljMBB
+  //  buf[LabelOffset] = restoreMBB
   //  SjLjSetup restoreMBB
   //
   // mainMBB:
@@ -13531,18 +13531,48 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
   sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
 
   // thisMBB:
-  unsigned PtrImmStoreOpc = (PVT == MVT::i64) ? X86::MOV64mi32 : X86::MOV32mi;
-  const int64_t Label_Offset = 1 * PVT.getStoreSize();
-
+  unsigned PtrStoreOpc = 0;
+  unsigned LabelReg = 0;
+  const int64_t LabelOffset = 1 * PVT.getStoreSize();
+  Reloc::Model RM = getTargetMachine().getRelocationModel();
+  bool UseImmLabel = (getTargetMachine().getCodeModel() == CodeModel::Small) &&
+                     (RM == Reloc::Static || RM == Reloc::DynamicNoPIC);
+
+  // Prepare IP either in reg or imm.
+  if (!UseImmLabel) {
+    PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mr : X86::MOV32mr;
+    const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
+    LabelReg = MRI.createVirtualRegister(PtrRC);
+    if (Subtarget->is64Bit()) {
+      MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::LEA64r), LabelReg)
+              .addReg(X86::RIP)
+              .addImm(0)
+              .addReg(0)
+              .addMBB(restoreMBB)
+              .addReg(0);
+    } else {
+      const X86InstrInfo *XII = static_cast<const X86InstrInfo*>(TII);
+      MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::LEA32r), LabelReg)
+              .addReg(XII->getGlobalBaseReg(MF))
+              .addImm(0)
+              .addReg(0)
+              .addMBB(restoreMBB, Subtarget->ClassifyBlockAddressReference())
+              .addReg(0);
+    }
+  } else
+    PtrStoreOpc = (PVT == MVT::i64) ?
X86::MOV64mi32 : X86::MOV32mi; // Store IP - MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrImmStoreOpc)); + MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrStoreOpc)); for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { if (i == X86::AddrDisp) - MIB.addDisp(MI->getOperand(MemOpndSlot + i), Label_Offset); + MIB.addDisp(MI->getOperand(MemOpndSlot + i), LabelOffset); else MIB.addOperand(MI->getOperand(MemOpndSlot + i)); } - MIB.addMBB(restoreMBB); + if (!UseImmLabel) + MIB.addReg(LabelReg); + else + MIB.addMBB(restoreMBB); MIB.setMemRefs(MMOBegin, MMOEnd); // Setup MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::EH_SjLj_Setup)) @@ -13597,8 +13627,8 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, MachineInstrBuilder MIB; - const int64_t Label_Offset = 1 * PVT.getStoreSize(); - const int64_t SP_Offset = 2 * PVT.getStoreSize(); + const int64_t LabelOffset = 1 * PVT.getStoreSize(); + const int64_t SPOffset = 2 * PVT.getStoreSize(); unsigned PtrLoadOpc = (PVT == MVT::i64) ? X86::MOV64rm : X86::MOV32rm; unsigned IJmpOpc = (PVT == MVT::i64) ? X86::JMP64r : X86::JMP32r; @@ -13612,7 +13642,7 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), Tmp); for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { if (i == X86::AddrDisp) - MIB.addDisp(MI->getOperand(i), Label_Offset); + MIB.addDisp(MI->getOperand(i), LabelOffset); else MIB.addOperand(MI->getOperand(i)); } @@ -13621,7 +13651,7 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), SP); for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { if (i == X86::AddrDisp) - MIB.addDisp(MI->getOperand(i), SP_Offset); + MIB.addDisp(MI->getOperand(i), SPOffset); else MIB.addOperand(MI->getOperand(i)); } @@ -15645,11 +15675,11 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG, ISD::LoadExtType Ext = Ld->getExtensionType(); // If this is a vector EXT Load then attempt to optimize it using a - // shuffle. We need SSE4 for the shuffles. + // shuffle. We need SSSE3 shuffles. // TODO: It is possible to support ZExt by zeroing the undef values // during the shuffle phase or after the shuffle. if (RegVT.isVector() && RegVT.isInteger() && - Ext == ISD::EXTLOAD && Subtarget->hasSSE41()) { + Ext == ISD::EXTLOAD && Subtarget->hasSSSE3()) { assert(MemVT != RegVT && "Cannot extend to the same type"); assert(MemVT.isVector() && "Must load a vector from memory"); diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 1fe8976b60..20bc85e65f 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -67,15 +67,11 @@ MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const { /// operand to an MCSymbol. 
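
For the register-based path of the setjmp lowering above, the emitted machine code comes out along these lines (hypothetical x86-64 output under a PIC, small-code-model configuration; the registers are arbitrary placeholders):

    //     leaq  .LrestoreMBB(%rip), %rax   # LEA64r: restore block address,
    //                                      # materialized RIP-relatively
    //     movq  %rax, 8(%rdi)              # MOV64mr: stored into the buffer
    //                                      # at LabelOffset (1 * 8 bytes)
    // In the static / DynamicNoPIC small-code-model case the address is
    // instead stored directly as an immediate via MOV64mi32/MOV32mi, which
    // is the UseImmLabel path.
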
MCSymbol *X86MCInstLower:: GetSymbolFromOperand(const MachineOperand &MO) const { - assert((MO.isGlobal() || MO.isSymbol()) && "Isn't a symbol reference"); + assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) && "Isn't a symbol reference"); SmallString<128> Name; - if (!MO.isGlobal()) { - assert(MO.isSymbol()); - Name += MAI.getGlobalPrefix(); - Name += MO.getSymbolName(); - } else { + if (MO.isGlobal()) { const GlobalValue *GV = MO.getGlobal(); bool isImplicitlyPrivate = false; if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB || @@ -85,6 +81,11 @@ GetSymbolFromOperand(const MachineOperand &MO) const { isImplicitlyPrivate = true; Mang->getNameWithPrefix(Name, GV, isImplicitlyPrivate); + } else if (MO.isSymbol()) { + Name += MAI.getGlobalPrefix(); + Name += MO.getSymbolName(); + } else if (MO.isMBB()) { + Name += MO.getMBB()->getSymbol()->getName(); } // If the target flags on the operand changes the name of the symbol, do that @@ -215,7 +216,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, if (Expr == 0) Expr = MCSymbolRefExpr::Create(Sym, RefKind, Ctx); - if (!MO.isJTI() && MO.getOffset()) + if (!MO.isJTI() && !MO.isMBB() && MO.getOffset()) Expr = MCBinaryExpr::CreateAdd(Expr, MCConstantExpr::Create(MO.getOffset(), Ctx), Ctx); @@ -348,9 +349,6 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { MCOp = MCOperand::CreateImm(MO.getImm()); break; case MachineOperand::MO_MachineBasicBlock: - MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create( - MO.getMBB()->getSymbol(), Ctx)); - break; case MachineOperand::MO_GlobalAddress: case MachineOperand::MO_ExternalSymbol: MCOp = LowerSymbolOperand(MO, GetSymbolFromOperand(MO)); diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index c804195f27..ed5e6f5227 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -50,8 +50,7 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT, InstrInfo(*this), TSInfo(*this), TLInfo(*this), - JITInfo(*this), - STTI(&TLInfo) { + JITInfo(*this) { } void X86_64TargetMachine::anchor() { } @@ -70,8 +69,7 @@ X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT, InstrInfo(*this), TSInfo(*this), TLInfo(*this), - JITInfo(*this), - STTI(&TLInfo) { + JITInfo(*this) { } /// X86TargetMachine ctor - Create an X86 target. diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h index def028f191..5c625ac953 100644 --- a/lib/Target/X86/X86TargetMachine.h +++ b/lib/Target/X86/X86TargetMachine.h @@ -28,7 +28,6 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetTransformImpl.h" namespace llvm { @@ -93,8 +92,6 @@ class X86_32TargetMachine : public X86TargetMachine { #else X86JITInfo JITInfo; #endif - ScalarTargetTransformImpl STTI; - VectorTargetTransformImpl VTTI; public: X86_32TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -113,12 +110,6 @@ public: virtual X86JITInfo *getJITInfo() { return &JITInfo; } - virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { - return &STTI; - } - virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { - return &VTTI; - } }; /// X86_64TargetMachine - X86 64-bit target machine. 
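With the ScalarTargetTransformImpl/VectorTargetTransformImpl members removed from the target machines, codegen-level passes once again receive the lowering object directly from the target that schedules them, instead of discovering it through the TargetMachine. A minimal sketch of the consumer side, assuming the usual TargetPassConfig hook (this call site is not part of the hunks shown here):

    // Sketch: inside a TargetPassConfig::addIRPasses()-style hook, the
    // target hands its TargetLowering to LSR at pass-construction time.
    addPass(createLoopStrengthReducePass(getTargetLowering()));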
@@ -130,8 +121,6 @@ class X86_64TargetMachine : public X86TargetMachine { X86SelectionDAGInfo TSInfo; X86TargetLowering TLInfo; X86JITInfo JITInfo; - ScalarTargetTransformImpl STTI; - VectorTargetTransformImpl VTTI; public: X86_64TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -150,12 +139,6 @@ public: virtual X86JITInfo *getJITInfo() { return &JITInfo; } - virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { - return &STTI; - } - virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { - return &VTTI; - } }; } // End llvm namespace diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp index 449eed3d8d..c4a58874a4 100644 --- a/lib/Target/X86/X86VZeroUpper.cpp +++ b/lib/Target/X86/X86VZeroUpper.cpp @@ -147,7 +147,7 @@ bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) { const TargetRegisterClass *RC = &X86::VR256RegClass; for (TargetRegisterClass::iterator i = RC->begin(), e = RC->end(); i != e; i++) { - if (MRI.isPhysRegUsed(*i)) { + if (!MRI.reg_nodbg_empty(*i)) { YMMUsed = true; break; } diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp index 0b7e3e10d4..c71d978ad8 100644 --- a/lib/Target/XCore/XCoreTargetMachine.cpp +++ b/lib/Target/XCore/XCoreTargetMachine.cpp @@ -32,7 +32,7 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT, InstrInfo(), FrameLowering(Subtarget), TLInfo(*this), - TSInfo(*this), STTI(&TLInfo) { + TSInfo(*this) { } namespace { diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h index c60c6a37f9..f7fec29f54 100644 --- a/lib/Target/XCore/XCoreTargetMachine.h +++ b/lib/Target/XCore/XCoreTargetMachine.h @@ -20,7 +20,6 @@ #include "XCoreISelLowering.h" #include "XCoreSelectionDAGInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetTransformImpl.h" #include "llvm/DataLayout.h" namespace llvm { @@ -32,8 +31,6 @@ class XCoreTargetMachine : public LLVMTargetMachine { XCoreFrameLowering FrameLowering; XCoreTargetLowering TLInfo; XCoreSelectionDAGInfo TSInfo; - ScalarTargetTransformImpl STTI; - VectorTargetTransformImpl VTTI; public: XCoreTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -56,12 +53,6 @@ public: virtual const TargetRegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); } - virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { - return &STTI; - } - virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { - return &VTTI; - } virtual const DataLayout *getDataLayout() const { return &DL; } // Pass Pipeline Configuration diff --git a/lib/Transforms/IPO/BarrierNoopPass.cpp b/lib/Transforms/IPO/BarrierNoopPass.cpp new file mode 100644 index 0000000000..2e32240621 --- /dev/null +++ b/lib/Transforms/IPO/BarrierNoopPass.cpp @@ -0,0 +1,47 @@ +//===- BarrierNoopPass.cpp - A barrier pass for the pass manager ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// NOTE: DO NOT USE THIS IF AVOIDABLE +// +// This pass is a nonce pass intended to allow manipulation of the implicitly +// nesting pass manager. 
For example, it can be used to cause a CGSCC pass +// manager to be closed prior to running a new collection of function passes. +// +// FIXME: This is a huge HACK. This should be removed when the pass manager's +// nesting is made explicit instead of implicit. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Pass.h" +#include "llvm/Transforms/IPO.h" +using namespace llvm; + +namespace { +/// \brief A nonce module pass used to place a barrier in a pass manager. +/// +/// There is no mechanism for ending a CGSCC pass manager once one is started. +/// This prevents extension points from having clear deterministic ordering +/// when they are phrased as non-module passes. +class BarrierNoop : public ModulePass { +public: + static char ID; // Pass identification. + + BarrierNoop() : ModulePass(ID) { + initializeBarrierNoopPass(*PassRegistry::getPassRegistry()); + } + + bool runOnModule(Module &M) { return false; } +}; +} + +ModulePass *llvm::createBarrierNoopPass() { return new BarrierNoop(); } + +char BarrierNoop::ID = 0; +INITIALIZE_PASS(BarrierNoop, "barrier", "A No-Op Barrier Pass", + false, false) diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt index 3f6b1de614..90c1c33e6d 100644 --- a/lib/Transforms/IPO/CMakeLists.txt +++ b/lib/Transforms/IPO/CMakeLists.txt @@ -1,5 +1,6 @@ add_llvm_library(LLVMipo ArgumentPromotion.cpp + BarrierNoopPass.cpp ConstantMerge.cpp DeadArgumentElimination.cpp ExtractGV.cpp diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 9e328b9ac9..04163f751f 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -119,6 +119,14 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { MPM.add(Inliner); Inliner = 0; } + + // FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC + // pass manager, but we don't want to add extensions into that pass manager. + // To prevent this we must insert a no-op module pass to reset the pass + // manager to get the same behavior as EP_OptimizerLast in non-O0 builds. + if (!GlobalExtensions->empty() || !Extensions.empty()) + MPM.add(createBarrierNoopPass()); + addExtensionsToPM(EP_EnabledOnOptLevel0, MPM); return; } @@ -176,6 +184,12 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars MPM.add(createLoopIdiomPass()); // Recognize idioms like memset. 
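// A sketch of the barrier's effect on client code (hypothetical names, not
// part of this patch): an extension registered for the -O0 pipeline, e.g.
//
//   static void addMyPass(const PassManagerBuilder &, PassManagerBase &PM) {
//     PM.add(createMyModulePass()); // hypothetical module pass
//   }
//   PassManagerBuilder PMB;
//   PMB.addExtension(PassManagerBuilder::EP_EnabledOnOptLevel0, addMyPass);
//
// would otherwise be queued into the CGSCC pass manager implicitly created
// by the inliner; the no-op barrier added above closes that manager first,
// so the extension runs as a module pass, mirroring EP_OptimizerLast.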
MPM.add(createLoopDeletionPass()); // Delete dead loops + + if (Vectorize) { + MPM.add(createLoopVectorizePass()); + MPM.add(createLICMPass()); + } + if (!DisableUnrollLoops) MPM.add(createLoopUnrollPass()); // Unroll small loops addExtensionsToPM(EP_LoopOptimizerEnd, MPM); diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index b566994edf..75f42f30fa 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -534,7 +534,7 @@ void AddressSanitizer::createInitializerPoisonCalls(Module &M, bool AddressSanitizer::ShouldInstrumentGlobal(GlobalVariable *G) { Type *Ty = cast<PointerType>(G->getType())->getElementType(); - DEBUG(dbgs() << "GLOBAL: " << *G); + DEBUG(dbgs() << "GLOBAL: " << *G << "\n"); if (BL->isIn(*G)) return false; if (!Ty->isSized()) return false; @@ -682,7 +682,7 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) { FirstDynamic = LastDynamic; } - DEBUG(dbgs() << "NEW GLOBAL:\n" << *NewGlobal); + DEBUG(dbgs() << "NEW GLOBAL: " << *NewGlobal << "\n"); } ArrayType *ArrayOfGlobalStructTy = ArrayType::get(GlobalStructTy, n); @@ -851,6 +851,7 @@ bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) { bool AddressSanitizer::runOnFunction(Function &F) { if (BL->isIn(F)) return false; if (&F == AsanCtorFunction) return false; + DEBUG(dbgs() << "ASAN instrumenting:\n" << F << "\n"); // If needed, insert __asan_init before checking for AddressSafety attr. maybeInsertAsanInitAtFunctionEntry(F); @@ -914,8 +915,6 @@ bool AddressSanitizer::runOnFunction(Function &F) { NumInstrumented++; } - DEBUG(dbgs() << F); - bool ChangedStack = poisonStackInFunction(F); // We must unpoison the stack before every NoReturn call (throw, _exit, etc). @@ -925,6 +924,7 @@ bool AddressSanitizer::runOnFunction(Function &F) { IRBuilder<> IRB(CI); IRB.CreateCall(AsanHandleNoReturnFunc); } + DEBUG(dbgs() << "ASAN done instrumenting:\n" << F << "\n"); return NumInstrumented > 0 || ChangedStack || !NoReturnCalls.empty(); } diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 99a62dbe62..958348d9fa 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -37,7 +37,7 @@ // // TODO: Handle multiple loops at a time. // -// TODO: Should AddrMode::BaseGV be changed to a ConstantExpr +// TODO: Should TargetLowering::AddrMode::BaseGV be changed to a ConstantExpr // instead of a GlobalValue? // // TODO: When truncation is free, truncate ICmp users' operands to make it a @@ -67,7 +67,6 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/TargetTransformInfo.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/DenseSet.h" @@ -75,6 +74,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLowering.h" #include <algorithm> using namespace llvm; @@ -1118,7 +1118,7 @@ public: enum KindType { Basic, ///< A normal use, with no folding. Special, ///< A special case of basic, allowing -1 scales. - Address, ///< An address use; folding according to ScalarTargetTransformInfo. + Address, ///< An address use; folding according to TargetLowering ICmpZero ///< An equality icmp with both operands folded into one. // TODO: Add a generic icmp too? 
}; @@ -1272,12 +1272,12 @@ void LSRUse::dump() const { /// address-mode folding and special icmp tricks. static bool isLegalUse(const AddrMode &AM, LSRUse::KindType Kind, Type *AccessTy, - const ScalarTargetTransformInfo *STTI) { + const TargetLowering *TLI) { switch (Kind) { case LSRUse::Address: // If we have low-level target information, ask the target if it can // completely fold this address. - if (STTI) return STTI->isLegalAddressingMode(AM, AccessTy); + if (TLI) return TLI->isLegalAddressingMode(AM, AccessTy); // Otherwise, just guess that reg+reg addressing is legal. return !AM.BaseGV && AM.BaseOffs == 0 && AM.Scale <= 1; @@ -1300,7 +1300,7 @@ static bool isLegalUse(const AddrMode &AM, // If we have low-level target information, ask the target if it can fold an // integer immediate on an icmp. if (AM.BaseOffs != 0) { - if (!STTI) + if (!TLI) return false; // We have one of: // ICmpZero BaseReg + Offset => ICmp BaseReg, -Offset @@ -1309,7 +1309,7 @@ static bool isLegalUse(const AddrMode &AM, int64_t Offs = AM.BaseOffs; if (AM.Scale == 0) Offs = -(uint64_t)Offs; // The cast does the right thing with INT64_MIN. - return STTI->isLegalICmpImmediate(Offs); + return TLI->isLegalICmpImmediate(Offs); } // ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg @@ -1330,20 +1330,20 @@ static bool isLegalUse(const AddrMode &AM, static bool isLegalUse(AddrMode AM, int64_t MinOffset, int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy, - const ScalarTargetTransformInfo *LTTI) { + const TargetLowering *TLI) { // Check for overflow. if (((int64_t)((uint64_t)AM.BaseOffs + MinOffset) > AM.BaseOffs) != (MinOffset > 0)) return false; AM.BaseOffs = (uint64_t)AM.BaseOffs + MinOffset; - if (isLegalUse(AM, Kind, AccessTy, LTTI)) { + if (isLegalUse(AM, Kind, AccessTy, TLI)) { AM.BaseOffs = (uint64_t)AM.BaseOffs - MinOffset; // Check for overflow. if (((int64_t)((uint64_t)AM.BaseOffs + MaxOffset) > AM.BaseOffs) != (MaxOffset > 0)) return false; AM.BaseOffs = (uint64_t)AM.BaseOffs + MaxOffset; - return isLegalUse(AM, Kind, AccessTy, LTTI); + return isLegalUse(AM, Kind, AccessTy, TLI); } return false; } @@ -1352,7 +1352,7 @@ static bool isAlwaysFoldable(int64_t BaseOffs, GlobalValue *BaseGV, bool HasBaseReg, LSRUse::KindType Kind, Type *AccessTy, - const ScalarTargetTransformInfo *LTTI) { + const TargetLowering *TLI) { // Fast-path: zero is always foldable. if (BaseOffs == 0 && !BaseGV) return true; @@ -1371,14 +1371,14 @@ static bool isAlwaysFoldable(int64_t BaseOffs, AM.HasBaseReg = true; } - return isLegalUse(AM, Kind, AccessTy, LTTI); + return isLegalUse(AM, Kind, AccessTy, TLI); } static bool isAlwaysFoldable(const SCEV *S, int64_t MinOffset, int64_t MaxOffset, bool HasBaseReg, LSRUse::KindType Kind, Type *AccessTy, - const ScalarTargetTransformInfo *LTTI, + const TargetLowering *TLI, ScalarEvolution &SE) { // Fast-path: zero is always foldable. if (S->isZero()) return true; @@ -1402,7 +1402,7 @@ static bool isAlwaysFoldable(const SCEV *S, AM.HasBaseReg = HasBaseReg; AM.Scale = Kind == LSRUse::ICmpZero ? 
-1 : 1; - return isLegalUse(AM, MinOffset, MaxOffset, Kind, AccessTy, LTTI); + return isLegalUse(AM, MinOffset, MaxOffset, Kind, AccessTy, TLI); } namespace { @@ -1502,7 +1502,7 @@ class LSRInstance { ScalarEvolution &SE; DominatorTree &DT; LoopInfo &LI; - const ScalarTargetTransformInfo *const STTI; + const TargetLowering *const TLI; Loop *const L; bool Changed; @@ -1638,7 +1638,7 @@ class LSRInstance { Pass *P); public: - LSRInstance(const ScalarTargetTransformInfo *ltti, Loop *l, Pass *P); + LSRInstance(const TargetLowering *tli, Loop *l, Pass *P); bool getChanged() const { return Changed; } @@ -1688,10 +1688,11 @@ void LSRInstance::OptimizeShadowIV() { } if (!DestTy) continue; - if (STTI) { + if (TLI) { // If target does not support DestTy natively then do not apply // this transformation. - if (!STTI->isTypeLegal(DestTy)) continue; + EVT DVT = TLI->getValueType(DestTy); + if (!TLI->isTypeLegal(DVT)) continue; } PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0)); @@ -2014,18 +2015,18 @@ LSRInstance::OptimizeLoopTermCond() { if (C->getValue().getMinSignedBits() >= 64 || C->getValue().isMinSignedValue()) goto decline_post_inc; - // Without STTI, assume that any stride might be valid, and so any + // Without TLI, assume that any stride might be valid, and so any // use might be shared. - if (!STTI) + if (!TLI) goto decline_post_inc; // Check for possible scaled-address reuse. Type *AccessTy = getAccessType(UI->getUser()); AddrMode AM; AM.Scale = C->getSExtValue(); - if (STTI->isLegalAddressingMode(AM, AccessTy)) + if (TLI->isLegalAddressingMode(AM, AccessTy)) goto decline_post_inc; AM.Scale = -AM.Scale; - if (STTI->isLegalAddressingMode(AM, AccessTy)) + if (TLI->isLegalAddressingMode(AM, AccessTy)) goto decline_post_inc; } } @@ -2096,12 +2097,12 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg, // Conservatively assume HasBaseReg is true for now. if (NewOffset < LU.MinOffset) { if (!isAlwaysFoldable(LU.MaxOffset - NewOffset, 0, HasBaseReg, - Kind, AccessTy, STTI)) + Kind, AccessTy, TLI)) return false; NewMinOffset = NewOffset; } else if (NewOffset > LU.MaxOffset) { if (!isAlwaysFoldable(NewOffset - LU.MinOffset, 0, HasBaseReg, - Kind, AccessTy, STTI)) + Kind, AccessTy, TLI)) return false; NewMaxOffset = NewOffset; } @@ -2130,7 +2131,7 @@ LSRInstance::getUse(const SCEV *&Expr, int64_t Offset = ExtractImmediate(Expr, SE); // Basic uses can't accept any offset, for example. - if (!isAlwaysFoldable(Offset, 0, /*HasBaseReg=*/true, Kind, AccessTy, STTI)) { + if (!isAlwaysFoldable(Offset, 0, /*HasBaseReg=*/true, Kind, AccessTy, TLI)) { Expr = Copy; Offset = 0; } @@ -2395,7 +2396,7 @@ bool IVChain::isProfitableIncrement(const SCEV *OperExpr, /// TODO: Consider IVInc free if it's already used in another chains. static bool isProfitableChain(IVChain &Chain, SmallPtrSet<Instruction*, 4> &Users, - ScalarEvolution &SE, const ScalarTargetTransformInfo *STTI) { + ScalarEvolution &SE, const TargetLowering *TLI) { if (StressIVChain) return true; @@ -2653,7 +2654,7 @@ void LSRInstance::CollectChains() { for (unsigned UsersIdx = 0, NChains = IVChainVec.size(); UsersIdx < NChains; ++UsersIdx) { if (!isProfitableChain(IVChainVec[UsersIdx], - ChainUsersVec[UsersIdx].FarUsers, SE, STTI)) + ChainUsersVec[UsersIdx].FarUsers, SE, TLI)) continue; // Preserve the chain at UsesIdx. if (ChainIdx != UsersIdx) @@ -2680,8 +2681,7 @@ void LSRInstance::FinalizeChain(IVChain &Chain) { /// Return true if the IVInc can be folded into an addressing mode. 
static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, - Value *Operand, - const ScalarTargetTransformInfo *STTI) { + Value *Operand, const TargetLowering *TLI) { const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr); if (!IncConst || !isAddressUse(UserInst, Operand)) return false; @@ -2691,7 +2691,7 @@ static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, int64_t IncOffset = IncConst->getValue()->getSExtValue(); if (!isAlwaysFoldable(IncOffset, /*BaseGV=*/0, /*HaseBaseReg=*/false, - LSRUse::Address, getAccessType(UserInst), STTI)) + LSRUse::Address, getAccessType(UserInst), TLI)) return false; return true; @@ -2762,7 +2762,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, // If an IV increment can't be folded, use it as the next IV value. if (!canFoldIVIncExpr(LeftOverExpr, IncI->UserInst, IncI->IVOperand, - STTI)) { + TLI)) { assert(IVTy == IVOper->getType() && "inconsistent IV increment type"); IVSrc = IVOper; LeftOverExpr = 0; @@ -3108,7 +3108,7 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, // into an immediate field. if (isAlwaysFoldable(*J, LU.MinOffset, LU.MaxOffset, Base.getNumRegs() > 1, - LU.Kind, LU.AccessTy, STTI, SE)) + LU.Kind, LU.AccessTy, TLI, SE)) continue; // Collect all operands except *J. @@ -3122,7 +3122,7 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, if (InnerAddOps.size() == 1 && isAlwaysFoldable(InnerAddOps[0], LU.MinOffset, LU.MaxOffset, Base.getNumRegs() > 1, - LU.Kind, LU.AccessTy, STTI, SE)) + LU.Kind, LU.AccessTy, TLI, SE)) continue; const SCEV *InnerSum = SE.getAddExpr(InnerAddOps); @@ -3132,9 +3132,9 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, // Add the remaining pieces of the add back into the new formula. const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum); - if (STTI && InnerSumSC && + if (TLI && InnerSumSC && SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 && - STTI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset + + TLI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset + InnerSumSC->getValue()->getZExtValue())) { F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset + InnerSumSC->getValue()->getZExtValue(); @@ -3144,8 +3144,8 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, // Add J as its own register, or an unfolded immediate. const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J); - if (STTI && SC && SE.getTypeSizeInBits(SC->getType()) <= 64 && - STTI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset + + if (TLI && SC && SE.getTypeSizeInBits(SC->getType()) <= 64 && + TLI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset + SC->getValue()->getZExtValue())) F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset + SC->getValue()->getZExtValue(); @@ -3205,7 +3205,7 @@ void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula F = Base; F.AM.BaseGV = GV; if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset, - LU.Kind, LU.AccessTy, STTI)) + LU.Kind, LU.AccessTy, TLI)) continue; F.BaseRegs[i] = G; (void)InsertFormula(LU, LUIdx, F); @@ -3230,7 +3230,7 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula F = Base; F.AM.BaseOffs = (uint64_t)Base.AM.BaseOffs - *I; if (isLegalUse(F.AM, LU.MinOffset - *I, LU.MaxOffset - *I, - LU.Kind, LU.AccessTy, STTI)) { + LU.Kind, LU.AccessTy, TLI)) { // Add the offset to the base register. 
const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G); // If it cancelled out, drop the base register, otherwise update it. @@ -3250,7 +3250,7 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula F = Base; F.AM.BaseOffs = (uint64_t)F.AM.BaseOffs + Imm; if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset, - LU.Kind, LU.AccessTy, STTI)) + LU.Kind, LU.AccessTy, TLI)) continue; F.BaseRegs[i] = G; (void)InsertFormula(LU, LUIdx, F); @@ -3297,7 +3297,7 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, F.AM.BaseOffs = NewBaseOffs; // Check that this scale is legal. - if (!isLegalUse(F.AM, Offset, Offset, LU.Kind, LU.AccessTy, STTI)) + if (!isLegalUse(F.AM, Offset, Offset, LU.Kind, LU.AccessTy, TLI)) continue; // Compensate for the use having MinOffset built into it. @@ -3353,12 +3353,12 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) { Base.AM.HasBaseReg = Base.BaseRegs.size() > 1; // Check whether this scale is going to be legal. if (!isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset, - LU.Kind, LU.AccessTy, STTI)) { + LU.Kind, LU.AccessTy, TLI)) { // As a special-case, handle special out-of-loop Basic users specially. // TODO: Reconsider this special case. if (LU.Kind == LSRUse::Basic && isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset, - LSRUse::Special, LU.AccessTy, STTI) && + LSRUse::Special, LU.AccessTy, TLI) && LU.AllFixupsOutsideLoop) LU.Kind = LSRUse::Special; else @@ -3391,8 +3391,8 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) { /// GenerateTruncates - Generate reuse formulae from different IV types. void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) { - // This requires ScalarTargetTransformInfo to tell us which truncates are free. - if (!STTI) return; + // This requires TargetLowering to tell us which truncates are free. + if (!TLI) return; // Don't bother truncating symbolic values. 
if (Base.AM.BaseGV) return; @@ -3405,7 +3405,7 @@ void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) { for (SmallSetVector<Type *, 4>::const_iterator I = Types.begin(), E = Types.end(); I != E; ++I) { Type *SrcTy = *I; - if (SrcTy != DstTy && STTI->isTruncateFree(SrcTy, DstTy)) { + if (SrcTy != DstTy && TLI->isTruncateFree(SrcTy, DstTy)) { Formula F = Base; if (F.ScaledReg) F.ScaledReg = SE.getAnyExtendExpr(F.ScaledReg, *I); @@ -3561,7 +3561,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { Formula NewF = F; NewF.AM.BaseOffs = Offs; if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset, - LU.Kind, LU.AccessTy, STTI)) + LU.Kind, LU.AccessTy, TLI)) continue; NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg); @@ -3586,9 +3586,9 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { Formula NewF = F; NewF.AM.BaseOffs = (uint64_t)NewF.AM.BaseOffs + Imm; if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset, - LU.Kind, LU.AccessTy, STTI)) { - if (!STTI || - !STTI->isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm)) + LU.Kind, LU.AccessTy, TLI)) { + if (!TLI || + !TLI->isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm)) continue; NewF = F; NewF.UnfoldedOffset = (uint64_t)NewF.UnfoldedOffset + Imm; @@ -3900,7 +3900,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() { Formula &F = LUThatHas->Formulae[i]; if (!isLegalUse(F.AM, LUThatHas->MinOffset, LUThatHas->MaxOffset, - LUThatHas->Kind, LUThatHas->AccessTy, STTI)) { + LUThatHas->Kind, LUThatHas->AccessTy, TLI)) { DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n'); LUThatHas->DeleteFormula(F); @@ -4589,12 +4589,12 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution, Changed |= DeleteTriviallyDeadInstructions(DeadInsts); } -LSRInstance::LSRInstance(const ScalarTargetTransformInfo *stti, Loop *l, Pass *P) +LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P) : IU(P->getAnalysis<IVUsers>()), SE(P->getAnalysis<ScalarEvolution>()), DT(P->getAnalysis<DominatorTree>()), LI(P->getAnalysis<LoopInfo>()), - STTI(stti), L(l), Changed(false), IVIncInsertPos(0) { + TLI(tli), L(l), Changed(false), IVIncInsertPos(0) { // If LoopSimplify form is not available, stay out of trouble. if (!L->isLoopSimplifyForm()) @@ -4684,7 +4684,7 @@ LSRInstance::LSRInstance(const ScalarTargetTransformInfo *stti, Loop *l, Pass *P for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(), JE = LU.Formulae.end(); J != JE; ++J) assert(isLegalUse(J->AM, LU.MinOffset, LU.MaxOffset, - LU.Kind, LU.AccessTy, STTI) && + LU.Kind, LU.AccessTy, TLI) && "Illegal formula generated!"); }; #endif @@ -4757,13 +4757,13 @@ void LSRInstance::dump() const { namespace { class LoopStrengthReduce : public LoopPass { - /// ScalarTargetTransformInfo provides target information that is needed - /// for strength reducing loops. - const ScalarTargetTransformInfo *STTI; + /// TLI - Keep a pointer of a TargetLowering to consult for determining + /// transformation profitability. 
+ const TargetLowering *const TLI; public: static char ID; // Pass ID, replacement for typeid - LoopStrengthReduce(); + explicit LoopStrengthReduce(const TargetLowering *tli = 0); private: bool runOnLoop(Loop *L, LPPassManager &LPM); @@ -4783,12 +4783,13 @@ INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce", "Loop Strength Reduction", false, false) -Pass *llvm::createLoopStrengthReducePass() { - return new LoopStrengthReduce(); + +Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) { + return new LoopStrengthReduce(TLI); } -LoopStrengthReduce::LoopStrengthReduce() - : LoopPass(ID), STTI(0) { +LoopStrengthReduce::LoopStrengthReduce(const TargetLowering *tli) + : LoopPass(ID), TLI(tli) { initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry()); } @@ -4814,13 +4815,8 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const { bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) { bool Changed = false; - TargetTransformInfo *TTI = getAnalysisIfAvailable<TargetTransformInfo>(); - - if (TTI) - STTI = TTI->getScalarTargetTransformInfo(); - // Run the main LSR transformation. - Changed |= LSRInstance(STTI, L, this).getChanged(); + Changed |= LSRInstance(TLI, L, this).getChanged(); // Remove any extra phis created by processing inner loops. Changed |= DeleteDeadPHIs(L->getHeader()); @@ -4831,7 +4827,7 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) { Rewriter.setDebugType(DEBUG_TYPE); #endif unsigned numFolded = Rewriter. - replaceCongruentIVs(L, &getAnalysis<DominatorTree>(), DeadInsts, STTI); + replaceCongruentIVs(L, &getAnalysis<DominatorTree>(), DeadInsts, TLI); if (numFolded) { Changed = true; DeleteTriviallyDeadInstructions(DeadInsts); diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index 3e84a91c1d..377a6250de 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -132,9 +132,6 @@ public: /// /// We flag partitions as splittable when they are formed entirely due to /// accesses by trivially splittable operations such as memset and memcpy. - /// - /// FIXME: At some point we should consider loads and stores of FCAs to be - /// splittable and eagerly split them into scalar values. bool IsSplittable; /// \brief Test whether a partition has been marked as dead. @@ -1785,9 +1782,9 @@ static Value *getNaturalGEPWithType(IRBuilder<> &IRB, const DataLayout &TD, break; if (SequentialType *SeqTy = dyn_cast<SequentialType>(ElementTy)) { ElementTy = SeqTy->getElementType(); - Indices.push_back(IRB.getInt(APInt(TD.getPointerSizeInBits( - ElementTy->isPointerTy() ? - cast<PointerType>(ElementTy)->getAddressSpace(): 0), 0))); + // Note that we use the default address space as this index is over an + // array or a vector, not a pointer. + Indices.push_back(IRB.getInt(APInt(TD.getPointerSizeInBits(0), 0))); } else if (StructType *STy = dyn_cast<StructType>(ElementTy)) { if (STy->element_begin() == STy->element_end()) break; // Nothing left to descend into. @@ -1828,7 +1825,7 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD, if (ElementSizeInBits % 8) return 0; // GEPs over non-multiple of 8 size vector elements are invalid. 
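// A note on the udiv -> sdiv switches just below: Offset is a signed byte
// offset at this point and can presumably be negative, so an unsigned divide
// would misread a small negative offset as an enormous element count instead
// of a backwards step; signed division keeps the skipped-element computation
// sane. (Rationale inferred from the change, not stated by the patch.)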
APInt ElementSize(Offset.getBitWidth(), ElementSizeInBits / 8); - APInt NumSkippedElements = Offset.udiv(ElementSize); + APInt NumSkippedElements = Offset.sdiv(ElementSize); if (NumSkippedElements.ugt(VecTy->getNumElements())) return 0; Offset -= NumSkippedElements * ElementSize; @@ -1840,7 +1837,7 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD, if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) { Type *ElementTy = ArrTy->getElementType(); APInt ElementSize(Offset.getBitWidth(), TD.getTypeAllocSize(ElementTy)); - APInt NumSkippedElements = Offset.udiv(ElementSize); + APInt NumSkippedElements = Offset.sdiv(ElementSize); if (NumSkippedElements.ugt(ArrTy->getNumElements())) return 0; @@ -1896,7 +1893,7 @@ static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const DataLayout &TD, APInt ElementSize(Offset.getBitWidth(), TD.getTypeAllocSize(ElementTy)); if (ElementSize == 0) return 0; // Zero-length arrays can't help us build a natural GEP. - APInt NumSkippedElements = Offset.udiv(ElementSize); + APInt NumSkippedElements = Offset.sdiv(ElementSize); Offset -= NumSkippedElements * ElementSize; Indices.push_back(IRB.getInt(NumSkippedElements)); @@ -2211,6 +2208,48 @@ static bool isIntegerWideningViable(const DataLayout &TD, return WholeAllocaOp; } +static Value *extractInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *V, + IntegerType *Ty, uint64_t Offset, + const Twine &Name) { + IntegerType *IntTy = cast<IntegerType>(V->getType()); + assert(DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) && + "Element extends past full value"); + uint64_t ShAmt = 8*Offset; + if (DL.isBigEndian()) + ShAmt = 8*(DL.getTypeStoreSize(IntTy) - DL.getTypeStoreSize(Ty) - Offset); + if (ShAmt) + V = IRB.CreateLShr(V, ShAmt, Name + ".shift"); + assert(Ty->getBitWidth() <= IntTy->getBitWidth() && + "Cannot extract to a larger integer!"); + if (Ty != IntTy) + V = IRB.CreateTrunc(V, Ty, Name + ".trunc"); + return V; +} + +static Value *insertInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *Old, + Value *V, uint64_t Offset, const Twine &Name) { + IntegerType *IntTy = cast<IntegerType>(Old->getType()); + IntegerType *Ty = cast<IntegerType>(V->getType()); + assert(Ty->getBitWidth() <= IntTy->getBitWidth() && + "Cannot insert a larger integer!"); + if (Ty != IntTy) + V = IRB.CreateZExt(V, IntTy, Name + ".ext"); + assert(DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) && + "Element store outside of alloca store"); + uint64_t ShAmt = 8*Offset; + if (DL.isBigEndian()) + ShAmt = 8*(DL.getTypeStoreSize(IntTy) - DL.getTypeStoreSize(Ty) - Offset); + if (ShAmt) + V = IRB.CreateShl(V, ShAmt, Name + ".shift"); + + if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) { + APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt); + Old = IRB.CreateAnd(Old, Mask, Name + ".mask"); + V = IRB.CreateOr(Old, V, Name + ".insert"); + } + return V; +} + namespace { /// \brief Visitor to rewrite instructions using a partition of an alloca to /// use a new alloca. 
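The free-standing extractInteger/insertInteger helpers added above reduce to plain shift-and-mask arithmetic. As a sanity check of what the emitted IR computes, here is the same math on ordinary integers for the little-endian case; the helper names are ours and purely illustrative, and on big-endian targets the shift amount is mirrored, as the DL.isBigEndian() branches show:

    #include <cassert>
    #include <cstdint>

    // Mirrors extractInteger on a little-endian target: logical shift right
    // by 8*ByteOffset, then truncate to the narrow type.
    static uint16_t extractU16(uint64_t Whole, unsigned ByteOffset) {
      return static_cast<uint16_t>(Whole >> (8 * ByteOffset));
    }

    // Mirrors insertInteger: widen, shift into position, clear the old slot
    // with a mask, then merge with an or.
    static uint64_t insertU16(uint64_t Old, uint16_t V, unsigned ByteOffset) {
      unsigned ShAmt = 8 * ByteOffset;
      uint64_t Mask = ~(uint64_t(0xFFFF) << ShAmt);
      return (Old & Mask) | (uint64_t(V) << ShAmt);
    }

    int main() {
      uint64_t W = 0x1122334455667788ULL;
      assert(extractU16(W, 2) == 0x5566);
      assert(insertU16(W, 0xBEEF, 2) == 0x11223344BEEF7788ULL);
      return 0;
    }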
@@ -2371,60 +2410,6 @@ private: return IRB.getInt32(Index); } - Value *extractInteger(IRBuilder<> &IRB, IntegerType *TargetTy, - uint64_t Offset) { - assert(IntTy && "We cannot extract an integer from the alloca"); - Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".load")); - V = convertValue(TD, IRB, V, IntTy); - assert(Offset >= NewAllocaBeginOffset && "Out of bounds offset"); - uint64_t RelOffset = Offset - NewAllocaBeginOffset; - assert(TD.getTypeStoreSize(TargetTy) + RelOffset <= - TD.getTypeStoreSize(IntTy) && - "Element load outside of alloca store"); - uint64_t ShAmt = 8*RelOffset; - if (TD.isBigEndian()) - ShAmt = 8*(TD.getTypeStoreSize(IntTy) - - TD.getTypeStoreSize(TargetTy) - RelOffset); - if (ShAmt) - V = IRB.CreateLShr(V, ShAmt, getName(".shift")); - assert(TargetTy->getBitWidth() <= IntTy->getBitWidth() && - "Cannot extract to a larger integer!"); - if (TargetTy != IntTy) - V = IRB.CreateTrunc(V, TargetTy, getName(".trunc")); - return V; - } - - StoreInst *insertInteger(IRBuilder<> &IRB, Value *V, uint64_t Offset) { - IntegerType *Ty = cast<IntegerType>(V->getType()); - assert(Ty->getBitWidth() <= IntTy->getBitWidth() && - "Cannot insert a larger integer!"); - if (Ty != IntTy) - V = IRB.CreateZExt(V, IntTy, getName(".ext")); - assert(Offset >= NewAllocaBeginOffset && "Out of bounds offset"); - uint64_t RelOffset = Offset - NewAllocaBeginOffset; - assert(TD.getTypeStoreSize(Ty) + RelOffset <= - TD.getTypeStoreSize(IntTy) && - "Element store outside of alloca store"); - uint64_t ShAmt = 8*RelOffset; - if (TD.isBigEndian()) - ShAmt = 8*(TD.getTypeStoreSize(IntTy) - TD.getTypeStoreSize(Ty) - - RelOffset); - if (ShAmt) - V = IRB.CreateShl(V, ShAmt, getName(".shift")); - - if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) { - APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt); - Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".oldload")); - Old = convertValue(TD, IRB, Old, IntTy); - Old = IRB.CreateAnd(Old, Mask, getName(".mask")); - V = IRB.CreateOr(Old, V, getName(".insert")); - } - V = convertValue(TD, IRB, V, NewAllocaTy); - return IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment()); - } - void deleteIfTriviallyDead(Value *V) { Instruction *I = cast<Instruction>(V); if (isInstructionTriviallyDead(I)) @@ -2452,12 +2437,18 @@ private: } bool rewriteIntegerLoad(IRBuilder<> &IRB, LoadInst &LI) { + assert(IntTy && "We cannot insert an integer to the alloca"); assert(!LI.isVolatile()); - Value *Result = extractInteger(IRB, cast<IntegerType>(LI.getType()), - BeginOffset); - LI.replaceAllUsesWith(Result); + Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), + getName(".load")); + V = convertValue(TD, IRB, V, IntTy); + assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset"); + uint64_t Offset = BeginOffset - NewAllocaBeginOffset; + V = extractInteger(TD, IRB, V, cast<IntegerType>(LI.getType()), Offset, + getName(".extract")); + LI.replaceAllUsesWith(V); Pass.DeadInsts.push_back(&LI); - DEBUG(dbgs() << " to: " << *Result << "\n"); + DEBUG(dbgs() << " to: " << *V << "\n"); return true; } @@ -2519,8 +2510,20 @@ private: } bool rewriteIntegerStore(IRBuilder<> &IRB, StoreInst &SI) { + assert(IntTy && "We cannot extract an integer from the alloca"); assert(!SI.isVolatile()); - StoreInst *Store = insertInteger(IRB, SI.getValueOperand(), BeginOffset); + Value *V = SI.getValueOperand(); + if (TD.getTypeSizeInBits(V->getType()) != IntTy->getBitWidth()) { + Value *Old = 
IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), + getName(".oldload")); + Old = convertValue(TD, IRB, Old, IntTy); + assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset"); + uint64_t Offset = BeginOffset - NewAllocaBeginOffset; + V = insertInteger(TD, IRB, Old, SI.getValueOperand(), Offset, + getName(".insert")); + } + V = convertValue(TD, IRB, V, NewAllocaTy); + StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment()); Pass.DeadInsts.push_back(&SI); (void)Store; DEBUG(dbgs() << " to: " << *Store << "\n"); @@ -2652,10 +2655,12 @@ private: if (IntTy && (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset)) { assert(!II.isVolatile()); - StoreInst *Store = insertInteger(IRB, V, BeginOffset); - (void)Store; - DEBUG(dbgs() << " to: " << *Store << "\n"); - return true; + Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), + getName(".oldload")); + Old = convertValue(TD, IRB, Old, IntTy); + assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset"); + uint64_t Offset = BeginOffset - NewAllocaBeginOffset; + V = insertInteger(TD, IRB, Old, V, Offset, getName(".insert")); } if (V->getType() != AllocaTy) @@ -2811,17 +2816,25 @@ private: getIndex(IRB, BeginOffset), getName(".copyextract")); } else if (IntTy && !IsWholeAlloca && !IsDest) { - Src = extractInteger(IRB, SubIntTy, BeginOffset); + Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), + getName(".load")); + Src = convertValue(TD, IRB, Src, IntTy); + assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset"); + uint64_t Offset = BeginOffset - NewAllocaBeginOffset; + Src = extractInteger(TD, IRB, Src, SubIntTy, Offset, getName(".extract")); } else { Src = IRB.CreateAlignedLoad(SrcPtr, Align, II.isVolatile(), getName(".copyload")); } if (IntTy && !IsWholeAlloca && IsDest) { - StoreInst *Store = insertInteger(IRB, Src, BeginOffset); - (void)Store; - DEBUG(dbgs() << " to: " << *Store << "\n"); - return true; + Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), + getName(".oldload")); + Old = convertValue(TD, IRB, Old, IntTy); + assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset"); + uint64_t Offset = BeginOffset - NewAllocaBeginOffset; + Src = insertInteger(TD, IRB, Old, Src, Offset, getName(".insert")); + Src = convertValue(TD, IRB, Src, NewAllocaTy); } if (IsVectorElement && IsDest) { diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index d86c4cbc9f..90efa8ae0e 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -135,47 +135,6 @@ static bool IsOnlyUsedInEqualityComparison(Value *V, Value *With) { namespace { //===---------------------------------------===// -// 'strcpy' Optimizations - -struct StrCpyOpt : public LibCallOptimization { - bool OptChkCall; // True if it's optimizing a __strcpy_chk libcall. - - StrCpyOpt(bool c) : OptChkCall(c) {} - - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - // Verify the "strcpy" function prototype. - unsigned NumParams = OptChkCall ? 3 : 2; - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != NumParams || - FT->getReturnType() != FT->getParamType(0) || - FT->getParamType(0) != FT->getParamType(1) || - FT->getParamType(0) != B.getInt8PtrTy()) - return 0; - - Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); - if (Dst == Src) // strcpy(x,x) -> x - return Src; - - // These optimizations require DataLayout. 
- if (!TD) return 0; - - // See if we can get the length of the input string. - uint64_t Len = GetStringLength(Src); - if (Len == 0) return 0; - - // We have enough information to now generate the memcpy call to do the - // concatenation for us. Make a memcpy to copy the nul byte with align = 1. - if (!OptChkCall || - !EmitMemCpyChk(Dst, Src, - ConstantInt::get(TD->getIntPtrType(*Context), Len), - CI->getArgOperand(2), B, TD, TLI)) - B.CreateMemCpy(Dst, Src, - ConstantInt::get(TD->getIntPtrType(*Context), Len), 1); - return Dst; - } -}; - -//===---------------------------------------===// // 'stpcpy' Optimizations struct StpCpyOpt: public LibCallOptimization { @@ -1275,7 +1234,6 @@ namespace { StringMap<LibCallOptimization*> Optimizations; // String and Memory LibCall Optimizations - StrCpyOpt StrCpy; StrCpyOpt StrCpyChk; StpCpyOpt StpCpy; StpCpyOpt StpCpyChk; StrNCpyOpt StrNCpy; StrLenOpt StrLen; StrPBrkOpt StrPBrk; @@ -1295,8 +1253,7 @@ namespace { bool Modified; // This is only used by doInitialization. public: static char ID; // Pass identification - SimplifyLibCalls() : FunctionPass(ID), StrCpy(false), StrCpyChk(true), - StpCpy(false), StpCpyChk(true), + SimplifyLibCalls() : FunctionPass(ID), StpCpy(false), StpCpyChk(true), UnaryDoubleFP(false), UnsafeUnaryDoubleFP(true) { initializeSimplifyLibCallsPass(*PassRegistry::getPassRegistry()); } @@ -1348,7 +1305,6 @@ void SimplifyLibCalls::AddOpt(LibFunc::Func F1, LibFunc::Func F2, /// we know. void SimplifyLibCalls::InitOptimizations() { // String and Memory LibCall Optimizations - Optimizations["strcpy"] = &StrCpy; Optimizations["strncpy"] = &StrNCpy; Optimizations["stpcpy"] = &StpCpy; Optimizations["strlen"] = &StrLen; @@ -1369,7 +1325,6 @@ void SimplifyLibCalls::InitOptimizations() { AddOpt(LibFunc::memset, &MemSet); // _chk variants of String and Memory LibCall Optimizations. - Optimizations["__strcpy_chk"] = &StrCpyChk; Optimizations["__stpcpy_chk"] = &StpCpyChk; // Math Library Optimizations diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp index f35cbbdde5..930555424d 100644 --- a/lib/Transforms/Utils/LowerInvoke.cpp +++ b/lib/Transforms/Utils/LowerInvoke.cpp @@ -45,10 +45,10 @@ #include "llvm/Pass.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/TargetTransformInfo.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetLowering.h" #include <csetjmp> #include <set> using namespace llvm; @@ -70,14 +70,15 @@ namespace { Constant *SetJmpFn, *LongJmpFn, *StackSaveFn, *StackRestoreFn; bool useExpensiveEHSupport; - // We peek in STTI to grab the target's jmp_buf size and alignment - const ScalarTargetTransformInfo *STTI; + // We peek in TLI to grab the target's jmp_buf size and alignment + const TargetLowering *TLI; public: static char ID; // Pass identification, replacement for typeid - explicit LowerInvoke(bool useExpensiveEHSupport = ExpensiveEHSupport) + explicit LowerInvoke(const TargetLowering *tli = NULL, + bool useExpensiveEHSupport = ExpensiveEHSupport) : FunctionPass(ID), useExpensiveEHSupport(useExpensiveEHSupport), - STTI(0) { + TLI(tli) { initializeLowerInvokePass(*PassRegistry::getPassRegistry()); } bool doInitialization(Module &M); @@ -107,24 +108,21 @@ INITIALIZE_PASS(LowerInvoke, "lowerinvoke", char &llvm::LowerInvokePassID = LowerInvoke::ID; // Public Interface To the LowerInvoke pass. 
-FunctionPass *llvm::createLowerInvokePass() {
- return new LowerInvoke(ExpensiveEHSupport);
+FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI) {
+ return new LowerInvoke(TLI, ExpensiveEHSupport);
}
-FunctionPass *llvm::createLowerInvokePass(bool useExpensiveEHSupport) {
- return new LowerInvoke(useExpensiveEHSupport);
+FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI,
+ bool useExpensiveEHSupport) {
+ return new LowerInvoke(TLI, useExpensiveEHSupport);
}

// doInitialization - Make sure that there is a prototype for abort in the
// current module.
bool LowerInvoke::doInitialization(Module &M) {
- TargetTransformInfo *TTI = getAnalysisIfAvailable<TargetTransformInfo>();
- if (TTI)
- STTI = TTI->getScalarTargetTransformInfo();
-
 Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
 if (useExpensiveEHSupport) {
 // Insert a type for the linked list of jump buffers.
- unsigned JBSize = STTI ? STTI->getJumpBufSize() : 0;
+ unsigned JBSize = TLI ? TLI->getJumpBufSize() : 0;
 JBSize = JBSize ? JBSize : 200;
 Type *JmpBufTy = ArrayType::get(VoidPtrTy, JBSize);
@@ -432,7 +430,7 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
 // Create an alloca for the incoming jump buffer ptr and the new jump buffer
 // that needs to be restored on all exits from the function. This is an
 // alloca because the value needs to be live across invokes.
- unsigned Align = STTI ? STTI->getJumpBufAlignment() : 0;
+ unsigned Align = TLI ? TLI->getJumpBufAlignment() : 0;
 AllocaInst *JmpBuf =
 new AllocaInst(JBLinkTy, 0, Align, "jblink", F.begin()->begin());
@@ -577,10 +575,6 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
}

bool LowerInvoke::runOnFunction(Function &F) {
- TargetTransformInfo *TTI = getAnalysisIfAvailable<TargetTransformInfo>();
- if (TTI)
- STTI = TTI->getScalarTargetTransformInfo();
-
 if (useExpensiveEHSupport)
 return insertExpensiveEHSupport(F);
 else
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index bd28ec3527..b15acdff63 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -183,14 +183,30 @@ struct StrCpyChkOpt : public InstFortifiedLibCallOptimization {
 FT->getParamType(2) != TD->getIntPtrType(Context))
 return 0;

+ Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
+ if (Dst == Src) // __strcpy_chk(x,x) -> x
+ return Src;
+
 // If a) we don't have any length information, or b) we know this will
 // fit then just lower to a plain st[rp]cpy. Otherwise we'll keep our
 // st[rp]cpy_chk call which may fail at runtime if the size is too long.
 // TODO: It might be nice to get a maximum length out of the possible
 // string lengths for varying.
 if (isFoldable(2, 1, true)) {
- Value *Ret = EmitStrCpy(CI->getArgOperand(0), CI->getArgOperand(1), B, TD,
- TLI, Name.substr(2, 6));
+ Value *Ret = EmitStrCpy(Dst, Src, B, TD, TLI, Name.substr(2, 6));
+ return Ret;
+ } else {
+ // Maybe we can still fold __strcpy_chk to __memcpy_chk.
+ uint64_t Len = GetStringLength(Src);
+ if (Len == 0) return 0;
+
+ // This optimization requires DataLayout.
+ if (!TD) return 0; + + Value *Ret = + EmitMemCpyChk(Dst, Src, + ConstantInt::get(TD->getIntPtrType(Context), Len), + CI->getArgOperand(2), B, TD, TLI); return Ret; } return 0; @@ -497,6 +513,35 @@ struct StrNCmpOpt : public LibCallOptimization { } }; +struct StrCpyOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Verify the "strcpy" function prototype. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + FT->getReturnType() != FT->getParamType(0) || + FT->getParamType(0) != FT->getParamType(1) || + FT->getParamType(0) != B.getInt8PtrTy()) + return 0; + + Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); + if (Dst == Src) // strcpy(x,x) -> x + return Src; + + // These optimizations require DataLayout. + if (!TD) return 0; + + // See if we can get the length of the input string. + uint64_t Len = GetStringLength(Src); + if (Len == 0) return 0; + + // We have enough information to now generate the memcpy call to do the + // copy for us. Make a memcpy to copy the nul byte with align = 1. + B.CreateMemCpy(Dst, Src, + ConstantInt::get(TD->getIntPtrType(*Context), Len), 1); + return Dst; + } +}; + } // End anonymous namespace. namespace llvm { @@ -520,6 +565,7 @@ class LibCallSimplifierImpl { StrRChrOpt StrRChr; StrCmpOpt StrCmp; StrNCmpOpt StrNCmp; + StrCpyOpt StrCpy; void initOptimizations(); public: @@ -540,14 +586,15 @@ void LibCallSimplifierImpl::initOptimizations() { Optimizations["__stpcpy_chk"] = &StrCpyChk; Optimizations["__strncpy_chk"] = &StrNCpyChk; Optimizations["__stpncpy_chk"] = &StrNCpyChk; - Optimizations["strcmp"] = &StrCmp; - Optimizations["strncmp"] = &StrNCmp; // String and memory library call optimizations. Optimizations["strcat"] = &StrCat; Optimizations["strncat"] = &StrNCat; Optimizations["strchr"] = &StrChr; Optimizations["strrchr"] = &StrRChr; + Optimizations["strcmp"] = &StrCmp; + Optimizations["strncmp"] = &StrNCmp; + Optimizations["strcpy"] = &StrCpy; } Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) { diff --git a/lib/Transforms/Vectorize/CMakeLists.txt b/lib/Transforms/Vectorize/CMakeLists.txt index 06cf1e4e53..e64034ab26 100644 --- a/lib/Transforms/Vectorize/CMakeLists.txt +++ b/lib/Transforms/Vectorize/CMakeLists.txt @@ -1,6 +1,7 @@ add_llvm_library(LLVMVectorize BBVectorize.cpp Vectorize.cpp + LoopVectorize.cpp ) add_dependencies(LLVMVectorize intrinsics_gen) diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp new file mode 100644 index 0000000000..9bbd9ab60b --- /dev/null +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -0,0 +1,885 @@ +//===- LoopVectorize.cpp - A Loop Vectorizer ------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is a simple loop vectorizer. We currently only support single block +// loops. We have a very simple and restrictive legality check: we need to read +// and write from disjoint memory locations. We still don't have a cost model. +// This pass has three parts: +// 1. The main loop pass that drives the different parts. +// 2. LoopVectorizationLegality - A helper class that checks for the legality +// of the vectorization. +// 3. 
SingleBlockLoopVectorizer - A helper class that performs the actual +// widening of instructions. +// +//===----------------------------------------------------------------------===// +#define LV_NAME "loop-vectorize" +#define DEBUG_TYPE LV_NAME +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instructions.h" +#include "llvm/LLVMContext.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Value.h" +#include "llvm/Function.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/Module.h" +#include "llvm/Type.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AliasSetTracker.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/DataLayout.h" +#include "llvm/Transforms/Utils/Local.h" +#include <algorithm> +using namespace llvm; + +static cl::opt<unsigned> +DefaultVectorizationFactor("default-loop-vectorize-width", + cl::init(4), cl::Hidden, + cl::desc("Set the default loop vectorization width")); + +namespace { + +/// Vectorize a simple loop. This class performs the widening of simple single +/// basic block loops into vectors. It does not perform any +/// vectorization-legality checks, and just does it. It widens the vectors +/// to a given vectorization factor (VF). +class SingleBlockLoopVectorizer { +public: + /// Ctor. + SingleBlockLoopVectorizer(Loop *OrigLoop, ScalarEvolution *Se, LoopInfo *Li, + LPPassManager *Lpm, unsigned VecWidth): + Orig(OrigLoop), SE(Se), LI(Li), LPM(Lpm), VF(VecWidth), + Builder(0), Induction(0), OldInduction(0) { } + + ~SingleBlockLoopVectorizer() { + delete Builder; + } + + // Perform the actual loop widening (vectorization). + void vectorize() { + ///Create a new empty loop. Unlink the old loop and connect the new one. + createEmptyLoop(); + /// Widen each instruction in the old loop to a new one in the new loop. + vectorizeLoop(); + // register the new loop. + cleanup(); + } + +private: + /// Create an empty loop, based on the loop ranges of the old loop. + void createEmptyLoop(); + /// Copy and widen the instructions from the old loop. + void vectorizeLoop(); + /// Insert the new loop to the loop hierarchy and pass manager. + void cleanup(); + + /// This instruction is un-vectorizable. Implement it as a sequence + /// of scalars. + void scalarizeInstruction(Instruction *Instr); + + /// Create a broadcast instruction. This method generates a broadcast + /// instruction (shuffle) for loop invariant values and for the induction + /// value. If this is the induction variable then we extend it to N, N+1, ... + /// this is needed because each iteration in the loop corresponds to a SIMD + /// element. + Value *getBroadcastInstrs(Value *V); + + /// This is a helper function used by getBroadcastInstrs. It adds 0, 1, 2 .. + /// for each element in the vector. Starting from zero. + Value *getConsecutiveVector(Value* Val); + + /// Check that the GEP operands are all uniform except for the last index + /// which has to be the induction variable. 
+ bool isConsecutiveGep(GetElementPtrInst *Gep);
+
+ /// When we go over instructions in the basic block we rely on previous
+ /// values within the current basic block or on loop invariant values.
+ /// When we widen (vectorize) values we place them in the map. If the values
+ /// are not within the map, they have to be loop invariant, so we simply
+ /// broadcast them into a vector.
+ Value *getVectorValue(Value *V);
+
+ typedef DenseMap<Value*, Value*> ValueMap;
+
+ /// The original loop.
+ Loop *Orig;
+ // Scev analysis to use.
+ ScalarEvolution *SE;
+ // Loop Info.
+ LoopInfo *LI;
+ // Loop Pass Manager.
+ LPPassManager *LPM;
+ // The vectorization factor to use.
+ unsigned VF;
+
+ // The builder that we use.
+ IRBuilder<> *Builder;
+
+ // --- Vectorization state ---
+
+ /// The new Induction variable which was added to the new block.
+ PHINode *Induction;
+ /// The induction variable of the old basic block.
+ PHINode *OldInduction;
+ // Maps scalars to widened vectors.
+ ValueMap WidenMap;
+};
+
+/// Perform the vectorization legality check. This class does not look at the
+/// profitability of vectorization, only the legality. At the moment the checks
+/// are very simple and focus on single basic block loops with a constant
+/// iteration count and no reductions.
+class LoopVectorizationLegality {
+public:
+ LoopVectorizationLegality(Loop *Lp, ScalarEvolution *Se, DataLayout *Dl):
+ TheLoop(Lp), SE(Se), DL(Dl) { }
+
+ /// Returns the maximum vectorization factor that we *can* use to vectorize
+ /// this loop. This does not mean that it is profitable to vectorize this
+ /// loop, only that it is legal to do so. This may be a large number. We
+ /// can vectorize to any SIMD width below this number.
+ unsigned getLoopMaxVF();
+
+private:
+ /// Check if a single basic block loop is vectorizable.
+ /// At this point we know that this is a loop with a constant trip count
+ /// and we only need to check individual instructions.
+ bool canVectorizeBlock(BasicBlock &BB);
+
+ // Check if a pointer value is known to be disjoint.
+ // Example: Alloca, Global, NoAlias.
+ bool isidentifiedSafeObject(Value* Val);
+
+ /// The loop that we evaluate.
+ Loop *TheLoop;
+ /// Scev analysis.
+ ScalarEvolution *SE;
+ /// DataLayout analysis.
+ DataLayout *DL;
+};
+
+struct LoopVectorize : public LoopPass {
+ static char ID; // Pass identification, replacement for typeid
+
+ LoopVectorize() : LoopPass(ID) {
+ initializeLoopVectorizePass(*PassRegistry::getPassRegistry());
+ }
+
+ ScalarEvolution *SE;
+ DataLayout *DL;
+ LoopInfo *LI;
+
+ virtual bool runOnLoop(Loop *L, LPPassManager &LPM) {
+ // Only vectorize innermost loops.
+ if (!L->empty())
+ return false;
+
+ SE = &getAnalysis<ScalarEvolution>();
+ DL = getAnalysisIfAvailable<DataLayout>();
+ LI = &getAnalysis<LoopInfo>();
+
+ DEBUG(dbgs() << "LV: Checking a loop in \"" <<
+ L->getHeader()->getParent()->getName() << "\"\n");
+
+ // Check if it is legal to vectorize the loop.
+ LoopVectorizationLegality LVL(L, SE, DL);
+ unsigned MaxVF = LVL.getLoopMaxVF();
+
+ // Check that we can vectorize using the chosen vectorization width.
+ if (MaxVF < DefaultVectorizationFactor) {
+ DEBUG(dbgs() << "LV: non-vectorizable MaxVF ("<< MaxVF << ").\n");
+ return false;
+ }
+
+ DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< MaxVF << ").\n");
+
+ // If we decided that it is *legal* to vectorize the loop, do it.
+    SingleBlockLoopVectorizer LB(L, SE, LI, &LPM, DefaultVectorizationFactor);
+    LB.vectorize();
+
+    DEBUG(verifyFunction(*L->getHeader()->getParent()));
+    return true;
+  }
+
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    LoopPass::getAnalysisUsage(AU);
+    AU.addRequiredID(LoopSimplifyID);
+    AU.addRequired<LoopInfo>();
+    AU.addRequired<ScalarEvolution>();
+  }
+
+};
+
+Value *SingleBlockLoopVectorizer::getBroadcastInstrs(Value *V) {
+  // Instructions that access the old induction variable
+  // actually want to get the new one.
+  if (V == OldInduction)
+    V = Induction;
+  // Create the types.
+  LLVMContext &C = V->getContext();
+  Type *VTy = VectorType::get(V->getType(), VF);
+  Type *I32 = IntegerType::getInt32Ty(C);
+  Constant *Zero = ConstantInt::get(I32, 0);
+  Value *Zeros = ConstantAggregateZero::get(VectorType::get(I32, VF));
+  Value *UndefVal = UndefValue::get(VTy);
+  // Insert the value into a new vector.
+  Value *SingleElem = Builder->CreateInsertElement(UndefVal, V, Zero);
+  // Broadcast the scalar into all locations in the vector.
+  Value *Shuf = Builder->CreateShuffleVector(SingleElem, UndefVal, Zeros,
+                                             "broadcast");
+  // We are accessing the induction variable. Make sure to promote the
+  // index for each consecutive SIMD lane. This adds 0,1,2 ... to all lanes.
+  if (V == Induction)
+    return getConsecutiveVector(Shuf);
+  return Shuf;
+}
+
+Value *SingleBlockLoopVectorizer::getConsecutiveVector(Value* Val) {
+  assert(Val->getType()->isVectorTy() && "Must be a vector");
+  assert(Val->getType()->getScalarType()->isIntegerTy() &&
+         "Elem must be an integer");
+  // Create the types.
+  Type *ITy = Val->getType()->getScalarType();
+  VectorType *Ty = cast<VectorType>(Val->getType());
+  unsigned VLen = Ty->getNumElements();
+  SmallVector<Constant*, 8> Indices;
+
+  // Create a vector of consecutive numbers from zero to VF.
+  for (unsigned i = 0; i < VLen; ++i)
+    Indices.push_back(ConstantInt::get(ITy, i));
+
+  // Add the consecutive indices to the vector value.
+  Constant *Cv = ConstantVector::get(Indices);
+  assert(Cv->getType() == Val->getType() && "Invalid consecutive vec");
+  return Builder->CreateAdd(Val, Cv, "induction");
+}
+
+
+bool SingleBlockLoopVectorizer::isConsecutiveGep(GetElementPtrInst *Gep) {
+  if (!Gep)
+    return false;
+
+  unsigned NumOperands = Gep->getNumOperands();
+  Value *LastIndex = Gep->getOperand(NumOperands - 1);
+
+  // Check that all of the GEP indices are uniform except for the last.
+  for (unsigned i = 0; i < NumOperands - 1; ++i)
+    if (!SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), Orig))
+      return false;
+
+  // We can emit wide load/stores only if the last index is the induction
+  // variable.
+  const SCEV *Last = SE->getSCEV(LastIndex);
+  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Last)) {
+    const SCEV *Step = AR->getStepRecurrence(*SE);
+
+    // The memory is consecutive because the last index is consecutive
+    // and all other indices are loop invariant.
+    if (Step->isOne())
+      return true;
+  }
+
+  return false;
+}
+
+Value *SingleBlockLoopVectorizer::getVectorValue(Value *V) {
+  // If we saved a vectorized copy of V, use it.
+  ValueMap::iterator it = WidenMap.find(V);
+  if (it != WidenMap.end())
+    return it->second;
+
+  // Broadcast V and save the value for future uses.
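+  // (Because the result is cached in WidenMap below, each loop-invariant
+  // value is broadcast at most once; later queries return the same vector.)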
+  Value *B = getBroadcastInstrs(V);
+  WidenMap[V] = B;
+  return B;
+}
+
+void SingleBlockLoopVectorizer::scalarizeInstruction(Instruction *Instr) {
+  assert(!Instr->getType()->isAggregateType() && "Can't handle vectors");
+  // Holds vector parameters or scalars, in case of uniform vals.
+  SmallVector<Value*, 8> Params;
+
+  // Find all of the vectorized parameters.
+  for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) {
+    Value *SrcOp = Instr->getOperand(op);
+
+    // If we are accessing the old induction variable, use the new one.
+    if (SrcOp == OldInduction) {
+      Params.push_back(getBroadcastInstrs(Induction));
+      continue;
+    }
+
+    // Try using previously calculated values.
+    Instruction *SrcInst = dyn_cast<Instruction>(SrcOp);
+
+    // If the src is an instruction that appeared earlier in the basic block
+    // then it should already be vectorized.
+    if (SrcInst && SrcInst->getParent() == Instr->getParent()) {
+      assert(WidenMap.count(SrcInst) && "Source operand is unavailable");
+      // The parameter is a vector value from earlier.
+      Params.push_back(WidenMap[SrcInst]);
+    } else {
+      // The parameter is a scalar from outside the loop. Maybe even a constant.
+      Params.push_back(SrcOp);
+    }
+  }
+
+  assert(Params.size() == Instr->getNumOperands() &&
+         "Invalid number of operands");
+
+  // Does this instruction return a value?
+  bool IsVoidRetTy = Instr->getType()->isVoidTy();
+  Value *VecResults = 0;
+
+  // If we have a return value, create an empty vector. We place the scalarized
+  // instructions in this vector.
+  if (!IsVoidRetTy)
+    VecResults = UndefValue::get(VectorType::get(Instr->getType(), VF));
+
+  // For each scalar that we create.
+  for (unsigned i = 0; i < VF; ++i) {
+    Instruction *Cloned = Instr->clone();
+    if (!IsVoidRetTy)
+      Cloned->setName(Instr->getName() + ".cloned");
+    // Replace the operands of the cloned instructions with extracted scalars.
+    for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) {
+      Value *Op = Params[op];
+      // Param is a vector. Need to extract the right lane.
+      if (Op->getType()->isVectorTy())
+        Op = Builder->CreateExtractElement(Op, Builder->getInt32(i));
+      Cloned->setOperand(op, Op);
+    }
+
+    // Place the cloned scalar in the new loop.
+    Builder->Insert(Cloned);
+
+    // If the original scalar returns a value we need to place it in a vector
+    // so that future users will be able to use it.
+    if (!IsVoidRetTy)
+      VecResults = Builder->CreateInsertElement(VecResults, Cloned,
+                                                Builder->getInt32(i));
+  }
+
+  if (!IsVoidRetTy)
+    WidenMap[Instr] = VecResults;
+}
+
+void SingleBlockLoopVectorizer::createEmptyLoop() {
+  /*
+   In this function we generate a new loop. The new loop will contain
+   the vectorized instructions while the old loop will continue to run the
+   scalar remainder.
+
+    [  ]  <-- vector loop bypass.
+   /  |
+  /   v
+ |   [ ]      <-- vector pre header.
+ |    |
+ |    v
+ |   [  ] \
+ |   [  ]_|   <-- vector loop.
+ |    |
+  \   v
+    >[ ]      <--- middle-block.
+   /  |
+  /   v
+ |   [ ]      <--- new preheader.
+ |    |
+ |    v
+ |   [  ] \
+ |   [  ]_|   <-- old scalar loop to handle remainder.
+  \   |
+   \  v
+    >[ ]      <-- exit block.
+     ...
+   */
+
+  // This is the original scalar-loop preheader.
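+  // After the block splitting below it becomes the bypass block, which
+  // decides whether the vector loop is executed at all.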
+ BasicBlock *BypassBlock = Orig->getLoopPreheader(); + BasicBlock *ExitBlock = Orig->getExitBlock(); + assert(ExitBlock && "Must have an exit block"); + + assert(Orig->getNumBlocks() == 1 && "Invalid loop"); + assert(BypassBlock && "Invalid loop structure"); + + BasicBlock *VectorPH = + BypassBlock->splitBasicBlock(BypassBlock->getTerminator(), "vector.ph"); + BasicBlock *VecBody = VectorPH->splitBasicBlock(VectorPH->getTerminator(), + "vector.body"); + + BasicBlock *MiddleBlock = VecBody->splitBasicBlock(VecBody->getTerminator(), + "middle.block"); + + + BasicBlock *ScalarPH = + MiddleBlock->splitBasicBlock(MiddleBlock->getTerminator(), + "scalar.preheader"); + + // Find the induction variable. + BasicBlock *OldBasicBlock = Orig->getHeader(); + OldInduction = dyn_cast<PHINode>(OldBasicBlock->begin()); + assert(OldInduction && "We must have a single phi node."); + Type *IdxTy = OldInduction->getType(); + + // Use this IR builder to create the loop instructions (Phi, Br, Cmp) + // inside the loop. + Builder = new IRBuilder<>(VecBody); + Builder->SetInsertPoint(VecBody->getFirstInsertionPt()); + + // Generate the induction variable. + Induction = Builder->CreatePHI(IdxTy, 2, "index"); + Constant *Zero = ConstantInt::get(IdxTy, 0); + Constant *Step = ConstantInt::get(IdxTy, VF); + + // Find the loop boundaries. + const SCEV *ExitCount = SE->getExitCount(Orig, Orig->getHeader()); + assert(ExitCount != SE->getCouldNotCompute() && "Invalid loop count"); + + // Get the total trip count from the count by adding 1. + ExitCount = SE->getAddExpr(ExitCount, + SE->getConstant(ExitCount->getType(), 1)); + + // Expand the trip count and place the new instructions in the preheader. + // Notice that the pre-header does not change, only the loop body. + SCEVExpander Exp(*SE, "induction"); + Instruction *Loc = BypassBlock->getTerminator(); + + // We may need to extend the index in case there is a type mismatch. + // We know that the count starts at zero and does not overflow. + // We are using Zext because it should be less expensive. + if (ExitCount->getType() != Induction->getType()) + ExitCount = SE->getZeroExtendExpr(ExitCount, IdxTy); + + // Count holds the overall loop count (N). + Value *Count = Exp.expandCodeFor(ExitCount, Induction->getType(), Loc); + // Now we need to generate the expression for N - (N % VF), which is + // the part that the vectorized body will execute. + Constant *CIVF = ConstantInt::get(IdxTy, VF); + Value *R = BinaryOperator::CreateURem(Count, CIVF, "n.mod.vf", Loc); + Value *CountRoundDown = BinaryOperator::CreateSub(Count, R, "n.vec", Loc); + + // Now, compare the new count to zero. If it is zero, jump to the scalar part. + Value *Cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, + CountRoundDown, ConstantInt::getNullValue(IdxTy), + "cmp.zero", Loc); + BranchInst::Create(MiddleBlock, VectorPH, Cmp, Loc); + // Remove the old terminator. + Loc->eraseFromParent(); + + // Add a check in the middle block to see if we have completed + // all of the iterations in the first vector loop. + // If (N - N%VF) == N, then we *don't* need to run the remainder. + Value *CmpN = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, Count, + CountRoundDown, "cmp.n", + MiddleBlock->getTerminator()); + + BranchInst::Create(ExitBlock, ScalarPH, CmpN, MiddleBlock->getTerminator()); + // Remove the old terminator. + MiddleBlock->getTerminator()->eraseFromParent(); + + // Create i+1 and fill the PHINode. 
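+  // (Note that Step equals VF, so the new induction variable advances one
+  // full vector width per iteration: 0, VF, 2*VF, ...)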
+ Value *NextIdx = Builder->CreateAdd(Induction, Step, "index.next"); + Induction->addIncoming(Zero, VectorPH); + Induction->addIncoming(NextIdx, VecBody); + // Create the compare. + Value *ICmp = Builder->CreateICmpEQ(NextIdx, CountRoundDown); + Builder->CreateCondBr(ICmp, MiddleBlock, VecBody); + + // Now we have two terminators. Remove the old one from the block. + VecBody->getTerminator()->eraseFromParent(); + + // Fix the scalar body iteration count. + unsigned BlockIdx = OldInduction->getBasicBlockIndex(ScalarPH); + OldInduction->setIncomingValue(BlockIdx, CountRoundDown); + + // Get ready to start creating new instructions into the vectorized body. + Builder->SetInsertPoint(VecBody->getFirstInsertionPt()); + + // Register the new loop. + Loop* Lp = new Loop(); + LPM->insertLoop(Lp, Orig->getParentLoop()); + + Lp->addBasicBlockToLoop(VecBody, LI->getBase()); + + Loop *ParentLoop = Orig->getParentLoop(); + if (ParentLoop) { + ParentLoop->addBasicBlockToLoop(ScalarPH, LI->getBase()); + ParentLoop->addBasicBlockToLoop(VectorPH, LI->getBase()); + ParentLoop->addBasicBlockToLoop(MiddleBlock, LI->getBase()); + } +} + +void SingleBlockLoopVectorizer::vectorizeLoop() { + BasicBlock &BB = *Orig->getHeader(); + + // For each instruction in the old loop. + for (BasicBlock::iterator it = BB.begin(), e = BB.end(); it != e; ++it) { + Instruction *Inst = it; + + switch (Inst->getOpcode()) { + case Instruction::PHI: + case Instruction::Br: + // Nothing to do for PHIs and BR, since we already took care of the + // loop control flow instructions. + continue; + + case Instruction::Add: + case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: { + // Just widen binops. + BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst); + Value *A = getVectorValue(Inst->getOperand(0)); + Value *B = getVectorValue(Inst->getOperand(1)); + // Use this vector value for all users of the original instruction. + WidenMap[Inst] = Builder->CreateBinOp(BinOp->getOpcode(), A, B); + break; + } + case Instruction::Select: { + // Widen selects. + Value *A = getVectorValue(Inst->getOperand(0)); + Value *B = getVectorValue(Inst->getOperand(1)); + Value *C = getVectorValue(Inst->getOperand(2)); + WidenMap[Inst] = Builder->CreateSelect(A, B, C); + break; + } + + case Instruction::ICmp: + case Instruction::FCmp: { + // Widen compares. Generate vector compares. + bool FCmp = (Inst->getOpcode() == Instruction::FCmp); + CmpInst *Cmp = dyn_cast<CmpInst>(Inst); + Value *A = getVectorValue(Inst->getOperand(0)); + Value *B = getVectorValue(Inst->getOperand(1)); + if (FCmp) + WidenMap[Inst] = Builder->CreateFCmp(Cmp->getPredicate(), A, B); + else + WidenMap[Inst] = Builder->CreateICmp(Cmp->getPredicate(), A, B); + break; + } + + case Instruction::Store: { + // Attempt to issue a wide store. + StoreInst *SI = dyn_cast<StoreInst>(Inst); + Type *StTy = VectorType::get(SI->getValueOperand()->getType(), VF); + Value *Ptr = SI->getPointerOperand(); + unsigned Alignment = SI->getAlignment(); + GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr); + // This store does not use GEPs. 
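+      // (isConsecutiveGep also rejects a GEP whose last index is not the
+      // induction variable, so such stores are scalarized as well.)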
+      if (!isConsecutiveGep(Gep)) {
+        scalarizeInstruction(Inst);
+        break;
+      }
+
+      // Create the new GEP with the new induction variable.
+      GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
+      unsigned NumOperands = Gep->getNumOperands();
+      Gep2->setOperand(NumOperands - 1, Induction);
+      Ptr = Builder->Insert(Gep2);
+      Ptr = Builder->CreateBitCast(Ptr, StTy->getPointerTo());
+      Value *Val = getVectorValue(SI->getValueOperand());
+      Builder->CreateStore(Val, Ptr)->setAlignment(Alignment);
+      break;
+    }
+    case Instruction::Load: {
+      // Attempt to issue a wide load.
+      LoadInst *LI = dyn_cast<LoadInst>(Inst);
+      Type *RetTy = VectorType::get(LI->getType(), VF);
+      Value *Ptr = LI->getPointerOperand();
+      unsigned Alignment = LI->getAlignment();
+      GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
+
+      // The pointer is not a consecutive GEP. Scalarize the load.
+      if (!isConsecutiveGep(Gep)) {
+        scalarizeInstruction(Inst);
+        break;
+      }
+
+      // Create the new GEP with the new induction variable.
+      GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
+      unsigned NumOperands = Gep->getNumOperands();
+      Gep2->setOperand(NumOperands - 1, Induction);
+      Ptr = Builder->Insert(Gep2);
+      Ptr = Builder->CreateBitCast(Ptr, RetTy->getPointerTo());
+      LI = Builder->CreateLoad(Ptr);
+      LI->setAlignment(Alignment);
+      // Use this vector value for all users of the load.
+      WidenMap[Inst] = LI;
+      break;
+    }
+    case Instruction::ZExt:
+    case Instruction::SExt:
+    case Instruction::FPToUI:
+    case Instruction::FPToSI:
+    case Instruction::FPExt:
+    case Instruction::PtrToInt:
+    case Instruction::IntToPtr:
+    case Instruction::SIToFP:
+    case Instruction::UIToFP:
+    case Instruction::Trunc:
+    case Instruction::FPTrunc:
+    case Instruction::BitCast: {
+      /// Vectorize casts.
+      CastInst *CI = dyn_cast<CastInst>(Inst);
+      Value *A = getVectorValue(Inst->getOperand(0));
+      Type *DestTy = VectorType::get(CI->getType()->getScalarType(), VF);
+      WidenMap[Inst] = Builder->CreateCast(CI->getOpcode(), A, DestTy);
+      break;
+    }
+
+    default:
+      /// All other instructions are unsupported. Scalarize them.
+      scalarizeInstruction(Inst);
+      break;
+    }// end of switch.
+  }// end of for_each instr.
+}
+
+void SingleBlockLoopVectorizer::cleanup() {
+  // Tell scalar evolution to forget the original loop.
+  SE->forgetLoop(Orig);
+}
+
+unsigned LoopVectorizationLegality::getLoopMaxVF() {
+  if (!TheLoop->getLoopPreheader()) {
+    assert(false && "No preheader!");
+    DEBUG(dbgs() << "LV: Loop not normalized." << "\n");
+    return 1;
+  }
+
+  // We can only vectorize single basic block loops.
+  unsigned NumBlocks = TheLoop->getNumBlocks();
+  if (NumBlocks != 1) {
+    DEBUG(dbgs() << "LV: Too many blocks:" << NumBlocks << "\n");
+    return 1;
+  }
+
+  // We need to have a loop header.
+  BasicBlock *BB = TheLoop->getHeader();
+  DEBUG(dbgs() << "LV: Found a loop: " << BB->getName() << "\n");
+
+  // Go over each instruction and look at memory deps.
+  if (!canVectorizeBlock(*BB)) {
+    DEBUG(dbgs() << "LV: Can't vectorize this loop header\n");
+    return 1;
+  }
+
+  // ScalarEvolution needs to be able to find the exit count.
+  const SCEV *ExitCount = SE->getExitCount(TheLoop, BB);
+  if (ExitCount == SE->getCouldNotCompute()) {
+    DEBUG(dbgs() << "LV: SCEV could not compute the loop exit count.\n");
+    return 1;
+  }
+
+  DEBUG(dbgs() << "LV: We can vectorize this loop!\n");
+
+  // Okay! We can vectorize. At this point we don't have any other mem analysis
+  // which may limit our maximum vectorization factor, so just return the
+  // maximum SIMD size.
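+  // (This is the -default-loop-vectorize-width value declared at the top of
+  // this file, which defaults to 4.)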
+  return DefaultVectorizationFactor;
+}
+
+bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) {
+  // Holds the read and write pointers that we find.
+  typedef SmallVector<Value*, 10> ValueVector;
+  ValueVector Reads;
+  ValueVector Writes;
+
+  unsigned NumPhis = 0;
+  for (BasicBlock::iterator it = BB.begin(), e = BB.end(); it != e; ++it) {
+    Instruction *I = it;
+
+    PHINode *Phi = dyn_cast<PHINode>(I);
+    if (Phi) {
+      NumPhis++;
+      // We only look at integer phi nodes.
+      if (!Phi->getType()->isIntegerTy()) {
+        DEBUG(dbgs() << "LV: Found a non-int PHI.\n");
+        return false;
+      }
+
+      // We only allow a single PHI node: the induction variable.
+      if (NumPhis > 1) {
+        DEBUG(dbgs() << "LV: Found more than one PHI.\n");
+        return false;
+      }
+
+      // This should not happen because the loop should be normalized.
+      if (Phi->getNumIncomingValues() != 2) {
+        DEBUG(dbgs() << "LV: Found an invalid PHI.\n");
+        return false;
+      }
+
+      // Check that the PHI is consecutive and starts at zero.
+      const SCEV *PhiScev = SE->getSCEV(Phi);
+      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
+      if (!AR) {
+        DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
+        return false;
+      }
+
+      const SCEV *Step = AR->getStepRecurrence(*SE);
+      const SCEV *Start = AR->getStart();
+
+      if (!Step->isOne() || !Start->isZero()) {
+        DEBUG(dbgs() << "LV: PHI does not start at zero or steps by one.\n");
+        return false;
+      }
+    }
+
+    // If this is a load, record its pointer. If it is not a load, abort.
+    // Notice that we don't handle function calls that read or write.
+    if (I->mayReadFromMemory()) {
+      LoadInst *Ld = dyn_cast<LoadInst>(I);
+      if (!Ld) return false;
+      if (!Ld->isSimple()) {
+        DEBUG(dbgs() << "LV: Found a non-simple load.\n");
+        return false;
+      }
+      GetUnderlyingObjects(Ld->getPointerOperand(), Reads, DL);
+    }
+
+    // Record store pointers. Abort on all other instructions that write to
+    // memory.
+    if (I->mayWriteToMemory()) {
+      StoreInst *St = dyn_cast<StoreInst>(I);
+      if (!St) return false;
+      if (!St->isSimple()) {
+        DEBUG(dbgs() << "LV: Found a non-simple store.\n");
+        return false;
+      }
+      GetUnderlyingObjects(St->getPointerOperand(), Writes, DL);
+    }
+
+    // We still don't handle function calls.
+    CallInst *CI = dyn_cast<CallInst>(I);
+    if (CI) {
+      DEBUG(dbgs() << "LV: Found a call site:"<<
+            CI->getCalledFunction()->getName() << "\n");
+      return false;
+    }
+
+    // We do not re-vectorize vectors.
+    if (!VectorType::isValidElementType(I->getType()) &&
+        !I->getType()->isVoidTy()) {
+      DEBUG(dbgs() << "LV: Found unvectorizable type." << "\n");
+      return false;
+    }
+
+    // Check that all of the users of this instruction are inside the BB.
+    for (Value::use_iterator it = I->use_begin(), e = I->use_end();
+         it != e; ++it) {
+      Instruction *U = cast<Instruction>(*it);
+      BasicBlock *Parent = U->getParent();
+      if (Parent != &BB) {
+        DEBUG(dbgs() << "LV: Found an outside user for: " << *U << "\n");
+        return false;
+      }
+    }
+  } // next instr.
+
+  if (NumPhis != 1) {
+    DEBUG(dbgs() << "LV: Did not find a Phi node.\n");
+    return false;
+  }
+
+  // Check that the underlying objects of the reads and writes are either
+  // disjoint memory locations, or that they are no-alias arguments.
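+  // For example, accesses through a global, an alloca, or a 'noalias'
+  // argument pass the checks below; a pointer that was itself loaded from
+  // memory does not, since we cannot prove it is disjoint from the others.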
+  ValueVector::iterator r, re, w, we;
+  for (r = Reads.begin(), re = Reads.end(); r != re; ++r) {
+    if (!isidentifiedSafeObject(*r)) {
+      DEBUG(dbgs() << "LV: Found a bad read Ptr: "<< **r << "\n");
+      return false;
+    }
+  }
+
+  for (w = Writes.begin(), we = Writes.end(); w != we; ++w) {
+    if (!isidentifiedSafeObject(*w)) {
+      DEBUG(dbgs() << "LV: Found a bad write Ptr: "<< **w << "\n");
+      return false;
+    }
+  }
+
+  // Check that no pointer is written to more than once.
+  SmallPtrSet<Value*, 8> WritePointerSet;
+  for (w = Writes.begin(), we = Writes.end(); w != we; ++w) {
+    if (!WritePointerSet.insert(*w)) {
+      DEBUG(dbgs() << "LV: Multiple writes to the same index: "<< **w << "\n");
+      return false;
+    }
+  }
+
+  // Check that the reads and the writes are disjoint.
+  for (r = Reads.begin(), re = Reads.end(); r != re; ++r) {
+    if (WritePointerSet.count(*r)) {
+      DEBUG(dbgs() << "LV: Found a read/write ptr: "<< **r << "\n");
+      return false;
+    }
+  }
+
+  // All is okay.
+  return true;
+}
+
+/// Checks if the value is a global variable, an alloca, or an argument
+/// marked with the NoAlias attribute.
+bool LoopVectorizationLegality::isidentifiedSafeObject(Value* Val) {
+  assert(Val && "Invalid value");
+  if (dyn_cast<GlobalValue>(Val))
+    return true;
+  if (dyn_cast<AllocaInst>(Val))
+    return true;
+  Argument *A = dyn_cast<Argument>(Val);
+  if (!A)
+    return false;
+  return A->hasNoAliasAttr();
+}
+
+} // namespace
+
+char LoopVectorize::ID = 0;
+static const char lv_name[] = "Loop Vectorization";
+INITIALIZE_PASS_BEGIN(LoopVectorize, LV_NAME, lv_name, false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false)
+
+namespace llvm {
+  Pass *createLoopVectorizePass() {
+    return new LoopVectorize();
+  }
+
+}
+
diff --git a/lib/Transforms/Vectorize/Vectorize.cpp b/lib/Transforms/Vectorize/Vectorize.cpp
index 1ef60029bc..d26973a7b3 100644
--- a/lib/Transforms/Vectorize/Vectorize.cpp
+++ b/lib/Transforms/Vectorize/Vectorize.cpp
@@ -7,7 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements common infrastructure for libLLVMVectorizeOpts.a, which
+// This file implements common infrastructure for libLLVMVectorizeOpts.a, which
 // implements several vectorization transformations over the LLVM intermediate
 // representation, including the C bindings for that library.
 //
@@ -23,10 +23,11 @@
 using namespace llvm;
 
-/// initializeVectorizationPasses - Initialize all passes linked into the
+/// initializeVectorizationPasses - Initialize all passes linked into the
 /// Vectorization library.
void llvm::initializeVectorization(PassRegistry &Registry) { initializeBBVectorizePass(Registry); + initializeLoopVectorizePass(Registry); } void LLVMInitializeVectorization(LLVMPassRegistryRef R) { @@ -37,3 +38,6 @@ void LLVMAddBBVectorizePass(LLVMPassManagerRef PM) { unwrap(PM)->add(createBBVectorizePass()); } +void LLVMAddLoopVectorizePass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createLoopVectorizePass()); +} diff --git a/lib/VMCore/CMakeLists.txt b/lib/VMCore/CMakeLists.txt index ba807fcacc..6c30967974 100644 --- a/lib/VMCore/CMakeLists.txt +++ b/lib/VMCore/CMakeLists.txt @@ -33,7 +33,6 @@ add_llvm_library(LLVMCore PrintModulePass.cpp Type.cpp TypeFinder.cpp - TargetTransformInfo.cpp Use.cpp User.cpp Value.cpp diff --git a/lib/VMCore/TargetTransformInfo.cpp b/lib/VMCore/TargetTransformInfo.cpp deleted file mode 100644 index 3af0222a21..0000000000 --- a/lib/VMCore/TargetTransformInfo.cpp +++ /dev/null @@ -1,27 +0,0 @@ -//===- llvm/VMCore/TargetTransformInfo.cpp ----------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "llvm/TargetTransformInfo.h" -#include "llvm/Support/ErrorHandling.h" - -using namespace llvm; - -/// Default ctor. -/// -/// @note This has to exist, because this is a pass, but it should never be -/// used. -TargetTransformInfo::TargetTransformInfo() : ImmutablePass(ID) { - report_fatal_error("Bad TargetTransformInfo ctor used. " - "Tool did not specify a TargetTransformInfo to use?"); -} - -INITIALIZE_PASS(TargetTransformInfo, "TargetTransformInfo", - "Target Transform Info", false, true) -char TargetTransformInfo::ID = 0; - diff --git a/test/CodeGen/MSP430/fp.ll b/test/CodeGen/MSP430/fp.ll new file mode 100644 index 0000000000..c3273eff05 --- /dev/null +++ b/test/CodeGen/MSP430/fp.ll @@ -0,0 +1,17 @@ +; RUN: llc -O0 -disable-fp-elim < %s | FileCheck %s + +target datalayout = "e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16" +target triple = "msp430---elf" + +define void @fp() nounwind { +entry: +; CHECK: fp: +; CHECK: push.w r4 +; CHECK: mov.w r1, r4 +; CHECK: sub.w #2, r1 + %i = alloca i16, align 2 +; CHECK: mov.w #0, -2(r4) + store i16 0, i16* %i, align 2 +; CHECK: pop.w r4 + ret void +} diff --git a/test/CodeGen/Mips/brconeq.ll b/test/CodeGen/Mips/brconeq.ll new file mode 100644 index 0000000000..613391557e --- /dev/null +++ b/test/CodeGen/Mips/brconeq.ll @@ -0,0 +1,38 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 + +@i = global i32 5, align 4 +@j = global i32 10, align 4 +@result = global i32 0, align 4 + +define void @test() nounwind { +entry: + %0 = load i32* @i, align 4 + %1 = load i32* @j, align 4 + %cmp = icmp eq i32 %0, %1 +; 16: cmp ${{[0-9]+}}, ${{[0-9]+}} +; 16: bteqz $[[LABEL:[0-9A-Ba-b_]+]] +; 16: $[[LABEL]]: + br i1 %cmp, label %if.end, label %if.then + +if.then: ; preds = %entry + store i32 1, i32* @result, align 4 + br label %if.end + +if.end: ; preds = %entry, %if.then + ret void +} + + + + + + + + + + + + + + + diff --git a/test/CodeGen/Mips/brconeqk.ll b/test/CodeGen/Mips/brconeqk.ll new file mode 100644 index 0000000000..2c0e72dabd --- /dev/null +++ b/test/CodeGen/Mips/brconeqk.ll @@ -0,0 +1,22 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 + +@i = global i32 5, align 4 +@result = global i32 0, 
align 4 + +define void @test() nounwind { +entry: + %0 = load i32* @i, align 4 + %cmp = icmp eq i32 %0, 10 + br i1 %cmp, label %if.end, label %if.then +; 16: cmpi ${{[0-9]+}}, {{[0-9]+}} +; 16: bteqz $[[LABEL:[0-9A-Ba-b_]+]] +; 16: $[[LABEL]]: +if.then: ; preds = %entry + store i32 1, i32* @result, align 4 + br label %if.end + +if.end: ; preds = %entry, %if.then + ret void +} + + diff --git a/test/CodeGen/Mips/brconeqz.ll b/test/CodeGen/Mips/brconeqz.ll new file mode 100644 index 0000000000..5586e7b976 --- /dev/null +++ b/test/CodeGen/Mips/brconeqz.ll @@ -0,0 +1,20 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 + +@i = global i32 5, align 4 +@result = global i32 0, align 4 + +define void @test() nounwind { +entry: + %0 = load i32* @i, align 4 + %cmp = icmp eq i32 %0, 0 + br i1 %cmp, label %if.end, label %if.then +; 16: beqz ${{[0-9]+}}, $[[LABEL:[0-9A-Ba-b_]+]] +; 16: $[[LABEL]]: +if.then: ; preds = %entry + store i32 1, i32* @result, align 4 + br label %if.end + +if.end: ; preds = %entry, %if.then + ret void +} + diff --git a/test/CodeGen/Mips/brconge.ll b/test/CodeGen/Mips/brconge.ll new file mode 100644 index 0000000000..02f0a633b3 --- /dev/null +++ b/test/CodeGen/Mips/brconge.ll @@ -0,0 +1,37 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 + +@i = global i32 5, align 4 +@j = global i32 10, align 4 +@k = global i32 5, align 4 +@result1 = global i32 0, align 4 +@result2 = global i32 1, align 4 + +define void @test() nounwind { +entry: + %0 = load i32* @i, align 4 + %1 = load i32* @j, align 4 + %cmp = icmp slt i32 %0, %1 + br i1 %cmp, label %if.then, label %if.end + +; 16: slt ${{[0-9]+}}, ${{[0-9]+}} +; 16: bteqz $[[LABEL:[0-9A-Ba-b_]+]] +; 16: $[[LABEL]]: + +if.then: ; preds = %entry + store i32 1, i32* @result1, align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + %2 = load i32* @k, align 4 + %cmp1 = icmp slt i32 %0, %2 + br i1 %cmp1, label %if.then2, label %if.end3 + +if.then2: ; preds = %if.end + store i32 1, i32* @result1, align 4 + br label %if.end3 + +if.end3: ; preds = %if.then2, %if.end + ret void +} + + diff --git a/test/CodeGen/Mips/brcongt.ll b/test/CodeGen/Mips/brcongt.ll new file mode 100644 index 0000000000..767b51b21b --- /dev/null +++ b/test/CodeGen/Mips/brcongt.ll @@ -0,0 +1,25 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 + +@i = global i32 5, align 4 +@j = global i32 10, align 4 +@k = global i32 5, align 4 +@result = global i32 0, align 4 + +define void @test() nounwind { +entry: + %0 = load i32* @i, align 4 + %1 = load i32* @j, align 4 + %cmp = icmp sgt i32 %0, %1 + br i1 %cmp, label %if.end, label %if.then +; 16: slt ${{[0-9]+}}, ${{[0-9]+}} +; 16: btnez $[[LABEL:[0-9A-Ba-b_]+]] +; 16: $[[LABEL]]: +if.then: ; preds = %entry + store i32 1, i32* @result, align 4 + br label %if.end + +if.end: ; preds = %entry, %if.then + ret void +} + + diff --git a/test/CodeGen/Mips/brconle.ll b/test/CodeGen/Mips/brconle.ll new file mode 100644 index 0000000000..854b2481c6 --- /dev/null +++ b/test/CodeGen/Mips/brconle.ll @@ -0,0 +1,37 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 + +@i = global i32 -5, align 4 +@j = global i32 10, align 4 +@k = global i32 -5, align 4 +@result1 = global i32 0, align 4 +@result2 = global i32 1, align 4 + +define void @test() nounwind { +entry: + %0 = load i32* @j, align 4 + %1 = load i32* @i, align 4 + %cmp = 
icmp sgt i32 %0, %1 + br i1 %cmp, label %if.then, label %if.end + +; 16: slt ${{[0-9]+}}, ${{[0-9]+}} +; 16: bteqz $[[LABEL:[0-9A-Ba-b_]+]] +; 16: $[[LABEL]]: + +if.then: ; preds = %entry + store i32 1, i32* @result1, align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + %2 = load i32* @k, align 4 + %cmp1 = icmp sgt i32 %1, %2 + br i1 %cmp1, label %if.then2, label %if.end3 + +if.then2: ; preds = %if.end + store i32 0, i32* @result1, align 4 + br label %if.end3 + +if.end3: ; preds = %if.then2, %if.end + ret void +} + + diff --git a/test/CodeGen/Mips/brconlt.ll b/test/CodeGen/Mips/brconlt.ll new file mode 100644 index 0000000000..931a3e8c7b --- /dev/null +++ b/test/CodeGen/Mips/brconlt.ll @@ -0,0 +1,27 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 + +@i = global i32 5, align 4 +@j = global i32 10, align 4 +@k = global i32 5, align 4 +@result = global i32 0, align 4 + +define void @test() nounwind { +entry: + %0 = load i32* @j, align 4 + %1 = load i32* @i, align 4 + %cmp = icmp slt i32 %0, %1 + br i1 %cmp, label %if.end, label %if.then + +; 16: slt ${{[0-9]+}}, ${{[0-9]+}} +; 16: btnez $[[LABEL:[0-9A-Ba-b_]+]] +; 16: $[[LABEL]]: + +if.then: ; preds = %entry + store i32 1, i32* @result, align 4 + br label %if.end + +if.end: ; preds = %entry, %if.then + ret void +} + + diff --git a/test/CodeGen/Mips/brconne.ll b/test/CodeGen/Mips/brconne.ll new file mode 100644 index 0000000000..5d5bde3fcf --- /dev/null +++ b/test/CodeGen/Mips/brconne.ll @@ -0,0 +1,26 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 + +@i = global i32 5, align 4 +@j = global i32 5, align 4 +@result = global i32 0, align 4 + +define void @test() nounwind { +entry: + %0 = load i32* @j, align 4 + %1 = load i32* @i, align 4 + %cmp = icmp eq i32 %0, %1 + br i1 %cmp, label %if.then, label %if.end +; 16: cmp ${{[0-9]+}}, ${{[0-9]+}} +; 16: btnez $[[LABEL:[0-9A-Ba-b_]+]] +; 16: lw ${{[0-9]+}}, %got(result)(${{[0-9]+}}) +; 16: $[[LABEL]]: + +if.then: ; preds = %entry + store i32 1, i32* @result, align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + + diff --git a/test/CodeGen/Mips/brconnek.ll b/test/CodeGen/Mips/brconnek.ll new file mode 100644 index 0000000000..6208d7c5a0 --- /dev/null +++ b/test/CodeGen/Mips/brconnek.ll @@ -0,0 +1,25 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 + +@j = global i32 5, align 4 +@result = global i32 0, align 4 + +define void @test() nounwind { +entry: + %0 = load i32* @j, align 4 + %cmp = icmp eq i32 %0, 5 + br i1 %cmp, label %if.then, label %if.end + +; 16: cmpi ${{[0-9]+}}, {{[0-9]+}} +; 16: btnez $[[LABEL:[0-9A-Ba-b_]+]] +; 16: lw ${{[0-9]+}}, %got(result)(${{[0-9]+}}) +; 16: $[[LABEL]]: + +if.then: ; preds = %entry + store i32 1, i32* @result, align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + + diff --git a/test/CodeGen/Mips/brconnez.ll b/test/CodeGen/Mips/brconnez.ll new file mode 100644 index 0000000000..47db7901b5 --- /dev/null +++ b/test/CodeGen/Mips/brconnez.ll @@ -0,0 +1,24 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 + +@j = global i32 0, align 4 +@result = global i32 0, align 4 + +define void @test() nounwind { +entry: + %0 = load i32* @j, align 4 + %cmp = icmp eq i32 %0, 0 + br i1 %cmp, label %if.then, label %if.end + +; 16: bnez ${{[0-9]+}}, $[[LABEL:[0-9A-Ba-b_]+]] +; 16: lw ${{[0-9]+}}, 
%got(result)(${{[0-9]+}})
+; 16: $[[LABEL]]:
+
+if.then:                                          ; preds = %entry
+  store i32 1, i32* @result, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+
diff --git a/test/CodeGen/PowerPC/i64_fp_round.ll b/test/CodeGen/PowerPC/i64_fp_round.ll
new file mode 100644
index 0000000000..5a0c072c9c
--- /dev/null
+++ b/test/CodeGen/PowerPC/i64_fp_round.ll
@@ -0,0 +1,27 @@
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define float @test(i64 %x) nounwind readnone {
+entry:
+  %conv = sitofp i64 %x to float
+  ret float %conv
+}
+
+; Verify that we get the code sequence needed to avoid double-rounding.
+; Note that only parts of the sequence are checked for here, to allow
+; for minor code generation differences.
+
+; CHECK: sradi [[REGISTER:[0-9]+]], 3, 53
+; CHECK: addi [[REGISTER:[0-9]+]], [[REGISTER]], 1
+; CHECK: cmpldi 0, [[REGISTER]], 1
+; CHECK: isel [[REGISTER:[0-9]+]], {{[0-9]+}}, 3, 1
+; CHECK: std [[REGISTER]], -{{[0-9]+}}(1)
+
+
+; Also check that with -enable-unsafe-fp-math we do not get that extra
+; code sequence. Simply verify that there is no "isel" present.
+
+; RUN: llc -mcpu=pwr7 -enable-unsafe-fp-math < %s | FileCheck %s -check-prefix=UNSAFE
+; UNSAFE-NOT: isel
+
diff --git a/test/CodeGen/X86/2012-10-18-crash-dagco.ll b/test/CodeGen/X86/2012-10-18-crash-dagco.ll
new file mode 100644
index 0000000000..5b98624a37
--- /dev/null
+++ b/test/CodeGen/X86/2012-10-18-crash-dagco.ll
@@ -0,0 +1,61 @@
+; RUN: llc -march=x86-64 -mcpu=corei7 -disable-cgp-select2branch < %s
+
+; We should not crash on this test.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+target triple = "i386-apple-darwin9.0.0"
+
+@global = external constant [411 x i8], align 1
+
+define void @snork() nounwind {
+bb:
+  br i1 undef, label %bb26, label %bb27
+
+bb26:                                             ; preds = %bb48, %bb26, %bb
+  switch i32 undef, label %bb26 [
+    i32 142771596, label %bb28
+  ]
+
+bb27:                                             ; preds = %bb48, %bb
+  switch i32 undef, label %bb49 [
+    i32 142771596, label %bb28
+  ]
+
+bb28:                                             ; preds = %bb27, %bb26
+  %tmp = load i32* null
+  %tmp29 = trunc i32 %tmp to i8
+  store i8* undef, i8** undef
+  %tmp30 = load i32* null
+  %tmp31 = icmp eq i32 %tmp30, 0
+  %tmp32 = getelementptr inbounds [411 x i8]* @global, i32 0, i32 undef
+  %tmp33 = load i8* %tmp32, align 1
+  %tmp34 = getelementptr inbounds [411 x i8]* @global, i32 0, i32 0
+  %tmp35 = load i8* %tmp34, align 1
+  %tmp36 = select i1 %tmp31, i8 %tmp35, i8 %tmp33
+  %tmp37 = select i1 undef, i8 %tmp29, i8 %tmp36
+  %tmp38 = zext i8 %tmp37 to i32
+  %tmp39 = select i1 undef, i32 0, i32 %tmp38
+  %tmp40 = getelementptr inbounds i32* null, i32 %tmp39
+  %tmp41 = load i32* %tmp40, align 4
+  %tmp42 = load i32* undef, align 4
+  %tmp43 = load i32* undef
+  %tmp44 = xor i32 %tmp42, %tmp43
+  %tmp45 = lshr i32 %tmp44, 8
+  %tmp46 = lshr i32 %tmp44, 7
+  call void @spam()
+  unreachable
+
+bb47:                                             ; No predecessors!
+  ret void
+
+bb48:                                             ; No predecessors!
+ br i1 undef, label %bb27, label %bb26 + +bb49: ; preds = %bb49, %bb27 + br label %bb49 + +bb50: ; preds = %bb50 + br label %bb50 +} + +declare void @spam() noreturn nounwind diff --git a/test/CodeGen/X86/extract-concat.ll b/test/CodeGen/X86/extract-concat.ll new file mode 100644 index 0000000000..704309eb65 --- /dev/null +++ b/test/CodeGen/X86/extract-concat.ll @@ -0,0 +1,17 @@ +; RUN: llc < %s -mcpu=corei7 -mtriple=x86_64-unknown-linux-gnu | FileCheck %s + +define void @foo(<4 x float> %in, <4 x i8>* %out) { + %t0 = fptosi <4 x float> %in to <4 x i32> + %t1 = trunc <4 x i32> %t0 to <4 x i16> + %t2 = shufflevector <4 x i16> %t1, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + %t3 = trunc <8 x i16> %t2 to <8 x i8> + %t4 = shufflevector <8 x i8> %t3, <8 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %t5 = insertelement <4 x i8> %t4, i8 -1, i32 3 + store <4 x i8> %t5, <4 x i8>* %out + ret void +; CHECK: foo +; CHECK: cvttps2dq +; CHECK-NOT: pextrd +; CHECK: pshufb +; CHECK: ret +} diff --git a/test/CodeGen/X86/pr14090.ll b/test/CodeGen/X86/pr14090.ll new file mode 100644 index 0000000000..d76b912fd8 --- /dev/null +++ b/test/CodeGen/X86/pr14090.ll @@ -0,0 +1,76 @@ +; RUN: llc < %s -march=x86-64 -print-before=stack-coloring -print-after=stack-coloring >%t 2>&1 && FileCheck <%t %s + +define void @foo(i64* %retval.i, i32 %call, i32* %.ph.i80, i32 %fourteen, i32* %out.lo, i32* %out.hi) nounwind align 2 { +entry: + %_Tmp.i39 = alloca i64, align 8 + %retval.i33 = alloca i64, align 8 + %_Tmp.i = alloca i64, align 8 + %retval.i.i = alloca i64, align 8 + %_First.i = alloca i64, align 8 + + %0 = load i64* %retval.i, align 8 + + %1 = load i64* %retval.i, align 8 + + %_Tmp.i39.0.cast73 = bitcast i64* %_Tmp.i39 to i8* + call void @llvm.lifetime.start(i64 8, i8* %_Tmp.i39.0.cast73) + store i64 %1, i64* %_Tmp.i39, align 8 + %cmp.i.i.i40 = icmp slt i32 %call, 0 + %2 = lshr i64 %1, 32 + %3 = trunc i64 %2 to i32 + %sub.i.i.i44 = sub i32 0, %call + %cmp2.i.i.i45 = icmp ult i32 %3, %sub.i.i.i44 + %or.cond.i.i.i46 = and i1 %cmp.i.i.i40, %cmp2.i.i.i45 + %add.i.i.i47 = add i32 %3, %call + %sub5.i.i.i48 = lshr i32 %add.i.i.i47, 5 + %trunc.i50 = trunc i64 %1 to i32 + %inttoptr.i51 = inttoptr i32 %trunc.i50 to i32* + %add61617.i.i.i52 = or i32 %sub5.i.i.i48, -134217728 + %add61617.i.sub5.i.i.i53 = select i1 %or.cond.i.i.i46, i32 %add61617.i.i.i52, i32 %sub5.i.i.i48 + %storemerge2.i.i54 = getelementptr inbounds i32* %inttoptr.i51, i32 %add61617.i.sub5.i.i.i53 + %_Tmp.i39.0.cast74 = bitcast i64* %_Tmp.i39 to i32** + store i32* %storemerge2.i.i54, i32** %_Tmp.i39.0.cast74, align 8 + %storemerge.i.i55 = and i32 %add.i.i.i47, 31 + %_Tmp.i39.4.raw_idx = getelementptr inbounds i8* %_Tmp.i39.0.cast73, i32 4 + %_Tmp.i39.4.cast = bitcast i8* %_Tmp.i39.4.raw_idx to i32* + store i32 %storemerge.i.i55, i32* %_Tmp.i39.4.cast, align 4 + %srcval.i56 = load i64* %_Tmp.i39, align 8 + call void @llvm.lifetime.end(i64 8, i8* %_Tmp.i39.0.cast73) + +; CHECK: Before Merge disjoint stack slots +; CHECK: [[PREFIX15:MOV64mr.*<fi#]]{{[0-9]}}[[SUFFIX15:.*;]] mem:ST8[%fifteen] +; CHECK: [[PREFIX87:MOV32mr.*;]] mem:ST4[%sunkaddr87] + +; CHECK: After Merge disjoint stack slots +; CHECK: [[PREFIX15]]{{[0-9]}}[[SUFFIX15]] mem:ST8[%_Tmp.i39] +; CHECK: [[PREFIX87]] mem:ST4[<unknown>] + + %fifteen = bitcast i64* %retval.i.i to i32** + %sixteen = bitcast i64* %retval.i.i to i8* + call void @llvm.lifetime.start(i64 8, i8* %sixteen) + store i32* %.ph.i80, i32** %fifteen, align 8, !tbaa !0 + %sunkaddr = ptrtoint 
i64* %retval.i.i to i32 + %sunkaddr86 = add i32 %sunkaddr, 4 + %sunkaddr87 = inttoptr i32 %sunkaddr86 to i32* + store i32 %fourteen, i32* %sunkaddr87, align 4, !tbaa !3 + %seventeen = load i64* %retval.i.i, align 8 + call void @llvm.lifetime.end(i64 8, i8* %sixteen) + %eighteen = lshr i64 %seventeen, 32 + %nineteen = trunc i64 %eighteen to i32 + %shl.i.i.i = shl i32 1, %nineteen + + store i32 %shl.i.i.i, i32* %out.lo, align 8 + store i32 %nineteen, i32* %out.hi, align 8 + + ret void +} + +declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind + +declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} +!3 = metadata !{metadata !"any pointer", metadata !1} +!4 = metadata !{metadata !"vtable pointer", metadata !2} diff --git a/test/CodeGen/X86/sjlj.ll b/test/CodeGen/X86/sjlj.ll index d594e98299..681db00943 100644 --- a/test/CodeGen/X86/sjlj.ll +++ b/test/CodeGen/X86/sjlj.ll @@ -1,5 +1,7 @@ ; RUN: llc < %s -mtriple=i386-pc-linux -mcpu=corei7 -relocation-model=static | FileCheck --check-prefix=X86 %s -; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=corei7 | FileCheck --check-prefix=X64 %s +; RUN: llc < %s -mtriple=i386-pc-linux -mcpu=corei7 -relocation-model=pic | FileCheck --check-prefix=PIC86 %s +; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=corei7 -relocation-model=static | FileCheck --check-prefix=X64 %s +; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=corei7 -relocation-model=pic | FileCheck --check-prefix=PIC64 %s @buf = internal global [5 x i8*] zeroinitializer @@ -20,14 +22,26 @@ define i32 @sj0() nounwind { ret i32 %r ; X86: sj0 ; x86: movl %ebp, buf -; x86: movl ${{.*LBB.*}}, buf+4 ; X86: movl %esp, buf+8 +; x86: movl ${{.*LBB.*}}, buf+4 ; X86: ret +; PIC86: sj0 +; PIC86: movl %ebp, buf@GOTOFF(%[[GOT:.*]]) +; PIC86: movl %esp, buf@GOTOFF+8(%[[GOT]]) +; PIC86: leal {{.*LBB.*}}@GOTOFF(%[[GOT]]), %[[LREG:.*]] +; PIC86: movl %[[LREG]], buf@GOTOFF+4 +; PIC86: ret ; X64: sj0 ; x64: movq %rbp, buf(%rip) ; x64: movq ${{.*LBB.*}}, buf+8(%rip) ; X64: movq %rsp, buf+16(%rip) ; X64: ret +; PIC64: sj0 +; PIC64: movq %rbp, buf(%rip) +; PIC64: movq %rsp, buf+16(%rip) +; PIC64: leaq {{.*LBB.*}}(%rip), %[[LREG:.*]] +; PIC64: movq %[[LREG]], buf+8(%rip) +; PIC64: ret } define void @lj0() nounwind { diff --git a/test/CodeGen/X86/trunc-fp2int.ll b/test/CodeGen/X86/trunc-fp2int.ll deleted file mode 100644 index 792af16c95..0000000000 --- a/test/CodeGen/X86/trunc-fp2int.ll +++ /dev/null @@ -1,18 +0,0 @@ -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7 | FileCheck %s - -define <4 x i8> @bar(<4 x float> %in) nounwind readnone alwaysinline { - %1 = fptoui <4 x float> %in to <4 x i8> - ret <4 x i8> %1 -; CHECK: bar -; CHECK: cvttps2dq -} -define <4 x i8> @foo(<4 x float> %in) nounwind readnone alwaysinline { - %1 = fptoui <4 x float> %in to <4 x i32> - %2 = trunc <4 x i32> %1 to <4 x i16> - %3 = shufflevector <4 x i16> %2, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> - %4 = trunc <8 x i16> %3 to <8 x i8> - %5 = shufflevector <8 x i8> %4, <8 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> - ret <4 x i8> %5 -; CHECK: foo -; CHECK: cvttps2dq -} diff --git a/test/Transforms/InstCombine/strcpy-1.ll b/test/Transforms/InstCombine/strcpy-1.ll new file mode 100644 index 0000000000..b6cf048b2a --- /dev/null +++ b/test/Transforms/InstCombine/strcpy-1.ll @@ -0,0 +1,45 @@ +; Test that the strcpy library 
call simplifier works correctly. +; rdar://6839935 +; RUN: opt < %s -instcombine -S | FileCheck %s +; +; This transformation requires the pointer size, as it assumes that size_t is +; the size of a pointer. +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" + +@hello = constant [6 x i8] c"hello\00" +@a = common global [32 x i8] zeroinitializer, align 1 +@b = common global [32 x i8] zeroinitializer, align 1 + +declare i8* @strcpy(i8*, i8*) + +define void @test_simplify1() { +; CHECK: @test_simplify1 + + %dst = getelementptr [32 x i8]* @a, i32 0, i32 0 + %src = getelementptr [6 x i8]* @hello, i32 0, i32 0 + + call i8* @strcpy(i8* %dst, i8* %src) +; CHECK: @llvm.memcpy.p0i8.p0i8.i32 + ret void +} + +define i8* @test_simplify2() { +; CHECK: @test_simplify2 + + %dst = getelementptr [32 x i8]* @a, i32 0, i32 0 + + %ret = call i8* @strcpy(i8* %dst, i8* %dst) +; CHECK: ret i8* getelementptr inbounds ([32 x i8]* @a, i32 0, i32 0) + ret i8* %ret +} + +define i8* @test_no_simplify1() { +; CHECK: @test_no_simplify1 + + %dst = getelementptr [32 x i8]* @a, i32 0, i32 0 + %src = getelementptr [32 x i8]* @b, i32 0, i32 0 + + %ret = call i8* @strcpy(i8* %dst, i8* %src) +; CHECK: call i8* @strcpy + ret i8* %ret +} diff --git a/test/Transforms/InstCombine/strcpy-2.ll b/test/Transforms/InstCombine/strcpy-2.ll new file mode 100644 index 0000000000..779e9fdd95 --- /dev/null +++ b/test/Transforms/InstCombine/strcpy-2.ll @@ -0,0 +1,22 @@ +; Test that the strcpy library call simplifier works correctly. +; RUN: opt < %s -instcombine -S | FileCheck %s +; +; This transformation requires the pointer size, as it assumes that size_t is +; the size of a pointer. +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" + +@hello = constant [6 x i8] c"hello\00" +@a = common global [32 x i8] zeroinitializer, align 1 + +declare i16* @strcpy(i8*, i8*) + +define void @test_no_simplify1() { +; CHECK: @test_no_simplify1 + + %dst = getelementptr [32 x i8]* @a, i32 0, i32 0 + %src = getelementptr [6 x i8]* @hello, i32 0, i32 0 + + call i16* @strcpy(i8* %dst, i8* %src) +; CHECK: call i16* @strcpy + ret void +} diff --git a/test/Transforms/InstCombine/strcpy_chk-1.ll b/test/Transforms/InstCombine/strcpy_chk-1.ll index c03e8a348b..3e48f4fd30 100644 --- a/test/Transforms/InstCombine/strcpy_chk-1.ll +++ b/test/Transforms/InstCombine/strcpy_chk-1.ll @@ -7,16 +7,16 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3 @a = common global [60 x i8] zeroinitializer, align 1 @b = common global [60 x i8] zeroinitializer, align 1 -@.str = private constant [8 x i8] c"abcdefg\00" +@.str = private constant [12 x i8] c"abcdefghijk\00" ; Check cases where slen >= strlen (src). 
define void @test_simplify1() { ; CHECK: @test_simplify1 %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0 - %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0 + %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0 -; CHECK-NEXT: call i8* @strcpy +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32 call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 60) ret void } @@ -24,19 +24,19 @@ define void @test_simplify1() { define void @test_simplify2() { ; CHECK: @test_simplify2 %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0 - %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0 + %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0 -; CHECK-NEXT: call i8* @strcpy - call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 8) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32 + call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 12) ret void } define void @test_simplify3() { ; CHECK: @test_simplify3 %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0 - %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0 + %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0 -; CHECK-NEXT: call i8* @strcpy +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32 call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 -1) ret void } @@ -53,36 +53,42 @@ define void @test_simplify4() { ret void } -define void @test_no_simplify1() { -; CHECK: @test_no_simplify1 +; Check case where the string length is not constant. + +define void @test_simplify5() { +; CHECK: @test_simplify5 %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0 - %src = getelementptr inbounds [60 x i8]* @b, i32 0, i32 0 + %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0 -; CHECK-NEXT: call i8* @__strcpy_chk - call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 8) +; CHECK: @__memcpy_chk + %len = call i32 @llvm.objectsize.i32(i8* %dst, i1 false) + call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 %len) ret void } -; Check case were slen < strlen (src). +; Check case where the source and destination are the same. -define void @test_no_simplify2() { -; CHECK: @test_no_simplify2 +define i8* @test_simplify6() { +; CHECK: @test_simplify6 %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0 - %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0 -; CHECK-NEXT: call i8* @__strcpy_chk - call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 3) - ret void +; CHECK: getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0) + %len = call i32 @llvm.objectsize.i32(i8* %dst, i1 false) + %ret = call i8* @__strcpy_chk(i8* %dst, i8* %dst, i32 %len) + ret i8* %ret } -define void @test_no_simplify3() { -; CHECK: @test_no_simplify3 +; Check case where slen < strlen (src). 
+ +define void @test_no_simplify1() { +; CHECK: @test_no_simplify1 %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0 - %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0 + %src = getelementptr inbounds [60 x i8]* @b, i32 0, i32 0 ; CHECK-NEXT: call i8* @__strcpy_chk - call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 0) + call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 8) ret void } declare i8* @__strcpy_chk(i8*, i8*, i32) nounwind +declare i32 @llvm.objectsize.i32(i8*, i1) nounwind readonly diff --git a/test/Transforms/LoopVectorize/gcc-examples.ll b/test/Transforms/LoopVectorize/gcc-examples.ll new file mode 100644 index 0000000000..4e9e6f940e --- /dev/null +++ b/test/Transforms/LoopVectorize/gcc-examples.ll @@ -0,0 +1,649 @@ +; RUN: opt < %s -loop-vectorize -dce -instcombine -licm -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +@b = common global [2048 x i32] zeroinitializer, align 16 +@c = common global [2048 x i32] zeroinitializer, align 16 +@a = common global [2048 x i32] zeroinitializer, align 16 +@G = common global [32 x [1024 x i32]] zeroinitializer, align 16 +@ub = common global [1024 x i32] zeroinitializer, align 16 +@uc = common global [1024 x i32] zeroinitializer, align 16 +@d = common global [2048 x i32] zeroinitializer, align 16 +@fa = common global [1024 x float] zeroinitializer, align 16 +@fb = common global [1024 x float] zeroinitializer, align 16 +@ic = common global [1024 x i32] zeroinitializer, align 16 +@da = common global [1024 x float] zeroinitializer, align 16 +@db = common global [1024 x float] zeroinitializer, align 16 +@dc = common global [1024 x float] zeroinitializer, align 16 +@dd = common global [1024 x float] zeroinitializer, align 16 +@dj = common global [1024 x i32] zeroinitializer, align 16 + +;CHECK: @example1 +;CHECK: load <4 x i32> +;CHECK: add <4 x i32> +;CHECK: store <4 x i32> +;CHECK: ret void +define void @example1() nounwind uwtable ssp { + br label %1 + +; <label>:1 ; preds = %1, %0 + %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ] + %2 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv + %3 = load i32* %2, align 4 + %4 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %indvars.iv + %5 = load i32* %4, align 4 + %6 = add nsw i32 %5, %3 + %7 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv + store i32 %6, i32* %7, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 256 + br i1 %exitcond, label %8, label %1 + +; <label>:8 ; preds = %1 + ret void +} + +;CHECK: @example2 +;CHECK: store <4 x i32> +;CHECK: ret void +define void @example2(i32 %n, i32 %x) nounwind uwtable ssp { + %1 = icmp sgt i32 %n, 0 + br i1 %1, label %.lr.ph5, label %.preheader + +..preheader_crit_edge: ; preds = %.lr.ph5 + %phitmp = sext i32 %n to i64 + br label %.preheader + +.preheader: ; preds = %..preheader_crit_edge, %0 + %i.0.lcssa = phi i64 [ %phitmp, %..preheader_crit_edge ], [ 0, %0 ] + %2 = icmp eq i32 %n, 0 + br i1 %2, label %._crit_edge, label %.lr.ph + +.lr.ph5: ; preds = %0, %.lr.ph5 + %indvars.iv6 = phi i64 [ %indvars.iv.next7, %.lr.ph5 ], [ 0, %0 ] + %3 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv6 + store i32 %x, i32* %3, align 4 + %indvars.iv.next7 = add i64 %indvars.iv6, 1 + %lftr.wideiv = trunc i64 
%indvars.iv.next7 to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %..preheader_crit_edge, label %.lr.ph5 + +.lr.ph: ; preds = %.preheader, %.lr.ph + %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ %i.0.lcssa, %.preheader ] + %.02 = phi i32 [ %4, %.lr.ph ], [ %n, %.preheader ] + %4 = add nsw i32 %.02, -1 + %5 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv + %6 = load i32* %5, align 4 + %7 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %indvars.iv + %8 = load i32* %7, align 4 + %9 = and i32 %8, %6 + %10 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv + store i32 %9, i32* %10, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %11 = icmp eq i32 %4, 0 + br i1 %11, label %._crit_edge, label %.lr.ph + +._crit_edge: ; preds = %.lr.ph, %.preheader + ret void +} + +; We can't vectorize this loop because it has non constant loop bounds. +;CHECK: @example3 +;CHECK-NOT: <4 x i32> +;CHECK: ret void +define void @example3(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture %q) nounwind uwtable ssp { + %1 = icmp eq i32 %n, 0 + br i1 %1, label %._crit_edge, label %.lr.ph + +.lr.ph: ; preds = %0, %.lr.ph + %.05 = phi i32 [ %2, %.lr.ph ], [ %n, %0 ] + %.014 = phi i32* [ %5, %.lr.ph ], [ %p, %0 ] + %.023 = phi i32* [ %3, %.lr.ph ], [ %q, %0 ] + %2 = add nsw i32 %.05, -1 + %3 = getelementptr inbounds i32* %.023, i64 1 + %4 = load i32* %.023, align 16 + %5 = getelementptr inbounds i32* %.014, i64 1 + store i32 %4, i32* %.014, align 16 + %6 = icmp eq i32 %2, 0 + br i1 %6, label %._crit_edge, label %.lr.ph + +._crit_edge: ; preds = %.lr.ph, %0 + ret void +} + +;CHECK: @example4 +;CHECK: load <4 x i32> +;CHECK: ret void +define void @example4(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture %q) nounwind uwtable ssp { + %1 = add nsw i32 %n, -1 + %2 = icmp eq i32 %n, 0 + br i1 %2, label %.preheader4, label %.lr.ph10 + +.preheader4: ; preds = %0 + %3 = icmp sgt i32 %1, 0 + br i1 %3, label %.lr.ph6, label %._crit_edge + +.lr.ph10: ; preds = %0, %.lr.ph10 + %4 = phi i32 [ %9, %.lr.ph10 ], [ %1, %0 ] + %.018 = phi i32* [ %8, %.lr.ph10 ], [ %p, %0 ] + %.027 = phi i32* [ %5, %.lr.ph10 ], [ %q, %0 ] + %5 = getelementptr inbounds i32* %.027, i64 1 + %6 = load i32* %.027, align 16 + %7 = add nsw i32 %6, 5 + %8 = getelementptr inbounds i32* %.018, i64 1 + store i32 %7, i32* %.018, align 16 + %9 = add nsw i32 %4, -1 + %10 = icmp eq i32 %4, 0 + br i1 %10, label %._crit_edge, label %.lr.ph10 + +.preheader: ; preds = %.lr.ph6 + br i1 %3, label %.lr.ph, label %._crit_edge + +.lr.ph6: ; preds = %.preheader4, %.lr.ph6 + %indvars.iv11 = phi i64 [ %indvars.iv.next12, %.lr.ph6 ], [ 0, %.preheader4 ] + %indvars.iv.next12 = add i64 %indvars.iv11, 1 + %11 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv.next12 + %12 = load i32* %11, align 4 + %13 = add nsw i64 %indvars.iv11, 3 + %14 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %13 + %15 = load i32* %14, align 4 + %16 = add nsw i32 %15, %12 + %17 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv11 + store i32 %16, i32* %17, align 4 + %lftr.wideiv13 = trunc i64 %indvars.iv.next12 to i32 + %exitcond14 = icmp eq i32 %lftr.wideiv13, %1 + br i1 %exitcond14, label %.preheader, label %.lr.ph6 + +.lr.ph: ; preds = %.preheader, %.lr.ph + %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %.preheader ] + %18 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv + %19 = load i32* %18, align 4 + %20 = icmp sgt i32 %19, 4 + %21 = select i1 %20, 
i32 4, i32 0 + %22 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv + store i32 %21, i32* %22, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %1 + br i1 %exitcond, label %._crit_edge, label %.lr.ph + +._crit_edge: ; preds = %.lr.ph10, %.preheader4, %.lr.ph, %.preheader + ret void +} + +;CHECK: @example8 +;CHECK: store <4 x i32> +;CHECK: ret void +define void @example8(i32 %x) nounwind uwtable ssp { + br label %.preheader + +.preheader: ; preds = %3, %0 + %indvars.iv3 = phi i64 [ 0, %0 ], [ %indvars.iv.next4, %3 ] + br label %1 + +; <label>:1 ; preds = %1, %.preheader + %indvars.iv = phi i64 [ 0, %.preheader ], [ %indvars.iv.next, %1 ] + %2 = getelementptr inbounds [32 x [1024 x i32]]* @G, i64 0, i64 %indvars.iv3, i64 %indvars.iv + store i32 %x, i32* %2, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 1024 + br i1 %exitcond, label %3, label %1 + +; <label>:3 ; preds = %1 + %indvars.iv.next4 = add i64 %indvars.iv3, 1 + %lftr.wideiv5 = trunc i64 %indvars.iv.next4 to i32 + %exitcond6 = icmp eq i32 %lftr.wideiv5, 32 + br i1 %exitcond6, label %4, label %.preheader + +; <label>:4 ; preds = %3 + ret void +} + +; We can't vectorize because it has a reduction variable. +;CHECK: @example9 +;CHECK-NOT: <4 x i32> +;CHECK: ret i32 +define i32 @example9() nounwind uwtable readonly ssp { + br label %1 + +; <label>:1 ; preds = %1, %0 + %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ] + %diff.01 = phi i32 [ 0, %0 ], [ %7, %1 ] + %2 = getelementptr inbounds [1024 x i32]* @ub, i64 0, i64 %indvars.iv + %3 = load i32* %2, align 4 + %4 = getelementptr inbounds [1024 x i32]* @uc, i64 0, i64 %indvars.iv + %5 = load i32* %4, align 4 + %6 = add i32 %3, %diff.01 + %7 = sub i32 %6, %5 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 1024 + br i1 %exitcond, label %8, label %1 + +; <label>:8 ; preds = %1 + ret i32 %7 +} + +;CHECK: @example10a +;CHECK: load <4 x i16> +;CHECK: add <4 x i16> +;CHECK: store <4 x i16> +;CHECK: ret void +define void @example10a(i16* noalias nocapture %sa, i16* noalias nocapture %sb, i16* noalias nocapture %sc, i32* noalias nocapture %ia, i32* noalias nocapture %ib, i32* noalias nocapture %ic) nounwind uwtable ssp { + br label %1 + +; <label>:1 ; preds = %1, %0 + %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ] + %2 = getelementptr inbounds i32* %ib, i64 %indvars.iv + %3 = load i32* %2, align 4 + %4 = getelementptr inbounds i32* %ic, i64 %indvars.iv + %5 = load i32* %4, align 4 + %6 = add nsw i32 %5, %3 + %7 = getelementptr inbounds i32* %ia, i64 %indvars.iv + store i32 %6, i32* %7, align 4 + %8 = getelementptr inbounds i16* %sb, i64 %indvars.iv + %9 = load i16* %8, align 2 + %10 = getelementptr inbounds i16* %sc, i64 %indvars.iv + %11 = load i16* %10, align 2 + %12 = add i16 %11, %9 + %13 = getelementptr inbounds i16* %sa, i64 %indvars.iv + store i16 %12, i16* %13, align 2 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 1024 + br i1 %exitcond, label %14, label %1 + +; <label>:14 ; preds = %1 + ret void +} + +;CHECK: @example10b +;CHECK: load <4 x i16> +;CHECK: sext <4 x i16> +;CHECK: store <4 x i32> +;CHECK: ret void +define void @example10b(i16* noalias nocapture %sa, i16* noalias nocapture %sb, i16* 
noalias nocapture %sc, i32* noalias nocapture %ia, i32* noalias nocapture %ib, i32* noalias nocapture %ic) nounwind uwtable ssp { + br label %1 + +; <label>:1 ; preds = %1, %0 + %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ] + %2 = getelementptr inbounds i16* %sb, i64 %indvars.iv + %3 = load i16* %2, align 2 + %4 = sext i16 %3 to i32 + %5 = getelementptr inbounds i32* %ia, i64 %indvars.iv + store i32 %4, i32* %5, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 1024 + br i1 %exitcond, label %6, label %1 + +; <label>:6 ; preds = %1 + ret void +} + +;CHECK: @example11 +;CHECK: load i32 +;CHECK: load i32 +;CHECK: load i32 +;CHECK: load i32 +;CHECK: insertelement +;CHECK: insertelement +;CHECK: insertelement +;CHECK: insertelement +;CHECK: ret void +define void @example11() nounwind uwtable ssp { + br label %1 + +; <label>:1 ; preds = %1, %0 + %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ] + %2 = shl nsw i64 %indvars.iv, 1 + %3 = or i64 %2, 1 + %4 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %3 + %5 = load i32* %4, align 4 + %6 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %3 + %7 = load i32* %6, align 4 + %8 = mul nsw i32 %7, %5 + %9 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %2 + %10 = load i32* %9, align 8 + %11 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %2 + %12 = load i32* %11, align 8 + %13 = mul nsw i32 %12, %10 + %14 = sub nsw i32 %8, %13 + %15 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv + store i32 %14, i32* %15, align 4 + %16 = mul nsw i32 %7, %10 + %17 = mul nsw i32 %12, %5 + %18 = add nsw i32 %17, %16 + %19 = getelementptr inbounds [2048 x i32]* @d, i64 0, i64 %indvars.iv + store i32 %18, i32* %19, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 512 + br i1 %exitcond, label %20, label %1 + +; <label>:20 ; preds = %1 + ret void +} + +;CHECK: @example12 +;CHECK: trunc <4 x i64> +;CHECK: store <4 x i32> +;CHECK: ret void +define void @example12() nounwind uwtable ssp { + br label %1 + +; <label>:1 ; preds = %1, %0 + %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ] + %2 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv + %3 = trunc i64 %indvars.iv to i32 + store i32 %3, i32* %2, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 1024 + br i1 %exitcond, label %4, label %1 + +; <label>:4 ; preds = %1 + ret void +} + +; Can't vectorize because of reductions. 
+;CHECK: @example13 +;CHECK-NOT: <4 x i32> +;CHECK: ret void +define void @example13(i32** nocapture %A, i32** nocapture %B, i32* nocapture %out) nounwind uwtable ssp { + br label %.preheader + +.preheader: ; preds = %14, %0 + %indvars.iv4 = phi i64 [ 0, %0 ], [ %indvars.iv.next5, %14 ] + %1 = getelementptr inbounds i32** %A, i64 %indvars.iv4 + %2 = load i32** %1, align 8 + %3 = getelementptr inbounds i32** %B, i64 %indvars.iv4 + %4 = load i32** %3, align 8 + br label %5 + +; <label>:5 ; preds = %.preheader, %5 + %indvars.iv = phi i64 [ 0, %.preheader ], [ %indvars.iv.next, %5 ] + %diff.02 = phi i32 [ 0, %.preheader ], [ %11, %5 ] + %6 = getelementptr inbounds i32* %2, i64 %indvars.iv + %7 = load i32* %6, align 4 + %8 = getelementptr inbounds i32* %4, i64 %indvars.iv + %9 = load i32* %8, align 4 + %10 = add i32 %7, %diff.02 + %11 = sub i32 %10, %9 + %indvars.iv.next = add i64 %indvars.iv, 8 + %12 = trunc i64 %indvars.iv.next to i32 + %13 = icmp slt i32 %12, 1024 + br i1 %13, label %5, label %14 + +; <label>:14 ; preds = %5 + %15 = getelementptr inbounds i32* %out, i64 %indvars.iv4 + store i32 %11, i32* %15, align 4 + %indvars.iv.next5 = add i64 %indvars.iv4, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next5 to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 32 + br i1 %exitcond, label %16, label %.preheader + +; <label>:16 ; preds = %14 + ret void +} + +; Can't vectorize because of reductions. +;CHECK: @example14 +;CHECK-NOT: <4 x i32> +;CHECK: ret void +define void @example14(i32** nocapture %in, i32** nocapture %coeff, i32* nocapture %out) nounwind uwtable ssp { +.preheader3: + br label %.preheader + +.preheader: ; preds = %11, %.preheader3 + %indvars.iv7 = phi i64 [ 0, %.preheader3 ], [ %indvars.iv.next8, %11 ] + %sum.05 = phi i32 [ 0, %.preheader3 ], [ %10, %11 ] + br label %0 + +; <label>:0 ; preds = %0, %.preheader + %indvars.iv = phi i64 [ 0, %.preheader ], [ %indvars.iv.next, %0 ] + %sum.12 = phi i32 [ %sum.05, %.preheader ], [ %10, %0 ] + %1 = getelementptr inbounds i32** %in, i64 %indvars.iv + %2 = load i32** %1, align 8 + %3 = getelementptr inbounds i32* %2, i64 %indvars.iv7 + %4 = load i32* %3, align 4 + %5 = getelementptr inbounds i32** %coeff, i64 %indvars.iv + %6 = load i32** %5, align 8 + %7 = getelementptr inbounds i32* %6, i64 %indvars.iv7 + %8 = load i32* %7, align 4 + %9 = mul nsw i32 %8, %4 + %10 = add nsw i32 %9, %sum.12 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 1024 + br i1 %exitcond, label %11, label %0 + +; <label>:11 ; preds = %0 + %indvars.iv.next8 = add i64 %indvars.iv7, 1 + %lftr.wideiv9 = trunc i64 %indvars.iv.next8 to i32 + %exitcond10 = icmp eq i32 %lftr.wideiv9, 32 + br i1 %exitcond10, label %.preheader3.1, label %.preheader + +.preheader3.1: ; preds = %11 + store i32 %10, i32* %out, align 4 + br label %.preheader.1 + +.preheader.1: ; preds = %24, %.preheader3.1 + %indvars.iv7.1 = phi i64 [ 0, %.preheader3.1 ], [ %indvars.iv.next8.1, %24 ] + %sum.05.1 = phi i32 [ 0, %.preheader3.1 ], [ %23, %24 ] + br label %12 + +; <label>:12 ; preds = %12, %.preheader.1 + %indvars.iv.1 = phi i64 [ 0, %.preheader.1 ], [ %13, %12 ] + %sum.12.1 = phi i32 [ %sum.05.1, %.preheader.1 ], [ %23, %12 ] + %13 = add nsw i64 %indvars.iv.1, 1 + %14 = getelementptr inbounds i32** %in, i64 %13 + %15 = load i32** %14, align 8 + %16 = getelementptr inbounds i32* %15, i64 %indvars.iv7.1 + %17 = load i32* %16, align 4 + %18 = getelementptr inbounds i32** %coeff, i64 %indvars.iv.1 + %19 = load i32** %18, 
align 8 + %20 = getelementptr inbounds i32* %19, i64 %indvars.iv7.1 + %21 = load i32* %20, align 4 + %22 = mul nsw i32 %21, %17 + %23 = add nsw i32 %22, %sum.12.1 + %lftr.wideiv.1 = trunc i64 %13 to i32 + %exitcond.1 = icmp eq i32 %lftr.wideiv.1, 1024 + br i1 %exitcond.1, label %24, label %12 + +; <label>:24 ; preds = %12 + %indvars.iv.next8.1 = add i64 %indvars.iv7.1, 1 + %lftr.wideiv9.1 = trunc i64 %indvars.iv.next8.1 to i32 + %exitcond10.1 = icmp eq i32 %lftr.wideiv9.1, 32 + br i1 %exitcond10.1, label %.preheader3.2, label %.preheader.1 + +.preheader3.2: ; preds = %24 + %25 = getelementptr inbounds i32* %out, i64 1 + store i32 %23, i32* %25, align 4 + br label %.preheader.2 + +.preheader.2: ; preds = %38, %.preheader3.2 + %indvars.iv7.2 = phi i64 [ 0, %.preheader3.2 ], [ %indvars.iv.next8.2, %38 ] + %sum.05.2 = phi i32 [ 0, %.preheader3.2 ], [ %37, %38 ] + br label %26 + +; <label>:26 ; preds = %26, %.preheader.2 + %indvars.iv.2 = phi i64 [ 0, %.preheader.2 ], [ %indvars.iv.next.2, %26 ] + %sum.12.2 = phi i32 [ %sum.05.2, %.preheader.2 ], [ %37, %26 ] + %27 = add nsw i64 %indvars.iv.2, 2 + %28 = getelementptr inbounds i32** %in, i64 %27 + %29 = load i32** %28, align 8 + %30 = getelementptr inbounds i32* %29, i64 %indvars.iv7.2 + %31 = load i32* %30, align 4 + %32 = getelementptr inbounds i32** %coeff, i64 %indvars.iv.2 + %33 = load i32** %32, align 8 + %34 = getelementptr inbounds i32* %33, i64 %indvars.iv7.2 + %35 = load i32* %34, align 4 + %36 = mul nsw i32 %35, %31 + %37 = add nsw i32 %36, %sum.12.2 + %indvars.iv.next.2 = add i64 %indvars.iv.2, 1 + %lftr.wideiv.2 = trunc i64 %indvars.iv.next.2 to i32 + %exitcond.2 = icmp eq i32 %lftr.wideiv.2, 1024 + br i1 %exitcond.2, label %38, label %26 + +; <label>:38 ; preds = %26 + %indvars.iv.next8.2 = add i64 %indvars.iv7.2, 1 + %lftr.wideiv9.2 = trunc i64 %indvars.iv.next8.2 to i32 + %exitcond10.2 = icmp eq i32 %lftr.wideiv9.2, 32 + br i1 %exitcond10.2, label %.preheader3.3, label %.preheader.2 + +.preheader3.3: ; preds = %38 + %39 = getelementptr inbounds i32* %out, i64 2 + store i32 %37, i32* %39, align 4 + br label %.preheader.3 + +.preheader.3: ; preds = %52, %.preheader3.3 + %indvars.iv7.3 = phi i64 [ 0, %.preheader3.3 ], [ %indvars.iv.next8.3, %52 ] + %sum.05.3 = phi i32 [ 0, %.preheader3.3 ], [ %51, %52 ] + br label %40 + +; <label>:40 ; preds = %40, %.preheader.3 + %indvars.iv.3 = phi i64 [ 0, %.preheader.3 ], [ %indvars.iv.next.3, %40 ] + %sum.12.3 = phi i32 [ %sum.05.3, %.preheader.3 ], [ %51, %40 ] + %41 = add nsw i64 %indvars.iv.3, 3 + %42 = getelementptr inbounds i32** %in, i64 %41 + %43 = load i32** %42, align 8 + %44 = getelementptr inbounds i32* %43, i64 %indvars.iv7.3 + %45 = load i32* %44, align 4 + %46 = getelementptr inbounds i32** %coeff, i64 %indvars.iv.3 + %47 = load i32** %46, align 8 + %48 = getelementptr inbounds i32* %47, i64 %indvars.iv7.3 + %49 = load i32* %48, align 4 + %50 = mul nsw i32 %49, %45 + %51 = add nsw i32 %50, %sum.12.3 + %indvars.iv.next.3 = add i64 %indvars.iv.3, 1 + %lftr.wideiv.3 = trunc i64 %indvars.iv.next.3 to i32 + %exitcond.3 = icmp eq i32 %lftr.wideiv.3, 1024 + br i1 %exitcond.3, label %52, label %40 + +; <label>:52 ; preds = %40 + %indvars.iv.next8.3 = add i64 %indvars.iv7.3, 1 + %lftr.wideiv9.3 = trunc i64 %indvars.iv.next8.3 to i32 + %exitcond10.3 = icmp eq i32 %lftr.wideiv9.3, 32 + br i1 %exitcond10.3, label %53, label %.preheader.3 + +; <label>:53 ; preds = %52 + %54 = getelementptr inbounds i32* %out, i64 3 + store i32 %51, i32* %54, align 4 + ret void +} + +; Can't vectorize because 
the src and dst pointers are not disjoint. +;CHECK: @example21 +;CHECK-NOT: <4 x i32> +;CHECK: ret i32 +define i32 @example21(i32* nocapture %b, i32 %n) nounwind uwtable readonly ssp { + %1 = icmp sgt i32 %n, 0 + br i1 %1, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %0 + %2 = sext i32 %n to i64 + br label %3 + +; <label>:3 ; preds = %.lr.ph, %3 + %indvars.iv = phi i64 [ %2, %.lr.ph ], [ %indvars.iv.next, %3 ] + %a.02 = phi i32 [ 0, %.lr.ph ], [ %6, %3 ] + %indvars.iv.next = add i64 %indvars.iv, -1 + %4 = getelementptr inbounds i32* %b, i64 %indvars.iv.next + %5 = load i32* %4, align 4 + %6 = add nsw i32 %5, %a.02 + %7 = trunc i64 %indvars.iv.next to i32 + %8 = icmp sgt i32 %7, 0 + br i1 %8, label %3, label %._crit_edge + +._crit_edge: ; preds = %3, %0 + %a.0.lcssa = phi i32 [ 0, %0 ], [ %6, %3 ] + ret i32 %a.0.lcssa +} + +; Can't vectorize because there are multiple PHIs. +;CHECK: @example23 +;CHECK-NOT: <4 x i32> +;CHECK: ret void +define void @example23(i16* nocapture %src, i32* nocapture %dst) nounwind uwtable ssp { + br label %1 + +; <label>:1 ; preds = %1, %0 + %.04 = phi i16* [ %src, %0 ], [ %2, %1 ] + %.013 = phi i32* [ %dst, %0 ], [ %6, %1 ] + %i.02 = phi i32 [ 0, %0 ], [ %7, %1 ] + %2 = getelementptr inbounds i16* %.04, i64 1 + %3 = load i16* %.04, align 2 + %4 = zext i16 %3 to i32 + %5 = shl nuw nsw i32 %4, 7 + %6 = getelementptr inbounds i32* %.013, i64 1 + store i32 %5, i32* %.013, align 4 + %7 = add nsw i32 %i.02, 1 + %exitcond = icmp eq i32 %7, 256 + br i1 %exitcond, label %8, label %1 + +; <label>:8 ; preds = %1 + ret void +} + +;CHECK: @example24 +;CHECK: shufflevector <4 x i16> +;CHECK: ret void +define void @example24(i16 signext %x, i16 signext %y) nounwind uwtable ssp { + br label %1 + +; <label>:1 ; preds = %1, %0 + %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ] + %2 = getelementptr inbounds [1024 x float]* @fa, i64 0, i64 %indvars.iv + %3 = load float* %2, align 4 + %4 = getelementptr inbounds [1024 x float]* @fb, i64 0, i64 %indvars.iv + %5 = load float* %4, align 4 + %6 = fcmp olt float %3, %5 + %x.y = select i1 %6, i16 %x, i16 %y + %7 = sext i16 %x.y to i32 + %8 = getelementptr inbounds [1024 x i32]* @ic, i64 0, i64 %indvars.iv + store i32 %7, i32* %8, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 1024 + br i1 %exitcond, label %9, label %1 + +; <label>:9 ; preds = %1 + ret void +} + +;CHECK: @example25 +;CHECK: and <4 x i1> +;CHECK: zext <4 x i1> +;CHECK: ret void +define void @example25() nounwind uwtable ssp { + br label %1 + +; <label>:1 ; preds = %1, %0 + %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ] + %2 = getelementptr inbounds [1024 x float]* @da, i64 0, i64 %indvars.iv + %3 = load float* %2, align 4 + %4 = getelementptr inbounds [1024 x float]* @db, i64 0, i64 %indvars.iv + %5 = load float* %4, align 4 + %6 = fcmp olt float %3, %5 + %7 = getelementptr inbounds [1024 x float]* @dc, i64 0, i64 %indvars.iv + %8 = load float* %7, align 4 + %9 = getelementptr inbounds [1024 x float]* @dd, i64 0, i64 %indvars.iv + %10 = load float* %9, align 4 + %11 = fcmp olt float %8, %10 + %12 = and i1 %6, %11 + %13 = zext i1 %12 to i32 + %14 = getelementptr inbounds [1024 x i32]* @dj, i64 0, i64 %indvars.iv + store i32 %13, i32* %14, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 1024 + br i1 %exitcond, label %15, label %1 + +; <label>:15 ; preds = %1 + 
ret void
+}
+
diff --git a/test/Transforms/LoopVectorize/lit.local.cfg b/test/Transforms/LoopVectorize/lit.local.cfg
new file mode 100644
index 0000000000..19eebc0ac7
--- /dev/null
+++ b/test/Transforms/LoopVectorize/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LoopVectorize/non-const-n.ll b/test/Transforms/LoopVectorize/non-const-n.ll
new file mode 100644
index 0000000000..04c5c84a4f
--- /dev/null
+++ b/test/Transforms/LoopVectorize/non-const-n.ll
@@ -0,0 +1,38 @@
+; RUN: opt < %s -loop-vectorize -dce -instcombine -licm -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+@b = common global [2048 x i32] zeroinitializer, align 16
+@c = common global [2048 x i32] zeroinitializer, align 16
+@a = common global [2048 x i32] zeroinitializer, align 16
+
+;CHECK: @example1
+;CHECK: shl i32
+;CHECK: zext i32
+;CHECK: load <4 x i32>
+;CHECK: add <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret void
+define void @example1(i32 %n) nounwind uwtable ssp {
+  %n4 = shl i32 %n, 2
+  br label %1
+
+; <label>:1 ; preds = %1, %0
+  %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+  %2 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv
+  %3 = load i32* %2, align 4
+  %4 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %indvars.iv
+  %5 = load i32* %4, align 4
+  %6 = add nsw i32 %5, %3
+  %7 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+  store i32 %6, i32* %7, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n4
+  br i1 %exitcond, label %8, label %1
+
+; <label>:8 ; preds = %1
+  ret void
+}
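The non-const-n.ll test above differs from the earlier gcc-examples tests in that the trip count is only known at run time. For orientation, IR like @example1 could plausibly come from a source loop along the following lines; this is a hypothetical reconstruction for illustration, not part of the patch:

#include <cstdint>

// Hypothetical C++ source corresponding roughly to @example1 in
// non-const-n.ll above. The arrays are global, and the trip count n*4
// (the IR's "%n4 = shl i32 %n, 2") is not a compile-time constant,
// which is exactly what the test exercises.
int a[2048], b[2048], c[2048];

void example1(int32_t n) {
  int32_t n4 = n << 2;    // %n4 = shl i32 %n, 2
  for (int32_t i = 0; i < n4; ++i)
    a[i] = b[i] + c[i];   // load, load, add nsw, store per iteration
}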
diff --git a/test/Transforms/SROA/basictest.ll b/test/Transforms/SROA/basictest.ll
index 644fda167d..03120f7a32 100644
--- a/test/Transforms/SROA/basictest.ll
+++ b/test/Transforms/SROA/basictest.ll
@@ -1063,3 +1063,23 @@ entry:
   call void @llvm.lifetime.end(i64 -1, i8* %0)
   ret void
 }
+
+define void @PR14105({ [16 x i8] }* %ptr) {
+; Ensure that when rewriting the GEP index '-1' for this alloca we preserve its
+; sign as negative. We use a volatile memcpy to ensure promotion never actually
+; occurs.
+; CHECK: @PR14105
+
+entry:
+  %a = alloca { [16 x i8] }, align 8
+; CHECK: alloca [16 x i8], align 8
+
+  %gep = getelementptr inbounds { [16 x i8] }* %ptr, i64 -1
+; CHECK-NEXT: getelementptr inbounds { [16 x i8] }* %ptr, i64 -1, i32 0, i64 0
+
+  %cast1 = bitcast { [16 x i8 ] }* %gep to i8*
+  %cast2 = bitcast { [16 x i8 ] }* %a to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %cast1, i8* %cast2, i32 16, i32 8, i1 true)
+  ret void
+; CHECK: ret
+}
diff --git a/test/Transforms/SimplifyLibCalls/StrCpy.ll b/test/Transforms/SimplifyLibCalls/StrCpy.ll
deleted file mode 100644
index 83406ff8f8..0000000000
--- a/test/Transforms/SimplifyLibCalls/StrCpy.ll
+++ /dev/null
@@ -1,37 +0,0 @@
-; Test that the StrCpyOptimizer works correctly
-; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
-
-; This transformation requires the pointer size, as it assumes that size_t is
-; the size of a pointer.
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
-
-@hello = constant [6 x i8] c"hello\00"
-
-declare i8* @strcpy(i8*, i8*)
-
-declare i8* @__strcpy_chk(i8*, i8*, i32) nounwind
-
-declare i32 @llvm.objectsize.i32(i8*, i1) nounwind readonly
-
-; rdar://6839935
-
-define i32 @t1() {
-; CHECK: @t1
-  %target = alloca [1024 x i8]
-  %arg1 = getelementptr [1024 x i8]* %target, i32 0, i32 0
-  %arg2 = getelementptr [6 x i8]* @hello, i32 0, i32 0
-  %rslt1 = call i8* @strcpy( i8* %arg1, i8* %arg2 )
-; CHECK: @llvm.memcpy.p0i8.p0i8.i32
-  ret i32 0
-}
-
-define i32 @t2() {
-; CHECK: @t2
-  %target = alloca [1024 x i8]
-  %arg1 = getelementptr [1024 x i8]* %target, i32 0, i32 0
-  %arg2 = getelementptr [6 x i8]* @hello, i32 0, i32 0
-  %tmp1 = call i32 @llvm.objectsize.i32(i8* %arg1, i1 false)
-  %rslt1 = call i8* @__strcpy_chk(i8* %arg1, i8* %arg2, i32 %tmp1)
-; CHECK: @__memcpy_chk
-  ret i32 0
-}
diff --git a/test/lit.cfg b/test/lit.cfg
index dc37317ba9..f24a854dc7 100644
--- a/test/lit.cfg
+++ b/test/lit.cfg
@@ -152,7 +152,10 @@ config.target_triple += lit.valgrindTriple
 # Provide a substitution for those tests that need to run the jit to obtain data
 # but simply want to use the currently considered most reliable jit for platform
-defaultIsMCJIT='true' if 'arm' in config.target_triple else 'false'
+if 'arm' in config.target_triple:
+    defaultIsMCJIT = 'true'
+else:
+    defaultIsMCJIT = 'false'
 config.substitutions.append( ('%defaultjit', '-use-mcjit='+defaultIsMCJIT) )
 
 # Process jit implementation option
diff --git a/tools/bugpoint-passes/bugpoint.exports b/tools/bugpoint-passes/bugpoint.exports
index d8fdd6a576..e69de29bb2 100644
--- a/tools/bugpoint-passes/bugpoint.exports
+++ b/tools/bugpoint-passes/bugpoint.exports
@@ -1 +0,0 @@
-_ZN4llvm14BasicBlockPass14doFinalizationERNS_6ModuleE
diff --git a/tools/llc/llc.cpp b/tools/llc/llc.cpp
index d586b22120..a6c670a7fc 100644
--- a/tools/llc/llc.cpp
+++ b/tools/llc/llc.cpp
@@ -22,7 +22,6 @@
 #include "llvm/Assembly/PrintModulePass.h"
 #include "llvm/Support/DataStream.h" // @LOCALMOD
 #include "llvm/Support/IRReader.h"
-#include "llvm/CodeGen/CommandFlags.h"
 #include "llvm/CodeGen/IntrinsicLowering.h" // @LOCALMOD
 #include "llvm/CodeGen/LinkAllAsmWriterComponents.h"
 #include "llvm/CodeGen/LinkAllCodegenComponents.h"
@@ -84,6 +83,26 @@ OutputFilename("o", cl::desc("Output filename"), cl::value_desc("filename"));
 static cl::opt<std::string>
 MetadataTextFilename("metadata-text", cl::desc("Metadata as text, out filename"),
                      cl::value_desc("filename"));
+
+// Using bitcode streaming has a couple of ramifications. Primarily it means
+// that the module in the file will be compiled one function at a time rather
+// than the whole module. This allows earlier functions to be compiled before
+// later functions are read from the bitcode but of course means no whole-module
+// optimizations. For now, streaming is only supported for files and stdin.
+static cl::opt<bool>
+LazyBitcode("streaming-bitcode",
+            cl::desc("Use lazy bitcode streaming for file inputs"),
+            cl::init(false));
+
+// The option below overlaps very much with bitcode streaming.
+// We keep it separate because it is still experimental and we want
+// to use it without changing the outside behavior which is especially
+// relevant for the sandboxed case.
+static cl::opt<bool> +ReduceMemoryFootprint("reduce-memory-footprint", + cl::desc("Aggressively reduce memory used by llc"), + cl::init(false)); + // @LOCALMOD-END // Determine optimization level. @@ -98,34 +117,216 @@ OptLevel("O", static cl::opt<std::string> TargetTriple("mtriple", cl::desc("Override target triple for module")); +static cl::opt<std::string> +MArch("march", cl::desc("Architecture to generate code for (see --version)")); + +static cl::opt<std::string> +MCPU("mcpu", + cl::desc("Target a specific cpu type (-mcpu=help for details)"), + cl::value_desc("cpu-name"), + cl::init("")); + +static cl::list<std::string> +MAttrs("mattr", + cl::CommaSeparated, + cl::desc("Target specific attributes (-mattr=help for details)"), + cl::value_desc("a1,+a2,-a3,...")); + +static cl::opt<Reloc::Model> +RelocModel("relocation-model", + cl::desc("Choose relocation model"), + cl::init(Reloc::Default), + cl::values( + clEnumValN(Reloc::Default, "default", + "Target default relocation model"), + clEnumValN(Reloc::Static, "static", + "Non-relocatable code"), + clEnumValN(Reloc::PIC_, "pic", + "Fully relocatable, position independent code"), + clEnumValN(Reloc::DynamicNoPIC, "dynamic-no-pic", + "Relocatable external references, non-relocatable code"), + clEnumValEnd)); + +static cl::opt<llvm::CodeModel::Model> +CMModel("code-model", + cl::desc("Choose code model"), + cl::init(CodeModel::Default), + cl::values(clEnumValN(CodeModel::Default, "default", + "Target default code model"), + clEnumValN(CodeModel::Small, "small", + "Small code model"), + clEnumValN(CodeModel::Kernel, "kernel", + "Kernel code model"), + clEnumValN(CodeModel::Medium, "medium", + "Medium code model"), + clEnumValN(CodeModel::Large, "large", + "Large code model"), + clEnumValEnd)); + +static cl::opt<bool> +RelaxAll("mc-relax-all", + cl::desc("When used with filetype=obj, " + "relax all fixups in the emitted object file")); + +cl::opt<TargetMachine::CodeGenFileType> +FileType("filetype", cl::init(TargetMachine::CGFT_AssemblyFile), + cl::desc("Choose a file type (not all types are supported by all targets):"), + cl::values( + clEnumValN(TargetMachine::CGFT_AssemblyFile, "asm", + "Emit an assembly ('.s') file"), + clEnumValN(TargetMachine::CGFT_ObjectFile, "obj", + "Emit a native object ('.o') file"), + clEnumValN(TargetMachine::CGFT_Null, "null", + "Emit nothing, for performance testing"), + clEnumValEnd)); + cl::opt<bool> NoVerify("disable-verify", cl::Hidden, cl::desc("Do not verify input module")); -cl::opt<bool> +cl::opt<bool> DisableDotLoc("disable-dot-loc", cl::Hidden, + cl::desc("Do not use .loc entries")); + +cl::opt<bool> DisableCFI("disable-cfi", cl::Hidden, + cl::desc("Do not use .cfi_* directives")); + +cl::opt<bool> EnableDwarfDirectory("enable-dwarf-directory", cl::Hidden, + cl::desc("Use .file directives with an explicit directory.")); + +static cl::opt<bool> +DisableRedZone("disable-red-zone", + cl::desc("Do not emit code that uses the red zone."), + cl::init(false)); + +static cl::opt<bool> +EnableFPMAD("enable-fp-mad", + cl::desc("Enable less precise MAD instructions to be generated"), + cl::init(false)); + +static cl::opt<bool> +DisableFPElim("disable-fp-elim", + cl::desc("Disable frame pointer elimination optimization"), + cl::init(false)); + +static cl::opt<bool> +DisableFPElimNonLeaf("disable-non-leaf-fp-elim", + cl::desc("Disable frame pointer elimination optimization for non-leaf funcs"), + cl::init(false)); + +static cl::opt<bool> +EnableUnsafeFPMath("enable-unsafe-fp-math", + cl::desc("Enable 
optimizations that may decrease FP precision"),
+            cl::init(false));
+
+static cl::opt<bool>
+EnableNoInfsFPMath("enable-no-infs-fp-math",
+            cl::desc("Enable FP math optimizations that assume no +-Infs"),
+            cl::init(false));
+
+static cl::opt<bool>
+EnableNoNaNsFPMath("enable-no-nans-fp-math",
+            cl::desc("Enable FP math optimizations that assume no NaNs"),
+            cl::init(false));
+
+static cl::opt<bool>
+EnableHonorSignDependentRoundingFPMath("enable-sign-dependent-rounding-fp-math",
+            cl::Hidden,
+            cl::desc("Force codegen to assume rounding mode can change dynamically"),
+            cl::init(false));
+
+static cl::opt<bool>
+GenerateSoftFloatCalls("soft-float",
+            cl::desc("Generate software floating point library calls"),
+            cl::init(false));
+
+static cl::opt<llvm::FloatABI::ABIType>
+FloatABIForCalls("float-abi",
+            cl::desc("Choose float ABI type"),
+            cl::init(FloatABI::Default),
+            cl::values(
+              clEnumValN(FloatABI::Default, "default",
+                         "Target default float ABI type"),
+              clEnumValN(FloatABI::Soft, "soft",
+                         "Soft float ABI (implied by -soft-float)"),
+              clEnumValN(FloatABI::Hard, "hard",
+                         "Hard float ABI (uses FP registers)"),
+              clEnumValEnd));
+
+static cl::opt<llvm::FPOpFusion::FPOpFusionMode>
+FuseFPOps("fp-contract",
+            cl::desc("Enable aggressive formation of fused FP ops"),
+            cl::init(FPOpFusion::Standard),
+            cl::values(
+              clEnumValN(FPOpFusion::Fast, "fast",
+                         "Fuse FP ops whenever profitable"),
+              clEnumValN(FPOpFusion::Standard, "on",
+                         "Only fuse 'blessed' FP ops."),
+              clEnumValN(FPOpFusion::Strict, "off",
+                         "Only fuse FP ops when the result won't be affected."),
+              clEnumValEnd));
+
+static cl::opt<bool>
+DontPlaceZerosInBSS("nozero-initialized-in-bss",
+            cl::desc("Don't place zero-initialized symbols into bss section"),
+            cl::init(false));
+
 static cl::opt<bool>
 DisableSimplifyLibCalls("disable-simplify-libcalls",
-                        cl::desc("Disable simplify-libcalls"),
-                        cl::init(false));
+                        cl::desc("Disable simplify-libcalls"),
+                        cl::init(false));
 
-// @LOCALMOD-BEGIN
-// Using bitcode streaming has a couple of ramifications. Primarily it means
-// that the module in the file will be compiled one function at a time rather
-// than the whole module. This allows earlier functions to be compiled before
-// later functions are read from the bitcode but of course means no whole-module
-// optimizations. For now, streaming is only supported for files and stdin.
 static cl::opt<bool>
-LazyBitcode("streaming-bitcode",
-            cl::desc("Use lazy bitcode streaming for file inputs"),
+EnableGuaranteedTailCallOpt("tailcallopt",
+            cl::desc("Turn fastcc calls into tail calls by (potentially) changing ABI."),
             cl::init(false));
 
-// The option below overlaps very much with bitcode streaming.
-// We keep it separate because it is still experimental and we want
-// to use it without changing the outside behavior which is especially
-// relevant for the sandboxed case.
static cl::opt<bool> -ReduceMemoryFootprint("reduce-memory-footprint", - cl::desc("Aggressively reduce memory used by llc"), +DisableTailCalls("disable-tail-calls", + cl::desc("Never emit tail calls"), cl::init(false)); -// @LOCALMOD-END + +static cl::opt<unsigned> +OverrideStackAlignment("stack-alignment", + cl::desc("Override default stack alignment"), + cl::init(0)); + +static cl::opt<bool> +EnableRealignStack("realign-stack", + cl::desc("Realign stack if needed"), + cl::init(true)); + +static cl::opt<std::string> +TrapFuncName("trap-func", cl::Hidden, + cl::desc("Emit a call to trap function rather than a trap instruction"), + cl::init("")); + +static cl::opt<bool> +EnablePIE("enable-pie", + cl::desc("Assume the creation of a position independent executable."), + cl::init(false)); + +static cl::opt<bool> +SegmentedStacks("segmented-stacks", + cl::desc("Use segmented stacks if possible."), + cl::init(false)); + +static cl::opt<bool> +UseInitArray("use-init-array", + cl::desc("Use .init_array instead of .ctors."), + cl::init(false)); + +static cl::opt<std::string> StopAfter("stop-after", + cl::desc("Stop compilation after a specific pass"), + cl::value_desc("pass-name"), + cl::init("")); +static cl::opt<std::string> StartAfter("start-after", + cl::desc("Resume compilation after a specific pass"), + cl::value_desc("pass-name"), + cl::init("")); + +static cl::opt<unsigned> +SSPBufferSize("stack-protector-buffer-size", cl::init(8), + cl::desc("Lower bound for a buffer to be considered for " + "stack protection")); // GetFileNameRoot - Helper function to get the basename of a filename. static inline std::string @@ -474,11 +675,6 @@ int llc_main(int argc, char **argv) { TLI->disableAllFunctions(); PM->add(TLI); - if (target.get()) { - PM->add(new TargetTransformInfo(target->getScalarTargetTransformInfo(), - target->getVectorTargetTransformInfo())); - } - // Add the target data from the target machine, if it exists, or the module. if (const DataLayout *TD = Target.getDataLayout()) PM->add(new DataLayout(*TD)); diff --git a/tools/llvm-dwarfdump/llvm-dwarfdump.cpp b/tools/llvm-dwarfdump/llvm-dwarfdump.cpp index 38c3a1e76f..309bc4ecd4 100644 --- a/tools/llvm-dwarfdump/llvm-dwarfdump.cpp +++ b/tools/llvm-dwarfdump/llvm-dwarfdump.cpp @@ -1,4 +1,4 @@ -//===-- llvm-dwarfdump.cpp - Debug info dumping utility for llvm -----------===// +//===-- llvm-dwarfdump.cpp - Debug info dumping utility for llvm ----------===// // // The LLVM Compiler Infrastructure // @@ -118,8 +118,8 @@ static void DumpInput(const StringRef &Filename) { if (PrintFunctions) SpecFlags |= DILineInfoSpecifier::FunctionName; if (PrintInlining) { - DIInliningInfo InliningInfo = dictx->getInliningInfoForAddress( - Address, SpecFlags); + DIInliningInfo InliningInfo = + dictx->getInliningInfoForAddress(Address, SpecFlags); uint32_t n = InliningInfo.getNumberOfFrames(); if (n == 0) { // Print one empty debug line info in any case. diff --git a/tools/lto/LTOCodeGenerator.cpp b/tools/lto/LTOCodeGenerator.cpp index 5d79fda5aa..f417f5f4fd 100644 --- a/tools/lto/LTOCodeGenerator.cpp +++ b/tools/lto/LTOCodeGenerator.cpp @@ -519,8 +519,6 @@ bool LTOCodeGenerator::generateObjectFile(raw_ostream &out, // Add an appropriate DataLayout instance for this module... passes.add(new DataLayout(*_target->getDataLayout())); - passes.add(new TargetTransformInfo(_target->getScalarTargetTransformInfo(), - _target->getVectorTargetTransformInfo())); // Enabling internalize here would use its AllButMain variant. 
It // keeps only main if it exists and does nothing for libraries. Instead diff --git a/tools/opt/CMakeLists.txt b/tools/opt/CMakeLists.txt index 32de6d4060..7daf22aa9e 100644 --- a/tools/opt/CMakeLists.txt +++ b/tools/opt/CMakeLists.txt @@ -1,4 +1,4 @@ -set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} bitreader asmparser bitwriter instrumentation scalaropts ipo vectorize) +set(LLVM_LINK_COMPONENTS bitreader asmparser bitwriter instrumentation scalaropts ipo vectorize) add_llvm_tool(opt AnalysisWrappers.cpp diff --git a/tools/opt/LLVMBuild.txt b/tools/opt/LLVMBuild.txt index b174431e04..4de99f51c8 100644 --- a/tools/opt/LLVMBuild.txt +++ b/tools/opt/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Tool name = opt parent = Tools -required_libraries = AsmParser BitReader BitWriter IPO Instrumentation Scalar all-targets +required_libraries = AsmParser BitReader BitWriter IPO Instrumentation Scalar diff --git a/tools/opt/Makefile b/tools/opt/Makefile index ee7e1cf796..16d116da5d 100644 --- a/tools/opt/Makefile +++ b/tools/opt/Makefile @@ -9,6 +9,6 @@ LEVEL := ../.. TOOLNAME := opt -LINK_COMPONENTS := bitreader bitwriter asmparser instrumentation scalaropts ipo vectorize all-targets +LINK_COMPONENTS := bitreader bitwriter asmparser instrumentation scalaropts ipo vectorize include $(LEVEL)/Makefile.common diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp index 751c8e7da0..35ab2d50cb 100644 --- a/tools/opt/opt.cpp +++ b/tools/opt/opt.cpp @@ -18,7 +18,6 @@ #include "llvm/Module.h" #include "llvm/PassManager.h" #include "llvm/CallGraphSCCPass.h" -#include "llvm/CodeGen/CommandFlags.h" #include "llvm/Bitcode/ReaderWriter.h" #include "llvm/Assembly/PrintModulePass.h" #include "llvm/Analysis/Verifier.h" @@ -37,9 +36,7 @@ #include "llvm/Support/PluginLoader.h" #include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/SystemUtils.h" -#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/ToolOutputFile.h" -#include "llvm/MC/SubtargetFeature.h" #include "llvm/LinkAllPasses.h" #include "llvm/LinkAllVMCore.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" @@ -481,75 +478,6 @@ static void AddStandardLinkPasses(PassManagerBase &PM) { /*RunInliner=*/ !DisableInline); } -//===----------------------------------------------------------------------===// -// CodeGen-related helper functions. 
-// -static TargetOptions GetTargetOptions() { - TargetOptions Options; - Options.LessPreciseFPMADOption = EnableFPMAD; - Options.NoFramePointerElim = DisableFPElim; - Options.NoFramePointerElimNonLeaf = DisableFPElimNonLeaf; - Options.AllowFPOpFusion = FuseFPOps; - Options.UnsafeFPMath = EnableUnsafeFPMath; - Options.NoInfsFPMath = EnableNoInfsFPMath; - Options.NoNaNsFPMath = EnableNoNaNsFPMath; - Options.HonorSignDependentRoundingFPMathOption = - EnableHonorSignDependentRoundingFPMath; - Options.UseSoftFloat = GenerateSoftFloatCalls; - if (FloatABIForCalls != FloatABI::Default) - Options.FloatABIType = FloatABIForCalls; - Options.NoZerosInBSS = DontPlaceZerosInBSS; - Options.GuaranteedTailCallOpt = EnableGuaranteedTailCallOpt; - Options.DisableTailCalls = DisableTailCalls; - Options.StackAlignmentOverride = OverrideStackAlignment; - Options.RealignStack = EnableRealignStack; - Options.TrapFuncName = TrapFuncName; - Options.PositionIndependentExecutable = EnablePIE; - Options.EnableSegmentedStacks = SegmentedStacks; - Options.UseInitArray = UseInitArray; - Options.SSPBufferSize = SSPBufferSize; - return Options; -} - -CodeGenOpt::Level GetCodeGenOptLevel() { - if (OptLevelO1) - return CodeGenOpt::Less; - if (OptLevelO2) - return CodeGenOpt::Default; - if (OptLevelO3) - return CodeGenOpt::Aggressive; - return CodeGenOpt::None; -} - -// Returns the TargetMachine instance or zero if no triple is provided. -static TargetMachine* GetTargetMachine(std::string TripleStr) { - if (TripleStr.empty()) - return 0; - - // Get the target specific parser. - std::string Error; - Triple TheTriple(Triple::normalize(TargetTriple)); - - const Target *TheTarget = TargetRegistry::lookupTarget(MArch, TheTriple, - Error); - if (!TheTarget) { - return 0; - } - - // Package up features to be passed to target/subtarget - std::string FeaturesStr; - if (MAttrs.size()) { - SubtargetFeatures Features; - for (unsigned i = 0; i != MAttrs.size(); ++i) - Features.AddFeature(MAttrs[i]); - FeaturesStr = Features.getString(); - } - - return TheTarget->createTargetMachine(TheTriple.getTriple(), - MCPU, FeaturesStr, GetTargetOptions(), - RelocModel, CMModel, - GetCodeGenOptLevel()); -} //===----------------------------------------------------------------------===// // main for opt @@ -652,12 +580,6 @@ int main(int argc, char **argv) { if (TD) Passes.add(TD); - std::auto_ptr<TargetMachine> TM(GetTargetMachine(TargetTriple)); - if (TM.get()) { - Passes.add(new TargetTransformInfo(TM->getScalarTargetTransformInfo(), - TM->getVectorTargetTransformInfo())); - } - OwningPtr<FunctionPassManager> FPasses; if (OptLevelO1 || OptLevelO2 || OptLevelOs || OptLevelOz || OptLevelO3) { FPasses.reset(new FunctionPassManager(M.get())); diff --git a/unittests/ExecutionEngine/JIT/JITTest.cpp b/unittests/ExecutionEngine/JIT/JITTest.cpp index ae6855e68b..6933091949 100644 --- a/unittests/ExecutionEngine/JIT/JITTest.cpp +++ b/unittests/ExecutionEngine/JIT/JITTest.cpp @@ -606,7 +606,7 @@ TEST_F(JITTest, FunctionIsRecompiledAndRelinked) { // program from the IR input to the JIT to assert that the JIT doesn't use its // definition. 
extern "C" int32_t JITTest_AvailableExternallyGlobal; -int32_t JITTest_AvailableExternallyGlobal = 42; +int32_t JITTest_AvailableExternallyGlobal LLVM_ATTRIBUTE_USED = 42; namespace { // Tests on ARM disabled as we're running the old jit diff --git a/unittests/ExecutionEngine/JIT/Makefile b/unittests/ExecutionEngine/JIT/Makefile index b535a6b296..9e0bb9ea59 100644 --- a/unittests/ExecutionEngine/JIT/Makefile +++ b/unittests/ExecutionEngine/JIT/Makefile @@ -35,8 +35,15 @@ ifeq ($(USE_OPROFILE), 1) LINK_COMPONENTS += oprofilejit endif +EXPORTED_SYMBOL_FILE = $(PROJ_OBJ_DIR)/JITTests.exports include $(LLVM_SRC_ROOT)/unittests/Makefile.unittest # Permit these tests to use the JIT's symbolic lookup. LD.Flags += $(RDYNAMIC) + +# Symbol exports are necessary (at least for now) when building with LTO. +$(LLVMUnitTestExe): $(NativeExportsFile) +$(PROJ_OBJ_DIR)/JITTests.exports: $(PROJ_SRC_DIR)/JITTests.def $(PROJ_OBJ_DIR)/.dir + tail -n +2 $< > $@ + |