diff options
177 files changed, 4951 insertions, 1339 deletions
@@ -73,10 +73,13 @@ endif ifeq ($(MAKECMDGOALS),install-clang) DIRS := tools/clang/tools/driver tools/clang/lib/Headers \ - tools/clang/tools/libclang tools/clang/tools/c-index-test \ + tools/clang/tools/libclang \ tools/clang/include/clang-c \ tools/clang/runtime tools/clang/docs \ tools/lto runtime + ifneq ($(BUILD_CLANG_ONLY),YES) + DIRS += tools/clang/tools/c-index-test + endif OPTIONAL_DIRS := NO_INSTALL = 1 endif diff --git a/cmake/modules/LLVMProcessSources.cmake b/cmake/modules/LLVMProcessSources.cmake index 0e410edc15..2cef6cfc3a 100644 --- a/cmake/modules/LLVMProcessSources.cmake +++ b/cmake/modules/LLVMProcessSources.cmake @@ -48,7 +48,7 @@ function(llvm_process_sources OUT_VAR) set( f ${CMAKE_CURRENT_SOURCE_DIR}/${s} ) add_file_dependencies( ${f} ${TABLEGEN_OUTPUT} ) endforeach(s) - if( MSVC_IDE ) + if( MSVC_IDE OR XCODE ) # This adds .td and .h files to the Visual Studio solution: # FIXME: Shall we handle *.def here? add_td_sources(sources) diff --git a/docs/HowToUseInstrMappings.rst b/docs/HowToUseInstrMappings.rst new file mode 100755 index 0000000000..b51e74e23c --- /dev/null +++ b/docs/HowToUseInstrMappings.rst @@ -0,0 +1,179 @@ +.. _how_to_use_instruction_mappings: + +=============================== +How To Use Instruction Mappings +=============================== + +.. sectionauthor:: Jyotsna Verma <jverma@codeaurora.org> + +.. contents:: + :local: + +Introduction +============ + +This document contains information about adding instruction mapping support +for a target. The motivation behind this feature comes from the need to switch +between different instruction formats during various optimizations. One approach +could be to use switch cases which list all the instructions along with formats +they can transition to. However, it has large maintenance overhead +because of the hardcoded instruction names. Also, whenever a new instruction is +added in the .td files, all the relevant switch cases should be modified +accordingly. 
Instead, the same functionality could be achieved with TableGen and +some support from the .td files for a fraction of maintenance cost. + +``InstrMapping`` Class Overview +=============================== + +TableGen uses relationship models to map instructions with each other. These +models are described using ``InstrMapping`` class as a base. Each model sets +various fields of the ``InstrMapping`` class such that they can uniquely +describe all the instructions using that model. TableGen parses all the relation +models and uses the information to construct relation tables which relate +instructions with each other. These tables are emitted in the +``XXXInstrInfo.inc`` file along with the functions to query them. Following +is the definition of ``InstrMapping`` class definied in Target.td file: + +.. code-block:: llvm + + class InstrMapping { + // Used to reduce search space only to the instructions using this + // relation model. + string FilterClass; + + // List of fields/attributes that should be same for all the instructions in + // a row of the relation table. Think of this as a set of properties shared + // by all the instructions related by this relationship. + list<string> RowFields = []; + + // List of fields/attributes that are same for all the instructions + // in a column of the relation table. + list<string> ColFields = []; + + // Values for the fields/attributes listed in 'ColFields' corresponding to + // the key instruction. This is the instruction that will be transformed + // using this relation model. + list<string> KeyCol = []; + + // List of values for the fields/attributes listed in 'ColFields', one for + // each column in the relation table. These are the instructions a key + // instruction will be transformed into. 
+ list<list<string> > ValueCols = []; + } + +Sample Example +-------------- + +Let's say that we want to have a function +``int getPredOpcode(uint16_t Opcode, enum PredSense inPredSense)`` which +takes a non-predicated instruction and returns its predicated true or false form +depending on some input flag, ``inPredSense``. The first step in the process is +to define a relationship model that relates predicated instructions to their +non-predicated form by assigning appropriate values to the ``InstrMapping`` +fields. For this relationship, non-predicated instructions are treated as key +instruction since they are the one used to query the interface function. + +.. code-block:: llvm + + def getPredOpcode : InstrMapping { + // Choose a FilterClass that is used as a base class for all the + // instructions modeling this relationship. This is done to reduce the + // search space only to these set of instructions. + let FilterClass = "PredRel"; + + // Instructions with same values for all the fields in RowFields form a + // row in the resulting relation table. + // For example, if we want to relate 'ADD' (non-predicated) with 'Add_pt' + // (predicated true) and 'Add_pf' (predicated false), then all 3 + // instructions need to have same value for BaseOpcode field. It can be any + // unique value (Ex: XYZ) and should not be shared with any other + // instruction not related to 'add'. + let RowFields = ["BaseOpcode"]; + + // List of attributes that can be used to define key and column instructions + // for a relation. Key instruction is passed as an argument + // to the function used for querying relation tables. Column instructions + // are the instructions they (key) can transform into. + // + // Here, we choose 'PredSense' as ColFields since this is the unique + // attribute of the key (non-predicated) and column (true/false) + // instructions involved in this relationship model. + let ColFields = ["PredSense"]; + + // The key column contains non-predicated instructions. 
+ let KeyCol = ["none"]; + + // Two value columns - first column contains instructions with + // PredSense=true while second column has instructions with PredSense=false. + let ValueCols = [["true"], ["false"]]; + } + +TableGen uses the above relationship model to emit relation table that maps +non-predicated instructions with their predicated forms. It also outputs the +interface function +``int getPredOpcode(uint16_t Opcode, enum PredSense inPredSense)`` to query +the table. Here, Function ``getPredOpcode`` takes two arguments, opcode of the +current instruction and PredSense of the desired instruction, and returns +predicated form of the instruction, if found in the relation table. +In order for an instruction to be added into the relation table, it needs +to include relevant information in its definition. For example, consider +following to be the current definitions of ADD, ADD_pt (true) and ADD_pf (false) +instructions: + +.. code-block::llvm + + def ADD : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$a, IntRegs:$b), + "$dst = add($a, $b)", + [(set (i32 IntRegs:$dst), (add (i32 IntRegs:$a), + (i32 IntRegs:$b)))]>; + + def ADD_Pt : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$p, IntRegs:$a, IntRegs:$b), + "if ($p) $dst = add($a, $b)", + []>; + + def ADD_Pf : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$p, IntRegs:$a, IntRegs:$b), + "if (!$p) $dst = add($a, $b)", + []>; + +In this step, we modify these instructions to include the information +required by the relationship model, <tt>getPredOpcode</tt>, so that they can +be related. + +.. 
code-block::llvm + + def ADD : PredRel, ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$a, IntRegs:$b), + "$dst = add($a, $b)", + [(set (i32 IntRegs:$dst), (add (i32 IntRegs:$a), + (i32 IntRegs:$b)))]> { + let BaseOpcode = "ADD"; + let PredSense = "none"; + } + + def ADD_Pt : PredRel, ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$p, IntRegs:$a, IntRegs:$b), + "if ($p) $dst = add($a, $b)", + []> { + let BaseOpcode = "ADD"; + let PredSense = "true"; + } + + def ADD_Pf : PredRel, ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$p, IntRegs:$a, IntRegs:$b), + "if (!$p) $dst = add($a, $b)", + []> { + let BaseOpcode = "ADD"; + let PredSense = "false"; + } + +Please note that all the above instructions use ``PredRel`` as a base class. +This is extremely important since TableGen uses it as a filter for selecting +instructions for ``getPredOpcode`` model. Any instruction not derived from +``PredRel`` is excluded from the analysis. ``BaseOpcode`` is another important +field. Since it's selected as a ``RowFields`` of the model, it is required +to have the same value for all 3 instructions in order to be related. Next, +``PredSense`` is used to determine their column positions by comparing its value +with ``KeyCol`` and ``ValueCols``. If an instruction sets its ``PredSense`` +value to something not used in the relation model, it will not be assigned +a column in the relation table. diff --git a/docs/MarkedUpDisassembly.rst b/docs/MarkedUpDisassembly.rst new file mode 100644 index 0000000000..e1282e102e --- /dev/null +++ b/docs/MarkedUpDisassembly.rst @@ -0,0 +1,88 @@ +.. _marked_up_disassembly: + +======================================= +LLVM's Optional Rich Disassembly Output +======================================= + +.. contents:: + :local: + +Introduction +============ + +LLVM's default disassembly output is raw text. 
To allow consumers more ability +to introspect the instructions' textual representation or to reformat for a more +user friendly display there is an optional rich disassembly output. + +This optional output is sufficient to reference into individual portions of the +instruction text. This is intended for clients like disassemblers, list file +generators, and pretty-printers, which need more than the raw instructions and +the ability to print them. + +To provide this functionality the assembly text is marked up with annotations. +The markup is simple enough in syntax to be robust even in the case of version +mismatches between consumers and producers. That is, the syntax generally does +not carry semantics beyond "this text has an annotation," so consumers can +simply ignore annotations they do not understand or do not care about. + +After calling ``LLVMCreateDisasm()`` to create a disassembler context the +optional output is enable with this call: + +.. code-block:: c + + LLVMSetDisasmOptions(DC, LLVMDisassembler_Option_UseMarkup); + +Then subsequent calls to ``LLVMDisasmInstruction()`` will return output strings +with the marked up annotations. + +Instruction Annotations +======================= + +.. _contextual markups: + +Contextual markups +------------------ + +Annoated assembly display will supply contextual markup to help clients more +efficiently implement things like pretty printers. Most markup will be target +independent, so clients can effectively provide good display without any target +specific knowledge. + +Annotated assembly goes through the normal instruction printer, but optionally +includes contextual tags on portions of the instruction string. An annotation +is any '<' '>' delimited section of text(1). + +.. code-block:: bat + + annotation: '<' tag-name tag-modifier-list ':' annotated-text '>' + tag-name: identifier + tag-modifier-list: comma delimited identifier list + +The tag-name is an identifier which gives the type of the annotation. 
For the +first pass, this will be very simple, with memory references, registers, and +immediates having the tag names "mem", "reg", and "imm", respectively. + +The tag-modifier-list is typically additional target-specific context, such as +register class. + +Clients should accept and ignore any tag-names or tag-modifiers they do not +understand, allowing the annotations to grow in richness without breaking older +clients. + +For example, a possible annotation of an ARM load of a stack-relative location +might be annotated as: + +.. code-block:: nasm + + ldr <reg gpr:r0>, <mem regoffset:[<reg gpr:sp>, <imm:#4>]> + + +1: For assembly dialects in which '<' and/or '>' are legal tokens, a literal token is escaped by following immediately with a repeat of the character. For example, a literal '<' character is output as '<<' in an annotated assembly string. + +C API Details +------------- + +The intended consumers of this information use the C API, therefore the new C +API function for the disassembler will be added to provide an option to produce +disassembled instructions with annotations, ``LLVMSetDisasmOptions()`` and the +``LLVMDisassembler_Option_UseMarkup`` option (see above). diff --git a/docs/ReleaseNotes.html b/docs/ReleaseNotes.html index 0ef8f3d1f3..9a1b547b4a 100644 --- a/docs/ReleaseNotes.html +++ b/docs/ReleaseNotes.html @@ -468,7 +468,10 @@ Release Notes</a>.</h1> <p> Loop Vectorizer - We've added a loop vectorizer and we are now able to vectorize small loops. The loop vectorizer is disabled by default and - can be enabled using the <b>-mllvm -vectorize</b> flag. <br/> + can be enabled using the <b>-mllvm -vectorize</b> flag. + The SIMD vector width can be specified using the flag + <b>-mllvm -force-vector-width=4</b>. 
+ <br/> We can now vectorize this code: <pre class="doc_code"> diff --git a/docs/WritingAnLLVMBackend.html b/docs/WritingAnLLVMBackend.html index 7576d490d7..0ad472cb92 100644 --- a/docs/WritingAnLLVMBackend.html +++ b/docs/WritingAnLLVMBackend.html @@ -32,6 +32,7 @@ <li><a href="#InstructionSet">Instruction Set</a> <ul> <li><a href="#operandMapping">Instruction Operand Mapping</a></li> + <li><a href="#relationMapping">Instruction Relation Mapping</a></li> <li><a href="#implementInstr">Implement a subclass of TargetInstrInfo</a></li> <li><a href="#branchFolding">Branch Folding and If Conversion</a></li> </ul></li> @@ -1259,6 +1260,29 @@ the <tt>rd</tt>, <tt>rs1</tt>, and <tt>rs2</tt> fields respectively. <!-- ======================================================================= --> <h3> + <a name="relationMapping">Instruction Relation Mapping</a> +</h3> + +<div> + +<p> +This TableGen feature is used to relate instructions with each other. It is +particularly useful when you have multiple instruction formats and need to +switch between them after instruction selection. This entire feature is driven +by relation models which can be defined in <tt>XXXInstrInfo.td</tt> files +according to the target-specific instruction set. Relation models are defined +using <tt>InstrMapping</tt> class as a base. TableGen parses all the models +and generates instruction relation maps using the specified information. +Relation maps are emitted as tables in the <tt>XXXGenInstrInfo.inc</tt> file +along with the functions to query them. For the detailed information on how to +use this feature, please refer to +<a href="HowToUseInstrMappings.html">How to add Instruction Mappings</a> +document. 
+</p> +</div> + +<!-- ======================================================================= --> +<h3> <a name="implementInstr">Implement a subclass of </a> <a href="CodeGenerator.html#targetinstrinfo">TargetInstrInfo</a> </h3> diff --git a/docs/subsystems.rst b/docs/subsystems.rst index 6f77b79fbe..80d0eed663 100644 --- a/docs/subsystems.rst +++ b/docs/subsystems.rst @@ -17,6 +17,7 @@ Subsystem Documentation TableGenFundamentals DebuggingJITedCode GoldPlugin + MarkedUpDisassembly * `Writing an LLVM Pass <WritingAnLLVMPass.html>`_ @@ -98,3 +99,8 @@ Subsystem Documentation architecture. .. _`Howto: Implementing LLVM Integrated Assembler`: http://www.embecosm.com/download/ean10.html + +* :ref:`marked_up_disassembly` + + This document describes the optional rich disassembly output syntax. + diff --git a/include/llvm/ADT/DenseMap.h b/include/llvm/ADT/DenseMap.h index cbcf7892c9..ac4bdbd126 100644 --- a/include/llvm/ADT/DenseMap.h +++ b/include/llvm/ADT/DenseMap.h @@ -420,7 +420,7 @@ private: NumBuckets = getNumBuckets(); } if (NumBuckets-(NewNumEntries+getNumTombstones()) <= NumBuckets/8) { - this->grow(NumBuckets); + this->grow(NumBuckets * 2); LookupBucketFor(Key, TheBucket); } assert(TheBucket); @@ -600,7 +600,7 @@ public: unsigned OldNumBuckets = NumBuckets; BucketT *OldBuckets = Buckets; - allocateBuckets(std::max<unsigned>(64, NextPowerOf2(AtLeast))); + allocateBuckets(std::max<unsigned>(64, NextPowerOf2(AtLeast-1))); assert(Buckets); if (!OldBuckets) { this->BaseT::initEmpty(); @@ -826,11 +826,11 @@ public: } void grow(unsigned AtLeast) { - if (AtLeast > InlineBuckets) - AtLeast = std::max<unsigned>(64, NextPowerOf2(AtLeast)); + if (AtLeast >= InlineBuckets) + AtLeast = std::max<unsigned>(64, NextPowerOf2(AtLeast-1)); if (Small) { - if (AtLeast <= InlineBuckets) + if (AtLeast < InlineBuckets) return; // Nothing to do. // First move the inline buckets into a temporary storage. 
diff --git a/include/llvm/ADT/StringSet.h b/include/llvm/ADT/StringSet.h index 9c55f6b70e..b69a964a23 100644 --- a/include/llvm/ADT/StringSet.h +++ b/include/llvm/ADT/StringSet.h @@ -29,8 +29,13 @@ namespace llvm { assert(!InLang.empty()); const char *KeyStart = InLang.data(); const char *KeyEnd = KeyStart + InLang.size(); - return base::insert(llvm::StringMapEntry<char>:: - Create(KeyStart, KeyEnd, base::getAllocator(), '+')); + llvm::StringMapEntry<char> *Entry = llvm::StringMapEntry<char>:: + Create(KeyStart, KeyEnd, base::getAllocator(), '+'); + if (!base::insert(Entry)) { + Entry->Destroy(base::getAllocator()); + return false; + } + return true; } }; } diff --git a/include/llvm/Analysis/DependenceAnalysis.h b/include/llvm/Analysis/DependenceAnalysis.h index 9b6a6bbd3e..3818428a5e 100644 --- a/include/llvm/Analysis/DependenceAnalysis.h +++ b/include/llvm/Analysis/DependenceAnalysis.h @@ -30,23 +30,17 @@ #ifndef LLVM_ANALYSIS_DEPENDENCEANALYSIS_H #define LLVM_ANALYSIS_DEPENDENCEANALYSIS_H -#include "llvm/BasicBlock.h" -#include "llvm/Function.h" -#include "llvm/Instruction.h" +#include "llvm/Instructions.h" #include "llvm/Pass.h" #include "llvm/ADT/SmallBitVector.h" -#include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Support/raw_ostream.h" - namespace llvm { class AliasAnalysis; + class Loop; + class LoopInfo; class ScalarEvolution; class SCEV; - class Value; + class SCEVConstant; class raw_ostream; /// Dependence - This class represents a dependence between two memory diff --git a/include/llvm/Analysis/MemoryBuiltins.h b/include/llvm/Analysis/MemoryBuiltins.h index a842898e41..9e5d97dd7f 100644 --- a/include/llvm/Analysis/MemoryBuiltins.h +++ b/include/llvm/Analysis/MemoryBuiltins.h @@ -168,7 +168,8 @@ class ObjectSizeOffsetVisitor public: ObjectSizeOffsetVisitor(const DataLayout *TD, const TargetLibraryInfo *TLI, - 
LLVMContext &Context, bool RoundToAlign = false); + LLVMContext &Context, bool RoundToAlign = false, + unsigned AS = 0); SizeOffsetType compute(Value *V); @@ -229,7 +230,7 @@ class ObjectSizeOffsetEvaluator public: ObjectSizeOffsetEvaluator(const DataLayout *TD, const TargetLibraryInfo *TLI, - LLVMContext &Context); + LLVMContext &Context, unsigned AS = 0); SizeOffsetEvalType compute(Value *V); bool knownSize(SizeOffsetEvalType SizeOffset) { diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h index 67c9a4d14f..d2df67080c 100644 --- a/include/llvm/Analysis/ScalarEvolution.h +++ b/include/llvm/Analysis/ScalarEvolution.h @@ -628,7 +628,7 @@ namespace llvm { /// getSizeOfExpr - Return an expression for sizeof on the given type. /// - const SCEV *getSizeOfExpr(Type *AllocTy); + const SCEV *getSizeOfExpr(Type *AllocTy, Type *IntPtrTy); /// getAlignOfExpr - Return an expression for alignof on the given type. /// @@ -636,7 +636,8 @@ namespace llvm { /// getOffsetOfExpr - Return an expression for offsetof on the given field. /// - const SCEV *getOffsetOfExpr(StructType *STy, unsigned FieldNo); + const SCEV *getOffsetOfExpr(StructType *STy, Type *IntPtrTy, + unsigned FieldNo); /// getOffsetOfExpr - Return an expression for offsetof on the given field. /// diff --git a/include/llvm/CallingConv.h b/include/llvm/CallingConv.h index 86e4eebb82..053f4eb326 100644 --- a/include/llvm/CallingConv.h +++ b/include/llvm/CallingConv.h @@ -112,7 +112,11 @@ namespace CallingConv { /// Cannot have variable arguments. /// Can also be called by the host. /// Is externally visible. 
- SPIR_KERNEL = 76 + SPIR_KERNEL = 76, + + /// Intel_OCL_BI - Calling conventions for Intel OpenCL built-ins + Intel_OCL_BI = 77 + }; } // End CallingConv namespace diff --git a/include/llvm/CodeGen/PBQP/Graph.h b/include/llvm/CodeGen/PBQP/Graph.h index a5d8b0dbd6..83c379b48c 100644 --- a/include/llvm/CodeGen/PBQP/Graph.h +++ b/include/llvm/CodeGen/PBQP/Graph.h @@ -19,6 +19,7 @@ #include <list> #include <map> +#include <llvm/ADT/ilist.h> namespace PBQP { @@ -31,16 +32,16 @@ namespace PBQP { class NodeEntry; class EdgeEntry; - typedef std::list<NodeEntry> NodeList; - typedef std::list<EdgeEntry> EdgeList; + typedef llvm::ilist<NodeEntry> NodeList; + typedef llvm::ilist<EdgeEntry> EdgeList; public: - typedef NodeList::iterator NodeItr; - typedef NodeList::const_iterator ConstNodeItr; + typedef NodeEntry* NodeItr; + typedef const NodeEntry* ConstNodeItr; - typedef EdgeList::iterator EdgeItr; - typedef EdgeList::const_iterator ConstEdgeItr; + typedef EdgeEntry* EdgeItr; + typedef const EdgeEntry* ConstEdgeItr; private: @@ -52,12 +53,14 @@ namespace PBQP { private: - class NodeEntry { + class NodeEntry : public llvm::ilist_node<NodeEntry> { + friend struct llvm::ilist_sentinel_traits<NodeEntry>; private: Vector costs; AdjEdgeList adjEdges; unsigned degree; void *data; + NodeEntry() : costs(0, 0) {} public: NodeEntry(const Vector &costs) : costs(costs), degree(0) {} Vector& getCosts() { return costs; } @@ -77,12 +80,14 @@ namespace PBQP { void* getData() { return data; } }; - class EdgeEntry { + class EdgeEntry : public llvm::ilist_node<EdgeEntry> { + friend struct llvm::ilist_sentinel_traits<EdgeEntry>; private: NodeItr node1, node2; Matrix costs; AdjEdgeItr node1AEItr, node2AEItr; void *data; + EdgeEntry() : costs(0, 0, 0) {} public: EdgeEntry(NodeItr node1, NodeItr node2, const Matrix &costs) : node1(node1), node2(node2), costs(costs) {} diff --git a/include/llvm/DataLayout.h b/include/llvm/DataLayout.h index c9ac0b7fea..d778556684 100644 --- 
a/include/llvm/DataLayout.h +++ b/include/llvm/DataLayout.h @@ -262,6 +262,14 @@ public: } return 8*val->second.TypeBitWidth; } + /// Layout pointer size, in bits, based on the type. + /// If this function is called with a pointer type, then + /// the type size of the pointer is returned. + /// If this function is called with a vector of pointers, + /// then the type size of the pointer is returned. + /// Otherwise the type sizeo f a default pointer is returned. + unsigned getPointerTypeSizeInBits(Type* Ty) const; + /// Size examples: /// /// Type SizeInBits StoreSizeInBits AllocSizeInBits[*] @@ -337,11 +345,13 @@ public: /// unsigned getPreferredTypeAlignmentShift(Type *Ty) const; - /// getIntPtrType - Return an unsigned integer type that is the same size or - /// greater to the host pointer size. - /// FIXME: Need to remove the default argument when the rest of the LLVM code - /// base has been updated. - IntegerType *getIntPtrType(LLVMContext &C, unsigned AddressSpace = 0) const; + /// getIntPtrType - Return an integer type that is the same size or + /// greater to the pointer size based on the address space. + IntegerType *getIntPtrType(LLVMContext &C, unsigned AddressSpace) const; + + /// getIntPtrType - Return an integer type that is the same size or + /// greater to the pointer size based on the Type. + IntegerType *getIntPtrType(Type *) const; /// getIndexedOffset - return the offset from the beginning of the type for /// the specified indices. This is used to implement getelementptr. 
diff --git a/include/llvm/InstrTypes.h b/include/llvm/InstrTypes.h index cfc79394b2..b661372f53 100644 --- a/include/llvm/InstrTypes.h +++ b/include/llvm/InstrTypes.h @@ -17,6 +17,7 @@ #define LLVM_INSTRUCTION_TYPES_H #include "llvm/Instruction.h" +#include "llvm/DataLayout.h" #include "llvm/OperandTraits.h" #include "llvm/DerivedTypes.h" #include "llvm/ADT/Twine.h" @@ -576,6 +577,11 @@ public: Type *IntPtrTy ///< Integer type corresponding to pointer ) const; + /// @brief Determine if this cast is a no-op cast. + bool isNoopCast( + const DataLayout &DL ///< DataLayout to get the Int Ptr type from. + ) const; + /// Determine how a pair of casts can be eliminated, if they can be at all. /// This is a helper function for both CastInst and ConstantExpr. /// @returns 0 if the CastInst pair can't be eliminated, otherwise diff --git a/include/llvm/MC/MCELFObjectWriter.h b/include/llvm/MC/MCELFObjectWriter.h index 688c8a9575..59a2501b8e 100644 --- a/include/llvm/MC/MCELFObjectWriter.h +++ b/include/llvm/MC/MCELFObjectWriter.h @@ -91,6 +91,9 @@ public: const MCFragment &F, const MCFixup &Fixup, bool IsPCRel) const; + virtual const MCSymbol *undefinedExplicitRelSym(const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel) const; virtual void adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset); diff --git a/include/llvm/MC/MCInstPrinter.h b/include/llvm/MC/MCInstPrinter.h index f17c347050..3b9420a403 100644 --- a/include/llvm/MC/MCInstPrinter.h +++ b/include/llvm/MC/MCInstPrinter.h @@ -66,6 +66,10 @@ public: bool getUseMarkup() const { return UseMarkup; } void setUseMarkup(bool Value) { UseMarkup = Value; } + + /// Utility functions to make adding mark ups simpler. 
+ StringRef markup(StringRef s) const; + StringRef markup(StringRef a, StringRef b) const; }; } // namespace llvm diff --git a/include/llvm/MC/MCParser/MCAsmParser.h b/include/llvm/MC/MCParser/MCAsmParser.h index fe7bda08db..8a5f37cb0c 100644 --- a/include/llvm/MC/MCParser/MCAsmParser.h +++ b/include/llvm/MC/MCParser/MCAsmParser.h @@ -90,7 +90,7 @@ public: /// ParseMSInlineAsm - Parse ms-style inline assembly. virtual bool ParseMSInlineAsm(void *AsmLoc, std::string &AsmString, unsigned &NumOutputs, unsigned &NumInputs, - SmallVectorImpl<void *> &OpDecls, + SmallVectorImpl<std::pair<void *, bool> > &OpDecls, SmallVectorImpl<std::string> &Constraints, SmallVectorImpl<std::string> &Clobbers, const MCInstrInfo *MII, diff --git a/include/llvm/MC/MCParser/MCParsedAsmOperand.h b/include/llvm/MC/MCParser/MCParsedAsmOperand.h index 35f47c0b9c..89b0a1f47b 100644 --- a/include/llvm/MC/MCParser/MCParsedAsmOperand.h +++ b/include/llvm/MC/MCParser/MCParsedAsmOperand.h @@ -69,6 +69,9 @@ public: /// inline assembly. virtual bool isOffsetOf() const { return false; } + /// getOffsetOfLoc - Get the location of the offset operator. + virtual SMLoc getOffsetOfLoc() const { return SMLoc(); } + /// needSizeDirective - Do we need to emit a sizing directive for this /// operand? Only valid when parsing MS-style inline assembly. virtual bool needSizeDirective() const { return false; } diff --git a/include/llvm/Operator.h b/include/llvm/Operator.h index 462324a669..b326c11352 100644 --- a/include/llvm/Operator.h +++ b/include/llvm/Operator.h @@ -36,8 +36,11 @@ private: void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION; void *operator new(size_t s) LLVM_DELETED_FUNCTION; Operator() LLVM_DELETED_FUNCTION; - // NOTE: cannot use LLVM_DELETED_FUNCTION because it's not legal to delete - // an overridden method that's not deleted in the base class. 
+ +protected: + // NOTE: Cannot use LLVM_DELETED_FUNCTION because it's not legal to delete + // an overridden method that's not deleted in the base class. Cannot leave + // this unimplemented because that leads to an ODR-violation. ~Operator(); public: @@ -79,8 +82,6 @@ public: }; private: - ~OverflowingBinaryOperator(); // DO NOT IMPLEMENT - friend class BinaryOperator; friend class ConstantExpr; void setHasNoUnsignedWrap(bool B) { @@ -132,8 +133,6 @@ public: }; private: - ~PossiblyExactOperator(); // DO NOT IMPLEMENT - friend class BinaryOperator; friend class ConstantExpr; void setIsExact(bool B) { @@ -168,9 +167,6 @@ public: /// FPMathOperator - Utility class for floating point operations which can have /// information about relaxed accuracy requirements attached to them. class FPMathOperator : public Operator { -private: - ~FPMathOperator(); // DO NOT IMPLEMENT - public: /// \brief Get the maximum error permitted by this operation in ULPs. An @@ -191,7 +187,6 @@ public: /// opcodes. 
template<typename SuperClass, unsigned Opc> class ConcreteOperator : public SuperClass { - ~ConcreteOperator(); // DO NOT IMPLEMENT public: static inline bool classof(const Instruction *I) { return I->getOpcode() == Opc; @@ -207,45 +202,35 @@ public: class AddOperator : public ConcreteOperator<OverflowingBinaryOperator, Instruction::Add> { - ~AddOperator(); // DO NOT IMPLEMENT }; class SubOperator : public ConcreteOperator<OverflowingBinaryOperator, Instruction::Sub> { - ~SubOperator(); // DO NOT IMPLEMENT }; class MulOperator : public ConcreteOperator<OverflowingBinaryOperator, Instruction::Mul> { - ~MulOperator(); // DO NOT IMPLEMENT }; class ShlOperator : public ConcreteOperator<OverflowingBinaryOperator, Instruction::Shl> { - ~ShlOperator(); // DO NOT IMPLEMENT }; class SDivOperator : public ConcreteOperator<PossiblyExactOperator, Instruction::SDiv> { - ~SDivOperator(); // DO NOT IMPLEMENT }; class UDivOperator : public ConcreteOperator<PossiblyExactOperator, Instruction::UDiv> { - ~UDivOperator(); // DO NOT IMPLEMENT }; class AShrOperator : public ConcreteOperator<PossiblyExactOperator, Instruction::AShr> { - ~AShrOperator(); // DO NOT IMPLEMENT }; class LShrOperator : public ConcreteOperator<PossiblyExactOperator, Instruction::LShr> { - ~LShrOperator(); // DO NOT IMPLEMENT }; class GEPOperator : public ConcreteOperator<Operator, Instruction::GetElementPtr> { - ~GEPOperator(); // DO NOT IMPLEMENT - enum { IsInBounds = (1 << 0) }; diff --git a/include/llvm/TableGen/Error.h b/include/llvm/TableGen/Error.h index 5c1c3adf7e..3f7b7f4e8c 100644 --- a/include/llvm/TableGen/Error.h +++ b/include/llvm/TableGen/Error.h @@ -40,6 +40,9 @@ void PrintError(const char *Loc, const Twine &Msg); void PrintError(const Twine &Msg); void PrintError(const TGError &Error); +LLVM_ATTRIBUTE_NORETURN void PrintFatalError(const std::string &Msg); +LLVM_ATTRIBUTE_NORETURN void PrintFatalError(ArrayRef<SMLoc> ErrorLoc, + const std::string &Msg); extern SourceMgr SrcMgr; diff --git 
a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td index ed3db69a4e..5fb12f503e 100644 --- a/include/llvm/Target/Target.td +++ b/include/llvm/Target/Target.td @@ -1032,6 +1032,55 @@ class ProcessorModel<string n, SchedMachineModel m, list<SubtargetFeature> f> } //===----------------------------------------------------------------------===// +// InstrMapping - This class is used to create mapping tables to relate +// instructions with each other based on the values specified in RowFields, +// ColFields, KeyCol and ValueCols. +// +class InstrMapping { + // FilterClass - Used to limit search space only to the instructions that + // define the relationship modeled by this InstrMapping record. + string FilterClass; + + // RowFields - List of fields/attributes that should be same for all the + // instructions in a row of the relation table. Think of this as a set of + // properties shared by all the instructions related by this relationship + // model and is used to categorize instructions into subgroups. For instance, + // if we want to define a relation that maps 'Add' instruction to its + // predicated forms, we can define RowFields like this: + // + // let RowFields = BaseOp + // All add instruction predicated/non-predicated will have to set their BaseOp + // to the same value. + // + // def Add: { let BaseOp = 'ADD'; let predSense = 'nopred' } + // def Add_predtrue: { let BaseOp = 'ADD'; let predSense = 'true' } + // def Add_predfalse: { let BaseOp = 'ADD'; let predSense = 'false' } + list<string> RowFields = []; + + // List of fields/attributes that are same for all the instructions + // in a column of the relation table. + // Ex: let ColFields = 'predSense' -- It means that the columns are arranged + // based on the 'predSense' values. All the instruction in a specific + // column have the same value and it is fixed for the column according + // to the values set in 'ValueCols'. 
+ list<string> ColFields = []; + + // Values for the fields/attributes listed in 'ColFields'. + // Ex: let KeyCol = 'nopred' -- It means that the key instruction (instruction + // that models this relation) should be non-predicated. + // In the example above, 'Add' is the key instruction. + list<string> KeyCol = []; + + // List of values for the fields/attributes listed in 'ColFields', one for + // each column in the relation table. + // + // Ex: let ValueCols = [['true'],['false']] -- It adds two columns in the + // table. First column requires all the instructions to have predSense + // set to 'true' and second column requires it to be 'false'. + list<list<string> > ValueCols = []; +} + +//===----------------------------------------------------------------------===// // Pull in the common support for calling conventions. // include "llvm/Target/TargetCallingConv.td" diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index f34a7727cf..830e2d645a 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -103,6 +103,10 @@ public: TypeWidenVector // This vector should be widened into a larger vector. }; + /// LegalizeKind holds the legalization kind that needs to happen to EVT + /// in order to type-legalize it. + typedef std::pair<LegalizeTypeAction, EVT> LegalizeKind; + enum BooleanContent { // How the target represents true/false values. UndefinedBooleanContent, // Only bit 0 counts, the rest can hold garbage. ZeroOrOneBooleanContent, // All bits zero except for bit 0. @@ -1966,8 +1970,7 @@ private: ValueTypeActionImpl ValueTypeActions; - typedef std::pair<LegalizeTypeAction, EVT> LegalizeKind; - +public: LegalizeKind getTypeConversion(LLVMContext &Context, EVT VT) const { // If this is a simple type, use the ComputeRegisterProp mechanism. 
@@ -2084,6 +2087,7 @@ private: return LegalizeKind(TypeSplitVector, NVT); } +private: std::vector<std::pair<EVT, const TargetRegisterClass*> > AvailableRegClasses; /// TargetDAGCombineArray - Targets can specify ISD nodes that they would diff --git a/include/llvm/Target/TargetTransformImpl.h b/include/llvm/Target/TargetTransformImpl.h index 7648f4f935..25a7edeb01 100644 --- a/include/llvm/Target/TargetTransformImpl.h +++ b/include/llvm/Target/TargetTransformImpl.h @@ -16,6 +16,7 @@ #define LLVM_TARGET_TARGET_TRANSFORMATION_IMPL_H #include "llvm/TargetTransformInfo.h" +#include "llvm/CodeGen/ValueTypes.h" namespace llvm { @@ -47,7 +48,27 @@ public: virtual unsigned getJumpBufSize() const; }; -class VectorTargetTransformImpl : public VectorTargetTransformInfo { }; +class VectorTargetTransformImpl : public VectorTargetTransformInfo { +private: + const TargetLowering *TLI; + + /// Estimate the cost of type-legalization and the legalized type. + std::pair<unsigned, EVT> + getTypeLegalizationCost(LLVMContext &C, EVT Ty) const; + +public: + explicit VectorTargetTransformImpl(const TargetLowering *TL) : TLI(TL) {} + + virtual ~VectorTargetTransformImpl() {} + + virtual unsigned getInstrCost(unsigned Opcode, Type *Ty1, Type *Ty2) const; + + virtual unsigned getBroadcastCost(Type *Tp) const; + + virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src, + unsigned Alignment, + unsigned AddressSpace) const; +}; } // end llvm namespace diff --git a/include/llvm/TargetTransformInfo.h b/include/llvm/TargetTransformInfo.h index 82fc14dbd7..96470c30ca 100644 --- a/include/llvm/TargetTransformInfo.h +++ b/include/llvm/TargetTransformInfo.h @@ -54,10 +54,10 @@ public: TargetTransformInfo(const TargetTransformInfo &T) : ImmutablePass(ID), STTI(T.STTI), VTTI(T.VTTI) { } - const ScalarTargetTransformInfo* getScalarTargetTransformInfo() { + const ScalarTargetTransformInfo* getScalarTargetTransformInfo() const { return STTI; } - const VectorTargetTransformInfo* 
getVectorTargetTransformInfo() { + const VectorTargetTransformInfo* getVectorTargetTransformInfo() const { return VTTI; } @@ -119,8 +119,43 @@ public: } }; +/// VectorTargetTransformInfo - This interface is used by the vectorizers +/// to estimate the profitability of vectorization for different instructions. class VectorTargetTransformInfo { - // TODO: define an interface for VectorTargetTransformInfo. +public: + virtual ~VectorTargetTransformInfo() {} + + /// Returns the expected cost of the instruction opcode. The opcode is one of + /// the enums like Instruction::Add. The type arguments are the type of the + /// operation. + /// Most instructions only use the first type and in that case the second + /// operand is ignored. + /// + /// Exceptions: + /// * Br instructions do not use any of the types. + /// * Select instructions pass the return type as Ty1 and the selector as Ty2. + /// * Cast instructions pass the destination as Ty1 and the source as Ty2. + /// * Insert/Extract element pass only the vector type as Ty1. + /// * ShuffleVector, Load, Store do not use this call. + virtual unsigned getInstrCost(unsigned Opcode, + Type *Ty1 = 0, + Type *Ty2 = 0) const { + return 1; + } + + /// Returns the cost of a vector broadcast of a scalar at place zero to a + /// vector of type 'Tp'. + virtual unsigned getBroadcastCost(Type *Tp) const { + return 1; + } + + /// Returns the cost of Load and Store instructions. 
+ virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src, + unsigned Alignment, + unsigned AddressSpace) const { + return 1; + } + }; } // End llvm namespace diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h index fd1b5556ef..49eeb57622 100644 --- a/include/llvm/Transforms/Utils/Local.h +++ b/include/llvm/Transforms/Utils/Local.h @@ -177,8 +177,9 @@ static inline unsigned getKnownAlignment(Value *V, const DataLayout *TD = 0) { template<typename IRBuilderTy> Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &TD, User *GEP, bool NoAssumptions = false) { + unsigned AS = cast<GEPOperator>(GEP)->getPointerAddressSpace(); gep_type_iterator GTI = gep_type_begin(GEP); - Type *IntPtrTy = TD.getIntPtrType(GEP->getContext()); + Type *IntPtrTy = TD.getIntPtrType(GEP->getContext(), AS); Value *Result = Constant::getNullValue(IntPtrTy); // If the GEP is inbounds, we know that none of the addressing operations will @@ -186,7 +187,6 @@ Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &TD, User *GEP, bool isInBounds = cast<GEPOperator>(GEP)->isInBounds() && !NoAssumptions; // Build a mask for high order bits. - unsigned AS = cast<GEPOperator>(GEP)->getPointerAddressSpace(); unsigned IntPtrWidth = TD.getPointerSizeInBits(AS); uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth); diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 146897ad67..de6d61d78b 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -41,7 +41,7 @@ using namespace llvm; // Constant Folding internal helper functions //===----------------------------------------------------------------------===// -/// FoldBitCast - Constant fold bitcast, symbolically evaluating it with +/// FoldBitCast - Constant fold bitcast, symbolically evaluating it with /// DataLayout. This always returns a non-null constant, but it may be a /// ConstantExpr if unfoldable. 
static Constant *FoldBitCast(Constant *C, Type *DestTy, @@ -59,9 +59,9 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, return ConstantExpr::getBitCast(C, DestTy); unsigned NumSrcElts = CDV->getType()->getNumElements(); - + Type *SrcEltTy = CDV->getType()->getElementType(); - + // If the vector is a vector of floating point, convert it to vector of int // to simplify things. if (SrcEltTy->isFloatingPointTy()) { @@ -72,7 +72,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, C = ConstantExpr::getBitCast(C, SrcIVTy); CDV = cast<ConstantDataVector>(C); } - + // Now that we know that the input value is a vector of integers, just shift // and insert them into our result. unsigned BitShift = TD.getTypeAllocSizeInBits(SrcEltTy); @@ -84,43 +84,43 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, else Result |= CDV->getElementAsInteger(i); } - + return ConstantInt::get(IT, Result); } - + // The code below only handles casts to vectors currently. VectorType *DestVTy = dyn_cast<VectorType>(DestTy); if (DestVTy == 0) return ConstantExpr::getBitCast(C, DestTy); - + // If this is a scalar -> vector cast, convert the input into a <1 x scalar> // vector so the code below can handle it uniformly. if (isa<ConstantFP>(C) || isa<ConstantInt>(C)) { Constant *Ops = C; // don't take the address of C! return FoldBitCast(ConstantVector::get(Ops), DestTy, TD); } - + // If this is a bitcast from constant vector -> vector, fold it. if (!isa<ConstantDataVector>(C) && !isa<ConstantVector>(C)) return ConstantExpr::getBitCast(C, DestTy); - + // If the element types match, VMCore can fold it. 
unsigned NumDstElt = DestVTy->getNumElements(); unsigned NumSrcElt = C->getType()->getVectorNumElements(); if (NumDstElt == NumSrcElt) return ConstantExpr::getBitCast(C, DestTy); - + Type *SrcEltTy = C->getType()->getVectorElementType(); Type *DstEltTy = DestVTy->getElementType(); - - // Otherwise, we're changing the number of elements in a vector, which + + // Otherwise, we're changing the number of elements in a vector, which // requires endianness information to do the right thing. For example, // bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>) // folds to (little endian): // <4 x i32> <i32 0, i32 0, i32 1, i32 0> // and to (big endian): // <4 x i32> <i32 0, i32 0, i32 0, i32 1> - + // First thing is first. We only want to think about integer here, so if // we have something in FP form, recast it as integer. if (DstEltTy->isFloatingPointTy()) { @@ -130,11 +130,11 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumDstElt); // Recursively handle this integer conversion, if possible. C = FoldBitCast(C, DestIVTy, TD); - + // Finally, VMCore can handle this now that #elts line up. return ConstantExpr::getBitCast(C, DestTy); } - + // Okay, we know the destination is integer, if the input is FP, convert // it to integer first. if (SrcEltTy->isFloatingPointTy()) { @@ -148,13 +148,13 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, !isa<ConstantDataVector>(C)) return C; } - + // Now we know that the input and output vectors are both integer vectors // of the same size, and that their #elements is not the same. Do the // conversion here, which depends on whether the input or output has // more elements. 
bool isLittleEndian = TD.isLittleEndian(); - + SmallVector<Constant*, 32> Result; if (NumDstElt < NumSrcElt) { // Handle: bitcast (<4 x i32> <i32 0, i32 1, i32 2, i32 3> to <2 x i64>) @@ -170,15 +170,15 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, Constant *Src =dyn_cast<ConstantInt>(C->getAggregateElement(SrcElt++)); if (!Src) // Reject constantexpr elements. return ConstantExpr::getBitCast(C, DestTy); - + // Zero extend the element to the right size. Src = ConstantExpr::getZExt(Src, Elt->getType()); - + // Shift it to the right place, depending on endianness. - Src = ConstantExpr::getShl(Src, + Src = ConstantExpr::getShl(Src, ConstantInt::get(Src->getType(), ShiftAmt)); ShiftAmt += isLittleEndian ? SrcBitSize : -SrcBitSize; - + // Mix it in. Elt = ConstantExpr::getOr(Elt, Src); } @@ -186,30 +186,30 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, } return ConstantVector::get(Result); } - + // Handle: bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>) unsigned Ratio = NumDstElt/NumSrcElt; unsigned DstBitSize = DstEltTy->getPrimitiveSizeInBits(); - + // Loop over each source value, expanding into multiple results. for (unsigned i = 0; i != NumSrcElt; ++i) { Constant *Src = dyn_cast<ConstantInt>(C->getAggregateElement(i)); if (!Src) // Reject constantexpr elements. return ConstantExpr::getBitCast(C, DestTy); - + unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize*(Ratio-1); for (unsigned j = 0; j != Ratio; ++j) { // Shift the piece of the value into the right place, depending on // endianness. - Constant *Elt = ConstantExpr::getLShr(Src, + Constant *Elt = ConstantExpr::getLShr(Src, ConstantInt::get(Src->getType(), ShiftAmt)); ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize; - + // Truncate and remember this piece. 
Result.push_back(ConstantExpr::getTrunc(Elt, DstEltTy)); } } - + return ConstantVector::get(Result); } @@ -224,28 +224,28 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, Offset = 0; return true; } - + // Otherwise, if this isn't a constant expr, bail out. ConstantExpr *CE = dyn_cast<ConstantExpr>(C); if (!CE) return false; - + // Look through ptr->int and ptr->ptr casts. if (CE->getOpcode() == Instruction::PtrToInt || CE->getOpcode() == Instruction::BitCast) return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD); - - // i32* getelementptr ([5 x i32]* @a, i32 0, i32 5) + + // i32* getelementptr ([5 x i32]* @a, i32 0, i32 5) if (CE->getOpcode() == Instruction::GetElementPtr) { // Cannot compute this if the element type of the pointer is missing size // info. if (!cast<PointerType>(CE->getOperand(0)->getType()) ->getElementType()->isSized()) return false; - + // If the base isn't a global+constant, we aren't either. if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD)) return false; - + // Otherwise, add any offset that our operands provide. gep_type_iterator GTI = gep_type_begin(CE); for (User::const_op_iterator i = CE->op_begin() + 1, e = CE->op_end(); @@ -253,7 +253,7 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, ConstantInt *CI = dyn_cast<ConstantInt>(*i); if (!CI) return false; // Index isn't a simple constant? if (CI->isZero()) continue; // Not adding anything. 
- + if (StructType *ST = dyn_cast<StructType>(*GTI)) { // N = N + Offset Offset += TD.getStructLayout(ST)->getElementOffset(CI->getZExtValue()); @@ -264,7 +264,7 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, } return true; } - + return false; } @@ -277,27 +277,27 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, const DataLayout &TD) { assert(ByteOffset <= TD.getTypeAllocSize(C->getType()) && "Out of range access"); - + // If this element is zero or undefined, we can just return since *CurPtr is // zero initialized. if (isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) return true; - + if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) { if (CI->getBitWidth() > 64 || (CI->getBitWidth() & 7) != 0) return false; - + uint64_t Val = CI->getZExtValue(); unsigned IntBytes = unsigned(CI->getBitWidth()/8); - + for (unsigned i = 0; i != BytesLeft && ByteOffset != IntBytes; ++i) { CurPtr[i] = (unsigned char)(Val >> (ByteOffset * 8)); ++ByteOffset; } return true; } - + if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) { if (CFP->getType()->isDoubleTy()) { C = FoldBitCast(C, Type::getInt64Ty(C->getContext()), TD); @@ -309,13 +309,13 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, } return false; } - + if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) { const StructLayout *SL = TD.getStructLayout(CS->getType()); unsigned Index = SL->getElementContainingOffset(ByteOffset); uint64_t CurEltOffset = SL->getElementOffset(Index); ByteOffset -= CurEltOffset; - + while (1) { // If the element access is to the element itself and not to tail padding, // read the bytes from the element. @@ -325,9 +325,9 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, !ReadDataFromGlobal(CS->getOperand(Index), ByteOffset, CurPtr, BytesLeft, TD)) return false; - + ++Index; - + // Check to see if we read from the last struct element, if so we're done. 
if (Index == CS->getType()->getNumElements()) return true; @@ -375,11 +375,11 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, } return true; } - + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { if (CE->getOpcode() == Instruction::IntToPtr && - CE->getOperand(0)->getType() == TD.getIntPtrType(CE->getContext())) - return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr, + CE->getOperand(0)->getType() == TD.getIntPtrType(CE->getType())) + return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr, BytesLeft, TD); } @@ -391,7 +391,7 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, const DataLayout &TD) { Type *LoadTy = cast<PointerType>(C->getType())->getElementType(); IntegerType *IntType = dyn_cast<IntegerType>(LoadTy); - + // If this isn't an integer load we can't fold it directly. if (!IntType) { // If this is a float/double load, we can try folding it as an int32/64 load @@ -415,15 +415,15 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, return FoldBitCast(Res, LoadTy, TD); return 0; } - + unsigned BytesLoaded = (IntType->getBitWidth() + 7) / 8; if (BytesLoaded > 32 || BytesLoaded == 0) return 0; - + GlobalValue *GVal; int64_t Offset; if (!IsConstantOffsetFromGlobal(C, GVal, Offset, TD)) return 0; - + GlobalVariable *GV = dyn_cast<GlobalVariable>(GVal); if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() || !GV->getInitializer()->getType()->isSized()) @@ -432,11 +432,11 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, // If we're loading off the beginning of the global, some bytes may be valid, // but we don't try to handle this. if (Offset < 0) return 0; - + // If we're not accessing anything in this constant, the result is undefined. 
if (uint64_t(Offset) >= TD.getTypeAllocSize(GV->getInitializer()->getType())) return UndefValue::get(IntType); - + unsigned char RawBytes[32] = {0}; if (!ReadDataFromGlobal(GV->getInitializer(), Offset, RawBytes, BytesLoaded, TD)) @@ -464,15 +464,15 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, // If the loaded value isn't a constant expr, we can't handle it. ConstantExpr *CE = dyn_cast<ConstantExpr>(C); if (!CE) return 0; - + if (CE->getOpcode() == Instruction::GetElementPtr) { if (GlobalVariable *GV = dyn_cast<GlobalVariable>(CE->getOperand(0))) if (GV->isConstant() && GV->hasDefinitiveInitializer()) - if (Constant *V = + if (Constant *V = ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE)) return V; } - + // Instead of loading constant c string, use corresponding integer value // directly if string length is small enough. StringRef Str; @@ -500,14 +500,14 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, SingleChar = 0; StrVal = (StrVal << 8) | SingleChar; } - + Constant *Res = ConstantInt::get(CE->getContext(), StrVal); if (Ty->isFloatingPointTy()) Res = ConstantExpr::getBitCast(Res, Ty); return Res; } } - + // If this load comes from anywhere in a constant global, and if the global // is all undef or zero, we know what it loads. if (GlobalVariable *GV = @@ -520,7 +520,7 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, return UndefValue::get(ResTy); } } - + // Try hard to fold loads from bitcasted strange and non-type-safe things. We // currently don't do any of this for big endian systems. It can be // generalized in the future if someone is interested. 
@@ -531,7 +531,7 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, static Constant *ConstantFoldLoadInst(const LoadInst *LI, const DataLayout *TD){ if (LI->isVolatile()) return 0; - + if (Constant *C = dyn_cast<Constant>(LI->getOperand(0))) return ConstantFoldLoadFromConstPtr(C, TD); @@ -540,23 +540,23 @@ static Constant *ConstantFoldLoadInst(const LoadInst *LI, const DataLayout *TD){ /// SymbolicallyEvaluateBinop - One of Op0/Op1 is a constant expression. /// Attempt to symbolically evaluate the result of a binary operator merging -/// these together. If target data info is available, it is provided as TD, +/// these together. If target data info is available, it is provided as TD, /// otherwise TD is null. static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, Constant *Op1, const DataLayout *TD){ // SROA - + // Fold (and 0xffffffff00000000, (shl x, 32)) -> shl. // Fold (lshr (or X, Y), 32) -> (lshr [X/Y], 32) if one doesn't contribute // bits. - - + + // If the constant expr is something like &A[123] - &A[4].f, fold this into a // constant. This happens frequently when iterating over a global array. 
if (Opc == Instruction::Sub && TD) { GlobalValue *GV1, *GV2; int64_t Offs1, Offs2; - + if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, *TD)) if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, *TD) && GV1 == GV2) { @@ -564,7 +564,7 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, return ConstantInt::get(Op0->getType(), Offs1-Offs2); } } - + return 0; } @@ -575,7 +575,7 @@ static Constant *CastGEPIndices(ArrayRef<Constant *> Ops, Type *ResultTy, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TD) return 0; - Type *IntPtrTy = TD->getIntPtrType(ResultTy->getContext()); + Type *IntPtrTy = TD->getIntPtrType(ResultTy); bool Any = false; SmallVector<Constant*, 32> NewIdxs; @@ -628,14 +628,15 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops, if (!TD || !cast<PointerType>(Ptr->getType())->getElementType()->isSized() || !Ptr->getType()->isPointerTy()) return 0; - - Type *IntPtrTy = TD->getIntPtrType(Ptr->getContext()); + + unsigned AS = cast<PointerType>(Ptr->getType())->getAddressSpace(); + Type *IntPtrTy = TD->getIntPtrType(Ptr->getContext(), AS); // If this is a constant expr gep that is effectively computing an // "offsetof", fold it into 'cast int Size to T*' instead of 'gep 0, 0, 12' for (unsigned i = 1, e = Ops.size(); i != e; ++i) if (!isa<ConstantInt>(Ops[i])) { - + // If this is "gep i8* Ptr, (sub 0, V)", fold this as: // "inttoptr (sub (ptrtoint Ptr), V)" if (Ops.size() == 2 && @@ -702,6 +703,8 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops, // Also, this helps GlobalOpt do SROA on GlobalVariables. 
Type *Ty = Ptr->getType(); assert(Ty->isPointerTy() && "Forming regular GEP of non-pointer type"); + assert(Ty->getPointerAddressSpace() == AS + && "Operand and result of GEP should be in the same address space."); SmallVector<Constant*, 32> NewIdxs; do { if (SequentialType *ATy = dyn_cast<SequentialType>(Ty)) { @@ -709,15 +712,15 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops, // The only pointer indexing we'll do is on the first index of the GEP. if (!NewIdxs.empty()) break; - + // Only handle pointers to sized types, not pointers to functions. if (!ATy->getElementType()->isSized()) return 0; } - + // Determine which element of the array the offset points into. APInt ElemSize(BitWidth, TD->getTypeAllocSize(ATy->getElementType())); - IntegerType *IntPtrTy = TD->getIntPtrType(Ty->getContext()); + IntegerType *IntPtrTy = TD->getIntPtrType(Ty->getContext(), AS); if (ElemSize == 0) // The element size is 0. This may be [0 x Ty]*, so just use a zero // index for this level and proceed to the next level to see if it can @@ -837,7 +840,7 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, if (const CmpInst *CI = dyn_cast<CmpInst>(I)) return ConstantFoldCompareInstOperands(CI->getPredicate(), Ops[0], Ops[1], TD, TLI); - + if (const LoadInst *LI = dyn_cast<LoadInst>(I)) return ConstantFoldLoadInst(LI, TD); @@ -887,19 +890,19 @@ Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE, /// information, due to only being passed an opcode and operands. Constant /// folding using this function strips this information. /// -Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy, +Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy, ArrayRef<Constant *> Ops, const DataLayout *TD, - const TargetLibraryInfo *TLI) { + const TargetLibraryInfo *TLI) { // Handle easy binops first. 
if (Instruction::isBinaryOp(Opcode)) { if (isa<ConstantExpr>(Ops[0]) || isa<ConstantExpr>(Ops[1])) if (Constant *C = SymbolicallyEvaluateBinop(Opcode, Ops[0], Ops[1], TD)) return C; - + return ConstantExpr::get(Opcode, Ops[0], Ops[1]); } - + switch (Opcode) { default: return 0; case Instruction::ICmp: @@ -918,7 +921,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy, unsigned InWidth = Input->getType()->getScalarSizeInBits(); unsigned AS = cast<PointerType>(CE->getType())->getAddressSpace(); if (TD->getPointerSizeInBits(AS) < InWidth) { - Constant *Mask = + Constant *Mask = ConstantInt::get(CE->getContext(), APInt::getLowBitsSet(InWidth, TD->getPointerSizeInBits(AS))); Input = ConstantExpr::getAnd(Input, Mask); @@ -934,8 +937,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy, // pointer, so it can't be done in ConstantExpr::getCast. if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) if (TD && CE->getOpcode() == Instruction::PtrToInt && - TD->getPointerSizeInBits( - cast<PointerType>(CE->getOperand(0)->getType())->getAddressSpace()) + TD->getTypeSizeInBits(CE->getOperand(0)->getType()) <= CE->getType()->getScalarSizeInBits()) return FoldBitCast(CE->getOperand(0), DestTy, *TD); @@ -967,7 +969,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy, return C; if (Constant *C = SymbolicallyEvaluateGEP(Ops, DestTy, TD, TLI)) return C; - + return ConstantExpr::getGetElementPtr(Ops[0], Ops.slice(1)); } } @@ -977,7 +979,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy, /// returns a constant expression of the specified operands. 
/// Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, - Constant *Ops0, Constant *Ops1, + Constant *Ops0, Constant *Ops1, const DataLayout *TD, const TargetLibraryInfo *TLI) { // fold: icmp (inttoptr x), null -> icmp x, 0 @@ -988,9 +990,10 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, // ConstantExpr::getCompare cannot do this, because it doesn't have TD // around to know if bit truncation is happening. if (ConstantExpr *CE0 = dyn_cast<ConstantExpr>(Ops0)) { + Type *IntPtrTy = NULL; if (TD && Ops1->isNullValue()) { - Type *IntPtrTy = TD->getIntPtrType(CE0->getContext()); if (CE0->getOpcode() == Instruction::IntToPtr) { + IntPtrTy = TD->getIntPtrType(CE0->getType()); // Convert the integer value to the right size to ensure we get the // proper extension or truncation. Constant *C = ConstantExpr::getIntegerCast(CE0->getOperand(0), @@ -998,22 +1001,24 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, Constant *Null = Constant::getNullValue(C->getType()); return ConstantFoldCompareInstOperands(Predicate, C, Null, TD, TLI); } - + // Only do this transformation if the int is intptrty in size, otherwise // there is a truncation or extension that we aren't modeling. 
- if (CE0->getOpcode() == Instruction::PtrToInt && - CE0->getType() == IntPtrTy) { - Constant *C = CE0->getOperand(0); - Constant *Null = Constant::getNullValue(C->getType()); - return ConstantFoldCompareInstOperands(Predicate, C, Null, TD, TLI); + if (CE0->getOpcode() == Instruction::PtrToInt) { + IntPtrTy = TD->getIntPtrType(CE0->getOperand(0)->getType()); + if (CE0->getType() == IntPtrTy) { + Constant *C = CE0->getOperand(0); + Constant *Null = Constant::getNullValue(C->getType()); + return ConstantFoldCompareInstOperands(Predicate, C, Null, TD, TLI); + } } } - + if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(Ops1)) { if (TD && CE0->getOpcode() == CE1->getOpcode()) { - Type *IntPtrTy = TD->getIntPtrType(CE0->getContext()); if (CE0->getOpcode() == Instruction::IntToPtr) { + Type *IntPtrTy = TD->getIntPtrType(CE0->getType()); // Convert the integer value to the right size to ensure we get the // proper extension or truncation. Constant *C0 = ConstantExpr::getIntegerCast(CE0->getOperand(0), @@ -1022,34 +1027,36 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, IntPtrTy, false); return ConstantFoldCompareInstOperands(Predicate, C0, C1, TD, TLI); } + } - // Only do this transformation if the int is intptrty in size, otherwise - // there is a truncation or extension that we aren't modeling. - if ((CE0->getOpcode() == Instruction::PtrToInt && - CE0->getType() == IntPtrTy && - CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType())) + // Only do this transformation if the int is intptrty in size, otherwise + // there is a truncation or extension that we aren't modeling. 
+ if (CE0->getOpcode() == Instruction::PtrToInt) { + IntPtrTy = TD->getIntPtrType(CE0->getOperand(0)->getType()); + if (CE0->getType() == IntPtrTy && + CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType()) return ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0), - CE1->getOperand(0), TD, TLI); + CE1->getOperand(0), TD, TLI); } } - + // icmp eq (or x, y), 0 -> (icmp eq x, 0) & (icmp eq y, 0) // icmp ne (or x, y), 0 -> (icmp ne x, 0) | (icmp ne y, 0) if ((Predicate == ICmpInst::ICMP_EQ || Predicate == ICmpInst::ICMP_NE) && CE0->getOpcode() == Instruction::Or && Ops1->isNullValue()) { - Constant *LHS = + Constant *LHS = ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0), Ops1, TD, TLI); - Constant *RHS = + Constant *RHS = ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(1), Ops1, TD, TLI); - unsigned OpC = + unsigned OpC = Predicate == ICmpInst::ICMP_EQ ? Instruction::And : Instruction::Or; Constant *Ops[] = { LHS, RHS }; return ConstantFoldInstOperands(OpC, LHS->getType(), Ops, TD, TLI); } } - + return ConstantExpr::getCompare(Predicate, Ops0, Ops1); } @@ -1057,7 +1064,7 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, /// ConstantFoldLoadThroughGEPConstantExpr - Given a constant and a /// getelementptr constantexpr, return the constant value being addressed by the /// constant expression, or null if something is funny and we can't decide. -Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C, +Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C, ConstantExpr *CE) { if (!CE->getOperand(1)->isNullValue()) return 0; // Do not allow stepping over the value! @@ -1127,14 +1134,14 @@ llvm::canConstantFoldCallTo(const Function *F) { if (!F->hasName()) return false; StringRef Name = F->getName(); - + // In these cases, the check of the length is required. We don't want to // return true for a name like "cos\0blah" which strcmp would return equal to // "cos", but has length 8. 
switch (Name[0]) { default: return false; case 'a': - return Name == "acos" || Name == "asin" || + return Name == "acos" || Name == "asin" || Name == "atan" || Name == "atan2"; case 'c': return Name == "cos" || Name == "ceil" || Name == "cosf" || Name == "cosh"; @@ -1154,7 +1161,7 @@ llvm::canConstantFoldCallTo(const Function *F) { } } -static Constant *ConstantFoldFP(double (*NativeFP)(double), double V, +static Constant *ConstantFoldFP(double (*NativeFP)(double), double V, Type *Ty) { sys::llvm_fenv_clearexcept(); V = NativeFP(V); @@ -1162,7 +1169,7 @@ static Constant *ConstantFoldFP(double (*NativeFP)(double), double V, sys::llvm_fenv_clearexcept(); return 0; } - + if (Ty->isFloatTy()) return ConstantFP::get(Ty->getContext(), APFloat((float)V)); if (Ty->isDoubleTy()) @@ -1178,7 +1185,7 @@ static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), sys::llvm_fenv_clearexcept(); return 0; } - + if (Ty->isFloatTy()) return ConstantFP::get(Ty->getContext(), APFloat((float)V)); if (Ty->isDoubleTy()) @@ -1272,7 +1279,7 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands, case 'e': if (Name == "exp" && TLI->has(LibFunc::exp)) return ConstantFoldFP(exp, V, Ty); - + if (Name == "exp2" && TLI->has(LibFunc::exp2)) { // Constant fold exp2(x) as pow(2,x) in case the host doesn't have a // C99 library. @@ -1348,7 +1355,7 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands, } // Support ConstantVector in case we have an Undef in the top. 
- if (isa<ConstantVector>(Operands[0]) || + if (isa<ConstantVector>(Operands[0]) || isa<ConstantDataVector>(Operands[0])) { Constant *Op = cast<Constant>(Operands[0]); switch (F->getIntrinsicID()) { @@ -1367,11 +1374,11 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands, case Intrinsic::x86_sse2_cvttsd2si64: if (ConstantFP *FPOp = dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U))) - return ConstantFoldConvertToInt(FPOp->getValueAPF(), + return ConstantFoldConvertToInt(FPOp->getValueAPF(), /*roundTowardZero=*/true, Ty); } } - + if (isa<UndefValue>(Operands[0])) { if (F->getIntrinsicID() == Intrinsic::bswap) return Operands[0]; @@ -1385,14 +1392,14 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands, if (ConstantFP *Op1 = dyn_cast<ConstantFP>(Operands[0])) { if (!Ty->isFloatTy() && !Ty->isDoubleTy()) return 0; - double Op1V = Ty->isFloatTy() ? + double Op1V = Ty->isFloatTy() ? (double)Op1->getValueAPF().convertToFloat() : Op1->getValueAPF().convertToDouble(); if (ConstantFP *Op2 = dyn_cast<ConstantFP>(Operands[1])) { if (Op2->getType() != Op1->getType()) return 0; - double Op2V = Ty->isFloatTy() ? + double Op2V = Ty->isFloatTy() ? 
(double)Op2->getValueAPF().convertToFloat(): Op2->getValueAPF().convertToDouble(); @@ -1419,7 +1426,7 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands, } return 0; } - + if (ConstantInt *Op1 = dyn_cast<ConstantInt>(Operands[0])) { if (ConstantInt *Op2 = dyn_cast<ConstantInt>(Operands[1])) { switch (F->getIntrinsicID()) { @@ -1469,7 +1476,7 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands, return ConstantInt::get(Ty, Op1->getValue().countLeadingZeros()); } } - + return 0; } return 0; diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp index 016fe396e7..f97f0f2de6 100644 --- a/lib/Analysis/DependenceAnalysis.cpp +++ b/lib/Analysis/DependenceAnalysis.cpp @@ -55,12 +55,16 @@ #include "llvm/Analysis/DependenceAnalysis.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Instructions.h" #include "llvm/Operator.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/InstIterator.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index 95e58022ca..64e183d60c 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -788,7 +788,7 @@ ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) { assert(V->getType()->isPointerTy() && "Unexpected operand type!"); } while (Visited.insert(V)); - Type *IntPtrTy = TD->getIntPtrType(V->getContext()); + Type *IntPtrTy = TD->getIntPtrType(V->getType()); return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset)); } @@ -828,8 +828,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { // size of the byval type by the target's pointer size. 
PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType()); unsigned TypeSize = TD->getTypeSizeInBits(PTy->getElementType()); - unsigned AS = PTy->getAddressSpace(); - unsigned PointerSize = TD->getPointerSizeInBits(AS); + unsigned PointerSize = TD->getTypeSizeInBits(PTy); // Ceiling division. unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize; diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 8e326122fa..7ef74f67ce 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -728,7 +728,7 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout &TD, assert(V->getType()->isPointerTy() && "Unexpected operand type!"); } while (Visited.insert(V)); - Type *IntPtrTy = TD.getIntPtrType(V->getContext()); + Type *IntPtrTy = TD.getIntPtrType(V->getContext(), AS); return ConstantInt::get(IntPtrTy, Offset); } @@ -1880,9 +1880,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // Turn icmp (ptrtoint x), (ptrtoint/constant) into a compare of the input // if the integer type is the same size as the pointer type. if (MaxRecurse && Q.TD && isa<PtrToIntInst>(LI) && - Q.TD->getPointerSizeInBits( - cast<PtrToIntInst>(LI)->getPointerAddressSpace()) == - DstTy->getPrimitiveSizeInBits()) { + Q.TD->getTypeSizeInBits(SrcTy) == DstTy->getPrimitiveSizeInBits()) { if (Constant *RHSC = dyn_cast<Constant>(RHS)) { // Transfer the cast to the constant. if (Value *V = SimplifyICmpInst(Pred, SrcOp, diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp index 6d6d580ed1..d62808e9cd 100644 --- a/lib/Analysis/Lint.cpp +++ b/lib/Analysis/Lint.cpp @@ -626,8 +626,7 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk, if (W != V) return findValueImpl(W, OffsetOk, Visited); } else if (CastInst *CI = dyn_cast<CastInst>(V)) { - if (CI->isNoopCast(TD ? 
TD->getIntPtrType(V->getContext()) : - Type::getInt64Ty(V->getContext()))) + if (CI->isNoopCast(*TD)) return findValueImpl(CI->getOperand(0), OffsetOk, Visited); } else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) { if (Value *W = FindInsertedValue(Ex->getAggregateOperand(), @@ -640,7 +639,7 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk, if (CastInst::isNoopCast(Instruction::CastOps(CE->getOpcode()), CE->getOperand(0)->getType(), CE->getType(), - TD ? TD->getIntPtrType(V->getContext()) : + TD ? TD->getIntPtrType(CE->getType()) : Type::getInt64Ty(V->getContext()))) return findValueImpl(CE->getOperand(0), OffsetOk, Visited); } else if (CE->getOpcode() == Instruction::ExtractValue) { diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp index 0a539fe758..8d903c63af 100644 --- a/lib/Analysis/MemoryBuiltins.cpp +++ b/lib/Analysis/MemoryBuiltins.cpp @@ -376,9 +376,10 @@ APInt ObjectSizeOffsetVisitor::align(APInt Size, uint64_t Align) { ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout *TD, const TargetLibraryInfo *TLI, LLVMContext &Context, - bool RoundToAlign) + bool RoundToAlign, + unsigned AS) : TD(TD), TLI(TLI), RoundToAlign(RoundToAlign) { - IntegerType *IntTy = TD->getIntPtrType(Context); + IntegerType *IntTy = TD->getIntPtrType(Context, AS); IntTyBits = IntTy->getBitWidth(); Zero = APInt::getNullValue(IntTyBits); } @@ -561,9 +562,10 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitInstruction(Instruction &I) { ObjectSizeOffsetEvaluator::ObjectSizeOffsetEvaluator(const DataLayout *TD, const TargetLibraryInfo *TLI, - LLVMContext &Context) + LLVMContext &Context, + unsigned AS) : TD(TD), TLI(TLI), Context(Context), Builder(Context, TargetFolder(TD)) { - IntTy = TD->getIntPtrType(Context); + IntTy = TD->getIntPtrType(Context, AS); Zero = ConstantInt::get(IntTy, 0); } diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index 9316df6fbf..9872890494 100644 --- 
a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -983,7 +983,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end(); I != E; ++I) { Visited.insert(std::make_pair(I->getBB(), Addr)); - if (!I->getResult().isNonLocal()) + if (!I->getResult().isNonLocal() && DT->isReachableFromEntry(I->getBB())) Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(), Addr)); } ++NumCacheCompleteNonLocalPtr; @@ -1029,7 +1029,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, NumSortedEntries); // If we got a Def or Clobber, add this to the list of results. - if (!Dep.isNonLocal()) { + if (!Dep.isNonLocal() && DT->isReachableFromEntry(BB)) { Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr())); continue; } diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 5400646be1..148912b766 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -2581,13 +2581,12 @@ const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS, return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS))); } -const SCEV *ScalarEvolution::getSizeOfExpr(Type *AllocTy) { +const SCEV *ScalarEvolution::getSizeOfExpr(Type *AllocTy, Type *IntPtrTy) { // If we have DataLayout, we can bypass creating a target-independent // constant expression and then folding it back into a ConstantInt. // This is just a compile-time optimization. 
if (TD) - return getConstant(TD->getIntPtrType(getContext()), - TD->getTypeAllocSize(AllocTy)); + return getConstant(IntPtrTy, TD->getTypeAllocSize(AllocTy)); Constant *C = ConstantExpr::getSizeOf(AllocTy); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) @@ -2606,13 +2605,13 @@ const SCEV *ScalarEvolution::getAlignOfExpr(Type *AllocTy) { return getTruncateOrZeroExtend(getSCEV(C), Ty); } -const SCEV *ScalarEvolution::getOffsetOfExpr(StructType *STy, +const SCEV *ScalarEvolution::getOffsetOfExpr(StructType *STy, Type *IntPtrTy, unsigned FieldNo) { // If we have DataLayout, we can bypass creating a target-independent // constant expression and then folding it back into a ConstantInt. // This is just a compile-time optimization. if (TD) - return getConstant(TD->getIntPtrType(getContext()), + return getConstant(IntPtrTy, TD->getStructLayout(STy)->getElementOffset(FieldNo)); Constant *C = ConstantExpr::getOffsetOf(STy, FieldNo); @@ -2699,7 +2698,7 @@ Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const { // The only other support type is pointer. assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!"); - if (TD) return TD->getIntPtrType(getContext()); + if (TD) return TD->getIntPtrType(Ty); // Without DataLayout, conservatively assume pointers are 64-bit. return Type::getInt64Ty(getContext()); @@ -3152,13 +3151,13 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) { if (StructType *STy = dyn_cast<StructType>(*GTI++)) { // For a struct, add the member offset. unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue(); - const SCEV *FieldOffset = getOffsetOfExpr(STy, FieldNo); + const SCEV *FieldOffset = getOffsetOfExpr(STy, IntPtrTy, FieldNo); // Add the field offset to the running total offset. TotalOffset = getAddExpr(TotalOffset, FieldOffset); } else { // For an array, add the element offset, explicitly scaled. 
- const SCEV *ElementSize = getSizeOfExpr(*GTI); + const SCEV *ElementSize = getSizeOfExpr(*GTI, IntPtrTy); const SCEV *IndexS = getSCEV(Index); // Getelementptr indices are signed. IndexS = getTruncateOrSignExtend(IndexS, IntPtrTy); @@ -3980,8 +3979,11 @@ getSmallConstantTripMultiple(Loop *L, BasicBlock *ExitingBlock) { ConstantInt *Result = MulC->getValue(); - // Guard against huge trip counts. - if (!Result || Result->getValue().getActiveBits() > 32) + // Guard against huge trip counts (this requires checking + // for zero to handle the case where the trip count == -1 and the + // addition wraps). + if (!Result || Result->getValue().getActiveBits() > 32 || + Result->getValue().getActiveBits() == 0) return 1; return (unsigned)Result->getZExtValue(); diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index 111bfb4a6a..0295da5e4a 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -417,7 +417,9 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, // array indexing. SmallVector<const SCEV *, 8> ScaledOps; if (ElTy->isSized()) { - const SCEV *ElSize = SE.getSizeOfExpr(ElTy); + Type *IntPtrTy = SE.TD ? 
SE.TD->getIntPtrType(PTy) : + IntegerType::getInt64Ty(PTy->getContext()); + const SCEV *ElSize = SE.getSizeOfExpr(ElTy, IntPtrTy); if (!ElSize->isZero()) { SmallVector<const SCEV *, 8> NewOps; for (unsigned i = 0, e = Ops.size(); i != e; ++i) { diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 464dfd51d6..91f973d8d3 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -527,6 +527,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(ptx_device); KEYWORD(spir_kernel); KEYWORD(spir_func); + KEYWORD(intel_ocl_bicc); KEYWORD(cc); KEYWORD(c); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index cc7cc31246..75fc16cd95 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -1094,6 +1094,7 @@ bool LLParser::ParseOptionalVisibility(unsigned &Res) { /// ::= /*empty*/ /// ::= 'ccc' /// ::= 'fastcc' +/// ::= 'kw_intel_ocl_bicc' /// ::= 'coldcc' /// ::= 'x86_stdcallcc' /// ::= 'x86_fastcallcc' @@ -1125,6 +1126,7 @@ bool LLParser::ParseOptionalCallingConv(CallingConv::ID &CC) { case lltok::kw_ptx_device: CC = CallingConv::PTX_Device; break; case lltok::kw_spir_kernel: CC = CallingConv::SPIR_KERNEL; break; case lltok::kw_spir_func: CC = CallingConv::SPIR_FUNC; break; + case lltok::kw_intel_ocl_bicc: CC = CallingConv::Intel_OCL_BI; break; case lltok::kw_cc: { unsigned ArbitraryCC; Lex.Lex(); diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index ff6d68f0da..6cffc52d17 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -77,6 +77,7 @@ namespace lltok { kw_c, kw_cc, kw_ccc, kw_fastcc, kw_coldcc, + kw_intel_ocl_bicc, kw_x86_stdcallcc, kw_x86_fastcallcc, kw_x86_thiscallcc, kw_arm_apcscc, kw_arm_aapcscc, kw_arm_aapcs_vfpcc, kw_msp430_intrcc, diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 788a89bf13..81eec3c9ac 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -401,8 +401,8 
@@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // - __tlv_bootstrap - used to make sure support exists // - spare pointer, used when mapped by the runtime // - pointer to mangled symbol above with initializer - unsigned AS = GV->getType()->getAddressSpace(); - unsigned PtrSize = TD->getPointerSizeInBits(AS)/8; + assert(GV->getType()->isPointerTy() && "GV must be a pointer type!"); + unsigned PtrSize = TD->getTypeSizeInBits(GV->getType())/8; OutStreamer.EmitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"), PtrSize, 0); OutStreamer.EmitIntValue(0, PtrSize, 0); @@ -1538,9 +1538,9 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) { if (Offset == 0) return Base; - unsigned AS = cast<PointerType>(CE->getType())->getAddressSpace(); + assert(CE->getType()->isPointerTy() && "We must have a pointer type!"); // Truncate/sext the offset to the pointer size. - unsigned Width = TD.getPointerSizeInBits(AS); + unsigned Width = TD.getTypeSizeInBits(CE->getType()); if (Width < 64) Offset = SignExtend64(Offset, Width); @@ -1562,7 +1562,7 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) { // Handle casts to pointers by changing them into casts to the appropriate // integer type. This promotes constant folding and simplifies this code. 
Constant *Op = CE->getOperand(0); - Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()), + Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CE->getType()), false/*ZExt*/); return lowerConstant(Op, AP); } diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp index 7f28828a5d..d5d84041b6 100644 --- a/lib/CodeGen/EarlyIfConversion.cpp +++ b/lib/CodeGen/EarlyIfConversion.cpp @@ -797,6 +797,5 @@ bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) { if (tryConvertIf(I->getBlock())) Changed = true; - MF.verify(this, "After early if-conversion"); return Changed; } diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp index eab10f2fd4..35f9e270dd 100644 --- a/lib/CodeGen/IntrinsicLowering.cpp +++ b/lib/CodeGen/IntrinsicLowering.cpp @@ -155,21 +155,21 @@ void IntrinsicLowering::AddPrototypes(Module &M) { Type::getInt8PtrTy(Context), Type::getInt8PtrTy(Context), Type::getInt8PtrTy(Context), - TD.getIntPtrType(Context), (Type *)0); + TD.getIntPtrType(Context, 0), (Type *)0); break; case Intrinsic::memmove: M.getOrInsertFunction("memmove", Type::getInt8PtrTy(Context), Type::getInt8PtrTy(Context), Type::getInt8PtrTy(Context), - TD.getIntPtrType(Context), (Type *)0); + TD.getIntPtrType(Context, 0), (Type *)0); break; case Intrinsic::memset: M.getOrInsertFunction("memset", Type::getInt8PtrTy(Context), Type::getInt8PtrTy(Context), Type::getInt32Ty(M.getContext()), - TD.getIntPtrType(Context), (Type *)0); + TD.getIntPtrType(Context, 0), (Type *)0); break; case Intrinsic::sqrt: EnsureFPIntrinsicsExist(M, I, "sqrtf", "sqrt", "sqrtl"); @@ -497,7 +497,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { break; // Strip out annotate intrinsic case Intrinsic::memcpy: { - IntegerType *IntPtr = TD.getIntPtrType(Context); + IntegerType *IntPtr = TD.getIntPtrType(CI->getArgOperand(0)->getType()); Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr, /* isSigned */ false); 
Value *Ops[3]; @@ -508,7 +508,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { break; } case Intrinsic::memmove: { - IntegerType *IntPtr = TD.getIntPtrType(Context); + IntegerType *IntPtr = TD.getIntPtrType(CI->getArgOperand(0)->getType()); Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr, /* isSigned */ false); Value *Ops[3]; @@ -519,7 +519,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { break; } case Intrinsic::memset: { - IntegerType *IntPtr = TD.getIntPtrType(Context); + IntegerType *IntPtr = TD.getIntPtrType(CI->getArgOperand(0)->getType()); Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr, /* isSigned */ false); Value *Ops[3]; diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index ce5f414597..0a85179293 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -270,6 +270,8 @@ namespace { SDValue ReduceLoadWidth(SDNode *N); SDValue ReduceLoadOpStoreWidth(SDNode *N); SDValue TransformFPLoadStorePair(SDNode *N); + SDValue reduceBuildVecExtToExtBuildVec(SDNode *N); + SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N); SDValue GetDemandedBits(SDValue V, const APInt &Mask); @@ -8356,15 +8358,21 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { return SDValue(); } -SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { +// Simplify (build_vec (ext )) to (bitcast (build_vec )) +SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { + // We perform this optimization post type-legalization because + // the type-legalizer often scalarizes integer-promoted vectors. + // Performing this optimization before may create bit-casts which + // will be type-legalized to complex code sequences. + // We perform this optimization only before the operation legalizer because we + // may introduce illegal operations. 
+ if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes) + return SDValue(); + unsigned NumInScalars = N->getNumOperands(); DebugLoc dl = N->getDebugLoc(); EVT VT = N->getValueType(0); - // A vector built entirely of undefs is undef. - if (ISD::allOperandsUndef(N)) - return DAG.getUNDEF(VT); - // Check to see if this is a BUILD_VECTOR of a bunch of values // which come from any_extend or zero_extend nodes. If so, we can create // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR @@ -8407,64 +8415,141 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { // In order to have valid types, all of the inputs must be extended from the // same source type and all of the inputs must be any or zero extend. // Scalar sizes must be a power of two. - EVT OutScalarTy = N->getValueType(0).getScalarType(); + EVT OutScalarTy = VT.getScalarType(); bool ValidTypes = SourceType != MVT::Other && isPowerOf2_32(OutScalarTy.getSizeInBits()) && isPowerOf2_32(SourceType.getSizeInBits()); - // We perform this optimization post type-legalization because - // the type-legalizer often scalarizes integer-promoted vectors. - // Performing this optimization before may create bit-casts which - // will be type-legalized to complex code sequences. - // We perform this optimization only before the operation legalizer because we - // may introduce illegal operations. // Create a new simpler BUILD_VECTOR sequence which other optimizations can // turn into a single shuffle instruction. - if ((Level == AfterLegalizeVectorOps || Level == AfterLegalizeTypes) && - ValidTypes) { - bool isLE = TLI.isLittleEndian(); - unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits(); - assert(ElemRatio > 1 && "Invalid element size ratio"); - SDValue Filler = AllAnyExt ? 
DAG.getUNDEF(SourceType): - DAG.getConstant(0, SourceType); - - unsigned NewBVElems = ElemRatio * N->getValueType(0).getVectorNumElements(); - SmallVector<SDValue, 8> Ops(NewBVElems, Filler); - - // Populate the new build_vector - for (unsigned i=0; i < N->getNumOperands(); ++i) { - SDValue Cast = N->getOperand(i); - assert((Cast.getOpcode() == ISD::ANY_EXTEND || - Cast.getOpcode() == ISD::ZERO_EXTEND || - Cast.getOpcode() == ISD::UNDEF) && "Invalid cast opcode"); - SDValue In; - if (Cast.getOpcode() == ISD::UNDEF) - In = DAG.getUNDEF(SourceType); - else - In = Cast->getOperand(0); - unsigned Index = isLE ? (i * ElemRatio) : - (i * ElemRatio + (ElemRatio - 1)); + if (!ValidTypes) + return SDValue(); + + bool isLE = TLI.isLittleEndian(); + unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits(); + assert(ElemRatio > 1 && "Invalid element size ratio"); + SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType): + DAG.getConstant(0, SourceType); + + unsigned NewBVElems = ElemRatio * VT.getVectorNumElements(); + SmallVector<SDValue, 8> Ops(NewBVElems, Filler); + + // Populate the new build_vector + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + SDValue Cast = N->getOperand(i); + assert((Cast.getOpcode() == ISD::ANY_EXTEND || + Cast.getOpcode() == ISD::ZERO_EXTEND || + Cast.getOpcode() == ISD::UNDEF) && "Invalid cast opcode"); + SDValue In; + if (Cast.getOpcode() == ISD::UNDEF) + In = DAG.getUNDEF(SourceType); + else + In = Cast->getOperand(0); + unsigned Index = isLE ? (i * ElemRatio) : + (i * ElemRatio + (ElemRatio - 1)); + + assert(Index < Ops.size() && "Invalid index"); + Ops[Index] = In; + } + + // The type of the new BUILD_VECTOR node. + EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems); + assert(VecVT.getSizeInBits() == VT.getSizeInBits() && + "Invalid vector size"); + // Check if the new vector type is legal. + if (!isTypeLegal(VecVT)) return SDValue(); + + // Make the new BUILD_VECTOR. 
+ SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], Ops.size()); + + // The new BUILD_VECTOR node has the potential to be further optimized. + AddToWorkList(BV.getNode()); + // Bitcast to the desired type. + return DAG.getNode(ISD::BITCAST, dl, VT, BV); +} + +SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) { + EVT VT = N->getValueType(0); - assert(Index < Ops.size() && "Invalid index"); - Ops[Index] = In; + unsigned NumInScalars = N->getNumOperands(); + DebugLoc dl = N->getDebugLoc(); + + EVT SrcVT = MVT::Other; + unsigned Opcode = ISD::DELETED_NODE; + unsigned NumDefs = 0; + + for (unsigned i = 0; i != NumInScalars; ++i) { + SDValue In = N->getOperand(i); + unsigned Opc = In.getOpcode(); + + if (Opc == ISD::UNDEF) + continue; + + // If all scalar values are floats and converted from integers. + if (Opcode == ISD::DELETED_NODE && + (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) { + Opcode = Opc; + // If not supported by target, bail out. + if (TLI.getOperationAction(Opcode, VT) != TargetLowering::Legal && + TLI.getOperationAction(Opcode, VT) != TargetLowering::Custom) + return SDValue(); } + if (Opc != Opcode) + return SDValue(); + + EVT InVT = In.getOperand(0).getValueType(); + + // If all scalar values are typed differently, bail out. It's chosen to + // simplify BUILD_VECTOR of integer types. + if (SrcVT == MVT::Other) + SrcVT = InVT; + if (SrcVT != InVT) + return SDValue(); + NumDefs++; + } + + // If the vector has just one element defined, it's not worth to fold it into + // a vectorized one. + if (NumDefs < 2) + return SDValue(); - // The type of the new BUILD_VECTOR node. - EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems); - assert(VecVT.getSizeInBits() == N->getValueType(0).getSizeInBits() && - "Invalid vector size"); - // Check if the new vector type is legal. 
- if (!isTypeLegal(VecVT)) return SDValue(); + assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP) + && "Should only handle conversion from integer to float."); + assert(SrcVT != MVT::Other && "Cannot determine source type!"); - // Make the new BUILD_VECTOR. - SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), - VecVT, &Ops[0], Ops.size()); + EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars); + SmallVector<SDValue, 8> Opnds; + for (unsigned i = 0; i != NumInScalars; ++i) { + SDValue In = N->getOperand(i); - // The new BUILD_VECTOR node has the potential to be further optimized. - AddToWorkList(BV.getNode()); - // Bitcast to the desired type. - return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), BV); + if (In.getOpcode() == ISD::UNDEF) + Opnds.push_back(DAG.getUNDEF(SrcVT)); + else + Opnds.push_back(In.getOperand(0)); } + SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, + &Opnds[0], Opnds.size()); + AddToWorkList(BV.getNode()); + + return DAG.getNode(Opcode, dl, VT, BV); +} + +SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { + unsigned NumInScalars = N->getNumOperands(); + DebugLoc dl = N->getDebugLoc(); + EVT VT = N->getValueType(0); + + // A vector built entirely of undefs is undef. + if (ISD::allOperandsUndef(N)) + return DAG.getUNDEF(VT); + + SDValue V = reduceBuildVecExtToExtBuildVec(N); + if (V.getNode()) + return V; + + V = reduceBuildVecConvertToConvertBuildVec(N); + if (V.getNode()) + return V; // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT // operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from @@ -8549,7 +8634,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { return SDValue(); // Widen the input vector by adding undef values. 
- VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT, + VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1, DAG.getUNDEF(VecIn1.getValueType())); } @@ -8570,7 +8655,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { SDValue Ops[2]; Ops[0] = VecIn1; Ops[1] = VecIn2; - return DAG.getVectorShuffle(VT, N->getDebugLoc(), Ops[0], Ops[1], &Mask[0]); + return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], &Mask[0]); } return SDValue(); diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 4854cf7b26..2ddc07cc63 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -101,8 +101,7 @@ bool FastISel::hasTrivialKill(const Value *V) const { // No-op casts are trivially coalesced by fast-isel. if (const CastInst *Cast = dyn_cast<CastInst>(I)) - if (Cast->isNoopCast(TD.getIntPtrType(Cast->getContext())) && - !hasTrivialKill(Cast->getOperand(0))) + if (Cast->isNoopCast(TD) && !hasTrivialKill(Cast->getOperand(0))) return false; // GEPs with all zero indices are trivially coalesced by fast-isel. @@ -175,7 +174,7 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { // Translate this as an integer zero so that it can be // local-CSE'd with actual integer zeros. Reg = - getRegForValue(Constant::getNullValue(TD.getIntPtrType(V->getContext()))); + getRegForValue(Constant::getNullValue(TD.getIntPtrType(V->getType()))); } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) { if (CF->isNullValue()) { Reg = TargetMaterializeFloatZero(CF); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 183416f3fd..d661971bb8 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3804,7 +3804,8 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, // Emit a library call. 
TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - Entry.Ty = TLI.getDataLayout()->getIntPtrType(*getContext()); + unsigned AS = SrcPtrInfo.getAddrSpace(); + Entry.Ty = TLI.getDataLayout()->getIntPtrType(*getContext(), AS); Entry.Node = Dst; Args.push_back(Entry); Entry.Node = Src; Args.push_back(Entry); Entry.Node = Size; Args.push_back(Entry); @@ -3859,7 +3860,8 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, // Emit a library call. TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - Entry.Ty = TLI.getDataLayout()->getIntPtrType(*getContext()); + unsigned AS = SrcPtrInfo.getAddrSpace(); + Entry.Ty = TLI.getDataLayout()->getIntPtrType(*getContext(), AS); Entry.Node = Dst; Args.push_back(Entry); Entry.Node = Src; Args.push_back(Entry); Entry.Node = Size; Args.push_back(Entry); @@ -3908,7 +3910,8 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, return Result; // Emit a library call. - Type *IntPtrTy = TLI.getDataLayout()->getIntPtrType(*getContext()); + unsigned AS = DstPtrInfo.getAddrSpace(); + Type *IntPtrTy = TLI.getDataLayout()->getIntPtrType(*getContext(), AS); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Node = Dst; Entry.Ty = IntPtrTy; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 30a3fc2ca5..6c9d001a1f 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -2604,14 +2604,14 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB; // Update machine-CFG edges with unique successors. 
- SmallVector<BasicBlock*, 32> succs; - succs.reserve(I.getNumSuccessors()); - for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) - succs.push_back(I.getSuccessor(i)); - array_pod_sort(succs.begin(), succs.end()); - succs.erase(std::unique(succs.begin(), succs.end()), succs.end()); - for (unsigned i = 0, e = succs.size(); i != e; ++i) { - MachineBasicBlock *Succ = FuncInfo.MBBMap[succs[i]]; + SmallSet<BasicBlock*, 32> Done; + for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) { + BasicBlock *BB = I.getSuccessor(i); + bool Inserted = Done.insert(BB); + if (!Inserted) + continue; + + MachineBasicBlock *Succ = FuncInfo.MBBMap[BB]; addSuccessorWithWeight(IndirectBrMBB, Succ); } diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp index 94a2542e7a..99f6ec691a 100644 --- a/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/lib/ExecutionEngine/ExecutionEngine.cpp @@ -645,16 +645,17 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { } case Instruction::PtrToInt: { GenericValue GV = getConstantValue(Op0); - unsigned AS = cast<PointerType>(CE->getOperand(1)->getType()) - ->getAddressSpace(); - uint32_t PtrWidth = TD->getPointerSizeInBits(AS); + assert(CE->getOperand(1)->getType()->isPointerTy() && + "Must be a pointer type!"); + uint32_t PtrWidth = TD->getTypeSizeInBits(CE->getOperand(1)->getType()); GV.IntVal = APInt(PtrWidth, uintptr_t(GV.PointerVal)); return GV; } case Instruction::IntToPtr: { GenericValue GV = getConstantValue(Op0); - unsigned AS = cast<PointerType>(CE->getType())->getAddressSpace(); - uint32_t PtrWidth = TD->getPointerSizeInBits(AS); + assert(CE->getOperand(1)->getType()->isPointerTy() && + "Must be a pointer type!"); + uint32_t PtrWidth = TD->getTypeSizeInBits(CE->getType()); if (PtrWidth != GV.IntVal.getBitWidth()) GV.IntVal = GV.IntVal.zextOrTrunc(PtrWidth); assert(GV.IntVal.getBitWidth() <= 64 && "Bad pointer width"); diff --git 
a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp index c1f8baed1a..ff05c82aec 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -190,7 +190,7 @@ void RuntimeDyldImpl::emitCommonSymbols(ObjectImage &Obj, if (!Addr) report_fatal_error("Unable to allocate memory for common symbols!"); uint64_t Offset = 0; - Sections.push_back(SectionEntry(Addr, TotalSize, TotalSize, 0)); + Sections.push_back(SectionEntry(StringRef(), Addr, TotalSize, TotalSize, 0)); memset(Addr, 0, TotalSize); DEBUG(dbgs() << "emitCommonSection SectionID: " << SectionID @@ -233,10 +233,12 @@ unsigned RuntimeDyldImpl::emitSection(ObjectImage &Obj, bool IsVirtual; bool IsZeroInit; uint64_t DataSize; + StringRef Name; Check(Section.isRequiredForExecution(IsRequired)); Check(Section.isVirtual(IsVirtual)); Check(Section.isZeroInit(IsZeroInit)); Check(Section.getSize(DataSize)); + Check(Section.getName(Name)); unsigned Allocate; unsigned SectionID = Sections.size(); @@ -264,6 +266,7 @@ unsigned RuntimeDyldImpl::emitSection(ObjectImage &Obj, memcpy(Addr, pData, DataSize); DEBUG(dbgs() << "emitSection SectionID: " << SectionID + << " Name: " << Name << " obj addr: " << format("%p", pData) << " new addr: " << format("%p", Addr) << " DataSize: " << DataSize @@ -279,6 +282,7 @@ unsigned RuntimeDyldImpl::emitSection(ObjectImage &Obj, Allocate = 0; Addr = 0; DEBUG(dbgs() << "emitSection SectionID: " << SectionID + << " Name: " << Name << " obj addr: " << format("%p", data.data()) << " new addr: 0" << " DataSize: " << DataSize @@ -287,7 +291,8 @@ unsigned RuntimeDyldImpl::emitSection(ObjectImage &Obj, << "\n"); } - Sections.push_back(SectionEntry(Addr, Allocate, DataSize,(uintptr_t)pData)); + Sections.push_back(SectionEntry(Name, Addr, Allocate, DataSize, + (uintptr_t)pData)); return SectionID; } @@ -353,6 +358,24 @@ uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr) { StubAddr++; 
*StubAddr = NopInstr; return Addr; + } else if (Arch == Triple::ppc64) { + // PowerPC64 stub: the address points to a function descriptor + // instead of the function itself. Load the function address + // on r11 and sets it to control register. Also loads the function + // TOC in r2 and environment pointer to r11. + writeInt32BE(Addr, 0x3D800000); // lis r12, highest(addr) + writeInt32BE(Addr+4, 0x618C0000); // ori r12, higher(addr) + writeInt32BE(Addr+8, 0x798C07C6); // sldi r12, r12, 32 + writeInt32BE(Addr+12, 0x658C0000); // oris r12, r12, h(addr) + writeInt32BE(Addr+16, 0x618C0000); // ori r12, r12, l(addr) + writeInt32BE(Addr+20, 0xF8410028); // std r2, 40(r1) + writeInt32BE(Addr+24, 0xE96C0000); // ld r11, 0(r12) + writeInt32BE(Addr+28, 0xE84C0008); // ld r2, 0(r12) + writeInt32BE(Addr+32, 0x7D6903A6); // mtctr r11 + writeInt32BE(Addr+36, 0xE96C0010); // ld r11, 16(r2) + writeInt32BE(Addr+40, 0x4E800420); // bctr + + return Addr; } return Addr; } diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index 08aba64e46..1073c6fc52 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -30,6 +30,14 @@ using namespace llvm::object; namespace { +static inline +error_code check(error_code Err) { + if (Err) { + report_fatal_error(Err.message()); + } + return Err; +} + template<support::endianness target_endianness, bool is64Bits> class DyldELFObject : public ELFObjectFile<target_endianness, is64Bits> { LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits) @@ -340,6 +348,179 @@ void RuntimeDyldELF::resolveMIPSRelocation(uint8_t *LocalAddress, } } +// Return the .TOC. section address to R_PPC64_TOC relocations. +uint64_t RuntimeDyldELF::findPPC64TOC() const { + // The TOC consists of sections .got, .toc, .tocbss, .plt in that + // order. The TOC starts where the first of these sections starts. 
+ SectionList::const_iterator it = Sections.begin(); + SectionList::const_iterator ite = Sections.end(); + for (; it != ite; ++it) { + if (it->Name == ".got" || + it->Name == ".toc" || + it->Name == ".tocbss" || + it->Name == ".plt") + break; + } + if (it == ite) { + // This may happen for + // * references to TOC base base (sym@toc, .odp relocation) without + // a .toc directive. + // In this case just use the first section (which is usually + // the .odp) since the code won't reference the .toc base + // directly. + it = Sections.begin(); + } + assert (it != ite); + // Per the ppc64-elf-linux ABI, The TOC base is TOC value plus 0x8000 + // thus permitting a full 64 Kbytes segment. + return it->LoadAddress + 0x8000; +} + +// Returns the sections and offset associated with the ODP entry referenced +// by Symbol. +void RuntimeDyldELF::findOPDEntrySection(ObjectImage &Obj, + ObjSectionToIDMap &LocalSections, + RelocationValueRef &Rel) { + // Get the ELF symbol value (st_value) to compare with Relocation offset in + // .opd entries + + error_code err; + for (section_iterator si = Obj.begin_sections(), + se = Obj.end_sections(); si != se; si.increment(err)) { + StringRef SectionName; + check(si->getName(SectionName)); + if (SectionName != ".opd") + continue; + + for (relocation_iterator i = si->begin_relocations(), + e = si->end_relocations(); i != e;) { + check(err); + + // The R_PPC64_ADDR64 relocation indicates the first field + // of a .opd entry + uint64_t TypeFunc; + check(i->getType(TypeFunc)); + if (TypeFunc != ELF::R_PPC64_ADDR64) { + i.increment(err); + continue; + } + + SymbolRef TargetSymbol; + uint64_t TargetSymbolOffset; + int64_t TargetAdditionalInfo; + check(i->getSymbol(TargetSymbol)); + check(i->getOffset(TargetSymbolOffset)); + check(i->getAdditionalInfo(TargetAdditionalInfo)); + + i = i.increment(err); + if (i == e) + break; + check(err); + + // Just check if following relocation is a R_PPC64_TOC + uint64_t TypeTOC; + check(i->getType(TypeTOC)); + 
if (TypeTOC != ELF::R_PPC64_TOC) + continue; + + // Finally compares the Symbol value and the target symbol offset + // to check if this .opd entry refers to the symbol the relocation + // points to. + if (Rel.Addend != (intptr_t)TargetSymbolOffset) + continue; + + section_iterator tsi(Obj.end_sections()); + check(TargetSymbol.getSection(tsi)); + Rel.SectionID = findOrEmitSection(Obj, (*tsi), true, LocalSections); + Rel.Addend = (intptr_t)TargetAdditionalInfo; + return; + } + } + llvm_unreachable("Attempting to get address of ODP entry!"); +} + +// Relocation masks following the #lo(value), #hi(value), #higher(value), +// and #highest(value) macros defined in section 4.5.1. Relocation Types +// in PPC-elf64abi document. +// +static inline +uint16_t applyPPClo (uint64_t value) +{ + return value & 0xffff; +} + +static inline +uint16_t applyPPChi (uint64_t value) +{ + return (value >> 16) & 0xffff; +} + +static inline +uint16_t applyPPChigher (uint64_t value) +{ + return (value >> 32) & 0xffff; +} + +static inline +uint16_t applyPPChighest (uint64_t value) +{ + return (value >> 48) & 0xffff; +} + +void RuntimeDyldELF::resolvePPC64Relocation(uint8_t *LocalAddress, + uint64_t FinalAddress, + uint64_t Value, + uint32_t Type, + int64_t Addend) { + switch (Type) { + default: + llvm_unreachable("Relocation type not implemented yet!"); + break; + case ELF::R_PPC64_ADDR16_LO : + writeInt16BE(LocalAddress, applyPPClo (Value + Addend)); + break; + case ELF::R_PPC64_ADDR16_HI : + writeInt16BE(LocalAddress, applyPPChi (Value + Addend)); + break; + case ELF::R_PPC64_ADDR16_HIGHER : + writeInt16BE(LocalAddress, applyPPChigher (Value + Addend)); + break; + case ELF::R_PPC64_ADDR16_HIGHEST : + writeInt16BE(LocalAddress, applyPPChighest (Value + Addend)); + break; + case ELF::R_PPC64_ADDR14 : { + assert(((Value + Addend) & 3) == 0); + // Preserve the AA/LK bits in the branch instruction + uint8_t aalk = *(LocalAddress+3); + writeInt16BE(LocalAddress + 2, (aalk & 3) | ((Value + Addend) 
& 0xfffc)); + } break; + case ELF::R_PPC64_REL24 : { + int32_t delta = static_cast<int32_t>(Value - FinalAddress + Addend); + if (SignExtend32<24>(delta) != delta) + llvm_unreachable("Relocation R_PPC64_REL24 overflow"); + // Generates a 'bl <address>' instruction + writeInt32BE(LocalAddress, 0x48000001 | (delta & 0x03FFFFFC)); + } break; + case ELF::R_PPC64_ADDR64 : + writeInt64BE(LocalAddress, Value + Addend); + break; + case ELF::R_PPC64_TOC : + writeInt64BE(LocalAddress, findPPC64TOC()); + break; + case ELF::R_PPC64_TOC16 : { + uint64_t TOCStart = findPPC64TOC(); + Value = applyPPClo((Value + Addend) - TOCStart); + writeInt16BE(LocalAddress, applyPPClo(Value)); + } break; + case ELF::R_PPC64_TOC16_DS : { + uint64_t TOCStart = findPPC64TOC(); + Value = ((Value + Addend) - TOCStart); + writeInt16BE(LocalAddress, applyPPClo(Value)); + } break; + } +} + + void RuntimeDyldELF::resolveRelocation(uint8_t *LocalAddress, uint64_t FinalAddress, uint64_t Value, @@ -366,6 +547,9 @@ void RuntimeDyldELF::resolveRelocation(uint8_t *LocalAddress, (uint32_t)(Value & 0xffffffffL), Type, (uint32_t)(Addend & 0xffffffffL)); break; + case Triple::ppc64: + resolvePPC64Relocation(LocalAddress, FinalAddress, Value, Type, Addend); + break; default: llvm_unreachable("Unsupported CPU type!"); } } @@ -390,6 +574,8 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, RelocationValueRef Value; // First search for the symbol in the local symbol table SymbolTableMap::const_iterator lsi = Symbols.find(TargetName.data()); + SymbolRef::Type SymType; + Symbol.getType(SymType); if (lsi != Symbols.end()) { Value.SectionID = lsi->second.first; Value.Addend = lsi->second.second; @@ -401,8 +587,6 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, Value.SectionID = gsi->second.first; Value.Addend = gsi->second.second; } else { - SymbolRef::Type SymType; - Symbol.getType(SymType); switch (SymType) { case SymbolRef::ST_Debug: { // TODO: Now ELF 
SymbolRef::ST_Debug = STT_SECTION, it's not obviously @@ -446,7 +630,7 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, SectionEntry &Section = Sections[Rel.SectionID]; uint8_t *Target = Section.Address + Rel.Offset; - // Look up for existing stub. + // Look for an existing stub. StubMap::const_iterator i = Stubs.find(Value); if (i != Stubs.end()) { resolveRelocation(Target, (uint64_t)Target, (uint64_t)Section.Address + @@ -516,6 +700,93 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, Section.StubOffset, RelType, 0); Section.StubOffset += getMaxStubSize(); } + } else if (Arch == Triple::ppc64) { + if (RelType == ELF::R_PPC64_REL24) { + // A PPC branch relocation will need a stub function if the target is + // an external symbol (Symbol::ST_Unknown) or if the target address + // is not within the signed 24-bits branch address. + SectionEntry &Section = Sections[Rel.SectionID]; + uint8_t *Target = Section.Address + Rel.Offset; + bool RangeOverflow = false; + if (SymType != SymbolRef::ST_Unknown) { + // A function call may points to the .opd entry, so the final symbol value + // in calculated based in the relocation values in .opd section. + findOPDEntrySection(Obj, ObjSectionToID, Value); + uint8_t *RelocTarget = Sections[Value.SectionID].Address + Value.Addend; + int32_t delta = static_cast<int32_t>(Target - RelocTarget); + // If it is within 24-bits branch range, just set the branch target + if (SignExtend32<24>(delta) == delta) { + RelocationEntry RE(Rel.SectionID, Rel.Offset, RelType, Value.Addend); + if (Value.SymbolName) + addRelocationForSymbol(RE, Value.SymbolName); + else + addRelocationForSection(RE, Value.SectionID); + } else { + RangeOverflow = true; + } + } + if (SymType == SymbolRef::ST_Unknown || RangeOverflow == true) { + // It is an external symbol (SymbolRef::ST_Unknown) or within a range + // larger than 24-bits. 
+ StubMap::const_iterator i = Stubs.find(Value); + if (i != Stubs.end()) { + // Symbol function stub already created, just relocate to it + resolveRelocation(Target, (uint64_t)Target, (uint64_t)Section.Address + + i->second, RelType, 0); + DEBUG(dbgs() << " Stub function found\n"); + } else { + // Create a new stub function. + DEBUG(dbgs() << " Create a new stub function\n"); + Stubs[Value] = Section.StubOffset; + uint8_t *StubTargetAddr = createStubFunction(Section.Address + + Section.StubOffset); + RelocationEntry RE(Rel.SectionID, StubTargetAddr - Section.Address, + ELF::R_PPC64_ADDR64, Value.Addend); + + // Generates the 64-bits address loads as exemplified in section + // 4.5.1 in PPC64 ELF ABI. + RelocationEntry REhst(Rel.SectionID, + StubTargetAddr - Section.Address + 2, + ELF::R_PPC64_ADDR16_HIGHEST, Value.Addend); + RelocationEntry REhr(Rel.SectionID, + StubTargetAddr - Section.Address + 6, + ELF::R_PPC64_ADDR16_HIGHER, Value.Addend); + RelocationEntry REh(Rel.SectionID, + StubTargetAddr - Section.Address + 14, + ELF::R_PPC64_ADDR16_HI, Value.Addend); + RelocationEntry REl(Rel.SectionID, + StubTargetAddr - Section.Address + 18, + ELF::R_PPC64_ADDR16_LO, Value.Addend); + + if (Value.SymbolName) { + addRelocationForSymbol(REhst, Value.SymbolName); + addRelocationForSymbol(REhr, Value.SymbolName); + addRelocationForSymbol(REh, Value.SymbolName); + addRelocationForSymbol(REl, Value.SymbolName); + } else { + addRelocationForSection(REhst, Value.SectionID); + addRelocationForSection(REhr, Value.SectionID); + addRelocationForSection(REh, Value.SectionID); + addRelocationForSection(REl, Value.SectionID); + } + + resolveRelocation(Target, (uint64_t)Target, (uint64_t)Section.Address + + Section.StubOffset, RelType, 0); + if (SymType == SymbolRef::ST_Unknown) + // Restore the TOC for external calls + writeInt32BE(Target+4, 0xE8410028); // ld r2,40(r1) + Section.StubOffset += getMaxStubSize(); + } + } + } else { + RelocationEntry RE(Rel.SectionID, Rel.Offset, RelType, 
Value.Addend); + // Extra check to avoid relocation againt empty symbols (usually + // the R_PPC64_TOC). + if (Value.SymbolName && !TargetName.empty()) + addRelocationForSymbol(RE, Value.SymbolName); + else + addRelocationForSection(RE, Value.SectionID); + } } else { RelocationEntry RE(Rel.SectionID, Rel.Offset, RelType, Value.Addend); if (Value.SymbolName) diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h index 3011a06537..6c31f0dc12 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h @@ -46,6 +46,12 @@ protected: uint32_t Type, int32_t Addend); + void resolvePPC64Relocation(uint8_t *LocalAddress, + uint64_t FinalAddress, + uint64_t Value, + uint32_t Type, + int64_t Addend); + virtual void resolveRelocation(uint8_t *LocalAddress, uint64_t FinalAddress, uint64_t Value, @@ -60,6 +66,11 @@ protected: virtual ObjectImage *createObjectImage(ObjectBuffer *InputBuffer); + uint64_t findPPC64TOC() const; + void findOPDEntrySection(ObjectImage &Obj, + ObjSectionToIDMap &LocalSections, + RelocationValueRef &Rel); + public: RuntimeDyldELF(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) {} diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h index a9733407ec..45633e735c 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h @@ -24,6 +24,8 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/SwapByteOrder.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/system_error.h" #include <map> @@ -41,6 +43,9 @@ class Twine; /// linker. class SectionEntry { public: + /// Name - section name. + StringRef Name; + /// Address - address in the linker's memory where the section resides. 
uint8_t *Address; @@ -61,9 +66,9 @@ public: /// for calculating relocations in some object formats (like MachO). uintptr_t ObjAddress; - SectionEntry(uint8_t *address, size_t size, uintptr_t stubOffset, - uintptr_t objAddress) - : Address(address), Size(size), LoadAddress((uintptr_t)address), + SectionEntry(StringRef name, uint8_t *address, size_t size, + uintptr_t stubOffset, uintptr_t objAddress) + : Name(name), Address(address), Size(size), LoadAddress((uintptr_t)address), StubOffset(stubOffset), ObjAddress(objAddress) {} }; @@ -163,6 +168,8 @@ protected: return 8; // 32-bit instruction and 32-bit address else if (Arch == Triple::mipsel) return 16; + else if (Arch == Triple::ppc64) + return 44; else return 0; } @@ -185,6 +192,35 @@ protected: return (uint8_t*)Sections[SectionID].Address; } + void writeInt16BE(uint8_t *Addr, uint16_t Value) { + if (sys::isLittleEndianHost()) + Value = sys::SwapByteOrder(Value); + *Addr = (Value >> 8) & 0xFF; + *(Addr+1) = Value & 0xFF; + } + + void writeInt32BE(uint8_t *Addr, uint32_t Value) { + if (sys::isLittleEndianHost()) + Value = sys::SwapByteOrder(Value); + *Addr = (Value >> 24) & 0xFF; + *(Addr+1) = (Value >> 16) & 0xFF; + *(Addr+2) = (Value >> 8) & 0xFF; + *(Addr+3) = Value & 0xFF; + } + + void writeInt64BE(uint8_t *Addr, uint64_t Value) { + if (sys::isLittleEndianHost()) + Value = sys::SwapByteOrder(Value); + *Addr = (Value >> 56) & 0xFF; + *(Addr+1) = (Value >> 48) & 0xFF; + *(Addr+2) = (Value >> 40) & 0xFF; + *(Addr+3) = (Value >> 32) & 0xFF; + *(Addr+4) = (Value >> 24) & 0xFF; + *(Addr+5) = (Value >> 16) & 0xFF; + *(Addr+6) = (Value >> 8) & 0xFF; + *(Addr+7) = Value & 0xFF; + } + /// \brief Given the common symbols discovered in the object file, emit a /// new section for them and update the symbol mappings in the object and /// symbol table. 
diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp index 02b4c3c4c1..a94d51bb74 100644 --- a/lib/MC/ELFObjectWriter.cpp +++ b/lib/MC/ELFObjectWriter.cpp @@ -133,6 +133,11 @@ class ELFObjectWriter : public MCObjectWriter { bool IsPCRel) const { return TargetObjectWriter->ExplicitRelSym(Asm, Target, F, Fixup, IsPCRel); } + const MCSymbol *undefinedExplicitRelSym(const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel) const { + return TargetObjectWriter->undefinedExplicitRelSym(Target, Fixup, IsPCRel); + } bool is64Bit() const { return TargetObjectWriter->is64Bit(); } bool hasRelocationAddend() const { @@ -639,7 +644,7 @@ const MCSymbol *ELFObjectWriter::SymbolToReloc(const MCAssembler &Asm, if (ASymbol.isUndefined()) { if (Renamed) return Renamed; - return &ASymbol; + return undefinedExplicitRelSym(Target, Fixup, IsPCRel); } if (SD.isExternal()) { @@ -721,10 +726,13 @@ void ELFObjectWriter::RecordRelocation(const MCAssembler &Asm, MCSymbolData &SD = Asm.getSymbolData(ASymbol); MCFragment *F = SD.getFragment(); - Index = F->getParent()->getOrdinal() + 1; - - // Offset of the symbol in the section - Value += Layout.getSymbolOffset(&SD); + if (F) { + Index = F->getParent()->getOrdinal() + 1; + // Offset of the symbol in the section + Value += Layout.getSymbolOffset(&SD); + } else { + Index = 0; + } } else { if (Asm.getSymbolData(Symbol).getFlags() & ELF_Other_Weakref) WeakrefUsedInReloc.insert(RelocSymbol); diff --git a/lib/MC/MCELFObjectTargetWriter.cpp b/lib/MC/MCELFObjectTargetWriter.cpp index 6eb6914f4b..74cd042a0f 100644 --- a/lib/MC/MCELFObjectTargetWriter.cpp +++ b/lib/MC/MCELFObjectTargetWriter.cpp @@ -9,6 +9,8 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCValue.h" using namespace llvm; @@ -35,6 +37,12 @@ const MCSymbol *MCELFObjectTargetWriter::ExplicitRelSym(const MCAssembler &Asm, return NULL; } +const MCSymbol 
*MCELFObjectTargetWriter::undefinedExplicitRelSym(const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel) const { + const MCSymbol &Symbol = Target.getSymA()->getSymbol(); + return &Symbol.AliasedSymbol(); +} void MCELFObjectTargetWriter::adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset) { diff --git a/lib/MC/MCInstPrinter.cpp b/lib/MC/MCInstPrinter.cpp index 847bcc0a16..41d90abeeb 100644 --- a/lib/MC/MCInstPrinter.cpp +++ b/lib/MC/MCInstPrinter.cpp @@ -36,3 +36,17 @@ void MCInstPrinter::printAnnotation(raw_ostream &OS, StringRef Annot) { OS << " " << MAI.getCommentString() << " " << Annot; } } + +/// Utility functions to make adding mark ups simpler. +StringRef MCInstPrinter::markup(StringRef s) const { + if (getUseMarkup()) + return s; + else + return ""; +} +StringRef MCInstPrinter::markup(StringRef a, StringRef b) const { + if (getUseMarkup()) + return a; + else + return b; +} diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index b1cc08d56e..a1f0a2a885 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -207,7 +207,7 @@ public: bool ParseMSInlineAsm(void *AsmLoc, std::string &AsmString, unsigned &NumOutputs, unsigned &NumInputs, - SmallVectorImpl<void *> &OpDecls, + SmallVectorImpl<std::pair<void *,bool> > &OpDecls, SmallVectorImpl<std::string> &Constraints, SmallVectorImpl<std::string> &Clobbers, const MCInstrInfo *MII, @@ -3658,7 +3658,8 @@ enum AsmRewriteKind { AOK_Input, AOK_Output, AOK_SizeDirective, - AOK_Emit + AOK_Emit, + AOK_Skip }; struct AsmRewrite { @@ -3690,14 +3691,16 @@ bool AsmParser::ParseDirectiveEmit(SMLoc IDLoc, ParseStatementInfo &Info) { bool AsmParser::ParseMSInlineAsm(void *AsmLoc, std::string &AsmString, unsigned &NumOutputs, unsigned &NumInputs, - SmallVectorImpl<void *> &OpDecls, + SmallVectorImpl<std::pair<void *, bool> > &OpDecls, SmallVectorImpl<std::string> &Constraints, SmallVectorImpl<std::string> &Clobbers, const MCInstrInfo *MII, const MCInstPrinter 
*IP, MCAsmParserSemaCallback &SI) { - SmallVector<void*, 4> InputDecls; - SmallVector<void*, 4> OutputDecls; + SmallVector<void *, 4> InputDecls; + SmallVector<void *, 4> OutputDecls; + SmallVector<bool, 4> InputDeclsOffsetOf; + SmallVector<bool, 4> OutputDeclsOffsetOf; SmallVector<std::string, 4> InputConstraints; SmallVector<std::string, 4> OutputConstraints; std::set<std::string> ClobberRegs; @@ -3725,13 +3728,13 @@ bool AsmParser::ParseMSInlineAsm(void *AsmLoc, std::string &AsmString, // Immediate. if (Operand->isImm()) { AsmStrRewrites.push_back(AsmRewrite(AOK_Imm, - Operand->getStartLoc(), - Operand->getNameLen())); + Operand->getStartLoc(), + Operand->getNameLen())); continue; } // Register operand. - if (Operand->isReg()) { + if (Operand->isReg() && !Operand->isOffsetOf()) { unsigned NumDefs = Desc.getNumDefs(); // Clobber. if (NumDefs && Operand->getMCOperandNum() < NumDefs) { @@ -3749,26 +3752,33 @@ bool AsmParser::ParseMSInlineAsm(void *AsmLoc, std::string &AsmString, Size); if (OpDecl) { bool isOutput = (i == 1) && Desc.mayStore(); - if (Operand->needSizeDirective()) + if (!Operand->isOffsetOf() && Operand->needSizeDirective()) AsmStrRewrites.push_back(AsmRewrite(AOK_SizeDirective, - Operand->getStartLoc(), 0, - Operand->getMemSize())); - + Operand->getStartLoc(), 0, + Operand->getMemSize())); + + // Don't emit the offset directive. 
+ if (Operand->isOffsetOf()) + AsmStrRewrites.push_back(AsmRewrite(AOK_Skip, + Operand->getOffsetOfLoc(), 7)); + if (isOutput) { std::string Constraint = "="; ++InputIdx; OutputDecls.push_back(OpDecl); + OutputDeclsOffsetOf.push_back(Operand->isOffsetOf()); Constraint += Operand->getConstraint().str(); OutputConstraints.push_back(Constraint); AsmStrRewrites.push_back(AsmRewrite(AOK_Output, - Operand->getStartLoc(), - Operand->getNameLen())); + Operand->getStartLoc(), + Operand->getNameLen())); } else { InputDecls.push_back(OpDecl); + InputDeclsOffsetOf.push_back(Operand->isOffsetOf()); InputConstraints.push_back(Operand->getConstraint().str()); AsmStrRewrites.push_back(AsmRewrite(AOK_Input, - Operand->getStartLoc(), - Operand->getNameLen())); + Operand->getStartLoc(), + Operand->getNameLen())); } } } @@ -3789,13 +3799,15 @@ bool AsmParser::ParseMSInlineAsm(void *AsmLoc, std::string &AsmString, unsigned NumExprs = NumOutputs + NumInputs; OpDecls.resize(NumExprs); Constraints.resize(NumExprs); + // FIXME: Constraints are hard coded to 'm', but we need an 'r' + // constraint for offsetof. This needs to be cleaned up! for (unsigned i = 0; i < NumOutputs; ++i) { - OpDecls[i] = OutputDecls[i]; - Constraints[i] = OutputConstraints[i]; + OpDecls[i] = std::make_pair(OutputDecls[i], OutputDeclsOffsetOf[i]); + Constraints[i] = OutputDeclsOffsetOf[i] ? "=r" : OutputConstraints[i]; } for (unsigned i = 0, j = NumOutputs; i < NumInputs; ++i, ++j) { - OpDecls[j] = InputDecls[i]; - Constraints[j] = InputConstraints[i]; + OpDecls[j] = std::make_pair(InputDecls[i], InputDeclsOffsetOf[i]); + Constraints[j] = InputDeclsOffsetOf[i] ? "r" : InputConstraints[i]; } } @@ -3816,8 +3828,15 @@ bool AsmParser::ParseMSInlineAsm(void *AsmLoc, std::string &AsmString, OS << StringRef(Start, Loc - Start); PrevKind = Kind; + // Skip the original expression. + if (Kind == AOK_Skip) { + Start = Loc + (*I).Len; + continue; + } + // Rewrite expressions in $N notation. 
switch (Kind) { + default: break; case AOK_Imm: OS << Twine("$$") + StringRef(Loc, (*I).Len); break; diff --git a/lib/TableGen/Error.cpp b/lib/TableGen/Error.cpp index 5dd688cb67..ad98fba9ba 100644 --- a/lib/TableGen/Error.cpp +++ b/lib/TableGen/Error.cpp @@ -16,6 +16,8 @@ #include "llvm/ADT/Twine.h" #include "llvm/Support/raw_ostream.h" +#include <cstdlib> + namespace llvm { SourceMgr SrcMgr; @@ -63,4 +65,14 @@ void PrintError(const TGError &Error) { PrintError(Error.getLoc(), Error.getMessage()); } +void PrintFatalError(const std::string &Msg) { + PrintError(Twine(Msg)); + std::exit(1); +} + +void PrintFatalError(ArrayRef<SMLoc> ErrorLoc, const std::string &Msg) { + PrintError(ErrorLoc, Msg); + std::exit(1); +} + } // end namespace llvm diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 874ef43b90..bf50081cc7 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -296,15 +296,13 @@ class RegConstraint<string C> { // ARM specific transformation functions and pattern fragments. // -// imm_neg_XFORM - Return a imm value packed into the format described for -// imm_neg defs below. +// imm_neg_XFORM - Return the negation of an i32 immediate value. def imm_neg_XFORM : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(-(int)N->getZExtValue(), MVT::i32); }]>; -// so_imm_not_XFORM - Return a so_imm value packed into the format described for -// so_imm_not def below. -def so_imm_not_XFORM : SDNodeXForm<imm, [{ +// imm_not_XFORM - Return the complement of a i32 immediate value. 
+def imm_not_XFORM : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(~(int)N->getZExtValue(), MVT::i32); }]>; @@ -327,7 +325,7 @@ def so_imm_neg : Operand<i32>, PatLeaf<(imm), [{ def so_imm_not_asmoperand : AsmOperandClass { let Name = "ARMSOImmNot"; } def so_imm_not : Operand<i32>, PatLeaf<(imm), [{ return ARM_AM::getSOImmVal(~(uint32_t)N->getZExtValue()) != -1; - }], so_imm_not_XFORM> { + }], imm_not_XFORM> { let ParserMatchClass = so_imm_not_asmoperand; } @@ -3269,6 +3267,8 @@ def : ARMPat<(ARMaddc GPR:$src, imm0_65535_neg:$imm), // for part of the negation. def : ARMPat<(ARMadde GPR:$src, so_imm_not:$imm, CPSR), (SBCri GPR:$src, so_imm_not:$imm)>; +def : ARMPat<(ARMadde GPR:$src, imm0_65535_neg:$imm, CPSR), + (SBCrr GPR:$src, (MOVi16 (imm_not_XFORM imm:$imm)))>; // Note: These are implemented in C++ code, because they have to generate // ADD/SUBrs instructions, which use a complex pattern that a xform function diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 37b280f447..e10f4a865e 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -1953,7 +1953,7 @@ def : T2Pat<(ARMadde rGPR:$src, imm0_255_not:$imm, CPSR), def : T2Pat<(ARMadde rGPR:$src, t2_so_imm_not:$imm, CPSR), (t2SBCri rGPR:$src, t2_so_imm_not:$imm)>; def : T2Pat<(ARMadde rGPR:$src, imm0_65535_neg:$imm, CPSR), - (t2SBCrr rGPR:$src, (t2MOVi16 (imm_neg_XFORM imm:$imm)))>; + (t2SBCrr rGPR:$src, (t2MOVi16 (imm_not_XFORM imm:$imm)))>; // Select Bytes -- for disassembly only diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp index 4c44f69f4d..cb3ac4d1f6 100644 --- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -155,7 +155,8 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, TargetLowering::ArgListEntry Entry; // First argument: data pointer - Type *IntPtrTy = TLI.getDataLayout()->getIntPtrType(*DAG.getContext()); + unsigned AS = 
DstPtrInfo.getAddrSpace(); + Type *IntPtrTy = TLI.getDataLayout()->getIntPtrType(*DAG.getContext(), AS); Entry.Node = Dst; Entry.Ty = IntPtrTy; Args.push_back(Entry); diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 30fa6bc2c7..740548adbc 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -79,7 +79,7 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT, TLInfo(*this), TSInfo(*this), FrameLowering(Subtarget), - STTI(&TLInfo) { + STTI(&TLInfo), VTTI(&TLInfo) { if (!Subtarget.hasARMOps()) report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not " "support ARM mode execution!"); @@ -113,7 +113,7 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT, FrameLowering(Subtarget.hasThumb2() ? new ARMFrameLowering(Subtarget) : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)), - STTI(&TLInfo){ + STTI(&TLInfo), VTTI(&TLInfo) { } namespace { diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 227689f897..ddb38687d0 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -47,7 +47,7 @@ static void printRegImmShift(raw_ostream &O, ARM_AM::ShiftOpc ShOpc, assert (!(ShOpc == ARM_AM::ror && !ShImm) && "Cannot have ror #0"); O << getShiftOpcStr(ShOpc); - if (ShOpc != ARM_AM::rrx){ + if (ShOpc != ARM_AM::rrx) { O << " "; if (UseMarkup) O << "<imm:"; @@ -67,11 +67,9 @@ ARMInstPrinter::ARMInstPrinter(const MCAsmInfo &MAI, } void ARMInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { - if (UseMarkup) - OS << "<reg:"; - OS << getRegisterName(RegNo); - if (UseMarkup) - OS << ">"; + OS << markup("<reg:") + << getRegisterName(RegNo) + << markup(">"); } void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, @@ -143,12 +141,10 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, return; } - O << ", "; - 
if (UseMarkup) - O << "<imm:"; - O << "#" << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm())); - if (UseMarkup) - O << ">"; + O << ", " + << markup("<imm:") + << "#" << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm())) + << markup(">"); printAnnotation(O, Annot); return; } @@ -332,11 +328,9 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, unsigned Reg = Op.getReg(); printRegName(O, Reg); } else if (Op.isImm()) { - if (UseMarkup) - O << "<imm:"; - O << '#' << Op.getImm(); - if (UseMarkup) - O << ">"; + O << markup("<imm:") + << '#' << Op.getImm() + << markup(">"); } else { assert(Op.isExpr() && "unknown operand kind in printOperand"); // If a symbolic branch target was added as a constant expression then print @@ -360,18 +354,9 @@ void ARMInstPrinter::printT2LdrLabelOperand(const MCInst *MI, unsigned OpNum, if (MO1.isExpr()) O << *MO1.getExpr(); else if (MO1.isImm()) { - if (UseMarkup) - O << "<mem:"; - O << "[pc, "; - if (UseMarkup) - O << "<imm:"; - O << "#"; - O << MO1.getImm(); - if (UseMarkup) - O << ">"; - O << "]"; - if (UseMarkup) - O << ">"; + O << markup("<mem:") << "[pc, " + << markup("<imm:") << "#" << MO1.getImm() + << markup(">]>", "]"); } else llvm_unreachable("Unknown LDR label operand?"); @@ -424,25 +409,19 @@ void ARMInstPrinter::printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, const MCOperand &MO2 = MI->getOperand(Op+1); const MCOperand &MO3 = MI->getOperand(Op+2); - if (UseMarkup) - O << "<mem:"; - O << "["; + O << markup("<mem:") << "["; printRegName(O, MO1.getReg()); if (!MO2.getReg()) { if (ARM_AM::getAM2Offset(MO3.getImm())) { // Don't print +0. 
- O << ", "; - if (UseMarkup) - O << "<imm:"; - O << "#"; - O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm())); - O << ARM_AM::getAM2Offset(MO3.getImm()); - if (UseMarkup) - O << ">"; + O << ", " + << markup("<imm:") + << "#" + << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm())) + << ARM_AM::getAM2Offset(MO3.getImm()) + << markup(">"); } - O << "]"; - if (UseMarkup) - O << ">"; + O << "]" << markup(">"); return; } @@ -452,45 +431,29 @@ void ARMInstPrinter::printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, printRegImmShift(O, ARM_AM::getAM2ShiftOpc(MO3.getImm()), ARM_AM::getAM2Offset(MO3.getImm()), UseMarkup); - O << "]"; - if (UseMarkup) - O << ">"; + O << "]" << markup(">"); } void ARMInstPrinter::printAddrModeTBB(const MCInst *MI, unsigned Op, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(Op); const MCOperand &MO2 = MI->getOperand(Op+1); - if (UseMarkup) - O << "<mem:"; - O << "["; + O << markup("<mem:") << "["; printRegName(O, MO1.getReg()); O << ", "; printRegName(O, MO2.getReg()); - O << "]"; - if (UseMarkup) - O << ">"; + O << "]" << markup(">"); } void ARMInstPrinter::printAddrModeTBH(const MCInst *MI, unsigned Op, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(Op); const MCOperand &MO2 = MI->getOperand(Op+1); - if (UseMarkup) - O << "<mem:"; - O << "["; + O << markup("<mem:") << "["; printRegName(O, MO1.getReg()); O << ", "; printRegName(O, MO2.getReg()); - O << ", lsl "; - if (UseMarkup) - O << "<imm:"; - O << "#1"; - if (UseMarkup) - O << ">"; - O << "]"; - if (UseMarkup) - O << ">"; + O << ", lsl " << markup("<imm:") << "#1" << markup(">") << "]" << markup(">"); } void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op, @@ -520,13 +483,10 @@ void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI, if (!MO1.getReg()) { unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm()); - if (UseMarkup) - O << "<imm:"; - O << '#' - << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm())) - << ImmOffs; - 
if (UseMarkup) - O << ">"; + O << markup("<imm:") + << '#' << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm())) + << ImmOffs + << markup(">"); return; } @@ -547,13 +507,9 @@ void ARMInstPrinter::printAM3PostIndexOp(const MCInst *MI, unsigned Op, const MCOperand &MO2 = MI->getOperand(Op+1); const MCOperand &MO3 = MI->getOperand(Op+2); - if (UseMarkup) - O << "<mem:"; - O << "["; + O << markup("<mem:") << "["; printRegName(O, MO1.getReg()); - O << "], "; - if (UseMarkup) - O << ">"; + O << "], " << markup(">"); if (MO2.getReg()) { O << (char)ARM_AM::getAM3Op(MO3.getImm()); @@ -562,13 +518,11 @@ void ARMInstPrinter::printAM3PostIndexOp(const MCInst *MI, unsigned Op, } unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm()); - if (UseMarkup) - O << "<imm:"; - O << '#' + O << markup("<imm:") + << '#' << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm())) - << ImmOffs; - if (UseMarkup) - O << ">"; + << ImmOffs + << markup(">"); } void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, @@ -577,18 +531,13 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, const MCOperand &MO2 = MI->getOperand(Op+1); const MCOperand &MO3 = MI->getOperand(Op+2); - if (UseMarkup) - O << "<mem:"; - O << '['; + O << markup("<mem:") << '['; printRegName(O, MO1.getReg()); if (MO2.getReg()) { - O << ", "; - O << getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm())); + O << ", " << getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm())); printRegName(O, MO2.getReg()); - O << ']'; - if (UseMarkup) - O << ">"; + O << ']' << markup(">"); return; } @@ -597,18 +546,14 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, ARM_AM::AddrOpc op = ARM_AM::getAM3Op(MO3.getImm()); if (ImmOffs || (op == ARM_AM::sub)) { - O << ", "; - if (UseMarkup) - O << "<imm:"; - O << "#" + O << ", " + << markup("<imm:") + << "#" << ARM_AM::getAddrOpcStr(op) - << ImmOffs; - if (UseMarkup) - O << ">"; + << ImmOffs + << markup(">"); } - O << ']'; - if 
(UseMarkup) - O << ">"; + O << ']' << markup(">"); } void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op, @@ -642,13 +587,9 @@ void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI, } unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm()); - if (UseMarkup) - O << "<imm:"; - O << '#' - << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())) - << ImmOffs; - if (UseMarkup) - O << ">"; + O << markup("<imm:") + << '#' << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())) << ImmOffs + << markup(">"); } void ARMInstPrinter::printPostIdxImm8Operand(const MCInst *MI, @@ -656,11 +597,9 @@ void ARMInstPrinter::printPostIdxImm8Operand(const MCInst *MI, raw_ostream &O) { const MCOperand &MO = MI->getOperand(OpNum); unsigned Imm = MO.getImm(); - if (UseMarkup) - O << "<imm:"; - O << '#' << ((Imm & 256) ? "" : "-") << (Imm & 0xff); - if (UseMarkup) - O << ">"; + O << markup("<imm:") + << '#' << ((Imm & 256) ? "" : "-") << (Imm & 0xff) + << markup(">"); } void ARMInstPrinter::printPostIdxRegOperand(const MCInst *MI, unsigned OpNum, @@ -677,11 +616,9 @@ void ARMInstPrinter::printPostIdxImm8s4Operand(const MCInst *MI, raw_ostream &O) { const MCOperand &MO = MI->getOperand(OpNum); unsigned Imm = MO.getImm(); - if (UseMarkup) - O << "<imm:"; - O << '#' << ((Imm & 256) ? "" : "-") << ((Imm & 0xff) << 2); - if (UseMarkup) - O << ">"; + O << markup("<imm:") + << '#' << ((Imm & 256) ? 
"" : "-") << ((Imm & 0xff) << 2) + << markup(">"); } @@ -702,26 +639,20 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum, return; } - if (UseMarkup) - O << "<mem:"; - O << "["; + O << markup("<mem:") << "["; printRegName(O, MO1.getReg()); unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm()); unsigned Op = ARM_AM::getAM5Op(MO2.getImm()); if (ImmOffs || Op == ARM_AM::sub) { - O << ", "; - if (UseMarkup) - O << "<imm:"; - O << "#" + O << ", " + << markup("<imm:") + << "#" << ARM_AM::getAddrOpcStr(ARM_AM::getAM5Op(MO2.getImm())) - << ImmOffs * 4; - if (UseMarkup) - O << ">"; + << ImmOffs * 4 + << markup(">"); } - O << "]"; - if (UseMarkup) - O << ">"; + O << "]" << markup(">"); } void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum, @@ -729,29 +660,21 @@ void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum, const MCOperand &MO1 = MI->getOperand(OpNum); const MCOperand &MO2 = MI->getOperand(OpNum+1); - if (UseMarkup) - O << "<mem:"; - O << "["; + O << markup("<mem:") << "["; printRegName(O, MO1.getReg()); if (MO2.getImm()) { // FIXME: Both darwin as and GNU as violate ARM docs here. 
O << ", :" << (MO2.getImm() << 3); } - O << "]"; - if (UseMarkup) - O << ">"; + O << "]" << markup(">"); } void ARMInstPrinter::printAddrMode7Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); - if (UseMarkup) - O << "<mem:"; - O << "["; + O << markup("<mem:") << "["; printRegName(O, MO1.getReg()); - O << "]"; - if (UseMarkup) - O << ">"; + O << "]" << markup(">"); } void ARMInstPrinter::printAddrMode6OffsetOperand(const MCInst *MI, @@ -774,17 +697,9 @@ void ARMInstPrinter::printBitfieldInvMaskImmOperand(const MCInst *MI, int32_t lsb = CountTrailingZeros_32(v); int32_t width = (32 - CountLeadingZeros_32 (v)) - lsb; assert(MO.isImm() && "Not a valid bf_inv_mask_imm value!"); - if (UseMarkup) - O << "<imm:"; - O << '#' << lsb; - if (UseMarkup) - O << ">"; - O << ", "; - if (UseMarkup) - O << "<imm:"; - O << '#' << width; - if (UseMarkup) - O << ">"; + O << markup("<imm:") << '#' << lsb << markup(">") + << ", " + << markup("<imm:") << '#' << width << markup(">"); } void ARMInstPrinter::printMemBOption(const MCInst *MI, unsigned OpNum, @@ -799,20 +714,16 @@ void ARMInstPrinter::printShiftImmOperand(const MCInst *MI, unsigned OpNum, bool isASR = (ShiftOp & (1 << 5)) != 0; unsigned Amt = ShiftOp & 0x1f; if (isASR) { - O << ", asr "; - if (UseMarkup) - O << "<imm:"; - O << "#" << (Amt == 0 ? 32 : Amt); - if (UseMarkup) - O << ">"; + O << ", asr " + << markup("<imm:") + << "#" << (Amt == 0 ? 
32 : Amt) + << markup(">"); } else if (Amt) { - O << ", lsl "; - if (UseMarkup) - O << "<imm:"; - O << "#" << Amt; - if (UseMarkup) - O << ">"; + O << ", lsl " + << markup("<imm:") + << "#" << Amt + << markup(">"); } } @@ -822,12 +733,7 @@ void ARMInstPrinter::printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum, if (Imm == 0) return; assert(Imm > 0 && Imm < 32 && "Invalid PKH shift immediate value!"); - O << ", lsl "; - if (UseMarkup) - O << "<imm:"; - O << "#" << Imm; - if (UseMarkup) - O << ">"; + O << ", lsl " << markup("<imm:") << "#" << Imm << markup(">"); } void ARMInstPrinter::printPKHASRShiftImm(const MCInst *MI, unsigned OpNum, @@ -837,12 +743,7 @@ void ARMInstPrinter::printPKHASRShiftImm(const MCInst *MI, unsigned OpNum, if (Imm == 0) Imm = 32; assert(Imm > 0 && Imm <= 32 && "Invalid PKH shift immediate value!"); - O << ", asr "; - if (UseMarkup) - O << "<imm:"; - O << "#" << Imm; - if (UseMarkup) - O << ">"; + O << ", asr " << markup("<imm:") << "#" << Imm << markup(">"); } void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum, @@ -1024,35 +925,29 @@ void ARMInstPrinter::printAdrLabelOperand(const MCInst *MI, unsigned OpNum, int32_t OffImm = (int32_t)MO.getImm(); - if (UseMarkup) - O << "<imm:"; + O << markup("<imm:"); if (OffImm == INT32_MIN) O << "#-0"; else if (OffImm < 0) O << "#-" << -OffImm; else O << "#" << OffImm; - if (UseMarkup) - O << ">"; + O << markup(">"); } void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { - if (UseMarkup) - O << "<imm:"; - O << "#" << MI->getOperand(OpNum).getImm() * 4; - if (UseMarkup) - O << ">"; + O << markup("<imm:") + << "#" << MI->getOperand(OpNum).getImm() * 4 + << markup(">"); } void ARMInstPrinter::printThumbSRImm(const MCInst *MI, unsigned OpNum, raw_ostream &O) { unsigned Imm = MI->getOperand(OpNum).getImm(); - if (UseMarkup) - O << "<imm:"; - O << "#" << (Imm == 0 ? 
32 : Imm); - if (UseMarkup) - O << ">"; + O << markup("<imm:") + << "#" << (Imm == 0 ? 32 : Imm) + << markup(">"); } void ARMInstPrinter::printThumbITMask(const MCInst *MI, unsigned OpNum, @@ -1082,17 +977,13 @@ void ARMInstPrinter::printThumbAddrModeRROperand(const MCInst *MI, unsigned Op, return; } - if (UseMarkup) - O << "<mem:"; - O << "["; + O << markup("<mem:") << "["; printRegName(O, MO1.getReg()); if (unsigned RegNum = MO2.getReg()) { O << ", "; printRegName(O, RegNum); } - O << "]"; - if (UseMarkup) - O << ">"; + O << "]" << markup(">"); } void ARMInstPrinter::printThumbAddrModeImm5SOperand(const MCInst *MI, @@ -1107,21 +998,15 @@ void ARMInstPrinter::printThumbAddrModeImm5SOperand(const MCInst *MI, return; } - if (UseMarkup) - O << "<mem:"; - O << "["; + O << markup("<mem:") << "["; printRegName(O, MO1.getReg()); if (unsigned ImmOffs = MO2.getImm()) { - O << ", "; - if (UseMarkup) - O << "<imm:"; - O << "#" << ImmOffs * Scale; - if (UseMarkup) - O << ">"; + O << ", " + << markup("<imm:") + << "#" << ImmOffs * Scale + << markup(">"); } - O << "]"; - if (UseMarkup) - O << ">"; + O << "]" << markup(">"); } void ARMInstPrinter::printThumbAddrModeImm5S1Operand(const MCInst *MI, @@ -1175,9 +1060,7 @@ void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum, return; } - if (UseMarkup) - O << "<mem:"; - O << "["; + O << markup("<mem:") << "["; printRegName(O, MO1.getReg()); int32_t OffImm = (int32_t)MO2.getImm(); @@ -1186,24 +1069,18 @@ void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum, if (OffImm == INT32_MIN) OffImm = 0; if (isSub) { - O << ", "; - if (UseMarkup) - O << "<imm:"; - O << "#-" << -OffImm; - if (UseMarkup) - O << ">"; + O << ", " + << markup("<imm:") + << "#-" << -OffImm + << markup(">"); } else if (OffImm > 0) { - O << ", "; - if (UseMarkup) - O << "<imm:"; - O << "#" << OffImm; - if (UseMarkup) - O << ">"; + O << ", " + << markup("<imm:") + << "#" << OffImm + << markup(">"); } - O << "]"; - if 
(UseMarkup) - O << ">"; + O << "]" << markup(">"); } void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI, @@ -1212,9 +1089,7 @@ void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI, const MCOperand &MO1 = MI->getOperand(OpNum); const MCOperand &MO2 = MI->getOperand(OpNum+1); - if (UseMarkup) - O << "<mem:"; - O << "["; + O << markup("<mem:") << "["; printRegName(O, MO1.getReg()); int32_t OffImm = (int32_t)MO2.getImm(); @@ -1231,9 +1106,7 @@ void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI, O << "#" << OffImm; if (OffImm != 0 && UseMarkup) O << ">"; - O << "]"; - if (UseMarkup) - O << ">"; + O << "]" << markup(">"); } void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI, @@ -1247,9 +1120,7 @@ void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI, return; } - if (UseMarkup) - O << "<mem:"; - O << "["; + O << markup("<mem:") << "["; printRegName(O, MO1.getReg()); int32_t OffImm = (int32_t)MO2.getImm(); @@ -1269,9 +1140,7 @@ void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI, O << "#" << OffImm; if (OffImm != 0 && UseMarkup) O << ">"; - O << "]"; - if (UseMarkup) - O << ">"; + O << "]" << markup(">"); } void ARMInstPrinter::printT2AddrModeImm0_1020s4Operand(const MCInst *MI, @@ -1280,21 +1149,15 @@ void ARMInstPrinter::printT2AddrModeImm0_1020s4Operand(const MCInst *MI, const MCOperand &MO1 = MI->getOperand(OpNum); const MCOperand &MO2 = MI->getOperand(OpNum+1); - if (UseMarkup) - O << "<mem:"; - O << "["; + O << markup("<mem:") << "["; printRegName(O, MO1.getReg()); if (MO2.getImm()) { - O << ", "; - if (UseMarkup) - O << "<imm:"; - O << "#" << MO2.getImm() * 4; - if (UseMarkup) - O << ">"; + O << ", " + << markup("<imm:") + << "#" << MO2.getImm() * 4 + << markup(">"); } - O << "]"; - if (UseMarkup) - O << ">"; + O << "]" << markup(">"); } void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(const MCInst *MI, @@ -1302,15 +1165,12 @@ void 
ARMInstPrinter::printT2AddrModeImm8OffsetOperand(const MCInst *MI, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); int32_t OffImm = (int32_t)MO1.getImm(); - O << ", "; - if (UseMarkup) - O << "<imm:"; + O << ", " << markup("<imm:"); if (OffImm < 0) O << "#-" << -OffImm; else O << "#" << OffImm; - if (UseMarkup) - O << ">"; + O << markup(">"); } void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, @@ -1343,9 +1203,7 @@ void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI, const MCOperand &MO2 = MI->getOperand(OpNum+1); const MCOperand &MO3 = MI->getOperand(OpNum+2); - if (UseMarkup) - O << "<mem:"; - O << "["; + O << markup("<mem:") << "["; printRegName(O, MO1.getReg()); assert(MO2.getReg() && "Invalid so_reg load / store address!"); @@ -1355,26 +1213,20 @@ void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI, unsigned ShAmt = MO3.getImm(); if (ShAmt) { assert(ShAmt <= 3 && "Not a valid Thumb2 addressing mode!"); - O << ", lsl "; - if (UseMarkup) - O << "<imm:"; - O << "#" << ShAmt; - if (UseMarkup) - O << ">"; + O << ", lsl " + << markup("<imm:") + << "#" << ShAmt + << markup(">"); } - O << "]"; - if (UseMarkup) - O << ">"; + O << "]" << markup(">"); } void ARMInstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { const MCOperand &MO = MI->getOperand(OpNum); - if (UseMarkup) - O << "<imm:"; - O << '#' << ARM_AM::getFPImmFloat(MO.getImm()); - if (UseMarkup) - O << ">"; + O << markup("<imm:") + << '#' << ARM_AM::getFPImmFloat(MO.getImm()) + << markup(">"); } void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum, @@ -1382,22 +1234,18 @@ void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum, unsigned EncodedImm = MI->getOperand(OpNum).getImm(); unsigned EltBits; uint64_t Val = ARM_AM::decodeNEONModImm(EncodedImm, EltBits); - if (UseMarkup) - O << "<imm:"; - O << "#0x"; + O << markup("<imm:") + << "#0x"; O.write_hex(Val); - if 
(UseMarkup) - O << ">"; + O << markup(">"); } void ARMInstPrinter::printImmPlusOneOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { unsigned Imm = MI->getOperand(OpNum).getImm(); - if (UseMarkup) - O << "<imm:"; - O << "#" << Imm + 1; - if (UseMarkup) - O << ">"; + O << markup("<imm:") + << "#" << Imm + 1 + << markup(">"); } void ARMInstPrinter::printRotImmOperand(const MCInst *MI, unsigned OpNum, @@ -1405,45 +1253,35 @@ void ARMInstPrinter::printRotImmOperand(const MCInst *MI, unsigned OpNum, unsigned Imm = MI->getOperand(OpNum).getImm(); if (Imm == 0) return; - O << ", ror "; - if (UseMarkup) - O << "<imm:"; - O << "#"; + O << ", ror " + << markup("<imm:") + << "#"; switch (Imm) { default: assert (0 && "illegal ror immediate!"); case 1: O << "8"; break; case 2: O << "16"; break; case 3: O << "24"; break; } - if (UseMarkup) - O << ">"; + O << markup(">"); } void ARMInstPrinter::printFBits16(const MCInst *MI, unsigned OpNum, raw_ostream &O) { - if (UseMarkup) - O << "<imm:"; - O << "#" << 16 - MI->getOperand(OpNum).getImm(); - if (UseMarkup) - O << ">"; + O << markup("<imm:") + << "#" << 16 - MI->getOperand(OpNum).getImm() + << markup(">"); } void ARMInstPrinter::printFBits32(const MCInst *MI, unsigned OpNum, raw_ostream &O) { - if (UseMarkup) - O << "<imm:"; - O << "#" << 32 - MI->getOperand(OpNum).getImm(); - if (UseMarkup) - O << ">"; + O << markup("<imm:") + << "#" << 32 - MI->getOperand(OpNum).getImm() + << markup(">"); } void ARMInstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O) { - if (UseMarkup) - O << "<mem:"; O << "[" << MI->getOperand(OpNum).getImm() << "]"; - if (UseMarkup) - O << ">"; } void ARMInstPrinter::printVectorListOne(const MCInst *MI, unsigned OpNum, diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp index e92ad01e1d..918316572a 100644 --- a/lib/Target/CellSPU/SPUTargetMachine.cpp +++ b/lib/Target/CellSPU/SPUTargetMachine.cpp @@ -44,7 +44,7 @@ 
SPUTargetMachine::SPUTargetMachine(const Target &T, StringRef TT, TLInfo(*this), TSInfo(*this), InstrItins(Subtarget.getInstrItineraryData()), - STTI(&TLInfo){ + STTI(&TLInfo), VTTI(&TLInfo) { } //===----------------------------------------------------------------------===// diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp index 353542a809..30866e9eeb 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -75,7 +75,7 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT, TSInfo(*this), FrameLowering(Subtarget), InstrItins(&Subtarget.getInstrItineraryData()), - STTI(&TLInfo) { + STTI(&TLInfo), VTTI(&TLInfo) { setMCUseCFI(false); } diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp index cb5f46062d..1ae2baa198 100644 --- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp +++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp @@ -42,7 +42,8 @@ MBlazeTargetMachine(const Target &T, StringRef TT, InstrInfo(*this), FrameLowering(Subtarget), TLInfo(*this), TSInfo(*this), ELFWriterInfo(*this), - InstrItins(Subtarget.getInstrItineraryData()), STTI(&TLInfo) { + InstrItins(Subtarget.getInstrItineraryData()), + STTI(&TLInfo), VTTI(&TLInfo) { } namespace { diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp index 29ea681216..13e37b3735 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.cpp +++ b/lib/Target/MSP430/MSP430TargetMachine.cpp @@ -36,7 +36,7 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T, // FIXME: Check DataLayout string. DL("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"), InstrInfo(*this), TLInfo(*this), TSInfo(*this), - FrameLowering(Subtarget), STTI(&TLInfo) { } + FrameLowering(Subtarget), STTI(&TLInfo), VTTI(&TLInfo) { } namespace { /// MSP430 Code Generator Pass Configuration Options. 
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index da6a8d2a67..ae89cdd693 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -3601,10 +3601,11 @@ MipsTargetLowering::LowerReturn(SDValue Chain, if (!Reg) llvm_unreachable("sret virtual register not created in the entry block"); SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy()); + unsigned V0 = IsN64 ? Mips::V0_64 : Mips::V0; - Chain = DAG.getCopyToReg(Chain, dl, IsN64 ? Mips::V0_64 : Mips::V0, Val, - Flag); + Chain = DAG.getCopyToReg(Chain, dl, V0, Val, Flag); Flag = Chain.getValue(1); + MF.getRegInfo().addLiveOut(V0); } // Return on Mips is always a "jr $ra" diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index 5e8062373f..f610253f49 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -53,7 +53,7 @@ MipsTargetMachine(const Target &T, StringRef TT, InstrInfo(MipsInstrInfo::create(*this)), FrameLowering(MipsFrameLowering::create(*this, Subtarget)), TLInfo(*this), TSInfo(*this), JITInfo(), - ELFWriterInfo(false, isLittle), STTI(&TLInfo) { + ELFWriterInfo(false, isLittle), STTI(&TLInfo), VTTI(&TLInfo) { } void MipsebTargetMachine::anchor() { } diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h index 60822d0c05..a62db327e5 100644 --- a/lib/Target/Mips/MipsTargetMachine.h +++ b/lib/Target/Mips/MipsTargetMachine.h @@ -40,7 +40,7 @@ class MipsTargetMachine : public LLVMTargetMachine { MipsJITInfo JITInfo; MipsELFWriterInfo ELFWriterInfo; ScalarTargetTransformImpl STTI; - VectorTargetTransformInfo VTTI; + VectorTargetTransformImpl VTTI; public: MipsTargetMachine(const Target &T, StringRef TT, diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index c46094569e..971d1b89a8 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp 
@@ -126,10 +126,9 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) { return Base; // Truncate/sext the offset to the pointer size. - unsigned AS = PtrVal->getType()->isPointerTy() ? - cast<PointerType>(PtrVal->getType())->getAddressSpace() : 0; - if (TD.getPointerSizeInBits(AS) != 64) { - int SExtAmount = 64-TD.getPointerSizeInBits(AS); + unsigned PtrSize = TD.getPointerTypeSizeInBits(PtrVal->getType()); + if (PtrSize != 64) { + int SExtAmount = 64-PtrSize; Offset = (Offset << SExtAmount) >> SExtAmount; } @@ -151,7 +150,7 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) { // Handle casts to pointers by changing them into casts to the appropriate // integer type. This promotes constant folding and simplifies this code. Constant *Op = CE->getOperand(0); - Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()), + Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CE->getType()), false/*ZExt*/); return LowerConstant(Op, AP); } diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 7519b4a083..cbb490003d 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -73,7 +73,7 @@ NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, Subtarget(TT, CPU, FS, is64bit), DL(Subtarget.getDataLayout()), InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering(*this,is64bit), - STTI(&TLInfo) + STTI(&TLInfo), VTTI(&TLInfo) /*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ { } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index 1744738622..87ecb13a4c 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -29,9 +29,14 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { case FK_Data_1: case FK_Data_2: case FK_Data_4: + case FK_Data_8: + case PPC::fixup_ppc_toc: return 
Value; + case PPC::fixup_ppc_lo14: + case PPC::fixup_ppc_toc16_ds: + return (Value & 0xffff) << 2; case PPC::fixup_ppc_brcond14: - return Value & 0x3ffc; + return Value & 0xfffc; case PPC::fixup_ppc_br24: return Value & 0x3fffffc; #if 0 @@ -41,6 +46,7 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { case PPC::fixup_ppc_ha16: return ((Value >> 16) + ((Value & 0x8000) ? 1 : 0)) & 0xffff; case PPC::fixup_ppc_lo16: + case PPC::fixup_ppc_toc16: return Value & 0xffff; } } @@ -72,7 +78,10 @@ public: { "fixup_ppc_brcond14", 16, 14, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_ppc_lo16", 16, 16, 0 }, { "fixup_ppc_ha16", 16, 16, 0 }, - { "fixup_ppc_lo14", 16, 14, 0 } + { "fixup_ppc_lo14", 16, 14, 0 }, + { "fixup_ppc_toc", 0, 64, 0 }, + { "fixup_ppc_toc16", 16, 16, 0 }, + { "fixup_ppc_toc16_ds", 16, 14, 0 } }; if (Kind < FirstTargetFixupKind) diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index a19798157b..1518a60db8 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -11,6 +11,8 @@ #include "MCTargetDesc/PPCMCTargetDesc.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCValue.h" using namespace llvm; @@ -21,9 +23,15 @@ namespace { virtual ~PPCELFObjectWriter(); protected: + virtual unsigned getRelocTypeInner(const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel) const; virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel, bool IsRelocWithSymbol, int64_t Addend) const; + virtual const MCSymbol *undefinedExplicitRelSym(const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel) const; virtual void adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset); }; } @@ -36,11 +44,13 @@ PPCELFObjectWriter::PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI) 
PPCELFObjectWriter::~PPCELFObjectWriter() { } -unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target, - const MCFixup &Fixup, - bool IsPCRel, - bool IsRelocWithSymbol, - int64_t Addend) const { +unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel) const +{ + MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ? + MCSymbolRefExpr::VK_None : Target.getSymA()->getKind(); + // determine the type of the relocation unsigned Type; if (IsPCRel) { @@ -61,7 +71,7 @@ unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target, Type = ELF::R_PPC_ADDR24; break; case PPC::fixup_ppc_brcond14: - Type = ELF::R_PPC_ADDR14_BRTAKEN; // XXX: or BRNTAKEN?_ + Type = ELF::R_PPC_ADDR14; // XXX: or BRNTAKEN?_ break; case PPC::fixup_ppc_ha16: Type = ELF::R_PPC_ADDR16_HA; @@ -72,6 +82,26 @@ unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target, case PPC::fixup_ppc_lo14: Type = ELF::R_PPC_ADDR14; break; + case PPC::fixup_ppc_toc: + Type = ELF::R_PPC64_TOC; + break; + case PPC::fixup_ppc_toc16: + Type = ELF::R_PPC64_TOC16; + break; + case PPC::fixup_ppc_toc16_ds: + Type = ELF::R_PPC64_TOC16_DS; + break; + case FK_Data_8: + switch (Modifier) { + default: llvm_unreachable("Unsupported Modifier"); + case MCSymbolRefExpr::VK_PPC_TOC: + Type = ELF::R_PPC64_TOC; + break; + case MCSymbolRefExpr::VK_None: + Type = ELF::R_PPC64_ADDR64; + break; + } + break; case FK_Data_4: Type = ELF::R_PPC_ADDR32; break; @@ -83,11 +113,41 @@ unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target, return Type; } +unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel, + bool IsRelocWithSymbol, + int64_t Addend) const { + return getRelocTypeInner(Target, Fixup, IsPCRel); +} + +const MCSymbol *PPCELFObjectWriter::undefinedExplicitRelSym(const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel) const { + assert(Target.getSymA() && "SymA cannot be 0"); + const MCSymbol 
&Symbol = Target.getSymA()->getSymbol().AliasedSymbol(); + + unsigned RelocType = getRelocTypeInner(Target, Fixup, IsPCRel); + + // The .odp creation emits a relocation against the symbol ".TOC." which + // create a R_PPC64_TOC relocation. However the relocation symbol name + // in final object creation should be NULL, since the symbol does not + // really exist, it is just the reference to TOC base for the current + // object file. + bool EmitThisSym = RelocType != ELF::R_PPC64_TOC; + + if (EmitThisSym && !Symbol.isTemporary()) + return &Symbol; + return NULL; +} + void PPCELFObjectWriter:: adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset) { switch ((unsigned)Fixup.getKind()) { case PPC::fixup_ppc_ha16: case PPC::fixup_ppc_lo16: + case PPC::fixup_ppc_toc16: + case PPC::fixup_ppc_toc16_ds: RelocOffset += 2; break; default: diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h index b3c889e3f8..37b265e7fd 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h @@ -34,6 +34,16 @@ enum Fixups { /// fixup_ppc_lo14 - A 14-bit fixup corresponding to lo16(_foo) for instrs /// like 'std'. fixup_ppc_lo14, + + /// fixup_ppc_toc - Insert value of TOC base (.TOC.). + fixup_ppc_toc, + + /// fixup_ppc_toc16 - A 16-bit signed fixup relative to the TOC base. 
+ fixup_ppc_toc16, + + /// fixup_ppc_toc16_ds - A 14-bit signed fixup relative to the TOC base with + /// implied 2 zero bits + fixup_ppc_toc16_ds, // Marker LastTargetFixupKind, diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index 1fba5b8dc3..21183024a5 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -15,7 +15,9 @@ #include "MCTargetDesc/PPCBaseInfo.h" #include "MCTargetDesc/PPCFixupKinds.h" #include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/ErrorHandling.h" @@ -28,13 +30,25 @@ class PPCMCCodeEmitter : public MCCodeEmitter { PPCMCCodeEmitter(const PPCMCCodeEmitter &) LLVM_DELETED_FUNCTION; void operator=(const PPCMCCodeEmitter &) LLVM_DELETED_FUNCTION; + const MCSubtargetInfo &STI; + Triple TT; + public: PPCMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti, - MCContext &ctx) { + MCContext &ctx) + : STI(sti), TT(STI.getTargetTriple()) { } ~PPCMCCodeEmitter() {} + bool is64BitMode() const { + return (STI.getFeatureBits() & PPC::Feature64Bit) != 0; + } + + bool isSVR4ABI() const { + return TT.isMacOSX() == 0; + } + unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups) const; unsigned getCondBrEncoding(const MCInst &MI, unsigned OpNo, @@ -61,11 +75,19 @@ public: SmallVectorImpl<MCFixup> &Fixups) const; void EncodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups) const { - unsigned Bits = getBinaryCodeForInstr(MI, Fixups); + uint64_t Bits = getBinaryCodeForInstr(MI, Fixups); + + // BL8_NOP_ELF and BLA8_NOP_ELF are both of size 8 because of the + // following 'nop'. + unsigned Size = 4; // FIXME: Have Desc.getSize() return the correct value! 
+ unsigned Opcode = MI.getOpcode(); + if (Opcode == PPC::BL8_NOP_ELF || Opcode == PPC::BLA8_NOP_ELF) + Size = 8; // Output the constant in big endian byte order. - for (unsigned i = 0; i != 4; ++i) { - OS << (char)(Bits >> 24); + int ShiftValue = (Size * 8) - 8; + for (unsigned i = 0; i != Size; ++i) { + OS << (char)(Bits >> ShiftValue); Bits <<= 8; } @@ -140,8 +162,12 @@ unsigned PPCMCCodeEmitter::getMemRIEncoding(const MCInst &MI, unsigned OpNo, return (getMachineOpValue(MI, MO, Fixups) & 0xFFFF) | RegBits; // Add a fixup for the displacement field. - Fixups.push_back(MCFixup::Create(0, MO.getExpr(), - (MCFixupKind)PPC::fixup_ppc_lo16)); + if (isSVR4ABI() && is64BitMode()) + Fixups.push_back(MCFixup::Create(0, MO.getExpr(), + (MCFixupKind)PPC::fixup_ppc_toc16)); + else + Fixups.push_back(MCFixup::Create(0, MO.getExpr(), + (MCFixupKind)PPC::fixup_ppc_lo16)); return RegBits; } @@ -158,8 +184,12 @@ unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo, return (getMachineOpValue(MI, MO, Fixups) & 0x3FFF) | RegBits; // Add a fixup for the branch target. - Fixups.push_back(MCFixup::Create(0, MO.getExpr(), - (MCFixupKind)PPC::fixup_ppc_lo14)); + if (isSVR4ABI() && is64BitMode()) + Fixups.push_back(MCFixup::Create(0, MO.getExpr(), + (MCFixupKind)PPC::fixup_ppc_toc16_ds)); + else + Fixups.push_back(MCFixup::Create(0, MO.getExpr(), + (MCFixupKind)PPC::fixup_ppc_lo14)); return RegBits; } diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index d8abd9fba0..6941413ed4 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -420,10 +420,14 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() { OutStreamer.EmitValueToAlignment(8); MCSymbol *Symbol1 = OutContext.GetOrCreateSymbol(".L." 
+ Twine(CurrentFnSym->getName())); - MCSymbol *Symbol2 = OutContext.GetOrCreateSymbol(StringRef(".TOC.@tocbase")); + // Generates a R_PPC64_ADDR64 (from FK_DATA_8) relocation for the function + // entry point. OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol1, OutContext), 8/*size*/, 0/*addrspace*/); - OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol2, OutContext), + MCSymbol *Symbol2 = OutContext.GetOrCreateSymbol(StringRef(".TOC.")); + // Generates a R_PPC64_TOC relocation for TOC base insertion. + OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol2, + MCSymbolRefExpr::VK_PPC_TOC, OutContext), 8/*size*/, 0/*addrspace*/); // Emit a null environment pointer. OutStreamer.EmitIntValue(0, 8 /* size */, 0 /* addrspace */); diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index b93d50326a..de0d66124b 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1498,9 +1498,10 @@ SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); bool isPPC64 = (PtrVT == MVT::i64); + unsigned AS = 0; Type *IntPtrTy = DAG.getTargetLoweringInfo().getDataLayout()->getIntPtrType( - *DAG.getContext()); + *DAG.getContext(), AS); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; @@ -2083,25 +2084,42 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); InVals.push_back(FIN); - if (ObjSize==1 || ObjSize==2 || ObjSize==4) { + + if (ObjSize < 8) { if (GPR_idx != Num_GPR_Regs) { - unsigned VReg; - VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); + unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); - EVT ObjType = (ObjSize == 1 ? MVT::i8 : - (ObjSize == 2 ? 
MVT::i16 : MVT::i32)); - SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo(FuncArg, - CurArgOffset), - ObjType, false, false, 0); + SDValue Store; + + if (ObjSize==1 || ObjSize==2 || ObjSize==4) { + EVT ObjType = (ObjSize == 1 ? MVT::i8 : + (ObjSize == 2 ? MVT::i16 : MVT::i32)); + Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, + MachinePointerInfo(FuncArg, CurArgOffset), + ObjType, false, false, 0); + } else { + // For sizes that don't fit a truncating store (3, 5, 6, 7), + // store the whole register as-is to the parameter save area + // slot. The address of the parameter was already calculated + // above (InVals.push_back(FIN)) to be the right-justified + // offset within the slot. For this store, we need a new + // frame index that points at the beginning of the slot. + int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); + SDValue FIN = DAG.getFrameIndex(FI, PtrVT); + Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, + MachinePointerInfo(FuncArg, ArgOffset), + false, false, 0); + } + MemOps.push_back(Store); ++GPR_idx; } - + // Whether we copied from a register or not, advance the offset + // into the parameter save area by a full doubleword. ArgOffset += PtrByteSize; - continue; } + for (unsigned j = 0; j < ArgSize; j += PtrByteSize) { // Store whatever pieces of the object are in registers // to memory. ArgOffset will be the address of the beginning @@ -2112,16 +2130,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); - SDValue Shifted = Val; - - // For 64-bit SVR4, small structs come in right-adjusted. - // Shift them left so the following logic works as expected. 
- if (ObjSize < 8) { - SDValue ShiftAmt = DAG.getConstant(64 - 8 * ObjSize, PtrVT); - Shifted = DAG.getNode(ISD::SHL, dl, PtrVT, Val, ShiftAmt); - } - - SDValue Store = DAG.getStore(Val.getValue(1), dl, Shifted, FIN, + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo(FuncArg, ArgOffset), false, false, 0); MemOps.push_back(Store); diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index cb0ea01fcf..5a78e8ac6b 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -639,13 +639,13 @@ def LDtocCPT: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg), (PPCtoc_entry tconstpool:$disp, G8RC:$reg))]>, isPPC64; let hasSideEffects = 1 in { -let RST = 2, DS_RA = 0 in // FIXME: Should be a pseudo. -def LDinto_toc: DSForm_1<58, 0, (outs), (ins G8RC:$reg), +let RST = 2, DS = 2 in +def LDinto_toc: DSForm_1a<58, 0, (outs), (ins G8RC:$reg), "ld 2, 8($reg)", LdStLD, [(PPCload_toc G8RC:$reg)]>, isPPC64; -let RST = 2, DS_RA = 0 in // FIXME: Should be a pseudo. -def LDtoc_restore : DSForm_1<58, 0, (outs), (ins), +let RST = 2, DS = 10, RA = 1 in +def LDtoc_restore : DSForm_1a<58, 0, (outs), (ins), "ld 2, 40(1)", LdStLD, [(PPCtoc_restore)]>, isPPC64; } diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index 14b8534d1c..9c8cb92cc7 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -54,19 +54,26 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU, CPUName = sys::getHostCPUName(); #endif - // Parse features string. - ParseSubtargetFeatures(CPUName, FS); - // Initialize scheduling itinerary for the specified CPU. InstrItins = getInstrItineraryForCPU(CPUName); + // Make sure 64-bit features are available when CPUname is generic + std::string FullFS = FS; + // If we are generating code for ppc64, verify that options make sense. 
if (is64Bit) { Has64BitSupport = true; // Silently force 64-bit register use on ppc64. Use64BitRegs = true; + if (!FullFS.empty()) + FullFS = "+64bit," + FullFS; + else + FullFS = "+64bit"; } + // Parse features string. + ParseSubtargetFeatures(CPUName, FullFS); + // If the user requested use of 64-bit regs, but the cpu selected doesn't // support it, ignore. if (use64BitRegs() && !has64BitSupport()) diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index b861383475..3fc977ee2b 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -44,7 +44,7 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, FrameLowering(Subtarget), JITInfo(*this, is64Bit), TLInfo(*this), TSInfo(*this), InstrItins(Subtarget.getInstrItineraryData()), - STTI(&TLInfo){ + STTI(&TLInfo), VTTI(&TLInfo) { // The binutils for the BG/P are too old for CFI. if (Subtarget.isBGP()) diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp index 1d8cc771dd..45c962471d 100644 --- a/lib/Target/Sparc/SparcTargetMachine.cpp +++ b/lib/Target/Sparc/SparcTargetMachine.cpp @@ -36,7 +36,7 @@ SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT, DL(Subtarget.getDataLayout()), InstrInfo(Subtarget), TLInfo(*this), TSInfo(*this), - FrameLowering(Subtarget),STTI(&TLInfo) { + FrameLowering(Subtarget), STTI(&TLInfo), VTTI(&TLInfo) { } namespace { diff --git a/lib/Target/Target.cpp b/lib/Target/Target.cpp index 393178a469..7d3dd8f015 100644 --- a/lib/Target/Target.cpp +++ b/lib/Target/Target.cpp @@ -64,7 +64,7 @@ unsigned LLVMPointerSizeForAS(LLVMTargetDataRef TD, unsigned AS) { } LLVMTypeRef LLVMIntPtrType(LLVMTargetDataRef TD) { - return wrap(unwrap(TD)->getIntPtrType(getGlobalContext())); + return wrap(unwrap(TD)->getIntPtrType(getGlobalContext(), 0)); } LLVMTypeRef LLVMIntPtrTypeForAS(LLVMTargetDataRef TD, unsigned AS) { diff --git 
a/lib/Target/TargetTransformImpl.cpp b/lib/Target/TargetTransformImpl.cpp index 1cb5edab9d..382eecb766 100644 --- a/lib/Target/TargetTransformImpl.cpp +++ b/lib/Target/TargetTransformImpl.cpp @@ -9,9 +9,16 @@ #include "llvm/Target/TargetTransformImpl.h" #include "llvm/Target/TargetLowering.h" +#include <utility> using namespace llvm; +//===----------------------------------------------------------------------===// +// +// Calls used by scalar transformations. +// +//===----------------------------------------------------------------------===// + bool ScalarTargetTransformImpl::isLegalAddImmediate(int64_t imm) const { return TLI->isLegalAddImmediate(imm); } @@ -41,3 +48,151 @@ unsigned ScalarTargetTransformImpl::getJumpBufAlignment() const { unsigned ScalarTargetTransformImpl::getJumpBufSize() const { return TLI->getJumpBufSize(); } + +//===----------------------------------------------------------------------===// +// +// Calls used by the vectorizers. +// +//===----------------------------------------------------------------------===// +int InstructionOpcodeToISD(unsigned Opcode) { + static const int OpToISDTbl[] = { + /*Instruction::Ret */ 0, // Opcode numbering start at #1. 
+ /*Instruction::Br */ 0, + /*Instruction::Switch */ 0, + /*Instruction::IndirectBr */ 0, + /*Instruction::Invoke */ 0, + /*Instruction::Resume */ 0, + /*Instruction::Unreachable */ 0, + /*Instruction::Add */ ISD::ADD, + /*Instruction::FAdd */ ISD::FADD, + /*Instruction::Sub */ ISD::SUB, + /*Instruction::FSub */ ISD::FSUB, + /*Instruction::Mul */ ISD::MUL, + /*Instruction::FMul */ ISD::FMUL, + /*Instruction::UDiv */ ISD::UDIV, + /*Instruction::SDiv */ ISD::UDIV, + /*Instruction::FDiv */ ISD::FDIV, + /*Instruction::URem */ ISD::UREM, + /*Instruction::SRem */ ISD::SREM, + /*Instruction::FRem */ ISD::FREM, + /*Instruction::Shl */ ISD::SHL, + /*Instruction::LShr */ ISD::SRL, + /*Instruction::AShr */ ISD::SRA, + /*Instruction::And */ ISD::AND, + /*Instruction::Or */ ISD::OR, + /*Instruction::Xor */ ISD::XOR, + /*Instruction::Alloca */ 0, + /*Instruction::Load */ ISD::LOAD, + /*Instruction::Store */ ISD::STORE, + /*Instruction::GetElementPtr */ 0, + /*Instruction::Fence */ 0, + /*Instruction::AtomicCmpXchg */ 0, + /*Instruction::AtomicRMW */ 0, + /*Instruction::Trunc */ ISD::TRUNCATE, + /*Instruction::ZExt */ ISD::ZERO_EXTEND, + /*Instruction::SExt */ ISD::SEXTLOAD, + /*Instruction::FPToUI */ ISD::FP_TO_UINT, + /*Instruction::FPToSI */ ISD::FP_TO_SINT, + /*Instruction::UIToFP */ ISD::UINT_TO_FP, + /*Instruction::SIToFP */ ISD::SINT_TO_FP, + /*Instruction::FPTrunc */ ISD::FP_ROUND, + /*Instruction::FPExt */ ISD::FP_EXTEND, + /*Instruction::PtrToInt */ ISD::BITCAST, + /*Instruction::IntToPtr */ ISD::BITCAST, + /*Instruction::BitCast */ ISD::BITCAST, + /*Instruction::ICmp */ ISD::SETCC, + /*Instruction::FCmp */ ISD::SETCC, + /*Instruction::PHI */ 0, + /*Instruction::Call */ 0, + /*Instruction::Select */ ISD::SELECT, + /*Instruction::UserOp1 */ 0, + /*Instruction::UserOp2 */ 0, + /*Instruction::VAArg */ 0, + /*Instruction::ExtractElement*/ ISD::EXTRACT_VECTOR_ELT, + /*Instruction::InsertElement */ ISD::INSERT_VECTOR_ELT, + /*Instruction::ShuffleVector */ ISD::VECTOR_SHUFFLE, 
+ /*Instruction::ExtractValue */ ISD::MERGE_VALUES, + /*Instruction::InsertValue */ ISD::MERGE_VALUES, + /*Instruction::LandingPad */ 0}; + + assert((Instruction::Ret == 1) && (Instruction::LandingPad == 58) && + "Instruction order had changed"); + + // Opcode numbering starts at #1 but the table starts at #0, so we subtract + // one from the opcode number. + return OpToISDTbl[Opcode - 1]; +} + +std::pair<unsigned, EVT> +VectorTargetTransformImpl::getTypeLegalizationCost(LLVMContext &C, + EVT Ty) const { + unsigned Cost = 1; + // We keep legalizing the type until we find a legal kind. We assume that + // the only operation that costs anything is the split. After splitting + // we need to handle two types. + while (true) { + TargetLowering::LegalizeKind LK = TLI->getTypeConversion(C, Ty); + + if (LK.first == TargetLowering::TypeLegal) + return std::make_pair(Cost, LK.second); + + if (LK.first == TargetLowering::TypeSplitVector) + Cost *= 2; + + // Keep legalizing the type. + Ty = LK.second; + } +} + +unsigned +VectorTargetTransformImpl::getInstrCost(unsigned Opcode, Type *Ty1, + Type *Ty2) const { + // Check if any of the operands are vector operands. + int ISD = InstructionOpcodeToISD(Opcode); + + // Selects on vectors are actually vector selects. + if (ISD == ISD::SELECT) { + assert(Ty2 && "Ty2 must hold the select type"); + if (Ty2->isVectorTy()) + ISD = ISD::VSELECT; + } + + // If we don't have any information about this instruction assume it costs 1. + if (ISD == 0) + return 1; + + assert(Ty1 && "We need to have at least one type"); + + // From this stage we look at the legalized type. + std::pair<unsigned, EVT> LT = + getTypeLegalizationCost(Ty1->getContext(), TLI->getValueType(Ty1)); + + if (TLI->isOperationLegalOrCustom(ISD, LT.second)) { + // The operation is legal. Assume it costs 1. Multiply + // by the type-legalization overhead. + return LT.first * 1; + } + + unsigned NumElem = + (LT.second.isVector() ? 
LT.second.getVectorNumElements() : 1); + + // We will probably scalarize this instruction. Assume that the cost is the + // number of the vector elements. + return LT.first * NumElem * 1; +} + +unsigned +VectorTargetTransformImpl::getBroadcastCost(Type *Tp) const { + return 1; +} + +unsigned +VectorTargetTransformImpl::getMemoryOpCost(unsigned Opcode, Type *Src, + unsigned Alignment, + unsigned AddressSpace) const { + // From this stage we look at the legalized type. + std::pair<unsigned, EVT> LT = + getTypeLegalizationCost(Src->getContext(), TLI->getValueType(Src)); + // Assume that all loads of legal types cost 1. + return LT.first; +} diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 9689180afd..708951126f 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -18,6 +18,7 @@ #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/ADT/APFloat.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringSwitch.h" @@ -54,10 +55,13 @@ private: X86Operand *ParseOperand(); X86Operand *ParseATTOperand(); X86Operand *ParseIntelOperand(); + X86Operand *ParseIntelOffsetOfOperator(SMLoc StartLoc); X86Operand *ParseIntelMemOperand(unsigned SegReg, SMLoc StartLoc); X86Operand *ParseIntelBracExpression(unsigned SegReg, unsigned Size); X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc); + const MCExpr *ParseIntelDotOperator(const MCExpr *Disp); + bool ParseDirectiveWord(unsigned Size, SMLoc L); bool ParseDirectiveCode(StringRef IDVal, SMLoc L); @@ -159,6 +163,7 @@ struct X86Operand : public MCParsedAsmOperand { } Kind; SMLoc StartLoc, EndLoc; + SMLoc OffsetOfLoc; union { struct { @@ -181,7 +186,6 @@ struct X86Operand : public MCParsedAsmOperand { unsigned IndexReg; unsigned Scale; unsigned Size; - bool OffsetOf; bool NeedSizeDir; } Mem; 
}; @@ -196,6 +200,8 @@ struct X86Operand : public MCParsedAsmOperand { /// getLocRange - Get the range between the first and last token of this /// operand. SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); } + /// getOffsetOfLoc - Get the location of the offset operator. + SMLoc getOffsetOfLoc() const { return OffsetOfLoc; } virtual void print(raw_ostream &OS) const {} @@ -321,8 +327,7 @@ struct X86Operand : public MCParsedAsmOperand { } bool isOffsetOf() const { - assert(Kind == Memory && "Invalid access!"); - return Mem.OffsetOf; + return OffsetOfLoc.getPointer(); } bool needSizeDirective() const { @@ -455,9 +460,11 @@ struct X86Operand : public MCParsedAsmOperand { return Res; } - static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc) { + static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc, + SMLoc OffsetOfLoc = SMLoc()) { X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc); Res->Reg.RegNo = RegNo; + Res->OffsetOfLoc = OffsetOfLoc; return Res; } @@ -468,9 +475,8 @@ struct X86Operand : public MCParsedAsmOperand { } /// Create an absolute memory operand. 
- static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, - SMLoc EndLoc, unsigned Size = 0, - bool OffsetOf = false, bool NeedSizeDir = false){ + static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc, + unsigned Size = 0, bool NeedSizeDir = false){ X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc); Res->Mem.SegReg = 0; Res->Mem.Disp = Disp; @@ -478,7 +484,6 @@ struct X86Operand : public MCParsedAsmOperand { Res->Mem.IndexReg = 0; Res->Mem.Scale = 1; Res->Mem.Size = Size; - Res->Mem.OffsetOf = OffsetOf; Res->Mem.NeedSizeDir = NeedSizeDir; return Res; } @@ -487,8 +492,7 @@ struct X86Operand : public MCParsedAsmOperand { static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg, unsigned Scale, SMLoc StartLoc, SMLoc EndLoc, - unsigned Size = 0, bool OffsetOf = false, - bool NeedSizeDir = false) { + unsigned Size = 0, bool NeedSizeDir = false) { // We should never just have a displacement, that should be parsed as an // absolute memory operand. 
assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!"); @@ -503,7 +507,6 @@ struct X86Operand : public MCParsedAsmOperand { Res->Mem.IndexReg = IndexReg; Res->Mem.Scale = Scale; Res->Mem.Size = Size; - Res->Mem.OffsetOf = OffsetOf; Res->Mem.NeedSizeDir = NeedSizeDir; return Res; } @@ -661,9 +664,10 @@ static unsigned getIntelMemOperandSize(StringRef OpStr) { X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, unsigned Size) { unsigned BaseReg = 0, IndexReg = 0, Scale = 1; - SMLoc Start = Parser.getTok().getLoc(), End; + const AsmToken &Tok = Parser.getTok(); + SMLoc Start = Tok.getLoc(), End; - const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext()); + const MCExpr *Disp = MCConstantExpr::Create(0, getContext()); // Parse [ BaseReg + Scale*IndexReg + Disp ] or [ symbol ] // Eat '[' @@ -682,9 +686,9 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, return X86Operand::CreateMem(Disp, Start, End, Size); } } else if (getLexer().is(AsmToken::Integer)) { - int64_t Val = Parser.getTok().getIntVal(); + int64_t Val = Tok.getIntVal(); Parser.Lex(); - SMLoc Loc = Parser.getTok().getLoc(); + SMLoc Loc = Tok.getLoc(); if (getLexer().is(AsmToken::RBrac)) { // Handle '[' number ']' Parser.Lex(); @@ -696,7 +700,7 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, } else if (getLexer().is(AsmToken::Star)) { // Handle '[' Scale*IndexReg ']' Parser.Lex(); - SMLoc IdxRegLoc = Parser.getTok().getLoc(); + SMLoc IdxRegLoc = Tok.getLoc(); if (ParseRegister(IndexReg, IdxRegLoc, End)) return ErrorOperand(IdxRegLoc, "Expected register"); Scale = Val; @@ -707,13 +711,13 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, if (getLexer().is(AsmToken::Plus) || getLexer().is(AsmToken::Minus)) { bool isPlus = getLexer().is(AsmToken::Plus); Parser.Lex(); - SMLoc PlusLoc = Parser.getTok().getLoc(); + SMLoc PlusLoc = Tok.getLoc(); if (getLexer().is(AsmToken::Integer)) { - int64_t Val = 
Parser.getTok().getIntVal(); + int64_t Val = Tok.getIntVal(); Parser.Lex(); if (getLexer().is(AsmToken::Star)) { Parser.Lex(); - SMLoc IdxRegLoc = Parser.getTok().getLoc(); + SMLoc IdxRegLoc = Tok.getLoc(); if (ParseRegister(IndexReg, IdxRegLoc, End)) return ErrorOperand(IdxRegLoc, "Expected register"); Scale = Val; @@ -724,7 +728,7 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, return ErrorOperand(PlusLoc, "unexpected token after +"); } else if (getLexer().is(AsmToken::Identifier)) { // This could be an index register or a displacement expression. - End = Parser.getTok().getLoc(); + End = Tok.getLoc(); if (!IndexReg) ParseRegister(IndexReg, Start, End); else if (getParser().ParseExpression(Disp, End)) return 0; @@ -734,11 +738,16 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, if (getLexer().isNot(AsmToken::RBrac)) if (getParser().ParseExpression(Disp, End)) return 0; - End = Parser.getTok().getLoc(); + End = Tok.getLoc(); if (getLexer().isNot(AsmToken::RBrac)) return ErrorOperand(End, "expected ']' token!"); Parser.Lex(); - End = Parser.getTok().getLoc(); + End = Tok.getLoc(); + + if (Tok.getString().startswith(".")) + Disp = ParseIntelDotOperator(Disp); + + End = Tok.getLoc(); // handle [-42] if (!BaseReg && !IndexReg) @@ -761,22 +770,10 @@ X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, SMLoc Start) { Parser.Lex(); } - // Parse the 'offset' operator. This operator is used to specify the - // location rather then the content of a variable. - bool OffsetOf = false; - if(isParsingInlineAsm() && (Tok.getString() == "offset" || - Tok.getString() == "OFFSET")) { - OffsetOf = true; - Parser.Lex(); // Eat offset. 
- } - - if (getLexer().is(AsmToken::LBrac)) { - assert (!OffsetOf && "Unexpected offset operator!"); + if (getLexer().is(AsmToken::LBrac)) return ParseIntelBracExpression(SegReg, Size); - } if (!ParseRegister(SegReg, Start, End)) { - assert (!OffsetOf && "Unexpected offset operator!"); // Handel SegReg : [ ... ] if (getLexer().isNot(AsmToken::Colon)) return ErrorOperand(Start, "Expected ':' token!"); @@ -801,12 +798,74 @@ X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, SMLoc Start) { NeedSizeDir = Size > 0; } } - return X86Operand::CreateMem(Disp, Start, End, Size, OffsetOf, NeedSizeDir); + if (!isParsingInlineAsm()) + return X86Operand::CreateMem(Disp, Start, End, Size); + else + // When parsing inline assembly we set the base register to a non-zero value + // as we don't know the actual value at this time. This is necessary to + // get the matching correct in some cases. + return X86Operand::CreateMem(/*SegReg*/0, Disp, /*BaseReg*/1, /*IndexReg*/0, + /*Scale*/1, Start, End, Size, NeedSizeDir); +} + +/// Parse the '.' operator. +const MCExpr *X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp) { + AsmToken Tok = *&Parser.getTok(); + + // Drop the '.'. + StringRef DotDispStr = Tok.getString().drop_front(1); + + Lex(); // Eat .field. + + // .Imm gets lexed as a real. + if (Tok.is(AsmToken::Real)) { + APInt DotDisp; + DotDispStr.getAsInteger(10, DotDisp); + uint64_t DotDispVal = DotDisp.getZExtValue(); + + // Special case zero dot displacement. + if (!DotDispVal) return Disp; + + // FIXME: Handle non-constant expressions. + if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp)) { + uint64_t OrigDispVal = OrigDisp->getValue(); + return MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext()); + } + } + return Disp; +} + +/// Parse the 'offset' operator. This operator is used to specify the +/// location rather then the content of a variable. 
+X86Operand *X86AsmParser::ParseIntelOffsetOfOperator(SMLoc Start) { + SMLoc OffsetOfLoc = Start; + Parser.Lex(); // Eat offset. + Start = Parser.getTok().getLoc(); + assert (Parser.getTok().is(AsmToken::Identifier) && "Expected an identifier"); + + SMLoc End; + const MCExpr *Val; + if (getParser().ParseExpression(Val, End)) + return 0; + + End = Parser.getTok().getLoc(); + + // The offset operator will have an 'r' constraint, thus we need to create + // register operand to ensure proper matching. Just pick a GPR based on + // the size of a pointer. + unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX; + return X86Operand::CreateReg(RegNo, Start, End, OffsetOfLoc); } X86Operand *X86AsmParser::ParseIntelOperand() { SMLoc Start = Parser.getTok().getLoc(), End; + // offset operator. + const AsmToken &Tok = Parser.getTok(); + if ((Tok.getString() == "offset" || Tok.getString() == "OFFSET") && + isParsingInlineAsm()) + return ParseIntelOffsetOfOperator(Start); + // immediate. if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Real) || getLexer().is(AsmToken::Minus)) { diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp index edad47312d..a4bd1147bc 100644 --- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp @@ -34,11 +34,9 @@ using namespace llvm; void X86ATTInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { - if (UseMarkup) - OS << "<reg:"; - OS << '%' << getRegisterName(RegNo); - if (UseMarkup) - OS << ">"; + OS << markup("<reg:") + << '%' << getRegisterName(RegNo) + << markup(">"); } void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, @@ -155,29 +153,21 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNo); if (Op.isReg()) { - if (UseMarkup) - O << "<reg:"; - O << '%' << getRegisterName(Op.getReg()); - if (UseMarkup) - O << 
">"; + printRegName(O, Op.getReg()); } else if (Op.isImm()) { - if (UseMarkup) - O << "<imm:"; // Print X86 immediates as signed values. - O << '$' << (int64_t)Op.getImm(); - if (UseMarkup) - O << ">"; + O << markup("<imm:") + << '$' << (int64_t)Op.getImm() + << markup(">"); if (CommentStream && (Op.getImm() > 255 || Op.getImm() < -256)) *CommentStream << format("imm = 0x%" PRIX64 "\n", (uint64_t)Op.getImm()); } else { assert(Op.isExpr() && "unknown operand kind in printOperand"); - if (UseMarkup) - O << "<imm:"; - O << '$' << *Op.getExpr(); - if (UseMarkup) - O << ">"; + O << markup("<imm:") + << '$' << *Op.getExpr() + << markup(">"); } } @@ -188,8 +178,7 @@ void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op, const MCOperand &DispSpec = MI->getOperand(Op+3); const MCOperand &SegReg = MI->getOperand(Op+4); - if (UseMarkup) - O << "<mem:"; + O << markup("<mem:"); // If this has a segment register, print it. if (SegReg.getReg()) { @@ -216,17 +205,14 @@ void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op, printOperand(MI, Op+2, O); unsigned ScaleVal = MI->getOperand(Op+1).getImm(); if (ScaleVal != 1) { - O << ','; - if (UseMarkup) - O << "<imm:"; - O << ScaleVal; - if (UseMarkup) - O << ">"; + O << ',' + << markup("<imm:") + << ScaleVal + << markup(">"); } } O << ')'; } - if (UseMarkup) - O << ">"; + O << markup(">"); } diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 1dc4aa9989..1b5c4d9753 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -162,7 +162,7 @@ def : Proc<"core2", [FeatureSSSE3, FeatureCMPXCHG16B, FeatureSlowBTMem]>; def : Proc<"penryn", [FeatureSSE41, FeatureCMPXCHG16B, FeatureSlowBTMem]>; -def : AtomProc<"atom", [ProcIntelAtom, FeatureSSE3, FeatureCMPXCHG16B, +def : AtomProc<"atom", [ProcIntelAtom, FeatureSSSE3, FeatureCMPXCHG16B, FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP, FeatureSlowDivide]>; // "Arrandale" along with corei3 and corei5 diff --git 
a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td index a6d2709b37..6786756c7f 100644 --- a/lib/Target/X86/X86CallingConv.td +++ b/lib/Target/X86/X86CallingConv.td @@ -88,6 +88,21 @@ def RetCC_X86_32_Fast : CallingConv<[ CCDelegateTo<RetCC_X86Common> ]>; +// Intel_OCL_BI return-value convention. +def RetCC_Intel_OCL_BI : CallingConv<[ + // Vector types are returned in XMM0,XMM1,XMMM2 and XMM3. + CCIfType<[f32, f64, v4i32, v2i64, v4f32, v2f64], + CCAssignToReg<[XMM0,XMM1,XMM2,XMM3]>>, + + // 256-bit FP vectors + // No more than 4 registers + CCIfType<[v8f32, v4f64, v8i32, v4i64], + CCAssignToReg<[YMM0,YMM1,YMM2,YMM3]>>, + + // i32, i64 in the standard way + CCDelegateTo<RetCC_X86Common> +]>; + // X86-64 C return-value convention. def RetCC_X86_64_C : CallingConv<[ // The X86-64 calling convention always returns FP values in XMM0. @@ -128,6 +143,10 @@ def RetCC_X86_64 : CallingConv<[ // This is the return-value convention used for the entire X86 backend. def RetCC_X86 : CallingConv<[ + + // Check if this is the Intel OpenCL built-ins calling convention + CCIfCC<"CallingConv::Intel_OCL_BI", CCDelegateTo<RetCC_Intel_OCL_BI>>, + CCIfSubtarget<"is64Bit()", CCDelegateTo<RetCC_X86_64>>, CCDelegateTo<RetCC_X86_32> ]>; @@ -235,6 +254,29 @@ def CC_X86_Win64_C : CallingConv<[ CCIfType<[f80], CCAssignToStack<0, 0>> ]>; +// X86-64 Intel OpenCL built-ins calling convention. +def CC_Intel_OCL_BI : CallingConv<[ + CCIfType<[i32], CCIfSubtarget<"isTargetWin32()", CCAssignToStack<4, 4>>>, + + CCIfType<[i32], CCIfSubtarget<"isTargetWin64()", CCAssignToReg<[ECX, EDX, R8D, R9D]>>>, + CCIfType<[i64], CCIfSubtarget<"isTargetWin64()", CCAssignToReg<[RCX, RDX, R8, R9 ]>>>, + + CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX]>>, + CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX]>>, + + // The SSE vector arguments are passed in XMM registers. 
+ CCIfType<[f32, f64, v4i32, v2i64, v4f32, v2f64], + CCAssignToReg<[XMM0, XMM1, XMM2, XMM3]>>, + + // The 256-bit vector arguments are passed in YMM registers. + CCIfType<[v8f32, v4f64, v8i32, v4i64], + CCAssignToReg<[YMM0, YMM1, YMM2, YMM3]>>, + + CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_C>>, + CCDelegateTo<CC_X86_64_C> +]>; + + def CC_X86_64_GHC : CallingConv<[ // Promote i8/i16/i32 arguments to i64. CCIfType<[i8, i16, i32], CCPromoteToType<i64>>, @@ -324,7 +366,7 @@ def CC_X86_32_FastCall : CallingConv<[ CCIfNest<CCAssignToReg<[EAX]>>, // The first 2 integer arguments are passed in ECX/EDX - CCIfType<[i32], CCAssignToReg<[ECX, EDX]>>, + CCIfInReg<CCIfType<[i32], CCAssignToReg<[ECX, EDX]>>>, // Otherwise, same as everything else. CCDelegateTo<CC_X86_32_Common> @@ -408,6 +450,7 @@ def CC_X86_64 : CallingConv<[ // This is the argument convention used for the entire X86 backend. def CC_X86 : CallingConv<[ + CCIfCC<"CallingConv::Intel_OCL_BI", CCDelegateTo<CC_Intel_OCL_BI>>, CCIfSubtarget<"is64Bit()", CCDelegateTo<CC_X86_64>>, CCDelegateTo<CC_X86_32> ]>; @@ -426,3 +469,17 @@ def CSR_64EHRet : CalleeSavedRegs<(add RAX, RDX, CSR_64)>; def CSR_Win64 : CalleeSavedRegs<(add RBX, RBP, RDI, RSI, R12, R13, R14, R15, (sequence "XMM%u", 6, 15))>; + + +// Standard C + YMM6-15 +def CSR_Win64_Intel_OCL_BI_AVX : CalleeSavedRegs<(add RBX, RBP, RDI, RSI, R12, + R13, R14, R15, + (sequence "YMM%u", 6, 15))>; + +//Standard C + XMM 8-15 +def CSR_64_Intel_OCL_BI : CalleeSavedRegs<(add CSR_64, + (sequence "XMM%u", 8, 15))>; + +//Standard C + YMM 8-15 +def CSR_64_Intel_OCL_BI_AVX : CalleeSavedRegs<(add CSR_64, + (sequence "YMM%u", 8, 15))>; diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index c9301bb2c9..b8f7a6881f 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -282,8 +282,9 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM) { bool X86FastISel::X86FastEmitStore(EVT VT, const 
Value *Val, const X86AddressMode &AM) { // Handle 'null' like i32/i64 0. - if (isa<ConstantPointerNull>(Val)) - Val = Constant::getNullValue(TD.getIntPtrType(Val->getContext())); + if (isa<ConstantPointerNull>(Val)) { + Val = Constant::getNullValue(TD.getIntPtrType(Val->getType())); + } // If this is a store of a simple constant, fold the constant into the store. if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) { @@ -908,8 +909,9 @@ bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1, if (Op0Reg == 0) return false; // Handle 'null' like i32/i64 0. - if (isa<ConstantPointerNull>(Op1)) - Op1 = Constant::getNullValue(TD.getIntPtrType(Op0->getContext())); + if (isa<ConstantPointerNull>(Op1)) { + Op1 = Constant::getNullValue(TD.getIntPtrType(Op0->getType())); + } // We have two options: compare with register or immediate. If the RHS of // the compare is an immediate that we can fold into this compare, use diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 4141068806..5bfb5054b0 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -315,11 +315,11 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF, if (CSI.empty()) return; std::vector<MachineMove> &Moves = MMI.getFrameMoves(); - const DataLayout *TD = TM.getDataLayout(); + const X86RegisterInfo *RegInfo = TM.getRegisterInfo(); bool HasFP = hasFP(MF); // Calculate amount of bytes used for return address storing. - int stackGrowth = -TM.getFrameLowering()->getStackSlotSize(); // @LOCALMOD + int stackGrowth = -RegInfo->getSlotSize(); // FIXME: This is dirty hack. The code itself is pretty mess right now. // It should be rewritten from scratch and generalized sometimes. 
@@ -717,9 +717,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { // ELSE => DW_CFA_offset_extended std::vector<MachineMove> &Moves = MMI.getFrameMoves(); - const DataLayout *TD = MF.getTarget().getDataLayout(); uint64_t NumBytes = 0; - int stackGrowth = -TM.getFrameLowering()->getStackSlotSize(); // @LOCALMOD + int stackGrowth = -SlotSize; if (HasFP) { // Calculate required stack adjustment. diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index b9348a2b90..5d3c5f0347 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -959,6 +959,13 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom); + // As there is no 64-bit GPR available, we need build a special custom + // sequence to convert from v2i32 to v2f32. 
+ if (!Subtarget->is64Bit()) + setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom); + setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom); setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom); @@ -1078,6 +1085,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal); setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal); + setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom); + setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, Legal); setOperationAction(ISD::SRL, MVT::v16i16, Custom); @@ -1268,7 +1279,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setTargetDAGCombine(ISD::ANY_EXTEND); setTargetDAGCombine(ISD::SIGN_EXTEND); setTargetDAGCombine(ISD::TRUNCATE); - setTargetDAGCombine(ISD::UINT_TO_FP); setTargetDAGCombine(ISD::SINT_TO_FP); setTargetDAGCombine(ISD::SETCC); if (Subtarget->is64Bit()) @@ -2204,16 +2214,14 @@ X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG, /// optimization is performed and it is required (FPDiff!=0). static SDValue EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF, - SDValue Chain, SDValue RetAddrFrIdx, - bool Is64Bit, int FPDiff, DebugLoc dl) { + SDValue Chain, SDValue RetAddrFrIdx, EVT PtrVT, + unsigned SlotSize, int FPDiff, DebugLoc dl) { // Store the return address to the appropriate stack slot. if (!FPDiff) return Chain; // Calculate the new stack slot for the return address. - int SlotSize = Is64Bit ? 8 : 4; int NewReturnAddrFI = MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize, false); - EVT VT = Is64Bit ? 
MVT::i64 : MVT::i32; - SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT); + SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT); Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx, MachinePointerInfo::getFixedStack(NewReturnAddrFI), false, false, 0); @@ -2482,7 +2490,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, &MemOpChains2[0], MemOpChains2.size()); // Store the return address to the appropriate stack slot. - Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx, Is64Bit, + Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx, + getPointerTy(), RegInfo->getSlotSize(), FPDiff, dl); } @@ -2694,8 +2703,7 @@ X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize, unsigned StackAlignment = TFI.getStackAlignment(); uint64_t AlignMask = StackAlignment - 1; int64_t Offset = StackSize; - // @LOCALMOD - uint64_t SlotSize = Subtarget->is64Bit() ? 8 : 4; + unsigned SlotSize = RegInfo->getSlotSize(); if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) { // Number smaller than 12 so just add the difference. Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask)); @@ -3063,7 +3071,7 @@ SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const { if (ReturnAddrIndex == 0) { // Set up a frame object for the return address. - uint64_t SlotSize = Subtarget->is64Bit() ? 8 : 4; // @LOCALMOD + unsigned SlotSize = RegInfo->getSlotSize(); ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize, false); FuncInfo->setRAIndex(ReturnAddrIndex); @@ -6594,6 +6602,81 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) { getShuffleSHUFImmediate(SVOp), DAG); } +// Reduce a vector shuffle to zext. +SDValue +X86TargetLowering::lowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const { + // PMOVZX is only available from SSE41. 
+ if (!Subtarget->hasSSE41()) + return SDValue(); + + EVT VT = Op.getValueType(); + + // Only AVX2 supports 256-bit vector integer extending. + if (!Subtarget->hasAVX2() && VT.is256BitVector()) + return SDValue(); + + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); + DebugLoc DL = Op.getDebugLoc(); + SDValue V1 = Op.getOperand(0); + SDValue V2 = Op.getOperand(1); + unsigned NumElems = VT.getVectorNumElements(); + + // Extending is a unary operation and the element type of the source vector + // won't be equal to or larger than i64. + if (V2.getOpcode() != ISD::UNDEF || !VT.isInteger() || + VT.getVectorElementType() == MVT::i64) + return SDValue(); + + // Find the expansion ratio, e.g. expanding from i8 to i32 has a ratio of 4. + unsigned Shift = 1; // Start from 2, i.e. 1 << 1. + while ((1 << Shift) < NumElems) { + if (SVOp->getMaskElt(1 << Shift) == 1) + break; + Shift += 1; + // The maximal ratio is 8, i.e. from i8 to i64. + if (Shift > 3) + return SDValue(); + } + + // Check the shuffle mask. + unsigned Mask = (1U << Shift) - 1; + for (unsigned i = 0; i != NumElems; ++i) { + int EltIdx = SVOp->getMaskElt(i); + if ((i & Mask) != 0 && EltIdx != -1) + return SDValue(); + if ((i & Mask) == 0 && (unsigned)EltIdx != (i >> Shift)) + return SDValue(); + } + + unsigned NBits = VT.getVectorElementType().getSizeInBits() << Shift; + EVT NeVT = EVT::getIntegerVT(*DAG.getContext(), NBits); + EVT NVT = EVT::getVectorVT(*DAG.getContext(), NeVT, NumElems >> Shift); + + if (!isTypeLegal(NVT)) + return SDValue(); + + // Simplify the operand as it's prepared to be fed into shuffle. 
+ unsigned SignificantBits = NVT.getSizeInBits() >> Shift; + if (V1.getOpcode() == ISD::BITCAST && + V1.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR && + V1.getOperand(0).getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT && + V1.getOperand(0) + .getOperand(0).getValueType().getSizeInBits() == SignificantBits) { + // (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast x) + SDValue V = V1.getOperand(0).getOperand(0).getOperand(0); + ConstantSDNode *CIdx = + dyn_cast<ConstantSDNode>(V1.getOperand(0).getOperand(0).getOperand(1)); + // If it's foldable, i.e. normal load with single use, we will let code + // selection to fold it. Otherwise, we will short the conversion sequence. + if (CIdx && CIdx->getZExtValue() == 0 && + (!ISD::isNormalLoad(V.getNode()) || !V.hasOneUse())) + V1 = DAG.getNode(ISD::BITCAST, DL, V1.getValueType(), V); + } + + return DAG.getNode(ISD::BITCAST, DL, VT, + DAG.getNode(X86ISD::VZEXT, DL, NVT, V1)); +} + SDValue X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const { ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); @@ -6624,6 +6707,11 @@ X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const { return PromoteSplat(SVOp, DAG); } + // Check integer expanding shuffles. + SDValue NewOp = lowerVectorIntExtend(Op, DAG); + if (NewOp.getNode()) + return NewOp; + // If the shuffle can be profitably rewritten as a narrower shuffle, then // do it! 
if (VT == MVT::v8i16 || VT == MVT::v16i8 || @@ -8098,11 +8186,29 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op, return Sub; } +SDValue X86TargetLowering::lowerUINT_TO_FP_vec(SDValue Op, + SelectionDAG &DAG) const { + SDValue N0 = Op.getOperand(0); + EVT SVT = N0.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + + assert((SVT == MVT::v4i8 || SVT == MVT::v4i16 || + SVT == MVT::v8i8 || SVT == MVT::v8i16) && + "Custom UINT_TO_FP is not supported!"); + + EVT NVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, SVT.getVectorNumElements()); + return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(), + DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N0)); +} + SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { SDValue N0 = Op.getOperand(0); DebugLoc dl = Op.getDebugLoc(); + if (Op.getValueType().isVector()) + return lowerUINT_TO_FP_vec(Op, DAG); + // Since UINT_TO_FP is legal (it's marked custom), dag combiner won't // optimize it to a SINT_TO_FP when the sign bit is known zero. Perform // the optimization here. @@ -8276,6 +8382,30 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned, bool IsReplace) co } } +SDValue X86TargetLowering::lowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const { + DebugLoc DL = Op.getDebugLoc(); + EVT VT = Op.getValueType(); + SDValue In = Op.getOperand(0); + EVT SVT = In.getValueType(); + + if (!VT.is256BitVector() || !SVT.is128BitVector() || + VT.getVectorNumElements() != SVT.getVectorNumElements()) + return SDValue(); + + assert(Subtarget->hasAVX() && "256-bit vector is observed without AVX!"); + + // AVX2 has better support of integer extending. 
+ if (Subtarget->hasAVX2()) + return DAG.getNode(X86ISD::VZEXT, DL, VT, In); + + SDValue Lo = DAG.getNode(X86ISD::VZEXT, DL, MVT::v4i32, In); + static const int Mask[] = {4, 5, 6, 7, -1, -1, -1, -1}; + SDValue Hi = DAG.getNode(X86ISD::VZEXT, DL, MVT::v4i32, + DAG.getVectorShuffle(MVT::v8i16, DL, In, DAG.getUNDEF(MVT::v8i16), &Mask[0])); + + return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i32, Lo, Hi); +} + SDValue X86TargetLowering::lowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { DebugLoc DL = Op.getDebugLoc(); EVT VT = Op.getValueType(); @@ -10571,23 +10701,21 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); DebugLoc dl = Op.getDebugLoc(); + EVT PtrVT = getPointerTy(); if (Depth > 0) { SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); SDValue Offset = - // @LOCALMOD-BEGIN - DAG.getConstant(Subtarget->is64Bit() ? 8 : 4, - getPointerTy()); - // @LOCALMOD-END - return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - DAG.getNode(ISD::ADD, dl, getPointerTy(), + DAG.getConstant(RegInfo->getSlotSize(), PtrVT); + return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), + DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset), MachinePointerInfo(), false, false, false, 0); } // Just load the return address. SDValue RetAddrFI = getReturnAddressFrameIndex(DAG); - return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), + return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI, MachinePointerInfo(), false, false, false, 0); } @@ -10609,10 +10737,7 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const { - // @LOCALMOD-START - int SlotSize = Subtarget->is64Bit() ? 
8 : 4; - return DAG.getIntPtrConstant(2*SlotSize); - // @LOCALMOD-END + return DAG.getIntPtrConstant(2 * RegInfo->getSlotSize()); } SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { @@ -10620,17 +10745,17 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { SDValue Offset = Op.getOperand(1); SDValue Handler = Op.getOperand(2); DebugLoc dl = Op.getDebugLoc(); - // @LOCALMOD-START - bool Has64BitPtrs = Subtarget->has64BitPointers(); + // @LOCALMOD-START + bool Has64BitPointers = Subtarget->has64BitPointers(); SDValue Frame = DAG.getCopyFromReg(DAG.getEntryNode(), dl, - Has64BitPtrs ? X86::RBP : X86::EBP, + Has64BitPointers ? X86::RBP : X86::EBP, getPointerTy()); - unsigned StoreAddrReg = (Has64BitPtrs ? X86::RCX : X86::ECX); - int SlotSize = Subtarget->is64Bit() ? 8 : 4; - SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Frame, - DAG.getIntPtrConstant(SlotSize)); + unsigned StoreAddrReg = (Has64BitPointers ? X86::RCX : X86::ECX); // @LOCALMOD-END + + SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Frame, + DAG.getIntPtrConstant(RegInfo->getSlotSize())); StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StoreAddr, Offset); Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(), false, false, 0); @@ -11659,6 +11784,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG); case ISD::TRUNCATE: return lowerTRUNCATE(Op, DAG); + case ISD::ZERO_EXTEND: return lowerZERO_EXTEND(Op, DAG); case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG); case ISD::FP_EXTEND: return lowerFP_EXTEND(Op, DAG); @@ -11797,6 +11923,22 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, } return; } + case ISD::UINT_TO_FP: { + if (N->getOperand(0).getValueType() != MVT::v2i32 && + 
N->getValueType(0) != MVT::v2f32) + return; + SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64, + N->getOperand(0)); + SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), + MVT::f64); + SDValue VBias = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2f64, Bias, Bias); + SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64, ZExtIn, + DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, VBias)); + Or = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Or); + SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, Or, VBias); + Results.push_back(DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, Sub)); + return; + } case ISD::FP_ROUND: { SDValue V = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, N->getOperand(0)); Results.push_back(V); @@ -11999,6 +12141,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL"; case X86ISD::VSEXT_MOVL: return "X86ISD::VSEXT_MOVL"; case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD"; + case X86ISD::VZEXT: return "X86ISD::VZEXT"; + case X86ISD::VSEXT: return "X86ISD::VSEXT"; case X86ISD::VFPEXT: return "X86ISD::VFPEXT"; case X86ISD::VFPROUND: return "X86ISD::VFPROUND"; case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ"; @@ -16565,23 +16709,6 @@ static SDValue PerformBrCondCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } -static SDValue PerformUINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG) { - SDValue Op0 = N->getOperand(0); - EVT InVT = Op0->getValueType(0); - - // UINT_TO_FP(v4i8) -> SINT_TO_FP(ZEXT(v4i8 to v4i32)) - if (InVT == MVT::v8i8 || InVT == MVT::v4i8) { - DebugLoc dl = N->getDebugLoc(); - MVT DstVT = InVT == MVT::v4i8 ? MVT::v4i32 : MVT::v8i32; - SDValue P = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Op0); - // Notice that we use SINT_TO_FP because we know that the high bits - // are zero and SINT_TO_FP is better supported by the hardware. 
- return DAG.getNode(ISD::SINT_TO_FP, dl, N->getValueType(0), P); - } - - return SDValue(); -} - static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG, const X86TargetLowering *XTLI) { SDValue Op0 = N->getOperand(0); @@ -16727,6 +16854,21 @@ static SDValue PerformSubCombine(SDNode *N, SelectionDAG &DAG, return OptimizeConditionalInDecrement(N, DAG); } +/// performVZEXTCombine - Fold (vzext (bitcast (vzext x))) into (vzext x). +static SDValue performVZEXTCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget *Subtarget) { + // (vzext (bitcast (vzext x))) -> (vzext x) + SDValue In = N->getOperand(0); + while (In.getOpcode() == ISD::BITCAST) + In = In.getOperand(0); + + if (In.getOpcode() != X86ISD::VZEXT) + return SDValue(); + + return DAG.getNode(X86ISD::VZEXT, N->getDebugLoc(), N->getValueType(0), In.getOperand(0)); +} + SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -16749,7 +16891,6 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::XOR: return PerformXorCombine(N, DAG, DCI, Subtarget); case ISD::LOAD: return PerformLOADCombine(N, DAG, DCI, Subtarget); case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget); - case ISD::UINT_TO_FP: return PerformUINT_TO_FPCombine(N, DAG); case ISD::SINT_TO_FP: return PerformSINT_TO_FPCombine(N, DAG, this); case ISD::FADD: return PerformFADDCombine(N, DAG, Subtarget); case ISD::FSUB: return PerformFSUBCombine(N, DAG, Subtarget); @@ -16767,6 +16908,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::SETCC: return PerformISDSETCCCombine(N, DAG); case X86ISD::SETCC: return PerformSETCCCombine(N, DAG, DCI, Subtarget); case X86ISD::BRCOND: return PerformBrCondCombine(N, DAG, DCI, Subtarget); + case X86ISD::VZEXT: return performVZEXTCombine(N, DAG, DCI, Subtarget); case X86ISD::SHUFP: // Handle all target specific shuffles case X86ISD::PALIGN: case X86ISD::UNPCKH: diff --git 
a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index f6f9e584af..8d8f3f5161 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -243,6 +243,12 @@ namespace llvm { // VSEXT_MOVL - Vector move low and sign extend. VSEXT_MOVL, + // VZEXT - Vector integer zero-extend. + VZEXT, + + // VSEXT - Vector integer signed-extend. + VSEXT, + // VFPEXT - Vector FP extend. VFPEXT, @@ -803,7 +809,9 @@ namespace llvm { SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const; SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG) const; SDValue lowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const; SDValue lowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; @@ -847,6 +855,8 @@ namespace llvm { SDValue LowerVectorAllZeroTest(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const; + virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 46281efa57..73ba0011df 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -90,6 +90,14 @@ def X86vsmovl : SDNode<"X86ISD::VSEXT_MOVL", def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def X86vzext : SDNode<"X86ISD::VZEXT", + SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>, + SDTCisInt<0>, SDTCisInt<1>]>>; + +def X86vsext : SDNode<"X86ISD::VSEXT", + SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>, + SDTCisInt<0>, SDTCisInt<1>]>>; + def X86vfpext : SDNode<"X86ISD::VFPEXT", 
SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>, SDTCisFP<0>, SDTCisFP<1>]>>; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index cc1291a8a0..e9c7f3e7f1 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -5841,6 +5841,85 @@ let Predicates = [UseSSE41] in { (PMOVZXBQrm addr:$src)>; } +let Predicates = [HasAVX2] in { + def : Pat<(v16i16 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBWYrr VR128:$src)>; + def : Pat<(v8i32 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBDYrr VR128:$src)>; + def : Pat<(v4i64 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBQYrr VR128:$src)>; + + def : Pat<(v8i32 (X86vzext (v8i16 VR128:$src))), (VPMOVZXWDYrr VR128:$src)>; + def : Pat<(v4i64 (X86vzext (v8i16 VR128:$src))), (VPMOVZXWQYrr VR128:$src)>; + + def : Pat<(v4i64 (X86vzext (v4i32 VR128:$src))), (VPMOVZXDQYrr VR128:$src)>; +} + +let Predicates = [HasAVX] in { + def : Pat<(v8i16 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBWrr VR128:$src)>; + def : Pat<(v4i32 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBDrr VR128:$src)>; + def : Pat<(v2i64 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBQrr VR128:$src)>; + + def : Pat<(v4i32 (X86vzext (v8i16 VR128:$src))), (VPMOVZXWDrr VR128:$src)>; + def : Pat<(v2i64 (X86vzext (v8i16 VR128:$src))), (VPMOVZXWQrr VR128:$src)>; + + def : Pat<(v2i64 (X86vzext (v4i32 VR128:$src))), (VPMOVZXDQrr VR128:$src)>; + + def : Pat<(v8i16 (X86vzext (v16i8 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))), + (VPMOVZXBWrm addr:$src)>; + def : Pat<(v8i16 (X86vzext (v16i8 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))), + (VPMOVZXBWrm addr:$src)>; + def : Pat<(v4i32 (X86vzext (v16i8 (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), + (VPMOVZXBDrm addr:$src)>; + def : Pat<(v2i64 (X86vzext (v16i8 (bitconvert (v4i32 (scalar_to_vector (loadi16_anyext addr:$src))))))), + (VPMOVZXBQrm addr:$src)>; + + def : Pat<(v4i32 (X86vzext (v8i16 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))), + 
(VPMOVZXWDrm addr:$src)>; + def : Pat<(v4i32 (X86vzext (v8i16 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))), + (VPMOVZXWDrm addr:$src)>; + def : Pat<(v2i64 (X86vzext (v8i16 (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), + (VPMOVZXWQrm addr:$src)>; + + def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))), + (VPMOVZXDQrm addr:$src)>; + def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))), + (VPMOVZXDQrm addr:$src)>; + def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (X86vzload addr:$src)))))), + (VPMOVZXDQrm addr:$src)>; +} + +let Predicates = [UseSSE41] in { + def : Pat<(v8i16 (X86vzext (v16i8 VR128:$src))), (PMOVZXBWrr VR128:$src)>; + def : Pat<(v4i32 (X86vzext (v16i8 VR128:$src))), (PMOVZXBDrr VR128:$src)>; + def : Pat<(v2i64 (X86vzext (v16i8 VR128:$src))), (PMOVZXBQrr VR128:$src)>; + + def : Pat<(v4i32 (X86vzext (v8i16 VR128:$src))), (PMOVZXWDrr VR128:$src)>; + def : Pat<(v2i64 (X86vzext (v8i16 VR128:$src))), (PMOVZXWQrr VR128:$src)>; + + def : Pat<(v2i64 (X86vzext (v4i32 VR128:$src))), (PMOVZXDQrr VR128:$src)>; + + def : Pat<(v8i16 (X86vzext (v16i8 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))), + (PMOVZXBWrm addr:$src)>; + def : Pat<(v8i16 (X86vzext (v16i8 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))), + (PMOVZXBWrm addr:$src)>; + def : Pat<(v4i32 (X86vzext (v16i8 (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), + (PMOVZXBDrm addr:$src)>; + def : Pat<(v2i64 (X86vzext (v16i8 (bitconvert (v4i32 (scalar_to_vector (loadi16_anyext addr:$src))))))), + (PMOVZXBQrm addr:$src)>; + + def : Pat<(v4i32 (X86vzext (v8i16 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))), + (PMOVZXWDrm addr:$src)>; + def : Pat<(v4i32 (X86vzext (v8i16 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))), + (PMOVZXWDrm addr:$src)>; + def : Pat<(v2i64 (X86vzext (v8i16 (bitconvert (v4i32 
(scalar_to_vector (loadi32 addr:$src))))))), + (PMOVZXWQrm addr:$src)>; + + def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))), + (PMOVZXDQrm addr:$src)>; + def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))), + (PMOVZXDQrm addr:$src)>; + def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (X86vzload addr:$src)))))), + (PMOVZXDQrm addr:$src)>; +} + //===----------------------------------------------------------------------===// // SSE4.1 - Extract Instructions //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index f7a17cd7c1..9054345d35 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -234,15 +234,26 @@ const uint16_t * X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { bool callsEHReturn = false; bool ghcCall = false; + bool oclBiCall = false; + bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX(); if (MF) { callsEHReturn = MF->getMMI().callsEHReturn(); const Function *F = MF->getFunction(); ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false); + oclBiCall = (F ? 
F->getCallingConv() == CallingConv::Intel_OCL_BI : false); } if (ghcCall) return CSR_NoRegs_SaveList; + if (oclBiCall) { + if (HasAVX && IsWin64) + return CSR_Win64_Intel_OCL_BI_AVX_SaveList; + if (HasAVX && Is64Bit) + return CSR_64_Intel_OCL_BI_AVX_SaveList; + if (!HasAVX && !IsWin64 && Is64Bit) + return CSR_64_Intel_OCL_BI_SaveList; + } if (Is64Bit) { if (IsWin64) return CSR_Win64_SaveList; @@ -257,6 +268,16 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { const uint32_t* X86RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { + bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX(); + + if (CC == CallingConv::Intel_OCL_BI) { + if (IsWin64 && HasAVX) + return CSR_Win64_Intel_OCL_BI_AVX_RegMask; + if (Is64Bit && HasAVX) + return CSR_64_Intel_OCL_BI_AVX_RegMask; + if (!HasAVX && !IsWin64 && Is64Bit) + return CSR_64_Intel_OCL_BI_RegMask; + } if (CC == CallingConv::GHC) return CSR_NoRegs_RegMask; if (!Is64Bit) diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp index a102935b4b..6e53e7ac93 100644 --- a/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -62,7 +62,8 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, if (const char *bzeroEntry = V && V->isNullValue() ? 
Subtarget->getBZeroEntry() : 0) { EVT IntPtr = TLI.getPointerTy(); - Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext()); + unsigned AS = DstPtrInfo.getAddrSpace(); + Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext(), AS); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Node = Dst; diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index c804195f27..e31bedf6de 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -51,7 +51,7 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT, TSInfo(*this), TLInfo(*this), JITInfo(*this), - STTI(&TLInfo) { + STTI(&TLInfo), VTTI(&TLInfo) { } void X86_64TargetMachine::anchor() { } @@ -71,7 +71,7 @@ X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT, TSInfo(*this), TLInfo(*this), JITInfo(*this), - STTI(&TLInfo) { + STTI(&TLInfo), VTTI(&TLInfo){ } /// X86TargetMachine ctor - Create an X86 target. diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index 9e7816e21f..eaa745ba9b 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -477,7 +477,8 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const { } // Lower to a call to __misaligned_load(BasePtr). - Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext()); + unsigned AS = LD->getAddressSpace(); + Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext(), AS); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; @@ -536,7 +537,8 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const } // Lower to a call to __misaligned_store(BasePtr, Value). 
- Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext()); + unsigned AS = ST->getAddressSpace(); + Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext(), AS); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp index 0b7e3e10d4..d5a932c518 100644 --- a/lib/Target/XCore/XCoreTargetMachine.cpp +++ b/lib/Target/XCore/XCoreTargetMachine.cpp @@ -32,7 +32,7 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT, InstrInfo(), FrameLowering(Subtarget), TLInfo(*this), - TSInfo(*this), STTI(&TLInfo) { + TSInfo(*this), STTI(&TLInfo), VTTI(&TLInfo) { } namespace { diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 678189b3d6..3d5657fe6a 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -1500,7 +1500,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, unsigned TypeSize = TD->getTypeAllocSize(FieldTy); if (StructType *ST = dyn_cast<StructType>(FieldTy)) TypeSize = TD->getStructLayout(ST)->getSizeInBytes(); - Type *IntPtrTy = TD->getIntPtrType(CI->getContext()); + Type *IntPtrTy = TD->getIntPtrType(GV->getType()); Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy, ConstantInt::get(IntPtrTy, TypeSize), NElems, 0, @@ -1730,7 +1730,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, // If this is a fixed size array, transform the Malloc to be an alloc of // structs. 
malloc [100 x struct],1 -> malloc struct, 100 if (ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI, TLI))) { - Type *IntPtrTy = TD->getIntPtrType(CI->getContext()); + Type *IntPtrTy = TD->getIntPtrType(GV->getType()); unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes(); Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize); Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements()); diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index 44283ddce7..1c6477c022 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -206,9 +206,8 @@ bool FunctionComparator::isEquivalentType(Type *Ty1, return true; if (Ty1->getTypeID() != Ty2->getTypeID()) { if (TD) { - LLVMContext &Ctx = Ty1->getContext(); - if (isa<PointerType>(Ty1) && Ty2 == TD->getIntPtrType(Ctx)) return true; - if (isa<PointerType>(Ty2) && Ty1 == TD->getIntPtrType(Ctx)) return true; + if (isa<PointerType>(Ty1) && Ty2 == TD->getIntPtrType(Ty1)) return true; + if (isa<PointerType>(Ty2) && Ty1 == TD->getIntPtrType(Ty2)) return true; } return false; } diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h index 7467eca7ab..0e765f7aaa 100644 --- a/lib/Transforms/InstCombine/InstCombine.h +++ b/lib/Transforms/InstCombine/InstCombine.h @@ -208,7 +208,7 @@ private: bool ShouldChangeType(Type *From, Type *To) const; Value *dyn_castNegVal(Value *V) const; Value *dyn_castFNegVal(Value *V) const; - Type *FindElementAtOffset(Type *Ty, int64_t Offset, + Type *FindElementAtOffset(Type *Ty, int64_t Offset, Type *IntPtrTy, SmallVectorImpl<Value*> &NewIndices); Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI); diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 4f4c388a92..0958842d08 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ 
b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -996,9 +996,9 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { // Conversion is ok if changing from one pointer type to another or from // a pointer to an integer of the same size. !((OldRetTy->isPointerTy() || !TD || - OldRetTy == TD->getIntPtrType(Caller->getContext())) && + OldRetTy == TD->getIntPtrType(NewRetTy)) && (NewRetTy->isPointerTy() || !TD || - NewRetTy == TD->getIntPtrType(Caller->getContext())))) + NewRetTy == TD->getIntPtrType(OldRetTy)))) return false; // Cannot transform this return value. if (!Caller->use_empty() && @@ -1057,11 +1057,13 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { // Converting from one pointer type to another or between a pointer and an // integer of the same size is safe even if we do not have a body. + // FIXME: Not sure what to do here, so setting AS to 0. + // How can the AS for a function call be outside the default? bool isConvertible = ActTy == ParamTy || (TD && ((ParamTy->isPointerTy() || - ParamTy == TD->getIntPtrType(Caller->getContext())) && + ParamTy == TD->getIntPtrType(ActTy)) && (ActTy->isPointerTy() || - ActTy == TD->getIntPtrType(Caller->getContext())))); + ActTy == TD->getIntPtrType(ParamTy)))); if (Callee->isDeclaration() && !isConvertible) return false; } diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index f3f3f8f585..119d2f5c99 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -30,7 +30,7 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale, Scale = 0; return ConstantInt::get(Val->getType(), 0); } - + if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) { // Cannot look past anything that might overflow. 
OverflowingBinaryOperator *OBI = dyn_cast<OverflowingBinaryOperator>(Val); @@ -47,19 +47,19 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale, Offset = 0; return I->getOperand(0); } - + if (I->getOpcode() == Instruction::Mul) { // This value is scaled by 'RHS'. Scale = RHS->getZExtValue(); Offset = 0; return I->getOperand(0); } - + if (I->getOpcode() == Instruction::Add) { - // We have X+C. Check to see if we really have (X*C2)+C1, + // We have X+C. Check to see if we really have (X*C2)+C1, // where C1 is divisible by C2. unsigned SubScale; - Value *SubVal = + Value *SubVal = DecomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset); Offset += RHS->getZExtValue(); Scale = SubScale; @@ -82,7 +82,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, if (!TD) return 0; PointerType *PTy = cast<PointerType>(CI.getType()); - + BuilderTy AllocaBuilder(*Builder); AllocaBuilder.SetInsertPoint(AI.getParent(), &AI); @@ -110,7 +110,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, uint64_t ArrayOffset; Value *NumElements = // See if the array size is a decomposable linear expr. DecomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset); - + // If we can now satisfy the modulus, by using a non-1 scale, we really can // do the xform. if ((AllocElTySize*ArraySizeScale) % CastElTySize != 0 || @@ -125,17 +125,17 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, // Insert before the alloca, not before the cast. 
Amt = AllocaBuilder.CreateMul(Amt, NumElements); } - + if (uint64_t Offset = (AllocElTySize*ArrayOffset)/CastElTySize) { Value *Off = ConstantInt::get(AI.getArraySize()->getType(), Offset, true); Amt = AllocaBuilder.CreateAdd(Amt, Off); } - + AllocaInst *New = AllocaBuilder.CreateAlloca(CastElTy, Amt); New->setAlignment(AI.getAlignment()); New->takeName(&AI); - + // If the allocation has multiple real uses, insert a cast and change all // things that used it to use the new cast. This will also hack on CI, but it // will die soon. @@ -148,10 +148,10 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, return ReplaceInstUsesWith(CI, New); } -/// EvaluateInDifferentType - Given an expression that +/// EvaluateInDifferentType - Given an expression that /// CanEvaluateTruncated or CanEvaluateSExtd returns true for, actually /// insert the code to evaluate the expression. -Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, +Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned) { if (Constant *C = dyn_cast<Constant>(V)) { C = ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/); @@ -181,7 +181,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned); Res = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS); break; - } + } case Instruction::Trunc: case Instruction::ZExt: case Instruction::SExt: @@ -190,7 +190,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, // new. if (I->getOperand(0)->getType() == Ty) return I->getOperand(0); - + // Otherwise, must be the same type of cast, so just reinsert a new one. // This also handles the case of zext(trunc(x)) -> zext(x). Res = CastInst::CreateIntegerCast(I->getOperand(0), Ty, @@ -212,11 +212,11 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, Res = NPN; break; } - default: + default: // TODO: Can handle more cases here. 
llvm_unreachable("Unreachable!"); } - + Res->takeName(I); return InsertNewInstWith(Res, *I); } @@ -224,7 +224,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, /// This function is a wrapper around CastInst::isEliminableCastPair. It /// simply extracts arguments and returns what that function returns. -static Instruction::CastOps +static Instruction::CastOps isEliminableCastPair( const CastInst *CI, ///< The first cast instruction unsigned opcode, ///< The opcode of the second cast instruction @@ -238,19 +238,18 @@ isEliminableCastPair( // Get the opcodes of the two Cast instructions Instruction::CastOps firstOp = Instruction::CastOps(CI->getOpcode()); Instruction::CastOps secondOp = Instruction::CastOps(opcode); - unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy, DstTy, - TD ? TD->getIntPtrType(CI->getContext()) : 0); - + TD ? TD->getIntPtrType(DstTy) : 0); + // We don't want to form an inttoptr or ptrtoint that converts to an integer // type that differs from the pointer size. if ((Res == Instruction::IntToPtr && - (!TD || SrcTy != TD->getIntPtrType(CI->getContext()))) || + (!TD || SrcTy != TD->getIntPtrType(DstTy))) || (Res == Instruction::PtrToInt && - (!TD || DstTy != TD->getIntPtrType(CI->getContext())))) + (!TD || DstTy != TD->getIntPtrType(SrcTy)))) Res = 0; - + return Instruction::CastOps(Res); } @@ -262,18 +261,18 @@ bool InstCombiner::ShouldOptimizeCast(Instruction::CastOps opc, const Value *V, Type *Ty) { // Noop casts and casts of constants should be eliminated trivially. if (V->getType() == Ty || isa<Constant>(V)) return false; - + // If this is another cast that can be eliminated, we prefer to have it // eliminated. if (const CastInst *CI = dyn_cast<CastInst>(V)) if (isEliminableCastPair(CI, opc, Ty, TD)) return false; - + // If this is a vector sext from a compare, then we don't want to break the // idiom where each element of the extended vector is either zero or all ones. 
if (opc == Instruction::SExt && isa<CmpInst>(V) && Ty->isVectorTy()) return false; - + return true; } @@ -285,7 +284,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) { // Many cases of "cast of a cast" are eliminable. If it's eliminable we just // eliminate it now. if (CastInst *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast - if (Instruction::CastOps opc = + if (Instruction::CastOps opc = isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), TD)) { // The first cast (CSrc) is eliminable so we need to fix up or replace // the second cast (CI). CSrc will then have a good chance of being dead. @@ -308,7 +307,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) { if (Instruction *NV = FoldOpIntoPhi(CI)) return NV; } - + return 0; } @@ -327,15 +326,15 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty) { // We can always evaluate constants in another type. if (isa<Constant>(V)) return true; - + Instruction *I = dyn_cast<Instruction>(V); if (!I) return false; - + Type *OrigTy = V->getType(); - + // If this is an extension from the dest type, we can eliminate it, even if it // has multiple uses. - if ((isa<ZExtInst>(I) || isa<SExtInst>(I)) && + if ((isa<ZExtInst>(I) || isa<SExtInst>(I)) && I->getOperand(0)->getType() == Ty) return true; @@ -420,29 +419,29 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty) { // TODO: Can handle more cases here. break; } - + return false; } Instruction *InstCombiner::visitTrunc(TruncInst &CI) { if (Instruction *Result = commonCastTransforms(CI)) return Result; - - // See if we can simplify any instructions used by the input whose sole + + // See if we can simplify any instructions used by the input whose sole // purpose is to compute bits we don't care about. if (SimplifyDemandedInstructionBits(CI)) return &CI; - + Value *Src = CI.getOperand(0); Type *DestTy = CI.getType(), *SrcTy = Src->getType(); - + // Attempt to truncate the entire input expression tree to the destination // type. 
Only do this if the dest type is a simple type, don't convert the // expression tree to something weird like i93 unless the source is also // strange. if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) && CanEvaluateTruncated(Src, DestTy)) { - + // If this cast is a truncate, evaluting in a different type always // eliminates the cast, so it is always a win. DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type" @@ -459,7 +458,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { Value *Zero = Constant::getNullValue(Src->getType()); return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero); } - + // Transform trunc(lshr (zext A), Cst) to eliminate one type conversion. Value *A = 0; ConstantInt *Cst = 0; if (Src->hasOneUse() && @@ -469,7 +468,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { // ASize < MidSize and MidSize > ResultSize, but don't know the relation // between ASize and ResultSize. unsigned ASize = A->getType()->getPrimitiveSizeInBits(); - + // If the shift amount is larger than the size of A, then the result is // known to be zero because all the input bits got shifted out. if (Cst->getZExtValue() >= ASize) @@ -482,7 +481,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { Shift->takeName(Src); return CastInst::CreateIntegerCast(Shift, CI.getType(), false); } - + // Transform "trunc (and X, cst)" -> "and (trunc X), cst" so long as the dest // type isn't non-native. if (Src->hasOneUse() && isa<IntegerType>(Src->getType()) && @@ -505,7 +504,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, // cast to integer to avoid the comparison. if (ConstantInt *Op1C = dyn_cast<ConstantInt>(ICI->getOperand(1))) { const APInt &Op1CV = Op1C->getValue(); - + // zext (x <s 0) to i32 --> x>>u31 true if signbit set. // zext (x >s -1) to i32 --> (x>>u31)^1 true if signbit clear. 
if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV == 0) || @@ -535,14 +534,14 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, // zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set. // zext (X != 1) to i32 --> X^1 iff X has only the low bit set. // zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set. - if ((Op1CV == 0 || Op1CV.isPowerOf2()) && + if ((Op1CV == 0 || Op1CV.isPowerOf2()) && // This only works for EQ and NE ICI->isEquality()) { // If Op1C some other power of two, convert: uint32_t BitWidth = Op1C->getType()->getBitWidth(); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); ComputeMaskedBits(ICI->getOperand(0), KnownZero, KnownOne); - + APInt KnownZeroMask(~KnownZero); if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1? if (!DoXform) return ICI; @@ -556,7 +555,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, Res = ConstantExpr::getZExt(Res, CI.getType()); return ReplaceInstUsesWith(CI, Res); } - + uint32_t ShiftAmt = KnownZeroMask.logBase2(); Value *In = ICI->getOperand(0); if (ShiftAmt) { @@ -565,12 +564,12 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, In = Builder->CreateLShr(In, ConstantInt::get(In->getType(),ShiftAmt), In->getName()+".lobit"); } - + if ((Op1CV != 0) == isNE) { // Toggle the low bit. Constant *One = ConstantInt::get(In->getType(), 1); In = Builder->CreateXor(In, One); } - + if (CI.getType() == In->getType()) return ReplaceInstUsesWith(CI, In); return CastInst::CreateIntegerCast(In, CI.getType(), false/*ZExt*/); @@ -643,19 +642,19 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) { BitsToClear = 0; if (isa<Constant>(V)) return true; - + Instruction *I = dyn_cast<Instruction>(V); if (!I) return false; - + // If the input is a truncate from the destination type, we can trivially // eliminate it. 
if (isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty) return true; - + // We can't extend or shrink something that has multiple uses: doing so would // require duplicating the instruction in general, which isn't profitable. if (!I->hasOneUse()) return false; - + unsigned Opc = I->getOpcode(), Tmp; switch (Opc) { case Instruction::ZExt: // zext(zext(x)) -> zext(x). @@ -675,7 +674,7 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) { // These can all be promoted if neither operand has 'bits to clear'. if (BitsToClear == 0 && Tmp == 0) return true; - + // If the operation is an AND/OR/XOR and the bits to clear are zero in the // other side, BitsToClear is ok. if (Tmp == 0 && @@ -688,10 +687,10 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) { APInt::getHighBitsSet(VSize, BitsToClear))) return true; } - + // Otherwise, we don't know how to analyze this BitsToClear case yet. return false; - + case Instruction::LShr: // We can promote lshr(x, cst) if we can promote x. This requires the // ultimate 'and' to clear out the high zero bits we're clearing out though. @@ -713,7 +712,7 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) { Tmp != BitsToClear) return false; return true; - + case Instruction::PHI: { // We can change a phi if we can change all operands. Note that we never // get into trouble with cyclic PHIs here because we only consider @@ -740,44 +739,44 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { // eliminated before we try to optimize this zext. if (CI.hasOneUse() && isa<TruncInst>(CI.use_back())) return 0; - + // If one of the common conversion will work, do it. if (Instruction *Result = commonCastTransforms(CI)) return Result; - // See if we can simplify any instructions used by the input whose sole + // See if we can simplify any instructions used by the input whose sole // purpose is to compute bits we don't care about. 
if (SimplifyDemandedInstructionBits(CI)) return &CI; - + Value *Src = CI.getOperand(0); Type *SrcTy = Src->getType(), *DestTy = CI.getType(); - + // Attempt to extend the entire input expression tree to the destination // type. Only do this if the dest type is a simple type, don't convert the // expression tree to something weird like i93 unless the source is also // strange. unsigned BitsToClear; if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) && - CanEvaluateZExtd(Src, DestTy, BitsToClear)) { + CanEvaluateZExtd(Src, DestTy, BitsToClear)) { assert(BitsToClear < SrcTy->getScalarSizeInBits() && "Unreasonable BitsToClear"); - + // Okay, we can transform this! Insert the new expression now. DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type" " to avoid zero extend: " << CI); Value *Res = EvaluateInDifferentType(Src, DestTy, false); assert(Res->getType() == DestTy); - + uint32_t SrcBitsKept = SrcTy->getScalarSizeInBits()-BitsToClear; uint32_t DestBitSize = DestTy->getScalarSizeInBits(); - + // If the high bits are already filled with zeros, just replace this // cast with the result. if (MaskedValueIsZero(Res, APInt::getHighBitsSet(DestBitSize, DestBitSize-SrcBitsKept))) return ReplaceInstUsesWith(CI, Res); - + // We need to emit an AND to clear the high bits. Constant *C = ConstantInt::get(Res->getType(), APInt::getLowBitsSet(DestBitSize, SrcBitsKept)); @@ -789,7 +788,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { // 'and' which will be much cheaper than the pair of casts. if (TruncInst *CSrc = dyn_cast<TruncInst>(Src)) { // A->B->C cast // TODO: Subsume this into EvaluateInDifferentType. - + // Get the sizes of the types involved. We know that the intermediate type // will be smaller than A or C, but don't know the relation between A and C. 
Value *A = CSrc->getOperand(0); @@ -806,7 +805,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { Value *And = Builder->CreateAnd(A, AndConst, CSrc->getName()+".mask"); return new ZExtInst(And, CI.getType()); } - + if (SrcSize == DstSize) { APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize)); return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(), @@ -815,7 +814,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { if (SrcSize > DstSize) { Value *Trunc = Builder->CreateTrunc(A, CI.getType()); APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize)); - return BinaryOperator::CreateAnd(Trunc, + return BinaryOperator::CreateAnd(Trunc, ConstantInt::get(Trunc->getType(), AndValue)); } @@ -873,7 +872,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { Value *New = Builder->CreateZExt(X, CI.getType()); return BinaryOperator::CreateXor(New, ConstantInt::get(CI.getType(), 1)); } - + return 0; } @@ -986,14 +985,14 @@ static bool CanEvaluateSExtd(Value *V, Type *Ty) { // If this is a constant, it can be trivially promoted. if (isa<Constant>(V)) return true; - + Instruction *I = dyn_cast<Instruction>(V); if (!I) return false; - + // If this is a truncate from the dest type, we can trivially eliminate it. if (isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty) return true; - + // We can't extend or shrink something that has multiple uses: doing so would // require duplicating the instruction in general, which isn't profitable. if (!I->hasOneUse()) return false; @@ -1012,14 +1011,14 @@ static bool CanEvaluateSExtd(Value *V, Type *Ty) { // These operators can all arbitrarily be extended if their inputs can. 
return CanEvaluateSExtd(I->getOperand(0), Ty) && CanEvaluateSExtd(I->getOperand(1), Ty); - + //case Instruction::Shl: TODO //case Instruction::LShr: TODO - + case Instruction::Select: return CanEvaluateSExtd(I->getOperand(1), Ty) && CanEvaluateSExtd(I->getOperand(2), Ty); - + case Instruction::PHI: { // We can change a phi if we can change all operands. Note that we never // get into trouble with cyclic PHIs here because we only consider @@ -1033,7 +1032,7 @@ static bool CanEvaluateSExtd(Value *V, Type *Ty) { // TODO: Can handle more cases here. break; } - + return false; } @@ -1042,15 +1041,15 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { // eliminated before we try to optimize this zext. if (CI.hasOneUse() && isa<TruncInst>(CI.use_back())) return 0; - + if (Instruction *I = commonCastTransforms(CI)) return I; - - // See if we can simplify any instructions used by the input whose sole + + // See if we can simplify any instructions used by the input whose sole // purpose is to compute bits we don't care about. if (SimplifyDemandedInstructionBits(CI)) return &CI; - + Value *Src = CI.getOperand(0); Type *SrcTy = Src->getType(), *DestTy = CI.getType(); @@ -1073,7 +1072,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { // cast with the result. if (ComputeNumSignBits(Res) > DestBitSize - SrcBitSize) return ReplaceInstUsesWith(CI, Res); - + // We need to emit a shl + ashr to do the sign extend. Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize); return BinaryOperator::CreateAShr(Builder->CreateShl(Res, ShAmt, "sext"), @@ -1086,7 +1085,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { if (TI->hasOneUse() && TI->getOperand(0)->getType() == DestTy) { uint32_t SrcBitSize = SrcTy->getScalarSizeInBits(); uint32_t DestBitSize = DestTy->getScalarSizeInBits(); - + // We need to emit a shl + ashr to do the sign extend. 
Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize); Value *Res = Builder->CreateShl(TI->getOperand(0), ShAmt, "sext"); @@ -1122,7 +1121,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { A = Builder->CreateShl(A, ShAmtV, CI.getName()); return BinaryOperator::CreateAShr(A, ShAmtV); } - + return 0; } @@ -1144,7 +1143,7 @@ static Value *LookThroughFPExtensions(Value *V) { if (Instruction *I = dyn_cast<Instruction>(V)) if (I->getOpcode() == Instruction::FPExt) return LookThroughFPExtensions(I->getOperand(0)); - + // If this value is a constant, return the constant in the smallest FP type // that can accurately represent it. This allows us to turn // (float)((double)X+2.0) into x+2.0f. @@ -1163,14 +1162,14 @@ static Value *LookThroughFPExtensions(Value *V) { return V; // Don't try to shrink to various long double types. } - + return V; } Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { if (Instruction *I = commonCastTransforms(CI)) return I; - + // If we have fptrunc(fadd (fpextend x), (fpextend y)), where x and y are // smaller than the destination type, we can eliminate the truncate by doing // the add as the smaller type. 
This applies to fadd/fsub/fmul/fdiv as well @@ -1187,7 +1186,7 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { Type *SrcTy = OpI->getType(); Value *LHSTrunc = LookThroughFPExtensions(OpI->getOperand(0)); Value *RHSTrunc = LookThroughFPExtensions(OpI->getOperand(1)); - if (LHSTrunc->getType() != SrcTy && + if (LHSTrunc->getType() != SrcTy && RHSTrunc->getType() != SrcTy) { unsigned DstSize = CI.getType()->getScalarSizeInBits(); // If the source types were both smaller than the destination type of @@ -1199,10 +1198,10 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { return BinaryOperator::Create(OpI->getOpcode(), LHSTrunc, RHSTrunc); } } - break; + break; } } - + // Fold (fptrunc (sqrt (fpext x))) -> (sqrtf x) CallInst *Call = dyn_cast<CallInst>(CI.getOperand(0)); if (Call && Call->getCalledFunction() && TLI->has(LibFunc::sqrtf) && @@ -1217,7 +1216,7 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { Arg->getOperand(0)->getType()->isFloatTy()) { Function *Callee = Call->getCalledFunction(); Module *M = CI.getParent()->getParent()->getParent(); - Constant *SqrtfFunc = M->getOrInsertFunction("sqrtf", + Constant *SqrtfFunc = M->getOrInsertFunction("sqrtf", Callee->getAttributes(), Builder->getFloatTy(), Builder->getFloatTy(), @@ -1225,15 +1224,15 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { CallInst *ret = CallInst::Create(SqrtfFunc, Arg->getOperand(0), "sqrtfcall"); ret->setAttributes(Callee->getAttributes()); - - + + // Remove the old Call. With -fmath-errno, it won't get marked readnone. ReplaceInstUsesWith(*Call, UndefValue::get(Call->getType())); EraseInstFromFunction(*Call); return ret; } } - + return 0; } @@ -1251,7 +1250,7 @@ Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) { // This is safe if the intermediate type has enough bits in its mantissa to // accurately represent all values of X. For example, do not do this with // i64->float->i64. 
This is also safe for sitofp case, because any negative - // 'X' value would cause an undefined result for the fptoui. + // 'X' value would cause an undefined result for the fptoui. if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) && OpI->getOperand(0)->getType() == FI.getType() && (int)FI.getType()->getScalarSizeInBits() < /*extra bit for sign */ @@ -1265,19 +1264,19 @@ Instruction *InstCombiner::visitFPToSI(FPToSIInst &FI) { Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0)); if (OpI == 0) return commonCastTransforms(FI); - + // fptosi(sitofp(X)) --> X // fptosi(uitofp(X)) --> X // This is safe if the intermediate type has enough bits in its mantissa to // accurately represent all values of X. For example, do not do this with // i64->float->i64. This is also safe for sitofp case, because any negative - // 'X' value would cause an undefined result for the fptoui. + // 'X' value would cause an undefined result for the fptoui. if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) && OpI->getOperand(0)->getType() == FI.getType() && (int)FI.getType()->getScalarSizeInBits() <= OpI->getType()->getFPMantissaWidth()) return ReplaceInstUsesWith(FI, OpI->getOperand(0)); - + return commonCastTransforms(FI); } @@ -1298,17 +1297,17 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) { if (CI.getOperand(0)->getType()->getScalarSizeInBits() > TD->getPointerSizeInBits(AS)) { Value *P = Builder->CreateTrunc(CI.getOperand(0), - TD->getIntPtrType(CI.getContext())); + TD->getIntPtrType(CI.getType())); return new IntToPtrInst(P, CI.getType()); } if (CI.getOperand(0)->getType()->getScalarSizeInBits() < TD->getPointerSizeInBits(AS)) { Value *P = Builder->CreateZExt(CI.getOperand(0), - TD->getIntPtrType(CI.getContext())); + TD->getIntPtrType(CI.getType())); return new IntToPtrInst(P, CI.getType()); } } - + if (Instruction *I = commonCastTransforms(CI)) return I; @@ -1318,19 +1317,19 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) { /// @brief Implement the 
transforms for cast of pointer (bitcast/ptrtoint) Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { Value *Src = CI.getOperand(0); - + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Src)) { // If casting the result of a getelementptr instruction with no offset, turn // this into a cast of the original pointer! if (GEP->hasAllZeroIndices()) { // Changing the cast operand is usually not a good idea but it is safe - // here because the pointer operand is being replaced with another + // here because the pointer operand is being replaced with another // pointer operand so the opcode doesn't need to change. Worklist.Add(GEP); CI.setOperand(0, GEP->getOperand(0)); return &CI; } - + // If the GEP has a single use, and the base pointer is a bitcast, and the // GEP computes a constant offset, see if we can convert these three // instructions into fewer. This typically happens with unions and other @@ -1345,7 +1344,8 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { Type *GEPIdxTy = cast<PointerType>(OrigBase->getType())->getElementType(); SmallVector<Value*, 8> NewIndices; - if (FindElementAtOffset(GEPIdxTy, Offset, NewIndices)) { + Type *IntPtrTy = TD->getIntPtrType(OrigBase->getType()); + if (FindElementAtOffset(GEPIdxTy, Offset, IntPtrTy, NewIndices)) { // If we were able to index down into an element, create the GEP // and bitcast the result. This eliminates one bitcast, potentially // two. 
@@ -1353,15 +1353,15 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { Builder->CreateInBoundsGEP(OrigBase, NewIndices) : Builder->CreateGEP(OrigBase, NewIndices); NGEP->takeName(GEP); - + if (isa<BitCastInst>(CI)) return new BitCastInst(NGEP, CI.getType()); assert(isa<PtrToIntInst>(CI)); return new PtrToIntInst(NGEP, CI.getType()); - } + } } } - + return commonCastTransforms(CI); } @@ -1373,16 +1373,16 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) { if (TD) { if (CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits(AS)) { Value *P = Builder->CreatePtrToInt(CI.getOperand(0), - TD->getIntPtrType(CI.getContext())); + TD->getIntPtrType(CI.getContext(), AS)); return new TruncInst(P, CI.getType()); } if (CI.getType()->getScalarSizeInBits() > TD->getPointerSizeInBits(AS)) { Value *P = Builder->CreatePtrToInt(CI.getOperand(0), - TD->getIntPtrType(CI.getContext())); + TD->getIntPtrType(CI.getContext(), AS)); return new ZExtInst(P, CI.getType()); } } - + return commonPointerCastTransforms(CI); } @@ -1397,33 +1397,33 @@ static Instruction *OptimizeVectorResize(Value *InVal, VectorType *DestTy, // element size, or the input is a multiple of the output element size. // Convert the input type to have the same element type as the output. VectorType *SrcTy = cast<VectorType>(InVal->getType()); - + if (SrcTy->getElementType() != DestTy->getElementType()) { // The input types don't need to be identical, but for now they must be the // same size. There is no specific reason we couldn't handle things like // <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten - // there yet. + // there yet. 
if (SrcTy->getElementType()->getPrimitiveSizeInBits() != DestTy->getElementType()->getPrimitiveSizeInBits()) return 0; - + SrcTy = VectorType::get(DestTy->getElementType(), SrcTy->getNumElements()); InVal = IC.Builder->CreateBitCast(InVal, SrcTy); } - + // Now that the element types match, get the shuffle mask and RHS of the // shuffle to use, which depends on whether we're increasing or decreasing the // size of the input. SmallVector<uint32_t, 16> ShuffleMask; Value *V2; - + if (SrcTy->getNumElements() > DestTy->getNumElements()) { // If we're shrinking the number of elements, just shuffle in the low // elements from the input and use undef as the second shuffle input. V2 = UndefValue::get(SrcTy); for (unsigned i = 0, e = DestTy->getNumElements(); i != e; ++i) ShuffleMask.push_back(i); - + } else { // If we're increasing the number of elements, shuffle in all of the // elements from InVal and fill the rest of the result elements with zeros @@ -1437,7 +1437,7 @@ static Instruction *OptimizeVectorResize(Value *InVal, VectorType *DestTy, for (unsigned i = 0, e = DestTy->getNumElements()-SrcElts; i != e; ++i) ShuffleMask.push_back(SrcElts); } - + return new ShuffleVectorInst(InVal, V2, ConstantDataVector::get(V2->getContext(), ShuffleMask)); @@ -1464,7 +1464,7 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex, Type *VecEltTy) { // Undef values never contribute useful bits to the result. if (isa<UndefValue>(V)) return true; - + // If we got down to a value of the right type, we win, try inserting into the // right element. if (V->getType() == VecEltTy) { @@ -1472,15 +1472,15 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex, if (Constant *C = dyn_cast<Constant>(V)) if (C->isNullValue()) return true; - + // Fail if multiple elements are inserted into this slot. 
if (ElementIndex >= Elements.size() || Elements[ElementIndex] != 0) return false; - + Elements[ElementIndex] = V; return true; } - + if (Constant *C = dyn_cast<Constant>(V)) { // Figure out the # elements this provides, and bitcast it or slice it up // as required. @@ -1491,7 +1491,7 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex, if (NumElts == 1) return CollectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy), ElementIndex, Elements, VecEltTy); - + // Okay, this is a constant that covers multiple elements. Slice it up into // pieces and insert each element-sized piece into the vector. if (!isa<IntegerType>(C->getType())) @@ -1499,7 +1499,7 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex, C->getType()->getPrimitiveSizeInBits())); unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits(); Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize); - + for (unsigned i = 0; i != NumElts; ++i) { Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(), i*ElementSize)); @@ -1509,23 +1509,23 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex, } return true; } - + if (!V->hasOneUse()) return false; - + Instruction *I = dyn_cast<Instruction>(V); if (I == 0) return false; switch (I->getOpcode()) { default: return false; // Unhandled case. 
case Instruction::BitCast: return CollectInsertionElements(I->getOperand(0), ElementIndex, - Elements, VecEltTy); + Elements, VecEltTy); case Instruction::ZExt: if (!isMultipleOfTypeSize( I->getOperand(0)->getType()->getPrimitiveSizeInBits(), VecEltTy)) return false; return CollectInsertionElements(I->getOperand(0), ElementIndex, - Elements, VecEltTy); + Elements, VecEltTy); case Instruction::Or: return CollectInsertionElements(I->getOperand(0), ElementIndex, Elements, VecEltTy) && @@ -1537,11 +1537,11 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex, if (CI == 0) return false; if (!isMultipleOfTypeSize(CI->getZExtValue(), VecEltTy)) return false; unsigned IndexShift = getTypeSizeIndex(CI->getZExtValue(), VecEltTy); - + return CollectInsertionElements(I->getOperand(0), ElementIndex+IndexShift, Elements, VecEltTy); } - + } } @@ -1576,11 +1576,11 @@ static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI, Value *Result = Constant::getNullValue(CI.getType()); for (unsigned i = 0, e = Elements.size(); i != e; ++i) { if (Elements[i] == 0) continue; // Unset element. 
- + Result = IC.Builder->CreateInsertElement(Result, Elements[i], IC.Builder->getInt32(i)); } - + return Result; } @@ -1608,11 +1608,11 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){ VecTy->getPrimitiveSizeInBits() / DestWidth); VecInput = IC.Builder->CreateBitCast(VecInput, VecTy); } - + return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(0)); } } - + // bitcast(trunc(lshr(bitcast(somevector), cst)) ConstantInt *ShAmt = 0; if (match(Src, m_Trunc(m_LShr(m_BitCast(m_Value(VecInput)), @@ -1629,7 +1629,7 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){ VecTy->getPrimitiveSizeInBits() / DestWidth); VecInput = IC.Builder->CreateBitCast(VecInput, VecTy); } - + unsigned Elt = ShAmt->getZExtValue() / DestWidth; return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt)); } @@ -1653,12 +1653,12 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { PointerType *SrcPTy = cast<PointerType>(SrcTy); Type *DstElTy = DstPTy->getElementType(); Type *SrcElTy = SrcPTy->getElementType(); - + // If the address spaces don't match, don't eliminate the bitcast, which is // required for changing types. if (SrcPTy->getAddressSpace() != DstPTy->getAddressSpace()) return 0; - + // If we are casting a alloca to a pointer to a type of the same // size, rewrite the allocation instruction to allocate the "right" type. // There is no need to modify malloc calls because it is their bitcast that @@ -1666,14 +1666,14 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { if (AllocaInst *AI = dyn_cast<AllocaInst>(Src)) if (Instruction *V = PromoteCastOfAllocation(CI, *AI)) return V; - + // If the source and destination are pointers, and this cast is equivalent // to a getelementptr X, 0, 0, 0... turn it into the appropriate gep. // This can enhance SROA and other transforms that want type-safe pointers. 
Constant *ZeroUInt = Constant::getNullValue(Type::getInt32Ty(CI.getContext())); unsigned NumZeros = 0; - while (SrcElTy != DstElTy && + while (SrcElTy != DstElTy && isa<CompositeType>(SrcElTy) && !SrcElTy->isPointerTy() && SrcElTy->getNumContainedTypes() /* not "{}" */) { SrcElTy = cast<CompositeType>(SrcElTy)->getTypeAtIndex(ZeroUInt); @@ -1686,7 +1686,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { return GetElementPtrInst::CreateInBounds(Src, Idxs); } } - + // Try to optimize int -> float bitcasts. if ((DestTy->isFloatTy() || DestTy->isDoubleTy()) && isa<IntegerType>(SrcTy)) if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this)) @@ -1699,7 +1699,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { Constant::getNullValue(Type::getInt32Ty(CI.getContext()))); // FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast) } - + if (isa<IntegerType>(SrcTy)) { // If this is a cast from an integer to vector, check to see if the input // is a trunc or zext of a bitcast from vector. If so, we can replace all @@ -1712,7 +1712,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { cast<VectorType>(DestTy), *this)) return I; } - + // If the input is an 'or' instruction, we may be doing shifts and ors to // assemble the elements of the vector manually. Try to rip the code out // and replace it with insertelements. @@ -1723,7 +1723,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { if (VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) { if (SrcVTy->getNumElements() == 1 && !DestTy->isVectorTy()) { - Value *Elem = + Value *Elem = Builder->CreateExtractElement(Src, Constant::getNullValue(Type::getInt32Ty(CI.getContext()))); return CastInst::Create(Instruction::BitCast, Elem, DestTy); @@ -1733,7 +1733,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(Src)) { // Okay, we have (bitcast (shuffle ..)). 
Check to see if this is // a bitcast to a vector with the same # elts. - if (SVI->hasOneUse() && DestTy->isVectorTy() && + if (SVI->hasOneUse() && DestTy->isVectorTy() && cast<VectorType>(DestTy)->getNumElements() == SVI->getType()->getNumElements() && SVI->getType()->getNumElements() == @@ -1742,9 +1742,9 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { // If either of the operands is a cast from CI.getType(), then // evaluating the shuffle in the casted destination's type will allow // us to eliminate at least one cast. - if (((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(0))) && + if (((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(0))) && Tmp->getOperand(0)->getType() == DestTy) || - ((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(1))) && + ((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(1))) && Tmp->getOperand(0)->getType() == DestTy)) { Value *LHS = Builder->CreateBitCast(SVI->getOperand(0), DestTy); Value *RHS = Builder->CreateBitCast(SVI->getOperand(1), DestTy); @@ -1754,7 +1754,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { } } } - + if (SrcTy->isPointerTy()) return commonPointerCastTransforms(CI); return commonCastTransforms(CI); diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index e3e5ddae80..055c3b1514 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -371,7 +371,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, // an inbounds GEP because the index can't be out of range. if (!GEP->isInBounds() && Idx->getType()->getPrimitiveSizeInBits() > TD->getPointerSizeInBits(AS)) - Idx = Builder->CreateTrunc(Idx, TD->getIntPtrType(Idx->getContext())); + Idx = Builder->CreateTrunc(Idx, TD->getIntPtrType(Idx->getContext(), AS)); // If the comparison is only true for one or two elements, emit direct // comparisons. 
@@ -539,7 +539,7 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) { // we don't need to bother extending: the extension won't affect where the // computation crosses zero. if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth) { - Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext()); + Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext(), AS); VariableIdx = IC.Builder->CreateTrunc(VariableIdx, IntPtrTy); } return VariableIdx; @@ -561,7 +561,7 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) { return 0; // Okay, we can do this evaluation. Start by converting the index to intptr. - Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext()); + Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext(), AS); if (VariableIdx->getType() != IntPtrTy) VariableIdx = IC.Builder->CreateIntCast(VariableIdx, IntPtrTy, true /*Signed*/); @@ -1554,8 +1554,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the // integer type is the same size as the pointer type. 
if (TD && LHSCI->getOpcode() == Instruction::PtrToInt && - TD->getPointerSizeInBits( - cast<PtrToIntInst>(LHSCI)->getPointerAddressSpace()) == + TD->getTypeSizeInBits(DestTy) == cast<IntegerType>(DestTy)->getBitWidth()) { Value *RHSOp = 0; if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1))) { @@ -2251,7 +2250,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { case Instruction::IntToPtr: // icmp pred inttoptr(X), null -> icmp pred X, 0 if (RHSC->isNullValue() && TD && - TD->getIntPtrType(RHSC->getContext()) == + TD->getIntPtrType(LHSI->getType()) == LHSI->getOperand(0)->getType()) return new ICmpInst(I.getPredicate(), LHSI->getOperand(0), Constant::getNullValue(LHSI->getOperand(0)->getType())); diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 4ab5b6e4a0..633ad93ad9 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -173,7 +173,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { // Ensure that the alloca array size argument has type intptr_t, so that // any casting is exposed early. 
if (TD) { - Type *IntPtrTy = TD->getIntPtrType(AI.getContext()); + Type *IntPtrTy = TD->getIntPtrType(AI.getType()); if (AI.getArraySize()->getType() != IntPtrTy) { Value *V = Builder->CreateIntCast(AI.getArraySize(), IntPtrTy, false); @@ -185,7 +185,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1 if (AI.isArrayAllocation()) { // Check C != 1 if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) { - Type *NewTy = + Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue()); AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName()); New->setAlignment(AI.getAlignment()); @@ -311,7 +311,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, Type *SrcPTy = SrcTy->getElementType(); - if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() || + if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() || DestPTy->isVectorTy()) { // If the source is an array, the code below will not succeed. Check to // see if a trivial 'gep P, 0, 0' will help matters. Only do this for @@ -328,7 +328,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, } if (IC.getDataLayout() && - (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() || + (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() || SrcPTy->isVectorTy()) && // Do not allow turning this into a load of an integer, which is then // casted to a pointer, this pessimizes pointer analysis a lot. @@ -339,7 +339,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, // Okay, we are casting from one integer or pointer type to another of // the same size. Instead of casting the pointer before the load, cast // the result of the loaded value. 
- LoadInst *NewLoad = + LoadInst *NewLoad = IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName()); NewLoad->setAlignment(LI.getAlignment()); NewLoad->setAtomic(LI.getOrdering(), LI.getSynchScope()); @@ -376,7 +376,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { // None of the following transforms are legal for volatile/atomic loads. // FIXME: Some of it is okay for atomic loads; needs refactoring. if (!LI.isSimple()) return 0; - + // Do really simple store-to-load forwarding and load CSE, to catch cases // where there are several consecutive memory accesses to the same location, // separated by a few arithmetic operations. @@ -397,7 +397,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { Constant::getNullValue(Op->getType()), &LI); return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); } - } + } // load null/undef -> unreachable // TODO: Consider a target hook for valid address spaces for this xform. @@ -416,7 +416,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { if (CE->isCast()) if (Instruction *Res = InstCombineLoadCast(*this, LI, TD)) return Res; - + if (Op->hasOneUse()) { // Change select and PHI nodes to select values instead of addresses: this // helps alias analysis out a lot, allows many others simplifications, and @@ -470,18 +470,18 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { Type *DestPTy = cast<PointerType>(CI->getType())->getElementType(); PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType()); if (SrcTy == 0) return 0; - + Type *SrcPTy = SrcTy->getElementType(); if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy()) return 0; - + /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep" /// to its first element. This allows us to handle things like: /// store i32 xxx, (bitcast {foo*, float}* %P to i32*) /// on 32-bit hosts. SmallVector<Value*, 4> NewGEPIndices; - + // If the source is an array, the code below will not succeed. 
Check to // see if a trivial 'gep P, 0, 0' will help matters. Only do this for // constants. @@ -489,7 +489,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { // Index through pointer. Constant *Zero = Constant::getNullValue(Type::getInt32Ty(SI.getContext())); NewGEPIndices.push_back(Zero); - + while (1) { if (StructType *STy = dyn_cast<StructType>(SrcPTy)) { if (!STy->getNumElements()) /* Struct can be empty {} */ @@ -503,24 +503,23 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { break; } } - + SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace()); } if (!SrcPTy->isIntegerTy() && !SrcPTy->isPointerTy()) return 0; - + // If the pointers point into different address spaces or if they point to // values with different sizes, we can't do the transformation. if (!IC.getDataLayout() || - SrcTy->getAddressSpace() != - cast<PointerType>(CI->getType())->getAddressSpace() || + SrcTy->getAddressSpace() != CI->getType()->getPointerAddressSpace() || IC.getDataLayout()->getTypeSizeInBits(SrcPTy) != IC.getDataLayout()->getTypeSizeInBits(DestPTy)) return 0; // Okay, we are casting from one integer or pointer type to another of - // the same size. Instead of casting the pointer before + // the same size. Instead of casting the pointer before // the store, cast the value to be stored. Value *NewCast; Value *SIOp0 = SI.getOperand(0); @@ -534,12 +533,12 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { if (SIOp0->getType()->isPointerTy()) opcode = Instruction::PtrToInt; } - + // SIOp0 is a pointer to aggregate and this is a store to the first field, // emit a GEP to index into its first field. 
if (!NewGEPIndices.empty()) CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices); - + NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy, SIOp0->getName()+".c"); SI.setOperand(0, NewCast); @@ -558,7 +557,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { static bool equivalentAddressValues(Value *A, Value *B) { // Test if the values are trivially equivalent. if (A == B) return true; - + // Test if the values come form identical arithmetic instructions. // This uses isIdenticalToWhenDefined instead of isIdenticalTo because // its only used to compare two uses within the same basic block, which @@ -571,7 +570,7 @@ static bool equivalentAddressValues(Value *A, Value *B) { if (Instruction *BI = dyn_cast<Instruction>(B)) if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI)) return true; - + // Otherwise they may not be equivalent. return false; } @@ -602,7 +601,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { // If the RHS is an alloca with a single use, zapify the store, making the // alloca dead. if (Ptr->hasOneUse()) { - if (isa<AllocaInst>(Ptr)) + if (isa<AllocaInst>(Ptr)) return EraseInstFromFunction(SI); if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) { if (isa<AllocaInst>(GEP->getOperand(0))) { @@ -625,8 +624,8 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) { ScanInsts++; continue; - } - + } + if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) { // Prev store isn't volatile, and stores to the same location? if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1), @@ -638,7 +637,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { } break; } - + // If this is a load, we have to stop. However, if the loaded value is from // the pointer we're loading and is producing the pointer we're storing, // then *this* store is dead (X = load P; store X -> P). 
@@ -646,12 +645,12 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) && LI->isSimple()) return EraseInstFromFunction(SI); - + // Otherwise, this is a load from some other location. Stores before it // may not be dead. break; } - + // Don't skip over loads or things that can modify memory. if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory()) break; @@ -681,11 +680,11 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { if (Instruction *Res = InstCombineStoreToCast(*this, SI)) return Res; - + // If this store is the last instruction in the basic block (possibly // excepting debug info instructions), and if the block ends with an // unconditional branch, try to move it to the successor block. - BBI = &SI; + BBI = &SI; do { ++BBI; } while (isa<DbgInfoIntrinsic>(BBI) || @@ -694,7 +693,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { if (BI->isUnconditional()) if (SimplifyStoreAtEndOfBlock(SI)) return 0; // xform done! - + return 0; } @@ -708,12 +707,12 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { /// bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { BasicBlock *StoreBB = SI.getParent(); - + // Check to see if the successor block has exactly two incoming edges. If // so, see if the other predecessor contains a store to the same location. // if so, insert a PHI node (if needed) and move the stores down. BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0); - + // Determine whether Dest has exactly two predecessors and, if so, compute // the other predecessor. 
pred_iterator PI = pred_begin(DestBB); @@ -725,7 +724,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { if (++PI == pred_end(DestBB)) return false; - + P = *PI; if (P != StoreBB) { if (OtherBB) @@ -745,7 +744,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { BranchInst *OtherBr = dyn_cast<BranchInst>(BBI); if (!OtherBr || BBI == OtherBB->begin()) return false; - + // If the other block ends in an unconditional branch, check for the 'if then // else' case. there is an instruction before the branch. StoreInst *OtherStore = 0; @@ -767,10 +766,10 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { } else { // Otherwise, the other block ended with a conditional branch. If one of the // destinations is StoreBB, then we have the if/then case. - if (OtherBr->getSuccessor(0) != StoreBB && + if (OtherBr->getSuccessor(0) != StoreBB && OtherBr->getSuccessor(1) != StoreBB) return false; - + // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an // if/then triangle. See if there is a store to the same ptr as SI that // lives in OtherBB. @@ -788,7 +787,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { BBI == OtherBB->begin()) return false; } - + // In order to eliminate the store in OtherBr, we have to // make sure nothing reads or overwrites the stored value in // StoreBB. @@ -798,7 +797,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { return false; } } - + // Insert a PHI node now if we need it. Value *MergedVal = OtherStore->getOperand(0); if (MergedVal != SI.getOperand(0)) { @@ -807,7 +806,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { PN->addIncoming(OtherStore->getOperand(0), OtherBB); MergedVal = InsertNewInstBefore(PN, DestBB->front()); } - + // Advance to a place where it is safe to insert the new store and // insert it. 
BBI = DestBB->getFirstInsertionPt(); @@ -817,7 +816,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { SI.getOrdering(), SI.getSynchScope()); InsertNewInstBefore(NewSI, *BBI); - NewSI->setDebugLoc(OtherStore->getDebugLoc()); + NewSI->setDebugLoc(OtherStore->getDebugLoc()); // Nuke the old stores. EraseInstFromFunction(SI); diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 7f8c3ae558..00b7fca681 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -738,7 +738,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { /// or not there is a sequence of GEP indices into the type that will land us at /// the specified offset. If so, fill them into NewIndices and return the /// resultant element type, otherwise return null. -Type *InstCombiner::FindElementAtOffset(Type *Ty, int64_t Offset, +Type *InstCombiner::FindElementAtOffset(Type *Ty, int64_t Offset, Type *IntPtrTy, SmallVectorImpl<Value*> &NewIndices) { if (!TD) return 0; if (!Ty->isSized()) return 0; @@ -746,7 +746,6 @@ Type *InstCombiner::FindElementAtOffset(Type *Ty, int64_t Offset, // Start with the index over the outer type. Note that the type size // might be zero (even if the offset isn't zero) if the indexed type // is something like [0 x {int, int}] - Type *IntPtrTy = TD->getIntPtrType(Ty->getContext()); int64_t FirstIdx = 0; if (int64_t TySize = TD->getTypeAllocSize(Ty)) { FirstIdx = Offset/TySize; @@ -1055,7 +1054,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // by multiples of a zero size type with zero. 
if (TD) { bool MadeChange = false; - Type *IntPtrTy = TD->getIntPtrType(GEP.getContext()); + Type *IntPtrTy = TD->getIntPtrType(PtrOp->getType()); gep_type_iterator GTI = gep_type_begin(GEP); for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); @@ -1240,7 +1239,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // Earlier transforms ensure that the index has type IntPtrType, which // considerably simplifies the logic by eliminating implicit casts. - assert(Idx->getType() == TD->getIntPtrType(GEP.getContext()) && + assert(Idx->getType() == TD->getIntPtrType(GEP.getType()) && "Index not cast to pointer width?"); bool NSW; @@ -1275,7 +1274,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // Earlier transforms ensure that the index has type IntPtrType, which // considerably simplifies the logic by eliminating implicit casts. - assert(Idx->getType() == TD->getIntPtrType(GEP.getContext()) && + assert(Idx->getType() == TD->getIntPtrType(GEP.getType()) && "Index not cast to pointer width?"); bool NSW; @@ -1337,7 +1336,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { SmallVector<Value*, 8> NewIndices; Type *InTy = cast<PointerType>(BCI->getOperand(0)->getType())->getElementType(); - if (FindElementAtOffset(InTy, Offset, NewIndices)) { + Type *IntPtrTy = TD->getIntPtrType(BCI->getOperand(0)->getType()); + if (FindElementAtOffset(InTy, Offset, IntPtrTy, NewIndices)) { Value *NGEP = GEP.isInBounds() ? 
Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices) : Builder->CreateGEP(BCI->getOperand(0), NewIndices); diff --git a/lib/Transforms/Instrumentation/BoundsChecking.cpp b/lib/Transforms/Instrumentation/BoundsChecking.cpp index 976b963046..dd36a00070 100644 --- a/lib/Transforms/Instrumentation/BoundsChecking.cpp +++ b/lib/Transforms/Instrumentation/BoundsChecking.cpp @@ -143,7 +143,7 @@ bool BoundsChecking::instrument(Value *Ptr, Value *InstVal) { Value *Offset = SizeOffset.second; ConstantInt *SizeCI = dyn_cast<ConstantInt>(Size); - IntegerType *IntTy = TD->getIntPtrType(Inst->getContext()); + IntegerType *IntTy = TD->getIntPtrType(Ptr->getType()); Value *NeededSizeVal = ConstantInt::get(IntTy, NeededSize); // three checks are required to ensure safety: diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index 2b42c75d14..74e310f7e7 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -935,7 +935,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " << *MemoryInst); Type *IntPtrTy = - TLI->getDataLayout()->getIntPtrType(AccessTy->getContext()); + TLI->getDataLayout()->getIntPtrType(Addr->getType()); Value *Result = 0; diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index eb0da20abb..b6e15540e7 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -746,6 +746,15 @@ static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal, return true; } +/// Wrap TD.getIntPtrType, but return a vector type for vector inputs. 
+static Type *getIntPtrType(Type *Ty, const DataLayout &TD) { + Type *ITy = TD.getIntPtrType(Ty); + if (Ty->isVectorTy()) { + ITy = VectorType::get(ITy, Ty->getVectorNumElements()); + } + + return ITy; +} /// CoerceAvailableValueToLoadType - If we saw a store of a value to memory, and /// then a load from a must-aliased pointer of a different type, try to coerce @@ -769,24 +778,25 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal, // If the store and reload are the same size, we can always reuse it. if (StoreSize == LoadSize) { // Pointer to Pointer -> use bitcast. - if (StoredValTy->isPointerTy() && LoadedTy->isPointerTy()) + if (StoredValTy->getScalarType()->isPointerTy() && + LoadedTy->getScalarType()->isPointerTy()) return new BitCastInst(StoredVal, LoadedTy, "", InsertPt); // Convert source pointers to integers, which can be bitcast. - if (StoredValTy->isPointerTy()) { - StoredValTy = TD.getIntPtrType(StoredValTy->getContext()); + if (StoredValTy->getScalarType()->isPointerTy()) { + StoredValTy = getIntPtrType(StoredValTy, TD); StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt); } Type *TypeToCastTo = LoadedTy; - if (TypeToCastTo->isPointerTy()) - TypeToCastTo = TD.getIntPtrType(StoredValTy->getContext()); + if (TypeToCastTo->getScalarType()->isPointerTy()) + TypeToCastTo = getIntPtrType(StoredValTy, TD); if (StoredValTy != TypeToCastTo) StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt); // Cast to pointer if the load needs a pointer type. - if (LoadedTy->isPointerTy()) + if (LoadedTy->getScalarType()->isPointerTy()) StoredVal = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt); return StoredVal; @@ -798,8 +808,8 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal, assert(StoreSize >= LoadSize && "CanCoerceMustAliasedValueToLoad fail"); // Convert source pointers to integers, which can be manipulated. 
- if (StoredValTy->isPointerTy()) { - StoredValTy = TD.getIntPtrType(StoredValTy->getContext()); + if (StoredValTy->getScalarType()->isPointerTy()) { + StoredValTy = getIntPtrType(StoredValTy, TD); StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt); } @@ -824,7 +834,7 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal, return StoredVal; // If the result is a pointer, inttoptr. - if (LoadedTy->isPointerTy()) + if (LoadedTy->getScalarType()->isPointerTy()) return new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt); // Otherwise, bitcast. @@ -1019,8 +1029,9 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset, // Compute which bits of the stored value are being used by the load. Convert // to an integer type to start with. - if (SrcVal->getType()->isPointerTy()) - SrcVal = Builder.CreatePtrToInt(SrcVal, TD.getIntPtrType(Ctx)); + if (SrcVal->getType()->getScalarType()->isPointerTy()) + SrcVal = Builder.CreatePtrToInt(SrcVal, + getIntPtrType(SrcVal->getType(), TD)); if (!SrcVal->getType()->isIntegerTy()) SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8)); @@ -1301,7 +1312,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent()); // If new PHI nodes were created, notify alias analysis. 
- if (V->getType()->isPointerTy()) { + if (V->getType()->getScalarType()->isPointerTy()) { AliasAnalysis *AA = gvn.getAliasAnalysis(); for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) @@ -1498,7 +1509,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { if (isa<PHINode>(V)) V->takeName(LI); - if (V->getType()->isPointerTy()) + if (V->getType()->getScalarType()->isPointerTy()) MD->invalidateCachedPointerInfo(V); markInstructionForDeletion(LI); ++NumGVNLoad; @@ -1730,7 +1741,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { LI->replaceAllUsesWith(V); if (isa<PHINode>(V)) V->takeName(LI); - if (V->getType()->isPointerTy()) + if (V->getType()->getScalarType()->isPointerTy()) MD->invalidateCachedPointerInfo(V); markInstructionForDeletion(LI); ++NumPRELoad; @@ -1857,7 +1868,7 @@ bool GVN::processLoad(LoadInst *L) { // Replace the load! L->replaceAllUsesWith(AvailVal); - if (AvailVal->getType()->isPointerTy()) + if (AvailVal->getType()->getScalarType()->isPointerTy()) MD->invalidateCachedPointerInfo(AvailVal); markInstructionForDeletion(L); ++NumGVNLoad; @@ -1914,7 +1925,7 @@ bool GVN::processLoad(LoadInst *L) { // Remove it! L->replaceAllUsesWith(StoredVal); - if (StoredVal->getType()->isPointerTy()) + if (StoredVal->getType()->getScalarType()->isPointerTy()) MD->invalidateCachedPointerInfo(StoredVal); markInstructionForDeletion(L); ++NumGVNLoad; @@ -1943,7 +1954,7 @@ bool GVN::processLoad(LoadInst *L) { // Remove it! patchAndReplaceAllUsesWith(AvailableVal, L); - if (DepLI->getType()->isPointerTy()) + if (DepLI->getType()->getScalarType()->isPointerTy()) MD->invalidateCachedPointerInfo(DepLI); markInstructionForDeletion(L); ++NumGVNLoad; @@ -2184,7 +2195,7 @@ bool GVN::processInstruction(Instruction *I) { // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify. 
if (Value *V = SimplifyInstruction(I, TD, TLI, DT)) { I->replaceAllUsesWith(V); - if (MD && V->getType()->isPointerTy()) + if (MD && V->getType()->getScalarType()->isPointerTy()) MD->invalidateCachedPointerInfo(V); markInstructionForDeletion(I); ++NumGVNSimpl; @@ -2284,7 +2295,7 @@ bool GVN::processInstruction(Instruction *I) { // Remove it! patchAndReplaceAllUsesWith(repl, I); - if (MD && repl->getType()->isPointerTy()) + if (MD && repl->getType()->getScalarType()->isPointerTy()) MD->invalidateCachedPointerInfo(repl); markInstructionForDeletion(I); return true; @@ -2532,7 +2543,7 @@ bool GVN::performPRE(Function &F) { addToLeaderTable(ValNo, Phi, CurrentBlock); Phi->setDebugLoc(CurInst->getDebugLoc()); CurInst->replaceAllUsesWith(Phi); - if (Phi->getType()->isPointerTy()) { + if (Phi->getType()->getScalarType()->isPointerTy()) { // Because we have added a PHI-use of the pointer value, it has now // "escaped" from alias analysis' perspective. We need to inform // AA of this. diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 82eb746467..8a2f093629 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -1430,7 +1430,8 @@ FindLoopCounter(Loop *L, const SCEV *BECount, /// genLoopLimit - Help LinearFunctionTestReplace by generating a value that /// holds the RHS of the new loop test. static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L, - SCEVExpander &Rewriter, ScalarEvolution *SE) { + SCEVExpander &Rewriter, ScalarEvolution *SE, + Type *IntPtrTy) { const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IndVar)); assert(AR && AR->getLoop() == L && AR->isAffine() && "bad loop counter"); const SCEV *IVInit = AR->getStart(); @@ -1456,7 +1457,8 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L, // We could handle pointer IVs other than i8*, but we need to compensate for // gep index scaling. 
See canExpandBackedgeTakenCount comments. assert(SE->getSizeOfExpr( - cast<PointerType>(GEPBase->getType())->getElementType())->isOne() + cast<PointerType>(GEPBase->getType())->getElementType(), + IntPtrTy)->isOne() && "unit stride pointer IV must be i8*"); IRBuilder<> Builder(L->getLoopPreheader()->getTerminator()); @@ -1555,7 +1557,9 @@ LinearFunctionTestReplace(Loop *L, CmpIndVar = IndVar; } - Value *ExitCnt = genLoopLimit(IndVar, IVCount, L, Rewriter, SE); + Type *IntPtrTy = TD ? TD->getIntPtrType(IndVar->getType()) : + IntegerType::getInt64Ty(IndVar->getContext()); + Value *ExitCnt = genLoopLimit(IndVar, IVCount, L, Rewriter, SE, IntPtrTy); assert(ExitCnt->getType()->isPointerTy() == IndVar->getType()->isPointerTy() && "genLoopLimit missed a cast"); diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index a44e798f12..e4b40f3d3a 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -486,7 +486,9 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, // would be unsafe to do if there is anything else in the loop that may read // or write to the aliased location. Check for any overlap by generating the // base pointer and checking the region. - unsigned AddrSpace = cast<PointerType>(DestPtr->getType())->getAddressSpace(); + assert(DestPtr->getType()->isPointerTy() + && "Must be a pointer type."); + unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); Value *BasePtr = Expander.expandCodeFor(Ev->getStart(), Builder.getInt8PtrTy(AddrSpace), Preheader->getTerminator()); @@ -505,7 +507,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, // The # stored bytes is (BECount+1)*Size. Expand the trip count out to // pointer size if it isn't already. 
- Type *IntPtr = TD->getIntPtrType(DestPtr->getContext()); + Type *IntPtr = TD->getIntPtrType(DestPtr->getType()); BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr); const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1), @@ -611,7 +613,7 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, // The # stored bytes is (BECount+1)*Size. Expand the trip count out to // pointer size if it isn't already. - Type *IntPtr = TD->getIntPtrType(SI->getContext()); + Type *IntPtr = TD->getIntPtrType(SI->getType()); BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr); const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1), diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index 71c62257e7..af3a880cb9 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -582,7 +582,8 @@ private: P.Partitions.push_back(New); } - bool handleLoadOrStore(Type *Ty, Instruction &I, int64_t Offset) { + bool handleLoadOrStore(Type *Ty, Instruction &I, int64_t Offset, + bool IsVolatile) { uint64_t Size = TD.getTypeStoreSize(Ty); // If this memory access can be shown to *statically* extend outside the @@ -603,7 +604,14 @@ private: return true; } - insertUse(I, Offset, Size); + // We allow splitting of loads and stores where the type is an integer type + // and which cover the entire alloca. Such integer loads and stores + // often require decomposition into fine grained loads and stores. 
+ bool IsSplittable = false; + if (IntegerType *ITy = dyn_cast<IntegerType>(Ty)) + IsSplittable = !IsVolatile && ITy->getBitWidth() == AllocSize*8; + + insertUse(I, Offset, Size, IsSplittable); return true; } @@ -624,7 +632,7 @@ private: bool visitLoadInst(LoadInst &LI) { assert((!LI.isSimple() || LI.getType()->isSingleValueType()) && "All simple FCA loads should have been pre-split"); - return handleLoadOrStore(LI.getType(), LI, Offset); + return handleLoadOrStore(LI.getType(), LI, Offset, LI.isVolatile()); } bool visitStoreInst(StoreInst &SI) { @@ -634,7 +642,7 @@ private: assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) && "All simple FCA stores should have been pre-split"); - return handleLoadOrStore(ValOp->getType(), SI, Offset); + return handleLoadOrStore(ValOp->getType(), SI, Offset, SI.isVolatile()); } @@ -1173,6 +1181,21 @@ Type *AllocaPartitioning::getCommonType(iterator I) const { UserTy = LI->getType(); } else if (StoreInst *SI = dyn_cast<StoreInst>(UI->U->getUser())) { UserTy = SI->getValueOperand()->getType(); + } else { + return 0; // Bail if we have weird uses. + } + + if (IntegerType *ITy = dyn_cast<IntegerType>(UserTy)) { + // If the type is larger than the partition, skip it. We only encounter + // this for split integer operations where we want to use the type of the + // entity causing the split. + if (ITy->getBitWidth() > (I->EndOffset - I->BeginOffset)*8) + continue; + + // If we have found an integer type use covering the alloca, use that + // regardless of the other types, as integers are often used for a "bucket + // of bits" type. + return ITy; } if (Ty && Ty != UserTy) @@ -2138,6 +2161,14 @@ static bool isIntegerWideningViable(const DataLayout &TD, if (SizeInBits != TD.getTypeStoreSizeInBits(AllocaTy)) return false; + // We need to ensure that an integer type with the appropriate bitwidth can + // be converted to the alloca type, whatever that is. 
We don't want to force + // the alloca itself to have an integer type if there is a more suitable one. + Type *IntTy = Type::getIntNTy(AllocaTy->getContext(), SizeInBits); + if (!canConvertValue(TD, AllocaTy, IntTy) || + !canConvertValue(TD, IntTy, AllocaTy)) + return false; + uint64_t Size = TD.getTypeStoreSize(AllocaTy); // Check the uses to ensure the uses are (likely) promoteable integer uses. @@ -2364,8 +2395,9 @@ private: Value *getAdjustedAllocaPtr(IRBuilder<> &IRB, Type *PointerTy) { assert(BeginOffset >= NewAllocaBeginOffset); - unsigned AS = cast<PointerType>(PointerTy)->getAddressSpace(); - APInt Offset(TD.getPointerSizeInBits(AS), BeginOffset - NewAllocaBeginOffset); + assert(PointerTy->isPointerTy() && + "Type must be pointer type!"); + APInt Offset(TD.getTypeSizeInBits(PointerTy), BeginOffset - NewAllocaBeginOffset); return getAdjustedPtr(IRB, TD, &NewAI, Offset, PointerTy, getName("")); } @@ -2460,6 +2492,50 @@ private: if (VecTy) return rewriteVectorizedLoadInst(IRB, LI, OldOp); + + uint64_t Size = EndOffset - BeginOffset; + if (Size < TD.getTypeStoreSize(LI.getType())) { + assert(!LI.isVolatile()); + assert(LI.getType()->isIntegerTy() && + "Only integer type loads and stores are split"); + assert(LI.getType()->getIntegerBitWidth() == + TD.getTypeStoreSizeInBits(LI.getType()) && + "Non-byte-multiple bit width"); + assert(LI.getType()->getIntegerBitWidth() == + TD.getTypeSizeInBits(OldAI.getAllocatedType()) && + "Only alloca-wide loads can be split and recomposed"); + IntegerType *NarrowTy = Type::getIntNTy(LI.getContext(), Size * 8); + bool IsConvertable = (BeginOffset - NewAllocaBeginOffset == 0) && + canConvertValue(TD, NewAllocaTy, NarrowTy); + Value *V; + // Move the insertion point just past the load so that we can refer to it. 
+ IRB.SetInsertPoint(llvm::next(BasicBlock::iterator(&LI))); + if (IsConvertable) + V = convertValue(TD, IRB, + IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), + getName(".load")), + NarrowTy); + else + V = IRB.CreateAlignedLoad( + getAdjustedAllocaPtr(IRB, NarrowTy->getPointerTo()), + getPartitionTypeAlign(NarrowTy), getName(".load")); + // Create a placeholder value with the same type as LI to use as the + // basis for the new value. This allows us to replace the uses of LI with + // the computed value, and then replace the placeholder with LI, leaving + // LI only used for this computation. + Value *Placeholder + = IRB.CreateLoad(UndefValue::get(LI.getType()->getPointerTo())); + V = insertInteger(TD, IRB, Placeholder, V, BeginOffset, + getName(".insert")); + LI.replaceAllUsesWith(V); + Placeholder->replaceAllUsesWith(&LI); + cast<Instruction>(Placeholder)->eraseFromParent(); + if (Pass.DeadSplitInsts.insert(&LI)) + Pass.DeadInsts.push_back(&LI); + DEBUG(dbgs() << " to: " << *V << "\n"); + return IsConvertable; + } + if (IntTy && LI.getType()->isIntegerTy()) return rewriteIntegerLoad(IRB, LI); @@ -2539,6 +2615,39 @@ private: if (VecTy) return rewriteVectorizedStoreInst(IRB, SI, OldOp); Type *ValueTy = SI.getValueOperand()->getType(); + + uint64_t Size = EndOffset - BeginOffset; + if (Size < TD.getTypeStoreSize(ValueTy)) { + assert(!SI.isVolatile()); + assert(ValueTy->isIntegerTy() && + "Only integer type loads and stores are split"); + assert(ValueTy->getIntegerBitWidth() == + TD.getTypeStoreSizeInBits(ValueTy) && + "Non-byte-multiple bit width"); + assert(ValueTy->getIntegerBitWidth() == + TD.getTypeSizeInBits(OldAI.getAllocatedType()) && + "Only alloca-wide stores can be split and recomposed"); + IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), Size * 8); + Value *V = extractInteger(TD, IRB, SI.getValueOperand(), NarrowTy, + BeginOffset, getName(".extract")); + StoreInst *NewSI; + bool IsConvertable = (BeginOffset - NewAllocaBeginOffset == 0) && + 
canConvertValue(TD, NarrowTy, NewAllocaTy); + if (IsConvertable) + NewSI = IRB.CreateAlignedStore(convertValue(TD, IRB, V, NewAllocaTy), + &NewAI, NewAI.getAlignment()); + else + NewSI = IRB.CreateAlignedStore( + V, getAdjustedAllocaPtr(IRB, NarrowTy->getPointerTo()), + getPartitionTypeAlign(NarrowTy)); + (void)NewSI; + if (Pass.DeadSplitInsts.insert(&SI)) + Pass.DeadInsts.push_back(&SI); + + DEBUG(dbgs() << " to: " << *NewSI << "\n"); + return IsConvertable; + } + if (IntTy && ValueTy->isIntegerTy()) return rewriteIntegerStore(IRB, SI); @@ -2687,9 +2796,8 @@ private: = P.getMemTransferOffsets(II); assert(OldPtr->getType()->isPointerTy() && "Must be a pointer type!"); - unsigned AS = cast<PointerType>(OldPtr->getType())->getAddressSpace(); // Compute the relative offset within the transfer. - unsigned IntPtrWidth = TD.getPointerSizeInBits(AS); + unsigned IntPtrWidth = TD.getTypeSizeInBits(OldPtr->getType()); APInt RelOffset(IntPtrWidth, BeginOffset - (IsDest ? MTO.DestBegin : MTO.SourceBegin)); @@ -3173,6 +3281,9 @@ static Type *getTypePartition(const DataLayout &TD, Type *Ty, uint64_t Offset, uint64_t Size) { if (Offset == 0 && TD.getTypeAllocSize(Ty) == Size) return stripAggregateTypeWrapping(TD, Ty); + if (Offset > TD.getTypeAllocSize(Ty) || + (TD.getTypeAllocSize(Ty) - Offset) < Size) + return 0; if (SequentialType *SeqTy = dyn_cast<SequentialType>(Ty)) { // We can't partition pointers... @@ -3464,6 +3575,8 @@ void SROA::deleteDeadInstructions(SmallPtrSet<AllocaInst*, 4> &DeletedAllocas) { Instruction *I = DeadInsts.pop_back_val(); DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n"); + I->replaceAllUsesWith(UndefValue::get(I->getType())); + for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) if (Instruction *U = dyn_cast<Instruction>(*OI)) { // Zero out the operand and see if it becomes trivially dead. 
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index a46d09c320..a5446294e3 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -963,7 +963,7 @@ ConvertScalar_InsertValue(Value *SV, Value *Old, if (SV->getType()->isFloatingPointTy() || SV->getType()->isVectorTy()) SV = Builder.CreateBitCast(SV, IntegerType::get(SV->getContext(),SrcWidth)); else if (SV->getType()->isPointerTy()) - SV = Builder.CreatePtrToInt(SV, TD.getIntPtrType(SV->getContext())); + SV = Builder.CreatePtrToInt(SV, TD.getIntPtrType(SV->getType())); // Zero extend or truncate the value if needed. if (SV->getType() != AllocaType) { diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index c82a00fc2c..f3448bcd87 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -165,9 +165,10 @@ struct StpCpyOpt: public LibCallOptimization { uint64_t Len = GetStringLength(Src); if (Len == 0) return 0; - Value *LenV = ConstantInt::get(TD->getIntPtrType(*Context), Len); + Type *PT = FT->getParamType(0); + Value *LenV = ConstantInt::get(TD->getIntPtrType(PT), Len); Value *DstEnd = B.CreateGEP(Dst, - ConstantInt::get(TD->getIntPtrType(*Context), + ConstantInt::get(TD->getIntPtrType(PT), Len - 1)); // We have enough information to now generate the memcpy call to do the @@ -220,9 +221,10 @@ struct StrNCpyOpt : public LibCallOptimization { // Let strncpy handle the zero padding if (Len > SrcLen+1) return 0; + Type *PT = FT->getParamType(0); // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant] B.CreateMemCpy(Dst, Src, - ConstantInt::get(TD->getIntPtrType(*Context), Len), 1); + ConstantInt::get(TD->getIntPtrType(PT), Len), 1); return Dst; } @@ -508,10 +510,11 @@ struct MemCpyOpt : public LibCallOptimization { if (!TD) return 0; FunctionType *FT = Callee->getFunctionType(); + Type *PT = 
FT->getParamType(0); if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || - FT->getParamType(2) != TD->getIntPtrType(*Context)) + FT->getParamType(2) != TD->getIntPtrType(PT)) return 0; // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1) @@ -530,10 +533,11 @@ struct MemMoveOpt : public LibCallOptimization { if (!TD) return 0; FunctionType *FT = Callee->getFunctionType(); + Type *PT = FT->getParamType(0); if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || - FT->getParamType(2) != TD->getIntPtrType(*Context)) + FT->getParamType(2) != TD->getIntPtrType(PT)) return 0; // memmove(x, y, n) -> llvm.memmove(x, y, n, 1) @@ -552,10 +556,11 @@ struct MemSetOpt : public LibCallOptimization { if (!TD) return 0; FunctionType *FT = Callee->getFunctionType(); + Type *PT = FT->getParamType(0); if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isIntegerTy() || - FT->getParamType(2) != TD->getIntPtrType(*Context)) + FT->getParamType(2) != TD->getIntPtrType(PT)) return 0; // memset(p, v, n) -> llvm.memset(p, v, n, 1) @@ -980,8 +985,9 @@ struct SPrintFOpt : public LibCallOptimization { if (!TD) return 0; // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1) + Type *AT = CI->getArgOperand(0)->getType(); B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), - ConstantInt::get(TD->getIntPtrType(*Context), // Copy the + ConstantInt::get(TD->getIntPtrType(AT), // Copy the FormatStr.size() + 1), 1); // nul byte. return ConstantInt::get(CI->getType(), FormatStr.size()); } @@ -1108,8 +1114,9 @@ struct FPutsOpt : public LibCallOptimization { uint64_t Len = GetStringLength(CI->getArgOperand(0)); if (!Len) return 0; // Known to have no uses (see above). 
+ Type *PT = FT->getParamType(0); return EmitFWrite(CI->getArgOperand(0), - ConstantInt::get(TD->getIntPtrType(*Context), Len-1), + ConstantInt::get(TD->getIntPtrType(PT), Len-1), CI->getArgOperand(1), B, TD, TLI); } }; @@ -1134,8 +1141,9 @@ struct FPrintFOpt : public LibCallOptimization { // These optimizations require DataLayout. if (!TD) return 0; + Type *AT = CI->getArgOperand(1)->getType(); Value *NewCI = EmitFWrite(CI->getArgOperand(1), - ConstantInt::get(TD->getIntPtrType(*Context), + ConstantInt::get(TD->getIntPtrType(AT), FormatStr.size()), CI->getArgOperand(0), B, TD, TLI); return NewCI ? ConstantInt::get(CI->getType(), FormatStr.size()) : 0; diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp index fa2faa2dad..bd28f10654 100644 --- a/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/lib/Transforms/Utils/BuildLibCalls.cpp @@ -46,9 +46,8 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD, AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, ArrayRef<Attributes::AttrVal>(AVs, 2)); - LLVMContext &Context = B.GetInsertBlock()->getContext(); Constant *StrLen = M->getOrInsertFunction("strlen", AttrListPtr::get(AWI), - TD->getIntPtrType(Context), + TD->getIntPtrType(Ptr->getType()), B.getInt8PtrTy(), NULL); CallInst *CI = B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen"); @@ -73,11 +72,10 @@ Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B, AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, ArrayRef<Attributes::AttrVal>(AVs, 2)); - LLVMContext &Context = B.GetInsertBlock()->getContext(); Constant *StrNLen = M->getOrInsertFunction("strnlen", AttrListPtr::get(AWI), - TD->getIntPtrType(Context), + TD->getIntPtrType(Ptr->getType()), B.getInt8PtrTy(), - TD->getIntPtrType(Context), + TD->getIntPtrType(Ptr->getType()), NULL); CallInst *CI = B.CreateCall2(StrNLen, CastToCStr(Ptr, B), MaxLen, "strnlen"); if (const Function *F = 
dyn_cast<Function>(StrNLen->stripPointerCasts())) @@ -126,12 +124,12 @@ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, ArrayRef<Attributes::AttrVal>(AVs, 2)); - LLVMContext &Context = B.GetInsertBlock()->getContext(); Value *StrNCmp = M->getOrInsertFunction("strncmp", AttrListPtr::get(AWI), B.getInt32Ty(), B.getInt8PtrTy(), B.getInt8PtrTy(), - TD->getIntPtrType(Context), NULL); + TD->getIntPtrType(Ptr1->getType()), + NULL); CallInst *CI = B.CreateCall3(StrNCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B), Len, "strncmp"); @@ -201,14 +199,14 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize, AttributeWithIndex AWI; AWI = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, Attributes::NoUnwind); - LLVMContext &Context = B.GetInsertBlock()->getContext(); Value *MemCpy = M->getOrInsertFunction("__memcpy_chk", AttrListPtr::get(AWI), B.getInt8PtrTy(), B.getInt8PtrTy(), B.getInt8PtrTy(), - TD->getIntPtrType(Context), - TD->getIntPtrType(Context), NULL); + TD->getIntPtrType(Dst->getType()), + TD->getIntPtrType(Src->getType()), + NULL); Dst = CastToCStr(Dst, B); Src = CastToCStr(Src, B); CallInst *CI = B.CreateCall4(MemCpy, Dst, Src, Len, ObjSize); @@ -230,12 +228,11 @@ Value *llvm::EmitMemChr(Value *Ptr, Value *Val, Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind }; AWI = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, ArrayRef<Attributes::AttrVal>(AVs, 2)); - LLVMContext &Context = B.GetInsertBlock()->getContext(); Value *MemChr = M->getOrInsertFunction("memchr", AttrListPtr::get(AWI), B.getInt8PtrTy(), B.getInt8PtrTy(), B.getInt32Ty(), - TD->getIntPtrType(Context), + TD->getIntPtrType(Ptr->getType()), NULL); CallInst *CI = B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr"); @@ -260,12 +257,12 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2, AWI[2] = 
AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, ArrayRef<Attributes::AttrVal>(AVs, 2)); - LLVMContext &Context = B.GetInsertBlock()->getContext(); Value *MemCmp = M->getOrInsertFunction("memcmp", AttrListPtr::get(AWI), B.getInt32Ty(), B.getInt8PtrTy(), B.getInt8PtrTy(), - TD->getIntPtrType(Context), NULL); + TD->getIntPtrType(Ptr1->getType()), + NULL); CallInst *CI = B.CreateCall3(MemCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B), Len, "memcmp"); @@ -425,24 +422,24 @@ Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, AWI[1] = AttributeWithIndex::get(M->getContext(), 4, Attributes::NoCapture); AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, Attributes::NoUnwind); - LLVMContext &Context = B.GetInsertBlock()->getContext(); StringRef FWriteName = TLI->getName(LibFunc::fwrite); Constant *F; + Type *PtrTy = Ptr->getType(); if (File->getType()->isPointerTy()) F = M->getOrInsertFunction(FWriteName, AttrListPtr::get(AWI), - TD->getIntPtrType(Context), + TD->getIntPtrType(PtrTy), B.getInt8PtrTy(), - TD->getIntPtrType(Context), - TD->getIntPtrType(Context), + TD->getIntPtrType(PtrTy), + TD->getIntPtrType(PtrTy), File->getType(), NULL); else - F = M->getOrInsertFunction(FWriteName, TD->getIntPtrType(Context), + F = M->getOrInsertFunction(FWriteName, TD->getIntPtrType(PtrTy), B.getInt8PtrTy(), - TD->getIntPtrType(Context), - TD->getIntPtrType(Context), + TD->getIntPtrType(PtrTy), + TD->getIntPtrType(PtrTy), File->getType(), NULL); CallInst *CI = B.CreateCall4(F, CastToCStr(Ptr, B), Size, - ConstantInt::get(TD->getIntPtrType(Context), 1), File); + ConstantInt::get(TD->getIntPtrType(PtrTy), 1), File); if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) CI->setCallingConv(Fn->getCallingConv()); @@ -464,12 +461,13 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const DataLayout *TD, IRBuilder<> B(CI); if (Name == "__memcpy_chk") { + Type *PT = FT->getParamType(0); // Check if this has the right 
signature. if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || - FT->getParamType(2) != TD->getIntPtrType(Context) || - FT->getParamType(3) != TD->getIntPtrType(Context)) + FT->getParamType(2) != TD->getIntPtrType(PT) || + FT->getParamType(3) != TD->getIntPtrType(PT)) return false; if (isFoldable(3, 2, false)) { @@ -488,11 +486,12 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const DataLayout *TD, if (Name == "__memmove_chk") { // Check if this has the right signature. + Type *PT = FT->getParamType(0); if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || - FT->getParamType(2) != TD->getIntPtrType(Context) || - FT->getParamType(3) != TD->getIntPtrType(Context)) + FT->getParamType(2) != TD->getIntPtrType(PT) || + FT->getParamType(3) != TD->getIntPtrType(PT)) return false; if (isFoldable(3, 2, false)) { @@ -506,11 +505,12 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const DataLayout *TD, if (Name == "__memset_chk") { // Check if this has the right signature. + Type *PT = FT->getParamType(0); if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isIntegerTy() || - FT->getParamType(2) != TD->getIntPtrType(Context) || - FT->getParamType(3) != TD->getIntPtrType(Context)) + FT->getParamType(2) != TD->getIntPtrType(PT) || + FT->getParamType(3) != TD->getIntPtrType(PT)) return false; if (isFoldable(3, 2, false)) { @@ -525,11 +525,12 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const DataLayout *TD, if (Name == "__strcpy_chk" || Name == "__stpcpy_chk") { // Check if this has the right signature. 
+ Type *PT = FT->getParamType(0); if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != Type::getInt8PtrTy(Context) || - FT->getParamType(2) != TD->getIntPtrType(Context)) + FT->getParamType(2) != TD->getIntPtrType(PT)) return 0; @@ -551,11 +552,12 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const DataLayout *TD, if (Name == "__strncpy_chk" || Name == "__stpncpy_chk") { // Check if this has the right signature. + Type *PT = FT->getParamType(0); if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != Type::getInt8PtrTy(Context) || !FT->getParamType(2)->isIntegerTy() || - FT->getParamType(3) != TD->getIntPtrType(Context)) + FT->getParamType(3) != TD->getIntPtrType(PT)) return false; if (isFoldable(3, 2, false)) { diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 9729687a83..c09d982d65 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -806,8 +806,7 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign, const DataLayout *TD) { assert(V->getType()->isPointerTy() && "getOrEnforceKnownAlignment expects a pointer!"); - unsigned AS = cast<PointerType>(V->getType())->getAddressSpace(); - unsigned BitWidth = TD ? TD->getPointerSizeInBits(AS) : 64; + unsigned BitWidth = TD ? TD->getTypeSizeInBits(V->getType()) : 64; APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); ComputeMaskedBits(V, KnownZero, KnownOne, TD); unsigned TrailZ = KnownZero.countTrailingOnes(); diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index a008da67e9..870e2b2ade 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -392,7 +392,7 @@ static ConstantInt *GetConstantInt(Value *V, const DataLayout *TD) { // This is some kind of pointer constant. 
Turn it into a pointer-sized // ConstantInt if possible. - IntegerType *PtrTy = TD->getIntPtrType(V->getContext()); + IntegerType *PtrTy = TD->getIntPtrType(V->getType()); // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*). if (isa<ConstantPointerNull>(V)) @@ -532,9 +532,13 @@ Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) { CV = ICI->getOperand(0); // Unwrap any lossless ptrtoint cast. - if (TD && CV && CV->getType() == TD->getIntPtrType(CV->getContext())) - if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) + if (TD && CV) { + PtrToIntInst *PTII = NULL; + if ((PTII = dyn_cast<PtrToIntInst>(CV)) && + CV->getType() == TD->getIntPtrType(CV->getContext(), + PTII->getPointerAddressSpace())) CV = PTII->getOperand(0); + } return CV; } @@ -981,7 +985,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, // Convert pointer to int before we switch. if (CV->getType()->isPointerTy()) { assert(TD && "Cannot switch on pointer without DataLayout"); - CV = Builder.CreatePtrToInt(CV, TD->getIntPtrType(CV->getContext()), + CV = Builder.CreatePtrToInt(CV, TD->getIntPtrType(CV->getType()), "magicptr"); } @@ -2709,7 +2713,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *TD, if (CompVal->getType()->isPointerTy()) { assert(TD && "Cannot switch on pointer without DataLayout"); CompVal = Builder.CreatePtrToInt(CompVal, - TD->getIntPtrType(CompVal->getContext()), + TD->getIntPtrType(CompVal->getType()), "magicptr"); } diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index b15acdff63..162b29e829 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -102,14 +102,13 @@ struct MemCpyChkOpt : public InstFortifiedLibCallOptimization { virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { this->CI = CI; FunctionType *FT = Callee->getFunctionType(); - LLVMContext &Context 
= CI->getParent()->getContext(); // Check if this has the right signature. if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || - FT->getParamType(2) != TD->getIntPtrType(Context) || - FT->getParamType(3) != TD->getIntPtrType(Context)) + FT->getParamType(2) != TD->getIntPtrType(FT->getParamType(0)) || + FT->getParamType(3) != TD->getIntPtrType(FT->getParamType(1))) return 0; if (isFoldable(3, 2, false)) { @@ -125,14 +124,13 @@ struct MemMoveChkOpt : public InstFortifiedLibCallOptimization { virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { this->CI = CI; FunctionType *FT = Callee->getFunctionType(); - LLVMContext &Context = CI->getParent()->getContext(); // Check if this has the right signature. if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || - FT->getParamType(2) != TD->getIntPtrType(Context) || - FT->getParamType(3) != TD->getIntPtrType(Context)) + FT->getParamType(2) != TD->getIntPtrType(FT->getParamType(0)) || + FT->getParamType(3) != TD->getIntPtrType(FT->getParamType(1))) return 0; if (isFoldable(3, 2, false)) { @@ -148,14 +146,13 @@ struct MemSetChkOpt : public InstFortifiedLibCallOptimization { virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { this->CI = CI; FunctionType *FT = Callee->getFunctionType(); - LLVMContext &Context = CI->getParent()->getContext(); // Check if this has the right signature. 
if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isIntegerTy() || - FT->getParamType(2) != TD->getIntPtrType(Context) || - FT->getParamType(3) != TD->getIntPtrType(Context)) + FT->getParamType(2) != TD->getIntPtrType(FT->getParamType(0)) || + FT->getParamType(3) != TD->getIntPtrType(FT->getParamType(0))) return 0; if (isFoldable(3, 2, false)) { @@ -180,7 +177,7 @@ struct StrCpyChkOpt : public InstFortifiedLibCallOptimization { FT->getReturnType() != FT->getParamType(0) || FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != Type::getInt8PtrTy(Context) || - FT->getParamType(2) != TD->getIntPtrType(Context)) + FT->getParamType(2) != TD->getIntPtrType(FT->getParamType(0))) return 0; Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); @@ -205,8 +202,8 @@ struct StrCpyChkOpt : public InstFortifiedLibCallOptimization { Value *Ret = EmitMemCpyChk(Dst, Src, - ConstantInt::get(TD->getIntPtrType(Context), Len), - CI->getArgOperand(2), B, TD, TLI); + ConstantInt::get(TD->getIntPtrType(Dst->getType()), + Len), CI->getArgOperand(2), B, TD, TLI); return Ret; } return 0; @@ -225,7 +222,7 @@ struct StrNCpyChkOpt : public InstFortifiedLibCallOptimization { FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != Type::getInt8PtrTy(Context) || !FT->getParamType(2)->isIntegerTy() || - FT->getParamType(3) != TD->getIntPtrType(Context)) + FT->getParamType(3) != TD->getIntPtrType(FT->getParamType(0))) return 0; if (isFoldable(3, 2, false)) { @@ -287,7 +284,8 @@ struct StrCatOpt : public LibCallOptimization { // We have enough information to now generate the memcpy call to do the // concatenation for us. Make a memcpy to copy the nul byte with align = 1. 
B.CreateMemCpy(CpyDst, Src, - ConstantInt::get(TD->getIntPtrType(*Context), Len + 1), 1); + ConstantInt::get(TD->getIntPtrType(Src->getType()), + Len + 1), 1); return Dst; } }; @@ -359,8 +357,9 @@ struct StrChrOpt : public LibCallOptimization { if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32))// memchr needs i32. return 0; + Type *PT = FT->getParamType(0); return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul. - ConstantInt::get(TD->getIntPtrType(*Context), Len), + ConstantInt::get(TD->getIntPtrType(PT), Len), B, TD, TLI); } @@ -454,8 +453,9 @@ struct StrCmpOpt : public LibCallOptimization { // These optimizations require DataLayout. if (!TD) return 0; + Type *PT = FT->getParamType(0); return EmitMemCmp(Str1P, Str2P, - ConstantInt::get(TD->getIntPtrType(*Context), + ConstantInt::get(TD->getIntPtrType(PT), std::min(Len1, Len2)), B, TD, TLI); } @@ -537,7 +537,7 @@ struct StrCpyOpt : public LibCallOptimization { // We have enough information to now generate the memcpy call to do the // copy for us. Make a memcpy to copy the nul byte with align = 1. B.CreateMemCpy(Dst, Src, - ConstantInt::get(TD->getIntPtrType(*Context), Len), 1); + ConstantInt::get(TD->getIntPtrType(Dst->getType()), Len), 1); return Dst; } }; diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index f944d9b4fc..423c7a4911 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -18,10 +18,13 @@ // // This pass has three parts: // 1. The main loop pass that drives the different parts. -// 2. LoopVectorizationLegality - A helper class that checks for the legality +// 2. LoopVectorizationLegality - A unit that checks for the legality // of the vectorization. -// 3. SingleBlockLoopVectorizer - A helper class that performs the actual +// 3. SingleBlockLoopVectorizer - A unit that performs the actual // widening of instructions. +// 4. 
LoopVectorizationCostModel - A unit that checks for the profitability +// of vectorization. It decides on the optimal vector width, which +// can be one, if vectorization is not profitable. //===----------------------------------------------------------------------===// // // The reduction-variable vectorization is based on the paper: @@ -51,13 +54,14 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AliasSetTracker.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/TargetTransformInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -67,13 +71,14 @@ using namespace llvm; static cl::opt<unsigned> -DefaultVectorizationFactor("default-loop-vectorize-width", - cl::init(4), cl::Hidden, - cl::desc("Set the default loop vectorization width")); +VectorizationFactor("force-vector-width", cl::init(0), cl::Hidden, + cl::desc("Set the default vectorization width. Zero is autoselect.")); + namespace { -// Forward declaration. +// Forward declarations. class LoopVectorizationLegality; +class LoopVectorizationCostModel; /// SingleBlockLoopVectorizer vectorizes loops which contain only one basic /// block to a specified vectorization factor (VF). @@ -203,7 +208,10 @@ public: enum ReductionKind { NoReduction = -1, /// Not a reduction. IntegerAdd = 0, /// Sum of numbers. - IntegerMult = 1 /// Product of numbers. + IntegerMult = 1, /// Product of numbers. + IntegerOr = 2, /// Bitwise or logical OR of numbers. 
+ IntegerAnd = 3, /// Bitwise or logical AND of numbers. + IntegerXor = 4 /// Bitwise or logical XOR of numbers. }; /// This POD struct holds information about reduction variables. @@ -229,11 +237,10 @@ public: /// of the reductions that were found in the loop. typedef DenseMap<PHINode*, ReductionDescriptor> ReductionList; - /// Returns the maximum vectorization factor that we *can* use to vectorize - /// this loop. This does not mean that it is profitable to vectorize this - /// loop, only that it is legal to do so. This may be a large number. We - /// can vectorize to any SIMD width below this number. - unsigned getLoopMaxVF(); + /// Returns true if it is legal to vectorize this loop. + /// This does not mean that it is profitable to vectorize this + /// loop, only that it is legal to do so. + bool canVectorize(); /// Returns the Induction variable. PHINode *getInduction() {return Induction;} @@ -259,10 +266,6 @@ private: /// Returns true if BB is vectorizable bool canVectorizeMemory(BasicBlock &BB); - // Check if a pointer value is known to be disjoint. - // Example: Alloca, Global, NoAlias. - bool isIdentifiedSafeObject(Value* Val); - /// Returns True, if 'Phi' is the kind of reduction variable for type /// 'Kind'. If this is a reduction variable, it adds it to ReductionList. bool AddReductionVar(PHINode *Phi, ReductionKind Kind); @@ -290,6 +293,48 @@ private: SmallPtrSet<Value*, 4> AllowedExit; }; +/// LoopVectorizationCostModel - estimates the expected speedups due to +/// vectorization. +/// In many cases vectorization is not profitable. This can happen because +/// of a number of reasons. In this class we mainly attempt to predict +/// the expected speedup/slowdowns due to the supported instruction set. +/// We use the VectorTargetTransformInfo to query the different backends +/// for the cost of different operations. +class LoopVectorizationCostModel { +public: + /// C'tor. 
+ LoopVectorizationCostModel(Loop *Lp, ScalarEvolution *Se, + LoopVectorizationLegality *Leg, + const VectorTargetTransformInfo *Vtti): + TheLoop(Lp), SE(Se), Legal(Leg), VTTI(Vtti) { } + + /// Returns the most profitable vectorization factor for the loop that is + /// smaller or equal to the VF argument. This method checks every power + /// of two up to VF. + unsigned findBestVectorizationFactor(unsigned VF = 4); + +private: + /// Returns the expected execution cost. The unit of the cost does + /// not matter because we use the 'cost' units to compare different + /// vector widths. The cost that is returned is *not* normalized by + /// the factor width. + unsigned expectedCost(unsigned VF); + + /// Returns the execution time cost of an instruction for a given vector + /// width. Vector width of one means scalar. + unsigned getInstructionCost(Instruction *I, unsigned VF); + + /// The loop that we evaluate. + Loop *TheLoop; + /// Scev analysis. + ScalarEvolution *SE; + + /// Vectorization legality. + LoopVectorizationLegality *Legal; + /// Vector target information. + const VectorTargetTransformInfo *VTTI; +}; + struct LoopVectorize : public LoopPass { static char ID; // Pass identification, replacement for typeid @@ -300,6 +345,7 @@ struct LoopVectorize : public LoopPass { ScalarEvolution *SE; DataLayout *DL; LoopInfo *LI; + TargetTransformInfo *TTI; virtual bool runOnLoop(Loop *L, LPPassManager &LPM) { // We only vectorize innermost loops. @@ -309,25 +355,42 @@ struct LoopVectorize : public LoopPass { SE = &getAnalysis<ScalarEvolution>(); DL = getAnalysisIfAvailable<DataLayout>(); LI = &getAnalysis<LoopInfo>(); + TTI = getAnalysisIfAvailable<TargetTransformInfo>(); DEBUG(dbgs() << "LV: Checking a loop in \"" << L->getHeader()->getParent()->getName() << "\"\n"); // Check if it is legal to vectorize the loop. 
LoopVectorizationLegality LVL(L, SE, DL); - unsigned MaxVF = LVL.getLoopMaxVF(); - - // Check that we can vectorize this loop using the chosen vectorization - // width. - if (MaxVF < DefaultVectorizationFactor) { - DEBUG(dbgs() << "LV: non-vectorizable MaxVF ("<< MaxVF << ").\n"); + if (!LVL.canVectorize()) { + DEBUG(dbgs() << "LV: Not vectorizing.\n"); return false; } - DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< MaxVF << ").\n"); + // Select the preferred vectorization factor. + unsigned VF = 1; + if (VectorizationFactor == 0) { + const VectorTargetTransformInfo *VTTI = 0; + if (TTI) + VTTI = TTI->getVectorTargetTransformInfo(); + // Use the cost model. + LoopVectorizationCostModel CM(L, SE, &LVL, VTTI); + VF = CM.findBestVectorizationFactor(); + + if (VF == 1) { + DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n"); + return false; + } + + } else { + // Use the user command flag. + VF = VectorizationFactor; + } + + DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< VF << ").\n"); // If we decided that it is *legal* to vectorizer the loop then do it. - SingleBlockLoopVectorizer LB(L, SE, LI, &LPM, DefaultVectorizationFactor); + SingleBlockLoopVectorizer LB(L, SE, LI, &LPM, VF); LB.vectorize(&LVL); DEBUG(verifyFunction(*L->getHeader()->getParent())); @@ -660,6 +723,13 @@ void SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal void SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { + //===------------------------------------------------===// + // + // Notice: any optimization or new instruction that goes + // into the code below should also be implemented in + // the cost-model. + // + //===------------------------------------------------===// typedef SmallVector<PHINode*, 4> PhiVector; BasicBlock &BB = *OrigLoop->getHeader(); Constant *Zero = ConstantInt::get( @@ -914,14 +984,28 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { // Extract the first scalar. 
Value *Scalar0 = Builder.CreateExtractElement(NewPhi, Builder.getInt32(0)); - // Extract and sum the remaining vector elements. + // Extract and reduce the remaining vector elements. for (unsigned i=1; i < VF; ++i) { Value *Scalar1 = Builder.CreateExtractElement(NewPhi, Builder.getInt32(i)); - if (RdxDesc.Kind == LoopVectorizationLegality::IntegerAdd) { - Scalar0 = Builder.CreateAdd(Scalar0, Scalar1); - } else { - Scalar0 = Builder.CreateMul(Scalar0, Scalar1); + switch (RdxDesc.Kind) { + case LoopVectorizationLegality::IntegerAdd: + Scalar0 = Builder.CreateAdd(Scalar0, Scalar1); + break; + case LoopVectorizationLegality::IntegerMult: + Scalar0 = Builder.CreateMul(Scalar0, Scalar1); + break; + case LoopVectorizationLegality::IntegerOr: + Scalar0 = Builder.CreateOr(Scalar0, Scalar1); + break; + case LoopVectorizationLegality::IntegerAnd: + Scalar0 = Builder.CreateAnd(Scalar0, Scalar1); + break; + case LoopVectorizationLegality::IntegerXor: + Scalar0 = Builder.CreateXor(Scalar0, Scalar1); + break; + default: + llvm_unreachable("Unknown reduction operation"); } } @@ -961,18 +1045,18 @@ void SingleBlockLoopVectorizer::cleanup() { SE->forgetLoop(OrigLoop); } -unsigned LoopVectorizationLegality::getLoopMaxVF() { +bool LoopVectorizationLegality::canVectorize() { if (!TheLoop->getLoopPreheader()) { assert(false && "No preheader!!"); DEBUG(dbgs() << "LV: Loop not normalized." << "\n"); - return 1; + return false; } // We can only vectorize single basic block loops. unsigned NumBlocks = TheLoop->getNumBlocks(); if (NumBlocks != 1) { DEBUG(dbgs() << "LV: Too many blocks:" << NumBlocks << "\n"); - return 1; + return false; } // We need to have a loop header. @@ -982,22 +1066,22 @@ unsigned LoopVectorizationLegality::getLoopMaxVF() { // Go over each instruction and look at memory deps. if (!canVectorizeBlock(*BB)) { DEBUG(dbgs() << "LV: Can't vectorize this loop header\n"); - return 1; + return false; } // ScalarEvolution needs to be able to find the exit count. 
const SCEV *ExitCount = SE->getExitCount(TheLoop, BB); if (ExitCount == SE->getCouldNotCompute()) { DEBUG(dbgs() << "LV: SCEV could not compute the loop exit count.\n"); - return 1; + return false; } DEBUG(dbgs() << "LV: We can vectorize this loop!\n"); // Okay! We can vectorize. At this point we don't have any other mem analysis - // which may limit our maximum vectorization factor, so just return the - // maximum SIMD size. - return DefaultVectorizationFactor; + // which may limit our maximum vectorization factor, so just return true with + // no restrictions. + return true; } bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) { @@ -1032,7 +1116,19 @@ bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) { continue; } if (AddReductionVar(Phi, IntegerMult)) { - DEBUG(dbgs() << "LV: Found an Mult reduction PHI."<< *Phi <<"\n"); + DEBUG(dbgs() << "LV: Found a MUL reduction PHI."<< *Phi <<"\n"); + continue; + } + if (AddReductionVar(Phi, IntegerOr)) { + DEBUG(dbgs() << "LV: Found an OR reduction PHI."<< *Phi <<"\n"); + continue; + } + if (AddReductionVar(Phi, IntegerAnd)) { + DEBUG(dbgs() << "LV: Found an AND reduction PHI."<< *Phi <<"\n"); + continue; + } + if (AddReductionVar(Phi, IntegerXor)) { + DEBUG(dbgs() << "LV: Found a XOR reduction PHI."<< *Phi <<"\n"); continue; } @@ -1178,7 +1274,7 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) { GetUnderlyingObjects(*I, TempObjects, DL); for (ValueVector::iterator it=TempObjects.begin(), e=TempObjects.end(); it != e; ++it) { - if (!isIdentifiedSafeObject(*it)) { + if (!isIdentifiedObject(*it)) { DEBUG(dbgs() << "LV: Found an unidentified write ptr:"<< **it <<"\n"); return false; } @@ -1196,7 +1292,7 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) { GetUnderlyingObjects(*I, TempObjects, DL); for (ValueVector::iterator it=TempObjects.begin(), e=TempObjects.end(); it != e; ++it) { - if (!isIdentifiedSafeObject(*it)) { + if (!isIdentifiedObject(*it)) { 
DEBUG(dbgs() << "LV: Found an unidentified read ptr:"<< **it <<"\n"); return false; } @@ -1213,19 +1309,6 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) { return true; } -/// Checks if the value is a Global variable or if it is an Arguments -/// marked with the NoAlias attribute. -bool LoopVectorizationLegality::isIdentifiedSafeObject(Value* Val) { - assert(Val && "Invalid value"); - if (isa<GlobalValue>(Val)) - return true; - if (isa<AllocaInst>(Val)) - return true; - if (Argument *A = dyn_cast<Argument>(Val)) - return A->hasNoAliasAttr(); - return false; -} - bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi, ReductionKind Kind) { if (Phi->getNumIncomingValues() != 2) @@ -1319,6 +1402,12 @@ LoopVectorizationLegality::isReductionInstr(Instruction *I, case Instruction::UDiv: case Instruction::SDiv: return Kind == IntegerMult; + case Instruction::And: + return Kind == IntegerAnd; + case Instruction::Or: + return Kind == IntegerOr; + case Instruction::Xor: + return Kind == IntegerXor; } } @@ -1340,6 +1429,193 @@ bool LoopVectorizationLegality::isInductionVariable(PHINode *Phi) { return true; } +unsigned +LoopVectorizationCostModel::findBestVectorizationFactor(unsigned VF) { + if (!VTTI) { + DEBUG(dbgs() << "LV: No vector target information. Not vectorizing. \n"); + return 1; + } + + float Cost = expectedCost(1); + unsigned Width = 1; + DEBUG(dbgs() << "LV: Scalar loop costs: "<< (int)Cost << ".\n"); + for (unsigned i=2; i <= VF; i*=2) { + // Notice that the vector loop needs to be executed less times, so + // we need to divide the cost of the vector loops by the width of + // the vector elements. 
+ float VectorCost = expectedCost(i) / (float)i; + DEBUG(dbgs() << "LV: Vector loop of width "<< i << " costs: " << + (int)VectorCost << ".\n"); + if (VectorCost < Cost) { + Cost = VectorCost; + Width = i; + } + } + + DEBUG(dbgs() << "LV: Selecting VF = : "<< Width << ".\n"); + return Width; +} + +unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) { + // We can only estimate the cost of single basic block loops. + assert(1 == TheLoop->getNumBlocks() && "Too many blocks in loop"); + + BasicBlock *BB = TheLoop->getHeader(); + unsigned Cost = 0; + + // For each instruction in the old loop. + for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { + Instruction *Inst = it; + unsigned C = getInstructionCost(Inst, VF); + Cost += C; + DEBUG(dbgs() << "LV: Found an estimated cost of "<< C <<" for VF "<< VF << + " For instruction: "<< *Inst << "\n"); + } + + return Cost; +} + +unsigned +LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { + assert(VTTI && "Invalid vector target transformation info"); + switch (I->getOpcode()) { + case Instruction::GetElementPtr: + return 0; + case Instruction::Br: { + return VTTI->getInstrCost(I->getOpcode()); + } + case Instruction::PHI: + return 0; + case Instruction::Add: + case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: { + Type *VTy = VectorType::get(I->getType(), VF); + return VTTI->getInstrCost(I->getOpcode(), VTy); + } + case Instruction::Select: { + SelectInst *SI = cast<SelectInst>(I); + Type *VTy = VectorType::get(I->getType(), VF); + const SCEV *CondSCEV = SE->getSCEV(SI->getCondition()); + bool 
ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop)); + Type *CondTy = SI->getCondition()->getType(); + if (ScalarCond) + CondTy = VectorType::get(CondTy, VF); + + return VTTI->getInstrCost(I->getOpcode(), VTy, CondTy); + } + case Instruction::ICmp: + case Instruction::FCmp: { + Type *VTy = VectorType::get(I->getOperand(0)->getType(), VF); + return VTTI->getInstrCost(I->getOpcode(), VTy); + } + case Instruction::Store: { + StoreInst *SI = cast<StoreInst>(I); + Type *VTy = VectorType::get(SI->getValueOperand()->getType(), VF); + + // Scalarized stores. + if (!Legal->isConsecutiveGep(SI->getPointerOperand())) { + unsigned Cost = 0; + if (VF != 1) { + unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement, + VTy); + // The cost of extracting from the value vector and pointer vector. + Cost += VF * (ExtCost * 2); + } + // The cost of the scalar stores. + Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(), + VTy->getScalarType(), + SI->getAlignment(), + SI->getPointerAddressSpace()); + return Cost; + } + + // Wide stores. + return VTTI->getMemoryOpCost(I->getOpcode(), VTy, SI->getAlignment(), + SI->getPointerAddressSpace()); + } + case Instruction::Load: { + LoadInst *LI = cast<LoadInst>(I); + Type *VTy = VectorType::get(I->getType(), VF); + + // Scalarized loads. + if (!Legal->isConsecutiveGep(LI->getPointerOperand())) { + unsigned Cost = 0; + if (VF != 1) { + unsigned InCost = VTTI->getInstrCost(Instruction::InsertElement, VTy); + unsigned ExCost = VTTI->getInstrCost(Instruction::ExtractValue, VTy); + + // The cost of inserting the loaded value into the result vector, and + // extracting from a vector of pointers. + Cost += VF * (InCost + ExCost); + } + // The cost of the scalar stores. + Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(), VTy->getScalarType(), + LI->getAlignment(), + LI->getPointerAddressSpace()); + return Cost; + } + + // Wide loads. 
+ return VTTI->getMemoryOpCost(I->getOpcode(), VTy, LI->getAlignment(), + LI->getPointerAddressSpace()); + } + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::FPExt: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::SIToFP: + case Instruction::UIToFP: + case Instruction::Trunc: + case Instruction::FPTrunc: + case Instruction::BitCast: { + Type *SrcTy = VectorType::get(I->getOperand(0)->getType(), VF); + Type *DstTy = VectorType::get(I->getType(), VF); + return VTTI->getInstrCost(I->getOpcode(), DstTy, SrcTy); + } + default: { + // We are scalarizing the instruction. Return the cost of the scalar + // instruction, plus the cost of insert and extract into vector + // elements, times the vector width. + unsigned Cost = 0; + Type *Ty = I->getType(); + + if (!Ty->isVoidTy()) { + Type *VTy = VectorType::get(Ty, VF); + unsigned InsCost = VTTI->getInstrCost(Instruction::InsertElement, VTy); + unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement, VTy); + Cost += VF * (InsCost + ExtCost); + } + + /// We don't have any information on the scalar instruction, but maybe + /// the target has. + /// TODO: This may be a target-specific intrinsic. + /// Need to add API for that. + Cost += VF * VTTI->getInstrCost(I->getOpcode(), Ty); + + return Cost; + } + }// end of switch. 
+} + + } // namespace char LoopVectorize::ID = 0; diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp index 5e23e6fc78..b72c17f667 100644 --- a/lib/VMCore/AsmWriter.cpp +++ b/lib/VMCore/AsmWriter.cpp @@ -74,6 +74,7 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) case CallingConv::X86_StdCall: Out << "x86_stdcallcc"; break; case CallingConv::X86_FastCall: Out << "x86_fastcallcc"; break; case CallingConv::X86_ThisCall: Out << "x86_thiscallcc"; break; + case CallingConv::Intel_OCL_BI: Out << "intel_ocl_bicc"; break; case CallingConv::ARM_APCS: Out << "arm_apcscc"; break; case CallingConv::ARM_AAPCS: Out << "arm_aapcscc"; break; case CallingConv::ARM_AAPCS_VFP:Out << "arm_aapcs_vfpcc"; break; diff --git a/lib/VMCore/DataLayout.cpp b/lib/VMCore/DataLayout.cpp index e6994be257..104e5da057 100644 --- a/lib/VMCore/DataLayout.cpp +++ b/lib/VMCore/DataLayout.cpp @@ -524,6 +524,14 @@ std::string DataLayout::getStringRepresentation() const { return OS.str(); } +unsigned DataLayout::getPointerTypeSizeInBits(Type *Ty) const +{ + if (Ty->isPointerTy()) return getTypeSizeInBits(Ty); + if (Ty->isVectorTy() + && cast<VectorType>(Ty)->getElementType()->isPointerTy()) + return getTypeSizeInBits(cast<VectorType>(Ty)->getElementType()); + return getPointerSizeInBits(0); +} uint64_t DataLayout::getTypeSizeInBits(Type *Ty) const { assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!"); @@ -660,13 +668,32 @@ unsigned DataLayout::getPreferredTypeAlignmentShift(Type *Ty) const { return Log2_32(Align); } -/// getIntPtrType - Return an unsigned integer type that is the same size or -/// greater to the host pointer size. +/// getIntPtrType - Return an integer type that is the same size or +/// greater to the pointer size for the address space. 
IntegerType *DataLayout::getIntPtrType(LLVMContext &C, unsigned AddressSpace) const { return IntegerType::get(C, getPointerSizeInBits(AddressSpace)); } +/// getIntPtrType - Return an integer type that is the same size or +/// greater to the pointer size of the specific PointerType. +IntegerType *DataLayout::getIntPtrType(Type *Ty) const { + LLVMContext &C = Ty->getContext(); + // For pointers, we return the size for the specific address space. + if (Ty->isPointerTy()) return IntegerType::get(C, getTypeSizeInBits(Ty)); + // For vector of pointers, we return the size of the address space + // of the pointer type. + if (Ty->isVectorTy() && cast<VectorType>(Ty)->getElementType()->isPointerTy()) + return IntegerType::get(C, + getTypeSizeInBits(cast<VectorType>(Ty)->getElementType())); + // Otherwise return the address space for the default address space. + // An example of this occurring is that you want to get the IntPtr + // for all of the arguments in a function. However, the IntPtr + // for a non-pointer type cannot be determined by the type, so + // the default value is used. 
+ return getIntPtrType(C, 0); +} + uint64_t DataLayout::getIndexedOffset(Type *ptrTy, ArrayRef<Value *> Indices) const { diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp index 13c4a5d257..e9b96d6cd2 100644 --- a/lib/VMCore/Instructions.cpp +++ b/lib/VMCore/Instructions.cpp @@ -2120,6 +2120,17 @@ bool CastInst::isNoopCast(Type *IntPtrTy) const { return isNoopCast(getOpcode(), getOperand(0)->getType(), getType(), IntPtrTy); } +/// @brief Determine if a cast is a no-op +bool CastInst::isNoopCast(const DataLayout &DL) const { + unsigned AS = 0; + if (getOpcode() == Instruction::PtrToInt) + AS = getOperand(0)->getType()->getPointerAddressSpace(); + else if (getOpcode() == Instruction::IntToPtr) + AS = getType()->getPointerAddressSpace(); + Type *IntPtrTy = DL.getIntPtrType(getContext(), AS); + return isNoopCast(getOpcode(), getOperand(0)->getType(), getType(), IntPtrTy); +} + /// This function determines if a pair of casts can be eliminated and what /// opcode should be used in the elimination. 
This assumes that there are two /// instructions like this: diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp index 1a7a650989..54146e118c 100644 --- a/lib/VMCore/Type.cpp +++ b/lib/VMCore/Type.cpp @@ -233,7 +233,12 @@ unsigned Type::getVectorNumElements() const { } unsigned Type::getPointerAddressSpace() const { - return cast<PointerType>(this)->getAddressSpace(); + if (isPointerTy()) + return cast<PointerType>(this)->getAddressSpace(); + if (isVectorTy()) + return getSequentialElementType()->getPointerAddressSpace(); + llvm_unreachable("Should never reach here!"); + return 0; } diff --git a/lib/VMCore/User.cpp b/lib/VMCore/User.cpp index 5f35ce4b9a..e847ce6ee5 100644 --- a/lib/VMCore/User.cpp +++ b/lib/VMCore/User.cpp @@ -10,6 +10,7 @@ #include "llvm/Constant.h" #include "llvm/GlobalValue.h" #include "llvm/User.h" +#include "llvm/Operator.h" namespace llvm { @@ -78,4 +79,12 @@ void User::operator delete(void *Usr) { ::operator delete(Storage); } +//===----------------------------------------------------------------------===// +// Operator Class +//===----------------------------------------------------------------------===// + +Operator::~Operator() { + llvm_unreachable("should never destroy an Operator"); +} + } // End llvm namespace diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp index fd629b485a..eb40b09d29 100644 --- a/lib/VMCore/Verifier.cpp +++ b/lib/VMCore/Verifier.cpp @@ -705,6 +705,7 @@ void Verifier::visitFunction(Function &F) { case CallingConv::Cold: case CallingConv::X86_FastCall: case CallingConv::X86_ThisCall: + case CallingConv::Intel_OCL_BI: case CallingConv::PTX_Kernel: case CallingConv::PTX_Device: Assert1(!F.isVarArg(), diff --git a/projects/CMakeLists.txt b/projects/CMakeLists.txt index a9d2af6ad2..36751cd31d 100644 --- a/projects/CMakeLists.txt +++ b/projects/CMakeLists.txt @@ -14,8 +14,6 @@ endforeach(entry) # Also add in the compiler-rt tree if present and we have a sufficiently # recent version of CMake. 
if(${CMAKE_VERSION} VERSION_GREATER 2.8.7 AND - ${LLVM_BUILD_RUNTIME} AND - IS_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/compiler-rt AND - EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/compiler-rt/CMakeLists.txt) - add_subdirectory(compiler-rt) + ${LLVM_BUILD_RUNTIME}) + add_llvm_external_project(compiler-rt) endif() diff --git a/test/CodeGen/ARM/carry.ll b/test/CodeGen/ARM/carry.ll index f84774d9b6..bf51cd627b 100644 --- a/test/CodeGen/ARM/carry.ll +++ b/test/CodeGen/ARM/carry.ll @@ -45,3 +45,16 @@ entry: %0 = sub nsw i64 0, %x ret i64 %0 } + +; rdar://12559385 +define i64 @f5(i32 %vi) { +entry: +; CHECK: f5: +; CHECK: movw [[REG:r[0-9]+]], #36102 +; CHECK: sbc r{{[0-9]+}}, r{{[0-9]+}}, [[REG]] + %v0 = zext i32 %vi to i64 + %v1 = xor i64 %v0, -155057456198619 + %v4 = add i64 %v1, 155057456198619 + %v5 = add i64 %v4, %v1 + ret i64 %v5 +} diff --git a/test/CodeGen/Mips/mips64-sret.ll b/test/CodeGen/Mips/mips64-sret.ll index 498c5fe174..e26b0223b4 100644 --- a/test/CodeGen/Mips/mips64-sret.ll +++ b/test/CodeGen/Mips/mips64-sret.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 -O0 < %s +; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 -O3 < %s | FileCheck %s %struct.S = type { [8 x i32] } @@ -6,6 +6,8 @@ define void @f(%struct.S* noalias sret %agg.result) nounwind { entry: +; CHECK: daddu $2, $zero, $4 + %0 = bitcast %struct.S* %agg.result to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.S* @g to i8*), i64 32, i32 4, i1 false) ret void diff --git a/test/CodeGen/PowerPC/jaggedstructs.ll b/test/CodeGen/PowerPC/jaggedstructs.ll new file mode 100644 index 0000000000..62aa7cf929 --- /dev/null +++ b/test/CodeGen/PowerPC/jaggedstructs.ll @@ -0,0 +1,48 @@ +; RUN: llc -mcpu=pwr7 -O0 < %s | FileCheck %s + +; This tests receiving and re-passing parameters consisting of structures +; of size 3, 5, 6, and 7. They are to be found/placed right-adjusted in +; the parameter registers. 
+ +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +%struct.S3 = type { [3 x i8] } +%struct.S5 = type { [5 x i8] } +%struct.S6 = type { [6 x i8] } +%struct.S7 = type { [7 x i8] } + +define void @test(%struct.S3* byval %s3, %struct.S5* byval %s5, %struct.S6* byval %s6, %struct.S7* byval %s7) nounwind { +entry: + call void @check(%struct.S3* byval %s3, %struct.S5* byval %s5, %struct.S6* byval %s6, %struct.S7* byval %s7) + ret void +} + +; CHECK: std 6, 216(1) +; CHECK: std 5, 208(1) +; CHECK: std 4, 200(1) +; CHECK: std 3, 192(1) +; CHECK: lbz {{[0-9]+}}, 199(1) +; CHECK: stb {{[0-9]+}}, 55(1) +; CHECK: lhz {{[0-9]+}}, 197(1) +; CHECK: sth {{[0-9]+}}, 53(1) +; CHECK: lbz {{[0-9]+}}, 207(1) +; CHECK: stb {{[0-9]+}}, 63(1) +; CHECK: lwz {{[0-9]+}}, 203(1) +; CHECK: stw {{[0-9]+}}, 59(1) +; CHECK: lhz {{[0-9]+}}, 214(1) +; CHECK: sth {{[0-9]+}}, 70(1) +; CHECK: lwz {{[0-9]+}}, 210(1) +; CHECK: stw {{[0-9]+}}, 66(1) +; CHECK: lbz {{[0-9]+}}, 223(1) +; CHECK: stb {{[0-9]+}}, 79(1) +; CHECK: lhz {{[0-9]+}}, 221(1) +; CHECK: sth {{[0-9]+}}, 77(1) +; CHECK: lwz {{[0-9]+}}, 217(1) +; CHECK: stw {{[0-9]+}}, 73(1) +; CHECK: ld 6, 72(1) +; CHECK: ld 5, 64(1) +; CHECK: ld 4, 56(1) +; CHECK: ld 3, 48(1) + +declare void @check(%struct.S3* byval, %struct.S5* byval, %struct.S6* byval, %struct.S7* byval) diff --git a/test/CodeGen/PowerPC/structsinregs.ll b/test/CodeGen/PowerPC/structsinregs.ll index 43ba13b426..ef706af95d 100644 --- a/test/CodeGen/PowerPC/structsinregs.ll +++ b/test/CodeGen/PowerPC/structsinregs.ll @@ -188,17 +188,13 @@ entry: %add13 = add nsw i32 %add11, %6 ret i32 %add13 -; CHECK: sldi 9, 9, 8 -; CHECK: sldi 8, 8, 16 -; CHECK: sldi 7, 7, 24 -; CHECK: sldi 5, 5, 40 -; CHECK: stw 6, 76(1) -; CHECK: sth 4, 62(1) -; CHECK: stb 3, 55(1) ; CHECK: std 9, 96(1) ; CHECK: std 8, 88(1) ; CHECK: std 7, 80(1) +; CHECK: stw 6, 76(1) ; CHECK: std 5, 64(1) +; 
CHECK: sth 4, 62(1) +; CHECK: stb 3, 55(1) ; CHECK: lbz {{[0-9]+}}, 85(1) ; CHECK: lbz {{[0-9]+}}, 86(1) ; CHECK: lbz {{[0-9]+}}, 83(1) diff --git a/test/CodeGen/Thumb2/carry.ll b/test/CodeGen/Thumb2/carry.ll index de6f6e260d..85b4370fa5 100644 --- a/test/CodeGen/Thumb2/carry.ll +++ b/test/CodeGen/Thumb2/carry.ll @@ -20,3 +20,16 @@ entry: %tmp2 = sub i64 %tmp1, %b ret i64 %tmp2 } + +; rdar://12559385 +define i64 @f3(i32 %vi) { +entry: +; CHECK: f3: +; CHECK: movw [[REG:r[0-9]+]], #36102 +; CHECK: sbcs r{{[0-9]+}}, [[REG]] + %v0 = zext i32 %vi to i64 + %v1 = xor i64 %v0, -155057456198619 + %v4 = add i64 %v1, 155057456198619 + %v5 = add i64 %v4, %v1 + ret i64 %v5 +} diff --git a/test/CodeGen/X86/2012-01-18-vbitcast.ll b/test/CodeGen/X86/2012-01-18-vbitcast.ll index 8a3ccc8dfd..3ce7db6e41 100644 --- a/test/CodeGen/X86/2012-01-18-vbitcast.ll +++ b/test/CodeGen/X86/2012-01-18-vbitcast.ll @@ -2,8 +2,8 @@ ;CHECK: vcast define <2 x i32> @vcast(<2 x float> %a, <2 x float> %b) { -;CHECK: pshufd -;CHECK: pshufd +;CHECK: pmovzxdq +;CHECK: pmovzxdq %af = bitcast <2 x float> %a to <2 x i32> %bf = bitcast <2 x float> %b to <2 x i32> %x = sub <2 x i32> %af, %bf diff --git a/test/CodeGen/X86/2012-03-15-build_vector_wl.ll b/test/CodeGen/X86/2012-03-15-build_vector_wl.ll index fec17e9f4a..c4b307e5a5 100644 --- a/test/CodeGen/X86/2012-03-15-build_vector_wl.ll +++ b/test/CodeGen/X86/2012-03-15-build_vector_wl.ll @@ -4,7 +4,7 @@ define <4 x i8> @build_vector_again(<16 x i8> %in) nounwind readnone { entry: %out = shufflevector <16 x i8> %in, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> -; CHECK: shufb +; CHECK: pmovzxbd ret <4 x i8> %out ; CHECK: ret } diff --git a/test/CodeGen/X86/2012-07-10-extload64.ll b/test/CodeGen/X86/2012-07-10-extload64.ll index 906b748fa4..4abdded38d 100644 --- a/test/CodeGen/X86/2012-07-10-extload64.ll +++ b/test/CodeGen/X86/2012-07-10-extload64.ll @@ -3,7 +3,7 @@ ; CHECK: load_store define void @load_store(<4 x i16>* %in) { entry: -; CHECK: movsd +; 
CHECK: pmovzxwd %A27 = load <4 x i16>* %in, align 4 %A28 = add <4 x i16> %A27, %A27 ; CHECK: movlpd @@ -27,6 +27,6 @@ define <2 x i32> @load_64(<2 x i32>* %ptr) { BB: %t = load <2 x i32>* %ptr ret <2 x i32> %t -;CHECK: movsd +;CHECK: pmovzxdq ;CHECK: ret } diff --git a/test/CodeGen/X86/avx-intel-ocl.ll b/test/CodeGen/X86/avx-intel-ocl.ll new file mode 100644 index 0000000000..1446b36a0f --- /dev/null +++ b/test/CodeGen/X86/avx-intel-ocl.ll @@ -0,0 +1,107 @@ +; RUN: llc < %s -mtriple=i386-pc-win32 -mcpu=corei7-avx -mattr=+avx | FileCheck -check-prefix=WIN32 %s +; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=corei7-avx -mattr=+avx | FileCheck -check-prefix=WIN64 %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck -check-prefix=NOT_WIN %s + +declare <16 x float> @func_float16_ptr(<16 x float>, <16 x float> *) +declare <16 x float> @func_float16(<16 x float>, <16 x float>) +; WIN64: testf16_inp +; WIN64: vaddps {{.*}}, {{%ymm[0-1]}} +; WIN64: vaddps {{.*}}, {{%ymm[0-1]}} +; WIN64: leaq {{.*}}(%rsp), %rcx +; WIN64: call +; WIN64: ret + +; WIN32: testf16_inp +; WIN32: movl %eax, (%esp) +; WIN32: vaddps {{.*}}, {{%ymm[0-1]}} +; WIN32: vaddps {{.*}}, {{%ymm[0-1]}} +; WIN32: call +; WIN32: ret + +; NOT_WIN: testf16_inp +; NOT_WIN: vaddps {{.*}}, {{%ymm[0-1]}} +; NOT_WIN: vaddps {{.*}}, {{%ymm[0-1]}} +; NOT_WIN: leaq {{.*}}(%rsp), %rdi +; NOT_WIN: call +; NOT_WIN: ret + +;test calling conventions - input parameters +define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind { + %y = alloca <16 x float>, align 16 + %x = fadd <16 x float> %a, %b + %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y) + %2 = load <16 x float>* %y, align 16 + %3 = fadd <16 x float> %2, %1 + ret <16 x float> %3 +} + +;test calling conventions - preserved registers + +; preserved ymm6-ymm15 +; WIN64: testf16_regs +; WIN64: call +; WIN64: vaddps {{%ymm[6-7]}}, %ymm0, %ymm0 +; WIN64: vaddps {{%ymm[6-7]}}, %ymm1, 
%ymm1 +; WIN64: ret + +; preserved ymm8-ymm15 +; NOT_WIN: testf16_regs +; NOT_WIN: call +; NOT_WIN: vaddps {{%ymm[8-9]}}, %ymm0, %ymm0 +; NOT_WIN: vaddps {{%ymm[8-9]}}, %ymm1, %ymm1 +; NOT_WIN: ret + +define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind { + %y = alloca <16 x float>, align 16 + %x = fadd <16 x float> %a, %b + %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y) + %2 = load <16 x float>* %y, align 16 + %3 = fadd <16 x float> %1, %b + %4 = fadd <16 x float> %2, %3 + ret <16 x float> %4 +} + +; test calling conventions - prolog and epilog +; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill +; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill +; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill +; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill +; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill +; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill +; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill +; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill +; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill +; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill +; WIN64: call +; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload +; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload +; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload +; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload +; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload +; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload +; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload +; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload +; WIN64: vmovaps {{.*(%rsp).*}}, 
{{%ymm([6-9]|1[0-5])}} # 32-byte Reload +; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload + +; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill +; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill +; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill +; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill +; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill +; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill +; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill +; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill +; NOT_WIN: call +; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload +; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload +; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload +; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload +; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload +; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload +; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload +; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload +define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x float> %b) nounwind { + %c = call <16 x float> @func_float16(<16 x float> %a, <16 x float> %b) + ret <16 x float> %c +} diff --git a/test/CodeGen/X86/cvtv2f32.ll b/test/CodeGen/X86/cvtv2f32.ll new file mode 100644 index 0000000000..466b096067 --- /dev/null +++ b/test/CodeGen/X86/cvtv2f32.ll @@ -0,0 +1,25 @@ +; RUN: llc < %s -mtriple=i686-linux-pc -mcpu=corei7 | FileCheck %s + +define <2 x float> @foo(i32 %x, i32 %y, <2 x float> %v) { + %t1 = uitofp i32 %x to float + %t2 = insertelement <2 x float> undef, float %t1, i32 0 + %t3 = uitofp i32 %y to float + %t4 = insertelement <2 x float> 
%t2, float %t3, i32 1 + %t5 = fmul <2 x float> %v, %t4 + ret <2 x float> %t5 +; CHECK: foo +; CHECK: or +; CHECK: subpd +; CHECK: cvtpd2ps +; CHECK: ret +} + +define <2 x float> @bar(<2 x i32> %in) { + %r = uitofp <2 x i32> %in to <2 x float> + ret <2 x float> %r +; CHECK: bar +; CHECK: or +; CHECK: subpd +; CHECK: cvtpd2ps +; CHECK: ret +} diff --git a/test/CodeGen/X86/fast-cc-callee-pops.ll b/test/CodeGen/X86/fast-cc-callee-pops.ll index ea10897c73..2c5b80ac4a 100644 --- a/test/CodeGen/X86/fast-cc-callee-pops.ll +++ b/test/CodeGen/X86/fast-cc-callee-pops.ll @@ -2,12 +2,12 @@ ; Check that a fastcc function pops its stack variables before returning. -define x86_fastcallcc void @func(i64 %X, i64 %Y, float %G, double %Z) nounwind { +define x86_fastcallcc void @func(i64 inreg %X, i64 %Y, float %G, double %Z) nounwind { ret void ; CHECK: ret{{.*}}20 } -define x86_thiscallcc void @func2(i32 %X, i64 %Y, float %G, double %Z) nounwind { +define x86_thiscallcc void @func2(i32 inreg %X, i64 %Y, float %G, double %Z) nounwind { ret void ; CHECK: ret{{.*}}20 } diff --git a/test/CodeGen/X86/fast-cc-merge-stack-adj.ll b/test/CodeGen/X86/fast-cc-merge-stack-adj.ll index 14cb136f89..d591f9408b 100644 --- a/test/CodeGen/X86/fast-cc-merge-stack-adj.ll +++ b/test/CodeGen/X86/fast-cc-merge-stack-adj.ll @@ -3,7 +3,7 @@ target triple = "i686-pc-linux-gnu" -declare x86_fastcallcc void @func(i32*, i64) +declare x86_fastcallcc void @func(i32*, i64 inreg) define x86_fastcallcc void @caller(i32, i64) { %X = alloca i32 ; <i32*> [#uses=1] diff --git a/test/CodeGen/X86/fast-cc-pass-in-regs.ll b/test/CodeGen/X86/fast-cc-pass-in-regs.ll index a96e5043fe..b60b68bd38 100644 --- a/test/CodeGen/X86/fast-cc-pass-in-regs.ll +++ b/test/CodeGen/X86/fast-cc-pass-in-regs.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | FileCheck %s ; check that fastcc is passing stuff in regs. 
-declare x86_fastcallcc i64 @callee(i64) +declare x86_fastcallcc i64 @callee(i64 inreg) define i64 @caller() { %X = call x86_fastcallcc i64 @callee( i64 4294967299 ) ; <i64> [#uses=1] @@ -9,7 +9,7 @@ define i64 @caller() { ret i64 %X } -define x86_fastcallcc i64 @caller2(i64 %X) { +define x86_fastcallcc i64 @caller2(i64 inreg %X) { ret i64 %X ; CHECK: mov{{.*}}EAX, ECX } diff --git a/test/CodeGen/X86/ms-inline-asm.ll b/test/CodeGen/X86/ms-inline-asm.ll index 59efa8d547..24d28adda8 100644 --- a/test/CodeGen/X86/ms-inline-asm.ll +++ b/test/CodeGen/X86/ms-inline-asm.ll @@ -38,3 +38,26 @@ entry: ; CHECK: .att_syntax ; CHECK: {{## InlineAsm End|#NO_APP}} } + +%struct.t18_type = type { i32, i32 } + +define i32 @t18() nounwind { +entry: + %foo = alloca %struct.t18_type, align 4 + %a = getelementptr inbounds %struct.t18_type* %foo, i32 0, i32 0 + store i32 1, i32* %a, align 4 + %b = getelementptr inbounds %struct.t18_type* %foo, i32 0, i32 1 + store i32 2, i32* %b, align 4 + call void asm sideeffect inteldialect "lea ebx, foo\0A\09mov eax, [ebx].0\0A\09mov [ebx].4, ecx", "~{eax},~{dirflag},~{fpsr},~{flags}"() nounwind + %b1 = getelementptr inbounds %struct.t18_type* %foo, i32 0, i32 1 + %0 = load i32* %b1, align 4 + ret i32 %0 +; CHECK: t18 +; CHECK: {{## InlineAsm Start|#APP}} +; CHECK: .intel_syntax +; CHECK: lea ebx, foo +; CHECK: mov eax, [ebx].0 +; CHECK: mov [ebx].4, ecx +; CHECK: .att_syntax +; CHECK: {{## InlineAsm End|#NO_APP}} +} diff --git a/test/CodeGen/X86/pointer-vector.ll b/test/CodeGen/X86/pointer-vector.ll index 800fbedb4f..58423d1959 100644 --- a/test/CodeGen/X86/pointer-vector.ll +++ b/test/CodeGen/X86/pointer-vector.ll @@ -81,8 +81,7 @@ define <4 x i32*> @INT2PTR1(<4 x i8>* %p) nounwind { entry: %G = load <4 x i8>* %p ;CHECK: movl -;CHECK: movd -;CHECK: pshufb +;CHECK: pmovzxbd ;CHECK: pand %K = inttoptr <4 x i8> %G to <4 x i32*> ;CHECK: ret @@ -105,7 +104,7 @@ define <2 x i32*> @BITCAST1(<2 x i8*>* %p) nounwind { entry: %G = load <2 x i8*>* %p ;CHECK: 
movl -;CHECK: movsd +;CHECK: pmovzxdq %T = bitcast <2 x i8*> %G to <2 x i32*> ;CHECK: ret ret <2 x i32*> %T diff --git a/test/CodeGen/X86/pr14161.ll b/test/CodeGen/X86/pr14161.ll new file mode 100644 index 0000000000..ff4532eac3 --- /dev/null +++ b/test/CodeGen/X86/pr14161.ll @@ -0,0 +1,38 @@ +; RUN: llc < %s -mtriple=x86_64-linux-pc -mcpu=corei7 | FileCheck %s + +declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) + +define <2 x i16> @good(<4 x i32>*, <4 x i8>*) { +entry: + %2 = load <4 x i32>* %0, align 16 + %3 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %2, <4 x i32> <i32 127, i32 127, i32 127, i32 127>) + %4 = extractelement <4 x i32> %3, i32 0 + %5 = extractelement <4 x i32> %3, i32 1 + %6 = extractelement <4 x i32> %3, i32 2 + %7 = extractelement <4 x i32> %3, i32 3 + %8 = bitcast i32 %4 to <2 x i16> + %9 = bitcast i32 %5 to <2 x i16> + ret <2 x i16> %8 +; CHECK: good +; CHECK: pminud +; CHECK-NEXT: pmovzxwq +; CHECK: ret +} + +define <2 x i16> @bad(<4 x i32>*, <4 x i8>*) { +entry: + %2 = load <4 x i32>* %0, align 16 + %3 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %2, <4 x i32> <i32 127, i32 127, i32 127, i32 127>) + %4 = extractelement <4 x i32> %3, i32 0 + %5 = extractelement <4 x i32> %3, i32 1 + %6 = extractelement <4 x i32> %3, i32 2 + %7 = extractelement <4 x i32> %3, i32 3 + %8 = bitcast i32 %4 to <2 x i16> + %9 = bitcast i32 %5 to <2 x i16> + ret <2 x i16> %9 +; CHECK: bad +; CHECK: pminud +; CHECK: pextrd +; CHECK: pmovzxwq +; CHECK: ret +} diff --git a/test/CodeGen/X86/promote.ll b/test/CodeGen/X86/promote.ll index 8b30dc718b..283f48cd37 100644 --- a/test/CodeGen/X86/promote.ll +++ b/test/CodeGen/X86/promote.ll @@ -20,7 +20,7 @@ entry: ; CHECK: shuff_f define i32 @shuff_f(<4 x i8>* %A) { entry: -; CHECK: pshufb +; CHECK: pmovzxbd ; CHECK: paddd ; CHECK: pshufb %0 = load <4 x i8>* %A, align 8 diff --git a/test/CodeGen/X86/sse-intel-ocl.ll b/test/CodeGen/X86/sse-intel-ocl.ll new file mode 100644 index 0000000000..188505072f --- 
/dev/null +++ b/test/CodeGen/X86/sse-intel-ocl.ll @@ -0,0 +1,93 @@ +; RUN: llc < %s -mtriple=i386-pc-win32 -mcpu=nehalem | FileCheck -check-prefix=WIN32 %s +; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=nehalem | FileCheck -check-prefix=WIN64 %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=nehalem | FileCheck -check-prefix=NOT_WIN %s + +declare <16 x float> @func_float16_ptr(<16 x float>, <16 x float> *) +declare <16 x float> @func_float16(<16 x float>, <16 x float>) +; WIN64: testf16_inp +; WIN64: addps {{.*}}, {{%xmm[0-3]}} +; WIN64: addps {{.*}}, {{%xmm[0-3]}} +; WIN64: addps {{.*}}, {{%xmm[0-3]}} +; WIN64: addps {{.*}}, {{%xmm[0-3]}} +; WIN64: leaq {{.*}}(%rsp), %rcx +; WIN64: call +; WIN64: ret + +; WIN32: testf16_inp +; WIN32: movl %eax, (%esp) +; WIN32: addps {{.*}}, {{%xmm[0-3]}} +; WIN32: addps {{.*}}, {{%xmm[0-3]}} +; WIN32: addps {{.*}}, {{%xmm[0-3]}} +; WIN32: addps {{.*}}, {{%xmm[0-3]}} +; WIN32: call +; WIN32: ret + +; NOT_WIN: testf16_inp +; NOT_WIN: addps {{.*}}, {{%xmm[0-3]}} +; NOT_WIN: addps {{.*}}, {{%xmm[0-3]}} +; NOT_WIN: addps {{.*}}, {{%xmm[0-3]}} +; NOT_WIN: addps {{.*}}, {{%xmm[0-3]}} +; NOT_WIN: leaq {{.*}}(%rsp), %rdi +; NOT_WIN: call +; NOT_WIN: ret + +;test calling conventions - input parameters +define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind { + %y = alloca <16 x float>, align 16 + %x = fadd <16 x float> %a, %b + %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y) + %2 = load <16 x float>* %y, align 16 + %3 = fadd <16 x float> %2, %1 + ret <16 x float> %3 +} + +;test calling conventions - preserved registers + +; preserved xmm6-xmm15 +; WIN64: testf16_regs +; WIN64: call +; WIN64: addps {{%xmm[6-9]}}, {{.*}} +; WIN64: addps {{%xmm[6-9]}}, {{.*}} +; WIN64: ret + +; preserved xmm8-xmm15 +; NOT_WIN: testf16_regs +; NOT_WIN: call +; NOT_WIN: addps {{%xmm([8-9]|1[0-1])}}, {{.*}} +; NOT_WIN: addps {{%xmm([8-9]|1[0-1])}}, {{.*}} +; NOT_WIN: addps {{%xmm([8-9]|1[0-1])}}, 
{{.*}} +; NOT_WIN: addps {{%xmm([8-9]|1[0-1])}}, {{.*}} +; NOT_WIN: ret + +define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind { + %y = alloca <16 x float>, align 16 + %x = fadd <16 x float> %a, %b + %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y) + %2 = load <16 x float>* %y, align 16 + %3 = fadd <16 x float> %1, %b + %4 = fadd <16 x float> %2, %3 + ret <16 x float> %4 +} + +; test calling conventions - prolog and epilog +; NOT_WIN: movaps {{%xmm([8-9]|1[0-5])}}, {{.*(%rsp).*}} ## 16-byte Spill +; NOT_WIN: movaps {{%xmm([8-9]|1[0-5])}}, {{.*(%rsp).*}} ## 16-byte Spill +; NOT_WIN: movaps {{%xmm([8-9]|1[0-5])}}, {{.*(%rsp).*}} ## 16-byte Spill +; NOT_WIN: movaps {{%xmm([8-9]|1[0-5])}}, {{.*(%rsp).*}} ## 16-byte Spill +; NOT_WIN: movaps {{%xmm([8-9]|1[0-5])}}, {{.*(%rsp).*}} ## 16-byte Spill +; NOT_WIN: movaps {{%xmm([8-9]|1[0-5])}}, {{.*(%rsp).*}} ## 16-byte Spill +; NOT_WIN: movaps {{%xmm([8-9]|1[0-5])}}, {{.*(%rsp).*}} ## 16-byte Spill +; NOT_WIN: movaps {{%xmm([8-9]|1[0-5])}}, {{.*(%rsp).*}} ## 16-byte Spill +; NOT_WIN: call +; NOT_WIN: movaps {{.*(%rsp).*}}, {{%xmm([8-9]|1[0-5])}} ## 16-byte Reload +; NOT_WIN: movaps {{.*(%rsp).*}}, {{%xmm([8-9]|1[0-5])}} ## 16-byte Reload +; NOT_WIN: movaps {{.*(%rsp).*}}, {{%xmm([8-9]|1[0-5])}} ## 16-byte Reload +; NOT_WIN: movaps {{.*(%rsp).*}}, {{%xmm([8-9]|1[0-5])}} ## 16-byte Reload +; NOT_WIN: movaps {{.*(%rsp).*}}, {{%xmm([8-9]|1[0-5])}} ## 16-byte Reload +; NOT_WIN: movaps {{.*(%rsp).*}}, {{%xmm([8-9]|1[0-5])}} ## 16-byte Reload +; NOT_WIN: movaps {{.*(%rsp).*}}, {{%xmm([8-9]|1[0-5])}} ## 16-byte Reload +; NOT_WIN: movaps {{.*(%rsp).*}}, {{%xmm([8-9]|1[0-5])}} ## 16-byte Reload +define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x float> %b) nounwind { + %c = call <16 x float> @func_float16(<16 x float> %a, <16 x float> %b) + ret <16 x float> %c +} diff --git a/test/CodeGen/X86/trunc-ext-ld-st.ll 
b/test/CodeGen/X86/trunc-ext-ld-st.ll index 9877d7be16..1d22a185de 100644 --- a/test/CodeGen/X86/trunc-ext-ld-st.ll +++ b/test/CodeGen/X86/trunc-ext-ld-st.ll @@ -2,8 +2,7 @@ ;CHECK: load_2_i8 ; A single 16-bit load -;CHECK: movzwl -;CHECK: pshufb +;CHECK: pmovzxbq ;CHECK: paddq ;CHECK: pshufb ; A single 16-bit store @@ -19,8 +18,7 @@ define void @load_2_i8(<2 x i8>* %A) { ;CHECK: load_2_i16 ; Read 32-bits -;CHECK: movd -;CHECK: pshufb +;CHECK: pmovzxwq ;CHECK: paddq ;CHECK: pshufb ;CHECK: movd @@ -33,7 +31,7 @@ define void @load_2_i16(<2 x i16>* %A) { } ;CHECK: load_2_i32 -;CHECK: pshufd +;CHECK: pmovzxdq ;CHECK: paddq ;CHECK: pshufd ;CHECK: ret @@ -45,8 +43,7 @@ define void @load_2_i32(<2 x i32>* %A) { } ;CHECK: load_4_i8 -;CHECK: movd -;CHECK: pshufb +;CHECK: pmovzxbd ;CHECK: paddd ;CHECK: pshufb ;CHECK: ret @@ -58,7 +55,7 @@ define void @load_4_i8(<4 x i8>* %A) { } ;CHECK: load_4_i16 -;CHECK: punpcklwd +;CHECK: pmovzxwd ;CHECK: paddd ;CHECK: pshufb ;CHECK: ret @@ -70,7 +67,7 @@ define void @load_4_i16(<4 x i16>* %A) { } ;CHECK: load_8_i8 -;CHECK: punpcklbw +;CHECK: pmovzxbw ;CHECK: paddw ;CHECK: pshufb ;CHECK: ret diff --git a/test/CodeGen/X86/vec_compare-2.ll b/test/CodeGen/X86/vec_compare-2.ll index 46d6a23554..4da79538db 100644 --- a/test/CodeGen/X86/vec_compare-2.ll +++ b/test/CodeGen/X86/vec_compare-2.ll @@ -10,8 +10,7 @@ define void @blackDespeckle_wrapper(i8** %args_list, i64* %gtid, i64 %xend) { entry: ; CHECK: cfi_def_cfa_offset ; CHECK-NOT: set -; CHECK: punpcklwd -; CHECK: pshufd +; CHECK: pmovzxwq ; CHECK: pshufb %shr.i = ashr <4 x i32> zeroinitializer, <i32 3, i32 3, i32 3, i32 3> ; <<4 x i32>> [#uses=1] %cmp318.i = sext <4 x i1> zeroinitializer to <4 x i32> ; <<4 x i32>> [#uses=1] diff --git a/test/CodeGen/X86/widen_load-2.ll b/test/CodeGen/X86/widen_load-2.ll index 79aa000502..224898c1a3 100644 --- a/test/CodeGen/X86/widen_load-2.ll +++ b/test/CodeGen/X86/widen_load-2.ll @@ -170,7 +170,7 @@ define void @add31i8(%i8vec31* nocapture sret %ret, 
%i8vec31* %ap, %i8vec31* %bp ; CHECK: rot %i8vec3pack = type { <3 x i8>, i8 } define %i8vec3pack @rot() nounwind { -; CHECK: movd {{-?[0-9]+}}(%rsp), {{%xmm[0-9]}} +; CHECK: pmovzxbd {{-?[0-9]+}}(%rsp), {{%xmm[0-9]}} entry: %X = alloca %i8vec3pack, align 4 %rot = alloca %i8vec3pack, align 4 diff --git a/test/MC/PowerPC/lit.local.cfg b/test/MC/PowerPC/lit.local.cfg new file mode 100644 index 0000000000..88488cdd04 --- /dev/null +++ b/test/MC/PowerPC/lit.local.cfg @@ -0,0 +1,5 @@ +config.suffixes = ['.ll', '.c', '.cpp', '.s'] + +targets = set(config.root.targets_to_build.split()) +if not 'PowerPC' in targets: + config.unsupported = True diff --git a/test/MC/PowerPC/ppc64-relocs-01.ll b/test/MC/PowerPC/ppc64-relocs-01.ll new file mode 100644 index 0000000000..5996af84f4 --- /dev/null +++ b/test/MC/PowerPC/ppc64-relocs-01.ll @@ -0,0 +1,66 @@ +;; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -O3 \ +;; RUN: -filetype=obj %s -o - | \ +;; RUN: elf-dump --dump-section-data | FileCheck %s + +;; FIXME: this file need to be in .s form, change when asm parse is done. + +@number64 = global i64 10, align 8 + +define i64 @access_int64(i64 %a) nounwind readonly { +entry: + %0 = load i64* @number64, align 8 + %cmp = icmp eq i64 %0, %a + %conv1 = zext i1 %cmp to i64 + ret i64 %conv1 +} + +declare double @sin(double) nounwind + +define double @test_branch24 (double %x) nounwind readonly { +entry: + %add = call double @sin(double %x) nounwind + ret double %add +} + +;; The relocations in .rela.text are the 'number64' load using a +;; R_PPC64_TOC16_DS against the .toc and the 'sin' external function +;; address using a R_PPC64_REL24 +;; CHECK: '.rela.text' +;; CHECK: Relocation 0 +;; CHECK-NEXT: 'r_offset', +;; CHECK-NEXT: 'r_sym', 0x00000006 +;; CHECK-NEXT: 'r_type', 0x0000003f +;; CHECK: Relocation 1 +;; CHECK-NEXT: 'r_offset', +;; CHECK-NEXT: 'r_sym', 0x0000000a +;; CHECK-NEXT: 'r_type', 0x0000000a + +;; The .opd entry for the 'access_int64' function creates 2 relocations: +;; 1. 
A R_PPC64_ADDR64 against the .text segment plus addend (the function +; address itself); +;; 2. And a R_PPC64_TOC against no symbol (the linker will replace for the +;; module's TOC base). +;; CHECK: '.rela.opd' +;; CHECK: Relocation 0 +;; CHECK-NEXT: 'r_offset', +;; CHECK-NEXT: 'r_sym', 0x00000002 +;; CHECK-NEXT: 'r_type', 0x00000026 +;; CHECK: Relocation 1 +;; CHECK-NEXT: 'r_offset', +;; CHECK-NEXT: 'r_sym', 0x00000000 +;; CHECK-NEXT: 'r_type', 0x00000033 + +;; Finally the TOC creates the relocation for the 'number64'. +;; CHECK: '.rela.toc' +;; CHECK: Relocation 0 +;; CHECK-NEXT: 'r_offset', +;; CHECK-NEXT: 'r_sym', 0x00000008 +;; CHECK-NEXT: 'r_type', 0x00000026 + +;; Check if the relocation references are for correct symbols. +;; CHECK: Symbol 7 +;; CHECK-NEXT: 'access_int64' +;; CHECK: Symbol 8 +;; CHECK-NEXT: 'number64' +;; CHECK: Symbol 10 +;; CHECK-NEXT: 'sin' diff --git a/test/MC/X86/x86-32-ms-inline-asm.s b/test/MC/X86/x86-32-ms-inline-asm.s new file mode 100644 index 0000000000..a5e80b2c93 --- /dev/null +++ b/test/MC/X86/x86-32-ms-inline-asm.s @@ -0,0 +1,10 @@ +// RUN: llvm-mc -x86-asm-syntax=intel -triple i386-unknown-unknown --show-encoding %s | FileCheck %s + +mov eax, [ebx].0 +mov [ebx].4, ecx + +// CHECK: movl (%ebx), %eax +// CHECK: encoding: [0x8b,0x03] +// CHECK: movl %ecx, 4(%ebx) +// CHECK: encoding: [0x89,0x4b,0x04] + diff --git a/test/Other/multi-pointer-size.ll b/test/Other/multi-pointer-size.ll new file mode 100644 index 0000000000..95fa54b8f2 --- /dev/null +++ b/test/Other/multi-pointer-size.ll @@ -0,0 +1,43 @@ +; RUN: opt -instcombine %s | llvm-dis | FileCheck %s +target datalayout = "e-p:32:32:32-p1:64:64:64-p2:8:8:8-p3:16:16:16--p4:96:96:96-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32" + +define i32 @test_as0(i32 addrspace(0)* %A) { +entry: +; CHECK: %arrayidx = getelementptr i32* %A, i32 1 + %arrayidx = getelementptr i32 addrspace(0)* %A, i64 1 + %y = load i32 addrspace(0)* %arrayidx, align 4 + ret i32 %y +} + +define i32 @test_as1(i32 
addrspace(1)* %A) { +entry: +; CHECK: %arrayidx = getelementptr i32 addrspace(1)* %A, i64 1 + %arrayidx = getelementptr i32 addrspace(1)* %A, i32 1 + %y = load i32 addrspace(1)* %arrayidx, align 4 + ret i32 %y +} + +define i32 @test_as2(i32 addrspace(2)* %A) { +entry: +; CHECK: %arrayidx = getelementptr i32 addrspace(2)* %A, i8 1 + %arrayidx = getelementptr i32 addrspace(2)* %A, i32 1 + %y = load i32 addrspace(2)* %arrayidx, align 4 + ret i32 %y +} + +define i32 @test_as3(i32 addrspace(3)* %A) { +entry: +; CHECK: %arrayidx = getelementptr i32 addrspace(3)* %A, i16 1 + %arrayidx = getelementptr i32 addrspace(3)* %A, i32 1 + %y = load i32 addrspace(3)* %arrayidx, align 4 + ret i32 %y +} + +define i32 @test_as4(i32 addrspace(4)* %A) { +entry: +; CHECK: %arrayidx = getelementptr i32 addrspace(4)* %A, i96 1 + %arrayidx = getelementptr i32 addrspace(4)* %A, i32 1 + %y = load i32 addrspace(4)* %arrayidx, align 4 + ret i32 %y +} + diff --git a/test/Transforms/GVN/crash.ll b/test/Transforms/GVN/crash.ll index 31eae256c6..4a8c8e4589 100644 --- a/test/Transforms/GVN/crash.ll +++ b/test/Transforms/GVN/crash.ll @@ -163,3 +163,39 @@ entry: ret i8 %1 } + +; Test that a GEP in an unreachable block with the following form doesn't crash +; GVN: +; +; %x = gep %some.type %x, ... + +%struct.type = type { i64, i32, i32 } + +define fastcc void @func() nounwind uwtable ssp align 2 { +entry: + br label %reachable.bb + +;; Unreachable code. + +unreachable.bb: + %gep.val = getelementptr inbounds %struct.type* %gep.val, i64 1 + br i1 undef, label %u2.bb, label %u1.bb + +u1.bb: + %tmp1 = getelementptr inbounds %struct.type* %gep.val, i64 0, i32 0 + store i64 -1, i64* %tmp1, align 8 + br label %unreachable.bb + +u2.bb: + %0 = load i32* undef, align 4 + %conv.i.i.i.i.i = zext i32 %0 to i64 + br label %u2.bb + +;; Reachable code. 
+ +reachable.bb: + br label %r1.bb + +r1.bb: + br label %u2.bb +} diff --git a/test/Transforms/GVN/pr14166.ll b/test/Transforms/GVN/pr14166.ll new file mode 100644 index 0000000000..9f47e46426 --- /dev/null +++ b/test/Transforms/GVN/pr14166.ll @@ -0,0 +1,27 @@ +; RUN: opt -gvn -S < %s | FileCheck %s +target datalayout = "e-p:32:32:32" +target triple = "i386-pc-linux-gnu" +define <2 x i32> @test1() { + %v1 = alloca <2 x i32> + call void @anything(<2 x i32>* %v1) + %v2 = load <2 x i32>* %v1 + %v3 = inttoptr <2 x i32> %v2 to <2 x i8*> + %v4 = bitcast <2 x i32>* %v1 to <2 x i8*>* + store <2 x i8*> %v3, <2 x i8*>* %v4 + %v5 = load <2 x i32>* %v1 + ret <2 x i32> %v5 +; CHECK: @test1 +; CHECK: %v1 = alloca <2 x i32> +; CHECK: call void @anything(<2 x i32>* %v1) +; CHECK: %v2 = load <2 x i32>* %v1 +; CHECK: %v3 = inttoptr <2 x i32> %v2 to <2 x i8*> +; CHECK: %v4 = bitcast <2 x i32>* %v1 to <2 x i8*>* +; CHECK: store <2 x i8*> %v3, <2 x i8*>* %v4 +; CHECK: %1 = ptrtoint <2 x i8*> %v3 to <2 x i32> +; CHECK: %2 = bitcast <2 x i32> %1 to i64 +; CHECK: %3 = bitcast i64 %2 to <2 x i32> +; CHECK: ret <2 x i32> %3 +} + +declare void @anything(<2 x i32>*) + diff --git a/test/Transforms/InstCombine/cast.ll b/test/Transforms/InstCombine/cast.ll index 899ffddd5b..b4eb69d436 100644 --- a/test/Transforms/InstCombine/cast.ll +++ b/test/Transforms/InstCombine/cast.ll @@ -891,3 +891,12 @@ define double @test80([100 x double]* %p, i32 %i) { ret double %l ; CHECK-NEXT: ret double } + +define double @test81(double *%p, float %f) { + %i = fptosi float %f to i64 + %q = bitcast double* %p to i8* + %pp = getelementptr i8* %q, i64 %i + %r = bitcast i8* %pp to double* + %l = load double* %r + ret double %l +} diff --git a/test/Transforms/InstCombine/constant-fold-gep-as-0.ll b/test/Transforms/InstCombine/constant-fold-gep-as-0.ll new file mode 100644 index 0000000000..74fe316137 --- /dev/null +++ b/test/Transforms/InstCombine/constant-fold-gep-as-0.ll @@ -0,0 +1,235 @@ +; "PLAIN" - No 
optimizations. This tests the target-independent +; constant folder. +; RUN: opt -S -o - < %s | FileCheck --check-prefix=PLAIN %s + +target datalayout = "e-p:128:128:128-p1:32:32:32-p2:8:8:8-p3:16:16:16-p4:64:64:64-p5:96:96:96-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32" + +; PLAIN: ModuleID = '<stdin>' + +; The automatic constant folder in opt does not have targetdata access, so +; it can't fold gep arithmetic, in general. However, the constant folder run +; from instcombine and global opt can use targetdata. +; PLAIN: @G8 = global i8 addrspace(1)* getelementptr (i8 addrspace(1)* inttoptr (i32 1 to i8 addrspace(1)*), i32 -1) +@G8 = global i8 addrspace(1)* getelementptr (i8 addrspace(1)* inttoptr (i32 1 to i8 addrspace(1)*), i32 -1) +; PLAIN: @G1 = global i1 addrspace(2)* getelementptr (i1 addrspace(2)* inttoptr (i8 1 to i1 addrspace(2)*), i8 -1) +@G1 = global i1 addrspace(2)* getelementptr (i1 addrspace(2)* inttoptr (i8 1 to i1 addrspace(2)*), i8 -1) +; PLAIN: @F8 = global i8 addrspace(1)* getelementptr (i8 addrspace(1)* inttoptr (i32 1 to i8 addrspace(1)*), i32 -2) +@F8 = global i8 addrspace(1)* getelementptr (i8 addrspace(1)* inttoptr (i32 1 to i8 addrspace(1)*), i32 -2) +; PLAIN: @F1 = global i1 addrspace(2)* getelementptr (i1 addrspace(2)* inttoptr (i8 1 to i1 addrspace(2)*), i8 -2) +@F1 = global i1 addrspace(2)* getelementptr (i1 addrspace(2)* inttoptr (i8 1 to i1 addrspace(2)*), i8 -2) +; PLAIN: @H8 = global i8 addrspace(1)* getelementptr (i8 addrspace(1)* null, i32 -1) +@H8 = global i8 addrspace(1)* getelementptr (i8 addrspace(1)* inttoptr (i32 0 to i8 addrspace(1)*), i32 -1) +; PLAIN: @H1 = global i1 addrspace(2)* getelementptr (i1 addrspace(2)* null, i8 -1) +@H1 = global i1 addrspace(2)* getelementptr (i1 addrspace(2)* inttoptr (i8 0 to i1 addrspace(2)*), i8 -1) + + +; The target-independent folder should be able to do some clever +; simplifications on sizeof, alignof, and offsetof expressions. 
The +; target-dependent folder should fold these down to constants. +; PLAIN-X: @a = constant i64 mul (i64 ptrtoint (double addrspace(4)* getelementptr (double addrspace(4)* null, i32 1) to i64), i64 2310) +@a = constant i64 mul (i64 3, i64 mul (i64 ptrtoint ({[7 x double], [7 x double]} addrspace(4)* getelementptr ({[7 x double], [7 x double]} addrspace(4)* null, i64 11) to i64), i64 5)) + +; PLAIN-X: @b = constant i64 ptrtoint (double addrspace(4)* getelementptr ({ i1, double }* null, i64 0, i32 1) to i64) +@b = constant i64 ptrtoint ([13 x double] addrspace(4)* getelementptr ({i1, [13 x double]} addrspace(4)* null, i64 0, i32 1) to i64) + +; PLAIN-X: @c = constant i64 mul nuw (i64 ptrtoint (double addrspace(4)* getelementptr (double addrspace(4)* null, i32 1) to i64), i64 2) +@c = constant i64 ptrtoint (double addrspace(4)* getelementptr ({double, double, double, double} addrspace(4)* null, i64 0, i32 2) to i64) + +; PLAIN-X: @d = constant i64 mul nuw (i64 ptrtoint (double addrspace(4)* getelementptr (double addrspace(4)* null, i32 1) to i64), i64 11) +@d = constant i64 ptrtoint (double addrspace(4)* getelementptr ([13 x double] addrspace(4)* null, i64 0, i32 11) to i64) + +; PLAIN-X: @e = constant i64 ptrtoint (double addrspace(4)* getelementptr ({ double, float, double, double }* null, i64 0, i32 2) to i64) +@e = constant i64 ptrtoint (double addrspace(4)* getelementptr ({double, float, double, double} addrspace(4)* null, i64 0, i32 2) to i64) + +; PLAIN-X: @f = constant i64 1 +@f = constant i64 ptrtoint (<{ i16, i128 }> addrspace(4)* getelementptr ({i1, <{ i16, i128 }>} addrspace(4)* null, i64 0, i32 1) to i64) + +; PLAIN-X: @g = constant i64 ptrtoint (double addrspace(4)* getelementptr ({ i1, double }* null, i64 0, i32 1) to i64) +@g = constant i64 ptrtoint ({double, double} addrspace(4)* getelementptr ({i1, {double, double}} addrspace(4)* null, i64 0, i32 1) to i64) + +; PLAIN-X: @h = constant i64 ptrtoint (i1 addrspace(2)* getelementptr (i1 addrspace(2)* 
null, i32 1) to i64) +@h = constant i64 ptrtoint (double addrspace(4)* getelementptr (double addrspace(4)* null, i64 1) to i64) + +; PLAIN-X: @i = constant i64 ptrtoint (i1 addrspace(2)* getelementptr ({ i1, i1 addrspace(2)* }* null, i64 0, i32 1) to i64) +@i = constant i64 ptrtoint (double addrspace(4)* getelementptr ({i1, double} addrspace(4)* null, i64 0, i32 1) to i64) + +; The target-dependent folder should cast GEP indices to integer-sized pointers. + +; PLAIN: @M = constant i64 addrspace(5)* getelementptr (i64 addrspace(5)* null, i32 1) +; PLAIN: @N = constant i64 addrspace(5)* getelementptr ({ i64, i64 } addrspace(5)* null, i32 0, i32 1) +; PLAIN: @O = constant i64 addrspace(5)* getelementptr ([2 x i64] addrspace(5)* null, i32 0, i32 1) + +@M = constant i64 addrspace(5)* getelementptr (i64 addrspace(5)* null, i32 1) +@N = constant i64 addrspace(5)* getelementptr ({ i64, i64 } addrspace(5)* null, i32 0, i32 1) +@O = constant i64 addrspace(5)* getelementptr ([2 x i64] addrspace(5)* null, i32 0, i32 1) + +; Fold GEP of a GEP. Very simple cases are folded. + +; PLAIN-X: @Y = global [3 x { i32, i32 }]addrspace(3)* getelementptr inbounds ([3 x { i32, i32 }]addrspace(3)* @ext, i64 2) +@ext = external addrspace(3) global [3 x { i32, i32 }] +@Y = global [3 x { i32, i32 }]addrspace(3)* getelementptr inbounds ([3 x { i32, i32 }]addrspace(3)* getelementptr inbounds ([3 x { i32, i32 }]addrspace(3)* @ext, i64 1), i64 1) + +; PLAIN-X: @Z = global i32addrspace(3)* getelementptr inbounds (i32addrspace(3)* getelementptr inbounds ([3 x { i32, i32 }]addrspace(3)* @ext, i64 0, i64 1, i32 0), i64 1) +@Z = global i32addrspace(3)* getelementptr inbounds (i32addrspace(3)* getelementptr inbounds ([3 x { i32, i32 }]addrspace(3)* @ext, i64 0, i64 1, i32 0), i64 1) + + +; Duplicate all of the above as function return values rather than +; global initializers. 
+ +; PLAIN: define i8 addrspace(1)* @goo8() nounwind { +; PLAIN: %t = bitcast i8 addrspace(1)* getelementptr (i8 addrspace(1)* inttoptr (i32 1 to i8 addrspace(1)*), i32 -1) to i8 addrspace(1)* +; PLAIN: ret i8 addrspace(1)* %t +; PLAIN: } +; PLAIN: define i1 addrspace(2)* @goo1() nounwind { +; PLAIN: %t = bitcast i1 addrspace(2)* getelementptr (i1 addrspace(2)* inttoptr (i32 1 to i1 addrspace(2)*), i32 -1) to i1 addrspace(2)* +; PLAIN: ret i1 addrspace(2)* %t +; PLAIN: } +; PLAIN: define i8 addrspace(1)* @foo8() nounwind { +; PLAIN: %t = bitcast i8 addrspace(1)* getelementptr (i8 addrspace(1)* inttoptr (i32 1 to i8 addrspace(1)*), i32 -2) to i8 addrspace(1)* +; PLAIN: ret i8 addrspace(1)* %t +; PLAIN: } +; PLAIN: define i1 addrspace(2)* @foo1() nounwind { +; PLAIN: %t = bitcast i1 addrspace(2)* getelementptr (i1 addrspace(2)* inttoptr (i32 1 to i1 addrspace(2)*), i32 -2) to i1 addrspace(2)* +; PLAIN: ret i1 addrspace(2)* %t +; PLAIN: } +; PLAIN: define i8 addrspace(1)* @hoo8() nounwind { +; PLAIN: %t = bitcast i8 addrspace(1)* getelementptr (i8 addrspace(1)* null, i32 -1) to i8 addrspace(1)* +; PLAIN: ret i8 addrspace(1)* %t +; PLAIN: } +; PLAIN: define i1 addrspace(2)* @hoo1() nounwind { +; PLAIN: %t = bitcast i1 addrspace(2)* getelementptr (i1 addrspace(2)* null, i32 -1) to i1 addrspace(2)* +; PLAIN: ret i1 addrspace(2)* %t +; PLAIN: } +define i8 addrspace(1)* @goo8() nounwind { + %t = bitcast i8 addrspace(1)* getelementptr (i8 addrspace(1)* inttoptr (i32 1 to i8 addrspace(1)*), i32 -1) to i8 addrspace(1)* + ret i8 addrspace(1)* %t +} +define i1 addrspace(2)* @goo1() nounwind { + %t = bitcast i1 addrspace(2)* getelementptr (i1 addrspace(2)* inttoptr (i32 1 to i1 addrspace(2)*), i32 -1) to i1 addrspace(2)* + ret i1 addrspace(2)* %t +} +define i8 addrspace(1)* @foo8() nounwind { + %t = bitcast i8 addrspace(1)* getelementptr (i8 addrspace(1)* inttoptr (i32 1 to i8 addrspace(1)*), i32 -2) to i8 addrspace(1)* + ret i8 addrspace(1)* %t +} +define i1 addrspace(2)* 
@foo1() nounwind { + %t = bitcast i1 addrspace(2)* getelementptr (i1 addrspace(2)* inttoptr (i32 1 to i1 addrspace(2)*), i32 -2) to i1 addrspace(2)* + ret i1 addrspace(2)* %t +} +define i8 addrspace(1)* @hoo8() nounwind { + %t = bitcast i8 addrspace(1)* getelementptr (i8 addrspace(1)* inttoptr (i32 0 to i8 addrspace(1)*), i32 -1) to i8 addrspace(1)* + ret i8 addrspace(1)* %t +} +define i1 addrspace(2)* @hoo1() nounwind { + %t = bitcast i1 addrspace(2)* getelementptr (i1 addrspace(2)* inttoptr (i32 0 to i1 addrspace(2)*), i32 -1) to i1 addrspace(2)* + ret i1 addrspace(2)* %t +} + +; PLAIN-X: define i64 @fa() nounwind { +; PLAIN-X: %t = bitcast i64 mul (i64 ptrtoint (double addrspace(4)* getelementptr (double addrspace(4)* null, i32 1) to i64), i64 2310) to i64 +; PLAIN-X: ret i64 %t +; PLAIN-X: } +; PLAIN-X: define i64 @fb() nounwind { +; PLAIN-X: %t = bitcast i64 ptrtoint (double addrspace(4)* getelementptr ({ i1, double }* null, i64 0, i32 1) to i64) to i64 +; PLAIN-X: ret i64 %t +; PLAIN-X: } +; PLAIN-X: define i64 @fc() nounwind { +; PLAIN-X: %t = bitcast i64 mul nuw (i64 ptrtoint (double addrspace(4)* getelementptr (double addrspace(4)* null, i32 1) to i64), i64 2) to i64 +; PLAIN-X: ret i64 %t +; PLAIN-X: } +; PLAIN-X: define i64 @fd() nounwind { +; PLAIN-X: %t = bitcast i64 mul nuw (i64 ptrtoint (double addrspace(4)* getelementptr (double addrspace(4)* null, i32 1) to i64), i64 11) to i64 +; PLAIN-X: ret i64 %t +; PLAIN-X: } +; PLAIN-X: define i64 @fe() nounwind { +; PLAIN-X: %t = bitcast i64 ptrtoint (double addrspace(4)* getelementptr ({ double, float, double, double }* null, i64 0, i32 2) to i64) to i64 +; PLAIN-X: ret i64 %t +; PLAIN-X: } +; PLAIN-X: define i64 @ff() nounwind { +; PLAIN-X: %t = bitcast i64 1 to i64 +; PLAIN-X: ret i64 %t +; PLAIN-X: } +; PLAIN-X: define i64 @fg() nounwind { +; PLAIN-X: %t = bitcast i64 ptrtoint (double addrspace(4)* getelementptr ({ i1, double }* null, i64 0, i32 1) to i64) to i64 +; PLAIN-X: ret i64 %t +; PLAIN-X: } +; 
PLAIN-X: define i64 @fh() nounwind { +; PLAIN-X: %t = bitcast i64 ptrtoint (i1 addrspace(2)* getelementptr (i1 addrspace(2)* null, i32 1) to i64) to i64 +; PLAIN-X: ret i64 %t +; PLAIN-X: } +; PLAIN-X: define i64 @fi() nounwind { +; PLAIN-X: %t = bitcast i64 ptrtoint (i1 addrspace(2)* getelementptr ({ i1, i1 addrspace(2)* }* null, i64 0, i32 1) to i64) to i64 +; PLAIN-X: ret i64 %t +; PLAIN-X: } +define i64 @fa() nounwind { + %t = bitcast i64 mul (i64 3, i64 mul (i64 ptrtoint ({[7 x double], [7 x double]}* getelementptr ({[7 x double], [7 x double]}* null, i64 11) to i64), i64 5)) to i64 + ret i64 %t +} +define i64 @fb() nounwind { + %t = bitcast i64 ptrtoint ([13 x double] addrspace(4)* getelementptr ({i1, [13 x double]} addrspace(4)* null, i64 0, i32 1) to i64) to i64 + ret i64 %t +} +define i64 @fc() nounwind { + %t = bitcast i64 ptrtoint (double addrspace(4)* getelementptr ({double, double, double, double} addrspace(4)* null, i64 0, i32 2) to i64) to i64 + ret i64 %t +} +define i64 @fd() nounwind { + %t = bitcast i64 ptrtoint (double addrspace(4)* getelementptr ([13 x double] addrspace(4)* null, i64 0, i32 11) to i64) to i64 + ret i64 %t +} +define i64 @fe() nounwind { + %t = bitcast i64 ptrtoint (double addrspace(4)* getelementptr ({double, float, double, double} addrspace(4)* null, i64 0, i32 2) to i64) to i64 + ret i64 %t +} +define i64 @ff() nounwind { + %t = bitcast i64 ptrtoint (<{ i16, i128 }> addrspace(4)* getelementptr ({i1, <{ i16, i128 }>} addrspace(4)* null, i64 0, i32 1) to i64) to i64 + ret i64 %t +} +define i64 @fg() nounwind { + %t = bitcast i64 ptrtoint ({double, double} addrspace(4)* getelementptr ({i1, {double, double}} addrspace(4)* null, i64 0, i32 1) to i64) to i64 + ret i64 %t +} +define i64 @fh() nounwind { + %t = bitcast i64 ptrtoint (double addrspace(4)* getelementptr (double addrspace(4)* null, i32 1) to i64) to i64 + ret i64 %t +} +define i64 @fi() nounwind { + %t = bitcast i64 ptrtoint (double addrspace(4)* getelementptr ({i1, 
double}addrspace(4)* null, i64 0, i32 1) to i64) to i64 + ret i64 %t +} + +; PLAIN: define i64* @fM() nounwind { +; PLAIN: %t = bitcast i64* getelementptr (i64* null, i32 1) to i64* +; PLAIN: ret i64* %t +; PLAIN: } +; PLAIN: define i64* @fN() nounwind { +; PLAIN: %t = bitcast i64* getelementptr ({ i64, i64 }* null, i32 0, i32 1) to i64* +; PLAIN: ret i64* %t +; PLAIN: } +; PLAIN: define i64* @fO() nounwind { +; PLAIN: %t = bitcast i64* getelementptr ([2 x i64]* null, i32 0, i32 1) to i64* +; PLAIN: ret i64* %t +; PLAIN: } + +define i64* @fM() nounwind { + %t = bitcast i64* getelementptr (i64* null, i32 1) to i64* + ret i64* %t +} +define i64* @fN() nounwind { + %t = bitcast i64* getelementptr ({ i64, i64 }* null, i32 0, i32 1) to i64* + ret i64* %t +} +define i64* @fO() nounwind { + %t = bitcast i64* getelementptr ([2 x i64]* null, i32 0, i32 1) to i64* + ret i64* %t +} + +; PLAIN: define i32 addrspace(1)* @fZ() nounwind { +; PLAIN: %t = bitcast i32 addrspace(1)* getelementptr inbounds (i32 addrspace(1)* getelementptr inbounds ([3 x { i32, i32 }] addrspace(1)* @ext2, i64 0, i64 1, i32 0), i64 1) to i32 addrspace(1)* +; PLAIN: ret i32 addrspace(1)* %t +; PLAIN: } +@ext2 = external addrspace(1) global [3 x { i32, i32 }] +define i32 addrspace(1)* @fZ() nounwind { + %t = bitcast i32 addrspace(1)* getelementptr inbounds (i32 addrspace(1)* getelementptr inbounds ([3 x { i32, i32 }] addrspace(1)* @ext2, i64 0, i64 1, i32 0), i64 1) to i32 addrspace(1)* + ret i32 addrspace(1)* %t +} diff --git a/test/Transforms/LoopUnroll/pr14167.ll b/test/Transforms/LoopUnroll/pr14167.ll new file mode 100644 index 0000000000..205ae44b72 --- /dev/null +++ b/test/Transforms/LoopUnroll/pr14167.ll @@ -0,0 +1,44 @@ +; RUN: opt < %s -S -loop-unroll -unroll-runtime | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-bgq-linux" + +define void @test1() nounwind { +; Ensure that we don't 
crash when the trip count == -1. +; CHECK: @test1 +entry: + br label %for.cond2.preheader + +for.cond2.preheader: ; preds = %for.end, %entry + br i1 false, label %middle.block, label %vector.ph + +vector.ph: ; preds = %for.cond2.preheader + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + br i1 undef, label %middle.block.loopexit, label %vector.body + +middle.block.loopexit: ; preds = %vector.body + br label %middle.block + +middle.block: ; preds = %middle.block.loopexit, %for.cond2.preheader + br i1 true, label %for.end, label %scalar.preheader + +scalar.preheader: ; preds = %middle.block + br label %for.body4 + +for.body4: ; preds = %for.body4, %scalar.preheader + %indvars.iv = phi i64 [ 16000, %scalar.preheader ], [ %indvars.iv.next, %for.body4 ] + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp ne i32 %lftr.wideiv, 16000 + br i1 %exitcond, label %for.body4, label %for.end.loopexit + +for.end.loopexit: ; preds = %for.body4 + br label %for.end + +for.end: ; preds = %for.end.loopexit, %middle.block + br i1 undef, label %for.cond2.preheader, label %for.end15 + +for.end15: ; preds = %for.end + ret void +} diff --git a/test/Transforms/LoopVectorize/2012-10-20-infloop.ll b/test/Transforms/LoopVectorize/2012-10-20-infloop.ll index 5caaffc8dd..0176c9a189 100644 --- a/test/Transforms/LoopVectorize/2012-10-20-infloop.ll +++ b/test/Transforms/LoopVectorize/2012-10-20-infloop.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -dce +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce ; Check that we don't fall into an infinite loop. 
define void @test() nounwind { diff --git a/test/Transforms/LoopVectorize/cost-model.ll b/test/Transforms/LoopVectorize/cost-model.ll new file mode 100644 index 0000000000..18abf2885e --- /dev/null +++ b/test/Transforms/LoopVectorize/cost-model.ll @@ -0,0 +1,38 @@ +; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +@c = common global [2048 x i32] zeroinitializer, align 16 +@b = common global [2048 x i32] zeroinitializer, align 16 +@d = common global [2048 x i32] zeroinitializer, align 16 +@a = common global [2048 x i32] zeroinitializer, align 16 + +;CHECK: cost_model_1 +;CHECK-NOT: <4 x i32> +;CHECK: ret void +define void @cost_model_1() nounwind uwtable noinline ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %0 = shl nsw i64 %indvars.iv, 1 + %arrayidx = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %0 + %1 = load i32* %arrayidx, align 8 + %idxprom1 = sext i32 %1 to i64 + %arrayidx2 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %idxprom1 + %2 = load i32* %arrayidx2, align 4 + %arrayidx4 = getelementptr inbounds [2048 x i32]* @d, i64 0, i64 %indvars.iv + %3 = load i32* %arrayidx4, align 4 + %idxprom5 = sext i32 %3 to i64 + %arrayidx6 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %idxprom5 + store i32 %2, i32* %arrayidx6, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 256 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} diff --git a/test/Transforms/LoopVectorize/cpp-new-array.ll b/test/Transforms/LoopVectorize/cpp-new-array.ll new file mode 100644 index 
0000000000..26902eba9e --- /dev/null +++ b/test/Transforms/LoopVectorize/cpp-new-array.ll @@ -0,0 +1,46 @@ +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +;CHECK: @cpp_new_arrays +;CHECK: insertelement <4 x i32> +;CHECK: load <4 x float> +;CHECK: fadd <4 x float> +;CHECK: ret i32 +define i32 @cpp_new_arrays() uwtable ssp { +entry: + %call = call noalias i8* @_Znwm(i64 4) + %0 = bitcast i8* %call to float* + store float 1.000000e+03, float* %0, align 4 + %call1 = call noalias i8* @_Znwm(i64 4) + %1 = bitcast i8* %call1 to float* + store float 1.000000e+03, float* %1, align 4 + %call3 = call noalias i8* @_Znwm(i64 4) + %2 = bitcast i8* %call3 to float* + store float 1.000000e+03, float* %2, align 4 + br label %for.body + +for.body: ; preds = %entry, %for.body + %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %idxprom = sext i32 %i.01 to i64 + %arrayidx = getelementptr inbounds float* %0, i64 %idxprom + %3 = load float* %arrayidx, align 4 + %idxprom5 = sext i32 %i.01 to i64 + %arrayidx6 = getelementptr inbounds float* %1, i64 %idxprom5 + %4 = load float* %arrayidx6, align 4 + %add = fadd float %3, %4 + %idxprom7 = sext i32 %i.01 to i64 + %arrayidx8 = getelementptr inbounds float* %2, i64 %idxprom7 + store float %add, float* %arrayidx8, align 4 + %inc = add nsw i32 %i.01, 1 + %cmp = icmp slt i32 %inc, 1000 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + %5 = load float* %2, align 4 + %conv10 = fptosi float %5 to i32 + ret i32 %conv10 +} + +declare noalias i8* @_Znwm(i64) diff --git a/test/Transforms/LoopVectorize/gcc-examples.ll b/test/Transforms/LoopVectorize/gcc-examples.ll index 6fb1792b2c..d8942ac861 100644 --- a/test/Transforms/LoopVectorize/gcc-examples.ll +++ 
b/test/Transforms/LoopVectorize/gcc-examples.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -dce -instcombine -licm -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/increment.ll b/test/Transforms/LoopVectorize/increment.ll index e818d68562..069b7ea031 100644 --- a/test/Transforms/LoopVectorize/increment.ll +++ b/test/Transforms/LoopVectorize/increment.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -dce -instcombine -licm -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/induction_plus.ll b/test/Transforms/LoopVectorize/induction_plus.ll index bd90113e52..b31bceb50d 100644 --- a/test/Transforms/LoopVectorize/induction_plus.ll +++ b/test/Transforms/LoopVectorize/induction_plus.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/non-const-n.ll b/test/Transforms/LoopVectorize/non-const-n.ll index 04c5c84a4f..7727b0a2dc 100644 --- a/test/Transforms/LoopVectorize/non-const-n.ll +++ b/test/Transforms/LoopVectorize/non-const-n.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s 
-loop-vectorize -dce -instcombine -licm -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/read-only.ll b/test/Transforms/LoopVectorize/read-only.ll index 4095ea68ef..b4d1bac132 100644 --- a/test/Transforms/LoopVectorize/read-only.ll +++ b/test/Transforms/LoopVectorize/read-only.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -dce -instcombine -licm -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/reduction.ll b/test/Transforms/LoopVectorize/reduction.ll index 3e871b229b..746a08c3ea 100644 --- a/test/Transforms/LoopVectorize/reduction.ll +++ b/test/Transforms/LoopVectorize/reduction.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -dce -instcombine -licm -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" @@ -149,4 +149,83 @@ for.end: ; preds = %for.body, %entry ret i32 %sum.0.lcssa } +;CHECK: @reduction_and +;CHECK: and <4 x i32> +;CHECK: ret i32 +define i32 @reduction_and(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly { +entry: + %cmp7 = icmp sgt i32 %n, 0 + br i1 %cmp7, label %for.body, label %for.end + +for.body: ; preds = %entry, 
%for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %result.08 = phi i32 [ %and, %for.body ], [ -1, %entry ] + %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32* %B, i64 %indvars.iv + %1 = load i32* %arrayidx2, align 4 + %add = add nsw i32 %1, %0 + %and = and i32 %add, %result.08 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body +for.end: ; preds = %for.body, %entry + %result.0.lcssa = phi i32 [ -1, %entry ], [ %and, %for.body ] + ret i32 %result.0.lcssa +} + +;CHECK: @reduction_or +;CHECK: or <4 x i32> +;CHECK: ret i32 +define i32 @reduction_or(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly { +entry: + %cmp7 = icmp sgt i32 %n, 0 + br i1 %cmp7, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %result.08 = phi i32 [ %or, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32* %B, i64 %indvars.iv + %1 = load i32* %arrayidx2, align 4 + %add = add nsw i32 %1, %0 + %or = or i32 %add, %result.08 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %result.0.lcssa = phi i32 [ 0, %entry ], [ %or, %for.body ] + ret i32 %result.0.lcssa +} + +;CHECK: @reduction_xor +;CHECK: xor <4 x i32> +;CHECK: ret i32 +define i32 @reduction_xor(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly { +entry: + %cmp7 = icmp sgt i32 %n, 0 + br i1 %cmp7, label %for.body, label %for.end + +for.body: ; preds = %entry, 
%for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %result.08 = phi i32 [ %xor, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32* %B, i64 %indvars.iv + %1 = load i32* %arrayidx2, align 4 + %add = add nsw i32 %1, %0 + %xor = xor i32 %add, %result.08 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %result.0.lcssa = phi i32 [ 0, %entry ], [ %xor, %for.body ] + ret i32 %result.0.lcssa +} diff --git a/test/Transforms/LoopVectorize/scalar-select.ll b/test/Transforms/LoopVectorize/scalar-select.ll index 8d5b6fd8af..e537bde31b 100644 --- a/test/Transforms/LoopVectorize/scalar-select.ll +++ b/test/Transforms/LoopVectorize/scalar-select.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -dce -instcombine -licm -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/SROA/basictest.ll b/test/Transforms/SROA/basictest.ll index 03120f7a32..110950f76a 100644 --- a/test/Transforms/SROA/basictest.ll +++ b/test/Transforms/SROA/basictest.ll @@ -577,9 +577,17 @@ entry: %ai = load i24* %aiptr ; CHCEK-NOT: store ; CHCEK-NOT: load -; CHECK: %[[mask0:.*]] = and i24 undef, -256 -; CHECK-NEXT: %[[mask1:.*]] = and i24 %[[mask0]], -65281 -; CHECK-NEXT: %[[mask2:.*]] = and i24 %[[mask1]], 65535 +; CHECK: %[[ext2:.*]] = zext i8 0 to i24 +; CHECK-NEXT: %[[shift2:.*]] = shl i24 %[[ext2]], 16 +; CHECK-NEXT: %[[mask2:.*]] = and i24 undef, 65535 +; CHECK-NEXT: %[[insert2:.*]] = 
or i24 %[[mask2]], %[[shift2]] +; CHECK-NEXT: %[[ext1:.*]] = zext i8 0 to i24 +; CHECK-NEXT: %[[shift1:.*]] = shl i24 %[[ext1]], 8 +; CHECK-NEXT: %[[mask1:.*]] = and i24 %[[insert2]], -65281 +; CHECK-NEXT: %[[insert1:.*]] = or i24 %[[mask1]], %[[shift1]] +; CHECK-NEXT: %[[ext0:.*]] = zext i8 0 to i24 +; CHECK-NEXT: %[[mask0:.*]] = and i24 %[[insert1]], -256 +; CHECK-NEXT: %[[insert0:.*]] = or i24 %[[mask0]], %[[ext0]] %biptr = bitcast [3 x i8]* %b to i24* store i24 %ai, i24* %biptr @@ -591,10 +599,10 @@ entry: %b2 = load i8* %b2ptr ; CHCEK-NOT: store ; CHCEK-NOT: load -; CHECK: %[[trunc0:.*]] = trunc i24 %[[mask2]] to i8 -; CHECK-NEXT: %[[shift1:.*]] = lshr i24 %[[mask2]], 8 +; CHECK: %[[trunc0:.*]] = trunc i24 %[[insert0]] to i8 +; CHECK-NEXT: %[[shift1:.*]] = lshr i24 %[[insert0]], 8 ; CHECK-NEXT: %[[trunc1:.*]] = trunc i24 %[[shift1]] to i8 -; CHECK-NEXT: %[[shift2:.*]] = lshr i24 %[[mask2]], 16 +; CHECK-NEXT: %[[shift2:.*]] = lshr i24 %[[insert0]], 16 ; CHECK-NEXT: %[[trunc2:.*]] = trunc i24 %[[shift2]] to i8 %bsum0 = add i8 %b0, %b1 @@ -1064,6 +1072,49 @@ entry: ret void } +define i64 @PR14059.2({ float, float }* %phi) { +; Check that SROA can split up alloca-wide integer loads and stores where the +; underlying alloca has smaller components that are accessed independently. This +; shows up particularly with ABI lowering patterns coming out of Clang that rely +; on the particular register placement of a single large integer return value. 
+; CHECK: @PR14059.2 + +entry: + %retval = alloca { float, float }, align 4 + ; CHECK-NOT: alloca + + %0 = bitcast { float, float }* %retval to i64* + store i64 0, i64* %0 + ; CHECK-NOT: store + + %phi.realp = getelementptr inbounds { float, float }* %phi, i32 0, i32 0 + %phi.real = load float* %phi.realp + %phi.imagp = getelementptr inbounds { float, float }* %phi, i32 0, i32 1 + %phi.imag = load float* %phi.imagp + ; CHECK: %[[realp:.*]] = getelementptr inbounds { float, float }* %phi, i32 0, i32 0 + ; CHECK-NEXT: %[[real:.*]] = load float* %[[realp]] + ; CHECK-NEXT: %[[imagp:.*]] = getelementptr inbounds { float, float }* %phi, i32 0, i32 1 + ; CHECK-NEXT: %[[imag:.*]] = load float* %[[imagp]] + + %real = getelementptr inbounds { float, float }* %retval, i32 0, i32 0 + %imag = getelementptr inbounds { float, float }* %retval, i32 0, i32 1 + store float %phi.real, float* %real + store float %phi.imag, float* %imag + ; CHECK-NEXT: %[[imag_convert:.*]] = bitcast float %[[imag]] to i32 + ; CHECK-NEXT: %[[imag_ext:.*]] = zext i32 %[[imag_convert]] to i64 + ; CHECK-NEXT: %[[imag_shift:.*]] = shl i64 %[[imag_ext]], 32 + ; CHECK-NEXT: %[[imag_mask:.*]] = and i64 undef, 4294967295 + ; CHECK-NEXT: %[[imag_insert:.*]] = or i64 %[[imag_mask]], %[[imag_shift]] + ; CHECK-NEXT: %[[real_convert:.*]] = bitcast float %[[real]] to i32 + ; CHECK-NEXT: %[[real_ext:.*]] = zext i32 %[[real_convert]] to i64 + ; CHECK-NEXT: %[[real_mask:.*]] = and i64 %[[imag_insert]], -4294967296 + ; CHECK-NEXT: %[[real_insert:.*]] = or i64 %[[real_mask]], %[[real_ext]] + + %1 = load i64* %0, align 1 + ret i64 %1 + ; CHECK-NEXT: ret i64 %[[real_insert]] +} + define void @PR14105({ [16 x i8] }* %ptr) { ; Ensure that when rewriting the GEP index '-1' for this alloca we preserve is ; sign as negative. 
We use a volatile memcpy to ensure promotion never actually diff --git a/test/Transforms/SROA/big-endian.ll b/test/Transforms/SROA/big-endian.ll index 532f8690cf..ce82d1f30b 100644 --- a/test/Transforms/SROA/big-endian.ll +++ b/test/Transforms/SROA/big-endian.ll @@ -26,9 +26,17 @@ entry: %ai = load i24* %aiptr ; CHCEK-NOT: store ; CHCEK-NOT: load -; CHECK: %[[mask0:.*]] = and i24 undef, 65535 -; CHECK-NEXT: %[[mask1:.*]] = and i24 %[[mask0]], -65281 -; CHECK-NEXT: %[[mask2:.*]] = and i24 %[[mask1]], -256 +; CHECK: %[[ext2:.*]] = zext i8 0 to i24 +; CHECK-NEXT: %[[mask2:.*]] = and i24 undef, -256 +; CHECK-NEXT: %[[insert2:.*]] = or i24 %[[mask2]], %[[ext2]] +; CHECK-NEXT: %[[ext1:.*]] = zext i8 0 to i24 +; CHECK-NEXT: %[[shift1:.*]] = shl i24 %[[ext1]], 8 +; CHECK-NEXT: %[[mask1:.*]] = and i24 %[[insert2]], -65281 +; CHECK-NEXT: %[[insert1:.*]] = or i24 %[[mask1]], %[[shift1]] +; CHECK-NEXT: %[[ext0:.*]] = zext i8 0 to i24 +; CHECK-NEXT: %[[shift0:.*]] = shl i24 %[[ext0]], 16 +; CHECK-NEXT: %[[mask0:.*]] = and i24 %[[insert1]], 65535 +; CHECK-NEXT: %[[insert0:.*]] = or i24 %[[mask0]], %[[shift0]] %biptr = bitcast [3 x i8]* %b to i24* store i24 %ai, i24* %biptr @@ -40,11 +48,11 @@ entry: %b2 = load i8* %b2ptr ; CHCEK-NOT: store ; CHCEK-NOT: load -; CHECK: %[[shift0:.*]] = lshr i24 %[[mask2]], 16 +; CHECK: %[[shift0:.*]] = lshr i24 %[[insert0]], 16 ; CHECK-NEXT: %[[trunc0:.*]] = trunc i24 %[[shift0]] to i8 -; CHECK-NEXT: %[[shift1:.*]] = lshr i24 %[[mask2]], 8 +; CHECK-NEXT: %[[shift1:.*]] = lshr i24 %[[insert0]], 8 ; CHECK-NEXT: %[[trunc1:.*]] = trunc i24 %[[shift1]] to i8 -; CHECK-NEXT: %[[trunc2:.*]] = trunc i24 %[[mask2]] to i8 +; CHECK-NEXT: %[[trunc2:.*]] = trunc i24 %[[insert0]] to i8 %bsum0 = add i8 %b0, %b1 %bsum1 = add i8 %bsum0, %b2 @@ -74,27 +82,26 @@ entry: %a0i16ptr = bitcast i8* %a0ptr to i16* store i16 1, i16* %a0i16ptr -; CHECK: %[[mask:.*]] = and i56 undef, 1099511627775 -; CHECK-NEXT: %[[or:.*]] = or i56 %[[mask]], 1099511627776 +; CHECK: 
%[[mask0:.*]] = and i16 1, -16 %a1i4ptr = bitcast i8* %a1ptr to i4* store i4 1, i4* %a1i4ptr -; CHECK: %[[mask:.*]] = and i56 %[[or]], -16492674416641 -; CHECK-NEXT: %[[or:.*]] = or i56 %[[mask]], 1099511627776 +; CHECK-NEXT: %[[insert0:.*]] = or i16 %[[mask0]], 1 store i8 1, i8* %a2ptr -; CHECK-NEXT: %[[mask:.*]] = and i56 %[[or]], -1095216660481 -; CHECK-NEXT: %[[or:.*]] = or i56 %[[mask]], 4294967296 +; CHECK-NEXT: %[[mask1:.*]] = and i40 undef, 4294967295 +; CHECK-NEXT: %[[insert1:.*]] = or i40 %[[mask1]], 4294967296 %a3i24ptr = bitcast i8* %a3ptr to i24* store i24 1, i24* %a3i24ptr -; CHECK-NEXT: %[[mask:.*]] = and i56 %[[or]], -4294967041 -; CHECK-NEXT: %[[or:.*]] = or i56 %[[mask]], 256 +; CHECK-NEXT: %[[mask2:.*]] = and i40 %[[insert1]], -4294967041 +; CHECK-NEXT: %[[insert2:.*]] = or i40 %[[mask2]], 256 %a2i40ptr = bitcast i8* %a2ptr to i40* store i40 1, i40* %a2i40ptr -; CHECK-NEXT: %[[mask:.*]] = and i56 %[[or]], -1099511627776 -; CHECK-NEXT: %[[or:.*]] = or i56 %[[mask]], 1 +; CHECK-NEXT: %[[ext3:.*]] = zext i40 1 to i56 +; CHECK-NEXT: %[[mask3:.*]] = and i56 undef, -1099511627776 +; CHECK-NEXT: %[[insert3:.*]] = or i56 %[[mask3]], %[[ext3]] ; CHCEK-NOT: store ; CHCEK-NOT: load @@ -103,6 +110,10 @@ entry: %ai = load i56* %aiptr %ret = zext i56 %ai to i64 ret i64 %ret -; CHECK: %[[ret:.*]] = zext i56 %[[or]] to i64 +; CHECK-NEXT: %[[ext4:.*]] = zext i16 %[[insert0]] to i56 +; CHECK-NEXT: %[[shift4:.*]] = shl i56 %[[ext4]], 40 +; CHECK-NEXT: %[[mask4:.*]] = and i56 %[[insert3]], 1099511627775 +; CHECK-NEXT: %[[insert4:.*]] = or i56 %[[mask4]], %[[shift4]] +; CHECK-NEXT: %[[ret:.*]] = zext i56 %[[insert4]] to i64 ; CHECK-NEXT: ret i64 %[[ret]] } diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp index 751c8e7da0..0390bc470a 100644 --- a/tools/opt/opt.cpp +++ b/tools/opt/opt.cpp @@ -39,6 +39,7 @@ #include "llvm/Support/SystemUtils.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/ToolOutputFile.h" +#include "llvm/Support/TargetSelect.h" 
#include "llvm/MC/SubtargetFeature.h" #include "llvm/LinkAllPasses.h" #include "llvm/LinkAllVMCore.h" @@ -564,6 +565,9 @@ int main(int argc, char **argv) { llvm_shutdown_obj Y; // Call llvm_shutdown() on exit. LLVMContext &Context = getGlobalContext(); + InitializeAllTargets(); + InitializeAllTargetMCs(); + // Initialize passes PassRegistry &Registry = *PassRegistry::getPassRegistry(); initializeCore(Registry); diff --git a/unittests/ADT/DenseMapTest.cpp b/unittests/ADT/DenseMapTest.cpp index 75e7006434..15eb6988f6 100644 --- a/unittests/ADT/DenseMapTest.cpp +++ b/unittests/ADT/DenseMapTest.cpp @@ -330,4 +330,37 @@ TEST(DenseMapCustomTest, FindAsTest) { EXPECT_TRUE(map.find_as("d") == map.end()); } +struct ContiguousDenseMapInfo { + static inline unsigned getEmptyKey() { return ~0; } + static inline unsigned getTombstoneKey() { return ~0U - 1; } + static unsigned getHashValue(const unsigned& Val) { return Val; } + static bool isEqual(const unsigned& LHS, const unsigned& RHS) { + return LHS == RHS; + } +}; + +// Test that filling a small dense map with exactly the number of elements in +// the map grows to have enough space for an empty bucket. +TEST(DenseMapCustomTest, SmallDenseMapGrowTest) { + SmallDenseMap<unsigned, unsigned, 32, ContiguousDenseMapInfo> map; + // Add some number of elements, then delete a few to leave us some tombstones. + // If we just filled the map with 32 elements we'd grow because of not enough + // tombstones which masks the issue here. + for (unsigned i = 0; i < 20; ++i) + map[i] = i + 1; + for (unsigned i = 0; i < 10; ++i) + map.erase(i); + for (unsigned i = 20; i < 32; ++i) + map[i] = i + 1; + + // Size tests + EXPECT_EQ(22u, map.size()); + + // Try to find an element which doesn't exist. There was a bug in + // SmallDenseMap which led to a map with num elements == small capacity not + // having an empty bucket any more. Finding an element not in the map would + // therefore never terminate. 
+ EXPECT_TRUE(map.find(32) == map.end()); +} + } diff --git a/utils/TableGen/CMakeLists.txt b/utils/TableGen/CMakeLists.txt index b8a6daf9e0..116fa57522 100644 --- a/utils/TableGen/CMakeLists.txt +++ b/utils/TableGen/CMakeLists.txt @@ -9,6 +9,7 @@ add_tablegen(llvm-tblgen LLVM CodeEmitterGen.cpp CodeGenDAGPatterns.cpp CodeGenInstruction.cpp + CodeGenMapTable.cpp CodeGenRegisters.cpp CodeGenSchedule.cpp CodeGenTarget.cpp diff --git a/utils/TableGen/CodeGenMapTable.cpp b/utils/TableGen/CodeGenMapTable.cpp new file mode 100644 index 0000000000..4bfd1ba798 --- /dev/null +++ b/utils/TableGen/CodeGenMapTable.cpp @@ -0,0 +1,608 @@ +//===- CodeGenMapTable.cpp - Instruction Mapping Table Generator ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// CodeGenMapTable provides functionality for the TabelGen to create +// relation mapping between instructions. Relation models are defined using +// InstrMapping as a base class. This file implements the functionality which +// parses these definitions and generates relation maps using the information +// specified there. These maps are emitted as tables in the XXXGenInstrInfo.inc +// file along with the functions to query them. +// +// A relationship model to relate non-predicate instructions with their +// predicated true/false forms can be defined as follows: +// +// def getPredOpcode : InstrMapping { +// let FilterClass = "PredRel"; +// let RowFields = ["BaseOpcode"]; +// let ColFields = ["PredSense"]; +// let KeyCol = ["none"]; +// let ValueCols = [["true"], ["false"]]; } +// +// CodeGenMapTable parses this map and generates a table in XXXGenInstrInfo.inc +// file that contains the instructions modeling this relationship. 
This table +// is defined in the function +// "int getPredOpcode(uint16_t Opcode, enum PredSense inPredSense)" +// that can be used to retrieve the predicated form of the instruction by +// passing its opcode value and the predicate sense (true/false) of the desired +// instruction as arguments. +// +// Short description of the algorithm: +// +// 1) Iterate through all the records that derive from "InstrMapping" class. +// 2) For each record, filter out instructions based on the FilterClass value. +// 3) Iterate through this set of instructions and insert them into +// RowInstrMap map based on their RowFields values. RowInstrMap is keyed by the +// vector of RowFields values and contains vectors of Records (instructions) as +// values. RowFields is a list of fields that are required to have the same +// values for all the instructions appearing in the same row of the relation +// table. All the instructions in a given row of the relation table have some +// sort of relationship with the key instruction defined by the corresponding +// relationship model. +// +// Ex: RowInstrMap(RowVal1, RowVal2, ...) -> [Instr1, Instr2, Instr3, ... ] +// Here Instr1, Instr2, Instr3 have same values (RowVal1, RowVal2) for +// RowFields. These groups of instructions are later matched against ValueCols +// to determine the column they belong to, if any. +// +// While building the RowInstrMap map, collect all the key instructions in +// KeyInstrVec. These are the instructions having the same values as KeyCol +// for all the fields listed in ColFields. +// +// For Example: +// +// Relate non-predicate instructions with their predicated true/false forms. +// +// def getPredOpcode : InstrMapping { +// let FilterClass = "PredRel"; +// let RowFields = ["BaseOpcode"]; +// let ColFields = ["PredSense"]; +// let KeyCol = ["none"]; +// let ValueCols = [["true"], ["false"]]; } +// +// Here, only instructions that have "none" as PredSense will be selected as key +// instructions. 
+// +// 4) For each key instruction, get the group of instructions that share the +// same key-value as the key instruction from RowInstrMap. Iterate over the list +// of columns in ValueCols (it is defined as a list<list<string> >. Therefore, +// it can specify multi-column relationships). For each column, find the +// instruction from the group that matches all the values for the column. +// Multiple matches are not allowed. +// +//===----------------------------------------------------------------------===// + +#include "CodeGenTarget.h" +#include "llvm/Support/Format.h" +using namespace llvm; +typedef std::map<std::string, std::vector<Record*> > InstrRelMapTy; + +typedef std::map<std::vector<Init*>, std::vector<Record*> > RowInstrMapTy; + +namespace { + +//===----------------------------------------------------------------------===// +// This class is used to represent InstrMapping class defined in Target.td file. +class InstrMap { +private: + std::string Name; + std::string FilterClass; + ListInit *RowFields; + ListInit *ColFields; + ListInit *KeyCol; + std::vector<ListInit*> ValueCols; + +public: + InstrMap(Record* MapRec) { + Name = MapRec->getName(); + + // FilterClass - It's used to reduce the search space only to the + // instructions that define the kind of relationship modeled by + // this InstrMapping object/record. + const RecordVal *Filter = MapRec->getValue("FilterClass"); + FilterClass = Filter->getValue()->getAsUnquotedString(); + + // List of fields/attributes that need to be same across all the + // instructions in a row of the relation table. + RowFields = MapRec->getValueAsListInit("RowFields"); + + // List of fields/attributes that are constant across all the instruction + // in a column of the relation table. Ex: ColFields = 'predSense' + ColFields = MapRec->getValueAsListInit("ColFields"); + + // Values for the fields/attributes listed in 'ColFields'. 
+ // Ex: KeyCol = 'noPred' -- key instruction is non predicated + KeyCol = MapRec->getValueAsListInit("KeyCol"); + + // List of values for the fields/attributes listed in 'ColFields', one for + // each column in the relation table. + // + // Ex: ValueCols = [['true'],['false']] -- it results two columns in the + // table. First column requires all the instructions to have predSense + // set to 'true' and second column requires it to be 'false'. + ListInit *ColValList = MapRec->getValueAsListInit("ValueCols"); + + // Each instruction map must specify at least one column for it to be valid. + if (ColValList->getSize() == 0) + throw "InstrMapping record `" + MapRec->getName() + "' has empty " + + "`ValueCols' field!"; + + for (unsigned i = 0, e = ColValList->getSize(); i < e; i++) { + ListInit *ColI = dyn_cast<ListInit>(ColValList->getElement(i)); + + // Make sure that all the sub-lists in 'ValueCols' have same number of + // elements as the fields in 'ColFields'. + if (ColI->getSize() == ColFields->getSize()) + ValueCols.push_back(ColI); + else { + throw "Record `" + MapRec->getName() + "', field `" + "ValueCols" + + "' entries don't match with the entries in 'ColFields'!"; + } + } + } + + std::string getName() const { + return Name; + } + + std::string getFilterClass() { + return FilterClass; + } + + ListInit *getRowFields() const { + return RowFields; + } + + ListInit *getColFields() const { + return ColFields; + } + + ListInit *getKeyCol() const { + return KeyCol; + } + + const std::vector<ListInit*> &getValueCols() const { + return ValueCols; + } +}; +} // End anonymous namespace. + + +//===----------------------------------------------------------------------===// +// class MapTableEmitter : It builds the instruction relation maps using +// the information provided in InstrMapping records. It outputs these +// relationship maps as tables into XXXGenInstrInfo.inc file along with the +// functions to query them. 
+ +namespace { +class MapTableEmitter { +private: +// std::string TargetName; + const CodeGenTarget &Target; + // InstrMapDesc - InstrMapping record to be processed. + InstrMap InstrMapDesc; + + // InstrDefs - list of instructions filtered using FilterClass defined + // in InstrMapDesc. + std::vector<Record*> InstrDefs; + + // RowInstrMap - maps RowFields values to the instructions. It's keyed by the + // values of the row fields and contains vector of records as values. + RowInstrMapTy RowInstrMap; + + // KeyInstrVec - list of key instructions. + std::vector<Record*> KeyInstrVec; + DenseMap<Record*, std::vector<Record*> > MapTable; + +public: + MapTableEmitter(CodeGenTarget &Target, RecordKeeper &Records, Record *IMRec): + Target(Target), InstrMapDesc(IMRec) { + const std::string FilterClass = InstrMapDesc.getFilterClass(); + InstrDefs = Records.getAllDerivedDefinitions(FilterClass); + } + + void buildRowInstrMap(); + + // Returns true if an instruction is a key instruction, i.e., its ColFields + // have same values as KeyCol. + bool isKeyColInstr(Record* CurInstr); + + // Find column instruction corresponding to a key instruction based on the + // constraints for that column. + Record *getInstrForColumn(Record *KeyInstr, ListInit *CurValueCol); + + // Find column instructions for each key instruction based + // on ValueCols and store them into MapTable. + void buildMapTable(); + + void emitBinSearch(raw_ostream &OS, unsigned TableSize); + void emitTablesWithFunc(raw_ostream &OS); + unsigned emitBinSearchTable(raw_ostream &OS); + + // Lookup functions to query binary search tables. + void emitMapFuncBody(raw_ostream &OS, unsigned TableSize); + +}; +} // End anonymous namespace. + + +//===----------------------------------------------------------------------===// +// Process all the instructions that model this relation (alreday present in +// InstrDefs) and insert them into RowInstrMap which is keyed by the values of +// the fields listed as RowFields. 
It stores vectors of records as values. +// All the related instructions have the same values for the RowFields thus are +// part of the same key-value pair. +//===----------------------------------------------------------------------===// + +void MapTableEmitter::buildRowInstrMap() { + for (unsigned i = 0, e = InstrDefs.size(); i < e; i++) { + std::vector<Record*> InstrList; + Record *CurInstr = InstrDefs[i]; + std::vector<Init*> KeyValue; + ListInit *RowFields = InstrMapDesc.getRowFields(); + for (unsigned j = 0, endRF = RowFields->getSize(); j < endRF; j++) { + Init *RowFieldsJ = RowFields->getElement(j); + Init *CurInstrVal = CurInstr->getValue(RowFieldsJ)->getValue(); + KeyValue.push_back(CurInstrVal); + } + + // Collect key instructions into KeyInstrVec. Later, these instructions are + // processed to assign column position to the instructions sharing + // their KeyValue in RowInstrMap. + if (isKeyColInstr(CurInstr)) + KeyInstrVec.push_back(CurInstr); + + RowInstrMap[KeyValue].push_back(CurInstr); + } +} + +//===----------------------------------------------------------------------===// +// Return true if an instruction is a KeyCol instruction. +//===----------------------------------------------------------------------===// + +bool MapTableEmitter::isKeyColInstr(Record* CurInstr) { + ListInit *ColFields = InstrMapDesc.getColFields(); + ListInit *KeyCol = InstrMapDesc.getKeyCol(); + + // Check if the instruction is a KeyCol instruction. 
+ bool MatchFound = true; + for (unsigned j = 0, endCF = ColFields->getSize(); + (j < endCF) && MatchFound; j++) { + RecordVal *ColFieldName = CurInstr->getValue(ColFields->getElement(j)); + std::string CurInstrVal = ColFieldName->getValue()->getAsUnquotedString(); + std::string KeyColValue = KeyCol->getElement(j)->getAsUnquotedString(); + MatchFound = (CurInstrVal == KeyColValue); + } + return MatchFound; +} + +//===----------------------------------------------------------------------===// +// Build a map to link key instructions with the column instructions arranged +// according to their column positions. +//===----------------------------------------------------------------------===// + +void MapTableEmitter::buildMapTable() { + // Find column instructions for a given key based on the ColField + // constraints. + const std::vector<ListInit*> &ValueCols = InstrMapDesc.getValueCols(); + unsigned NumOfCols = ValueCols.size(); + for (unsigned j = 0, endKI = KeyInstrVec.size(); j < endKI; j++) { + Record *CurKeyInstr = KeyInstrVec[j]; + std::vector<Record*> ColInstrVec(NumOfCols); + + // Find the column instruction based on the constraints for the column. + for (unsigned ColIdx = 0; ColIdx < NumOfCols; ColIdx++) { + ListInit *CurValueCol = ValueCols[ColIdx]; + Record *ColInstr = getInstrForColumn(CurKeyInstr, CurValueCol); + ColInstrVec[ColIdx] = ColInstr; + } + MapTable[CurKeyInstr] = ColInstrVec; + } +} + +//===----------------------------------------------------------------------===// +// Find column instruction based on the constraints for that column. +//===----------------------------------------------------------------------===// + +Record *MapTableEmitter::getInstrForColumn(Record *KeyInstr, + ListInit *CurValueCol) { + ListInit *RowFields = InstrMapDesc.getRowFields(); + std::vector<Init*> KeyValue; + + // Construct KeyValue using KeyInstr's values for RowFields. 
+ for (unsigned j = 0, endRF = RowFields->getSize(); j < endRF; j++) { + Init *RowFieldsJ = RowFields->getElement(j); + Init *KeyInstrVal = KeyInstr->getValue(RowFieldsJ)->getValue(); + KeyValue.push_back(KeyInstrVal); + } + + // Get all the instructions that share the same KeyValue as the KeyInstr + // in RowInstrMap. We search through these instructions to find a match + // for the current column, i.e., the instruction which has the same values + // as CurValueCol for all the fields in ColFields. + const std::vector<Record*> &RelatedInstrVec = RowInstrMap[KeyValue]; + + ListInit *ColFields = InstrMapDesc.getColFields(); + Record *MatchInstr = NULL; + + for (unsigned i = 0, e = RelatedInstrVec.size(); i < e; i++) { + bool MatchFound = true; + Record *CurInstr = RelatedInstrVec[i]; + for (unsigned j = 0, endCF = ColFields->getSize(); + (j < endCF) && MatchFound; j++) { + Init *ColFieldJ = ColFields->getElement(j); + Init *CurInstrInit = CurInstr->getValue(ColFieldJ)->getValue(); + std::string CurInstrVal = CurInstrInit->getAsUnquotedString(); + Init *ColFieldJVallue = CurValueCol->getElement(j); + MatchFound = (CurInstrVal == ColFieldJVallue->getAsUnquotedString()); + } + + if (MatchFound) { + if (MatchInstr) // Already had a match + // Error if multiple matches are found for a column. + throw "Multiple matches found for `" + KeyInstr->getName() + + "', for the relation `" + InstrMapDesc.getName(); + else + MatchInstr = CurInstr; + } + } + return MatchInstr; +} + +//===----------------------------------------------------------------------===// +// Emit one table per relation. Only instructions with a valid relation of a +// given type are included in the table sorted by their enum values (opcodes). +// Binary search is used for locating instructions in the table. 
+//===----------------------------------------------------------------------===// + +unsigned MapTableEmitter::emitBinSearchTable(raw_ostream &OS) { + + const std::vector<const CodeGenInstruction*> &NumberedInstructions = + Target.getInstructionsByEnumValue(); + std::string TargetName = Target.getName(); + const std::vector<ListInit*> &ValueCols = InstrMapDesc.getValueCols(); + unsigned NumCol = ValueCols.size(); + unsigned TotalNumInstr = NumberedInstructions.size(); + unsigned TableSize = 0; + + OS << "static const uint16_t "<<InstrMapDesc.getName(); + // Number of columns in the table are NumCol+1 because key instructions are + // emitted as first column. + OS << "Table[]["<< NumCol+1 << "] = {\n"; + for (unsigned i = 0; i < TotalNumInstr; i++) { + Record *CurInstr = NumberedInstructions[i]->TheDef; + std::vector<Record*> ColInstrs = MapTable[CurInstr]; + std::string OutStr(""); + unsigned RelExists = 0; + if (ColInstrs.size()) { + for (unsigned j = 0; j < NumCol; j++) { + if (ColInstrs[j] != NULL) { + RelExists = 1; + OutStr += ", "; + OutStr += TargetName; + OutStr += "::"; + OutStr += ColInstrs[j]->getName(); + } else { OutStr += ", -1";} + } + + if (RelExists) { + OS << " { " << TargetName << "::" << CurInstr->getName(); + OS << OutStr <<" },\n"; + TableSize++; + } + } + } + if (!TableSize) { + OS << " { " << TargetName << "::" << "INSTRUCTION_LIST_END, "; + OS << TargetName << "::" << "INSTRUCTION_LIST_END }"; + } + OS << "}; // End of " << InstrMapDesc.getName() << "Table\n\n"; + return TableSize; +} + +//===----------------------------------------------------------------------===// +// Emit binary search algorithm as part of the functions used to query +// relation tables. 
+//===----------------------------------------------------------------------===// + +void MapTableEmitter::emitBinSearch(raw_ostream &OS, unsigned TableSize) { + OS << " unsigned mid;\n"; + OS << " unsigned start = 0;\n"; + OS << " unsigned end = " << TableSize << ";\n"; + OS << " while (start < end) {\n"; + OS << " mid = start + (end - start)/2;\n"; + OS << " if (Opcode == " << InstrMapDesc.getName() << "Table[mid][0]) {\n"; + OS << " break;\n"; + OS << " }\n"; + OS << " if (Opcode < " << InstrMapDesc.getName() << "Table[mid][0])\n"; + OS << " end = mid;\n"; + OS << " else\n"; + OS << " start = mid + 1;\n"; + OS << " }\n"; + OS << " if (start == end)\n"; + OS << " return -1; // Instruction doesn't exist in this table.\n\n"; +} + +//===----------------------------------------------------------------------===// +// Emit functions to query relation tables. +//===----------------------------------------------------------------------===// + +void MapTableEmitter::emitMapFuncBody(raw_ostream &OS, + unsigned TableSize) { + + ListInit *ColFields = InstrMapDesc.getColFields(); + const std::vector<ListInit*> &ValueCols = InstrMapDesc.getValueCols(); + + // Emit binary search algorithm to locate instructions in the + // relation table. If found, return opcode value from the appropriate column + // of the table. 
+ emitBinSearch(OS, TableSize); + + if (ValueCols.size() > 1) { + for (unsigned i = 0, e = ValueCols.size(); i < e; i++) { + ListInit *ColumnI = ValueCols[i]; + for (unsigned j = 0, ColSize = ColumnI->getSize(); j < ColSize; j++) { + std::string ColName = ColFields->getElement(j)->getAsUnquotedString(); + OS << " if (in" << ColName; + OS << " == "; + OS << ColName << "_" << ColumnI->getElement(j)->getAsUnquotedString(); + if (j < ColumnI->getSize() - 1) OS << " && "; + else OS << ")\n"; + } + OS << " return " << InstrMapDesc.getName(); + OS << "Table[mid]["<<i+1<<"];\n"; + } + OS << " return -1;"; + } + else + OS << " return " << InstrMapDesc.getName() << "Table[mid][1];\n"; + + OS <<"}\n\n"; +} + +//===----------------------------------------------------------------------===// +// Emit relation tables and the functions to query them. +//===----------------------------------------------------------------------===// + +void MapTableEmitter::emitTablesWithFunc(raw_ostream &OS) { + + // Emit function name and the input parameters : mostly opcode value of the + // current instruction. However, if a table has multiple columns (more than 2 + // since first column is used for the key instructions), then we also need + // to pass another input to indicate the column to be selected. + + ListInit *ColFields = InstrMapDesc.getColFields(); + const std::vector<ListInit*> &ValueCols = InstrMapDesc.getValueCols(); + OS << "// "<< InstrMapDesc.getName() << "\n"; + OS << "int "<< InstrMapDesc.getName() << "(uint16_t Opcode"; + if (ValueCols.size() > 1) { + for (unsigned i = 0, e = ColFields->getSize(); i < e; i++) { + std::string ColName = ColFields->getElement(i)->getAsUnquotedString(); + OS << ", enum " << ColName << " in" << ColName << ") {\n"; + } + } else { OS << ") {\n"; } + + // Emit map table. + unsigned TableSize = emitBinSearchTable(OS); + + // Emit rest of the function body. 
+ emitMapFuncBody(OS, TableSize); +} + +//===----------------------------------------------------------------------===// +// Emit enums for the column fields across all the instruction maps. +//===----------------------------------------------------------------------===// + +static void emitEnums(raw_ostream &OS, RecordKeeper &Records) { + + std::vector<Record*> InstrMapVec; + InstrMapVec = Records.getAllDerivedDefinitions("InstrMapping"); + std::map<std::string, std::vector<Init*> > ColFieldValueMap; + + // Iterate over all InstrMapping records and create a map between column + // fields and their possible values across all records. + for (unsigned i = 0, e = InstrMapVec.size(); i < e; i++) { + Record *CurMap = InstrMapVec[i]; + ListInit *ColFields; + ColFields = CurMap->getValueAsListInit("ColFields"); + ListInit *List = CurMap->getValueAsListInit("ValueCols"); + std::vector<ListInit*> ValueCols; + unsigned ListSize = List->getSize(); + + for (unsigned j = 0; j < ListSize; j++) { + ListInit *ListJ = dyn_cast<ListInit>(List->getElement(j)); + + if (ListJ->getSize() != ColFields->getSize()) { + throw "Record `" + CurMap->getName() + "', field `" + "ValueCols" + + "' entries don't match with the entries in 'ColFields' !"; + } + ValueCols.push_back(ListJ); + } + + for (unsigned j = 0, endCF = ColFields->getSize(); j < endCF; j++) { + for (unsigned k = 0; k < ListSize; k++){ + std::string ColName = ColFields->getElement(j)->getAsUnquotedString(); + ColFieldValueMap[ColName].push_back((ValueCols[k])->getElement(j)); + } + } + } + + for (std::map<std::string, std::vector<Init*> >::iterator + II = ColFieldValueMap.begin(), IE = ColFieldValueMap.end(); + II != IE; II++) { + std::vector<Init*> FieldValues = (*II).second; + unsigned FieldSize = FieldValues.size(); + + // Delete duplicate entries from ColFieldValueMap + for (unsigned i = 0; i < FieldSize - 1; i++) { + Init *CurVal = FieldValues[i]; + for (unsigned j = i+1; j < FieldSize; j++) { + if (CurVal == 
FieldValues[j]) {
+          FieldValues.erase(FieldValues.begin()+j);
+        }
+      }
+    }
+
+    // Emit enumerated values for the column fields.
+    OS << "enum " << (*II).first << " {\n";
+    for (unsigned i = 0; i < FieldSize; i++) {
+      OS << "\t" << (*II).first << "_" << FieldValues[i]->getAsUnquotedString();
+      if (i != FieldValues.size() - 1)
+        OS << ",\n";
+      else
+        OS << "\n};\n\n";
+    }
+  }
+}
+
+namespace llvm {
+//===----------------------------------------------------------------------===//
+// Parse 'InstrMapping' records and use the information to form relationship
+// between instructions. These relations are emitted as tables along with the
+// functions to query them.
+//===----------------------------------------------------------------------===//
+void EmitMapTable(RecordKeeper &Records, raw_ostream &OS) {
+  CodeGenTarget Target(Records);
+  std::string TargetName = Target.getName();
+  std::vector<Record*> InstrMapVec;
+  InstrMapVec = Records.getAllDerivedDefinitions("InstrMapping");
+
+  if (!InstrMapVec.size())
+    return;
+
+  OS << "#ifdef GET_INSTRMAP_INFO\n";
+  OS << "#undef GET_INSTRMAP_INFO\n";
+  OS << "namespace llvm {\n\n";
+  OS << "namespace " << TargetName << " {\n\n";
+
+  // Emit column field names and their values as enums.
+  emitEnums(OS, Records);
+
+  // Iterate over all instruction mapping records and construct relationship
+  // maps based on the information specified there.
+  //
+  for (unsigned i = 0, e = InstrMapVec.size(); i < e; i++) {
+    MapTableEmitter IMap(Target, Records, InstrMapVec[i]);
+
+    // Build RowInstrMap to group instructions based on their values for
+    // RowFields. In the process, also collect key instructions into
+    // KeyInstrVec.
+    IMap.buildRowInstrMap();
+
+    // Build MapTable to map key instructions with the corresponding column
+    // instructions.
+    IMap.buildMapTable();
+
+    // Emit map tables and the functions to query them.
+ IMap.emitTablesWithFunc(OS); + } + OS << "} // End " << TargetName << " namespace\n"; + OS << "} // End llvm namespace\n"; + OS << "#endif // GET_INSTRMAP_INFO\n\n"; +} + +} // End llvm namespace diff --git a/utils/TableGen/CodeGenRegisters.cpp b/utils/TableGen/CodeGenRegisters.cpp index f195b4e3fa..10064fdd16 100644 --- a/utils/TableGen/CodeGenRegisters.cpp +++ b/utils/TableGen/CodeGenRegisters.cpp @@ -600,7 +600,7 @@ struct TupleExpander : SetTheory::Expander { unsigned Length = ~0u; SmallVector<SetTheory::RecSet, 4> Lists(Dim); for (unsigned i = 0; i != Dim; ++i) { - ST.evaluate(SubRegs->getElement(i), Lists[i]); + ST.evaluate(SubRegs->getElement(i), Lists[i], Def->getLoc()); Length = std::min(Length, unsigned(Lists[i].size())); } @@ -728,7 +728,7 @@ CodeGenRegisterClass::CodeGenRegisterClass(CodeGenRegBank &RegBank, Record *R) // Alternative allocation orders may be subsets. SetTheory::RecSet Order; for (unsigned i = 0, e = AltOrders->size(); i != e; ++i) { - RegBank.getSets().evaluate(AltOrders->getElement(i), Order); + RegBank.getSets().evaluate(AltOrders->getElement(i), Order, R->getLoc()); Orders[1 + i].append(Order.begin(), Order.end()); // Verify that all altorder members are regclass members. while (!Order.empty()) { diff --git a/utils/TableGen/CodeGenSchedule.cpp b/utils/TableGen/CodeGenSchedule.cpp index fc101eec61..1cca3e3f85 100644 --- a/utils/TableGen/CodeGenSchedule.cpp +++ b/utils/TableGen/CodeGenSchedule.cpp @@ -38,8 +38,9 @@ static void dumpIdxVec(const SmallVectorImpl<unsigned> &V) { // (instrs a, b, ...) Evaluate and union all arguments. Identical to AddOp. 
struct InstrsOp : public SetTheory::Operator { - void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts) { - ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts); + void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts, + ArrayRef<SMLoc> Loc) { + ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts, Loc); } }; @@ -55,13 +56,15 @@ struct InstRegexOp : public SetTheory::Operator { const CodeGenTarget &Target; InstRegexOp(const CodeGenTarget &t): Target(t) {} - void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts) { + void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts, + ArrayRef<SMLoc> Loc) { SmallVector<Regex*, 4> RegexList; for (DagInit::const_arg_iterator AI = Expr->arg_begin(), AE = Expr->arg_end(); AI != AE; ++AI) { StringInit *SI = dyn_cast<StringInit>(*AI); if (!SI) - throw "instregex requires pattern string: " + Expr->getAsString(); + throw TGError(Loc, "instregex requires pattern string: " + + Expr->getAsString()); std::string pat = SI->getValue(); // Implement a python-style prefix match. if (pat[0] != '^') { diff --git a/utils/TableGen/InstrInfoEmitter.cpp b/utils/TableGen/InstrInfoEmitter.cpp index e447c16b16..8e670e3cbc 100644 --- a/utils/TableGen/InstrInfoEmitter.cpp +++ b/utils/TableGen/InstrInfoEmitter.cpp @@ -16,6 +16,7 @@ #include "CodeGenDAGPatterns.h" #include "CodeGenSchedule.h" #include "CodeGenTarget.h" +#include "TableGenBackends.h" #include "SequenceToOffsetTable.h" #include "llvm/ADT/StringExtras.h" #include "llvm/TableGen/Record.h" @@ -415,6 +416,7 @@ namespace llvm { void EmitInstrInfo(RecordKeeper &RK, raw_ostream &OS) { InstrInfoEmitter(RK).run(OS); + EmitMapTable(RK, OS); } } // End llvm namespace diff --git a/utils/TableGen/SetTheory.cpp b/utils/TableGen/SetTheory.cpp index 5b760e7a23..33a8f0e337 100644 --- a/utils/TableGen/SetTheory.cpp +++ b/utils/TableGen/SetTheory.cpp @@ -27,20 +27,20 @@ typedef SetTheory::RecVec RecVec; // (add a, b, ...) Evaluate and union all arguments. 
struct AddOp : public SetTheory::Operator { - void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts) { - ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts); + void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts, ArrayRef<SMLoc> Loc) { + ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts, Loc); } }; // (sub Add, Sub, ...) Set difference. struct SubOp : public SetTheory::Operator { - void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts) { + void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts, ArrayRef<SMLoc> Loc) { if (Expr->arg_size() < 2) - throw "Set difference needs at least two arguments: " + - Expr->getAsString(); + throw TGError(Loc, "Set difference needs at least two arguments: " + + Expr->getAsString()); RecSet Add, Sub; - ST.evaluate(*Expr->arg_begin(), Add); - ST.evaluate(Expr->arg_begin() + 1, Expr->arg_end(), Sub); + ST.evaluate(*Expr->arg_begin(), Add, Loc); + ST.evaluate(Expr->arg_begin() + 1, Expr->arg_end(), Sub, Loc); for (RecSet::iterator I = Add.begin(), E = Add.end(); I != E; ++I) if (!Sub.count(*I)) Elts.insert(*I); @@ -49,12 +49,13 @@ struct SubOp : public SetTheory::Operator { // (and S1, S2) Set intersection. 
struct AndOp : public SetTheory::Operator { - void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts) { + void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts, ArrayRef<SMLoc> Loc) { if (Expr->arg_size() != 2) - throw "Set intersection requires two arguments: " + Expr->getAsString(); + throw TGError(Loc, "Set intersection requires two arguments: " + + Expr->getAsString()); RecSet S1, S2; - ST.evaluate(Expr->arg_begin()[0], S1); - ST.evaluate(Expr->arg_begin()[1], S2); + ST.evaluate(Expr->arg_begin()[0], S1, Loc); + ST.evaluate(Expr->arg_begin()[1], S2, Loc); for (RecSet::iterator I = S1.begin(), E = S1.end(); I != E; ++I) if (S2.count(*I)) Elts.insert(*I); @@ -65,17 +66,19 @@ struct AndOp : public SetTheory::Operator { struct SetIntBinOp : public SetTheory::Operator { virtual void apply2(SetTheory &ST, DagInit *Expr, RecSet &Set, int64_t N, - RecSet &Elts) =0; + RecSet &Elts, ArrayRef<SMLoc> Loc) =0; - void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts) { + void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts, ArrayRef<SMLoc> Loc) { if (Expr->arg_size() != 2) - throw "Operator requires (Op Set, Int) arguments: " + Expr->getAsString(); + throw TGError(Loc, "Operator requires (Op Set, Int) arguments: " + + Expr->getAsString()); RecSet Set; - ST.evaluate(Expr->arg_begin()[0], Set); + ST.evaluate(Expr->arg_begin()[0], Set, Loc); IntInit *II = dyn_cast<IntInit>(Expr->arg_begin()[1]); if (!II) - throw "Second argument must be an integer: " + Expr->getAsString(); - apply2(ST, Expr, Set, II->getValue(), Elts); + throw TGError(Loc, "Second argument must be an integer: " + + Expr->getAsString()); + apply2(ST, Expr, Set, II->getValue(), Elts, Loc); } }; @@ -83,9 +86,10 @@ struct SetIntBinOp : public SetTheory::Operator { struct ShlOp : public SetIntBinOp { void apply2(SetTheory &ST, DagInit *Expr, RecSet &Set, int64_t N, - RecSet &Elts) { + RecSet &Elts, ArrayRef<SMLoc> Loc) { if (N < 0) - throw "Positive shift required: " + Expr->getAsString(); + throw TGError(Loc, 
"Positive shift required: " + + Expr->getAsString()); if (unsigned(N) < Set.size()) Elts.insert(Set.begin() + N, Set.end()); } @@ -95,9 +99,10 @@ struct ShlOp : public SetIntBinOp { struct TruncOp : public SetIntBinOp { void apply2(SetTheory &ST, DagInit *Expr, RecSet &Set, int64_t N, - RecSet &Elts) { + RecSet &Elts, ArrayRef<SMLoc> Loc) { if (N < 0) - throw "Positive length required: " + Expr->getAsString(); + throw TGError(Loc, "Positive length required: " + + Expr->getAsString()); if (unsigned(N) > Set.size()) N = Set.size(); Elts.insert(Set.begin(), Set.begin() + N); @@ -112,7 +117,7 @@ struct RotOp : public SetIntBinOp { void apply2(SetTheory &ST, DagInit *Expr, RecSet &Set, int64_t N, - RecSet &Elts) { + RecSet &Elts, ArrayRef<SMLoc> Loc) { if (Reverse) N = -N; // N > 0 -> rotate left, N < 0 -> rotate right. @@ -131,9 +136,10 @@ struct RotOp : public SetIntBinOp { struct DecimateOp : public SetIntBinOp { void apply2(SetTheory &ST, DagInit *Expr, RecSet &Set, int64_t N, - RecSet &Elts) { + RecSet &Elts, ArrayRef<SMLoc> Loc) { if (N <= 0) - throw "Positive stride required: " + Expr->getAsString(); + throw TGError(Loc, "Positive stride required: " + + Expr->getAsString()); for (unsigned I = 0; I < Set.size(); I += N) Elts.insert(Set[I]); } @@ -141,12 +147,12 @@ struct DecimateOp : public SetIntBinOp { // (interleave S1, S2, ...) Interleave elements of the arguments. struct InterleaveOp : public SetTheory::Operator { - void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts) { + void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts, ArrayRef<SMLoc> Loc) { // Evaluate the arguments individually. SmallVector<RecSet, 4> Args(Expr->getNumArgs()); unsigned MaxSize = 0; for (unsigned i = 0, e = Expr->getNumArgs(); i != e; ++i) { - ST.evaluate(Expr->getArg(i), Args[i]); + ST.evaluate(Expr->getArg(i), Args[i], Loc); MaxSize = std::max(MaxSize, unsigned(Args[i].size())); } // Interleave arguments into Elts. 
@@ -159,38 +165,38 @@ struct InterleaveOp : public SetTheory::Operator { // (sequence "Format", From, To) Generate a sequence of records by name. struct SequenceOp : public SetTheory::Operator { - void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts) { + void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts, ArrayRef<SMLoc> Loc) { int Step = 1; if (Expr->arg_size() > 4) - throw "Bad args to (sequence \"Format\", From, To): " + - Expr->getAsString(); + throw TGError(Loc, "Bad args to (sequence \"Format\", From, To): " + + Expr->getAsString()); else if (Expr->arg_size() == 4) { if (IntInit *II = dyn_cast<IntInit>(Expr->arg_begin()[3])) { Step = II->getValue(); } else - throw "Stride must be an integer: " + Expr->getAsString(); + throw TGError(Loc, "Stride must be an integer: " + Expr->getAsString()); } std::string Format; if (StringInit *SI = dyn_cast<StringInit>(Expr->arg_begin()[0])) Format = SI->getValue(); else - throw "Format must be a string: " + Expr->getAsString(); + throw TGError(Loc, "Format must be a string: " + Expr->getAsString()); int64_t From, To; if (IntInit *II = dyn_cast<IntInit>(Expr->arg_begin()[1])) From = II->getValue(); else - throw "From must be an integer: " + Expr->getAsString(); + throw TGError(Loc, "From must be an integer: " + Expr->getAsString()); if (From < 0 || From >= (1 << 30)) - throw "From out of range"; + throw TGError(Loc, "From out of range"); if (IntInit *II = dyn_cast<IntInit>(Expr->arg_begin()[2])) To = II->getValue(); else - throw "From must be an integer: " + Expr->getAsString(); + throw TGError(Loc, "From must be an integer: " + Expr->getAsString()); if (To < 0 || To >= (1 << 30)) - throw "To out of range"; + throw TGError(Loc, "To out of range"); RecordKeeper &Records = cast<DefInit>(Expr->getOperator())->getDef()->getRecords(); @@ -206,7 +212,8 @@ struct SequenceOp : public SetTheory::Operator { OS << format(Format.c_str(), unsigned(From)); Record *Rec = Records.getDef(OS.str()); if (!Rec) - throw "No def named '" + 
Name + "': " + Expr->getAsString(); + throw TGError(Loc, "No def named '" + Name + "': " + + Expr->getAsString()); // Try to reevaluate Rec in case it is a set. if (const RecVec *Result = ST.expand(Rec)) Elts.insert(Result->begin(), Result->end()); @@ -225,7 +232,7 @@ struct FieldExpander : public SetTheory::Expander { FieldExpander(StringRef fn) : FieldName(fn) {} void expand(SetTheory &ST, Record *Def, RecSet &Elts) { - ST.evaluate(Def->getValueInit(FieldName), Elts); + ST.evaluate(Def->getValueInit(FieldName), Elts, Def->getLoc()); } }; } // end anonymous namespace @@ -259,7 +266,7 @@ void SetTheory::addFieldExpander(StringRef ClassName, StringRef FieldName) { addExpander(ClassName, new FieldExpander(FieldName)); } -void SetTheory::evaluate(Init *Expr, RecSet &Elts) { +void SetTheory::evaluate(Init *Expr, RecSet &Elts, ArrayRef<SMLoc> Loc) { // A def in a list can be a just an element, or it may expand. if (DefInit *Def = dyn_cast<DefInit>(Expr)) { if (const RecVec *Result = expand(Def->getDef())) @@ -270,19 +277,19 @@ void SetTheory::evaluate(Init *Expr, RecSet &Elts) { // Lists simply expand. if (ListInit *LI = dyn_cast<ListInit>(Expr)) - return evaluate(LI->begin(), LI->end(), Elts); + return evaluate(LI->begin(), LI->end(), Elts, Loc); // Anything else must be a DAG. 
DagInit *DagExpr = dyn_cast<DagInit>(Expr); if (!DagExpr) - throw "Invalid set element: " + Expr->getAsString(); + throw TGError(Loc, "Invalid set element: " + Expr->getAsString()); DefInit *OpInit = dyn_cast<DefInit>(DagExpr->getOperator()); if (!OpInit) - throw "Bad set expression: " + Expr->getAsString(); + throw TGError(Loc, "Bad set expression: " + Expr->getAsString()); Operator *Op = Operators.lookup(OpInit->getDef()->getName()); if (!Op) - throw "Unknown set operator: " + Expr->getAsString(); - Op->apply(*this, DagExpr, Elts); + throw TGError(Loc, "Unknown set operator: " + Expr->getAsString()); + Op->apply(*this, DagExpr, Elts, Loc); } const RecVec *SetTheory::expand(Record *Set) { @@ -292,23 +299,19 @@ const RecVec *SetTheory::expand(Record *Set) { return &I->second; // This is the first time we see Set. Find a suitable expander. - try { - const std::vector<Record*> &SC = Set->getSuperClasses(); - for (unsigned i = 0, e = SC.size(); i != e; ++i) { - // Skip unnamed superclasses. - if (!dyn_cast<StringInit>(SC[i]->getNameInit())) - continue; - if (Expander *Exp = Expanders.lookup(SC[i]->getName())) { - // This breaks recursive definitions. - RecVec &EltVec = Expansions[Set]; - RecSet Elts; - Exp->expand(*this, Set, Elts); - EltVec.assign(Elts.begin(), Elts.end()); - return &EltVec; - } + const std::vector<Record*> &SC = Set->getSuperClasses(); + for (unsigned i = 0, e = SC.size(); i != e; ++i) { + // Skip unnamed superclasses. + if (!dyn_cast<StringInit>(SC[i]->getNameInit())) + continue; + if (Expander *Exp = Expanders.lookup(SC[i]->getName())) { + // This breaks recursive definitions. + RecVec &EltVec = Expansions[Set]; + RecSet Elts; + Exp->expand(*this, Set, Elts); + EltVec.assign(Elts.begin(), Elts.end()); + return &EltVec; } - } catch (const std::string &Error) { - throw TGError(Set->getLoc(), Error); } // Set is not expandable. 
diff --git a/utils/TableGen/SetTheory.h b/utils/TableGen/SetTheory.h index b394058f4c..122372ab33 100644 --- a/utils/TableGen/SetTheory.h +++ b/utils/TableGen/SetTheory.h @@ -49,6 +49,7 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/SetVector.h" +#include "llvm/Support/SourceMgr.h" #include <map> #include <vector> @@ -72,7 +73,8 @@ public: /// apply - Apply this operator to Expr's arguments and insert the result /// in Elts. - virtual void apply(SetTheory&, DagInit *Expr, RecSet &Elts) =0; + virtual void apply(SetTheory&, DagInit *Expr, RecSet &Elts, + ArrayRef<SMLoc> Loc) =0; }; /// Expander - A callback function that can transform a Record representing a @@ -119,13 +121,13 @@ public: void addOperator(StringRef Name, Operator*); /// evaluate - Evaluate Expr and append the resulting set to Elts. - void evaluate(Init *Expr, RecSet &Elts); + void evaluate(Init *Expr, RecSet &Elts, ArrayRef<SMLoc> Loc); /// evaluate - Evaluate a sequence of Inits and append to Elts. template<typename Iter> - void evaluate(Iter begin, Iter end, RecSet &Elts) { + void evaluate(Iter begin, Iter end, RecSet &Elts, ArrayRef<SMLoc> Loc) { while (begin != end) - evaluate(*begin++, Elts); + evaluate(*begin++, Elts, Loc); } /// expand - Expand a record into a set of elements if possible. Return a diff --git a/utils/TableGen/TableGenBackends.h b/utils/TableGen/TableGenBackends.h index 2c00c40cfe..f0d25d8a2c 100644 --- a/utils/TableGen/TableGenBackends.h +++ b/utils/TableGen/TableGenBackends.h @@ -74,5 +74,6 @@ void EmitInstrInfo(RecordKeeper &RK, raw_ostream &OS); void EmitPseudoLowering(RecordKeeper &RK, raw_ostream &OS); void EmitRegisterInfo(RecordKeeper &RK, raw_ostream &OS); void EmitSubtarget(RecordKeeper &RK, raw_ostream &OS); +void EmitMapTable(RecordKeeper &RK, raw_ostream &OS); } // End llvm namespace |