aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile5
-rw-r--r--cmake/modules/LLVMProcessSources.cmake2
-rwxr-xr-xdocs/HowToUseInstrMappings.rst179
-rw-r--r--docs/MarkedUpDisassembly.rst88
-rw-r--r--docs/ReleaseNotes.html5
-rw-r--r--docs/WritingAnLLVMBackend.html24
-rw-r--r--docs/subsystems.rst6
-rw-r--r--include/llvm/ADT/DenseMap.h10
-rw-r--r--include/llvm/ADT/StringSet.h9
-rw-r--r--include/llvm/Analysis/DependenceAnalysis.h14
-rw-r--r--include/llvm/Analysis/MemoryBuiltins.h5
-rw-r--r--include/llvm/Analysis/ScalarEvolution.h5
-rw-r--r--include/llvm/CallingConv.h6
-rw-r--r--include/llvm/CodeGen/PBQP/Graph.h21
-rw-r--r--include/llvm/DataLayout.h20
-rw-r--r--include/llvm/InstrTypes.h6
-rw-r--r--include/llvm/MC/MCELFObjectWriter.h3
-rw-r--r--include/llvm/MC/MCInstPrinter.h4
-rw-r--r--include/llvm/MC/MCParser/MCAsmParser.h2
-rw-r--r--include/llvm/MC/MCParser/MCParsedAsmOperand.h3
-rw-r--r--include/llvm/Operator.h25
-rw-r--r--include/llvm/TableGen/Error.h3
-rw-r--r--include/llvm/Target/Target.td49
-rw-r--r--include/llvm/Target/TargetLowering.h8
-rw-r--r--include/llvm/Target/TargetTransformImpl.h23
-rw-r--r--include/llvm/TargetTransformInfo.h41
-rw-r--r--include/llvm/Transforms/Utils/Local.h4
-rw-r--r--lib/Analysis/ConstantFolding.cpp241
-rw-r--r--lib/Analysis/DependenceAnalysis.cpp6
-rw-r--r--lib/Analysis/InlineCost.cpp5
-rw-r--r--lib/Analysis/InstructionSimplify.cpp6
-rw-r--r--lib/Analysis/Lint.cpp5
-rw-r--r--lib/Analysis/MemoryBuiltins.cpp10
-rw-r--r--lib/Analysis/MemoryDependenceAnalysis.cpp4
-rw-r--r--lib/Analysis/ScalarEvolution.cpp22
-rw-r--r--lib/Analysis/ScalarEvolutionExpander.cpp4
-rw-r--r--lib/AsmParser/LLLexer.cpp1
-rw-r--r--lib/AsmParser/LLParser.cpp2
-rw-r--r--lib/AsmParser/LLToken.h1
-rw-r--r--lib/CodeGen/AsmPrinter/AsmPrinter.cpp10
-rw-r--r--lib/CodeGen/EarlyIfConversion.cpp1
-rw-r--r--lib/CodeGen/IntrinsicLowering.cpp12
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp191
-rw-r--r--lib/CodeGen/SelectionDAG/FastISel.cpp5
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAG.cpp9
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp16
-rw-r--r--lib/ExecutionEngine/ExecutionEngine.cpp11
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp27
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp277
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h11
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h42
-rw-r--r--lib/MC/ELFObjectWriter.cpp18
-rw-r--r--lib/MC/MCELFObjectTargetWriter.cpp8
-rw-r--r--lib/MC/MCInstPrinter.cpp14
-rw-r--r--lib/MC/MCParser/AsmParser.cpp59
-rw-r--r--lib/TableGen/Error.cpp12
-rw-r--r--lib/Target/ARM/ARMInstrInfo.td12
-rw-r--r--lib/Target/ARM/ARMInstrThumb2.td2
-rw-r--r--lib/Target/ARM/ARMSelectionDAGInfo.cpp3
-rw-r--r--lib/Target/ARM/ARMTargetMachine.cpp4
-rw-r--r--lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp446
-rw-r--r--lib/Target/CellSPU/SPUTargetMachine.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonTargetMachine.cpp2
-rw-r--r--lib/Target/MBlaze/MBlazeTargetMachine.cpp3
-rw-r--r--lib/Target/MSP430/MSP430TargetMachine.cpp2
-rw-r--r--lib/Target/Mips/MipsISelLowering.cpp5
-rw-r--r--lib/Target/Mips/MipsTargetMachine.cpp2
-rw-r--r--lib/Target/Mips/MipsTargetMachine.h2
-rw-r--r--lib/Target/NVPTX/NVPTXAsmPrinter.cpp9
-rw-r--r--lib/Target/NVPTX/NVPTXTargetMachine.cpp2
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp13
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp72
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h10
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp46
-rw-r--r--lib/Target/PowerPC/PPCAsmPrinter.cpp8
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp53
-rw-r--r--lib/Target/PowerPC/PPCInstr64Bit.td8
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.cpp13
-rw-r--r--lib/Target/PowerPC/PPCTargetMachine.cpp2
-rw-r--r--lib/Target/Sparc/SparcTargetMachine.cpp2
-rw-r--r--lib/Target/Target.cpp2
-rw-r--r--lib/Target/TargetTransformImpl.cpp155
-rw-r--r--lib/Target/X86/AsmParser/X86AsmParser.cpp131
-rw-r--r--lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp46
-rw-r--r--lib/Target/X86/X86.td2
-rw-r--r--lib/Target/X86/X86CallingConv.td59
-rw-r--r--lib/Target/X86/X86FastISel.cpp10
-rw-r--r--lib/Target/X86/X86FrameLowering.cpp7
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp234
-rw-r--r--lib/Target/X86/X86ISelLowering.h10
-rw-r--r--lib/Target/X86/X86InstrFragmentsSIMD.td8
-rw-r--r--lib/Target/X86/X86InstrSSE.td79
-rw-r--r--lib/Target/X86/X86RegisterInfo.cpp21
-rw-r--r--lib/Target/X86/X86SelectionDAGInfo.cpp3
-rw-r--r--lib/Target/X86/X86TargetMachine.cpp4
-rw-r--r--lib/Target/XCore/XCoreISelLowering.cpp6
-rw-r--r--lib/Target/XCore/XCoreTargetMachine.cpp2
-rw-r--r--lib/Transforms/IPO/GlobalOpt.cpp4
-rw-r--r--lib/Transforms/IPO/MergeFunctions.cpp5
-rw-r--r--lib/Transforms/InstCombine/InstCombine.h2
-rw-r--r--lib/Transforms/InstCombine/InstCombineCalls.cpp10
-rw-r--r--lib/Transforms/InstCombine/InstCombineCasts.cpp290
-rw-r--r--lib/Transforms/InstCombine/InstCombineCompares.cpp11
-rw-r--r--lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp79
-rw-r--r--lib/Transforms/InstCombine/InstructionCombining.cpp12
-rw-r--r--lib/Transforms/Instrumentation/BoundsChecking.cpp2
-rw-r--r--lib/Transforms/Scalar/CodeGenPrepare.cpp2
-rw-r--r--lib/Transforms/Scalar/GVN.cpp51
-rw-r--r--lib/Transforms/Scalar/IndVarSimplify.cpp10
-rw-r--r--lib/Transforms/Scalar/LoopIdiomRecognize.cpp8
-rw-r--r--lib/Transforms/Scalar/SROA.cpp129
-rw-r--r--lib/Transforms/Scalar/ScalarReplAggregates.cpp2
-rw-r--r--lib/Transforms/Scalar/SimplifyLibCalls.cpp26
-rw-r--r--lib/Transforms/Utils/BuildLibCalls.cpp62
-rw-r--r--lib/Transforms/Utils/Local.cpp3
-rw-r--r--lib/Transforms/Utils/SimplifyCFG.cpp14
-rw-r--r--lib/Transforms/Utils/SimplifyLibCalls.cpp34
-rw-r--r--lib/Transforms/Vectorize/LoopVectorize.cpp388
-rw-r--r--lib/VMCore/AsmWriter.cpp1
-rw-r--r--lib/VMCore/DataLayout.cpp31
-rw-r--r--lib/VMCore/Instructions.cpp11
-rw-r--r--lib/VMCore/Type.cpp7
-rw-r--r--lib/VMCore/User.cpp9
-rw-r--r--lib/VMCore/Verifier.cpp1
-rw-r--r--projects/CMakeLists.txt6
-rw-r--r--test/CodeGen/ARM/carry.ll13
-rw-r--r--test/CodeGen/Mips/mips64-sret.ll4
-rw-r--r--test/CodeGen/PowerPC/jaggedstructs.ll48
-rw-r--r--test/CodeGen/PowerPC/structsinregs.ll10
-rw-r--r--test/CodeGen/Thumb2/carry.ll13
-rw-r--r--test/CodeGen/X86/2012-01-18-vbitcast.ll4
-rw-r--r--test/CodeGen/X86/2012-03-15-build_vector_wl.ll2
-rw-r--r--test/CodeGen/X86/2012-07-10-extload64.ll4
-rw-r--r--test/CodeGen/X86/avx-intel-ocl.ll107
-rw-r--r--test/CodeGen/X86/cvtv2f32.ll25
-rw-r--r--test/CodeGen/X86/fast-cc-callee-pops.ll4
-rw-r--r--test/CodeGen/X86/fast-cc-merge-stack-adj.ll2
-rw-r--r--test/CodeGen/X86/fast-cc-pass-in-regs.ll4
-rw-r--r--test/CodeGen/X86/ms-inline-asm.ll23
-rw-r--r--test/CodeGen/X86/pointer-vector.ll5
-rw-r--r--test/CodeGen/X86/pr14161.ll38
-rw-r--r--test/CodeGen/X86/promote.ll2
-rw-r--r--test/CodeGen/X86/sse-intel-ocl.ll93
-rw-r--r--test/CodeGen/X86/trunc-ext-ld-st.ll15
-rw-r--r--test/CodeGen/X86/vec_compare-2.ll3
-rw-r--r--test/CodeGen/X86/widen_load-2.ll2
-rw-r--r--test/MC/PowerPC/lit.local.cfg5
-rw-r--r--test/MC/PowerPC/ppc64-relocs-01.ll66
-rw-r--r--test/MC/X86/x86-32-ms-inline-asm.s10
-rw-r--r--test/Other/multi-pointer-size.ll43
-rw-r--r--test/Transforms/GVN/crash.ll36
-rw-r--r--test/Transforms/GVN/pr14166.ll27
-rw-r--r--test/Transforms/InstCombine/cast.ll9
-rw-r--r--test/Transforms/InstCombine/constant-fold-gep-as-0.ll235
-rw-r--r--test/Transforms/LoopUnroll/pr14167.ll44
-rw-r--r--test/Transforms/LoopVectorize/2012-10-20-infloop.ll2
-rw-r--r--test/Transforms/LoopVectorize/cost-model.ll38
-rw-r--r--test/Transforms/LoopVectorize/cpp-new-array.ll46
-rw-r--r--test/Transforms/LoopVectorize/gcc-examples.ll2
-rw-r--r--test/Transforms/LoopVectorize/increment.ll2
-rw-r--r--test/Transforms/LoopVectorize/induction_plus.ll2
-rw-r--r--test/Transforms/LoopVectorize/non-const-n.ll2
-rw-r--r--test/Transforms/LoopVectorize/read-only.ll2
-rw-r--r--test/Transforms/LoopVectorize/reduction.ll81
-rw-r--r--test/Transforms/LoopVectorize/scalar-select.ll2
-rw-r--r--test/Transforms/SROA/basictest.ll63
-rw-r--r--test/Transforms/SROA/big-endian.ll45
-rw-r--r--tools/opt/opt.cpp4
-rw-r--r--unittests/ADT/DenseMapTest.cpp33
-rw-r--r--utils/TableGen/CMakeLists.txt1
-rw-r--r--utils/TableGen/CodeGenMapTable.cpp608
-rw-r--r--utils/TableGen/CodeGenRegisters.cpp4
-rw-r--r--utils/TableGen/CodeGenSchedule.cpp11
-rw-r--r--utils/TableGen/InstrInfoEmitter.cpp2
-rw-r--r--utils/TableGen/SetTheory.cpp121
-rw-r--r--utils/TableGen/SetTheory.h10
-rw-r--r--utils/TableGen/TableGenBackends.h1
177 files changed, 4951 insertions, 1339 deletions
diff --git a/Makefile b/Makefile
index 49e4b24ebf..3bc1f5da52 100644
--- a/Makefile
+++ b/Makefile
@@ -73,10 +73,13 @@ endif
ifeq ($(MAKECMDGOALS),install-clang)
DIRS := tools/clang/tools/driver tools/clang/lib/Headers \
- tools/clang/tools/libclang tools/clang/tools/c-index-test \
+ tools/clang/tools/libclang \
tools/clang/include/clang-c \
tools/clang/runtime tools/clang/docs \
tools/lto runtime
+ ifneq ($(BUILD_CLANG_ONLY),YES)
+ DIRS += tools/clang/tools/c-index-test
+ endif
OPTIONAL_DIRS :=
NO_INSTALL = 1
endif
diff --git a/cmake/modules/LLVMProcessSources.cmake b/cmake/modules/LLVMProcessSources.cmake
index 0e410edc15..2cef6cfc3a 100644
--- a/cmake/modules/LLVMProcessSources.cmake
+++ b/cmake/modules/LLVMProcessSources.cmake
@@ -48,7 +48,7 @@ function(llvm_process_sources OUT_VAR)
set( f ${CMAKE_CURRENT_SOURCE_DIR}/${s} )
add_file_dependencies( ${f} ${TABLEGEN_OUTPUT} )
endforeach(s)
- if( MSVC_IDE )
+ if( MSVC_IDE OR XCODE )
# This adds .td and .h files to the Visual Studio solution:
# FIXME: Shall we handle *.def here?
add_td_sources(sources)
diff --git a/docs/HowToUseInstrMappings.rst b/docs/HowToUseInstrMappings.rst
new file mode 100755
index 0000000000..b51e74e23c
--- /dev/null
+++ b/docs/HowToUseInstrMappings.rst
@@ -0,0 +1,179 @@
+.. _how_to_use_instruction_mappings:
+
+===============================
+How To Use Instruction Mappings
+===============================
+
+.. sectionauthor:: Jyotsna Verma <jverma@codeaurora.org>
+
+.. contents::
+ :local:
+
+Introduction
+============
+
+This document contains information about adding instruction mapping support
+for a target. The motivation behind this feature comes from the need to switch
+between different instruction formats during various optimizations. One approach
+could be to use switch cases which list all the instructions along with formats
+they can transition to. However, it has large maintenance overhead
+because of the hardcoded instruction names. Also, whenever a new instruction is
+added in the .td files, all the relevant switch cases should be modified
+accordingly. Instead, the same functionality could be achieved with TableGen and
+some support from the .td files for a fraction of maintenance cost.
+
+``InstrMapping`` Class Overview
+===============================
+
+TableGen uses relationship models to map instructions with each other. These
+models are described using ``InstrMapping`` class as a base. Each model sets
+various fields of the ``InstrMapping`` class such that they can uniquely
+describe all the instructions using that model. TableGen parses all the relation
+models and uses the information to construct relation tables which relate
+instructions with each other. These tables are emitted in the
+``XXXInstrInfo.inc`` file along with the functions to query them. Following
+is the definition of ``InstrMapping`` class definied in Target.td file:
+
+.. code-block:: llvm
+
+ class InstrMapping {
+ // Used to reduce search space only to the instructions using this
+ // relation model.
+ string FilterClass;
+
+ // List of fields/attributes that should be same for all the instructions in
+ // a row of the relation table. Think of this as a set of properties shared
+ // by all the instructions related by this relationship.
+ list<string> RowFields = [];
+
+ // List of fields/attributes that are same for all the instructions
+ // in a column of the relation table.
+ list<string> ColFields = [];
+
+ // Values for the fields/attributes listed in 'ColFields' corresponding to
+ // the key instruction. This is the instruction that will be transformed
+ // using this relation model.
+ list<string> KeyCol = [];
+
+ // List of values for the fields/attributes listed in 'ColFields', one for
+ // each column in the relation table. These are the instructions a key
+ // instruction will be transformed into.
+ list<list<string> > ValueCols = [];
+ }
+
+Sample Example
+--------------
+
+Let's say that we want to have a function
+``int getPredOpcode(uint16_t Opcode, enum PredSense inPredSense)`` which
+takes a non-predicated instruction and returns its predicated true or false form
+depending on some input flag, ``inPredSense``. The first step in the process is
+to define a relationship model that relates predicated instructions to their
+non-predicated form by assigning appropriate values to the ``InstrMapping``
+fields. For this relationship, non-predicated instructions are treated as key
+instruction since they are the one used to query the interface function.
+
+.. code-block:: llvm
+
+ def getPredOpcode : InstrMapping {
+ // Choose a FilterClass that is used as a base class for all the
+ // instructions modeling this relationship. This is done to reduce the
+ // search space only to these set of instructions.
+ let FilterClass = "PredRel";
+
+ // Instructions with same values for all the fields in RowFields form a
+ // row in the resulting relation table.
+ // For example, if we want to relate 'ADD' (non-predicated) with 'Add_pt'
+ // (predicated true) and 'Add_pf' (predicated false), then all 3
+ // instructions need to have same value for BaseOpcode field. It can be any
+ // unique value (Ex: XYZ) and should not be shared with any other
+ // instruction not related to 'add'.
+ let RowFields = ["BaseOpcode"];
+
+ // List of attributes that can be used to define key and column instructions
+ // for a relation. Key instruction is passed as an argument
+ // to the function used for querying relation tables. Column instructions
+ // are the instructions they (key) can transform into.
+ //
+ // Here, we choose 'PredSense' as ColFields since this is the unique
+ // attribute of the key (non-predicated) and column (true/false)
+ // instructions involved in this relationship model.
+ let ColFields = ["PredSense"];
+
+ // The key column contains non-predicated instructions.
+ let KeyCol = ["none"];
+
+ // Two value columns - first column contains instructions with
+ // PredSense=true while second column has instructions with PredSense=false.
+ let ValueCols = [["true"], ["false"]];
+ }
+
+TableGen uses the above relationship model to emit relation table that maps
+non-predicated instructions with their predicated forms. It also outputs the
+interface function
+``int getPredOpcode(uint16_t Opcode, enum PredSense inPredSense)`` to query
+the table. Here, Function ``getPredOpcode`` takes two arguments, opcode of the
+current instruction and PredSense of the desired instruction, and returns
+predicated form of the instruction, if found in the relation table.
+In order for an instruction to be added into the relation table, it needs
+to include relevant information in its definition. For example, consider
+following to be the current definitions of ADD, ADD_pt (true) and ADD_pf (false)
+instructions:
+
+.. code-block::llvm
+
+ def ADD : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$a, IntRegs:$b),
+ "$dst = add($a, $b)",
+ [(set (i32 IntRegs:$dst), (add (i32 IntRegs:$a),
+ (i32 IntRegs:$b)))]>;
+
+ def ADD_Pt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$p, IntRegs:$a, IntRegs:$b),
+ "if ($p) $dst = add($a, $b)",
+ []>;
+
+ def ADD_Pf : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$p, IntRegs:$a, IntRegs:$b),
+ "if (!$p) $dst = add($a, $b)",
+ []>;
+
+In this step, we modify these instructions to include the information
+required by the relationship model, <tt>getPredOpcode</tt>, so that they can
+be related.
+
+.. code-block::llvm
+
+ def ADD : PredRel, ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$a, IntRegs:$b),
+ "$dst = add($a, $b)",
+ [(set (i32 IntRegs:$dst), (add (i32 IntRegs:$a),
+ (i32 IntRegs:$b)))]> {
+ let BaseOpcode = "ADD";
+ let PredSense = "none";
+ }
+
+ def ADD_Pt : PredRel, ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$p, IntRegs:$a, IntRegs:$b),
+ "if ($p) $dst = add($a, $b)",
+ []> {
+ let BaseOpcode = "ADD";
+ let PredSense = "true";
+ }
+
+ def ADD_Pf : PredRel, ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$p, IntRegs:$a, IntRegs:$b),
+ "if (!$p) $dst = add($a, $b)",
+ []> {
+ let BaseOpcode = "ADD";
+ let PredSense = "false";
+ }
+
+Please note that all the above instructions use ``PredRel`` as a base class.
+This is extremely important since TableGen uses it as a filter for selecting
+instructions for ``getPredOpcode`` model. Any instruction not derived from
+``PredRel`` is excluded from the analysis. ``BaseOpcode`` is another important
+field. Since it's selected as a ``RowFields`` of the model, it is required
+to have the same value for all 3 instructions in order to be related. Next,
+``PredSense`` is used to determine their column positions by comparing its value
+with ``KeyCol`` and ``ValueCols``. If an instruction sets its ``PredSense``
+value to something not used in the relation model, it will not be assigned
+a column in the relation table.
diff --git a/docs/MarkedUpDisassembly.rst b/docs/MarkedUpDisassembly.rst
new file mode 100644
index 0000000000..e1282e102e
--- /dev/null
+++ b/docs/MarkedUpDisassembly.rst
@@ -0,0 +1,88 @@
+.. _marked_up_disassembly:
+
+=======================================
+LLVM's Optional Rich Disassembly Output
+=======================================
+
+.. contents::
+ :local:
+
+Introduction
+============
+
+LLVM's default disassembly output is raw text. To allow consumers more ability
+to introspect the instructions' textual representation or to reformat for a more
+user friendly display there is an optional rich disassembly output.
+
+This optional output is sufficient to reference into individual portions of the
+instruction text. This is intended for clients like disassemblers, list file
+generators, and pretty-printers, which need more than the raw instructions and
+the ability to print them.
+
+To provide this functionality the assembly text is marked up with annotations.
+The markup is simple enough in syntax to be robust even in the case of version
+mismatches between consumers and producers. That is, the syntax generally does
+not carry semantics beyond "this text has an annotation," so consumers can
+simply ignore annotations they do not understand or do not care about.
+
+After calling ``LLVMCreateDisasm()`` to create a disassembler context the
+optional output is enable with this call:
+
+.. code-block:: c
+
+ LLVMSetDisasmOptions(DC, LLVMDisassembler_Option_UseMarkup);
+
+Then subsequent calls to ``LLVMDisasmInstruction()`` will return output strings
+with the marked up annotations.
+
+Instruction Annotations
+=======================
+
+.. _contextual markups:
+
+Contextual markups
+------------------
+
+Annoated assembly display will supply contextual markup to help clients more
+efficiently implement things like pretty printers. Most markup will be target
+independent, so clients can effectively provide good display without any target
+specific knowledge.
+
+Annotated assembly goes through the normal instruction printer, but optionally
+includes contextual tags on portions of the instruction string. An annotation
+is any '<' '>' delimited section of text(1).
+
+.. code-block:: bat
+
+ annotation: '<' tag-name tag-modifier-list ':' annotated-text '>'
+ tag-name: identifier
+ tag-modifier-list: comma delimited identifier list
+
+The tag-name is an identifier which gives the type of the annotation. For the
+first pass, this will be very simple, with memory references, registers, and
+immediates having the tag names "mem", "reg", and "imm", respectively.
+
+The tag-modifier-list is typically additional target-specific context, such as
+register class.
+
+Clients should accept and ignore any tag-names or tag-modifiers they do not
+understand, allowing the annotations to grow in richness without breaking older
+clients.
+
+For example, a possible annotation of an ARM load of a stack-relative location
+might be annotated as:
+
+.. code-block:: nasm
+
+ ldr <reg gpr:r0>, <mem regoffset:[<reg gpr:sp>, <imm:#4>]>
+
+
+1: For assembly dialects in which '<' and/or '>' are legal tokens, a literal token is escaped by following immediately with a repeat of the character. For example, a literal '<' character is output as '<<' in an annotated assembly string.
+
+C API Details
+-------------
+
+The intended consumers of this information use the C API, therefore the new C
+API function for the disassembler will be added to provide an option to produce
+disassembled instructions with annotations, ``LLVMSetDisasmOptions()`` and the
+``LLVMDisassembler_Option_UseMarkup`` option (see above).
diff --git a/docs/ReleaseNotes.html b/docs/ReleaseNotes.html
index 0ef8f3d1f3..9a1b547b4a 100644
--- a/docs/ReleaseNotes.html
+++ b/docs/ReleaseNotes.html
@@ -468,7 +468,10 @@ Release Notes</a>.</h1>
<p> Loop Vectorizer - We've added a loop vectorizer and we are now able to
vectorize small loops. The loop vectorizer is disabled by default and
- can be enabled using the <b>-mllvm -vectorize</b> flag. <br/>
+ can be enabled using the <b>-mllvm -vectorize</b> flag.
+ The SIMD vector width can be specified using the flag
+ <b>-mllvm -force-vector-width=4</b>.
+ <br/>
We can now vectorize this code:
<pre class="doc_code">
diff --git a/docs/WritingAnLLVMBackend.html b/docs/WritingAnLLVMBackend.html
index 7576d490d7..0ad472cb92 100644
--- a/docs/WritingAnLLVMBackend.html
+++ b/docs/WritingAnLLVMBackend.html
@@ -32,6 +32,7 @@
<li><a href="#InstructionSet">Instruction Set</a>
<ul>
<li><a href="#operandMapping">Instruction Operand Mapping</a></li>
+ <li><a href="#relationMapping">Instruction Relation Mapping</a></li>
<li><a href="#implementInstr">Implement a subclass of TargetInstrInfo</a></li>
<li><a href="#branchFolding">Branch Folding and If Conversion</a></li>
</ul></li>
@@ -1259,6 +1260,29 @@ the <tt>rd</tt>, <tt>rs1</tt>, and <tt>rs2</tt> fields respectively.
<!-- ======================================================================= -->
<h3>
+ <a name="relationMapping">Instruction Relation Mapping</a>
+</h3>
+
+<div>
+
+<p>
+This TableGen feature is used to relate instructions with each other. It is
+particularly useful when you have multiple instruction formats and need to
+switch between them after instruction selection. This entire feature is driven
+by relation models which can be defined in <tt>XXXInstrInfo.td</tt> files
+according to the target-specific instruction set. Relation models are defined
+using <tt>InstrMapping</tt> class as a base. TableGen parses all the models
+and generates instruction relation maps using the specified information.
+Relation maps are emitted as tables in the <tt>XXXGenInstrInfo.inc</tt> file
+along with the functions to query them. For the detailed information on how to
+use this feature, please refer to
+<a href="HowToUseInstrMappings.html">How to add Instruction Mappings</a>
+document.
+</p>
+</div>
+
+<!-- ======================================================================= -->
+<h3>
<a name="implementInstr">Implement a subclass of </a>
<a href="CodeGenerator.html#targetinstrinfo">TargetInstrInfo</a>
</h3>
diff --git a/docs/subsystems.rst b/docs/subsystems.rst
index 6f77b79fbe..80d0eed663 100644
--- a/docs/subsystems.rst
+++ b/docs/subsystems.rst
@@ -17,6 +17,7 @@ Subsystem Documentation
TableGenFundamentals
DebuggingJITedCode
GoldPlugin
+ MarkedUpDisassembly
* `Writing an LLVM Pass <WritingAnLLVMPass.html>`_
@@ -98,3 +99,8 @@ Subsystem Documentation
architecture.
.. _`Howto: Implementing LLVM Integrated Assembler`: http://www.embecosm.com/download/ean10.html
+
+* :ref:`marked_up_disassembly`
+
+ This document describes the optional rich disassembly output syntax.
+
diff --git a/include/llvm/ADT/DenseMap.h b/include/llvm/ADT/DenseMap.h
index cbcf7892c9..ac4bdbd126 100644
--- a/include/llvm/ADT/DenseMap.h
+++ b/include/llvm/ADT/DenseMap.h
@@ -420,7 +420,7 @@ private:
NumBuckets = getNumBuckets();
}
if (NumBuckets-(NewNumEntries+getNumTombstones()) <= NumBuckets/8) {
- this->grow(NumBuckets);
+ this->grow(NumBuckets * 2);
LookupBucketFor(Key, TheBucket);
}
assert(TheBucket);
@@ -600,7 +600,7 @@ public:
unsigned OldNumBuckets = NumBuckets;
BucketT *OldBuckets = Buckets;
- allocateBuckets(std::max<unsigned>(64, NextPowerOf2(AtLeast)));
+ allocateBuckets(std::max<unsigned>(64, NextPowerOf2(AtLeast-1)));
assert(Buckets);
if (!OldBuckets) {
this->BaseT::initEmpty();
@@ -826,11 +826,11 @@ public:
}
void grow(unsigned AtLeast) {
- if (AtLeast > InlineBuckets)
- AtLeast = std::max<unsigned>(64, NextPowerOf2(AtLeast));
+ if (AtLeast >= InlineBuckets)
+ AtLeast = std::max<unsigned>(64, NextPowerOf2(AtLeast-1));
if (Small) {
- if (AtLeast <= InlineBuckets)
+ if (AtLeast < InlineBuckets)
return; // Nothing to do.
// First move the inline buckets into a temporary storage.
diff --git a/include/llvm/ADT/StringSet.h b/include/llvm/ADT/StringSet.h
index 9c55f6b70e..b69a964a23 100644
--- a/include/llvm/ADT/StringSet.h
+++ b/include/llvm/ADT/StringSet.h
@@ -29,8 +29,13 @@ namespace llvm {
assert(!InLang.empty());
const char *KeyStart = InLang.data();
const char *KeyEnd = KeyStart + InLang.size();
- return base::insert(llvm::StringMapEntry<char>::
- Create(KeyStart, KeyEnd, base::getAllocator(), '+'));
+ llvm::StringMapEntry<char> *Entry = llvm::StringMapEntry<char>::
+ Create(KeyStart, KeyEnd, base::getAllocator(), '+');
+ if (!base::insert(Entry)) {
+ Entry->Destroy(base::getAllocator());
+ return false;
+ }
+ return true;
}
};
}
diff --git a/include/llvm/Analysis/DependenceAnalysis.h b/include/llvm/Analysis/DependenceAnalysis.h
index 9b6a6bbd3e..3818428a5e 100644
--- a/include/llvm/Analysis/DependenceAnalysis.h
+++ b/include/llvm/Analysis/DependenceAnalysis.h
@@ -30,23 +30,17 @@
#ifndef LLVM_ANALYSIS_DEPENDENCEANALYSIS_H
#define LLVM_ANALYSIS_DEPENDENCEANALYSIS_H
-#include "llvm/BasicBlock.h"
-#include "llvm/Function.h"
-#include "llvm/Instruction.h"
+#include "llvm/Instructions.h"
#include "llvm/Pass.h"
#include "llvm/ADT/SmallBitVector.h"
-#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Support/raw_ostream.h"
-
namespace llvm {
class AliasAnalysis;
+ class Loop;
+ class LoopInfo;
class ScalarEvolution;
class SCEV;
- class Value;
+ class SCEVConstant;
class raw_ostream;
/// Dependence - This class represents a dependence between two memory
diff --git a/include/llvm/Analysis/MemoryBuiltins.h b/include/llvm/Analysis/MemoryBuiltins.h
index a842898e41..9e5d97dd7f 100644
--- a/include/llvm/Analysis/MemoryBuiltins.h
+++ b/include/llvm/Analysis/MemoryBuiltins.h
@@ -168,7 +168,8 @@ class ObjectSizeOffsetVisitor
public:
ObjectSizeOffsetVisitor(const DataLayout *TD, const TargetLibraryInfo *TLI,
- LLVMContext &Context, bool RoundToAlign = false);
+ LLVMContext &Context, bool RoundToAlign = false,
+ unsigned AS = 0);
SizeOffsetType compute(Value *V);
@@ -229,7 +230,7 @@ class ObjectSizeOffsetEvaluator
public:
ObjectSizeOffsetEvaluator(const DataLayout *TD, const TargetLibraryInfo *TLI,
- LLVMContext &Context);
+ LLVMContext &Context, unsigned AS = 0);
SizeOffsetEvalType compute(Value *V);
bool knownSize(SizeOffsetEvalType SizeOffset) {
diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h
index 67c9a4d14f..d2df67080c 100644
--- a/include/llvm/Analysis/ScalarEvolution.h
+++ b/include/llvm/Analysis/ScalarEvolution.h
@@ -628,7 +628,7 @@ namespace llvm {
/// getSizeOfExpr - Return an expression for sizeof on the given type.
///
- const SCEV *getSizeOfExpr(Type *AllocTy);
+ const SCEV *getSizeOfExpr(Type *AllocTy, Type *IntPtrTy);
/// getAlignOfExpr - Return an expression for alignof on the given type.
///
@@ -636,7 +636,8 @@ namespace llvm {
/// getOffsetOfExpr - Return an expression for offsetof on the given field.
///
- const SCEV *getOffsetOfExpr(StructType *STy, unsigned FieldNo);
+ const SCEV *getOffsetOfExpr(StructType *STy, Type *IntPtrTy,
+ unsigned FieldNo);
/// getOffsetOfExpr - Return an expression for offsetof on the given field.
///
diff --git a/include/llvm/CallingConv.h b/include/llvm/CallingConv.h
index 86e4eebb82..053f4eb326 100644
--- a/include/llvm/CallingConv.h
+++ b/include/llvm/CallingConv.h
@@ -112,7 +112,11 @@ namespace CallingConv {
/// Cannot have variable arguments.
/// Can also be called by the host.
/// Is externally visible.
- SPIR_KERNEL = 76
+ SPIR_KERNEL = 76,
+
+ /// Intel_OCL_BI - Calling conventions for Intel OpenCL built-ins
+ Intel_OCL_BI = 77
+
};
} // End CallingConv namespace
diff --git a/include/llvm/CodeGen/PBQP/Graph.h b/include/llvm/CodeGen/PBQP/Graph.h
index a5d8b0dbd6..83c379b48c 100644
--- a/include/llvm/CodeGen/PBQP/Graph.h
+++ b/include/llvm/CodeGen/PBQP/Graph.h
@@ -19,6 +19,7 @@
#include <list>
#include <map>
+#include <llvm/ADT/ilist.h>
namespace PBQP {
@@ -31,16 +32,16 @@ namespace PBQP {
class NodeEntry;
class EdgeEntry;
- typedef std::list<NodeEntry> NodeList;
- typedef std::list<EdgeEntry> EdgeList;
+ typedef llvm::ilist<NodeEntry> NodeList;
+ typedef llvm::ilist<EdgeEntry> EdgeList;
public:
- typedef NodeList::iterator NodeItr;
- typedef NodeList::const_iterator ConstNodeItr;
+ typedef NodeEntry* NodeItr;
+ typedef const NodeEntry* ConstNodeItr;
- typedef EdgeList::iterator EdgeItr;
- typedef EdgeList::const_iterator ConstEdgeItr;
+ typedef EdgeEntry* EdgeItr;
+ typedef const EdgeEntry* ConstEdgeItr;
private:
@@ -52,12 +53,14 @@ namespace PBQP {
private:
- class NodeEntry {
+ class NodeEntry : public llvm::ilist_node<NodeEntry> {
+ friend struct llvm::ilist_sentinel_traits<NodeEntry>;
private:
Vector costs;
AdjEdgeList adjEdges;
unsigned degree;
void *data;
+ NodeEntry() : costs(0, 0) {}
public:
NodeEntry(const Vector &costs) : costs(costs), degree(0) {}
Vector& getCosts() { return costs; }
@@ -77,12 +80,14 @@ namespace PBQP {
void* getData() { return data; }
};
- class EdgeEntry {
+ class EdgeEntry : public llvm::ilist_node<EdgeEntry> {
+ friend struct llvm::ilist_sentinel_traits<EdgeEntry>;
private:
NodeItr node1, node2;
Matrix costs;
AdjEdgeItr node1AEItr, node2AEItr;
void *data;
+ EdgeEntry() : costs(0, 0, 0) {}
public:
EdgeEntry(NodeItr node1, NodeItr node2, const Matrix &costs)
: node1(node1), node2(node2), costs(costs) {}
diff --git a/include/llvm/DataLayout.h b/include/llvm/DataLayout.h
index c9ac0b7fea..d778556684 100644
--- a/include/llvm/DataLayout.h
+++ b/include/llvm/DataLayout.h
@@ -262,6 +262,14 @@ public:
}
return 8*val->second.TypeBitWidth;
}
+ /// Layout pointer size, in bits, based on the type.
+ /// If this function is called with a pointer type, then
+ /// the type size of the pointer is returned.
+ /// If this function is called with a vector of pointers,
+ /// then the type size of the pointer is returned.
+ /// Otherwise the type sizeo f a default pointer is returned.
+ unsigned getPointerTypeSizeInBits(Type* Ty) const;
+
/// Size examples:
///
/// Type SizeInBits StoreSizeInBits AllocSizeInBits[*]
@@ -337,11 +345,13 @@ public:
///
unsigned getPreferredTypeAlignmentShift(Type *Ty) const;
- /// getIntPtrType - Return an unsigned integer type that is the same size or
- /// greater to the host pointer size.
- /// FIXME: Need to remove the default argument when the rest of the LLVM code
- /// base has been updated.
- IntegerType *getIntPtrType(LLVMContext &C, unsigned AddressSpace = 0) const;
+ /// getIntPtrType - Return an integer type that is the same size or
+ /// greater to the pointer size based on the address space.
+ IntegerType *getIntPtrType(LLVMContext &C, unsigned AddressSpace) const;
+
+ /// getIntPtrType - Return an integer type that is the same size or
+ /// greater to the pointer size based on the Type.
+ IntegerType *getIntPtrType(Type *) const;
/// getIndexedOffset - return the offset from the beginning of the type for
/// the specified indices. This is used to implement getelementptr.
diff --git a/include/llvm/InstrTypes.h b/include/llvm/InstrTypes.h
index cfc79394b2..b661372f53 100644
--- a/include/llvm/InstrTypes.h
+++ b/include/llvm/InstrTypes.h
@@ -17,6 +17,7 @@
#define LLVM_INSTRUCTION_TYPES_H
#include "llvm/Instruction.h"
+#include "llvm/DataLayout.h"
#include "llvm/OperandTraits.h"
#include "llvm/DerivedTypes.h"
#include "llvm/ADT/Twine.h"
@@ -576,6 +577,11 @@ public:
Type *IntPtrTy ///< Integer type corresponding to pointer
) const;
+ /// @brief Determine if this cast is a no-op cast.
+ bool isNoopCast(
+ const DataLayout &DL ///< DataLayout to get the Int Ptr type from.
+ ) const;
+
/// Determine how a pair of casts can be eliminated, if they can be at all.
/// This is a helper function for both CastInst and ConstantExpr.
/// @returns 0 if the CastInst pair can't be eliminated, otherwise
diff --git a/include/llvm/MC/MCELFObjectWriter.h b/include/llvm/MC/MCELFObjectWriter.h
index 688c8a9575..59a2501b8e 100644
--- a/include/llvm/MC/MCELFObjectWriter.h
+++ b/include/llvm/MC/MCELFObjectWriter.h
@@ -91,6 +91,9 @@ public:
const MCFragment &F,
const MCFixup &Fixup,
bool IsPCRel) const;
+ virtual const MCSymbol *undefinedExplicitRelSym(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel) const;
virtual void adjustFixupOffset(const MCFixup &Fixup,
uint64_t &RelocOffset);
diff --git a/include/llvm/MC/MCInstPrinter.h b/include/llvm/MC/MCInstPrinter.h
index f17c347050..3b9420a403 100644
--- a/include/llvm/MC/MCInstPrinter.h
+++ b/include/llvm/MC/MCInstPrinter.h
@@ -66,6 +66,10 @@ public:
bool getUseMarkup() const { return UseMarkup; }
void setUseMarkup(bool Value) { UseMarkup = Value; }
+
+ /// Utility functions to make adding mark ups simpler.
+ StringRef markup(StringRef s) const;
+ StringRef markup(StringRef a, StringRef b) const;
};
} // namespace llvm
diff --git a/include/llvm/MC/MCParser/MCAsmParser.h b/include/llvm/MC/MCParser/MCAsmParser.h
index fe7bda08db..8a5f37cb0c 100644
--- a/include/llvm/MC/MCParser/MCAsmParser.h
+++ b/include/llvm/MC/MCParser/MCAsmParser.h
@@ -90,7 +90,7 @@ public:
/// ParseMSInlineAsm - Parse ms-style inline assembly.
virtual bool ParseMSInlineAsm(void *AsmLoc, std::string &AsmString,
unsigned &NumOutputs, unsigned &NumInputs,
- SmallVectorImpl<void *> &OpDecls,
+ SmallVectorImpl<std::pair<void *, bool> > &OpDecls,
SmallVectorImpl<std::string> &Constraints,
SmallVectorImpl<std::string> &Clobbers,
const MCInstrInfo *MII,
diff --git a/include/llvm/MC/MCParser/MCParsedAsmOperand.h b/include/llvm/MC/MCParser/MCParsedAsmOperand.h
index 35f47c0b9c..89b0a1f47b 100644
--- a/include/llvm/MC/MCParser/MCParsedAsmOperand.h
+++ b/include/llvm/MC/MCParser/MCParsedAsmOperand.h
@@ -69,6 +69,9 @@ public:
/// inline assembly.
virtual bool isOffsetOf() const { return false; }
+ /// getOffsetOfLoc - Get the location of the offset operator.
+ virtual SMLoc getOffsetOfLoc() const { return SMLoc(); }
+
/// needSizeDirective - Do we need to emit a sizing directive for this
/// operand? Only valid when parsing MS-style inline assembly.
virtual bool needSizeDirective() const { return false; }
diff --git a/include/llvm/Operator.h b/include/llvm/Operator.h
index 462324a669..b326c11352 100644
--- a/include/llvm/Operator.h
+++ b/include/llvm/Operator.h
@@ -36,8 +36,11 @@ private:
void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
void *operator new(size_t s) LLVM_DELETED_FUNCTION;
Operator() LLVM_DELETED_FUNCTION;
- // NOTE: cannot use LLVM_DELETED_FUNCTION because it's not legal to delete
- // an overridden method that's not deleted in the base class.
+
+protected:
+ // NOTE: Cannot use LLVM_DELETED_FUNCTION because it's not legal to delete
+ // an overridden method that's not deleted in the base class. Cannot leave
+ // this unimplemented because that leads to an ODR-violation.
~Operator();
public:
@@ -79,8 +82,6 @@ public:
};
private:
- ~OverflowingBinaryOperator(); // DO NOT IMPLEMENT
-
friend class BinaryOperator;
friend class ConstantExpr;
void setHasNoUnsignedWrap(bool B) {
@@ -132,8 +133,6 @@ public:
};
private:
- ~PossiblyExactOperator(); // DO NOT IMPLEMENT
-
friend class BinaryOperator;
friend class ConstantExpr;
void setIsExact(bool B) {
@@ -168,9 +167,6 @@ public:
/// FPMathOperator - Utility class for floating point operations which can have
/// information about relaxed accuracy requirements attached to them.
class FPMathOperator : public Operator {
-private:
- ~FPMathOperator(); // DO NOT IMPLEMENT
-
public:
/// \brief Get the maximum error permitted by this operation in ULPs. An
@@ -191,7 +187,6 @@ public:
/// opcodes.
template<typename SuperClass, unsigned Opc>
class ConcreteOperator : public SuperClass {
- ~ConcreteOperator(); // DO NOT IMPLEMENT
public:
static inline bool classof(const Instruction *I) {
return I->getOpcode() == Opc;
@@ -207,45 +202,35 @@ public:
class AddOperator
: public ConcreteOperator<OverflowingBinaryOperator, Instruction::Add> {
- ~AddOperator(); // DO NOT IMPLEMENT
};
class SubOperator
: public ConcreteOperator<OverflowingBinaryOperator, Instruction::Sub> {
- ~SubOperator(); // DO NOT IMPLEMENT
};
class MulOperator
: public ConcreteOperator<OverflowingBinaryOperator, Instruction::Mul> {
- ~MulOperator(); // DO NOT IMPLEMENT
};
class ShlOperator
: public ConcreteOperator<OverflowingBinaryOperator, Instruction::Shl> {
- ~ShlOperator(); // DO NOT IMPLEMENT
};
class SDivOperator
: public ConcreteOperator<PossiblyExactOperator, Instruction::SDiv> {
- ~SDivOperator(); // DO NOT IMPLEMENT
};
class UDivOperator
: public ConcreteOperator<PossiblyExactOperator, Instruction::UDiv> {
- ~UDivOperator(); // DO NOT IMPLEMENT
};
class AShrOperator
: public ConcreteOperator<PossiblyExactOperator, Instruction::AShr> {
- ~AShrOperator(); // DO NOT IMPLEMENT
};
class LShrOperator
: public ConcreteOperator<PossiblyExactOperator, Instruction::LShr> {
- ~LShrOperator(); // DO NOT IMPLEMENT
};
class GEPOperator
: public ConcreteOperator<Operator, Instruction::GetElementPtr> {
- ~GEPOperator(); // DO NOT IMPLEMENT
-
enum {
IsInBounds = (1 << 0)
};
diff --git a/include/llvm/TableGen/Error.h b/include/llvm/TableGen/Error.h
index 5c1c3adf7e..3f7b7f4e8c 100644
--- a/include/llvm/TableGen/Error.h
+++ b/include/llvm/TableGen/Error.h
@@ -40,6 +40,9 @@ void PrintError(const char *Loc, const Twine &Msg);
void PrintError(const Twine &Msg);
void PrintError(const TGError &Error);
+LLVM_ATTRIBUTE_NORETURN void PrintFatalError(const std::string &Msg);
+LLVM_ATTRIBUTE_NORETURN void PrintFatalError(ArrayRef<SMLoc> ErrorLoc,
+ const std::string &Msg);
extern SourceMgr SrcMgr;
diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td
index ed3db69a4e..5fb12f503e 100644
--- a/include/llvm/Target/Target.td
+++ b/include/llvm/Target/Target.td
@@ -1032,6 +1032,55 @@ class ProcessorModel<string n, SchedMachineModel m, list<SubtargetFeature> f>
}
//===----------------------------------------------------------------------===//
+// InstrMapping - This class is used to create mapping tables to relate
+// instructions with each other based on the values specified in RowFields,
+// ColFields, KeyCol and ValueCols.
+//
+class InstrMapping {
+ // FilterClass - Used to limit search space only to the instructions that
+ // define the relationship modeled by this InstrMapping record.
+ string FilterClass;
+
+ // RowFields - List of fields/attributes that should be same for all the
+ // instructions in a row of the relation table. Think of this as a set of
+ // properties shared by all the instructions related by this relationship
+ // model and is used to categorize instructions into subgroups. For instance,
+ // if we want to define a relation that maps 'Add' instruction to its
+ // predicated forms, we can define RowFields like this:
+ //
+ // let RowFields = BaseOp
+ // All add instruction predicated/non-predicated will have to set their BaseOp
+ // to the same value.
+ //
+ // def Add: { let BaseOp = 'ADD'; let predSense = 'nopred' }
+ // def Add_predtrue: { let BaseOp = 'ADD'; let predSense = 'true' }
+ // def Add_predfalse: { let BaseOp = 'ADD'; let predSense = 'false' }
+ list<string> RowFields = [];
+
+ // List of fields/attributes that are same for all the instructions
+ // in a column of the relation table.
+ // Ex: let ColFields = 'predSense' -- It means that the columns are arranged
+ // based on the 'predSense' values. All the instruction in a specific
+ // column have the same value and it is fixed for the column according
+ // to the values set in 'ValueCols'.
+ list<string> ColFields = [];
+
+ // Values for the fields/attributes listed in 'ColFields'.
+ // Ex: let KeyCol = 'nopred' -- It means that the key instruction (instruction
+ // that models this relation) should be non-predicated.
+ // In the example above, 'Add' is the key instruction.
+ list<string> KeyCol = [];
+
+ // List of values for the fields/attributes listed in 'ColFields', one for
+ // each column in the relation table.
+ //
+ // Ex: let ValueCols = [['true'],['false']] -- It adds two columns in the
+ // table. First column requires all the instructions to have predSense
+ // set to 'true' and second column requires it to be 'false'.
+ list<list<string> > ValueCols = [];
+}
+
+//===----------------------------------------------------------------------===//
// Pull in the common support for calling conventions.
//
include "llvm/Target/TargetCallingConv.td"
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index f34a7727cf..830e2d645a 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -103,6 +103,10 @@ public:
TypeWidenVector // This vector should be widened into a larger vector.
};
+ /// LegalizeKind holds the legalization kind that needs to happen to EVT
+ /// in order to type-legalize it.
+ typedef std::pair<LegalizeTypeAction, EVT> LegalizeKind;
+
enum BooleanContent { // How the target represents true/false values.
UndefinedBooleanContent, // Only bit 0 counts, the rest can hold garbage.
ZeroOrOneBooleanContent, // All bits zero except for bit 0.
@@ -1966,8 +1970,7 @@ private:
ValueTypeActionImpl ValueTypeActions;
- typedef std::pair<LegalizeTypeAction, EVT> LegalizeKind;
-
+public:
LegalizeKind
getTypeConversion(LLVMContext &Context, EVT VT) const {
// If this is a simple type, use the ComputeRegisterProp mechanism.
@@ -2084,6 +2087,7 @@ private:
return LegalizeKind(TypeSplitVector, NVT);
}
+private:
std::vector<std::pair<EVT, const TargetRegisterClass*> > AvailableRegClasses;
/// TargetDAGCombineArray - Targets can specify ISD nodes that they would
diff --git a/include/llvm/Target/TargetTransformImpl.h b/include/llvm/Target/TargetTransformImpl.h
index 7648f4f935..25a7edeb01 100644
--- a/include/llvm/Target/TargetTransformImpl.h
+++ b/include/llvm/Target/TargetTransformImpl.h
@@ -16,6 +16,7 @@
#define LLVM_TARGET_TARGET_TRANSFORMATION_IMPL_H
#include "llvm/TargetTransformInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
namespace llvm {
@@ -47,7 +48,27 @@ public:
virtual unsigned getJumpBufSize() const;
};
-class VectorTargetTransformImpl : public VectorTargetTransformInfo { };
+class VectorTargetTransformImpl : public VectorTargetTransformInfo {
+private:
+ const TargetLowering *TLI;
+
+ /// Estimate the cost of type-legalization and the legalized type.
+ std::pair<unsigned, EVT>
+ getTypeLegalizationCost(LLVMContext &C, EVT Ty) const;
+
+public:
+ explicit VectorTargetTransformImpl(const TargetLowering *TL) : TLI(TL) {}
+
+ virtual ~VectorTargetTransformImpl() {}
+
+ virtual unsigned getInstrCost(unsigned Opcode, Type *Ty1, Type *Ty2) const;
+
+ virtual unsigned getBroadcastCost(Type *Tp) const;
+
+ virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment,
+ unsigned AddressSpace) const;
+};
} // end llvm namespace
diff --git a/include/llvm/TargetTransformInfo.h b/include/llvm/TargetTransformInfo.h
index 82fc14dbd7..96470c30ca 100644
--- a/include/llvm/TargetTransformInfo.h
+++ b/include/llvm/TargetTransformInfo.h
@@ -54,10 +54,10 @@ public:
TargetTransformInfo(const TargetTransformInfo &T) :
ImmutablePass(ID), STTI(T.STTI), VTTI(T.VTTI) { }
- const ScalarTargetTransformInfo* getScalarTargetTransformInfo() {
+ const ScalarTargetTransformInfo* getScalarTargetTransformInfo() const {
return STTI;
}
- const VectorTargetTransformInfo* getVectorTargetTransformInfo() {
+ const VectorTargetTransformInfo* getVectorTargetTransformInfo() const {
return VTTI;
}
@@ -119,8 +119,43 @@ public:
}
};
+/// VectorTargetTransformInfo - This interface is used by the vectorizers
+/// to estimate the profitability of vectorization for different instructions.
class VectorTargetTransformInfo {
- // TODO: define an interface for VectorTargetTransformInfo.
+public:
+ virtual ~VectorTargetTransformInfo() {}
+
+ /// Returns the expected cost of the instruction opcode. The opcode is one of
+ /// the enums like Instruction::Add. The type arguments are the type of the
+ /// operation.
+ /// Most instructions only use the first type and in that case the second
+ /// operand is ignored.
+ ///
+ /// Exceptions:
+ /// * Br instructions do not use any of the types.
+ /// * Select instructions pass the return type as Ty1 and the selector as Ty2.
+ /// * Cast instructions pass the destination as Ty1 and the source as Ty2.
+ /// * Insert/Extract element pass only the vector type as Ty1.
+ /// * ShuffleVector, Load, Store do not use this call.
+ virtual unsigned getInstrCost(unsigned Opcode,
+ Type *Ty1 = 0,
+ Type *Ty2 = 0) const {
+ return 1;
+ }
+
+ /// Returns the cost of a vector broadcast of a scalar at place zero to a
+ /// vector of type 'Tp'.
+ virtual unsigned getBroadcastCost(Type *Tp) const {
+ return 1;
+ }
+
+ /// Returns the cost of Load and Store instructions.
+ virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment,
+ unsigned AddressSpace) const {
+ return 1;
+ }
+
};
} // End llvm namespace
diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h
index fd1b5556ef..49eeb57622 100644
--- a/include/llvm/Transforms/Utils/Local.h
+++ b/include/llvm/Transforms/Utils/Local.h
@@ -177,8 +177,9 @@ static inline unsigned getKnownAlignment(Value *V, const DataLayout *TD = 0) {
template<typename IRBuilderTy>
Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &TD, User *GEP,
bool NoAssumptions = false) {
+ unsigned AS = cast<GEPOperator>(GEP)->getPointerAddressSpace();
gep_type_iterator GTI = gep_type_begin(GEP);
- Type *IntPtrTy = TD.getIntPtrType(GEP->getContext());
+ Type *IntPtrTy = TD.getIntPtrType(GEP->getContext(), AS);
Value *Result = Constant::getNullValue(IntPtrTy);
// If the GEP is inbounds, we know that none of the addressing operations will
@@ -186,7 +187,6 @@ Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &TD, User *GEP,
bool isInBounds = cast<GEPOperator>(GEP)->isInBounds() && !NoAssumptions;
// Build a mask for high order bits.
- unsigned AS = cast<GEPOperator>(GEP)->getPointerAddressSpace();
unsigned IntPtrWidth = TD.getPointerSizeInBits(AS);
uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 146897ad67..de6d61d78b 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -41,7 +41,7 @@ using namespace llvm;
// Constant Folding internal helper functions
//===----------------------------------------------------------------------===//
-/// FoldBitCast - Constant fold bitcast, symbolically evaluating it with
+/// FoldBitCast - Constant fold bitcast, symbolically evaluating it with
/// DataLayout. This always returns a non-null constant, but it may be a
/// ConstantExpr if unfoldable.
static Constant *FoldBitCast(Constant *C, Type *DestTy,
@@ -59,9 +59,9 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
return ConstantExpr::getBitCast(C, DestTy);
unsigned NumSrcElts = CDV->getType()->getNumElements();
-
+
Type *SrcEltTy = CDV->getType()->getElementType();
-
+
// If the vector is a vector of floating point, convert it to vector of int
// to simplify things.
if (SrcEltTy->isFloatingPointTy()) {
@@ -72,7 +72,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
C = ConstantExpr::getBitCast(C, SrcIVTy);
CDV = cast<ConstantDataVector>(C);
}
-
+
// Now that we know that the input value is a vector of integers, just shift
// and insert them into our result.
unsigned BitShift = TD.getTypeAllocSizeInBits(SrcEltTy);
@@ -84,43 +84,43 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
else
Result |= CDV->getElementAsInteger(i);
}
-
+
return ConstantInt::get(IT, Result);
}
-
+
// The code below only handles casts to vectors currently.
VectorType *DestVTy = dyn_cast<VectorType>(DestTy);
if (DestVTy == 0)
return ConstantExpr::getBitCast(C, DestTy);
-
+
// If this is a scalar -> vector cast, convert the input into a <1 x scalar>
// vector so the code below can handle it uniformly.
if (isa<ConstantFP>(C) || isa<ConstantInt>(C)) {
Constant *Ops = C; // don't take the address of C!
return FoldBitCast(ConstantVector::get(Ops), DestTy, TD);
}
-
+
// If this is a bitcast from constant vector -> vector, fold it.
if (!isa<ConstantDataVector>(C) && !isa<ConstantVector>(C))
return ConstantExpr::getBitCast(C, DestTy);
-
+
// If the element types match, VMCore can fold it.
unsigned NumDstElt = DestVTy->getNumElements();
unsigned NumSrcElt = C->getType()->getVectorNumElements();
if (NumDstElt == NumSrcElt)
return ConstantExpr::getBitCast(C, DestTy);
-
+
Type *SrcEltTy = C->getType()->getVectorElementType();
Type *DstEltTy = DestVTy->getElementType();
-
- // Otherwise, we're changing the number of elements in a vector, which
+
+ // Otherwise, we're changing the number of elements in a vector, which
// requires endianness information to do the right thing. For example,
// bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
// folds to (little endian):
// <4 x i32> <i32 0, i32 0, i32 1, i32 0>
// and to (big endian):
// <4 x i32> <i32 0, i32 0, i32 0, i32 1>
-
+
// First thing is first. We only want to think about integer here, so if
// we have something in FP form, recast it as integer.
if (DstEltTy->isFloatingPointTy()) {
@@ -130,11 +130,11 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumDstElt);
// Recursively handle this integer conversion, if possible.
C = FoldBitCast(C, DestIVTy, TD);
-
+
// Finally, VMCore can handle this now that #elts line up.
return ConstantExpr::getBitCast(C, DestTy);
}
-
+
// Okay, we know the destination is integer, if the input is FP, convert
// it to integer first.
if (SrcEltTy->isFloatingPointTy()) {
@@ -148,13 +148,13 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
!isa<ConstantDataVector>(C))
return C;
}
-
+
// Now we know that the input and output vectors are both integer vectors
// of the same size, and that their #elements is not the same. Do the
// conversion here, which depends on whether the input or output has
// more elements.
bool isLittleEndian = TD.isLittleEndian();
-
+
SmallVector<Constant*, 32> Result;
if (NumDstElt < NumSrcElt) {
// Handle: bitcast (<4 x i32> <i32 0, i32 1, i32 2, i32 3> to <2 x i64>)
@@ -170,15 +170,15 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
Constant *Src =dyn_cast<ConstantInt>(C->getAggregateElement(SrcElt++));
if (!Src) // Reject constantexpr elements.
return ConstantExpr::getBitCast(C, DestTy);
-
+
// Zero extend the element to the right size.
Src = ConstantExpr::getZExt(Src, Elt->getType());
-
+
// Shift it to the right place, depending on endianness.
- Src = ConstantExpr::getShl(Src,
+ Src = ConstantExpr::getShl(Src,
ConstantInt::get(Src->getType(), ShiftAmt));
ShiftAmt += isLittleEndian ? SrcBitSize : -SrcBitSize;
-
+
// Mix it in.
Elt = ConstantExpr::getOr(Elt, Src);
}
@@ -186,30 +186,30 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
}
return ConstantVector::get(Result);
}
-
+
// Handle: bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
unsigned Ratio = NumDstElt/NumSrcElt;
unsigned DstBitSize = DstEltTy->getPrimitiveSizeInBits();
-
+
// Loop over each source value, expanding into multiple results.
for (unsigned i = 0; i != NumSrcElt; ++i) {
Constant *Src = dyn_cast<ConstantInt>(C->getAggregateElement(i));
if (!Src) // Reject constantexpr elements.
return ConstantExpr::getBitCast(C, DestTy);
-
+
unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize*(Ratio-1);
for (unsigned j = 0; j != Ratio; ++j) {
// Shift the piece of the value into the right place, depending on
// endianness.
- Constant *Elt = ConstantExpr::getLShr(Src,
+ Constant *Elt = ConstantExpr::getLShr(Src,
ConstantInt::get(Src->getType(), ShiftAmt));
ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize;
-
+
// Truncate and remember this piece.
Result.push_back(ConstantExpr::getTrunc(Elt, DstEltTy));
}
}
-
+
return ConstantVector::get(Result);
}
@@ -224,28 +224,28 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
Offset = 0;
return true;
}
-
+
// Otherwise, if this isn't a constant expr, bail out.
ConstantExpr *CE = dyn_cast<ConstantExpr>(C);
if (!CE) return false;
-
+
// Look through ptr->int and ptr->ptr casts.
if (CE->getOpcode() == Instruction::PtrToInt ||
CE->getOpcode() == Instruction::BitCast)
return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD);
-
- // i32* getelementptr ([5 x i32]* @a, i32 0, i32 5)
+
+ // i32* getelementptr ([5 x i32]* @a, i32 0, i32 5)
if (CE->getOpcode() == Instruction::GetElementPtr) {
// Cannot compute this if the element type of the pointer is missing size
// info.
if (!cast<PointerType>(CE->getOperand(0)->getType())
->getElementType()->isSized())
return false;
-
+
// If the base isn't a global+constant, we aren't either.
if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD))
return false;
-
+
// Otherwise, add any offset that our operands provide.
gep_type_iterator GTI = gep_type_begin(CE);
for (User::const_op_iterator i = CE->op_begin() + 1, e = CE->op_end();
@@ -253,7 +253,7 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
ConstantInt *CI = dyn_cast<ConstantInt>(*i);
if (!CI) return false; // Index isn't a simple constant?
if (CI->isZero()) continue; // Not adding anything.
-
+
if (StructType *ST = dyn_cast<StructType>(*GTI)) {
// N = N + Offset
Offset += TD.getStructLayout(ST)->getElementOffset(CI->getZExtValue());
@@ -264,7 +264,7 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
}
return true;
}
-
+
return false;
}
@@ -277,27 +277,27 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
const DataLayout &TD) {
assert(ByteOffset <= TD.getTypeAllocSize(C->getType()) &&
"Out of range access");
-
+
// If this element is zero or undefined, we can just return since *CurPtr is
// zero initialized.
if (isa<ConstantAggregateZero>(C) || isa<UndefValue>(C))
return true;
-
+
if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
if (CI->getBitWidth() > 64 ||
(CI->getBitWidth() & 7) != 0)
return false;
-
+
uint64_t Val = CI->getZExtValue();
unsigned IntBytes = unsigned(CI->getBitWidth()/8);
-
+
for (unsigned i = 0; i != BytesLeft && ByteOffset != IntBytes; ++i) {
CurPtr[i] = (unsigned char)(Val >> (ByteOffset * 8));
++ByteOffset;
}
return true;
}
-
+
if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
if (CFP->getType()->isDoubleTy()) {
C = FoldBitCast(C, Type::getInt64Ty(C->getContext()), TD);
@@ -309,13 +309,13 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
}
return false;
}
-
+
if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) {
const StructLayout *SL = TD.getStructLayout(CS->getType());
unsigned Index = SL->getElementContainingOffset(ByteOffset);
uint64_t CurEltOffset = SL->getElementOffset(Index);
ByteOffset -= CurEltOffset;
-
+
while (1) {
// If the element access is to the element itself and not to tail padding,
// read the bytes from the element.
@@ -325,9 +325,9 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
!ReadDataFromGlobal(CS->getOperand(Index), ByteOffset, CurPtr,
BytesLeft, TD))
return false;
-
+
++Index;
-
+
// Check to see if we read from the last struct element, if so we're done.
if (Index == CS->getType()->getNumElements())
return true;
@@ -375,11 +375,11 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
}
return true;
}
-
+
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
if (CE->getOpcode() == Instruction::IntToPtr &&
- CE->getOperand(0)->getType() == TD.getIntPtrType(CE->getContext()))
- return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr,
+ CE->getOperand(0)->getType() == TD.getIntPtrType(CE->getType()))
+ return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr,
BytesLeft, TD);
}
@@ -391,7 +391,7 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
const DataLayout &TD) {
Type *LoadTy = cast<PointerType>(C->getType())->getElementType();
IntegerType *IntType = dyn_cast<IntegerType>(LoadTy);
-
+
// If this isn't an integer load we can't fold it directly.
if (!IntType) {
// If this is a float/double load, we can try folding it as an int32/64 load
@@ -415,15 +415,15 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
return FoldBitCast(Res, LoadTy, TD);
return 0;
}
-
+
unsigned BytesLoaded = (IntType->getBitWidth() + 7) / 8;
if (BytesLoaded > 32 || BytesLoaded == 0) return 0;
-
+
GlobalValue *GVal;
int64_t Offset;
if (!IsConstantOffsetFromGlobal(C, GVal, Offset, TD))
return 0;
-
+
GlobalVariable *GV = dyn_cast<GlobalVariable>(GVal);
if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() ||
!GV->getInitializer()->getType()->isSized())
@@ -432,11 +432,11 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
// If we're loading off the beginning of the global, some bytes may be valid,
// but we don't try to handle this.
if (Offset < 0) return 0;
-
+
// If we're not accessing anything in this constant, the result is undefined.
if (uint64_t(Offset) >= TD.getTypeAllocSize(GV->getInitializer()->getType()))
return UndefValue::get(IntType);
-
+
unsigned char RawBytes[32] = {0};
if (!ReadDataFromGlobal(GV->getInitializer(), Offset, RawBytes,
BytesLoaded, TD))
@@ -464,15 +464,15 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
// If the loaded value isn't a constant expr, we can't handle it.
ConstantExpr *CE = dyn_cast<ConstantExpr>(C);
if (!CE) return 0;
-
+
if (CE->getOpcode() == Instruction::GetElementPtr) {
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(CE->getOperand(0)))
if (GV->isConstant() && GV->hasDefinitiveInitializer())
- if (Constant *V =
+ if (Constant *V =
ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE))
return V;
}
-
+
// Instead of loading constant c string, use corresponding integer value
// directly if string length is small enough.
StringRef Str;
@@ -500,14 +500,14 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
SingleChar = 0;
StrVal = (StrVal << 8) | SingleChar;
}
-
+
Constant *Res = ConstantInt::get(CE->getContext(), StrVal);
if (Ty->isFloatingPointTy())
Res = ConstantExpr::getBitCast(Res, Ty);
return Res;
}
}
-
+
// If this load comes from anywhere in a constant global, and if the global
// is all undef or zero, we know what it loads.
if (GlobalVariable *GV =
@@ -520,7 +520,7 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
return UndefValue::get(ResTy);
}
}
-
+
// Try hard to fold loads from bitcasted strange and non-type-safe things. We
// currently don't do any of this for big endian systems. It can be
// generalized in the future if someone is interested.
@@ -531,7 +531,7 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
static Constant *ConstantFoldLoadInst(const LoadInst *LI, const DataLayout *TD){
if (LI->isVolatile()) return 0;
-
+
if (Constant *C = dyn_cast<Constant>(LI->getOperand(0)))
return ConstantFoldLoadFromConstPtr(C, TD);
@@ -540,23 +540,23 @@ static Constant *ConstantFoldLoadInst(const LoadInst *LI, const DataLayout *TD){
/// SymbolicallyEvaluateBinop - One of Op0/Op1 is a constant expression.
/// Attempt to symbolically evaluate the result of a binary operator merging
-/// these together. If target data info is available, it is provided as TD,
+/// these together. If target data info is available, it is provided as TD,
/// otherwise TD is null.
static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
Constant *Op1, const DataLayout *TD){
// SROA
-
+
// Fold (and 0xffffffff00000000, (shl x, 32)) -> shl.
// Fold (lshr (or X, Y), 32) -> (lshr [X/Y], 32) if one doesn't contribute
// bits.
-
-
+
+
// If the constant expr is something like &A[123] - &A[4].f, fold this into a
// constant. This happens frequently when iterating over a global array.
if (Opc == Instruction::Sub && TD) {
GlobalValue *GV1, *GV2;
int64_t Offs1, Offs2;
-
+
if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, *TD))
if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, *TD) &&
GV1 == GV2) {
@@ -564,7 +564,7 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
return ConstantInt::get(Op0->getType(), Offs1-Offs2);
}
}
-
+
return 0;
}
@@ -575,7 +575,7 @@ static Constant *CastGEPIndices(ArrayRef<Constant *> Ops,
Type *ResultTy, const DataLayout *TD,
const TargetLibraryInfo *TLI) {
if (!TD) return 0;
- Type *IntPtrTy = TD->getIntPtrType(ResultTy->getContext());
+ Type *IntPtrTy = TD->getIntPtrType(ResultTy);
bool Any = false;
SmallVector<Constant*, 32> NewIdxs;
@@ -628,14 +628,15 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
if (!TD || !cast<PointerType>(Ptr->getType())->getElementType()->isSized() ||
!Ptr->getType()->isPointerTy())
return 0;
-
- Type *IntPtrTy = TD->getIntPtrType(Ptr->getContext());
+
+ unsigned AS = cast<PointerType>(Ptr->getType())->getAddressSpace();
+ Type *IntPtrTy = TD->getIntPtrType(Ptr->getContext(), AS);
// If this is a constant expr gep that is effectively computing an
// "offsetof", fold it into 'cast int Size to T*' instead of 'gep 0, 0, 12'
for (unsigned i = 1, e = Ops.size(); i != e; ++i)
if (!isa<ConstantInt>(Ops[i])) {
-
+
// If this is "gep i8* Ptr, (sub 0, V)", fold this as:
// "inttoptr (sub (ptrtoint Ptr), V)"
if (Ops.size() == 2 &&
@@ -702,6 +703,8 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
// Also, this helps GlobalOpt do SROA on GlobalVariables.
Type *Ty = Ptr->getType();
assert(Ty->isPointerTy() && "Forming regular GEP of non-pointer type");
+ assert(Ty->getPointerAddressSpace() == AS
+ && "Operand and result of GEP should be in the same address space.");
SmallVector<Constant*, 32> NewIdxs;
do {
if (SequentialType *ATy = dyn_cast<SequentialType>(Ty)) {
@@ -709,15 +712,15 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
// The only pointer indexing we'll do is on the first index of the GEP.
if (!NewIdxs.empty())
break;
-
+
// Only handle pointers to sized types, not pointers to functions.
if (!ATy->getElementType()->isSized())
return 0;
}
-
+
// Determine which element of the array the offset points into.
APInt ElemSize(BitWidth, TD->getTypeAllocSize(ATy->getElementType()));
- IntegerType *IntPtrTy = TD->getIntPtrType(Ty->getContext());
+ IntegerType *IntPtrTy = TD->getIntPtrType(Ty->getContext(), AS);
if (ElemSize == 0)
// The element size is 0. This may be [0 x Ty]*, so just use a zero
// index for this level and proceed to the next level to see if it can
@@ -837,7 +840,7 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I,
if (const CmpInst *CI = dyn_cast<CmpInst>(I))
return ConstantFoldCompareInstOperands(CI->getPredicate(), Ops[0], Ops[1],
TD, TLI);
-
+
if (const LoadInst *LI = dyn_cast<LoadInst>(I))
return ConstantFoldLoadInst(LI, TD);
@@ -887,19 +890,19 @@ Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
/// information, due to only being passed an opcode and operands. Constant
/// folding using this function strips this information.
///
-Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
+Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
ArrayRef<Constant *> Ops,
const DataLayout *TD,
- const TargetLibraryInfo *TLI) {
+ const TargetLibraryInfo *TLI) {
// Handle easy binops first.
if (Instruction::isBinaryOp(Opcode)) {
if (isa<ConstantExpr>(Ops[0]) || isa<ConstantExpr>(Ops[1]))
if (Constant *C = SymbolicallyEvaluateBinop(Opcode, Ops[0], Ops[1], TD))
return C;
-
+
return ConstantExpr::get(Opcode, Ops[0], Ops[1]);
}
-
+
switch (Opcode) {
default: return 0;
case Instruction::ICmp:
@@ -918,7 +921,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
unsigned InWidth = Input->getType()->getScalarSizeInBits();
unsigned AS = cast<PointerType>(CE->getType())->getAddressSpace();
if (TD->getPointerSizeInBits(AS) < InWidth) {
- Constant *Mask =
+ Constant *Mask =
ConstantInt::get(CE->getContext(), APInt::getLowBitsSet(InWidth,
TD->getPointerSizeInBits(AS)));
Input = ConstantExpr::getAnd(Input, Mask);
@@ -934,8 +937,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
// pointer, so it can't be done in ConstantExpr::getCast.
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0]))
if (TD && CE->getOpcode() == Instruction::PtrToInt &&
- TD->getPointerSizeInBits(
- cast<PointerType>(CE->getOperand(0)->getType())->getAddressSpace())
+ TD->getTypeSizeInBits(CE->getOperand(0)->getType())
<= CE->getType()->getScalarSizeInBits())
return FoldBitCast(CE->getOperand(0), DestTy, *TD);
@@ -967,7 +969,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
return C;
if (Constant *C = SymbolicallyEvaluateGEP(Ops, DestTy, TD, TLI))
return C;
-
+
return ConstantExpr::getGetElementPtr(Ops[0], Ops.slice(1));
}
}
@@ -977,7 +979,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
/// returns a constant expression of the specified operands.
///
Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
- Constant *Ops0, Constant *Ops1,
+ Constant *Ops0, Constant *Ops1,
const DataLayout *TD,
const TargetLibraryInfo *TLI) {
// fold: icmp (inttoptr x), null -> icmp x, 0
@@ -988,9 +990,10 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
// ConstantExpr::getCompare cannot do this, because it doesn't have TD
// around to know if bit truncation is happening.
if (ConstantExpr *CE0 = dyn_cast<ConstantExpr>(Ops0)) {
+ Type *IntPtrTy = NULL;
if (TD && Ops1->isNullValue()) {
- Type *IntPtrTy = TD->getIntPtrType(CE0->getContext());
if (CE0->getOpcode() == Instruction::IntToPtr) {
+ IntPtrTy = TD->getIntPtrType(CE0->getType());
// Convert the integer value to the right size to ensure we get the
// proper extension or truncation.
Constant *C = ConstantExpr::getIntegerCast(CE0->getOperand(0),
@@ -998,22 +1001,24 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
Constant *Null = Constant::getNullValue(C->getType());
return ConstantFoldCompareInstOperands(Predicate, C, Null, TD, TLI);
}
-
+
// Only do this transformation if the int is intptrty in size, otherwise
// there is a truncation or extension that we aren't modeling.
- if (CE0->getOpcode() == Instruction::PtrToInt &&
- CE0->getType() == IntPtrTy) {
- Constant *C = CE0->getOperand(0);
- Constant *Null = Constant::getNullValue(C->getType());
- return ConstantFoldCompareInstOperands(Predicate, C, Null, TD, TLI);
+ if (CE0->getOpcode() == Instruction::PtrToInt) {
+ IntPtrTy = TD->getIntPtrType(CE0->getOperand(0)->getType());
+ if (CE0->getType() == IntPtrTy) {
+ Constant *C = CE0->getOperand(0);
+ Constant *Null = Constant::getNullValue(C->getType());
+ return ConstantFoldCompareInstOperands(Predicate, C, Null, TD, TLI);
+ }
}
}
-
+
if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(Ops1)) {
if (TD && CE0->getOpcode() == CE1->getOpcode()) {
- Type *IntPtrTy = TD->getIntPtrType(CE0->getContext());
if (CE0->getOpcode() == Instruction::IntToPtr) {
+ Type *IntPtrTy = TD->getIntPtrType(CE0->getType());
// Convert the integer value to the right size to ensure we get the
// proper extension or truncation.
Constant *C0 = ConstantExpr::getIntegerCast(CE0->getOperand(0),
@@ -1022,34 +1027,36 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
IntPtrTy, false);
return ConstantFoldCompareInstOperands(Predicate, C0, C1, TD, TLI);
}
+ }
- // Only do this transformation if the int is intptrty in size, otherwise
- // there is a truncation or extension that we aren't modeling.
- if ((CE0->getOpcode() == Instruction::PtrToInt &&
- CE0->getType() == IntPtrTy &&
- CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType()))
+ // Only do this transformation if the int is intptrty in size, otherwise
+ // there is a truncation or extension that we aren't modeling.
+ if (CE0->getOpcode() == Instruction::PtrToInt) {
+ IntPtrTy = TD->getIntPtrType(CE0->getOperand(0)->getType());
+ if (CE0->getType() == IntPtrTy &&
+ CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType())
return ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0),
- CE1->getOperand(0), TD, TLI);
+ CE1->getOperand(0), TD, TLI);
}
}
-
+
// icmp eq (or x, y), 0 -> (icmp eq x, 0) & (icmp eq y, 0)
// icmp ne (or x, y), 0 -> (icmp ne x, 0) | (icmp ne y, 0)
if ((Predicate == ICmpInst::ICMP_EQ || Predicate == ICmpInst::ICMP_NE) &&
CE0->getOpcode() == Instruction::Or && Ops1->isNullValue()) {
- Constant *LHS =
+ Constant *LHS =
ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0), Ops1,
TD, TLI);
- Constant *RHS =
+ Constant *RHS =
ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(1), Ops1,
TD, TLI);
- unsigned OpC =
+ unsigned OpC =
Predicate == ICmpInst::ICMP_EQ ? Instruction::And : Instruction::Or;
Constant *Ops[] = { LHS, RHS };
return ConstantFoldInstOperands(OpC, LHS->getType(), Ops, TD, TLI);
}
}
-
+
return ConstantExpr::getCompare(Predicate, Ops0, Ops1);
}
@@ -1057,7 +1064,7 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
/// ConstantFoldLoadThroughGEPConstantExpr - Given a constant and a
/// getelementptr constantexpr, return the constant value being addressed by the
/// constant expression, or null if something is funny and we can't decide.
-Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C,
+Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C,
ConstantExpr *CE) {
if (!CE->getOperand(1)->isNullValue())
return 0; // Do not allow stepping over the value!
@@ -1127,14 +1134,14 @@ llvm::canConstantFoldCallTo(const Function *F) {
if (!F->hasName()) return false;
StringRef Name = F->getName();
-
+
// In these cases, the check of the length is required. We don't want to
// return true for a name like "cos\0blah" which strcmp would return equal to
// "cos", but has length 8.
switch (Name[0]) {
default: return false;
case 'a':
- return Name == "acos" || Name == "asin" ||
+ return Name == "acos" || Name == "asin" ||
Name == "atan" || Name == "atan2";
case 'c':
return Name == "cos" || Name == "ceil" || Name == "cosf" || Name == "cosh";
@@ -1154,7 +1161,7 @@ llvm::canConstantFoldCallTo(const Function *F) {
}
}
-static Constant *ConstantFoldFP(double (*NativeFP)(double), double V,
+static Constant *ConstantFoldFP(double (*NativeFP)(double), double V,
Type *Ty) {
sys::llvm_fenv_clearexcept();
V = NativeFP(V);
@@ -1162,7 +1169,7 @@ static Constant *ConstantFoldFP(double (*NativeFP)(double), double V,
sys::llvm_fenv_clearexcept();
return 0;
}
-
+
if (Ty->isFloatTy())
return ConstantFP::get(Ty->getContext(), APFloat((float)V));
if (Ty->isDoubleTy())
@@ -1178,7 +1185,7 @@ static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
sys::llvm_fenv_clearexcept();
return 0;
}
-
+
if (Ty->isFloatTy())
return ConstantFP::get(Ty->getContext(), APFloat((float)V));
if (Ty->isDoubleTy())
@@ -1272,7 +1279,7 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
case 'e':
if (Name == "exp" && TLI->has(LibFunc::exp))
return ConstantFoldFP(exp, V, Ty);
-
+
if (Name == "exp2" && TLI->has(LibFunc::exp2)) {
// Constant fold exp2(x) as pow(2,x) in case the host doesn't have a
// C99 library.
@@ -1348,7 +1355,7 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
}
// Support ConstantVector in case we have an Undef in the top.
- if (isa<ConstantVector>(Operands[0]) ||
+ if (isa<ConstantVector>(Operands[0]) ||
isa<ConstantDataVector>(Operands[0])) {
Constant *Op = cast<Constant>(Operands[0]);
switch (F->getIntrinsicID()) {
@@ -1367,11 +1374,11 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
case Intrinsic::x86_sse2_cvttsd2si64:
if (ConstantFP *FPOp =
dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
- return ConstantFoldConvertToInt(FPOp->getValueAPF(),
+ return ConstantFoldConvertToInt(FPOp->getValueAPF(),
/*roundTowardZero=*/true, Ty);
}
}
-
+
if (isa<UndefValue>(Operands[0])) {
if (F->getIntrinsicID() == Intrinsic::bswap)
return Operands[0];
@@ -1385,14 +1392,14 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
if (ConstantFP *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
if (!Ty->isFloatTy() && !Ty->isDoubleTy())
return 0;
- double Op1V = Ty->isFloatTy() ?
+ double Op1V = Ty->isFloatTy() ?
(double)Op1->getValueAPF().convertToFloat() :
Op1->getValueAPF().convertToDouble();
if (ConstantFP *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
if (Op2->getType() != Op1->getType())
return 0;
- double Op2V = Ty->isFloatTy() ?
+ double Op2V = Ty->isFloatTy() ?
(double)Op2->getValueAPF().convertToFloat():
Op2->getValueAPF().convertToDouble();
@@ -1419,7 +1426,7 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
}
return 0;
}
-
+
if (ConstantInt *Op1 = dyn_cast<ConstantInt>(Operands[0])) {
if (ConstantInt *Op2 = dyn_cast<ConstantInt>(Operands[1])) {
switch (F->getIntrinsicID()) {
@@ -1469,7 +1476,7 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
return ConstantInt::get(Ty, Op1->getValue().countLeadingZeros());
}
}
-
+
return 0;
}
return 0;
diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp
index 016fe396e7..f97f0f2de6 100644
--- a/lib/Analysis/DependenceAnalysis.cpp
+++ b/lib/Analysis/DependenceAnalysis.cpp
@@ -55,12 +55,16 @@
#include "llvm/Analysis/DependenceAnalysis.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Instructions.h"
#include "llvm/Operator.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index 95e58022ca..64e183d60c 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -788,7 +788,7 @@ ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
assert(V->getType()->isPointerTy() && "Unexpected operand type!");
} while (Visited.insert(V));
- Type *IntPtrTy = TD->getIntPtrType(V->getContext());
+ Type *IntPtrTy = TD->getIntPtrType(V->getType());
return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset));
}
@@ -828,8 +828,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
// size of the byval type by the target's pointer size.
PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
unsigned TypeSize = TD->getTypeSizeInBits(PTy->getElementType());
- unsigned AS = PTy->getAddressSpace();
- unsigned PointerSize = TD->getPointerSizeInBits(AS);
+ unsigned PointerSize = TD->getTypeSizeInBits(PTy);
// Ceiling division.
unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index 8e326122fa..7ef74f67ce 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -728,7 +728,7 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout &TD,
assert(V->getType()->isPointerTy() && "Unexpected operand type!");
} while (Visited.insert(V));
- Type *IntPtrTy = TD.getIntPtrType(V->getContext());
+ Type *IntPtrTy = TD.getIntPtrType(V->getContext(), AS);
return ConstantInt::get(IntPtrTy, Offset);
}
@@ -1880,9 +1880,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// Turn icmp (ptrtoint x), (ptrtoint/constant) into a compare of the input
// if the integer type is the same size as the pointer type.
if (MaxRecurse && Q.TD && isa<PtrToIntInst>(LI) &&
- Q.TD->getPointerSizeInBits(
- cast<PtrToIntInst>(LI)->getPointerAddressSpace()) ==
- DstTy->getPrimitiveSizeInBits()) {
+ Q.TD->getTypeSizeInBits(SrcTy) == DstTy->getPrimitiveSizeInBits()) {
if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
// Transfer the cast to the constant.
if (Value *V = SimplifyICmpInst(Pred, SrcOp,
diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp
index 6d6d580ed1..d62808e9cd 100644
--- a/lib/Analysis/Lint.cpp
+++ b/lib/Analysis/Lint.cpp
@@ -626,8 +626,7 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk,
if (W != V)
return findValueImpl(W, OffsetOk, Visited);
} else if (CastInst *CI = dyn_cast<CastInst>(V)) {
- if (CI->isNoopCast(TD ? TD->getIntPtrType(V->getContext()) :
- Type::getInt64Ty(V->getContext())))
+ if (CI->isNoopCast(*TD))
return findValueImpl(CI->getOperand(0), OffsetOk, Visited);
} else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) {
if (Value *W = FindInsertedValue(Ex->getAggregateOperand(),
@@ -640,7 +639,7 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk,
if (CastInst::isNoopCast(Instruction::CastOps(CE->getOpcode()),
CE->getOperand(0)->getType(),
CE->getType(),
- TD ? TD->getIntPtrType(V->getContext()) :
+ TD ? TD->getIntPtrType(CE->getType()) :
Type::getInt64Ty(V->getContext())))
return findValueImpl(CE->getOperand(0), OffsetOk, Visited);
} else if (CE->getOpcode() == Instruction::ExtractValue) {
diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp
index 0a539fe758..8d903c63af 100644
--- a/lib/Analysis/MemoryBuiltins.cpp
+++ b/lib/Analysis/MemoryBuiltins.cpp
@@ -376,9 +376,10 @@ APInt ObjectSizeOffsetVisitor::align(APInt Size, uint64_t Align) {
ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout *TD,
const TargetLibraryInfo *TLI,
LLVMContext &Context,
- bool RoundToAlign)
+ bool RoundToAlign,
+ unsigned AS)
: TD(TD), TLI(TLI), RoundToAlign(RoundToAlign) {
- IntegerType *IntTy = TD->getIntPtrType(Context);
+ IntegerType *IntTy = TD->getIntPtrType(Context, AS);
IntTyBits = IntTy->getBitWidth();
Zero = APInt::getNullValue(IntTyBits);
}
@@ -561,9 +562,10 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitInstruction(Instruction &I) {
ObjectSizeOffsetEvaluator::ObjectSizeOffsetEvaluator(const DataLayout *TD,
const TargetLibraryInfo *TLI,
- LLVMContext &Context)
+ LLVMContext &Context,
+ unsigned AS)
: TD(TD), TLI(TLI), Context(Context), Builder(Context, TargetFolder(TD)) {
- IntTy = TD->getIntPtrType(Context);
+ IntTy = TD->getIntPtrType(Context, AS);
Zero = ConstantInt::get(IntTy, 0);
}
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index 9316df6fbf..9872890494 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -983,7 +983,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end();
I != E; ++I) {
Visited.insert(std::make_pair(I->getBB(), Addr));
- if (!I->getResult().isNonLocal())
+ if (!I->getResult().isNonLocal() && DT->isReachableFromEntry(I->getBB()))
Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(), Addr));
}
++NumCacheCompleteNonLocalPtr;
@@ -1029,7 +1029,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
NumSortedEntries);
// If we got a Def or Clobber, add this to the list of results.
- if (!Dep.isNonLocal()) {
+ if (!Dep.isNonLocal() && DT->isReachableFromEntry(BB)) {
Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr()));
continue;
}
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 5400646be1..148912b766 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -2581,13 +2581,12 @@ const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
}
-const SCEV *ScalarEvolution::getSizeOfExpr(Type *AllocTy) {
+const SCEV *ScalarEvolution::getSizeOfExpr(Type *AllocTy, Type *IntPtrTy) {
// If we have DataLayout, we can bypass creating a target-independent
// constant expression and then folding it back into a ConstantInt.
// This is just a compile-time optimization.
if (TD)
- return getConstant(TD->getIntPtrType(getContext()),
- TD->getTypeAllocSize(AllocTy));
+ return getConstant(IntPtrTy, TD->getTypeAllocSize(AllocTy));
Constant *C = ConstantExpr::getSizeOf(AllocTy);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
@@ -2606,13 +2605,13 @@ const SCEV *ScalarEvolution::getAlignOfExpr(Type *AllocTy) {
return getTruncateOrZeroExtend(getSCEV(C), Ty);
}
-const SCEV *ScalarEvolution::getOffsetOfExpr(StructType *STy,
+const SCEV *ScalarEvolution::getOffsetOfExpr(StructType *STy, Type *IntPtrTy,
unsigned FieldNo) {
// If we have DataLayout, we can bypass creating a target-independent
// constant expression and then folding it back into a ConstantInt.
// This is just a compile-time optimization.
if (TD)
- return getConstant(TD->getIntPtrType(getContext()),
+ return getConstant(IntPtrTy,
TD->getStructLayout(STy)->getElementOffset(FieldNo));
Constant *C = ConstantExpr::getOffsetOf(STy, FieldNo);
@@ -2699,7 +2698,7 @@ Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const {
// The only other support type is pointer.
assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!");
- if (TD) return TD->getIntPtrType(getContext());
+ if (TD) return TD->getIntPtrType(Ty);
// Without DataLayout, conservatively assume pointers are 64-bit.
return Type::getInt64Ty(getContext());
@@ -3152,13 +3151,13 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
if (StructType *STy = dyn_cast<StructType>(*GTI++)) {
// For a struct, add the member offset.
unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
- const SCEV *FieldOffset = getOffsetOfExpr(STy, FieldNo);
+ const SCEV *FieldOffset = getOffsetOfExpr(STy, IntPtrTy, FieldNo);
// Add the field offset to the running total offset.
TotalOffset = getAddExpr(TotalOffset, FieldOffset);
} else {
// For an array, add the element offset, explicitly scaled.
- const SCEV *ElementSize = getSizeOfExpr(*GTI);
+ const SCEV *ElementSize = getSizeOfExpr(*GTI, IntPtrTy);
const SCEV *IndexS = getSCEV(Index);
// Getelementptr indices are signed.
IndexS = getTruncateOrSignExtend(IndexS, IntPtrTy);
@@ -3980,8 +3979,11 @@ getSmallConstantTripMultiple(Loop *L, BasicBlock *ExitingBlock) {
ConstantInt *Result = MulC->getValue();
- // Guard against huge trip counts.
- if (!Result || Result->getValue().getActiveBits() > 32)
+ // Guard against huge trip counts (this requires checking
+ // for zero to handle the case where the trip count == -1 and the
+ // addition wraps).
+ if (!Result || Result->getValue().getActiveBits() > 32 ||
+ Result->getValue().getActiveBits() == 0)
return 1;
return (unsigned)Result->getZExtValue();
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 111bfb4a6a..0295da5e4a 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -417,7 +417,9 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
// array indexing.
SmallVector<const SCEV *, 8> ScaledOps;
if (ElTy->isSized()) {
- const SCEV *ElSize = SE.getSizeOfExpr(ElTy);
+ Type *IntPtrTy = SE.TD ? SE.TD->getIntPtrType(PTy) :
+ IntegerType::getInt64Ty(PTy->getContext());
+ const SCEV *ElSize = SE.getSizeOfExpr(ElTy, IntPtrTy);
if (!ElSize->isZero()) {
SmallVector<const SCEV *, 8> NewOps;
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp
index 464dfd51d6..91f973d8d3 100644
--- a/lib/AsmParser/LLLexer.cpp
+++ b/lib/AsmParser/LLLexer.cpp
@@ -527,6 +527,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(ptx_device);
KEYWORD(spir_kernel);
KEYWORD(spir_func);
+ KEYWORD(intel_ocl_bicc);
KEYWORD(cc);
KEYWORD(c);
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index cc7cc31246..75fc16cd95 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -1094,6 +1094,7 @@ bool LLParser::ParseOptionalVisibility(unsigned &Res) {
/// ::= /*empty*/
/// ::= 'ccc'
/// ::= 'fastcc'
+/// ::= 'kw_intel_ocl_bicc'
/// ::= 'coldcc'
/// ::= 'x86_stdcallcc'
/// ::= 'x86_fastcallcc'
@@ -1125,6 +1126,7 @@ bool LLParser::ParseOptionalCallingConv(CallingConv::ID &CC) {
case lltok::kw_ptx_device: CC = CallingConv::PTX_Device; break;
case lltok::kw_spir_kernel: CC = CallingConv::SPIR_KERNEL; break;
case lltok::kw_spir_func: CC = CallingConv::SPIR_FUNC; break;
+ case lltok::kw_intel_ocl_bicc: CC = CallingConv::Intel_OCL_BI; break;
case lltok::kw_cc: {
unsigned ArbitraryCC;
Lex.Lex();
diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h
index ff6d68f0da..6cffc52d17 100644
--- a/lib/AsmParser/LLToken.h
+++ b/lib/AsmParser/LLToken.h
@@ -77,6 +77,7 @@ namespace lltok {
kw_c,
kw_cc, kw_ccc, kw_fastcc, kw_coldcc,
+ kw_intel_ocl_bicc,
kw_x86_stdcallcc, kw_x86_fastcallcc, kw_x86_thiscallcc,
kw_arm_apcscc, kw_arm_aapcscc, kw_arm_aapcs_vfpcc,
kw_msp430_intrcc,
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 788a89bf13..81eec3c9ac 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -401,8 +401,8 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
// - __tlv_bootstrap - used to make sure support exists
// - spare pointer, used when mapped by the runtime
// - pointer to mangled symbol above with initializer
- unsigned AS = GV->getType()->getAddressSpace();
- unsigned PtrSize = TD->getPointerSizeInBits(AS)/8;
+ assert(GV->getType()->isPointerTy() && "GV must be a pointer type!");
+ unsigned PtrSize = TD->getTypeSizeInBits(GV->getType())/8;
OutStreamer.EmitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"),
PtrSize, 0);
OutStreamer.EmitIntValue(0, PtrSize, 0);
@@ -1538,9 +1538,9 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) {
if (Offset == 0)
return Base;
- unsigned AS = cast<PointerType>(CE->getType())->getAddressSpace();
+ assert(CE->getType()->isPointerTy() && "We must have a pointer type!");
// Truncate/sext the offset to the pointer size.
- unsigned Width = TD.getPointerSizeInBits(AS);
+ unsigned Width = TD.getTypeSizeInBits(CE->getType());
if (Width < 64)
Offset = SignExtend64(Offset, Width);
@@ -1562,7 +1562,7 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) {
// Handle casts to pointers by changing them into casts to the appropriate
// integer type. This promotes constant folding and simplifies this code.
Constant *Op = CE->getOperand(0);
- Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()),
+ Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CE->getType()),
false/*ZExt*/);
return lowerConstant(Op, AP);
}
diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp
index 7f28828a5d..d5d84041b6 100644
--- a/lib/CodeGen/EarlyIfConversion.cpp
+++ b/lib/CodeGen/EarlyIfConversion.cpp
@@ -797,6 +797,5 @@ bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) {
if (tryConvertIf(I->getBlock()))
Changed = true;
- MF.verify(this, "After early if-conversion");
return Changed;
}
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
index eab10f2fd4..35f9e270dd 100644
--- a/lib/CodeGen/IntrinsicLowering.cpp
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -155,21 +155,21 @@ void IntrinsicLowering::AddPrototypes(Module &M) {
Type::getInt8PtrTy(Context),
Type::getInt8PtrTy(Context),
Type::getInt8PtrTy(Context),
- TD.getIntPtrType(Context), (Type *)0);
+ TD.getIntPtrType(Context, 0), (Type *)0);
break;
case Intrinsic::memmove:
M.getOrInsertFunction("memmove",
Type::getInt8PtrTy(Context),
Type::getInt8PtrTy(Context),
Type::getInt8PtrTy(Context),
- TD.getIntPtrType(Context), (Type *)0);
+ TD.getIntPtrType(Context, 0), (Type *)0);
break;
case Intrinsic::memset:
M.getOrInsertFunction("memset",
Type::getInt8PtrTy(Context),
Type::getInt8PtrTy(Context),
Type::getInt32Ty(M.getContext()),
- TD.getIntPtrType(Context), (Type *)0);
+ TD.getIntPtrType(Context, 0), (Type *)0);
break;
case Intrinsic::sqrt:
EnsureFPIntrinsicsExist(M, I, "sqrtf", "sqrt", "sqrtl");
@@ -497,7 +497,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
break; // Strip out annotate intrinsic
case Intrinsic::memcpy: {
- IntegerType *IntPtr = TD.getIntPtrType(Context);
+ IntegerType *IntPtr = TD.getIntPtrType(CI->getArgOperand(0)->getType());
Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
/* isSigned */ false);
Value *Ops[3];
@@ -508,7 +508,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
break;
}
case Intrinsic::memmove: {
- IntegerType *IntPtr = TD.getIntPtrType(Context);
+ IntegerType *IntPtr = TD.getIntPtrType(CI->getArgOperand(0)->getType());
Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
/* isSigned */ false);
Value *Ops[3];
@@ -519,7 +519,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
break;
}
case Intrinsic::memset: {
- IntegerType *IntPtr = TD.getIntPtrType(Context);
+ IntegerType *IntPtr = TD.getIntPtrType(CI->getArgOperand(0)->getType());
Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
/* isSigned */ false);
Value *Ops[3];
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index ce5f414597..0a85179293 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -270,6 +270,8 @@ namespace {
SDValue ReduceLoadWidth(SDNode *N);
SDValue ReduceLoadOpStoreWidth(SDNode *N);
SDValue TransformFPLoadStorePair(SDNode *N);
+ SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
+ SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
SDValue GetDemandedBits(SDValue V, const APInt &Mask);
@@ -8356,15 +8358,21 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
return SDValue();
}
-SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
+// Simplify (build_vec (ext )) to (bitcast (build_vec ))
+SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
+ // We perform this optimization post type-legalization because
+ // the type-legalizer often scalarizes integer-promoted vectors.
+ // Performing this optimization before may create bit-casts which
+ // will be type-legalized to complex code sequences.
+ // We perform this optimization only before the operation legalizer because we
+ // may introduce illegal operations.
+ if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
+ return SDValue();
+
unsigned NumInScalars = N->getNumOperands();
DebugLoc dl = N->getDebugLoc();
EVT VT = N->getValueType(0);
- // A vector built entirely of undefs is undef.
- if (ISD::allOperandsUndef(N))
- return DAG.getUNDEF(VT);
-
// Check to see if this is a BUILD_VECTOR of a bunch of values
// which come from any_extend or zero_extend nodes. If so, we can create
// a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
@@ -8407,64 +8415,141 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
// In order to have valid types, all of the inputs must be extended from the
// same source type and all of the inputs must be any or zero extend.
// Scalar sizes must be a power of two.
- EVT OutScalarTy = N->getValueType(0).getScalarType();
+ EVT OutScalarTy = VT.getScalarType();
bool ValidTypes = SourceType != MVT::Other &&
isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
isPowerOf2_32(SourceType.getSizeInBits());
- // We perform this optimization post type-legalization because
- // the type-legalizer often scalarizes integer-promoted vectors.
- // Performing this optimization before may create bit-casts which
- // will be type-legalized to complex code sequences.
- // We perform this optimization only before the operation legalizer because we
- // may introduce illegal operations.
// Create a new simpler BUILD_VECTOR sequence which other optimizations can
// turn into a single shuffle instruction.
- if ((Level == AfterLegalizeVectorOps || Level == AfterLegalizeTypes) &&
- ValidTypes) {
- bool isLE = TLI.isLittleEndian();
- unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
- assert(ElemRatio > 1 && "Invalid element size ratio");
- SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
- DAG.getConstant(0, SourceType);
-
- unsigned NewBVElems = ElemRatio * N->getValueType(0).getVectorNumElements();
- SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
-
- // Populate the new build_vector
- for (unsigned i=0; i < N->getNumOperands(); ++i) {
- SDValue Cast = N->getOperand(i);
- assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
- Cast.getOpcode() == ISD::ZERO_EXTEND ||
- Cast.getOpcode() == ISD::UNDEF) && "Invalid cast opcode");
- SDValue In;
- if (Cast.getOpcode() == ISD::UNDEF)
- In = DAG.getUNDEF(SourceType);
- else
- In = Cast->getOperand(0);
- unsigned Index = isLE ? (i * ElemRatio) :
- (i * ElemRatio + (ElemRatio - 1));
+ if (!ValidTypes)
+ return SDValue();
+
+ bool isLE = TLI.isLittleEndian();
+ unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
+ assert(ElemRatio > 1 && "Invalid element size ratio");
+ SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
+ DAG.getConstant(0, SourceType);
+
+ unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
+ SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
+
+ // Populate the new build_vector
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ SDValue Cast = N->getOperand(i);
+ assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
+ Cast.getOpcode() == ISD::ZERO_EXTEND ||
+ Cast.getOpcode() == ISD::UNDEF) && "Invalid cast opcode");
+ SDValue In;
+ if (Cast.getOpcode() == ISD::UNDEF)
+ In = DAG.getUNDEF(SourceType);
+ else
+ In = Cast->getOperand(0);
+ unsigned Index = isLE ? (i * ElemRatio) :
+ (i * ElemRatio + (ElemRatio - 1));
+
+ assert(Index < Ops.size() && "Invalid index");
+ Ops[Index] = In;
+ }
+
+ // The type of the new BUILD_VECTOR node.
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
+ assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
+ "Invalid vector size");
+ // Check if the new vector type is legal.
+ if (!isTypeLegal(VecVT)) return SDValue();
+
+ // Make the new BUILD_VECTOR.
+ SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], Ops.size());
+
+ // The new BUILD_VECTOR node has the potential to be further optimized.
+ AddToWorkList(BV.getNode());
+ // Bitcast to the desired type.
+ return DAG.getNode(ISD::BITCAST, dl, VT, BV);
+}
+
+SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
+ EVT VT = N->getValueType(0);
- assert(Index < Ops.size() && "Invalid index");
- Ops[Index] = In;
+ unsigned NumInScalars = N->getNumOperands();
+ DebugLoc dl = N->getDebugLoc();
+
+ EVT SrcVT = MVT::Other;
+ unsigned Opcode = ISD::DELETED_NODE;
+ unsigned NumDefs = 0;
+
+ for (unsigned i = 0; i != NumInScalars; ++i) {
+ SDValue In = N->getOperand(i);
+ unsigned Opc = In.getOpcode();
+
+ if (Opc == ISD::UNDEF)
+ continue;
+
+ // If all scalar values are floats and converted from integers.
+ if (Opcode == ISD::DELETED_NODE &&
+ (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
+ Opcode = Opc;
+ // If not supported by target, bail out.
+ if (TLI.getOperationAction(Opcode, VT) != TargetLowering::Legal &&
+ TLI.getOperationAction(Opcode, VT) != TargetLowering::Custom)
+ return SDValue();
}
+ if (Opc != Opcode)
+ return SDValue();
+
+ EVT InVT = In.getOperand(0).getValueType();
+
+ // If all scalar values are typed differently, bail out. It's chosen to
+ // simplify BUILD_VECTOR of integer types.
+ if (SrcVT == MVT::Other)
+ SrcVT = InVT;
+ if (SrcVT != InVT)
+ return SDValue();
+ NumDefs++;
+ }
+
+ // If the vector has just one element defined, it's not worth to fold it into
+ // a vectorized one.
+ if (NumDefs < 2)
+ return SDValue();
- // The type of the new BUILD_VECTOR node.
- EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
- assert(VecVT.getSizeInBits() == N->getValueType(0).getSizeInBits() &&
- "Invalid vector size");
- // Check if the new vector type is legal.
- if (!isTypeLegal(VecVT)) return SDValue();
+ assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP)
+ && "Should only handle conversion from integer to float.");
+ assert(SrcVT != MVT::Other && "Cannot determine source type!");
- // Make the new BUILD_VECTOR.
- SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
- VecVT, &Ops[0], Ops.size());
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);
+ SmallVector<SDValue, 8> Opnds;
+ for (unsigned i = 0; i != NumInScalars; ++i) {
+ SDValue In = N->getOperand(i);
- // The new BUILD_VECTOR node has the potential to be further optimized.
- AddToWorkList(BV.getNode());
- // Bitcast to the desired type.
- return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), BV);
+ if (In.getOpcode() == ISD::UNDEF)
+ Opnds.push_back(DAG.getUNDEF(SrcVT));
+ else
+ Opnds.push_back(In.getOperand(0));
}
+ SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT,
+ &Opnds[0], Opnds.size());
+ AddToWorkList(BV.getNode());
+
+ return DAG.getNode(Opcode, dl, VT, BV);
+}
+
+SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
+ unsigned NumInScalars = N->getNumOperands();
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+ // A vector built entirely of undefs is undef.
+ if (ISD::allOperandsUndef(N))
+ return DAG.getUNDEF(VT);
+
+ SDValue V = reduceBuildVecExtToExtBuildVec(N);
+ if (V.getNode())
+ return V;
+
+ V = reduceBuildVecConvertToConvertBuildVec(N);
+ if (V.getNode())
+ return V;
// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
// operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
@@ -8549,7 +8634,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
return SDValue();
// Widen the input vector by adding undef values.
- VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT,
+ VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
VecIn1, DAG.getUNDEF(VecIn1.getValueType()));
}
@@ -8570,7 +8655,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
SDValue Ops[2];
Ops[0] = VecIn1;
Ops[1] = VecIn2;
- return DAG.getVectorShuffle(VT, N->getDebugLoc(), Ops[0], Ops[1], &Mask[0]);
+ return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], &Mask[0]);
}
return SDValue();
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 4854cf7b26..2ddc07cc63 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -101,8 +101,7 @@ bool FastISel::hasTrivialKill(const Value *V) const {
// No-op casts are trivially coalesced by fast-isel.
if (const CastInst *Cast = dyn_cast<CastInst>(I))
- if (Cast->isNoopCast(TD.getIntPtrType(Cast->getContext())) &&
- !hasTrivialKill(Cast->getOperand(0)))
+ if (Cast->isNoopCast(TD) && !hasTrivialKill(Cast->getOperand(0)))
return false;
// GEPs with all zero indices are trivially coalesced by fast-isel.
@@ -175,7 +174,7 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {
// Translate this as an integer zero so that it can be
// local-CSE'd with actual integer zeros.
Reg =
- getRegForValue(Constant::getNullValue(TD.getIntPtrType(V->getContext())));
+ getRegForValue(Constant::getNullValue(TD.getIntPtrType(V->getType())));
} else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
if (CF->isNullValue()) {
Reg = TargetMaterializeFloatZero(CF);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 183416f3fd..d661971bb8 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -3804,7 +3804,8 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
// Emit a library call.
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
- Entry.Ty = TLI.getDataLayout()->getIntPtrType(*getContext());
+ unsigned AS = SrcPtrInfo.getAddrSpace();
+ Entry.Ty = TLI.getDataLayout()->getIntPtrType(*getContext(), AS);
Entry.Node = Dst; Args.push_back(Entry);
Entry.Node = Src; Args.push_back(Entry);
Entry.Node = Size; Args.push_back(Entry);
@@ -3859,7 +3860,8 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
// Emit a library call.
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
- Entry.Ty = TLI.getDataLayout()->getIntPtrType(*getContext());
+ unsigned AS = SrcPtrInfo.getAddrSpace();
+ Entry.Ty = TLI.getDataLayout()->getIntPtrType(*getContext(), AS);
Entry.Node = Dst; Args.push_back(Entry);
Entry.Node = Src; Args.push_back(Entry);
Entry.Node = Size; Args.push_back(Entry);
@@ -3908,7 +3910,8 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
return Result;
// Emit a library call.
- Type *IntPtrTy = TLI.getDataLayout()->getIntPtrType(*getContext());
+ unsigned AS = DstPtrInfo.getAddrSpace();
+ Type *IntPtrTy = TLI.getDataLayout()->getIntPtrType(*getContext(), AS);
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Node = Dst; Entry.Ty = IntPtrTy;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 30a3fc2ca5..6c9d001a1f 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -2604,14 +2604,14 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB;
// Update machine-CFG edges with unique successors.
- SmallVector<BasicBlock*, 32> succs;
- succs.reserve(I.getNumSuccessors());
- for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i)
- succs.push_back(I.getSuccessor(i));
- array_pod_sort(succs.begin(), succs.end());
- succs.erase(std::unique(succs.begin(), succs.end()), succs.end());
- for (unsigned i = 0, e = succs.size(); i != e; ++i) {
- MachineBasicBlock *Succ = FuncInfo.MBBMap[succs[i]];
+ SmallSet<BasicBlock*, 32> Done;
+ for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) {
+ BasicBlock *BB = I.getSuccessor(i);
+ bool Inserted = Done.insert(BB);
+ if (!Inserted)
+ continue;
+
+ MachineBasicBlock *Succ = FuncInfo.MBBMap[BB];
addSuccessorWithWeight(IndirectBrMBB, Succ);
}
diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp
index 94a2542e7a..99f6ec691a 100644
--- a/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -645,16 +645,17 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
}
case Instruction::PtrToInt: {
GenericValue GV = getConstantValue(Op0);
- unsigned AS = cast<PointerType>(CE->getOperand(1)->getType())
- ->getAddressSpace();
- uint32_t PtrWidth = TD->getPointerSizeInBits(AS);
+ assert(CE->getOperand(1)->getType()->isPointerTy() &&
+ "Must be a pointer type!");
+ uint32_t PtrWidth = TD->getTypeSizeInBits(CE->getOperand(1)->getType());
GV.IntVal = APInt(PtrWidth, uintptr_t(GV.PointerVal));
return GV;
}
case Instruction::IntToPtr: {
GenericValue GV = getConstantValue(Op0);
- unsigned AS = cast<PointerType>(CE->getType())->getAddressSpace();
- uint32_t PtrWidth = TD->getPointerSizeInBits(AS);
+ assert(CE->getOperand(1)->getType()->isPointerTy() &&
+ "Must be a pointer type!");
+ uint32_t PtrWidth = TD->getTypeSizeInBits(CE->getType());
if (PtrWidth != GV.IntVal.getBitWidth())
GV.IntVal = GV.IntVal.zextOrTrunc(PtrWidth);
assert(GV.IntVal.getBitWidth() <= 64 && "Bad pointer width");
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index c1f8baed1a..ff05c82aec 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -190,7 +190,7 @@ void RuntimeDyldImpl::emitCommonSymbols(ObjectImage &Obj,
if (!Addr)
report_fatal_error("Unable to allocate memory for common symbols!");
uint64_t Offset = 0;
- Sections.push_back(SectionEntry(Addr, TotalSize, TotalSize, 0));
+ Sections.push_back(SectionEntry(StringRef(), Addr, TotalSize, TotalSize, 0));
memset(Addr, 0, TotalSize);
DEBUG(dbgs() << "emitCommonSection SectionID: " << SectionID
@@ -233,10 +233,12 @@ unsigned RuntimeDyldImpl::emitSection(ObjectImage &Obj,
bool IsVirtual;
bool IsZeroInit;
uint64_t DataSize;
+ StringRef Name;
Check(Section.isRequiredForExecution(IsRequired));
Check(Section.isVirtual(IsVirtual));
Check(Section.isZeroInit(IsZeroInit));
Check(Section.getSize(DataSize));
+ Check(Section.getName(Name));
unsigned Allocate;
unsigned SectionID = Sections.size();
@@ -264,6 +266,7 @@ unsigned RuntimeDyldImpl::emitSection(ObjectImage &Obj,
memcpy(Addr, pData, DataSize);
DEBUG(dbgs() << "emitSection SectionID: " << SectionID
+ << " Name: " << Name
<< " obj addr: " << format("%p", pData)
<< " new addr: " << format("%p", Addr)
<< " DataSize: " << DataSize
@@ -279,6 +282,7 @@ unsigned RuntimeDyldImpl::emitSection(ObjectImage &Obj,
Allocate = 0;
Addr = 0;
DEBUG(dbgs() << "emitSection SectionID: " << SectionID
+ << " Name: " << Name
<< " obj addr: " << format("%p", data.data())
<< " new addr: 0"
<< " DataSize: " << DataSize
@@ -287,7 +291,8 @@ unsigned RuntimeDyldImpl::emitSection(ObjectImage &Obj,
<< "\n");
}
- Sections.push_back(SectionEntry(Addr, Allocate, DataSize,(uintptr_t)pData));
+ Sections.push_back(SectionEntry(Name, Addr, Allocate, DataSize,
+ (uintptr_t)pData));
return SectionID;
}
@@ -353,6 +358,24 @@ uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr) {
StubAddr++;
*StubAddr = NopInstr;
return Addr;
+ } else if (Arch == Triple::ppc64) {
+ // PowerPC64 stub: the address points to a function descriptor
+ // instead of the function itself. Load the function address
+ // on r11 and sets it to control register. Also loads the function
+ // TOC in r2 and environment pointer to r11.
+ writeInt32BE(Addr, 0x3D800000); // lis r12, highest(addr)
+ writeInt32BE(Addr+4, 0x618C0000); // ori r12, higher(addr)
+ writeInt32BE(Addr+8, 0x798C07C6); // sldi r12, r12, 32
+ writeInt32BE(Addr+12, 0x658C0000); // oris r12, r12, h(addr)
+ writeInt32BE(Addr+16, 0x618C0000); // ori r12, r12, l(addr)
+ writeInt32BE(Addr+20, 0xF8410028); // std r2, 40(r1)
+ writeInt32BE(Addr+24, 0xE96C0000); // ld r11, 0(r12)
+ writeInt32BE(Addr+28, 0xE84C0008); // ld r2, 0(r12)
+ writeInt32BE(Addr+32, 0x7D6903A6); // mtctr r11
+ writeInt32BE(Addr+36, 0xE96C0010); // ld r11, 16(r2)
+ writeInt32BE(Addr+40, 0x4E800420); // bctr
+
+ return Addr;
}
return Addr;
}
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index 08aba64e46..1073c6fc52 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -30,6 +30,14 @@ using namespace llvm::object;
namespace {
+static inline
+error_code check(error_code Err) {
+ if (Err) {
+ report_fatal_error(Err.message());
+ }
+ return Err;
+}
+
template<support::endianness target_endianness, bool is64Bits>
class DyldELFObject : public ELFObjectFile<target_endianness, is64Bits> {
LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits)
@@ -340,6 +348,179 @@ void RuntimeDyldELF::resolveMIPSRelocation(uint8_t *LocalAddress,
}
}
+// Return the .TOC. section address to R_PPC64_TOC relocations.
+uint64_t RuntimeDyldELF::findPPC64TOC() const {
+ // The TOC consists of sections .got, .toc, .tocbss, .plt in that
+ // order. The TOC starts where the first of these sections starts.
+ SectionList::const_iterator it = Sections.begin();
+ SectionList::const_iterator ite = Sections.end();
+ for (; it != ite; ++it) {
+ if (it->Name == ".got" ||
+ it->Name == ".toc" ||
+ it->Name == ".tocbss" ||
+ it->Name == ".plt")
+ break;
+ }
+ if (it == ite) {
+ // This may happen for
+ // * references to TOC base base (sym@toc, .odp relocation) without
+ // a .toc directive.
+ // In this case just use the first section (which is usually
+ // the .odp) since the code won't reference the .toc base
+ // directly.
+ it = Sections.begin();
+ }
+ assert (it != ite);
+ // Per the ppc64-elf-linux ABI, The TOC base is TOC value plus 0x8000
+ // thus permitting a full 64 Kbytes segment.
+ return it->LoadAddress + 0x8000;
+}
+
+// Returns the sections and offset associated with the ODP entry referenced
+// by Symbol.
+void RuntimeDyldELF::findOPDEntrySection(ObjectImage &Obj,
+ ObjSectionToIDMap &LocalSections,
+ RelocationValueRef &Rel) {
+ // Get the ELF symbol value (st_value) to compare with Relocation offset in
+ // .opd entries
+
+ error_code err;
+ for (section_iterator si = Obj.begin_sections(),
+ se = Obj.end_sections(); si != se; si.increment(err)) {
+ StringRef SectionName;
+ check(si->getName(SectionName));
+ if (SectionName != ".opd")
+ continue;
+
+ for (relocation_iterator i = si->begin_relocations(),
+ e = si->end_relocations(); i != e;) {
+ check(err);
+
+ // The R_PPC64_ADDR64 relocation indicates the first field
+ // of a .opd entry
+ uint64_t TypeFunc;
+ check(i->getType(TypeFunc));
+ if (TypeFunc != ELF::R_PPC64_ADDR64) {
+ i.increment(err);
+ continue;
+ }
+
+ SymbolRef TargetSymbol;
+ uint64_t TargetSymbolOffset;
+ int64_t TargetAdditionalInfo;
+ check(i->getSymbol(TargetSymbol));
+ check(i->getOffset(TargetSymbolOffset));
+ check(i->getAdditionalInfo(TargetAdditionalInfo));
+
+ i = i.increment(err);
+ if (i == e)
+ break;
+ check(err);
+
+ // Just check if following relocation is a R_PPC64_TOC
+ uint64_t TypeTOC;
+ check(i->getType(TypeTOC));
+ if (TypeTOC != ELF::R_PPC64_TOC)
+ continue;
+
+ // Finally compares the Symbol value and the target symbol offset
+ // to check if this .opd entry refers to the symbol the relocation
+ // points to.
+ if (Rel.Addend != (intptr_t)TargetSymbolOffset)
+ continue;
+
+ section_iterator tsi(Obj.end_sections());
+ check(TargetSymbol.getSection(tsi));
+ Rel.SectionID = findOrEmitSection(Obj, (*tsi), true, LocalSections);
+ Rel.Addend = (intptr_t)TargetAdditionalInfo;
+ return;
+ }
+ }
+ llvm_unreachable("Attempting to get address of ODP entry!");
+}
+
+// Relocation masks following the #lo(value), #hi(value), #higher(value),
+// and #highest(value) macros defined in section 4.5.1. Relocation Types
+// in PPC-elf64abi document.
+//
+static inline
+uint16_t applyPPClo (uint64_t value)
+{
+ return value & 0xffff;
+}
+
+static inline
+uint16_t applyPPChi (uint64_t value)
+{
+ return (value >> 16) & 0xffff;
+}
+
+static inline
+uint16_t applyPPChigher (uint64_t value)
+{
+ return (value >> 32) & 0xffff;
+}
+
+static inline
+uint16_t applyPPChighest (uint64_t value)
+{
+ return (value >> 48) & 0xffff;
+}
+
+void RuntimeDyldELF::resolvePPC64Relocation(uint8_t *LocalAddress,
+ uint64_t FinalAddress,
+ uint64_t Value,
+ uint32_t Type,
+ int64_t Addend) {
+ switch (Type) {
+ default:
+ llvm_unreachable("Relocation type not implemented yet!");
+ break;
+ case ELF::R_PPC64_ADDR16_LO :
+ writeInt16BE(LocalAddress, applyPPClo (Value + Addend));
+ break;
+ case ELF::R_PPC64_ADDR16_HI :
+ writeInt16BE(LocalAddress, applyPPChi (Value + Addend));
+ break;
+ case ELF::R_PPC64_ADDR16_HIGHER :
+ writeInt16BE(LocalAddress, applyPPChigher (Value + Addend));
+ break;
+ case ELF::R_PPC64_ADDR16_HIGHEST :
+ writeInt16BE(LocalAddress, applyPPChighest (Value + Addend));
+ break;
+ case ELF::R_PPC64_ADDR14 : {
+ assert(((Value + Addend) & 3) == 0);
+ // Preserve the AA/LK bits in the branch instruction
+ uint8_t aalk = *(LocalAddress+3);
+ writeInt16BE(LocalAddress + 2, (aalk & 3) | ((Value + Addend) & 0xfffc));
+ } break;
+ case ELF::R_PPC64_REL24 : {
+ int32_t delta = static_cast<int32_t>(Value - FinalAddress + Addend);
+ if (SignExtend32<24>(delta) != delta)
+ llvm_unreachable("Relocation R_PPC64_REL24 overflow");
+ // Generates a 'bl <address>' instruction
+ writeInt32BE(LocalAddress, 0x48000001 | (delta & 0x03FFFFFC));
+ } break;
+ case ELF::R_PPC64_ADDR64 :
+ writeInt64BE(LocalAddress, Value + Addend);
+ break;
+ case ELF::R_PPC64_TOC :
+ writeInt64BE(LocalAddress, findPPC64TOC());
+ break;
+ case ELF::R_PPC64_TOC16 : {
+ uint64_t TOCStart = findPPC64TOC();
+ Value = applyPPClo((Value + Addend) - TOCStart);
+ writeInt16BE(LocalAddress, applyPPClo(Value));
+ } break;
+ case ELF::R_PPC64_TOC16_DS : {
+ uint64_t TOCStart = findPPC64TOC();
+ Value = ((Value + Addend) - TOCStart);
+ writeInt16BE(LocalAddress, applyPPClo(Value));
+ } break;
+ }
+}
+
+
void RuntimeDyldELF::resolveRelocation(uint8_t *LocalAddress,
uint64_t FinalAddress,
uint64_t Value,
@@ -366,6 +547,9 @@ void RuntimeDyldELF::resolveRelocation(uint8_t *LocalAddress,
(uint32_t)(Value & 0xffffffffL), Type,
(uint32_t)(Addend & 0xffffffffL));
break;
+ case Triple::ppc64:
+ resolvePPC64Relocation(LocalAddress, FinalAddress, Value, Type, Addend);
+ break;
default: llvm_unreachable("Unsupported CPU type!");
}
}
@@ -390,6 +574,8 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
RelocationValueRef Value;
// First search for the symbol in the local symbol table
SymbolTableMap::const_iterator lsi = Symbols.find(TargetName.data());
+ SymbolRef::Type SymType;
+ Symbol.getType(SymType);
if (lsi != Symbols.end()) {
Value.SectionID = lsi->second.first;
Value.Addend = lsi->second.second;
@@ -401,8 +587,6 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
Value.SectionID = gsi->second.first;
Value.Addend = gsi->second.second;
} else {
- SymbolRef::Type SymType;
- Symbol.getType(SymType);
switch (SymType) {
case SymbolRef::ST_Debug: {
// TODO: Now ELF SymbolRef::ST_Debug = STT_SECTION, it's not obviously
@@ -446,7 +630,7 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
SectionEntry &Section = Sections[Rel.SectionID];
uint8_t *Target = Section.Address + Rel.Offset;
- // Look up for existing stub.
+ // Look for an existing stub.
StubMap::const_iterator i = Stubs.find(Value);
if (i != Stubs.end()) {
resolveRelocation(Target, (uint64_t)Target, (uint64_t)Section.Address +
@@ -516,6 +700,93 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
Section.StubOffset, RelType, 0);
Section.StubOffset += getMaxStubSize();
}
+ } else if (Arch == Triple::ppc64) {
+ if (RelType == ELF::R_PPC64_REL24) {
+ // A PPC branch relocation will need a stub function if the target is
+ // an external symbol (Symbol::ST_Unknown) or if the target address
+ // is not within the signed 24-bits branch address.
+ SectionEntry &Section = Sections[Rel.SectionID];
+ uint8_t *Target = Section.Address + Rel.Offset;
+ bool RangeOverflow = false;
+ if (SymType != SymbolRef::ST_Unknown) {
+ // A function call may points to the .opd entry, so the final symbol value
+ // in calculated based in the relocation values in .opd section.
+ findOPDEntrySection(Obj, ObjSectionToID, Value);
+ uint8_t *RelocTarget = Sections[Value.SectionID].Address + Value.Addend;
+ int32_t delta = static_cast<int32_t>(Target - RelocTarget);
+ // If it is within 24-bits branch range, just set the branch target
+ if (SignExtend32<24>(delta) == delta) {
+ RelocationEntry RE(Rel.SectionID, Rel.Offset, RelType, Value.Addend);
+ if (Value.SymbolName)
+ addRelocationForSymbol(RE, Value.SymbolName);
+ else
+ addRelocationForSection(RE, Value.SectionID);
+ } else {
+ RangeOverflow = true;
+ }
+ }
+ if (SymType == SymbolRef::ST_Unknown || RangeOverflow == true) {
+ // It is an external symbol (SymbolRef::ST_Unknown) or within a range
+ // larger than 24-bits.
+ StubMap::const_iterator i = Stubs.find(Value);
+ if (i != Stubs.end()) {
+ // Symbol function stub already created, just relocate to it
+ resolveRelocation(Target, (uint64_t)Target, (uint64_t)Section.Address
+ + i->second, RelType, 0);
+ DEBUG(dbgs() << " Stub function found\n");
+ } else {
+ // Create a new stub function.
+ DEBUG(dbgs() << " Create a new stub function\n");
+ Stubs[Value] = Section.StubOffset;
+ uint8_t *StubTargetAddr = createStubFunction(Section.Address +
+ Section.StubOffset);
+ RelocationEntry RE(Rel.SectionID, StubTargetAddr - Section.Address,
+ ELF::R_PPC64_ADDR64, Value.Addend);
+
+ // Generates the 64-bits address loads as exemplified in section
+ // 4.5.1 in PPC64 ELF ABI.
+ RelocationEntry REhst(Rel.SectionID,
+ StubTargetAddr - Section.Address + 2,
+ ELF::R_PPC64_ADDR16_HIGHEST, Value.Addend);
+ RelocationEntry REhr(Rel.SectionID,
+ StubTargetAddr - Section.Address + 6,
+ ELF::R_PPC64_ADDR16_HIGHER, Value.Addend);
+ RelocationEntry REh(Rel.SectionID,
+ StubTargetAddr - Section.Address + 14,
+ ELF::R_PPC64_ADDR16_HI, Value.Addend);
+ RelocationEntry REl(Rel.SectionID,
+ StubTargetAddr - Section.Address + 18,
+ ELF::R_PPC64_ADDR16_LO, Value.Addend);
+
+ if (Value.SymbolName) {
+ addRelocationForSymbol(REhst, Value.SymbolName);
+ addRelocationForSymbol(REhr, Value.SymbolName);
+ addRelocationForSymbol(REh, Value.SymbolName);
+ addRelocationForSymbol(REl, Value.SymbolName);
+ } else {
+ addRelocationForSection(REhst, Value.SectionID);
+ addRelocationForSection(REhr, Value.SectionID);
+ addRelocationForSection(REh, Value.SectionID);
+ addRelocationForSection(REl, Value.SectionID);
+ }
+
+ resolveRelocation(Target, (uint64_t)Target, (uint64_t)Section.Address
+ + Section.StubOffset, RelType, 0);
+ if (SymType == SymbolRef::ST_Unknown)
+ // Restore the TOC for external calls
+ writeInt32BE(Target+4, 0xE8410028); // ld r2,40(r1)
+ Section.StubOffset += getMaxStubSize();
+ }
+ }
+ } else {
+ RelocationEntry RE(Rel.SectionID, Rel.Offset, RelType, Value.Addend);
+ // Extra check to avoid relocation againt empty symbols (usually
+ // the R_PPC64_TOC).
+ if (Value.SymbolName && !TargetName.empty())
+ addRelocationForSymbol(RE, Value.SymbolName);
+ else
+ addRelocationForSection(RE, Value.SectionID);
+ }
} else {
RelocationEntry RE(Rel.SectionID, Rel.Offset, RelType, Value.Addend);
if (Value.SymbolName)
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
index 3011a06537..6c31f0dc12 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
@@ -46,6 +46,12 @@ protected:
uint32_t Type,
int32_t Addend);
+ void resolvePPC64Relocation(uint8_t *LocalAddress,
+ uint64_t FinalAddress,
+ uint64_t Value,
+ uint32_t Type,
+ int64_t Addend);
+
virtual void resolveRelocation(uint8_t *LocalAddress,
uint64_t FinalAddress,
uint64_t Value,
@@ -60,6 +66,11 @@ protected:
virtual ObjectImage *createObjectImage(ObjectBuffer *InputBuffer);
+ uint64_t findPPC64TOC() const;
+ void findOPDEntrySection(ObjectImage &Obj,
+ ObjSectionToIDMap &LocalSections,
+ RelocationValueRef &Rel);
+
public:
RuntimeDyldELF(RTDyldMemoryManager *mm)
: RuntimeDyldImpl(mm) {}
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
index a9733407ec..45633e735c 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
@@ -24,6 +24,8 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/SwapByteOrder.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/system_error.h"
#include <map>
@@ -41,6 +43,9 @@ class Twine;
/// linker.
class SectionEntry {
public:
+ /// Name - section name.
+ StringRef Name;
+
/// Address - address in the linker's memory where the section resides.
uint8_t *Address;
@@ -61,9 +66,9 @@ public:
/// for calculating relocations in some object formats (like MachO).
uintptr_t ObjAddress;
- SectionEntry(uint8_t *address, size_t size, uintptr_t stubOffset,
- uintptr_t objAddress)
- : Address(address), Size(size), LoadAddress((uintptr_t)address),
+ SectionEntry(StringRef name, uint8_t *address, size_t size,
+ uintptr_t stubOffset, uintptr_t objAddress)
+ : Name(name), Address(address), Size(size), LoadAddress((uintptr_t)address),
StubOffset(stubOffset), ObjAddress(objAddress) {}
};
@@ -163,6 +168,8 @@ protected:
return 8; // 32-bit instruction and 32-bit address
else if (Arch == Triple::mipsel)
return 16;
+ else if (Arch == Triple::ppc64)
+ return 44;
else
return 0;
}
@@ -185,6 +192,35 @@ protected:
return (uint8_t*)Sections[SectionID].Address;
}
+ void writeInt16BE(uint8_t *Addr, uint16_t Value) {
+ if (sys::isLittleEndianHost())
+ Value = sys::SwapByteOrder(Value);
+ *Addr = (Value >> 8) & 0xFF;
+ *(Addr+1) = Value & 0xFF;
+ }
+
+ void writeInt32BE(uint8_t *Addr, uint32_t Value) {
+ if (sys::isLittleEndianHost())
+ Value = sys::SwapByteOrder(Value);
+ *Addr = (Value >> 24) & 0xFF;
+ *(Addr+1) = (Value >> 16) & 0xFF;
+ *(Addr+2) = (Value >> 8) & 0xFF;
+ *(Addr+3) = Value & 0xFF;
+ }
+
+ void writeInt64BE(uint8_t *Addr, uint64_t Value) {
+ if (sys::isLittleEndianHost())
+ Value = sys::SwapByteOrder(Value);
+ *Addr = (Value >> 56) & 0xFF;
+ *(Addr+1) = (Value >> 48) & 0xFF;
+ *(Addr+2) = (Value >> 40) & 0xFF;
+ *(Addr+3) = (Value >> 32) & 0xFF;
+ *(Addr+4) = (Value >> 24) & 0xFF;
+ *(Addr+5) = (Value >> 16) & 0xFF;
+ *(Addr+6) = (Value >> 8) & 0xFF;
+ *(Addr+7) = Value & 0xFF;
+ }
+
/// \brief Given the common symbols discovered in the object file, emit a
/// new section for them and update the symbol mappings in the object and
/// symbol table.
diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp
index 02b4c3c4c1..a94d51bb74 100644
--- a/lib/MC/ELFObjectWriter.cpp
+++ b/lib/MC/ELFObjectWriter.cpp
@@ -133,6 +133,11 @@ class ELFObjectWriter : public MCObjectWriter {
bool IsPCRel) const {
return TargetObjectWriter->ExplicitRelSym(Asm, Target, F, Fixup, IsPCRel);
}
+ const MCSymbol *undefinedExplicitRelSym(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ return TargetObjectWriter->undefinedExplicitRelSym(Target, Fixup, IsPCRel);
+ }
bool is64Bit() const { return TargetObjectWriter->is64Bit(); }
bool hasRelocationAddend() const {
@@ -639,7 +644,7 @@ const MCSymbol *ELFObjectWriter::SymbolToReloc(const MCAssembler &Asm,
if (ASymbol.isUndefined()) {
if (Renamed)
return Renamed;
- return &ASymbol;
+ return undefinedExplicitRelSym(Target, Fixup, IsPCRel);
}
if (SD.isExternal()) {
@@ -721,10 +726,13 @@ void ELFObjectWriter::RecordRelocation(const MCAssembler &Asm,
MCSymbolData &SD = Asm.getSymbolData(ASymbol);
MCFragment *F = SD.getFragment();
- Index = F->getParent()->getOrdinal() + 1;
-
- // Offset of the symbol in the section
- Value += Layout.getSymbolOffset(&SD);
+ if (F) {
+ Index = F->getParent()->getOrdinal() + 1;
+ // Offset of the symbol in the section
+ Value += Layout.getSymbolOffset(&SD);
+ } else {
+ Index = 0;
+ }
} else {
if (Asm.getSymbolData(Symbol).getFlags() & ELF_Other_Weakref)
WeakrefUsedInReloc.insert(RelocSymbol);
diff --git a/lib/MC/MCELFObjectTargetWriter.cpp b/lib/MC/MCELFObjectTargetWriter.cpp
index 6eb6914f4b..74cd042a0f 100644
--- a/lib/MC/MCELFObjectTargetWriter.cpp
+++ b/lib/MC/MCELFObjectTargetWriter.cpp
@@ -9,6 +9,8 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCValue.h"
using namespace llvm;
@@ -35,6 +37,12 @@ const MCSymbol *MCELFObjectTargetWriter::ExplicitRelSym(const MCAssembler &Asm,
return NULL;
}
+const MCSymbol *MCELFObjectTargetWriter::undefinedExplicitRelSym(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ const MCSymbol &Symbol = Target.getSymA()->getSymbol();
+ return &Symbol.AliasedSymbol();
+}
void MCELFObjectTargetWriter::adjustFixupOffset(const MCFixup &Fixup,
uint64_t &RelocOffset) {
diff --git a/lib/MC/MCInstPrinter.cpp b/lib/MC/MCInstPrinter.cpp
index 847bcc0a16..41d90abeeb 100644
--- a/lib/MC/MCInstPrinter.cpp
+++ b/lib/MC/MCInstPrinter.cpp
@@ -36,3 +36,17 @@ void MCInstPrinter::printAnnotation(raw_ostream &OS, StringRef Annot) {
OS << " " << MAI.getCommentString() << " " << Annot;
}
}
+
+/// Utility functions to make adding mark ups simpler.
+StringRef MCInstPrinter::markup(StringRef s) const {
+ if (getUseMarkup())
+ return s;
+ else
+ return "";
+}
+StringRef MCInstPrinter::markup(StringRef a, StringRef b) const {
+ if (getUseMarkup())
+ return a;
+ else
+ return b;
+}
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index b1cc08d56e..a1f0a2a885 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -207,7 +207,7 @@ public:
bool ParseMSInlineAsm(void *AsmLoc, std::string &AsmString,
unsigned &NumOutputs, unsigned &NumInputs,
- SmallVectorImpl<void *> &OpDecls,
+ SmallVectorImpl<std::pair<void *,bool> > &OpDecls,
SmallVectorImpl<std::string> &Constraints,
SmallVectorImpl<std::string> &Clobbers,
const MCInstrInfo *MII,
@@ -3658,7 +3658,8 @@ enum AsmRewriteKind {
AOK_Input,
AOK_Output,
AOK_SizeDirective,
- AOK_Emit
+ AOK_Emit,
+ AOK_Skip
};
struct AsmRewrite {
@@ -3690,14 +3691,16 @@ bool AsmParser::ParseDirectiveEmit(SMLoc IDLoc, ParseStatementInfo &Info) {
bool AsmParser::ParseMSInlineAsm(void *AsmLoc, std::string &AsmString,
unsigned &NumOutputs, unsigned &NumInputs,
- SmallVectorImpl<void *> &OpDecls,
+ SmallVectorImpl<std::pair<void *, bool> > &OpDecls,
SmallVectorImpl<std::string> &Constraints,
SmallVectorImpl<std::string> &Clobbers,
const MCInstrInfo *MII,
const MCInstPrinter *IP,
MCAsmParserSemaCallback &SI) {
- SmallVector<void*, 4> InputDecls;
- SmallVector<void*, 4> OutputDecls;
+ SmallVector<void *, 4> InputDecls;
+ SmallVector<void *, 4> OutputDecls;
+ SmallVector<bool, 4> InputDeclsOffsetOf;
+ SmallVector<bool, 4> OutputDeclsOffsetOf;
SmallVector<std::string, 4> InputConstraints;
SmallVector<std::string, 4> OutputConstraints;
std::set<std::string> ClobberRegs;
@@ -3725,13 +3728,13 @@ bool AsmParser::ParseMSInlineAsm(void *AsmLoc, std::string &AsmString,
// Immediate.
if (Operand->isImm()) {
AsmStrRewrites.push_back(AsmRewrite(AOK_Imm,
- Operand->getStartLoc(),
- Operand->getNameLen()));
+ Operand->getStartLoc(),
+ Operand->getNameLen()));
continue;
}
// Register operand.
- if (Operand->isReg()) {
+ if (Operand->isReg() && !Operand->isOffsetOf()) {
unsigned NumDefs = Desc.getNumDefs();
// Clobber.
if (NumDefs && Operand->getMCOperandNum() < NumDefs) {
@@ -3749,26 +3752,33 @@ bool AsmParser::ParseMSInlineAsm(void *AsmLoc, std::string &AsmString,
Size);
if (OpDecl) {
bool isOutput = (i == 1) && Desc.mayStore();
- if (Operand->needSizeDirective())
+ if (!Operand->isOffsetOf() && Operand->needSizeDirective())
AsmStrRewrites.push_back(AsmRewrite(AOK_SizeDirective,
- Operand->getStartLoc(), 0,
- Operand->getMemSize()));
-
+ Operand->getStartLoc(), 0,
+ Operand->getMemSize()));
+
+ // Don't emit the offset directive.
+ if (Operand->isOffsetOf())
+ AsmStrRewrites.push_back(AsmRewrite(AOK_Skip,
+ Operand->getOffsetOfLoc(), 7));
+
if (isOutput) {
std::string Constraint = "=";
++InputIdx;
OutputDecls.push_back(OpDecl);
+ OutputDeclsOffsetOf.push_back(Operand->isOffsetOf());
Constraint += Operand->getConstraint().str();
OutputConstraints.push_back(Constraint);
AsmStrRewrites.push_back(AsmRewrite(AOK_Output,
- Operand->getStartLoc(),
- Operand->getNameLen()));
+ Operand->getStartLoc(),
+ Operand->getNameLen()));
} else {
InputDecls.push_back(OpDecl);
+ InputDeclsOffsetOf.push_back(Operand->isOffsetOf());
InputConstraints.push_back(Operand->getConstraint().str());
AsmStrRewrites.push_back(AsmRewrite(AOK_Input,
- Operand->getStartLoc(),
- Operand->getNameLen()));
+ Operand->getStartLoc(),
+ Operand->getNameLen()));
}
}
}
@@ -3789,13 +3799,15 @@ bool AsmParser::ParseMSInlineAsm(void *AsmLoc, std::string &AsmString,
unsigned NumExprs = NumOutputs + NumInputs;
OpDecls.resize(NumExprs);
Constraints.resize(NumExprs);
+ // FIXME: Constraints are hard coded to 'm', but we need an 'r'
+ // constraint for offsetof. This needs to be cleaned up!
for (unsigned i = 0; i < NumOutputs; ++i) {
- OpDecls[i] = OutputDecls[i];
- Constraints[i] = OutputConstraints[i];
+ OpDecls[i] = std::make_pair(OutputDecls[i], OutputDeclsOffsetOf[i]);
+ Constraints[i] = OutputDeclsOffsetOf[i] ? "=r" : OutputConstraints[i];
}
for (unsigned i = 0, j = NumOutputs; i < NumInputs; ++i, ++j) {
- OpDecls[j] = InputDecls[i];
- Constraints[j] = InputConstraints[i];
+ OpDecls[j] = std::make_pair(InputDecls[i], InputDeclsOffsetOf[i]);
+ Constraints[j] = InputDeclsOffsetOf[i] ? "r" : InputConstraints[i];
}
}
@@ -3816,8 +3828,15 @@ bool AsmParser::ParseMSInlineAsm(void *AsmLoc, std::string &AsmString,
OS << StringRef(Start, Loc - Start);
PrevKind = Kind;
+ // Skip the original expression.
+ if (Kind == AOK_Skip) {
+ Start = Loc + (*I).Len;
+ continue;
+ }
+
// Rewrite expressions in $N notation.
switch (Kind) {
+ default: break;
case AOK_Imm:
OS << Twine("$$") + StringRef(Loc, (*I).Len);
break;
diff --git a/lib/TableGen/Error.cpp b/lib/TableGen/Error.cpp
index 5dd688cb67..ad98fba9ba 100644
--- a/lib/TableGen/Error.cpp
+++ b/lib/TableGen/Error.cpp
@@ -16,6 +16,8 @@
#include "llvm/ADT/Twine.h"
#include "llvm/Support/raw_ostream.h"
+#include <cstdlib>
+
namespace llvm {
SourceMgr SrcMgr;
@@ -63,4 +65,14 @@ void PrintError(const TGError &Error) {
PrintError(Error.getLoc(), Error.getMessage());
}
+void PrintFatalError(const std::string &Msg) {
+ PrintError(Twine(Msg));
+ std::exit(1);
+}
+
+void PrintFatalError(ArrayRef<SMLoc> ErrorLoc, const std::string &Msg) {
+ PrintError(ErrorLoc, Msg);
+ std::exit(1);
+}
+
} // end namespace llvm
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 874ef43b90..bf50081cc7 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -296,15 +296,13 @@ class RegConstraint<string C> {
// ARM specific transformation functions and pattern fragments.
//
-// imm_neg_XFORM - Return a imm value packed into the format described for
-// imm_neg defs below.
+// imm_neg_XFORM - Return the negation of an i32 immediate value.
def imm_neg_XFORM : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(-(int)N->getZExtValue(), MVT::i32);
}]>;
-// so_imm_not_XFORM - Return a so_imm value packed into the format described for
-// so_imm_not def below.
-def so_imm_not_XFORM : SDNodeXForm<imm, [{
+// imm_not_XFORM - Return the complement of a i32 immediate value.
+def imm_not_XFORM : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(~(int)N->getZExtValue(), MVT::i32);
}]>;
@@ -327,7 +325,7 @@ def so_imm_neg : Operand<i32>, PatLeaf<(imm), [{
def so_imm_not_asmoperand : AsmOperandClass { let Name = "ARMSOImmNot"; }
def so_imm_not : Operand<i32>, PatLeaf<(imm), [{
return ARM_AM::getSOImmVal(~(uint32_t)N->getZExtValue()) != -1;
- }], so_imm_not_XFORM> {
+ }], imm_not_XFORM> {
let ParserMatchClass = so_imm_not_asmoperand;
}
@@ -3269,6 +3267,8 @@ def : ARMPat<(ARMaddc GPR:$src, imm0_65535_neg:$imm),
// for part of the negation.
def : ARMPat<(ARMadde GPR:$src, so_imm_not:$imm, CPSR),
(SBCri GPR:$src, so_imm_not:$imm)>;
+def : ARMPat<(ARMadde GPR:$src, imm0_65535_neg:$imm, CPSR),
+ (SBCrr GPR:$src, (MOVi16 (imm_not_XFORM imm:$imm)))>;
// Note: These are implemented in C++ code, because they have to generate
// ADD/SUBrs instructions, which use a complex pattern that a xform function
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 37b280f447..e10f4a865e 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -1953,7 +1953,7 @@ def : T2Pat<(ARMadde rGPR:$src, imm0_255_not:$imm, CPSR),
def : T2Pat<(ARMadde rGPR:$src, t2_so_imm_not:$imm, CPSR),
(t2SBCri rGPR:$src, t2_so_imm_not:$imm)>;
def : T2Pat<(ARMadde rGPR:$src, imm0_65535_neg:$imm, CPSR),
- (t2SBCrr rGPR:$src, (t2MOVi16 (imm_neg_XFORM imm:$imm)))>;
+ (t2SBCrr rGPR:$src, (t2MOVi16 (imm_not_XFORM imm:$imm)))>;
// Select Bytes -- for disassembly only
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index 4c44f69f4d..cb3ac4d1f6 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -155,7 +155,8 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
TargetLowering::ArgListEntry Entry;
// First argument: data pointer
- Type *IntPtrTy = TLI.getDataLayout()->getIntPtrType(*DAG.getContext());
+ unsigned AS = DstPtrInfo.getAddrSpace();
+ Type *IntPtrTy = TLI.getDataLayout()->getIntPtrType(*DAG.getContext(), AS);
Entry.Node = Dst;
Entry.Ty = IntPtrTy;
Args.push_back(Entry);
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 30fa6bc2c7..740548adbc 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -79,7 +79,7 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT,
TLInfo(*this),
TSInfo(*this),
FrameLowering(Subtarget),
- STTI(&TLInfo) {
+ STTI(&TLInfo), VTTI(&TLInfo) {
if (!Subtarget.hasARMOps())
report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not "
"support ARM mode execution!");
@@ -113,7 +113,7 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT,
FrameLowering(Subtarget.hasThumb2()
? new ARMFrameLowering(Subtarget)
: (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)),
- STTI(&TLInfo){
+ STTI(&TLInfo), VTTI(&TLInfo) {
}
namespace {
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index 227689f897..ddb38687d0 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -47,7 +47,7 @@ static void printRegImmShift(raw_ostream &O, ARM_AM::ShiftOpc ShOpc,
assert (!(ShOpc == ARM_AM::ror && !ShImm) && "Cannot have ror #0");
O << getShiftOpcStr(ShOpc);
- if (ShOpc != ARM_AM::rrx){
+ if (ShOpc != ARM_AM::rrx) {
O << " ";
if (UseMarkup)
O << "<imm:";
@@ -67,11 +67,9 @@ ARMInstPrinter::ARMInstPrinter(const MCAsmInfo &MAI,
}
void ARMInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
- if (UseMarkup)
- OS << "<reg:";
- OS << getRegisterName(RegNo);
- if (UseMarkup)
- OS << ">";
+ OS << markup("<reg:")
+ << getRegisterName(RegNo)
+ << markup(">");
}
void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
@@ -143,12 +141,10 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
return;
}
- O << ", ";
- if (UseMarkup)
- O << "<imm:";
- O << "#" << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm()));
- if (UseMarkup)
- O << ">";
+ O << ", "
+ << markup("<imm:")
+ << "#" << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm()))
+ << markup(">");
printAnnotation(O, Annot);
return;
}
@@ -332,11 +328,9 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
unsigned Reg = Op.getReg();
printRegName(O, Reg);
} else if (Op.isImm()) {
- if (UseMarkup)
- O << "<imm:";
- O << '#' << Op.getImm();
- if (UseMarkup)
- O << ">";
+ O << markup("<imm:")
+ << '#' << Op.getImm()
+ << markup(">");
} else {
assert(Op.isExpr() && "unknown operand kind in printOperand");
// If a symbolic branch target was added as a constant expression then print
@@ -360,18 +354,9 @@ void ARMInstPrinter::printT2LdrLabelOperand(const MCInst *MI, unsigned OpNum,
if (MO1.isExpr())
O << *MO1.getExpr();
else if (MO1.isImm()) {
- if (UseMarkup)
- O << "<mem:";
- O << "[pc, ";
- if (UseMarkup)
- O << "<imm:";
- O << "#";
- O << MO1.getImm();
- if (UseMarkup)
- O << ">";
- O << "]";
- if (UseMarkup)
- O << ">";
+ O << markup("<mem:") << "[pc, "
+ << markup("<imm:") << "#" << MO1.getImm()
+ << markup(">]>", "]");
}
else
llvm_unreachable("Unknown LDR label operand?");
@@ -424,25 +409,19 @@ void ARMInstPrinter::printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
const MCOperand &MO2 = MI->getOperand(Op+1);
const MCOperand &MO3 = MI->getOperand(Op+2);
- if (UseMarkup)
- O << "<mem:";
- O << "[";
+ O << markup("<mem:") << "[";
printRegName(O, MO1.getReg());
if (!MO2.getReg()) {
if (ARM_AM::getAM2Offset(MO3.getImm())) { // Don't print +0.
- O << ", ";
- if (UseMarkup)
- O << "<imm:";
- O << "#";
- O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()));
- O << ARM_AM::getAM2Offset(MO3.getImm());
- if (UseMarkup)
- O << ">";
+ O << ", "
+ << markup("<imm:")
+ << "#"
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()))
+ << ARM_AM::getAM2Offset(MO3.getImm())
+ << markup(">");
}
- O << "]";
- if (UseMarkup)
- O << ">";
+ O << "]" << markup(">");
return;
}
@@ -452,45 +431,29 @@ void ARMInstPrinter::printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
printRegImmShift(O, ARM_AM::getAM2ShiftOpc(MO3.getImm()),
ARM_AM::getAM2Offset(MO3.getImm()), UseMarkup);
- O << "]";
- if (UseMarkup)
- O << ">";
+ O << "]" << markup(">");
}
void ARMInstPrinter::printAddrModeTBB(const MCInst *MI, unsigned Op,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(Op);
const MCOperand &MO2 = MI->getOperand(Op+1);
- if (UseMarkup)
- O << "<mem:";
- O << "[";
+ O << markup("<mem:") << "[";
printRegName(O, MO1.getReg());
O << ", ";
printRegName(O, MO2.getReg());
- O << "]";
- if (UseMarkup)
- O << ">";
+ O << "]" << markup(">");
}
void ARMInstPrinter::printAddrModeTBH(const MCInst *MI, unsigned Op,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(Op);
const MCOperand &MO2 = MI->getOperand(Op+1);
- if (UseMarkup)
- O << "<mem:";
- O << "[";
+ O << markup("<mem:") << "[";
printRegName(O, MO1.getReg());
O << ", ";
printRegName(O, MO2.getReg());
- O << ", lsl ";
- if (UseMarkup)
- O << "<imm:";
- O << "#1";
- if (UseMarkup)
- O << ">";
- O << "]";
- if (UseMarkup)
- O << ">";
+ O << ", lsl " << markup("<imm:") << "#1" << markup(">") << "]" << markup(">");
}
void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op,
@@ -520,13 +483,10 @@ void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI,
if (!MO1.getReg()) {
unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm());
- if (UseMarkup)
- O << "<imm:";
- O << '#'
- << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm()))
- << ImmOffs;
- if (UseMarkup)
- O << ">";
+ O << markup("<imm:")
+ << '#' << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm()))
+ << ImmOffs
+ << markup(">");
return;
}
@@ -547,13 +507,9 @@ void ARMInstPrinter::printAM3PostIndexOp(const MCInst *MI, unsigned Op,
const MCOperand &MO2 = MI->getOperand(Op+1);
const MCOperand &MO3 = MI->getOperand(Op+2);
- if (UseMarkup)
- O << "<mem:";
- O << "[";
+ O << markup("<mem:") << "[";
printRegName(O, MO1.getReg());
- O << "], ";
- if (UseMarkup)
- O << ">";
+ O << "], " << markup(">");
if (MO2.getReg()) {
O << (char)ARM_AM::getAM3Op(MO3.getImm());
@@ -562,13 +518,11 @@ void ARMInstPrinter::printAM3PostIndexOp(const MCInst *MI, unsigned Op,
}
unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm());
- if (UseMarkup)
- O << "<imm:";
- O << '#'
+ O << markup("<imm:")
+ << '#'
<< ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm()))
- << ImmOffs;
- if (UseMarkup)
- O << ">";
+ << ImmOffs
+ << markup(">");
}
void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
@@ -577,18 +531,13 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
const MCOperand &MO2 = MI->getOperand(Op+1);
const MCOperand &MO3 = MI->getOperand(Op+2);
- if (UseMarkup)
- O << "<mem:";
- O << '[';
+ O << markup("<mem:") << '[';
printRegName(O, MO1.getReg());
if (MO2.getReg()) {
- O << ", ";
- O << getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm()));
+ O << ", " << getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm()));
printRegName(O, MO2.getReg());
- O << ']';
- if (UseMarkup)
- O << ">";
+ O << ']' << markup(">");
return;
}
@@ -597,18 +546,14 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
ARM_AM::AddrOpc op = ARM_AM::getAM3Op(MO3.getImm());
if (ImmOffs || (op == ARM_AM::sub)) {
- O << ", ";
- if (UseMarkup)
- O << "<imm:";
- O << "#"
+ O << ", "
+ << markup("<imm:")
+ << "#"
<< ARM_AM::getAddrOpcStr(op)
- << ImmOffs;
- if (UseMarkup)
- O << ">";
+ << ImmOffs
+ << markup(">");
}
- O << ']';
- if (UseMarkup)
- O << ">";
+ O << ']' << markup(">");
}
void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op,
@@ -642,13 +587,9 @@ void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI,
}
unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm());
- if (UseMarkup)
- O << "<imm:";
- O << '#'
- << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm()))
- << ImmOffs;
- if (UseMarkup)
- O << ">";
+ O << markup("<imm:")
+ << '#' << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())) << ImmOffs
+ << markup(">");
}
void ARMInstPrinter::printPostIdxImm8Operand(const MCInst *MI,
@@ -656,11 +597,9 @@ void ARMInstPrinter::printPostIdxImm8Operand(const MCInst *MI,
raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
unsigned Imm = MO.getImm();
- if (UseMarkup)
- O << "<imm:";
- O << '#' << ((Imm & 256) ? "" : "-") << (Imm & 0xff);
- if (UseMarkup)
- O << ">";
+ O << markup("<imm:")
+ << '#' << ((Imm & 256) ? "" : "-") << (Imm & 0xff)
+ << markup(">");
}
void ARMInstPrinter::printPostIdxRegOperand(const MCInst *MI, unsigned OpNum,
@@ -677,11 +616,9 @@ void ARMInstPrinter::printPostIdxImm8s4Operand(const MCInst *MI,
raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
unsigned Imm = MO.getImm();
- if (UseMarkup)
- O << "<imm:";
- O << '#' << ((Imm & 256) ? "" : "-") << ((Imm & 0xff) << 2);
- if (UseMarkup)
- O << ">";
+ O << markup("<imm:")
+ << '#' << ((Imm & 256) ? "" : "-") << ((Imm & 0xff) << 2)
+ << markup(">");
}
@@ -702,26 +639,20 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
return;
}
- if (UseMarkup)
- O << "<mem:";
- O << "[";
+ O << markup("<mem:") << "[";
printRegName(O, MO1.getReg());
unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm());
unsigned Op = ARM_AM::getAM5Op(MO2.getImm());
if (ImmOffs || Op == ARM_AM::sub) {
- O << ", ";
- if (UseMarkup)
- O << "<imm:";
- O << "#"
+ O << ", "
+ << markup("<imm:")
+ << "#"
<< ARM_AM::getAddrOpcStr(ARM_AM::getAM5Op(MO2.getImm()))
- << ImmOffs * 4;
- if (UseMarkup)
- O << ">";
+ << ImmOffs * 4
+ << markup(">");
}
- O << "]";
- if (UseMarkup)
- O << ">";
+ O << "]" << markup(">");
}
void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum,
@@ -729,29 +660,21 @@ void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum,
const MCOperand &MO1 = MI->getOperand(OpNum);
const MCOperand &MO2 = MI->getOperand(OpNum+1);
- if (UseMarkup)
- O << "<mem:";
- O << "[";
+ O << markup("<mem:") << "[";
printRegName(O, MO1.getReg());
if (MO2.getImm()) {
// FIXME: Both darwin as and GNU as violate ARM docs here.
O << ", :" << (MO2.getImm() << 3);
}
- O << "]";
- if (UseMarkup)
- O << ">";
+ O << "]" << markup(">");
}
void ARMInstPrinter::printAddrMode7Operand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
- if (UseMarkup)
- O << "<mem:";
- O << "[";
+ O << markup("<mem:") << "[";
printRegName(O, MO1.getReg());
- O << "]";
- if (UseMarkup)
- O << ">";
+ O << "]" << markup(">");
}
void ARMInstPrinter::printAddrMode6OffsetOperand(const MCInst *MI,
@@ -774,17 +697,9 @@ void ARMInstPrinter::printBitfieldInvMaskImmOperand(const MCInst *MI,
int32_t lsb = CountTrailingZeros_32(v);
int32_t width = (32 - CountLeadingZeros_32 (v)) - lsb;
assert(MO.isImm() && "Not a valid bf_inv_mask_imm value!");
- if (UseMarkup)
- O << "<imm:";
- O << '#' << lsb;
- if (UseMarkup)
- O << ">";
- O << ", ";
- if (UseMarkup)
- O << "<imm:";
- O << '#' << width;
- if (UseMarkup)
- O << ">";
+ O << markup("<imm:") << '#' << lsb << markup(">")
+ << ", "
+ << markup("<imm:") << '#' << width << markup(">");
}
void ARMInstPrinter::printMemBOption(const MCInst *MI, unsigned OpNum,
@@ -799,20 +714,16 @@ void ARMInstPrinter::printShiftImmOperand(const MCInst *MI, unsigned OpNum,
bool isASR = (ShiftOp & (1 << 5)) != 0;
unsigned Amt = ShiftOp & 0x1f;
if (isASR) {
- O << ", asr ";
- if (UseMarkup)
- O << "<imm:";
- O << "#" << (Amt == 0 ? 32 : Amt);
- if (UseMarkup)
- O << ">";
+ O << ", asr "
+ << markup("<imm:")
+ << "#" << (Amt == 0 ? 32 : Amt)
+ << markup(">");
}
else if (Amt) {
- O << ", lsl ";
- if (UseMarkup)
- O << "<imm:";
- O << "#" << Amt;
- if (UseMarkup)
- O << ">";
+ O << ", lsl "
+ << markup("<imm:")
+ << "#" << Amt
+ << markup(">");
}
}
@@ -822,12 +733,7 @@ void ARMInstPrinter::printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum,
if (Imm == 0)
return;
assert(Imm > 0 && Imm < 32 && "Invalid PKH shift immediate value!");
- O << ", lsl ";
- if (UseMarkup)
- O << "<imm:";
- O << "#" << Imm;
- if (UseMarkup)
- O << ">";
+ O << ", lsl " << markup("<imm:") << "#" << Imm << markup(">");
}
void ARMInstPrinter::printPKHASRShiftImm(const MCInst *MI, unsigned OpNum,
@@ -837,12 +743,7 @@ void ARMInstPrinter::printPKHASRShiftImm(const MCInst *MI, unsigned OpNum,
if (Imm == 0)
Imm = 32;
assert(Imm > 0 && Imm <= 32 && "Invalid PKH shift immediate value!");
- O << ", asr ";
- if (UseMarkup)
- O << "<imm:";
- O << "#" << Imm;
- if (UseMarkup)
- O << ">";
+ O << ", asr " << markup("<imm:") << "#" << Imm << markup(">");
}
void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum,
@@ -1024,35 +925,29 @@ void ARMInstPrinter::printAdrLabelOperand(const MCInst *MI, unsigned OpNum,
int32_t OffImm = (int32_t)MO.getImm();
- if (UseMarkup)
- O << "<imm:";
+ O << markup("<imm:");
if (OffImm == INT32_MIN)
O << "#-0";
else if (OffImm < 0)
O << "#-" << -OffImm;
else
O << "#" << OffImm;
- if (UseMarkup)
- O << ">";
+ O << markup(">");
}
void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
- if (UseMarkup)
- O << "<imm:";
- O << "#" << MI->getOperand(OpNum).getImm() * 4;
- if (UseMarkup)
- O << ">";
+ O << markup("<imm:")
+ << "#" << MI->getOperand(OpNum).getImm() * 4
+ << markup(">");
}
void ARMInstPrinter::printThumbSRImm(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
unsigned Imm = MI->getOperand(OpNum).getImm();
- if (UseMarkup)
- O << "<imm:";
- O << "#" << (Imm == 0 ? 32 : Imm);
- if (UseMarkup)
- O << ">";
+ O << markup("<imm:")
+ << "#" << (Imm == 0 ? 32 : Imm)
+ << markup(">");
}
void ARMInstPrinter::printThumbITMask(const MCInst *MI, unsigned OpNum,
@@ -1082,17 +977,13 @@ void ARMInstPrinter::printThumbAddrModeRROperand(const MCInst *MI, unsigned Op,
return;
}
- if (UseMarkup)
- O << "<mem:";
- O << "[";
+ O << markup("<mem:") << "[";
printRegName(O, MO1.getReg());
if (unsigned RegNum = MO2.getReg()) {
O << ", ";
printRegName(O, RegNum);
}
- O << "]";
- if (UseMarkup)
- O << ">";
+ O << "]" << markup(">");
}
void ARMInstPrinter::printThumbAddrModeImm5SOperand(const MCInst *MI,
@@ -1107,21 +998,15 @@ void ARMInstPrinter::printThumbAddrModeImm5SOperand(const MCInst *MI,
return;
}
- if (UseMarkup)
- O << "<mem:";
- O << "[";
+ O << markup("<mem:") << "[";
printRegName(O, MO1.getReg());
if (unsigned ImmOffs = MO2.getImm()) {
- O << ", ";
- if (UseMarkup)
- O << "<imm:";
- O << "#" << ImmOffs * Scale;
- if (UseMarkup)
- O << ">";
+ O << ", "
+ << markup("<imm:")
+ << "#" << ImmOffs * Scale
+ << markup(">");
}
- O << "]";
- if (UseMarkup)
- O << ">";
+ O << "]" << markup(">");
}
void ARMInstPrinter::printThumbAddrModeImm5S1Operand(const MCInst *MI,
@@ -1175,9 +1060,7 @@ void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
return;
}
- if (UseMarkup)
- O << "<mem:";
- O << "[";
+ O << markup("<mem:") << "[";
printRegName(O, MO1.getReg());
int32_t OffImm = (int32_t)MO2.getImm();
@@ -1186,24 +1069,18 @@ void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
if (OffImm == INT32_MIN)
OffImm = 0;
if (isSub) {
- O << ", ";
- if (UseMarkup)
- O << "<imm:";
- O << "#-" << -OffImm;
- if (UseMarkup)
- O << ">";
+ O << ", "
+ << markup("<imm:")
+ << "#-" << -OffImm
+ << markup(">");
}
else if (OffImm > 0) {
- O << ", ";
- if (UseMarkup)
- O << "<imm:";
- O << "#" << OffImm;
- if (UseMarkup)
- O << ">";
+ O << ", "
+ << markup("<imm:")
+ << "#" << OffImm
+ << markup(">");
}
- O << "]";
- if (UseMarkup)
- O << ">";
+ O << "]" << markup(">");
}
void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI,
@@ -1212,9 +1089,7 @@ void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI,
const MCOperand &MO1 = MI->getOperand(OpNum);
const MCOperand &MO2 = MI->getOperand(OpNum+1);
- if (UseMarkup)
- O << "<mem:";
- O << "[";
+ O << markup("<mem:") << "[";
printRegName(O, MO1.getReg());
int32_t OffImm = (int32_t)MO2.getImm();
@@ -1231,9 +1106,7 @@ void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI,
O << "#" << OffImm;
if (OffImm != 0 && UseMarkup)
O << ">";
- O << "]";
- if (UseMarkup)
- O << ">";
+ O << "]" << markup(">");
}
void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI,
@@ -1247,9 +1120,7 @@ void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI,
return;
}
- if (UseMarkup)
- O << "<mem:";
- O << "[";
+ O << markup("<mem:") << "[";
printRegName(O, MO1.getReg());
int32_t OffImm = (int32_t)MO2.getImm();
@@ -1269,9 +1140,7 @@ void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI,
O << "#" << OffImm;
if (OffImm != 0 && UseMarkup)
O << ">";
- O << "]";
- if (UseMarkup)
- O << ">";
+ O << "]" << markup(">");
}
void ARMInstPrinter::printT2AddrModeImm0_1020s4Operand(const MCInst *MI,
@@ -1280,21 +1149,15 @@ void ARMInstPrinter::printT2AddrModeImm0_1020s4Operand(const MCInst *MI,
const MCOperand &MO1 = MI->getOperand(OpNum);
const MCOperand &MO2 = MI->getOperand(OpNum+1);
- if (UseMarkup)
- O << "<mem:";
- O << "[";
+ O << markup("<mem:") << "[";
printRegName(O, MO1.getReg());
if (MO2.getImm()) {
- O << ", ";
- if (UseMarkup)
- O << "<imm:";
- O << "#" << MO2.getImm() * 4;
- if (UseMarkup)
- O << ">";
+ O << ", "
+ << markup("<imm:")
+ << "#" << MO2.getImm() * 4
+ << markup(">");
}
- O << "]";
- if (UseMarkup)
- O << ">";
+ O << "]" << markup(">");
}
void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(const MCInst *MI,
@@ -1302,15 +1165,12 @@ void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(const MCInst *MI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
int32_t OffImm = (int32_t)MO1.getImm();
- O << ", ";
- if (UseMarkup)
- O << "<imm:";
+ O << ", " << markup("<imm:");
if (OffImm < 0)
O << "#-" << -OffImm;
else
O << "#" << OffImm;
- if (UseMarkup)
- O << ">";
+ O << markup(">");
}
void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI,
@@ -1343,9 +1203,7 @@ void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI,
const MCOperand &MO2 = MI->getOperand(OpNum+1);
const MCOperand &MO3 = MI->getOperand(OpNum+2);
- if (UseMarkup)
- O << "<mem:";
- O << "[";
+ O << markup("<mem:") << "[";
printRegName(O, MO1.getReg());
assert(MO2.getReg() && "Invalid so_reg load / store address!");
@@ -1355,26 +1213,20 @@ void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI,
unsigned ShAmt = MO3.getImm();
if (ShAmt) {
assert(ShAmt <= 3 && "Not a valid Thumb2 addressing mode!");
- O << ", lsl ";
- if (UseMarkup)
- O << "<imm:";
- O << "#" << ShAmt;
- if (UseMarkup)
- O << ">";
+ O << ", lsl "
+ << markup("<imm:")
+ << "#" << ShAmt
+ << markup(">");
}
- O << "]";
- if (UseMarkup)
- O << ">";
+ O << "]" << markup(">");
}
void ARMInstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
- if (UseMarkup)
- O << "<imm:";
- O << '#' << ARM_AM::getFPImmFloat(MO.getImm());
- if (UseMarkup)
- O << ">";
+ O << markup("<imm:")
+ << '#' << ARM_AM::getFPImmFloat(MO.getImm())
+ << markup(">");
}
void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum,
@@ -1382,22 +1234,18 @@ void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum,
unsigned EncodedImm = MI->getOperand(OpNum).getImm();
unsigned EltBits;
uint64_t Val = ARM_AM::decodeNEONModImm(EncodedImm, EltBits);
- if (UseMarkup)
- O << "<imm:";
- O << "#0x";
+ O << markup("<imm:")
+ << "#0x";
O.write_hex(Val);
- if (UseMarkup)
- O << ">";
+ O << markup(">");
}
void ARMInstPrinter::printImmPlusOneOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
unsigned Imm = MI->getOperand(OpNum).getImm();
- if (UseMarkup)
- O << "<imm:";
- O << "#" << Imm + 1;
- if (UseMarkup)
- O << ">";
+ O << markup("<imm:")
+ << "#" << Imm + 1
+ << markup(">");
}
void ARMInstPrinter::printRotImmOperand(const MCInst *MI, unsigned OpNum,
@@ -1405,45 +1253,35 @@ void ARMInstPrinter::printRotImmOperand(const MCInst *MI, unsigned OpNum,
unsigned Imm = MI->getOperand(OpNum).getImm();
if (Imm == 0)
return;
- O << ", ror ";
- if (UseMarkup)
- O << "<imm:";
- O << "#";
+ O << ", ror "
+ << markup("<imm:")
+ << "#";
switch (Imm) {
default: assert (0 && "illegal ror immediate!");
case 1: O << "8"; break;
case 2: O << "16"; break;
case 3: O << "24"; break;
}
- if (UseMarkup)
- O << ">";
+ O << markup(">");
}
void ARMInstPrinter::printFBits16(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
- if (UseMarkup)
- O << "<imm:";
- O << "#" << 16 - MI->getOperand(OpNum).getImm();
- if (UseMarkup)
- O << ">";
+ O << markup("<imm:")
+ << "#" << 16 - MI->getOperand(OpNum).getImm()
+ << markup(">");
}
void ARMInstPrinter::printFBits32(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
- if (UseMarkup)
- O << "<imm:";
- O << "#" << 32 - MI->getOperand(OpNum).getImm();
- if (UseMarkup)
- O << ">";
+ O << markup("<imm:")
+ << "#" << 32 - MI->getOperand(OpNum).getImm()
+ << markup(">");
}
void ARMInstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
- if (UseMarkup)
- O << "<mem:";
O << "[" << MI->getOperand(OpNum).getImm() << "]";
- if (UseMarkup)
- O << ">";
}
void ARMInstPrinter::printVectorListOne(const MCInst *MI, unsigned OpNum,
diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp
index e92ad01e1d..918316572a 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.cpp
+++ b/lib/Target/CellSPU/SPUTargetMachine.cpp
@@ -44,7 +44,7 @@ SPUTargetMachine::SPUTargetMachine(const Target &T, StringRef TT,
TLInfo(*this),
TSInfo(*this),
InstrItins(Subtarget.getInstrItineraryData()),
- STTI(&TLInfo){
+ STTI(&TLInfo), VTTI(&TLInfo) {
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 353542a809..30866e9eeb 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -75,7 +75,7 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT,
TSInfo(*this),
FrameLowering(Subtarget),
InstrItins(&Subtarget.getInstrItineraryData()),
- STTI(&TLInfo) {
+ STTI(&TLInfo), VTTI(&TLInfo) {
setMCUseCFI(false);
}
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
index cb5f46062d..1ae2baa198 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
@@ -42,7 +42,8 @@ MBlazeTargetMachine(const Target &T, StringRef TT,
InstrInfo(*this),
FrameLowering(Subtarget),
TLInfo(*this), TSInfo(*this), ELFWriterInfo(*this),
- InstrItins(Subtarget.getInstrItineraryData()), STTI(&TLInfo) {
+ InstrItins(Subtarget.getInstrItineraryData()),
+ STTI(&TLInfo), VTTI(&TLInfo) {
}
namespace {
diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp
index 29ea681216..13e37b3735 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -36,7 +36,7 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T,
// FIXME: Check DataLayout string.
DL("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"),
InstrInfo(*this), TLInfo(*this), TSInfo(*this),
- FrameLowering(Subtarget), STTI(&TLInfo) { }
+ FrameLowering(Subtarget), STTI(&TLInfo), VTTI(&TLInfo) { }
namespace {
/// MSP430 Code Generator Pass Configuration Options.
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index da6a8d2a67..ae89cdd693 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -3601,10 +3601,11 @@ MipsTargetLowering::LowerReturn(SDValue Chain,
if (!Reg)
llvm_unreachable("sret virtual register not created in the entry block");
SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
+ unsigned V0 = IsN64 ? Mips::V0_64 : Mips::V0;
- Chain = DAG.getCopyToReg(Chain, dl, IsN64 ? Mips::V0_64 : Mips::V0, Val,
- Flag);
+ Chain = DAG.getCopyToReg(Chain, dl, V0, Val, Flag);
Flag = Chain.getValue(1);
+ MF.getRegInfo().addLiveOut(V0);
}
// Return on Mips is always a "jr $ra"
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index 5e8062373f..f610253f49 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -53,7 +53,7 @@ MipsTargetMachine(const Target &T, StringRef TT,
InstrInfo(MipsInstrInfo::create(*this)),
FrameLowering(MipsFrameLowering::create(*this, Subtarget)),
TLInfo(*this), TSInfo(*this), JITInfo(),
- ELFWriterInfo(false, isLittle), STTI(&TLInfo) {
+ ELFWriterInfo(false, isLittle), STTI(&TLInfo), VTTI(&TLInfo) {
}
void MipsebTargetMachine::anchor() { }
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index 60822d0c05..a62db327e5 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -40,7 +40,7 @@ class MipsTargetMachine : public LLVMTargetMachine {
MipsJITInfo JITInfo;
MipsELFWriterInfo ELFWriterInfo;
ScalarTargetTransformImpl STTI;
- VectorTargetTransformInfo VTTI;
+ VectorTargetTransformImpl VTTI;
public:
MipsTargetMachine(const Target &T, StringRef TT,
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index c46094569e..971d1b89a8 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -126,10 +126,9 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
return Base;
// Truncate/sext the offset to the pointer size.
- unsigned AS = PtrVal->getType()->isPointerTy() ?
- cast<PointerType>(PtrVal->getType())->getAddressSpace() : 0;
- if (TD.getPointerSizeInBits(AS) != 64) {
- int SExtAmount = 64-TD.getPointerSizeInBits(AS);
+ unsigned PtrSize = TD.getPointerTypeSizeInBits(PtrVal->getType());
+ if (PtrSize != 64) {
+ int SExtAmount = 64-PtrSize;
Offset = (Offset << SExtAmount) >> SExtAmount;
}
@@ -151,7 +150,7 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
// Handle casts to pointers by changing them into casts to the appropriate
// integer type. This promotes constant folding and simplifies this code.
Constant *Op = CE->getOperand(0);
- Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()),
+ Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CE->getType()),
false/*ZExt*/);
return LowerConstant(Op, AP);
}
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 7519b4a083..cbb490003d 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -73,7 +73,7 @@ NVPTXTargetMachine::NVPTXTargetMachine(const Target &T,
Subtarget(TT, CPU, FS, is64bit),
DL(Subtarget.getDataLayout()),
InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering(*this,is64bit),
- STTI(&TLInfo)
+ STTI(&TLInfo), VTTI(&TLInfo)
/*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ {
}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index 1744738622..87ecb13a4c 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -29,9 +29,14 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
case FK_Data_1:
case FK_Data_2:
case FK_Data_4:
+ case FK_Data_8:
+ case PPC::fixup_ppc_toc:
return Value;
+ case PPC::fixup_ppc_lo14:
+ case PPC::fixup_ppc_toc16_ds:
+ return (Value & 0xffff) << 2;
case PPC::fixup_ppc_brcond14:
- return Value & 0x3ffc;
+ return Value & 0xfffc;
case PPC::fixup_ppc_br24:
return Value & 0x3fffffc;
#if 0
@@ -41,6 +46,7 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
case PPC::fixup_ppc_ha16:
return ((Value >> 16) + ((Value & 0x8000) ? 1 : 0)) & 0xffff;
case PPC::fixup_ppc_lo16:
+ case PPC::fixup_ppc_toc16:
return Value & 0xffff;
}
}
@@ -72,7 +78,10 @@ public:
{ "fixup_ppc_brcond14", 16, 14, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_ppc_lo16", 16, 16, 0 },
{ "fixup_ppc_ha16", 16, 16, 0 },
- { "fixup_ppc_lo14", 16, 14, 0 }
+ { "fixup_ppc_lo14", 16, 14, 0 },
+ { "fixup_ppc_toc", 0, 64, 0 },
+ { "fixup_ppc_toc16", 16, 16, 0 },
+ { "fixup_ppc_toc16_ds", 16, 14, 0 }
};
if (Kind < FirstTargetFixupKind)
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index a19798157b..1518a60db8 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -11,6 +11,8 @@
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCValue.h"
using namespace llvm;
@@ -21,9 +23,15 @@ namespace {
virtual ~PPCELFObjectWriter();
protected:
+ virtual unsigned getRelocTypeInner(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel) const;
virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
bool IsPCRel, bool IsRelocWithSymbol,
int64_t Addend) const;
+ virtual const MCSymbol *undefinedExplicitRelSym(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel) const;
virtual void adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset);
};
}
@@ -36,11 +44,13 @@ PPCELFObjectWriter::PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI)
PPCELFObjectWriter::~PPCELFObjectWriter() {
}
-unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target,
- const MCFixup &Fixup,
- bool IsPCRel,
- bool IsRelocWithSymbol,
- int64_t Addend) const {
+unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel) const
+{
+ MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ?
+ MCSymbolRefExpr::VK_None : Target.getSymA()->getKind();
+
// determine the type of the relocation
unsigned Type;
if (IsPCRel) {
@@ -61,7 +71,7 @@ unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target,
Type = ELF::R_PPC_ADDR24;
break;
case PPC::fixup_ppc_brcond14:
- Type = ELF::R_PPC_ADDR14_BRTAKEN; // XXX: or BRNTAKEN?_
+ Type = ELF::R_PPC_ADDR14; // XXX: or BRNTAKEN?_
break;
case PPC::fixup_ppc_ha16:
Type = ELF::R_PPC_ADDR16_HA;
@@ -72,6 +82,26 @@ unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target,
case PPC::fixup_ppc_lo14:
Type = ELF::R_PPC_ADDR14;
break;
+ case PPC::fixup_ppc_toc:
+ Type = ELF::R_PPC64_TOC;
+ break;
+ case PPC::fixup_ppc_toc16:
+ Type = ELF::R_PPC64_TOC16;
+ break;
+ case PPC::fixup_ppc_toc16_ds:
+ Type = ELF::R_PPC64_TOC16_DS;
+ break;
+ case FK_Data_8:
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_PPC_TOC:
+ Type = ELF::R_PPC64_TOC;
+ break;
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_PPC64_ADDR64;
+ break;
+ }
+ break;
case FK_Data_4:
Type = ELF::R_PPC_ADDR32;
break;
@@ -83,11 +113,41 @@ unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target,
return Type;
}
+unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel,
+ bool IsRelocWithSymbol,
+ int64_t Addend) const {
+ return getRelocTypeInner(Target, Fixup, IsPCRel);
+}
+
+const MCSymbol *PPCELFObjectWriter::undefinedExplicitRelSym(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ assert(Target.getSymA() && "SymA cannot be 0");
+ const MCSymbol &Symbol = Target.getSymA()->getSymbol().AliasedSymbol();
+
+ unsigned RelocType = getRelocTypeInner(Target, Fixup, IsPCRel);
+
+ // The .odp creation emits a relocation against the symbol ".TOC." which
+ // create a R_PPC64_TOC relocation. However the relocation symbol name
+ // in final object creation should be NULL, since the symbol does not
+ // really exist, it is just the reference to TOC base for the current
+ // object file.
+ bool EmitThisSym = RelocType != ELF::R_PPC64_TOC;
+
+ if (EmitThisSym && !Symbol.isTemporary())
+ return &Symbol;
+ return NULL;
+}
+
void PPCELFObjectWriter::
adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset) {
switch ((unsigned)Fixup.getKind()) {
case PPC::fixup_ppc_ha16:
case PPC::fixup_ppc_lo16:
+ case PPC::fixup_ppc_toc16:
+ case PPC::fixup_ppc_toc16_ds:
RelocOffset += 2;
break;
default:
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
index b3c889e3f8..37b265e7fd 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
@@ -34,6 +34,16 @@ enum Fixups {
/// fixup_ppc_lo14 - A 14-bit fixup corresponding to lo16(_foo) for instrs
/// like 'std'.
fixup_ppc_lo14,
+
+ /// fixup_ppc_toc - Insert value of TOC base (.TOC.).
+ fixup_ppc_toc,
+
+ /// fixup_ppc_toc16 - A 16-bit signed fixup relative to the TOC base.
+ fixup_ppc_toc16,
+
+ /// fixup_ppc_toc16_ds - A 14-bit signed fixup relative to the TOC base with
+ /// implied 2 zero bits
+ fixup_ppc_toc16_ds,
// Marker
LastTargetFixupKind,
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index 1fba5b8dc3..21183024a5 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -15,7 +15,9 @@
#include "MCTargetDesc/PPCBaseInfo.h"
#include "MCTargetDesc/PPCFixupKinds.h"
#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/ErrorHandling.h"
@@ -28,13 +30,25 @@ class PPCMCCodeEmitter : public MCCodeEmitter {
PPCMCCodeEmitter(const PPCMCCodeEmitter &) LLVM_DELETED_FUNCTION;
void operator=(const PPCMCCodeEmitter &) LLVM_DELETED_FUNCTION;
+ const MCSubtargetInfo &STI;
+ Triple TT;
+
public:
PPCMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
- MCContext &ctx) {
+ MCContext &ctx)
+ : STI(sti), TT(STI.getTargetTriple()) {
}
~PPCMCCodeEmitter() {}
+ bool is64BitMode() const {
+ return (STI.getFeatureBits() & PPC::Feature64Bit) != 0;
+ }
+
+ bool isSVR4ABI() const {
+ return TT.isMacOSX() == 0;
+ }
+
unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const;
unsigned getCondBrEncoding(const MCInst &MI, unsigned OpNo,
@@ -61,11 +75,19 @@ public:
SmallVectorImpl<MCFixup> &Fixups) const;
void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups) const {
- unsigned Bits = getBinaryCodeForInstr(MI, Fixups);
+ uint64_t Bits = getBinaryCodeForInstr(MI, Fixups);
+
+ // BL8_NOPELF and BLA8_NOP_ELF is both size of 8 bacause of the
+ // following 'nop'.
+ unsigned Size = 4; // FIXME: Have Desc.getSize() return the correct value!
+ unsigned Opcode = MI.getOpcode();
+ if (Opcode == PPC::BL8_NOP_ELF || Opcode == PPC::BLA8_NOP_ELF)
+ Size = 8;
// Output the constant in big endian byte order.
- for (unsigned i = 0; i != 4; ++i) {
- OS << (char)(Bits >> 24);
+ int ShiftValue = (Size * 8) - 8;
+ for (unsigned i = 0; i != Size; ++i) {
+ OS << (char)(Bits >> ShiftValue);
Bits <<= 8;
}
@@ -140,8 +162,12 @@ unsigned PPCMCCodeEmitter::getMemRIEncoding(const MCInst &MI, unsigned OpNo,
return (getMachineOpValue(MI, MO, Fixups) & 0xFFFF) | RegBits;
// Add a fixup for the displacement field.
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
- (MCFixupKind)PPC::fixup_ppc_lo16));
+ if (isSVR4ABI() && is64BitMode())
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_toc16));
+ else
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_lo16));
return RegBits;
}
@@ -158,8 +184,12 @@ unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
return (getMachineOpValue(MI, MO, Fixups) & 0x3FFF) | RegBits;
// Add a fixup for the branch target.
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
- (MCFixupKind)PPC::fixup_ppc_lo14));
+ if (isSVR4ABI() && is64BitMode())
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_toc16_ds));
+ else
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_lo14));
return RegBits;
}
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index d8abd9fba0..6941413ed4 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -420,10 +420,14 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
OutStreamer.EmitValueToAlignment(8);
MCSymbol *Symbol1 =
OutContext.GetOrCreateSymbol(".L." + Twine(CurrentFnSym->getName()));
- MCSymbol *Symbol2 = OutContext.GetOrCreateSymbol(StringRef(".TOC.@tocbase"));
+ // Generates a R_PPC64_ADDR64 (from FK_DATA_8) relocation for the function
+ // entry point.
OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol1, OutContext),
8/*size*/, 0/*addrspace*/);
- OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol2, OutContext),
+ MCSymbol *Symbol2 = OutContext.GetOrCreateSymbol(StringRef(".TOC."));
+ // Generates a R_PPC64_TOC relocation for TOC base insertion.
+ OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol2,
+ MCSymbolRefExpr::VK_PPC_TOC, OutContext),
8/*size*/, 0/*addrspace*/);
// Emit a null environment pointer.
OutStreamer.EmitIntValue(0, 8 /* size */, 0 /* addrspace */);
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index b93d50326a..de0d66124b 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1498,9 +1498,10 @@ SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
bool isPPC64 = (PtrVT == MVT::i64);
+ unsigned AS = 0;
Type *IntPtrTy =
DAG.getTargetLoweringInfo().getDataLayout()->getIntPtrType(
- *DAG.getContext());
+ *DAG.getContext(), AS);
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
@@ -2083,25 +2084,42 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
InVals.push_back(FIN);
- if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
+
+ if (ObjSize < 8) {
if (GPR_idx != Num_GPR_Regs) {
- unsigned VReg;
- VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
- EVT ObjType = (ObjSize == 1 ? MVT::i8 :
- (ObjSize == 2 ? MVT::i16 : MVT::i32));
- SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo(FuncArg,
- CurArgOffset),
- ObjType, false, false, 0);
+ SDValue Store;
+
+ if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
+ EVT ObjType = (ObjSize == 1 ? MVT::i8 :
+ (ObjSize == 2 ? MVT::i16 : MVT::i32));
+ Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(FuncArg, CurArgOffset),
+ ObjType, false, false, 0);
+ } else {
+ // For sizes that don't fit a truncating store (3, 5, 6, 7),
+ // store the whole register as-is to the parameter save area
+ // slot. The address of the parameter was already calculated
+ // above (InVals.push_back(FIN)) to be the right-justified
+ // offset within the slot. For this store, we need a new
+ // frame index that points at the beginning of the slot.
+ int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(FuncArg, ArgOffset),
+ false, false, 0);
+ }
+
MemOps.push_back(Store);
++GPR_idx;
}
-
+ // Whether we copied from a register or not, advance the offset
+ // into the parameter save area by a full doubleword.
ArgOffset += PtrByteSize;
-
continue;
}
+
for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
// Store whatever pieces of the object are in registers
// to memory. ArgOffset will be the address of the beginning
@@ -2112,16 +2130,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
- SDValue Shifted = Val;
-
- // For 64-bit SVR4, small structs come in right-adjusted.
- // Shift them left so the following logic works as expected.
- if (ObjSize < 8) {
- SDValue ShiftAmt = DAG.getConstant(64 - 8 * ObjSize, PtrVT);
- Shifted = DAG.getNode(ISD::SHL, dl, PtrVT, Val, ShiftAmt);
- }
-
- SDValue Store = DAG.getStore(Val.getValue(1), dl, Shifted, FIN,
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
MachinePointerInfo(FuncArg, ArgOffset),
false, false, 0);
MemOps.push_back(Store);
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index cb0ea01fcf..5a78e8ac6b 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -639,13 +639,13 @@ def LDtocCPT: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
(PPCtoc_entry tconstpool:$disp, G8RC:$reg))]>, isPPC64;
let hasSideEffects = 1 in {
-let RST = 2, DS_RA = 0 in // FIXME: Should be a pseudo.
-def LDinto_toc: DSForm_1<58, 0, (outs), (ins G8RC:$reg),
+let RST = 2, DS = 2 in
+def LDinto_toc: DSForm_1a<58, 0, (outs), (ins G8RC:$reg),
"ld 2, 8($reg)", LdStLD,
[(PPCload_toc G8RC:$reg)]>, isPPC64;
-let RST = 2, DS_RA = 0 in // FIXME: Should be a pseudo.
-def LDtoc_restore : DSForm_1<58, 0, (outs), (ins),
+let RST = 2, DS = 10, RA = 1 in
+def LDtoc_restore : DSForm_1a<58, 0, (outs), (ins),
"ld 2, 40(1)", LdStLD,
[(PPCtoc_restore)]>, isPPC64;
}
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index 14b8534d1c..9c8cb92cc7 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -54,19 +54,26 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
CPUName = sys::getHostCPUName();
#endif
- // Parse features string.
- ParseSubtargetFeatures(CPUName, FS);
-
// Initialize scheduling itinerary for the specified CPU.
InstrItins = getInstrItineraryForCPU(CPUName);
+ // Make sure 64-bit features are available when CPUname is generic
+ std::string FullFS = FS;
+
// If we are generating code for ppc64, verify that options make sense.
if (is64Bit) {
Has64BitSupport = true;
// Silently force 64-bit register use on ppc64.
Use64BitRegs = true;
+ if (!FullFS.empty())
+ FullFS = "+64bit," + FullFS;
+ else
+ FullFS = "+64bit";
}
+ // Parse features string.
+ ParseSubtargetFeatures(CPUName, FullFS);
+
// If the user requested use of 64-bit regs, but the cpu selected doesn't
// support it, ignore.
if (use64BitRegs() && !has64BitSupport())
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index b861383475..3fc977ee2b 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -44,7 +44,7 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT,
FrameLowering(Subtarget), JITInfo(*this, is64Bit),
TLInfo(*this), TSInfo(*this),
InstrItins(Subtarget.getInstrItineraryData()),
- STTI(&TLInfo){
+ STTI(&TLInfo), VTTI(&TLInfo) {
// The binutils for the BG/P are too old for CFI.
if (Subtarget.isBGP())
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index 1d8cc771dd..45c962471d 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -36,7 +36,7 @@ SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT,
DL(Subtarget.getDataLayout()),
InstrInfo(Subtarget),
TLInfo(*this), TSInfo(*this),
- FrameLowering(Subtarget),STTI(&TLInfo) {
+ FrameLowering(Subtarget), STTI(&TLInfo), VTTI(&TLInfo) {
}
namespace {
diff --git a/lib/Target/Target.cpp b/lib/Target/Target.cpp
index 393178a469..7d3dd8f015 100644
--- a/lib/Target/Target.cpp
+++ b/lib/Target/Target.cpp
@@ -64,7 +64,7 @@ unsigned LLVMPointerSizeForAS(LLVMTargetDataRef TD, unsigned AS) {
}
LLVMTypeRef LLVMIntPtrType(LLVMTargetDataRef TD) {
- return wrap(unwrap(TD)->getIntPtrType(getGlobalContext()));
+ return wrap(unwrap(TD)->getIntPtrType(getGlobalContext(), 0));
}
LLVMTypeRef LLVMIntPtrTypeForAS(LLVMTargetDataRef TD, unsigned AS) {
diff --git a/lib/Target/TargetTransformImpl.cpp b/lib/Target/TargetTransformImpl.cpp
index 1cb5edab9d..382eecb766 100644
--- a/lib/Target/TargetTransformImpl.cpp
+++ b/lib/Target/TargetTransformImpl.cpp
@@ -9,9 +9,16 @@
#include "llvm/Target/TargetTransformImpl.h"
#include "llvm/Target/TargetLowering.h"
+#include <utility>
using namespace llvm;
+//===----------------------------------------------------------------------===//
+//
+// Calls used by scalar transformations.
+//
+//===----------------------------------------------------------------------===//
+
bool ScalarTargetTransformImpl::isLegalAddImmediate(int64_t imm) const {
return TLI->isLegalAddImmediate(imm);
}
@@ -41,3 +48,151 @@ unsigned ScalarTargetTransformImpl::getJumpBufAlignment() const {
unsigned ScalarTargetTransformImpl::getJumpBufSize() const {
return TLI->getJumpBufSize();
}
+
+//===----------------------------------------------------------------------===//
+//
+// Calls used by the vectorizers.
+//
+//===----------------------------------------------------------------------===//
+int InstructionOpcodeToISD(unsigned Opcode) {
+ static const int OpToISDTbl[] = {
+ /*Instruction::Ret */ 0, // Opcode numbering start at #1.
+ /*Instruction::Br */ 0,
+ /*Instruction::Switch */ 0,
+ /*Instruction::IndirectBr */ 0,
+ /*Instruction::Invoke */ 0,
+ /*Instruction::Resume */ 0,
+ /*Instruction::Unreachable */ 0,
+ /*Instruction::Add */ ISD::ADD,
+ /*Instruction::FAdd */ ISD::FADD,
+ /*Instruction::Sub */ ISD::SUB,
+ /*Instruction::FSub */ ISD::FSUB,
+ /*Instruction::Mul */ ISD::MUL,
+ /*Instruction::FMul */ ISD::FMUL,
+ /*Instruction::UDiv */ ISD::UDIV,
+ /*Instruction::SDiv */ ISD::UDIV,
+ /*Instruction::FDiv */ ISD::FDIV,
+ /*Instruction::URem */ ISD::UREM,
+ /*Instruction::SRem */ ISD::SREM,
+ /*Instruction::FRem */ ISD::FREM,
+ /*Instruction::Shl */ ISD::SHL,
+ /*Instruction::LShr */ ISD::SRL,
+ /*Instruction::AShr */ ISD::SRA,
+ /*Instruction::And */ ISD::AND,
+ /*Instruction::Or */ ISD::OR,
+ /*Instruction::Xor */ ISD::XOR,
+ /*Instruction::Alloca */ 0,
+ /*Instruction::Load */ ISD::LOAD,
+ /*Instruction::Store */ ISD::STORE,
+ /*Instruction::GetElementPtr */ 0,
+ /*Instruction::Fence */ 0,
+ /*Instruction::AtomicCmpXchg */ 0,
+ /*Instruction::AtomicRMW */ 0,
+ /*Instruction::Trunc */ ISD::TRUNCATE,
+ /*Instruction::ZExt */ ISD::ZERO_EXTEND,
+ /*Instruction::SExt */ ISD::SEXTLOAD,
+ /*Instruction::FPToUI */ ISD::FP_TO_UINT,
+ /*Instruction::FPToSI */ ISD::FP_TO_SINT,
+ /*Instruction::UIToFP */ ISD::UINT_TO_FP,
+ /*Instruction::SIToFP */ ISD::SINT_TO_FP,
+ /*Instruction::FPTrunc */ ISD::FP_ROUND,
+ /*Instruction::FPExt */ ISD::FP_EXTEND,
+ /*Instruction::PtrToInt */ ISD::BITCAST,
+ /*Instruction::IntToPtr */ ISD::BITCAST,
+ /*Instruction::BitCast */ ISD::BITCAST,
+ /*Instruction::ICmp */ ISD::SETCC,
+ /*Instruction::FCmp */ ISD::SETCC,
+ /*Instruction::PHI */ 0,
+ /*Instruction::Call */ 0,
+ /*Instruction::Select */ ISD::SELECT,
+ /*Instruction::UserOp1 */ 0,
+ /*Instruction::UserOp2 */ 0,
+ /*Instruction::VAArg */ 0,
+ /*Instruction::ExtractElement*/ ISD::EXTRACT_VECTOR_ELT,
+ /*Instruction::InsertElement */ ISD::INSERT_VECTOR_ELT,
+ /*Instruction::ShuffleVector */ ISD::VECTOR_SHUFFLE,
+ /*Instruction::ExtractValue */ ISD::MERGE_VALUES,
+ /*Instruction::InsertValue */ ISD::MERGE_VALUES,
+ /*Instruction::LandingPad */ 0};
+
+ assert((Instruction::Ret == 1) && (Instruction::LandingPad == 58) &&
+ "Instruction order had changed");
+
+ // Opcode numbering starts at #1 but the table starts at #0, so we subtract
+ // one from the opcode number.
+ return OpToISDTbl[Opcode - 1];
+}
+
+std::pair<unsigned, EVT>
+VectorTargetTransformImpl::getTypeLegalizationCost(LLVMContext &C,
+ EVT Ty) const {
+ unsigned Cost = 1;
+ // We keep legalizing the type until we find a legal kind. We assume that
+ // the only operation that costs anything is the split. After splitting
+ // we need to handle two types.
+ while (true) {
+ TargetLowering::LegalizeKind LK = TLI->getTypeConversion(C, Ty);
+
+ if (LK.first == TargetLowering::TypeLegal)
+ return std::make_pair(Cost, LK.second);
+
+ if (LK.first == TargetLowering::TypeSplitVector)
+ Cost *= 2;
+
+ // Keep legalizing the type.
+ Ty = LK.second;
+ }
+}
+
+unsigned
+VectorTargetTransformImpl::getInstrCost(unsigned Opcode, Type *Ty1,
+ Type *Ty2) const {
+ // Check if any of the operands are vector operands.
+ int ISD = InstructionOpcodeToISD(Opcode);
+
+ // Selects on vectors are actually vector selects.
+ if (ISD == ISD::SELECT) {
+ assert(Ty2 && "Ty2 must hold the select type");
+ if (Ty2->isVectorTy())
+ ISD = ISD::VSELECT;
+ }
+
+ // If we don't have any information about this instruction assume it costs 1.
+ if (ISD == 0)
+ return 1;
+
+ assert(Ty1 && "We need to have at least one type");
+
+ // From this stage we look at the legalized type.
+ std::pair<unsigned, EVT> LT =
+ getTypeLegalizationCost(Ty1->getContext(), TLI->getValueType(Ty1));
+
+ if (TLI->isOperationLegalOrCustom(ISD, LT.second)) {
+ // The operation is legal. Assume it costs 1. Multiply
+ // by the type-legalization overhead.
+ return LT.first * 1;
+ }
+
+ unsigned NumElem =
+ (LT.second.isVector() ? LT.second.getVectorNumElements() : 1);
+
+ // We will probably scalarize this instruction. Assume that the cost is the
+ // number of the vector elements.
+ return LT.first * NumElem * 1;
+}
+
+unsigned
+VectorTargetTransformImpl::getBroadcastCost(Type *Tp) const {
+ return 1;
+}
+
+unsigned
+VectorTargetTransformImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment,
+ unsigned AddressSpace) const {
+ // From this stage we look at the legalized type.
+ std::pair<unsigned, EVT> LT =
+ getTypeLegalizationCost(Src->getContext(), TLI->getValueType(Src));
+ // Assume that all loads of legal types cost 1.
+ return LT.first;
+}
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 9689180afd..708951126f 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -18,6 +18,7 @@
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringSwitch.h"
@@ -54,10 +55,13 @@ private:
X86Operand *ParseOperand();
X86Operand *ParseATTOperand();
X86Operand *ParseIntelOperand();
+ X86Operand *ParseIntelOffsetOfOperator(SMLoc StartLoc);
X86Operand *ParseIntelMemOperand(unsigned SegReg, SMLoc StartLoc);
X86Operand *ParseIntelBracExpression(unsigned SegReg, unsigned Size);
X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
+ const MCExpr *ParseIntelDotOperator(const MCExpr *Disp);
+
bool ParseDirectiveWord(unsigned Size, SMLoc L);
bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
@@ -159,6 +163,7 @@ struct X86Operand : public MCParsedAsmOperand {
} Kind;
SMLoc StartLoc, EndLoc;
+ SMLoc OffsetOfLoc;
union {
struct {
@@ -181,7 +186,6 @@ struct X86Operand : public MCParsedAsmOperand {
unsigned IndexReg;
unsigned Scale;
unsigned Size;
- bool OffsetOf;
bool NeedSizeDir;
} Mem;
};
@@ -196,6 +200,8 @@ struct X86Operand : public MCParsedAsmOperand {
/// getLocRange - Get the range between the first and last token of this
/// operand.
SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); }
+ /// getOffsetOfLoc - Get the location of the offset operator.
+ SMLoc getOffsetOfLoc() const { return OffsetOfLoc; }
virtual void print(raw_ostream &OS) const {}
@@ -321,8 +327,7 @@ struct X86Operand : public MCParsedAsmOperand {
}
bool isOffsetOf() const {
- assert(Kind == Memory && "Invalid access!");
- return Mem.OffsetOf;
+ return OffsetOfLoc.getPointer();
}
bool needSizeDirective() const {
@@ -455,9 +460,11 @@ struct X86Operand : public MCParsedAsmOperand {
return Res;
}
- static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc) {
+ static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc,
+ SMLoc OffsetOfLoc = SMLoc()) {
X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc);
Res->Reg.RegNo = RegNo;
+ Res->OffsetOfLoc = OffsetOfLoc;
return Res;
}
@@ -468,9 +475,8 @@ struct X86Operand : public MCParsedAsmOperand {
}
/// Create an absolute memory operand.
- static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc,
- SMLoc EndLoc, unsigned Size = 0,
- bool OffsetOf = false, bool NeedSizeDir = false){
+ static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc,
+ unsigned Size = 0, bool NeedSizeDir = false){
X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
Res->Mem.SegReg = 0;
Res->Mem.Disp = Disp;
@@ -478,7 +484,6 @@ struct X86Operand : public MCParsedAsmOperand {
Res->Mem.IndexReg = 0;
Res->Mem.Scale = 1;
Res->Mem.Size = Size;
- Res->Mem.OffsetOf = OffsetOf;
Res->Mem.NeedSizeDir = NeedSizeDir;
return Res;
}
@@ -487,8 +492,7 @@ struct X86Operand : public MCParsedAsmOperand {
static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp,
unsigned BaseReg, unsigned IndexReg,
unsigned Scale, SMLoc StartLoc, SMLoc EndLoc,
- unsigned Size = 0, bool OffsetOf = false,
- bool NeedSizeDir = false) {
+ unsigned Size = 0, bool NeedSizeDir = false) {
// We should never just have a displacement, that should be parsed as an
// absolute memory operand.
assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
@@ -503,7 +507,6 @@ struct X86Operand : public MCParsedAsmOperand {
Res->Mem.IndexReg = IndexReg;
Res->Mem.Scale = Scale;
Res->Mem.Size = Size;
- Res->Mem.OffsetOf = OffsetOf;
Res->Mem.NeedSizeDir = NeedSizeDir;
return Res;
}
@@ -661,9 +664,10 @@ static unsigned getIntelMemOperandSize(StringRef OpStr) {
X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
unsigned Size) {
unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
- SMLoc Start = Parser.getTok().getLoc(), End;
+ const AsmToken &Tok = Parser.getTok();
+ SMLoc Start = Tok.getLoc(), End;
- const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
+ const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
// Parse [ BaseReg + Scale*IndexReg + Disp ] or [ symbol ]
// Eat '['
@@ -682,9 +686,9 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
return X86Operand::CreateMem(Disp, Start, End, Size);
}
} else if (getLexer().is(AsmToken::Integer)) {
- int64_t Val = Parser.getTok().getIntVal();
+ int64_t Val = Tok.getIntVal();
Parser.Lex();
- SMLoc Loc = Parser.getTok().getLoc();
+ SMLoc Loc = Tok.getLoc();
if (getLexer().is(AsmToken::RBrac)) {
// Handle '[' number ']'
Parser.Lex();
@@ -696,7 +700,7 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
} else if (getLexer().is(AsmToken::Star)) {
// Handle '[' Scale*IndexReg ']'
Parser.Lex();
- SMLoc IdxRegLoc = Parser.getTok().getLoc();
+ SMLoc IdxRegLoc = Tok.getLoc();
if (ParseRegister(IndexReg, IdxRegLoc, End))
return ErrorOperand(IdxRegLoc, "Expected register");
Scale = Val;
@@ -707,13 +711,13 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
if (getLexer().is(AsmToken::Plus) || getLexer().is(AsmToken::Minus)) {
bool isPlus = getLexer().is(AsmToken::Plus);
Parser.Lex();
- SMLoc PlusLoc = Parser.getTok().getLoc();
+ SMLoc PlusLoc = Tok.getLoc();
if (getLexer().is(AsmToken::Integer)) {
- int64_t Val = Parser.getTok().getIntVal();
+ int64_t Val = Tok.getIntVal();
Parser.Lex();
if (getLexer().is(AsmToken::Star)) {
Parser.Lex();
- SMLoc IdxRegLoc = Parser.getTok().getLoc();
+ SMLoc IdxRegLoc = Tok.getLoc();
if (ParseRegister(IndexReg, IdxRegLoc, End))
return ErrorOperand(IdxRegLoc, "Expected register");
Scale = Val;
@@ -724,7 +728,7 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
return ErrorOperand(PlusLoc, "unexpected token after +");
} else if (getLexer().is(AsmToken::Identifier)) {
// This could be an index register or a displacement expression.
- End = Parser.getTok().getLoc();
+ End = Tok.getLoc();
if (!IndexReg)
ParseRegister(IndexReg, Start, End);
else if (getParser().ParseExpression(Disp, End)) return 0;
@@ -734,11 +738,16 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
if (getLexer().isNot(AsmToken::RBrac))
if (getParser().ParseExpression(Disp, End)) return 0;
- End = Parser.getTok().getLoc();
+ End = Tok.getLoc();
if (getLexer().isNot(AsmToken::RBrac))
return ErrorOperand(End, "expected ']' token!");
Parser.Lex();
- End = Parser.getTok().getLoc();
+ End = Tok.getLoc();
+
+ if (Tok.getString().startswith("."))
+ Disp = ParseIntelDotOperator(Disp);
+
+ End = Tok.getLoc();
// handle [-42]
if (!BaseReg && !IndexReg)
@@ -761,22 +770,10 @@ X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, SMLoc Start) {
Parser.Lex();
}
- // Parse the 'offset' operator. This operator is used to specify the
- // location rather then the content of a variable.
- bool OffsetOf = false;
- if(isParsingInlineAsm() && (Tok.getString() == "offset" ||
- Tok.getString() == "OFFSET")) {
- OffsetOf = true;
- Parser.Lex(); // Eat offset.
- }
-
- if (getLexer().is(AsmToken::LBrac)) {
- assert (!OffsetOf && "Unexpected offset operator!");
+ if (getLexer().is(AsmToken::LBrac))
return ParseIntelBracExpression(SegReg, Size);
- }
if (!ParseRegister(SegReg, Start, End)) {
- assert (!OffsetOf && "Unexpected offset operator!");
// Handel SegReg : [ ... ]
if (getLexer().isNot(AsmToken::Colon))
return ErrorOperand(Start, "Expected ':' token!");
@@ -801,12 +798,74 @@ X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, SMLoc Start) {
NeedSizeDir = Size > 0;
}
}
- return X86Operand::CreateMem(Disp, Start, End, Size, OffsetOf, NeedSizeDir);
+ if (!isParsingInlineAsm())
+ return X86Operand::CreateMem(Disp, Start, End, Size);
+ else
+ // When parsing inline assembly we set the base register to a non-zero value
+ // as we don't know the actual value at this time. This is necessary to
+ // get the matching correct in some cases.
+ return X86Operand::CreateMem(/*SegReg*/0, Disp, /*BaseReg*/1, /*IndexReg*/0,
+ /*Scale*/1, Start, End, Size, NeedSizeDir);
+}
+
+/// Parse the '.' operator.
+const MCExpr *X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp) {
+ AsmToken Tok = *&Parser.getTok();
+
+ // Drop the '.'.
+ StringRef DotDispStr = Tok.getString().drop_front(1);
+
+ Lex(); // Eat .field.
+
+ // .Imm gets lexed as a real.
+ if (Tok.is(AsmToken::Real)) {
+ APInt DotDisp;
+ DotDispStr.getAsInteger(10, DotDisp);
+ uint64_t DotDispVal = DotDisp.getZExtValue();
+
+ // Special case zero dot displacement.
+ if (!DotDispVal) return Disp;
+
+ // FIXME: Handle non-constant expressions.
+ if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp)) {
+ uint64_t OrigDispVal = OrigDisp->getValue();
+ return MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
+ }
+ }
+ return Disp;
+}
+
+/// Parse the 'offset' operator. This operator is used to specify the
+/// location rather then the content of a variable.
+X86Operand *X86AsmParser::ParseIntelOffsetOfOperator(SMLoc Start) {
+ SMLoc OffsetOfLoc = Start;
+ Parser.Lex(); // Eat offset.
+ Start = Parser.getTok().getLoc();
+ assert (Parser.getTok().is(AsmToken::Identifier) && "Expected an identifier");
+
+ SMLoc End;
+ const MCExpr *Val;
+ if (getParser().ParseExpression(Val, End))
+ return 0;
+
+ End = Parser.getTok().getLoc();
+
+ // The offset operator will have an 'r' constraint, thus we need to create
+ // register operand to ensure proper matching. Just pick a GPR based on
+ // the size of a pointer.
+ unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
+ return X86Operand::CreateReg(RegNo, Start, End, OffsetOfLoc);
}
X86Operand *X86AsmParser::ParseIntelOperand() {
SMLoc Start = Parser.getTok().getLoc(), End;
+ // offset operator.
+ const AsmToken &Tok = Parser.getTok();
+ if ((Tok.getString() == "offset" || Tok.getString() == "OFFSET") &&
+ isParsingInlineAsm())
+ return ParseIntelOffsetOfOperator(Start);
+
// immediate.
if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Real) ||
getLexer().is(AsmToken::Minus)) {
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
index edad47312d..a4bd1147bc 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
@@ -34,11 +34,9 @@ using namespace llvm;
void X86ATTInstPrinter::printRegName(raw_ostream &OS,
unsigned RegNo) const {
- if (UseMarkup)
- OS << "<reg:";
- OS << '%' << getRegisterName(RegNo);
- if (UseMarkup)
- OS << ">";
+ OS << markup("<reg:")
+ << '%' << getRegisterName(RegNo)
+ << markup(">");
}
void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
@@ -155,29 +153,21 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isReg()) {
- if (UseMarkup)
- O << "<reg:";
- O << '%' << getRegisterName(Op.getReg());
- if (UseMarkup)
- O << ">";
+ printRegName(O, Op.getReg());
} else if (Op.isImm()) {
- if (UseMarkup)
- O << "<imm:";
// Print X86 immediates as signed values.
- O << '$' << (int64_t)Op.getImm();
- if (UseMarkup)
- O << ">";
+ O << markup("<imm:")
+ << '$' << (int64_t)Op.getImm()
+ << markup(">");
if (CommentStream && (Op.getImm() > 255 || Op.getImm() < -256))
*CommentStream << format("imm = 0x%" PRIX64 "\n", (uint64_t)Op.getImm());
} else {
assert(Op.isExpr() && "unknown operand kind in printOperand");
- if (UseMarkup)
- O << "<imm:";
- O << '$' << *Op.getExpr();
- if (UseMarkup)
- O << ">";
+ O << markup("<imm:")
+ << '$' << *Op.getExpr()
+ << markup(">");
}
}
@@ -188,8 +178,7 @@ void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
const MCOperand &DispSpec = MI->getOperand(Op+3);
const MCOperand &SegReg = MI->getOperand(Op+4);
- if (UseMarkup)
- O << "<mem:";
+ O << markup("<mem:");
// If this has a segment register, print it.
if (SegReg.getReg()) {
@@ -216,17 +205,14 @@ void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
printOperand(MI, Op+2, O);
unsigned ScaleVal = MI->getOperand(Op+1).getImm();
if (ScaleVal != 1) {
- O << ',';
- if (UseMarkup)
- O << "<imm:";
- O << ScaleVal;
- if (UseMarkup)
- O << ">";
+ O << ','
+ << markup("<imm:")
+ << ScaleVal
+ << markup(">");
}
}
O << ')';
}
- if (UseMarkup)
- O << ">";
+ O << markup(">");
}
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 1dc4aa9989..1b5c4d9753 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -162,7 +162,7 @@ def : Proc<"core2", [FeatureSSSE3, FeatureCMPXCHG16B,
FeatureSlowBTMem]>;
def : Proc<"penryn", [FeatureSSE41, FeatureCMPXCHG16B,
FeatureSlowBTMem]>;
-def : AtomProc<"atom", [ProcIntelAtom, FeatureSSE3, FeatureCMPXCHG16B,
+def : AtomProc<"atom", [ProcIntelAtom, FeatureSSSE3, FeatureCMPXCHG16B,
FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP,
FeatureSlowDivide]>;
// "Arrandale" along with corei3 and corei5
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index a6d2709b37..6786756c7f 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -88,6 +88,21 @@ def RetCC_X86_32_Fast : CallingConv<[
CCDelegateTo<RetCC_X86Common>
]>;
+// Intel_OCL_BI return-value convention.
+def RetCC_Intel_OCL_BI : CallingConv<[
+ // Vector types are returned in XMM0,XMM1,XMMM2 and XMM3.
+ CCIfType<[f32, f64, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToReg<[XMM0,XMM1,XMM2,XMM3]>>,
+
+ // 256-bit FP vectors
+ // No more than 4 registers
+ CCIfType<[v8f32, v4f64, v8i32, v4i64],
+ CCAssignToReg<[YMM0,YMM1,YMM2,YMM3]>>,
+
+ // i32, i64 in the standard way
+ CCDelegateTo<RetCC_X86Common>
+]>;
+
// X86-64 C return-value convention.
def RetCC_X86_64_C : CallingConv<[
// The X86-64 calling convention always returns FP values in XMM0.
@@ -128,6 +143,10 @@ def RetCC_X86_64 : CallingConv<[
// This is the return-value convention used for the entire X86 backend.
def RetCC_X86 : CallingConv<[
+
+ // Check if this is the Intel OpenCL built-ins calling convention
+ CCIfCC<"CallingConv::Intel_OCL_BI", CCDelegateTo<RetCC_Intel_OCL_BI>>,
+
CCIfSubtarget<"is64Bit()", CCDelegateTo<RetCC_X86_64>>,
CCDelegateTo<RetCC_X86_32>
]>;
@@ -235,6 +254,29 @@ def CC_X86_Win64_C : CallingConv<[
CCIfType<[f80], CCAssignToStack<0, 0>>
]>;
+// X86-64 Intel OpenCL built-ins calling convention.
+def CC_Intel_OCL_BI : CallingConv<[
+ CCIfType<[i32], CCIfSubtarget<"isTargetWin32()", CCAssignToStack<4, 4>>>,
+
+ CCIfType<[i32], CCIfSubtarget<"isTargetWin64()", CCAssignToReg<[ECX, EDX, R8D, R9D]>>>,
+ CCIfType<[i64], CCIfSubtarget<"isTargetWin64()", CCAssignToReg<[RCX, RDX, R8, R9 ]>>>,
+
+ CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX]>>,
+ CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX]>>,
+
+ // The SSE vector arguments are passed in XMM registers.
+ CCIfType<[f32, f64, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToReg<[XMM0, XMM1, XMM2, XMM3]>>,
+
+ // The 256-bit vector arguments are passed in YMM registers.
+ CCIfType<[v8f32, v4f64, v8i32, v4i64],
+ CCAssignToReg<[YMM0, YMM1, YMM2, YMM3]>>,
+
+ CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_C>>,
+ CCDelegateTo<CC_X86_64_C>
+]>;
+
+
def CC_X86_64_GHC : CallingConv<[
// Promote i8/i16/i32 arguments to i64.
CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
@@ -324,7 +366,7 @@ def CC_X86_32_FastCall : CallingConv<[
CCIfNest<CCAssignToReg<[EAX]>>,
// The first 2 integer arguments are passed in ECX/EDX
- CCIfType<[i32], CCAssignToReg<[ECX, EDX]>>,
+ CCIfInReg<CCIfType<[i32], CCAssignToReg<[ECX, EDX]>>>,
// Otherwise, same as everything else.
CCDelegateTo<CC_X86_32_Common>
@@ -408,6 +450,7 @@ def CC_X86_64 : CallingConv<[
// This is the argument convention used for the entire X86 backend.
def CC_X86 : CallingConv<[
+ CCIfCC<"CallingConv::Intel_OCL_BI", CCDelegateTo<CC_Intel_OCL_BI>>,
CCIfSubtarget<"is64Bit()", CCDelegateTo<CC_X86_64>>,
CCDelegateTo<CC_X86_32>
]>;
@@ -426,3 +469,17 @@ def CSR_64EHRet : CalleeSavedRegs<(add RAX, RDX, CSR_64)>;
def CSR_Win64 : CalleeSavedRegs<(add RBX, RBP, RDI, RSI, R12, R13, R14, R15,
(sequence "XMM%u", 6, 15))>;
+
+
+// Standard C + YMM6-15
+def CSR_Win64_Intel_OCL_BI_AVX : CalleeSavedRegs<(add RBX, RBP, RDI, RSI, R12,
+ R13, R14, R15,
+ (sequence "YMM%u", 6, 15))>;
+
+//Standard C + XMM 8-15
+def CSR_64_Intel_OCL_BI : CalleeSavedRegs<(add CSR_64,
+ (sequence "XMM%u", 8, 15))>;
+
+//Standard C + YMM 8-15
+def CSR_64_Intel_OCL_BI_AVX : CalleeSavedRegs<(add CSR_64,
+ (sequence "YMM%u", 8, 15))>;
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index c9301bb2c9..b8f7a6881f 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -282,8 +282,9 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM) {
bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
const X86AddressMode &AM) {
// Handle 'null' like i32/i64 0.
- if (isa<ConstantPointerNull>(Val))
- Val = Constant::getNullValue(TD.getIntPtrType(Val->getContext()));
+ if (isa<ConstantPointerNull>(Val)) {
+ Val = Constant::getNullValue(TD.getIntPtrType(Val->getType()));
+ }
// If this is a store of a simple constant, fold the constant into the store.
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
@@ -908,8 +909,9 @@ bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1,
if (Op0Reg == 0) return false;
// Handle 'null' like i32/i64 0.
- if (isa<ConstantPointerNull>(Op1))
- Op1 = Constant::getNullValue(TD.getIntPtrType(Op0->getContext()));
+ if (isa<ConstantPointerNull>(Op1)) {
+ Op1 = Constant::getNullValue(TD.getIntPtrType(Op0->getType()));
+ }
// We have two options: compare with register or immediate. If the RHS of
// the compare is an immediate that we can fold into this compare, use
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index 4141068806..5bfb5054b0 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -315,11 +315,11 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF,
if (CSI.empty()) return;
std::vector<MachineMove> &Moves = MMI.getFrameMoves();
- const DataLayout *TD = TM.getDataLayout();
+ const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
bool HasFP = hasFP(MF);
// Calculate amount of bytes used for return address storing.
- int stackGrowth = -TM.getFrameLowering()->getStackSlotSize(); // @LOCALMOD
+ int stackGrowth = -RegInfo->getSlotSize();
// FIXME: This is dirty hack. The code itself is pretty mess right now.
// It should be rewritten from scratch and generalized sometimes.
@@ -717,9 +717,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// ELSE => DW_CFA_offset_extended
std::vector<MachineMove> &Moves = MMI.getFrameMoves();
- const DataLayout *TD = MF.getTarget().getDataLayout();
uint64_t NumBytes = 0;
- int stackGrowth = -TM.getFrameLowering()->getStackSlotSize(); // @LOCALMOD
+ int stackGrowth = -SlotSize;
if (HasFP) {
// Calculate required stack adjustment.
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index b9348a2b90..5d3c5f0347 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -959,6 +959,13 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
+ // As there is no 64-bit GPR available, we need build a special custom
+ // sequence to convert from v2i32 to v2f32.
+ if (!Subtarget->is64Bit())
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
+
setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
@@ -1078,6 +1085,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
+
setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, Legal);
setOperationAction(ISD::SRL, MVT::v16i16, Custom);
@@ -1268,7 +1279,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setTargetDAGCombine(ISD::ANY_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::TRUNCATE);
- setTargetDAGCombine(ISD::UINT_TO_FP);
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::SETCC);
if (Subtarget->is64Bit())
@@ -2204,16 +2214,14 @@ X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
/// optimization is performed and it is required (FPDiff!=0).
static SDValue
EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF,
- SDValue Chain, SDValue RetAddrFrIdx,
- bool Is64Bit, int FPDiff, DebugLoc dl) {
+ SDValue Chain, SDValue RetAddrFrIdx, EVT PtrVT,
+ unsigned SlotSize, int FPDiff, DebugLoc dl) {
// Store the return address to the appropriate stack slot.
if (!FPDiff) return Chain;
// Calculate the new stack slot for the return address.
- int SlotSize = Is64Bit ? 8 : 4;
int NewReturnAddrFI =
MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize, false);
- EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
- SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
+ SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
MachinePointerInfo::getFixedStack(NewReturnAddrFI),
false, false, 0);
@@ -2482,7 +2490,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
&MemOpChains2[0], MemOpChains2.size());
// Store the return address to the appropriate stack slot.
- Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx, Is64Bit,
+ Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
+ getPointerTy(), RegInfo->getSlotSize(),
FPDiff, dl);
}
@@ -2694,8 +2703,7 @@ X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
unsigned StackAlignment = TFI.getStackAlignment();
uint64_t AlignMask = StackAlignment - 1;
int64_t Offset = StackSize;
- // @LOCALMOD
- uint64_t SlotSize = Subtarget->is64Bit() ? 8 : 4;
+ unsigned SlotSize = RegInfo->getSlotSize();
if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) {
// Number smaller than 12 so just add the difference.
Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
@@ -3063,7 +3071,7 @@ SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
if (ReturnAddrIndex == 0) {
// Set up a frame object for the return address.
- uint64_t SlotSize = Subtarget->is64Bit() ? 8 : 4; // @LOCALMOD
+ unsigned SlotSize = RegInfo->getSlotSize();
ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize,
false);
FuncInfo->setRAIndex(ReturnAddrIndex);
@@ -6594,6 +6602,81 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
getShuffleSHUFImmediate(SVOp), DAG);
}
+// Reduce a vector shuffle to zext.
+SDValue
+X86TargetLowering::lowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const {
+ // PMOVZX is only available from SSE41.
+ if (!Subtarget->hasSSE41())
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+
+ // Only AVX2 support 256-bit vector integer extending.
+ if (!Subtarget->hasAVX2() && VT.is256BitVector())
+ return SDValue();
+
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ unsigned NumElems = VT.getVectorNumElements();
+
+ // Extending is an unary operation and the element type of the source vector
+ // won't be equal to or larger than i64.
+ if (V2.getOpcode() != ISD::UNDEF || !VT.isInteger() ||
+ VT.getVectorElementType() == MVT::i64)
+ return SDValue();
+
+ // Find the expansion ratio, e.g. expanding from i8 to i32 has a ratio of 4.
+ unsigned Shift = 1; // Start from 2, i.e. 1 << 1.
+ while ((1 << Shift) < NumElems) {
+ if (SVOp->getMaskElt(1 << Shift) == 1)
+ break;
+ Shift += 1;
+ // The maximal ratio is 8, i.e. from i8 to i64.
+ if (Shift > 3)
+ return SDValue();
+ }
+
+ // Check the shuffle mask.
+ unsigned Mask = (1U << Shift) - 1;
+ for (unsigned i = 0; i != NumElems; ++i) {
+ int EltIdx = SVOp->getMaskElt(i);
+ if ((i & Mask) != 0 && EltIdx != -1)
+ return SDValue();
+ if ((i & Mask) == 0 && (unsigned)EltIdx != (i >> Shift))
+ return SDValue();
+ }
+
+ unsigned NBits = VT.getVectorElementType().getSizeInBits() << Shift;
+ EVT NeVT = EVT::getIntegerVT(*DAG.getContext(), NBits);
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), NeVT, NumElems >> Shift);
+
+ if (!isTypeLegal(NVT))
+ return SDValue();
+
+ // Simplify the operand as it's prepared to be fed into shuffle.
+ unsigned SignificantBits = NVT.getSizeInBits() >> Shift;
+ if (V1.getOpcode() == ISD::BITCAST &&
+ V1.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ V1.getOperand(0).getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ V1.getOperand(0)
+ .getOperand(0).getValueType().getSizeInBits() == SignificantBits) {
+ // (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast x)
+ SDValue V = V1.getOperand(0).getOperand(0).getOperand(0);
+ ConstantSDNode *CIdx =
+ dyn_cast<ConstantSDNode>(V1.getOperand(0).getOperand(0).getOperand(1));
+ // If it's foldable, i.e. normal load with single use, we will let code
+ // selection to fold it. Otherwise, we will short the conversion sequence.
+ if (CIdx && CIdx->getZExtValue() == 0 &&
+ (!ISD::isNormalLoad(V.getNode()) || !V.hasOneUse()))
+ V1 = DAG.getNode(ISD::BITCAST, DL, V1.getValueType(), V);
+ }
+
+ return DAG.getNode(ISD::BITCAST, DL, VT,
+ DAG.getNode(X86ISD::VZEXT, DL, NVT, V1));
+}
+
SDValue
X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
@@ -6624,6 +6707,11 @@ X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const {
return PromoteSplat(SVOp, DAG);
}
+ // Check integer expanding shuffles.
+ SDValue NewOp = lowerVectorIntExtend(Op, DAG);
+ if (NewOp.getNode())
+ return NewOp;
+
// If the shuffle can be profitably rewritten as a narrower shuffle, then
// do it!
if (VT == MVT::v8i16 || VT == MVT::v16i8 ||
@@ -8098,11 +8186,29 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op,
return Sub;
}
+SDValue X86TargetLowering::lowerUINT_TO_FP_vec(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue N0 = Op.getOperand(0);
+ EVT SVT = N0.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+
+ assert((SVT == MVT::v4i8 || SVT == MVT::v4i16 ||
+ SVT == MVT::v8i8 || SVT == MVT::v8i16) &&
+ "Custom UINT_TO_FP is not supported!");
+
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, SVT.getVectorNumElements());
+ return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
+ DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N0));
+}
+
SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
SDValue N0 = Op.getOperand(0);
DebugLoc dl = Op.getDebugLoc();
+ if (Op.getValueType().isVector())
+ return lowerUINT_TO_FP_vec(Op, DAG);
+
// Since UINT_TO_FP is legal (it's marked custom), dag combiner won't
// optimize it to a SINT_TO_FP when the sign bit is known zero. Perform
// the optimization here.
@@ -8276,6 +8382,30 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned, bool IsReplace) co
}
}
+SDValue X86TargetLowering::lowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ SDValue In = Op.getOperand(0);
+ EVT SVT = In.getValueType();
+
+ if (!VT.is256BitVector() || !SVT.is128BitVector() ||
+ VT.getVectorNumElements() != SVT.getVectorNumElements())
+ return SDValue();
+
+ assert(Subtarget->hasAVX() && "256-bit vector is observed without AVX!");
+
+ // AVX2 has better support of integer extending.
+ if (Subtarget->hasAVX2())
+ return DAG.getNode(X86ISD::VZEXT, DL, VT, In);
+
+ SDValue Lo = DAG.getNode(X86ISD::VZEXT, DL, MVT::v4i32, In);
+ static const int Mask[] = {4, 5, 6, 7, -1, -1, -1, -1};
+ SDValue Hi = DAG.getNode(X86ISD::VZEXT, DL, MVT::v4i32,
+ DAG.getVectorShuffle(MVT::v8i16, DL, In, DAG.getUNDEF(MVT::v8i16), &Mask[0]));
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i32, Lo, Hi);
+}
+
SDValue X86TargetLowering::lowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
DebugLoc DL = Op.getDebugLoc();
EVT VT = Op.getValueType();
@@ -10571,23 +10701,21 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op,
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
DebugLoc dl = Op.getDebugLoc();
+ EVT PtrVT = getPointerTy();
if (Depth > 0) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
SDValue Offset =
- // @LOCALMOD-BEGIN
- DAG.getConstant(Subtarget->is64Bit() ? 8 : 4,
- getPointerTy());
- // @LOCALMOD-END
- return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
- DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ DAG.getConstant(RegInfo->getSlotSize(), PtrVT);
+ return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
+ DAG.getNode(ISD::ADD, dl, PtrVT,
FrameAddr, Offset),
MachinePointerInfo(), false, false, false, 0);
}
// Just load the return address.
SDValue RetAddrFI = getReturnAddressFrameIndex(DAG);
- return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
RetAddrFI, MachinePointerInfo(), false, false, false, 0);
}
@@ -10609,10 +10737,7 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op,
SelectionDAG &DAG) const {
- // @LOCALMOD-START
- int SlotSize = Subtarget->is64Bit() ? 8 : 4;
- return DAG.getIntPtrConstant(2*SlotSize);
- // @LOCALMOD-END
+ return DAG.getIntPtrConstant(2 * RegInfo->getSlotSize());
}
SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
@@ -10620,17 +10745,17 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
SDValue Offset = Op.getOperand(1);
SDValue Handler = Op.getOperand(2);
DebugLoc dl = Op.getDebugLoc();
- // @LOCALMOD-START
- bool Has64BitPtrs = Subtarget->has64BitPointers();
+ // @LOCALMOD-START
+ bool Has64BitPointers = Subtarget->has64BitPointers();
SDValue Frame = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
- Has64BitPtrs ? X86::RBP : X86::EBP,
+ Has64BitPointers ? X86::RBP : X86::EBP,
getPointerTy());
- unsigned StoreAddrReg = (Has64BitPtrs ? X86::RCX : X86::ECX);
- int SlotSize = Subtarget->is64Bit() ? 8 : 4;
- SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Frame,
- DAG.getIntPtrConstant(SlotSize));
+ unsigned StoreAddrReg = (Has64BitPointers ? X86::RCX : X86::ECX);
// @LOCALMOD-END
+
+ SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Frame,
+ DAG.getIntPtrConstant(RegInfo->getSlotSize()));
StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StoreAddr, Offset);
Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(),
false, false, 0);
@@ -11659,6 +11784,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
case ISD::TRUNCATE: return lowerTRUNCATE(Op, DAG);
+ case ISD::ZERO_EXTEND: return lowerZERO_EXTEND(Op, DAG);
case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG);
case ISD::FP_EXTEND: return lowerFP_EXTEND(Op, DAG);
@@ -11797,6 +11923,22 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
}
return;
}
+ case ISD::UINT_TO_FP: {
+ if (N->getOperand(0).getValueType() != MVT::v2i32 &&
+ N->getValueType(0) != MVT::v2f32)
+ return;
+ SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64,
+ N->getOperand(0));
+ SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL),
+ MVT::f64);
+ SDValue VBias = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2f64, Bias, Bias);
+ SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64, ZExtIn,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, VBias));
+ Or = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Or);
+ SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, Or, VBias);
+ Results.push_back(DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, Sub));
+ return;
+ }
case ISD::FP_ROUND: {
SDValue V = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, N->getOperand(0));
Results.push_back(V);
@@ -11999,6 +12141,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL";
case X86ISD::VSEXT_MOVL: return "X86ISD::VSEXT_MOVL";
case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
+ case X86ISD::VZEXT: return "X86ISD::VZEXT";
+ case X86ISD::VSEXT: return "X86ISD::VSEXT";
case X86ISD::VFPEXT: return "X86ISD::VFPEXT";
case X86ISD::VFPROUND: return "X86ISD::VFPROUND";
case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ";
@@ -16565,23 +16709,6 @@ static SDValue PerformBrCondCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-static SDValue PerformUINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG) {
- SDValue Op0 = N->getOperand(0);
- EVT InVT = Op0->getValueType(0);
-
- // UINT_TO_FP(v4i8) -> SINT_TO_FP(ZEXT(v4i8 to v4i32))
- if (InVT == MVT::v8i8 || InVT == MVT::v4i8) {
- DebugLoc dl = N->getDebugLoc();
- MVT DstVT = InVT == MVT::v4i8 ? MVT::v4i32 : MVT::v8i32;
- SDValue P = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Op0);
- // Notice that we use SINT_TO_FP because we know that the high bits
- // are zero and SINT_TO_FP is better supported by the hardware.
- return DAG.getNode(ISD::SINT_TO_FP, dl, N->getValueType(0), P);
- }
-
- return SDValue();
-}
-
static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
const X86TargetLowering *XTLI) {
SDValue Op0 = N->getOperand(0);
@@ -16727,6 +16854,21 @@ static SDValue PerformSubCombine(SDNode *N, SelectionDAG &DAG,
return OptimizeConditionalInDecrement(N, DAG);
}
+/// performVZEXTCombine - Performs build vector combines
+static SDValue performVZEXTCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ // (vzext (bitcast (vzext (x)) -> (vzext x)
+ SDValue In = N->getOperand(0);
+ while (In.getOpcode() == ISD::BITCAST)
+ In = In.getOperand(0);
+
+ if (In.getOpcode() != X86ISD::VZEXT)
+ return SDValue();
+
+ return DAG.getNode(X86ISD::VZEXT, N->getDebugLoc(), N->getValueType(0), In.getOperand(0));
+}
+
SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -16749,7 +16891,6 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::XOR: return PerformXorCombine(N, DAG, DCI, Subtarget);
case ISD::LOAD: return PerformLOADCombine(N, DAG, DCI, Subtarget);
case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget);
- case ISD::UINT_TO_FP: return PerformUINT_TO_FPCombine(N, DAG);
case ISD::SINT_TO_FP: return PerformSINT_TO_FPCombine(N, DAG, this);
case ISD::FADD: return PerformFADDCombine(N, DAG, Subtarget);
case ISD::FSUB: return PerformFSUBCombine(N, DAG, Subtarget);
@@ -16767,6 +16908,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SETCC: return PerformISDSETCCCombine(N, DAG);
case X86ISD::SETCC: return PerformSETCCCombine(N, DAG, DCI, Subtarget);
case X86ISD::BRCOND: return PerformBrCondCombine(N, DAG, DCI, Subtarget);
+ case X86ISD::VZEXT: return performVZEXTCombine(N, DAG, DCI, Subtarget);
case X86ISD::SHUFP: // Handle all target specific shuffles
case X86ISD::PALIGN:
case X86ISD::UNPCKH:
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index f6f9e584af..8d8f3f5161 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -243,6 +243,12 @@ namespace llvm {
// VSEXT_MOVL - Vector move low and sign extend.
VSEXT_MOVL,
+ // VZEXT - Vector integer zero-extend.
+ VZEXT,
+
+ // VSEXT - Vector integer signed-extend.
+ VSEXT,
+
// VFPEXT - Vector FP extend.
VFPEXT,
@@ -803,7 +809,9 @@ namespace llvm {
SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
@@ -847,6 +855,8 @@ namespace llvm {
SDValue LowerVectorAllZeroTest(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const;
+
virtual SDValue
LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 46281efa57..73ba0011df 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -90,6 +90,14 @@ def X86vsmovl : SDNode<"X86ISD::VSEXT_MOVL",
def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def X86vzext : SDNode<"X86ISD::VZEXT",
+ SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisInt<0>, SDTCisInt<1>]>>;
+
+def X86vsext : SDNode<"X86ISD::VSEXT",
+ SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisInt<0>, SDTCisInt<1>]>>;
+
def X86vfpext : SDNode<"X86ISD::VFPEXT",
SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisFP<0>, SDTCisFP<1>]>>;
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index cc1291a8a0..e9c7f3e7f1 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -5841,6 +5841,85 @@ let Predicates = [UseSSE41] in {
(PMOVZXBQrm addr:$src)>;
}
+let Predicates = [HasAVX2] in {
+ def : Pat<(v16i16 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBWYrr VR128:$src)>;
+ def : Pat<(v8i32 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBDYrr VR128:$src)>;
+ def : Pat<(v4i64 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBQYrr VR128:$src)>;
+
+ def : Pat<(v8i32 (X86vzext (v8i16 VR128:$src))), (VPMOVZXWDYrr VR128:$src)>;
+ def : Pat<(v4i64 (X86vzext (v8i16 VR128:$src))), (VPMOVZXWQYrr VR128:$src)>;
+
+ def : Pat<(v4i64 (X86vzext (v4i32 VR128:$src))), (VPMOVZXDQYrr VR128:$src)>;
+}
+
+let Predicates = [HasAVX] in {
+ def : Pat<(v8i16 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBWrr VR128:$src)>;
+ def : Pat<(v4i32 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBDrr VR128:$src)>;
+ def : Pat<(v2i64 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBQrr VR128:$src)>;
+
+ def : Pat<(v4i32 (X86vzext (v8i16 VR128:$src))), (VPMOVZXWDrr VR128:$src)>;
+ def : Pat<(v2i64 (X86vzext (v8i16 VR128:$src))), (VPMOVZXWQrr VR128:$src)>;
+
+ def : Pat<(v2i64 (X86vzext (v4i32 VR128:$src))), (VPMOVZXDQrr VR128:$src)>;
+
+ def : Pat<(v8i16 (X86vzext (v16i8 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
+ (VPMOVZXBWrm addr:$src)>;
+ def : Pat<(v8i16 (X86vzext (v16i8 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))),
+ (VPMOVZXBWrm addr:$src)>;
+ def : Pat<(v4i32 (X86vzext (v16i8 (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
+ (VPMOVZXBDrm addr:$src)>;
+ def : Pat<(v2i64 (X86vzext (v16i8 (bitconvert (v4i32 (scalar_to_vector (loadi16_anyext addr:$src))))))),
+ (VPMOVZXBQrm addr:$src)>;
+
+ def : Pat<(v4i32 (X86vzext (v8i16 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
+ (VPMOVZXWDrm addr:$src)>;
+ def : Pat<(v4i32 (X86vzext (v8i16 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))),
+ (VPMOVZXWDrm addr:$src)>;
+ def : Pat<(v2i64 (X86vzext (v8i16 (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
+ (VPMOVZXWQrm addr:$src)>;
+
+ def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
+ (VPMOVZXDQrm addr:$src)>;
+ def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))),
+ (VPMOVZXDQrm addr:$src)>;
+ def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (X86vzload addr:$src)))))),
+ (VPMOVZXDQrm addr:$src)>;
+}
+
+let Predicates = [UseSSE41] in {
+ def : Pat<(v8i16 (X86vzext (v16i8 VR128:$src))), (PMOVZXBWrr VR128:$src)>;
+ def : Pat<(v4i32 (X86vzext (v16i8 VR128:$src))), (PMOVZXBDrr VR128:$src)>;
+ def : Pat<(v2i64 (X86vzext (v16i8 VR128:$src))), (PMOVZXBQrr VR128:$src)>;
+
+ def : Pat<(v4i32 (X86vzext (v8i16 VR128:$src))), (PMOVZXWDrr VR128:$src)>;
+ def : Pat<(v2i64 (X86vzext (v8i16 VR128:$src))), (PMOVZXWQrr VR128:$src)>;
+
+ def : Pat<(v2i64 (X86vzext (v4i32 VR128:$src))), (PMOVZXDQrr VR128:$src)>;
+
+ def : Pat<(v8i16 (X86vzext (v16i8 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
+ (PMOVZXBWrm addr:$src)>;
+ def : Pat<(v8i16 (X86vzext (v16i8 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))),
+ (PMOVZXBWrm addr:$src)>;
+ def : Pat<(v4i32 (X86vzext (v16i8 (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
+ (PMOVZXBDrm addr:$src)>;
+ def : Pat<(v2i64 (X86vzext (v16i8 (bitconvert (v4i32 (scalar_to_vector (loadi16_anyext addr:$src))))))),
+ (PMOVZXBQrm addr:$src)>;
+
+ def : Pat<(v4i32 (X86vzext (v8i16 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
+ (PMOVZXWDrm addr:$src)>;
+ def : Pat<(v4i32 (X86vzext (v8i16 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))),
+ (PMOVZXWDrm addr:$src)>;
+ def : Pat<(v2i64 (X86vzext (v8i16 (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
+ (PMOVZXWQrm addr:$src)>;
+
+ def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
+ (PMOVZXDQrm addr:$src)>;
+ def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))),
+ (PMOVZXDQrm addr:$src)>;
+ def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (X86vzload addr:$src)))))),
+ (PMOVZXDQrm addr:$src)>;
+}
+
//===----------------------------------------------------------------------===//
// SSE4.1 - Extract Instructions
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index f7a17cd7c1..9054345d35 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -234,15 +234,26 @@ const uint16_t *
X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
bool callsEHReturn = false;
bool ghcCall = false;
+ bool oclBiCall = false;
+ bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
if (MF) {
callsEHReturn = MF->getMMI().callsEHReturn();
const Function *F = MF->getFunction();
ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false);
+ oclBiCall = (F ? F->getCallingConv() == CallingConv::Intel_OCL_BI : false);
}
if (ghcCall)
return CSR_NoRegs_SaveList;
+ if (oclBiCall) {
+ if (HasAVX && IsWin64)
+ return CSR_Win64_Intel_OCL_BI_AVX_SaveList;
+ if (HasAVX && Is64Bit)
+ return CSR_64_Intel_OCL_BI_AVX_SaveList;
+ if (!HasAVX && !IsWin64 && Is64Bit)
+ return CSR_64_Intel_OCL_BI_SaveList;
+ }
if (Is64Bit) {
if (IsWin64)
return CSR_Win64_SaveList;
@@ -257,6 +268,16 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
const uint32_t*
X86RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
+ bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
+
+ if (CC == CallingConv::Intel_OCL_BI) {
+ if (IsWin64 && HasAVX)
+ return CSR_Win64_Intel_OCL_BI_AVX_RegMask;
+ if (Is64Bit && HasAVX)
+ return CSR_64_Intel_OCL_BI_AVX_RegMask;
+ if (!HasAVX && !IsWin64 && Is64Bit)
+ return CSR_64_Intel_OCL_BI_RegMask;
+ }
if (CC == CallingConv::GHC)
return CSR_NoRegs_RegMask;
if (!Is64Bit)
diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp
index a102935b4b..6e53e7ac93 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -62,7 +62,8 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
if (const char *bzeroEntry = V &&
V->isNullValue() ? Subtarget->getBZeroEntry() : 0) {
EVT IntPtr = TLI.getPointerTy();
- Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext());
+ unsigned AS = DstPtrInfo.getAddrSpace();
+ Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext(), AS);
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Node = Dst;
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index c804195f27..e31bedf6de 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -51,7 +51,7 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT,
TSInfo(*this),
TLInfo(*this),
JITInfo(*this),
- STTI(&TLInfo) {
+ STTI(&TLInfo), VTTI(&TLInfo) {
}
void X86_64TargetMachine::anchor() { }
@@ -71,7 +71,7 @@ X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT,
TSInfo(*this),
TLInfo(*this),
JITInfo(*this),
- STTI(&TLInfo) {
+ STTI(&TLInfo), VTTI(&TLInfo){
}
/// X86TargetMachine ctor - Create an X86 target.
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index 9e7816e21f..eaa745ba9b 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -477,7 +477,8 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
}
// Lower to a call to __misaligned_load(BasePtr).
- Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext());
+ unsigned AS = LD->getAddressSpace();
+ Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext(), AS);
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
@@ -536,7 +537,8 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const
}
// Lower to a call to __misaligned_store(BasePtr, Value).
- Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext());
+ unsigned AS = ST->getAddressSpace();
+ Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext(), AS);
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index 0b7e3e10d4..d5a932c518 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -32,7 +32,7 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT,
InstrInfo(),
FrameLowering(Subtarget),
TLInfo(*this),
- TSInfo(*this), STTI(&TLInfo) {
+ TSInfo(*this), STTI(&TLInfo), VTTI(&TLInfo) {
}
namespace {
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 678189b3d6..3d5657fe6a 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -1500,7 +1500,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
unsigned TypeSize = TD->getTypeAllocSize(FieldTy);
if (StructType *ST = dyn_cast<StructType>(FieldTy))
TypeSize = TD->getStructLayout(ST)->getSizeInBytes();
- Type *IntPtrTy = TD->getIntPtrType(CI->getContext());
+ Type *IntPtrTy = TD->getIntPtrType(GV->getType());
Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy,
ConstantInt::get(IntPtrTy, TypeSize),
NElems, 0,
@@ -1730,7 +1730,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// If this is a fixed size array, transform the Malloc to be an alloc of
// structs. malloc [100 x struct],1 -> malloc struct, 100
if (ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI, TLI))) {
- Type *IntPtrTy = TD->getIntPtrType(CI->getContext());
+ Type *IntPtrTy = TD->getIntPtrType(GV->getType());
unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes();
Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize);
Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements());
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index 44283ddce7..1c6477c022 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -206,9 +206,8 @@ bool FunctionComparator::isEquivalentType(Type *Ty1,
return true;
if (Ty1->getTypeID() != Ty2->getTypeID()) {
if (TD) {
- LLVMContext &Ctx = Ty1->getContext();
- if (isa<PointerType>(Ty1) && Ty2 == TD->getIntPtrType(Ctx)) return true;
- if (isa<PointerType>(Ty2) && Ty1 == TD->getIntPtrType(Ctx)) return true;
+ if (isa<PointerType>(Ty1) && Ty2 == TD->getIntPtrType(Ty1)) return true;
+ if (isa<PointerType>(Ty2) && Ty1 == TD->getIntPtrType(Ty2)) return true;
}
return false;
}
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h
index 7467eca7ab..0e765f7aaa 100644
--- a/lib/Transforms/InstCombine/InstCombine.h
+++ b/lib/Transforms/InstCombine/InstCombine.h
@@ -208,7 +208,7 @@ private:
bool ShouldChangeType(Type *From, Type *To) const;
Value *dyn_castNegVal(Value *V) const;
Value *dyn_castFNegVal(Value *V) const;
- Type *FindElementAtOffset(Type *Ty, int64_t Offset,
+ Type *FindElementAtOffset(Type *Ty, int64_t Offset, Type *IntPtrTy,
SmallVectorImpl<Value*> &NewIndices);
Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI);
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 4f4c388a92..0958842d08 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -996,9 +996,9 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
// Conversion is ok if changing from one pointer type to another or from
// a pointer to an integer of the same size.
!((OldRetTy->isPointerTy() || !TD ||
- OldRetTy == TD->getIntPtrType(Caller->getContext())) &&
+ OldRetTy == TD->getIntPtrType(NewRetTy)) &&
(NewRetTy->isPointerTy() || !TD ||
- NewRetTy == TD->getIntPtrType(Caller->getContext()))))
+ NewRetTy == TD->getIntPtrType(OldRetTy))))
return false; // Cannot transform this return value.
if (!Caller->use_empty() &&
@@ -1057,11 +1057,13 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
// Converting from one pointer type to another or between a pointer and an
// integer of the same size is safe even if we do not have a body.
+ // FIXME: Not sure what to do here, so setting AS to 0.
+ // How can the AS for a function call be outside the default?
bool isConvertible = ActTy == ParamTy ||
(TD && ((ParamTy->isPointerTy() ||
- ParamTy == TD->getIntPtrType(Caller->getContext())) &&
+ ParamTy == TD->getIntPtrType(ActTy)) &&
(ActTy->isPointerTy() ||
- ActTy == TD->getIntPtrType(Caller->getContext()))));
+ ActTy == TD->getIntPtrType(ParamTy))));
if (Callee->isDeclaration() && !isConvertible) return false;
}
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index f3f3f8f585..119d2f5c99 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -30,7 +30,7 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
Scale = 0;
return ConstantInt::get(Val->getType(), 0);
}
-
+
if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) {
// Cannot look past anything that might overflow.
OverflowingBinaryOperator *OBI = dyn_cast<OverflowingBinaryOperator>(Val);
@@ -47,19 +47,19 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
Offset = 0;
return I->getOperand(0);
}
-
+
if (I->getOpcode() == Instruction::Mul) {
// This value is scaled by 'RHS'.
Scale = RHS->getZExtValue();
Offset = 0;
return I->getOperand(0);
}
-
+
if (I->getOpcode() == Instruction::Add) {
- // We have X+C. Check to see if we really have (X*C2)+C1,
+ // We have X+C. Check to see if we really have (X*C2)+C1,
// where C1 is divisible by C2.
unsigned SubScale;
- Value *SubVal =
+ Value *SubVal =
DecomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset);
Offset += RHS->getZExtValue();
Scale = SubScale;
@@ -82,7 +82,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
if (!TD) return 0;
PointerType *PTy = cast<PointerType>(CI.getType());
-
+
BuilderTy AllocaBuilder(*Builder);
AllocaBuilder.SetInsertPoint(AI.getParent(), &AI);
@@ -110,7 +110,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
uint64_t ArrayOffset;
Value *NumElements = // See if the array size is a decomposable linear expr.
DecomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset);
-
+
// If we can now satisfy the modulus, by using a non-1 scale, we really can
// do the xform.
if ((AllocElTySize*ArraySizeScale) % CastElTySize != 0 ||
@@ -125,17 +125,17 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
// Insert before the alloca, not before the cast.
Amt = AllocaBuilder.CreateMul(Amt, NumElements);
}
-
+
if (uint64_t Offset = (AllocElTySize*ArrayOffset)/CastElTySize) {
Value *Off = ConstantInt::get(AI.getArraySize()->getType(),
Offset, true);
Amt = AllocaBuilder.CreateAdd(Amt, Off);
}
-
+
AllocaInst *New = AllocaBuilder.CreateAlloca(CastElTy, Amt);
New->setAlignment(AI.getAlignment());
New->takeName(&AI);
-
+
// If the allocation has multiple real uses, insert a cast and change all
// things that used it to use the new cast. This will also hack on CI, but it
// will die soon.
@@ -148,10 +148,10 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
return ReplaceInstUsesWith(CI, New);
}
-/// EvaluateInDifferentType - Given an expression that
+/// EvaluateInDifferentType - Given an expression that
/// CanEvaluateTruncated or CanEvaluateSExtd returns true for, actually
/// insert the code to evaluate the expression.
-Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
+Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
bool isSigned) {
if (Constant *C = dyn_cast<Constant>(V)) {
C = ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/);
@@ -181,7 +181,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
Res = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
break;
- }
+ }
case Instruction::Trunc:
case Instruction::ZExt:
case Instruction::SExt:
@@ -190,7 +190,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
// new.
if (I->getOperand(0)->getType() == Ty)
return I->getOperand(0);
-
+
// Otherwise, must be the same type of cast, so just reinsert a new one.
// This also handles the case of zext(trunc(x)) -> zext(x).
Res = CastInst::CreateIntegerCast(I->getOperand(0), Ty,
@@ -212,11 +212,11 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
Res = NPN;
break;
}
- default:
+ default:
// TODO: Can handle more cases here.
llvm_unreachable("Unreachable!");
}
-
+
Res->takeName(I);
return InsertNewInstWith(Res, *I);
}
@@ -224,7 +224,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
/// This function is a wrapper around CastInst::isEliminableCastPair. It
/// simply extracts arguments and returns what that function returns.
-static Instruction::CastOps
+static Instruction::CastOps
isEliminableCastPair(
const CastInst *CI, ///< The first cast instruction
unsigned opcode, ///< The opcode of the second cast instruction
@@ -238,19 +238,18 @@ isEliminableCastPair(
// Get the opcodes of the two Cast instructions
Instruction::CastOps firstOp = Instruction::CastOps(CI->getOpcode());
Instruction::CastOps secondOp = Instruction::CastOps(opcode);
-
unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy,
DstTy,
- TD ? TD->getIntPtrType(CI->getContext()) : 0);
-
+ TD ? TD->getIntPtrType(DstTy) : 0);
+
// We don't want to form an inttoptr or ptrtoint that converts to an integer
// type that differs from the pointer size.
if ((Res == Instruction::IntToPtr &&
- (!TD || SrcTy != TD->getIntPtrType(CI->getContext()))) ||
+ (!TD || SrcTy != TD->getIntPtrType(DstTy))) ||
(Res == Instruction::PtrToInt &&
- (!TD || DstTy != TD->getIntPtrType(CI->getContext()))))
+ (!TD || DstTy != TD->getIntPtrType(SrcTy))))
Res = 0;
-
+
return Instruction::CastOps(Res);
}
@@ -262,18 +261,18 @@ bool InstCombiner::ShouldOptimizeCast(Instruction::CastOps opc, const Value *V,
Type *Ty) {
// Noop casts and casts of constants should be eliminated trivially.
if (V->getType() == Ty || isa<Constant>(V)) return false;
-
+
// If this is another cast that can be eliminated, we prefer to have it
// eliminated.
if (const CastInst *CI = dyn_cast<CastInst>(V))
if (isEliminableCastPair(CI, opc, Ty, TD))
return false;
-
+
// If this is a vector sext from a compare, then we don't want to break the
// idiom where each element of the extended vector is either zero or all ones.
if (opc == Instruction::SExt && isa<CmpInst>(V) && Ty->isVectorTy())
return false;
-
+
return true;
}
@@ -285,7 +284,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
// Many cases of "cast of a cast" are eliminable. If it's eliminable we just
// eliminate it now.
if (CastInst *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast
- if (Instruction::CastOps opc =
+ if (Instruction::CastOps opc =
isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), TD)) {
// The first cast (CSrc) is eliminable so we need to fix up or replace
// the second cast (CI). CSrc will then have a good chance of being dead.
@@ -308,7 +307,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
if (Instruction *NV = FoldOpIntoPhi(CI))
return NV;
}
-
+
return 0;
}
@@ -327,15 +326,15 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty) {
// We can always evaluate constants in another type.
if (isa<Constant>(V))
return true;
-
+
Instruction *I = dyn_cast<Instruction>(V);
if (!I) return false;
-
+
Type *OrigTy = V->getType();
-
+
// If this is an extension from the dest type, we can eliminate it, even if it
// has multiple uses.
- if ((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
+ if ((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
I->getOperand(0)->getType() == Ty)
return true;
@@ -420,29 +419,29 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty) {
// TODO: Can handle more cases here.
break;
}
-
+
return false;
}
Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
if (Instruction *Result = commonCastTransforms(CI))
return Result;
-
- // See if we can simplify any instructions used by the input whose sole
+
+ // See if we can simplify any instructions used by the input whose sole
// purpose is to compute bits we don't care about.
if (SimplifyDemandedInstructionBits(CI))
return &CI;
-
+
Value *Src = CI.getOperand(0);
Type *DestTy = CI.getType(), *SrcTy = Src->getType();
-
+
// Attempt to truncate the entire input expression tree to the destination
// type. Only do this if the dest type is a simple type, don't convert the
// expression tree to something weird like i93 unless the source is also
// strange.
if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
CanEvaluateTruncated(Src, DestTy)) {
-
+
// If this cast is a truncate, evaluting in a different type always
// eliminates the cast, so it is always a win.
DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
@@ -459,7 +458,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
Value *Zero = Constant::getNullValue(Src->getType());
return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero);
}
-
+
// Transform trunc(lshr (zext A), Cst) to eliminate one type conversion.
Value *A = 0; ConstantInt *Cst = 0;
if (Src->hasOneUse() &&
@@ -469,7 +468,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
// ASize < MidSize and MidSize > ResultSize, but don't know the relation
// between ASize and ResultSize.
unsigned ASize = A->getType()->getPrimitiveSizeInBits();
-
+
// If the shift amount is larger than the size of A, then the result is
// known to be zero because all the input bits got shifted out.
if (Cst->getZExtValue() >= ASize)
@@ -482,7 +481,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
Shift->takeName(Src);
return CastInst::CreateIntegerCast(Shift, CI.getType(), false);
}
-
+
// Transform "trunc (and X, cst)" -> "and (trunc X), cst" so long as the dest
// type isn't non-native.
if (Src->hasOneUse() && isa<IntegerType>(Src->getType()) &&
@@ -505,7 +504,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
// cast to integer to avoid the comparison.
if (ConstantInt *Op1C = dyn_cast<ConstantInt>(ICI->getOperand(1))) {
const APInt &Op1CV = Op1C->getValue();
-
+
// zext (x <s 0) to i32 --> x>>u31 true if signbit set.
// zext (x >s -1) to i32 --> (x>>u31)^1 true if signbit clear.
if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV == 0) ||
@@ -535,14 +534,14 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
// zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set.
// zext (X != 1) to i32 --> X^1 iff X has only the low bit set.
// zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
- if ((Op1CV == 0 || Op1CV.isPowerOf2()) &&
+ if ((Op1CV == 0 || Op1CV.isPowerOf2()) &&
// This only works for EQ and NE
ICI->isEquality()) {
// If Op1C some other power of two, convert:
uint32_t BitWidth = Op1C->getType()->getBitWidth();
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
ComputeMaskedBits(ICI->getOperand(0), KnownZero, KnownOne);
-
+
APInt KnownZeroMask(~KnownZero);
if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1?
if (!DoXform) return ICI;
@@ -556,7 +555,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
Res = ConstantExpr::getZExt(Res, CI.getType());
return ReplaceInstUsesWith(CI, Res);
}
-
+
uint32_t ShiftAmt = KnownZeroMask.logBase2();
Value *In = ICI->getOperand(0);
if (ShiftAmt) {
@@ -565,12 +564,12 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
In = Builder->CreateLShr(In, ConstantInt::get(In->getType(),ShiftAmt),
In->getName()+".lobit");
}
-
+
if ((Op1CV != 0) == isNE) { // Toggle the low bit.
Constant *One = ConstantInt::get(In->getType(), 1);
In = Builder->CreateXor(In, One);
}
-
+
if (CI.getType() == In->getType())
return ReplaceInstUsesWith(CI, In);
return CastInst::CreateIntegerCast(In, CI.getType(), false/*ZExt*/);
@@ -643,19 +642,19 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) {
BitsToClear = 0;
if (isa<Constant>(V))
return true;
-
+
Instruction *I = dyn_cast<Instruction>(V);
if (!I) return false;
-
+
// If the input is a truncate from the destination type, we can trivially
// eliminate it.
if (isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty)
return true;
-
+
// We can't extend or shrink something that has multiple uses: doing so would
// require duplicating the instruction in general, which isn't profitable.
if (!I->hasOneUse()) return false;
-
+
unsigned Opc = I->getOpcode(), Tmp;
switch (Opc) {
case Instruction::ZExt: // zext(zext(x)) -> zext(x).
@@ -675,7 +674,7 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) {
// These can all be promoted if neither operand has 'bits to clear'.
if (BitsToClear == 0 && Tmp == 0)
return true;
-
+
// If the operation is an AND/OR/XOR and the bits to clear are zero in the
// other side, BitsToClear is ok.
if (Tmp == 0 &&
@@ -688,10 +687,10 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) {
APInt::getHighBitsSet(VSize, BitsToClear)))
return true;
}
-
+
// Otherwise, we don't know how to analyze this BitsToClear case yet.
return false;
-
+
case Instruction::LShr:
// We can promote lshr(x, cst) if we can promote x. This requires the
// ultimate 'and' to clear out the high zero bits we're clearing out though.
@@ -713,7 +712,7 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) {
Tmp != BitsToClear)
return false;
return true;
-
+
case Instruction::PHI: {
// We can change a phi if we can change all operands. Note that we never
// get into trouble with cyclic PHIs here because we only consider
@@ -740,44 +739,44 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
// eliminated before we try to optimize this zext.
if (CI.hasOneUse() && isa<TruncInst>(CI.use_back()))
return 0;
-
+
// If one of the common conversion will work, do it.
if (Instruction *Result = commonCastTransforms(CI))
return Result;
- // See if we can simplify any instructions used by the input whose sole
+ // See if we can simplify any instructions used by the input whose sole
// purpose is to compute bits we don't care about.
if (SimplifyDemandedInstructionBits(CI))
return &CI;
-
+
Value *Src = CI.getOperand(0);
Type *SrcTy = Src->getType(), *DestTy = CI.getType();
-
+
// Attempt to extend the entire input expression tree to the destination
// type. Only do this if the dest type is a simple type, don't convert the
// expression tree to something weird like i93 unless the source is also
// strange.
unsigned BitsToClear;
if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
- CanEvaluateZExtd(Src, DestTy, BitsToClear)) {
+ CanEvaluateZExtd(Src, DestTy, BitsToClear)) {
assert(BitsToClear < SrcTy->getScalarSizeInBits() &&
"Unreasonable BitsToClear");
-
+
// Okay, we can transform this! Insert the new expression now.
DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
" to avoid zero extend: " << CI);
Value *Res = EvaluateInDifferentType(Src, DestTy, false);
assert(Res->getType() == DestTy);
-
+
uint32_t SrcBitsKept = SrcTy->getScalarSizeInBits()-BitsToClear;
uint32_t DestBitSize = DestTy->getScalarSizeInBits();
-
+
// If the high bits are already filled with zeros, just replace this
// cast with the result.
if (MaskedValueIsZero(Res, APInt::getHighBitsSet(DestBitSize,
DestBitSize-SrcBitsKept)))
return ReplaceInstUsesWith(CI, Res);
-
+
// We need to emit an AND to clear the high bits.
Constant *C = ConstantInt::get(Res->getType(),
APInt::getLowBitsSet(DestBitSize, SrcBitsKept));
@@ -789,7 +788,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
// 'and' which will be much cheaper than the pair of casts.
if (TruncInst *CSrc = dyn_cast<TruncInst>(Src)) { // A->B->C cast
// TODO: Subsume this into EvaluateInDifferentType.
-
+
// Get the sizes of the types involved. We know that the intermediate type
// will be smaller than A or C, but don't know the relation between A and C.
Value *A = CSrc->getOperand(0);
@@ -806,7 +805,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
Value *And = Builder->CreateAnd(A, AndConst, CSrc->getName()+".mask");
return new ZExtInst(And, CI.getType());
}
-
+
if (SrcSize == DstSize) {
APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(),
@@ -815,7 +814,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
if (SrcSize > DstSize) {
Value *Trunc = Builder->CreateTrunc(A, CI.getType());
APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
- return BinaryOperator::CreateAnd(Trunc,
+ return BinaryOperator::CreateAnd(Trunc,
ConstantInt::get(Trunc->getType(),
AndValue));
}
@@ -873,7 +872,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
Value *New = Builder->CreateZExt(X, CI.getType());
return BinaryOperator::CreateXor(New, ConstantInt::get(CI.getType(), 1));
}
-
+
return 0;
}
@@ -986,14 +985,14 @@ static bool CanEvaluateSExtd(Value *V, Type *Ty) {
// If this is a constant, it can be trivially promoted.
if (isa<Constant>(V))
return true;
-
+
Instruction *I = dyn_cast<Instruction>(V);
if (!I) return false;
-
+
// If this is a truncate from the dest type, we can trivially eliminate it.
if (isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty)
return true;
-
+
// We can't extend or shrink something that has multiple uses: doing so would
// require duplicating the instruction in general, which isn't profitable.
if (!I->hasOneUse()) return false;
@@ -1012,14 +1011,14 @@ static bool CanEvaluateSExtd(Value *V, Type *Ty) {
// These operators can all arbitrarily be extended if their inputs can.
return CanEvaluateSExtd(I->getOperand(0), Ty) &&
CanEvaluateSExtd(I->getOperand(1), Ty);
-
+
//case Instruction::Shl: TODO
//case Instruction::LShr: TODO
-
+
case Instruction::Select:
return CanEvaluateSExtd(I->getOperand(1), Ty) &&
CanEvaluateSExtd(I->getOperand(2), Ty);
-
+
case Instruction::PHI: {
// We can change a phi if we can change all operands. Note that we never
// get into trouble with cyclic PHIs here because we only consider
@@ -1033,7 +1032,7 @@ static bool CanEvaluateSExtd(Value *V, Type *Ty) {
// TODO: Can handle more cases here.
break;
}
-
+
return false;
}
@@ -1042,15 +1041,15 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
// eliminated before we try to optimize this zext.
if (CI.hasOneUse() && isa<TruncInst>(CI.use_back()))
return 0;
-
+
if (Instruction *I = commonCastTransforms(CI))
return I;
-
- // See if we can simplify any instructions used by the input whose sole
+
+ // See if we can simplify any instructions used by the input whose sole
// purpose is to compute bits we don't care about.
if (SimplifyDemandedInstructionBits(CI))
return &CI;
-
+
Value *Src = CI.getOperand(0);
Type *SrcTy = Src->getType(), *DestTy = CI.getType();
@@ -1073,7 +1072,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
// cast with the result.
if (ComputeNumSignBits(Res) > DestBitSize - SrcBitSize)
return ReplaceInstUsesWith(CI, Res);
-
+
// We need to emit a shl + ashr to do the sign extend.
Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize);
return BinaryOperator::CreateAShr(Builder->CreateShl(Res, ShAmt, "sext"),
@@ -1086,7 +1085,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
if (TI->hasOneUse() && TI->getOperand(0)->getType() == DestTy) {
uint32_t SrcBitSize = SrcTy->getScalarSizeInBits();
uint32_t DestBitSize = DestTy->getScalarSizeInBits();
-
+
// We need to emit a shl + ashr to do the sign extend.
Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize);
Value *Res = Builder->CreateShl(TI->getOperand(0), ShAmt, "sext");
@@ -1122,7 +1121,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
A = Builder->CreateShl(A, ShAmtV, CI.getName());
return BinaryOperator::CreateAShr(A, ShAmtV);
}
-
+
return 0;
}
@@ -1144,7 +1143,7 @@ static Value *LookThroughFPExtensions(Value *V) {
if (Instruction *I = dyn_cast<Instruction>(V))
if (I->getOpcode() == Instruction::FPExt)
return LookThroughFPExtensions(I->getOperand(0));
-
+
// If this value is a constant, return the constant in the smallest FP type
// that can accurately represent it. This allows us to turn
// (float)((double)X+2.0) into x+2.0f.
@@ -1163,14 +1162,14 @@ static Value *LookThroughFPExtensions(Value *V) {
return V;
// Don't try to shrink to various long double types.
}
-
+
return V;
}
Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
if (Instruction *I = commonCastTransforms(CI))
return I;
-
+
// If we have fptrunc(fadd (fpextend x), (fpextend y)), where x and y are
// smaller than the destination type, we can eliminate the truncate by doing
// the add as the smaller type. This applies to fadd/fsub/fmul/fdiv as well
@@ -1187,7 +1186,7 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
Type *SrcTy = OpI->getType();
Value *LHSTrunc = LookThroughFPExtensions(OpI->getOperand(0));
Value *RHSTrunc = LookThroughFPExtensions(OpI->getOperand(1));
- if (LHSTrunc->getType() != SrcTy &&
+ if (LHSTrunc->getType() != SrcTy &&
RHSTrunc->getType() != SrcTy) {
unsigned DstSize = CI.getType()->getScalarSizeInBits();
// If the source types were both smaller than the destination type of
@@ -1199,10 +1198,10 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
return BinaryOperator::Create(OpI->getOpcode(), LHSTrunc, RHSTrunc);
}
}
- break;
+ break;
}
}
-
+
// Fold (fptrunc (sqrt (fpext x))) -> (sqrtf x)
CallInst *Call = dyn_cast<CallInst>(CI.getOperand(0));
if (Call && Call->getCalledFunction() && TLI->has(LibFunc::sqrtf) &&
@@ -1217,7 +1216,7 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
Arg->getOperand(0)->getType()->isFloatTy()) {
Function *Callee = Call->getCalledFunction();
Module *M = CI.getParent()->getParent()->getParent();
- Constant *SqrtfFunc = M->getOrInsertFunction("sqrtf",
+ Constant *SqrtfFunc = M->getOrInsertFunction("sqrtf",
Callee->getAttributes(),
Builder->getFloatTy(),
Builder->getFloatTy(),
@@ -1225,15 +1224,15 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
CallInst *ret = CallInst::Create(SqrtfFunc, Arg->getOperand(0),
"sqrtfcall");
ret->setAttributes(Callee->getAttributes());
-
-
+
+
// Remove the old Call. With -fmath-errno, it won't get marked readnone.
ReplaceInstUsesWith(*Call, UndefValue::get(Call->getType()));
EraseInstFromFunction(*Call);
return ret;
}
}
-
+
return 0;
}
@@ -1251,7 +1250,7 @@ Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) {
// This is safe if the intermediate type has enough bits in its mantissa to
// accurately represent all values of X. For example, do not do this with
// i64->float->i64. This is also safe for sitofp case, because any negative
- // 'X' value would cause an undefined result for the fptoui.
+ // 'X' value would cause an undefined result for the fptoui.
if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
OpI->getOperand(0)->getType() == FI.getType() &&
(int)FI.getType()->getScalarSizeInBits() < /*extra bit for sign */
@@ -1265,19 +1264,19 @@ Instruction *InstCombiner::visitFPToSI(FPToSIInst &FI) {
Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0));
if (OpI == 0)
return commonCastTransforms(FI);
-
+
// fptosi(sitofp(X)) --> X
// fptosi(uitofp(X)) --> X
// This is safe if the intermediate type has enough bits in its mantissa to
// accurately represent all values of X. For example, do not do this with
// i64->float->i64. This is also safe for sitofp case, because any negative
- // 'X' value would cause an undefined result for the fptoui.
+ // 'X' value would cause an undefined result for the fptoui.
if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
OpI->getOperand(0)->getType() == FI.getType() &&
(int)FI.getType()->getScalarSizeInBits() <=
OpI->getType()->getFPMantissaWidth())
return ReplaceInstUsesWith(FI, OpI->getOperand(0));
-
+
return commonCastTransforms(FI);
}
@@ -1298,17 +1297,17 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
if (CI.getOperand(0)->getType()->getScalarSizeInBits() >
TD->getPointerSizeInBits(AS)) {
Value *P = Builder->CreateTrunc(CI.getOperand(0),
- TD->getIntPtrType(CI.getContext()));
+ TD->getIntPtrType(CI.getType()));
return new IntToPtrInst(P, CI.getType());
}
if (CI.getOperand(0)->getType()->getScalarSizeInBits() <
TD->getPointerSizeInBits(AS)) {
Value *P = Builder->CreateZExt(CI.getOperand(0),
- TD->getIntPtrType(CI.getContext()));
+ TD->getIntPtrType(CI.getType()));
return new IntToPtrInst(P, CI.getType());
}
}
-
+
if (Instruction *I = commonCastTransforms(CI))
return I;
@@ -1318,19 +1317,19 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
/// @brief Implement the transforms for cast of pointer (bitcast/ptrtoint)
Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
Value *Src = CI.getOperand(0);
-
+
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Src)) {
// If casting the result of a getelementptr instruction with no offset, turn
// this into a cast of the original pointer!
if (GEP->hasAllZeroIndices()) {
// Changing the cast operand is usually not a good idea but it is safe
- // here because the pointer operand is being replaced with another
+ // here because the pointer operand is being replaced with another
// pointer operand so the opcode doesn't need to change.
Worklist.Add(GEP);
CI.setOperand(0, GEP->getOperand(0));
return &CI;
}
-
+
// If the GEP has a single use, and the base pointer is a bitcast, and the
// GEP computes a constant offset, see if we can convert these three
// instructions into fewer. This typically happens with unions and other
@@ -1345,7 +1344,8 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
Type *GEPIdxTy =
cast<PointerType>(OrigBase->getType())->getElementType();
SmallVector<Value*, 8> NewIndices;
- if (FindElementAtOffset(GEPIdxTy, Offset, NewIndices)) {
+ Type *IntPtrTy = TD->getIntPtrType(OrigBase->getType());
+ if (FindElementAtOffset(GEPIdxTy, Offset, IntPtrTy, NewIndices)) {
// If we were able to index down into an element, create the GEP
// and bitcast the result. This eliminates one bitcast, potentially
// two.
@@ -1353,15 +1353,15 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
Builder->CreateInBoundsGEP(OrigBase, NewIndices) :
Builder->CreateGEP(OrigBase, NewIndices);
NGEP->takeName(GEP);
-
+
if (isa<BitCastInst>(CI))
return new BitCastInst(NGEP, CI.getType());
assert(isa<PtrToIntInst>(CI));
return new PtrToIntInst(NGEP, CI.getType());
- }
+ }
}
}
-
+
return commonCastTransforms(CI);
}
@@ -1373,16 +1373,16 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
if (TD) {
if (CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits(AS)) {
Value *P = Builder->CreatePtrToInt(CI.getOperand(0),
- TD->getIntPtrType(CI.getContext()));
+ TD->getIntPtrType(CI.getContext(), AS));
return new TruncInst(P, CI.getType());
}
if (CI.getType()->getScalarSizeInBits() > TD->getPointerSizeInBits(AS)) {
Value *P = Builder->CreatePtrToInt(CI.getOperand(0),
- TD->getIntPtrType(CI.getContext()));
+ TD->getIntPtrType(CI.getContext(), AS));
return new ZExtInst(P, CI.getType());
}
}
-
+
return commonPointerCastTransforms(CI);
}
@@ -1397,33 +1397,33 @@ static Instruction *OptimizeVectorResize(Value *InVal, VectorType *DestTy,
// element size, or the input is a multiple of the output element size.
// Convert the input type to have the same element type as the output.
VectorType *SrcTy = cast<VectorType>(InVal->getType());
-
+
if (SrcTy->getElementType() != DestTy->getElementType()) {
// The input types don't need to be identical, but for now they must be the
// same size. There is no specific reason we couldn't handle things like
// <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten
- // there yet.
+ // there yet.
if (SrcTy->getElementType()->getPrimitiveSizeInBits() !=
DestTy->getElementType()->getPrimitiveSizeInBits())
return 0;
-
+
SrcTy = VectorType::get(DestTy->getElementType(), SrcTy->getNumElements());
InVal = IC.Builder->CreateBitCast(InVal, SrcTy);
}
-
+
// Now that the element types match, get the shuffle mask and RHS of the
// shuffle to use, which depends on whether we're increasing or decreasing the
// size of the input.
SmallVector<uint32_t, 16> ShuffleMask;
Value *V2;
-
+
if (SrcTy->getNumElements() > DestTy->getNumElements()) {
// If we're shrinking the number of elements, just shuffle in the low
// elements from the input and use undef as the second shuffle input.
V2 = UndefValue::get(SrcTy);
for (unsigned i = 0, e = DestTy->getNumElements(); i != e; ++i)
ShuffleMask.push_back(i);
-
+
} else {
// If we're increasing the number of elements, shuffle in all of the
// elements from InVal and fill the rest of the result elements with zeros
@@ -1437,7 +1437,7 @@ static Instruction *OptimizeVectorResize(Value *InVal, VectorType *DestTy,
for (unsigned i = 0, e = DestTy->getNumElements()-SrcElts; i != e; ++i)
ShuffleMask.push_back(SrcElts);
}
-
+
return new ShuffleVectorInst(InVal, V2,
ConstantDataVector::get(V2->getContext(),
ShuffleMask));
@@ -1464,7 +1464,7 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
Type *VecEltTy) {
// Undef values never contribute useful bits to the result.
if (isa<UndefValue>(V)) return true;
-
+
// If we got down to a value of the right type, we win, try inserting into the
// right element.
if (V->getType() == VecEltTy) {
@@ -1472,15 +1472,15 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
if (Constant *C = dyn_cast<Constant>(V))
if (C->isNullValue())
return true;
-
+
// Fail if multiple elements are inserted into this slot.
if (ElementIndex >= Elements.size() || Elements[ElementIndex] != 0)
return false;
-
+
Elements[ElementIndex] = V;
return true;
}
-
+
if (Constant *C = dyn_cast<Constant>(V)) {
// Figure out the # elements this provides, and bitcast it or slice it up
// as required.
@@ -1491,7 +1491,7 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
if (NumElts == 1)
return CollectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy),
ElementIndex, Elements, VecEltTy);
-
+
// Okay, this is a constant that covers multiple elements. Slice it up into
// pieces and insert each element-sized piece into the vector.
if (!isa<IntegerType>(C->getType()))
@@ -1499,7 +1499,7 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
C->getType()->getPrimitiveSizeInBits()));
unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits();
Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize);
-
+
for (unsigned i = 0; i != NumElts; ++i) {
Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(),
i*ElementSize));
@@ -1509,23 +1509,23 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
}
return true;
}
-
+
if (!V->hasOneUse()) return false;
-
+
Instruction *I = dyn_cast<Instruction>(V);
if (I == 0) return false;
switch (I->getOpcode()) {
default: return false; // Unhandled case.
case Instruction::BitCast:
return CollectInsertionElements(I->getOperand(0), ElementIndex,
- Elements, VecEltTy);
+ Elements, VecEltTy);
case Instruction::ZExt:
if (!isMultipleOfTypeSize(
I->getOperand(0)->getType()->getPrimitiveSizeInBits(),
VecEltTy))
return false;
return CollectInsertionElements(I->getOperand(0), ElementIndex,
- Elements, VecEltTy);
+ Elements, VecEltTy);
case Instruction::Or:
return CollectInsertionElements(I->getOperand(0), ElementIndex,
Elements, VecEltTy) &&
@@ -1537,11 +1537,11 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
if (CI == 0) return false;
if (!isMultipleOfTypeSize(CI->getZExtValue(), VecEltTy)) return false;
unsigned IndexShift = getTypeSizeIndex(CI->getZExtValue(), VecEltTy);
-
+
return CollectInsertionElements(I->getOperand(0), ElementIndex+IndexShift,
Elements, VecEltTy);
}
-
+
}
}
@@ -1576,11 +1576,11 @@ static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
Value *Result = Constant::getNullValue(CI.getType());
for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
if (Elements[i] == 0) continue; // Unset element.
-
+
Result = IC.Builder->CreateInsertElement(Result, Elements[i],
IC.Builder->getInt32(i));
}
-
+
return Result;
}
@@ -1608,11 +1608,11 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
VecTy->getPrimitiveSizeInBits() / DestWidth);
VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
}
-
+
return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(0));
}
}
-
+
// bitcast(trunc(lshr(bitcast(somevector), cst))
ConstantInt *ShAmt = 0;
if (match(Src, m_Trunc(m_LShr(m_BitCast(m_Value(VecInput)),
@@ -1629,7 +1629,7 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
VecTy->getPrimitiveSizeInBits() / DestWidth);
VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
}
-
+
unsigned Elt = ShAmt->getZExtValue() / DestWidth;
return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
}
@@ -1653,12 +1653,12 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
PointerType *SrcPTy = cast<PointerType>(SrcTy);
Type *DstElTy = DstPTy->getElementType();
Type *SrcElTy = SrcPTy->getElementType();
-
+
// If the address spaces don't match, don't eliminate the bitcast, which is
// required for changing types.
if (SrcPTy->getAddressSpace() != DstPTy->getAddressSpace())
return 0;
-
+
// If we are casting a alloca to a pointer to a type of the same
// size, rewrite the allocation instruction to allocate the "right" type.
// There is no need to modify malloc calls because it is their bitcast that
@@ -1666,14 +1666,14 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
if (AllocaInst *AI = dyn_cast<AllocaInst>(Src))
if (Instruction *V = PromoteCastOfAllocation(CI, *AI))
return V;
-
+
// If the source and destination are pointers, and this cast is equivalent
// to a getelementptr X, 0, 0, 0... turn it into the appropriate gep.
// This can enhance SROA and other transforms that want type-safe pointers.
Constant *ZeroUInt =
Constant::getNullValue(Type::getInt32Ty(CI.getContext()));
unsigned NumZeros = 0;
- while (SrcElTy != DstElTy &&
+ while (SrcElTy != DstElTy &&
isa<CompositeType>(SrcElTy) && !SrcElTy->isPointerTy() &&
SrcElTy->getNumContainedTypes() /* not "{}" */) {
SrcElTy = cast<CompositeType>(SrcElTy)->getTypeAtIndex(ZeroUInt);
@@ -1686,7 +1686,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
return GetElementPtrInst::CreateInBounds(Src, Idxs);
}
}
-
+
// Try to optimize int -> float bitcasts.
if ((DestTy->isFloatTy() || DestTy->isDoubleTy()) && isa<IntegerType>(SrcTy))
if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this))
@@ -1699,7 +1699,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
// FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast)
}
-
+
if (isa<IntegerType>(SrcTy)) {
// If this is a cast from an integer to vector, check to see if the input
// is a trunc or zext of a bitcast from vector. If so, we can replace all
@@ -1712,7 +1712,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
cast<VectorType>(DestTy), *this))
return I;
}
-
+
// If the input is an 'or' instruction, we may be doing shifts and ors to
// assemble the elements of the vector manually. Try to rip the code out
// and replace it with insertelements.
@@ -1723,7 +1723,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
if (VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) {
if (SrcVTy->getNumElements() == 1 && !DestTy->isVectorTy()) {
- Value *Elem =
+ Value *Elem =
Builder->CreateExtractElement(Src,
Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
return CastInst::Create(Instruction::BitCast, Elem, DestTy);
@@ -1733,7 +1733,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(Src)) {
// Okay, we have (bitcast (shuffle ..)). Check to see if this is
// a bitcast to a vector with the same # elts.
- if (SVI->hasOneUse() && DestTy->isVectorTy() &&
+ if (SVI->hasOneUse() && DestTy->isVectorTy() &&
cast<VectorType>(DestTy)->getNumElements() ==
SVI->getType()->getNumElements() &&
SVI->getType()->getNumElements() ==
@@ -1742,9 +1742,9 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
// If either of the operands is a cast from CI.getType(), then
// evaluating the shuffle in the casted destination's type will allow
// us to eliminate at least one cast.
- if (((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(0))) &&
+ if (((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(0))) &&
Tmp->getOperand(0)->getType() == DestTy) ||
- ((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(1))) &&
+ ((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(1))) &&
Tmp->getOperand(0)->getType() == DestTy)) {
Value *LHS = Builder->CreateBitCast(SVI->getOperand(0), DestTy);
Value *RHS = Builder->CreateBitCast(SVI->getOperand(1), DestTy);
@@ -1754,7 +1754,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
}
}
}
-
+
if (SrcTy->isPointerTy())
return commonPointerCastTransforms(CI);
return commonCastTransforms(CI);
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index e3e5ddae80..055c3b1514 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -371,7 +371,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
// an inbounds GEP because the index can't be out of range.
if (!GEP->isInBounds() &&
Idx->getType()->getPrimitiveSizeInBits() > TD->getPointerSizeInBits(AS))
- Idx = Builder->CreateTrunc(Idx, TD->getIntPtrType(Idx->getContext()));
+ Idx = Builder->CreateTrunc(Idx, TD->getIntPtrType(Idx->getContext(), AS));
// If the comparison is only true for one or two elements, emit direct
// comparisons.
@@ -539,7 +539,7 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) {
// we don't need to bother extending: the extension won't affect where the
// computation crosses zero.
if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth) {
- Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext());
+ Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext(), AS);
VariableIdx = IC.Builder->CreateTrunc(VariableIdx, IntPtrTy);
}
return VariableIdx;
@@ -561,7 +561,7 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) {
return 0;
// Okay, we can do this evaluation. Start by converting the index to intptr.
- Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext());
+ Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext(), AS);
if (VariableIdx->getType() != IntPtrTy)
VariableIdx = IC.Builder->CreateIntCast(VariableIdx, IntPtrTy,
true /*Signed*/);
@@ -1554,8 +1554,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
// Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the
// integer type is the same size as the pointer type.
if (TD && LHSCI->getOpcode() == Instruction::PtrToInt &&
- TD->getPointerSizeInBits(
- cast<PtrToIntInst>(LHSCI)->getPointerAddressSpace()) ==
+ TD->getTypeSizeInBits(DestTy) ==
cast<IntegerType>(DestTy)->getBitWidth()) {
Value *RHSOp = 0;
if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1))) {
@@ -2251,7 +2250,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
case Instruction::IntToPtr:
// icmp pred inttoptr(X), null -> icmp pred X, 0
if (RHSC->isNullValue() && TD &&
- TD->getIntPtrType(RHSC->getContext()) ==
+ TD->getIntPtrType(LHSI->getType()) ==
LHSI->getOperand(0)->getType())
return new ICmpInst(I.getPredicate(), LHSI->getOperand(0),
Constant::getNullValue(LHSI->getOperand(0)->getType()));
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 4ab5b6e4a0..633ad93ad9 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -173,7 +173,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
// Ensure that the alloca array size argument has type intptr_t, so that
// any casting is exposed early.
if (TD) {
- Type *IntPtrTy = TD->getIntPtrType(AI.getContext());
+ Type *IntPtrTy = TD->getIntPtrType(AI.getType());
if (AI.getArraySize()->getType() != IntPtrTy) {
Value *V = Builder->CreateIntCast(AI.getArraySize(),
IntPtrTy, false);
@@ -185,7 +185,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
// Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
if (AI.isArrayAllocation()) { // Check C != 1
if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
- Type *NewTy =
+ Type *NewTy =
ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName());
New->setAlignment(AI.getAlignment());
@@ -311,7 +311,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
Type *SrcPTy = SrcTy->getElementType();
- if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() ||
+ if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() ||
DestPTy->isVectorTy()) {
// If the source is an array, the code below will not succeed. Check to
// see if a trivial 'gep P, 0, 0' will help matters. Only do this for
@@ -328,7 +328,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
}
if (IC.getDataLayout() &&
- (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() ||
+ (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() ||
SrcPTy->isVectorTy()) &&
// Do not allow turning this into a load of an integer, which is then
// casted to a pointer, this pessimizes pointer analysis a lot.
@@ -339,7 +339,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
// Okay, we are casting from one integer or pointer type to another of
// the same size. Instead of casting the pointer before the load, cast
// the result of the loaded value.
- LoadInst *NewLoad =
+ LoadInst *NewLoad =
IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName());
NewLoad->setAlignment(LI.getAlignment());
NewLoad->setAtomic(LI.getOrdering(), LI.getSynchScope());
@@ -376,7 +376,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
// None of the following transforms are legal for volatile/atomic loads.
// FIXME: Some of it is okay for atomic loads; needs refactoring.
if (!LI.isSimple()) return 0;
-
+
// Do really simple store-to-load forwarding and load CSE, to catch cases
// where there are several consecutive memory accesses to the same location,
// separated by a few arithmetic operations.
@@ -397,7 +397,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
Constant::getNullValue(Op->getType()), &LI);
return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
}
- }
+ }
// load null/undef -> unreachable
// TODO: Consider a target hook for valid address spaces for this xform.
@@ -416,7 +416,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
if (CE->isCast())
if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
return Res;
-
+
if (Op->hasOneUse()) {
// Change select and PHI nodes to select values instead of addresses: this
// helps alias analysis out a lot, allows many others simplifications, and
@@ -470,18 +470,18 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
Type *DestPTy = cast<PointerType>(CI->getType())->getElementType();
PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType());
if (SrcTy == 0) return 0;
-
+
Type *SrcPTy = SrcTy->getElementType();
if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy())
return 0;
-
+
/// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep"
/// to its first element. This allows us to handle things like:
/// store i32 xxx, (bitcast {foo*, float}* %P to i32*)
/// on 32-bit hosts.
SmallVector<Value*, 4> NewGEPIndices;
-
+
// If the source is an array, the code below will not succeed. Check to
// see if a trivial 'gep P, 0, 0' will help matters. Only do this for
// constants.
@@ -489,7 +489,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
// Index through pointer.
Constant *Zero = Constant::getNullValue(Type::getInt32Ty(SI.getContext()));
NewGEPIndices.push_back(Zero);
-
+
while (1) {
if (StructType *STy = dyn_cast<StructType>(SrcPTy)) {
if (!STy->getNumElements()) /* Struct can be empty {} */
@@ -503,24 +503,23 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
break;
}
}
-
+
SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace());
}
if (!SrcPTy->isIntegerTy() && !SrcPTy->isPointerTy())
return 0;
-
+
// If the pointers point into different address spaces or if they point to
// values with different sizes, we can't do the transformation.
if (!IC.getDataLayout() ||
- SrcTy->getAddressSpace() !=
- cast<PointerType>(CI->getType())->getAddressSpace() ||
+ SrcTy->getAddressSpace() != CI->getType()->getPointerAddressSpace() ||
IC.getDataLayout()->getTypeSizeInBits(SrcPTy) !=
IC.getDataLayout()->getTypeSizeInBits(DestPTy))
return 0;
// Okay, we are casting from one integer or pointer type to another of
- // the same size. Instead of casting the pointer before
+ // the same size. Instead of casting the pointer before
// the store, cast the value to be stored.
Value *NewCast;
Value *SIOp0 = SI.getOperand(0);
@@ -534,12 +533,12 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
if (SIOp0->getType()->isPointerTy())
opcode = Instruction::PtrToInt;
}
-
+
// SIOp0 is a pointer to aggregate and this is a store to the first field,
// emit a GEP to index into its first field.
if (!NewGEPIndices.empty())
CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices);
-
+
NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy,
SIOp0->getName()+".c");
SI.setOperand(0, NewCast);
@@ -558,7 +557,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
static bool equivalentAddressValues(Value *A, Value *B) {
// Test if the values are trivially equivalent.
if (A == B) return true;
-
+
// Test if the values come form identical arithmetic instructions.
// This uses isIdenticalToWhenDefined instead of isIdenticalTo because
// its only used to compare two uses within the same basic block, which
@@ -571,7 +570,7 @@ static bool equivalentAddressValues(Value *A, Value *B) {
if (Instruction *BI = dyn_cast<Instruction>(B))
if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
return true;
-
+
// Otherwise they may not be equivalent.
return false;
}
@@ -602,7 +601,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
// If the RHS is an alloca with a single use, zapify the store, making the
// alloca dead.
if (Ptr->hasOneUse()) {
- if (isa<AllocaInst>(Ptr))
+ if (isa<AllocaInst>(Ptr))
return EraseInstFromFunction(SI);
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
if (isa<AllocaInst>(GEP->getOperand(0))) {
@@ -625,8 +624,8 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
(isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
ScanInsts++;
continue;
- }
-
+ }
+
if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
// Prev store isn't volatile, and stores to the same location?
if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1),
@@ -638,7 +637,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
}
break;
}
-
+
// If this is a load, we have to stop. However, if the loaded value is from
// the pointer we're loading and is producing the pointer we're storing,
// then *this* store is dead (X = load P; store X -> P).
@@ -646,12 +645,12 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) &&
LI->isSimple())
return EraseInstFromFunction(SI);
-
+
// Otherwise, this is a load from some other location. Stores before it
// may not be dead.
break;
}
-
+
// Don't skip over loads or things that can modify memory.
if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())
break;
@@ -681,11 +680,11 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
if (Instruction *Res = InstCombineStoreToCast(*this, SI))
return Res;
-
+
// If this store is the last instruction in the basic block (possibly
// excepting debug info instructions), and if the block ends with an
// unconditional branch, try to move it to the successor block.
- BBI = &SI;
+ BBI = &SI;
do {
++BBI;
} while (isa<DbgInfoIntrinsic>(BBI) ||
@@ -694,7 +693,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
if (BI->isUnconditional())
if (SimplifyStoreAtEndOfBlock(SI))
return 0; // xform done!
-
+
return 0;
}
@@ -708,12 +707,12 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
///
bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
BasicBlock *StoreBB = SI.getParent();
-
+
// Check to see if the successor block has exactly two incoming edges. If
// so, see if the other predecessor contains a store to the same location.
// if so, insert a PHI node (if needed) and move the stores down.
BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);
-
+
// Determine whether Dest has exactly two predecessors and, if so, compute
// the other predecessor.
pred_iterator PI = pred_begin(DestBB);
@@ -725,7 +724,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
if (++PI == pred_end(DestBB))
return false;
-
+
P = *PI;
if (P != StoreBB) {
if (OtherBB)
@@ -745,7 +744,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
if (!OtherBr || BBI == OtherBB->begin())
return false;
-
+
// If the other block ends in an unconditional branch, check for the 'if then
// else' case. there is an instruction before the branch.
StoreInst *OtherStore = 0;
@@ -767,10 +766,10 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
} else {
// Otherwise, the other block ended with a conditional branch. If one of the
// destinations is StoreBB, then we have the if/then case.
- if (OtherBr->getSuccessor(0) != StoreBB &&
+ if (OtherBr->getSuccessor(0) != StoreBB &&
OtherBr->getSuccessor(1) != StoreBB)
return false;
-
+
// Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
// if/then triangle. See if there is a store to the same ptr as SI that
// lives in OtherBB.
@@ -788,7 +787,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
BBI == OtherBB->begin())
return false;
}
-
+
// In order to eliminate the store in OtherBr, we have to
// make sure nothing reads or overwrites the stored value in
// StoreBB.
@@ -798,7 +797,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
return false;
}
}
-
+
// Insert a PHI node now if we need it.
Value *MergedVal = OtherStore->getOperand(0);
if (MergedVal != SI.getOperand(0)) {
@@ -807,7 +806,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
PN->addIncoming(OtherStore->getOperand(0), OtherBB);
MergedVal = InsertNewInstBefore(PN, DestBB->front());
}
-
+
// Advance to a place where it is safe to insert the new store and
// insert it.
BBI = DestBB->getFirstInsertionPt();
@@ -817,7 +816,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
SI.getOrdering(),
SI.getSynchScope());
InsertNewInstBefore(NewSI, *BBI);
- NewSI->setDebugLoc(OtherStore->getDebugLoc());
+ NewSI->setDebugLoc(OtherStore->getDebugLoc());
// Nuke the old stores.
EraseInstFromFunction(SI);
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 7f8c3ae558..00b7fca681 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -738,7 +738,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
/// or not there is a sequence of GEP indices into the type that will land us at
/// the specified offset. If so, fill them into NewIndices and return the
/// resultant element type, otherwise return null.
-Type *InstCombiner::FindElementAtOffset(Type *Ty, int64_t Offset,
+Type *InstCombiner::FindElementAtOffset(Type *Ty, int64_t Offset, Type *IntPtrTy,
SmallVectorImpl<Value*> &NewIndices) {
if (!TD) return 0;
if (!Ty->isSized()) return 0;
@@ -746,7 +746,6 @@ Type *InstCombiner::FindElementAtOffset(Type *Ty, int64_t Offset,
// Start with the index over the outer type. Note that the type size
// might be zero (even if the offset isn't zero) if the indexed type
// is something like [0 x {int, int}]
- Type *IntPtrTy = TD->getIntPtrType(Ty->getContext());
int64_t FirstIdx = 0;
if (int64_t TySize = TD->getTypeAllocSize(Ty)) {
FirstIdx = Offset/TySize;
@@ -1055,7 +1054,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// by multiples of a zero size type with zero.
if (TD) {
bool MadeChange = false;
- Type *IntPtrTy = TD->getIntPtrType(GEP.getContext());
+ Type *IntPtrTy = TD->getIntPtrType(PtrOp->getType());
gep_type_iterator GTI = gep_type_begin(GEP);
for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end();
@@ -1240,7 +1239,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Earlier transforms ensure that the index has type IntPtrType, which
// considerably simplifies the logic by eliminating implicit casts.
- assert(Idx->getType() == TD->getIntPtrType(GEP.getContext()) &&
+ assert(Idx->getType() == TD->getIntPtrType(GEP.getType()) &&
"Index not cast to pointer width?");
bool NSW;
@@ -1275,7 +1274,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Earlier transforms ensure that the index has type IntPtrType, which
// considerably simplifies the logic by eliminating implicit casts.
- assert(Idx->getType() == TD->getIntPtrType(GEP.getContext()) &&
+ assert(Idx->getType() == TD->getIntPtrType(GEP.getType()) &&
"Index not cast to pointer width?");
bool NSW;
@@ -1337,7 +1336,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
SmallVector<Value*, 8> NewIndices;
Type *InTy =
cast<PointerType>(BCI->getOperand(0)->getType())->getElementType();
- if (FindElementAtOffset(InTy, Offset, NewIndices)) {
+ Type *IntPtrTy = TD->getIntPtrType(BCI->getOperand(0)->getType());
+ if (FindElementAtOffset(InTy, Offset, IntPtrTy, NewIndices)) {
Value *NGEP = GEP.isInBounds() ?
Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices) :
Builder->CreateGEP(BCI->getOperand(0), NewIndices);
diff --git a/lib/Transforms/Instrumentation/BoundsChecking.cpp b/lib/Transforms/Instrumentation/BoundsChecking.cpp
index 976b963046..dd36a00070 100644
--- a/lib/Transforms/Instrumentation/BoundsChecking.cpp
+++ b/lib/Transforms/Instrumentation/BoundsChecking.cpp
@@ -143,7 +143,7 @@ bool BoundsChecking::instrument(Value *Ptr, Value *InstVal) {
Value *Offset = SizeOffset.second;
ConstantInt *SizeCI = dyn_cast<ConstantInt>(Size);
- IntegerType *IntTy = TD->getIntPtrType(Inst->getContext());
+ IntegerType *IntTy = TD->getIntPtrType(Ptr->getType());
Value *NeededSizeVal = ConstantInt::get(IntTy, NeededSize);
// three checks are required to ensure safety:
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 2b42c75d14..74e310f7e7 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -935,7 +935,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
<< *MemoryInst);
Type *IntPtrTy =
- TLI->getDataLayout()->getIntPtrType(AccessTy->getContext());
+ TLI->getDataLayout()->getIntPtrType(Addr->getType());
Value *Result = 0;
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index eb0da20abb..b6e15540e7 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -746,6 +746,15 @@ static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
return true;
}
+/// Wrap TD.getIntPtrType, but return a vector type for vector inputs.
+static Type *getIntPtrType(Type *Ty, const DataLayout &TD) {
+ Type *ITy = TD.getIntPtrType(Ty);
+ if (Ty->isVectorTy()) {
+ ITy = VectorType::get(ITy, Ty->getVectorNumElements());
+ }
+
+ return ITy;
+}
/// CoerceAvailableValueToLoadType - If we saw a store of a value to memory, and
/// then a load from a must-aliased pointer of a different type, try to coerce
@@ -769,24 +778,25 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
// If the store and reload are the same size, we can always reuse it.
if (StoreSize == LoadSize) {
// Pointer to Pointer -> use bitcast.
- if (StoredValTy->isPointerTy() && LoadedTy->isPointerTy())
+ if (StoredValTy->getScalarType()->isPointerTy() &&
+ LoadedTy->getScalarType()->isPointerTy())
return new BitCastInst(StoredVal, LoadedTy, "", InsertPt);
// Convert source pointers to integers, which can be bitcast.
- if (StoredValTy->isPointerTy()) {
- StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
+ if (StoredValTy->getScalarType()->isPointerTy()) {
+ StoredValTy = getIntPtrType(StoredValTy, TD);
StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
}
Type *TypeToCastTo = LoadedTy;
- if (TypeToCastTo->isPointerTy())
- TypeToCastTo = TD.getIntPtrType(StoredValTy->getContext());
+ if (TypeToCastTo->getScalarType()->isPointerTy())
+ TypeToCastTo = getIntPtrType(StoredValTy, TD);
if (StoredValTy != TypeToCastTo)
StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt);
// Cast to pointer if the load needs a pointer type.
- if (LoadedTy->isPointerTy())
+ if (LoadedTy->getScalarType()->isPointerTy())
StoredVal = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt);
return StoredVal;
@@ -798,8 +808,8 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
assert(StoreSize >= LoadSize && "CanCoerceMustAliasedValueToLoad fail");
// Convert source pointers to integers, which can be manipulated.
- if (StoredValTy->isPointerTy()) {
- StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
+ if (StoredValTy->getScalarType()->isPointerTy()) {
+ StoredValTy = getIntPtrType(StoredValTy, TD);
StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
}
@@ -824,7 +834,7 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
return StoredVal;
// If the result is a pointer, inttoptr.
- if (LoadedTy->isPointerTy())
+ if (LoadedTy->getScalarType()->isPointerTy())
return new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt);
// Otherwise, bitcast.
@@ -1019,8 +1029,9 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
// Compute which bits of the stored value are being used by the load. Convert
// to an integer type to start with.
- if (SrcVal->getType()->isPointerTy())
- SrcVal = Builder.CreatePtrToInt(SrcVal, TD.getIntPtrType(Ctx));
+ if (SrcVal->getType()->getScalarType()->isPointerTy())
+ SrcVal = Builder.CreatePtrToInt(SrcVal,
+ getIntPtrType(SrcVal->getType(), TD));
if (!SrcVal->getType()->isIntegerTy())
SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8));
@@ -1301,7 +1312,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent());
// If new PHI nodes were created, notify alias analysis.
- if (V->getType()->isPointerTy()) {
+ if (V->getType()->getScalarType()->isPointerTy()) {
AliasAnalysis *AA = gvn.getAliasAnalysis();
for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
@@ -1498,7 +1509,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
if (isa<PHINode>(V))
V->takeName(LI);
- if (V->getType()->isPointerTy())
+ if (V->getType()->getScalarType()->isPointerTy())
MD->invalidateCachedPointerInfo(V);
markInstructionForDeletion(LI);
++NumGVNLoad;
@@ -1730,7 +1741,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
LI->replaceAllUsesWith(V);
if (isa<PHINode>(V))
V->takeName(LI);
- if (V->getType()->isPointerTy())
+ if (V->getType()->getScalarType()->isPointerTy())
MD->invalidateCachedPointerInfo(V);
markInstructionForDeletion(LI);
++NumPRELoad;
@@ -1857,7 +1868,7 @@ bool GVN::processLoad(LoadInst *L) {
// Replace the load!
L->replaceAllUsesWith(AvailVal);
- if (AvailVal->getType()->isPointerTy())
+ if (AvailVal->getType()->getScalarType()->isPointerTy())
MD->invalidateCachedPointerInfo(AvailVal);
markInstructionForDeletion(L);
++NumGVNLoad;
@@ -1914,7 +1925,7 @@ bool GVN::processLoad(LoadInst *L) {
// Remove it!
L->replaceAllUsesWith(StoredVal);
- if (StoredVal->getType()->isPointerTy())
+ if (StoredVal->getType()->getScalarType()->isPointerTy())
MD->invalidateCachedPointerInfo(StoredVal);
markInstructionForDeletion(L);
++NumGVNLoad;
@@ -1943,7 +1954,7 @@ bool GVN::processLoad(LoadInst *L) {
// Remove it!
patchAndReplaceAllUsesWith(AvailableVal, L);
- if (DepLI->getType()->isPointerTy())
+ if (DepLI->getType()->getScalarType()->isPointerTy())
MD->invalidateCachedPointerInfo(DepLI);
markInstructionForDeletion(L);
++NumGVNLoad;
@@ -2184,7 +2195,7 @@ bool GVN::processInstruction(Instruction *I) {
// "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
if (Value *V = SimplifyInstruction(I, TD, TLI, DT)) {
I->replaceAllUsesWith(V);
- if (MD && V->getType()->isPointerTy())
+ if (MD && V->getType()->getScalarType()->isPointerTy())
MD->invalidateCachedPointerInfo(V);
markInstructionForDeletion(I);
++NumGVNSimpl;
@@ -2284,7 +2295,7 @@ bool GVN::processInstruction(Instruction *I) {
// Remove it!
patchAndReplaceAllUsesWith(repl, I);
- if (MD && repl->getType()->isPointerTy())
+ if (MD && repl->getType()->getScalarType()->isPointerTy())
MD->invalidateCachedPointerInfo(repl);
markInstructionForDeletion(I);
return true;
@@ -2532,7 +2543,7 @@ bool GVN::performPRE(Function &F) {
addToLeaderTable(ValNo, Phi, CurrentBlock);
Phi->setDebugLoc(CurInst->getDebugLoc());
CurInst->replaceAllUsesWith(Phi);
- if (Phi->getType()->isPointerTy()) {
+ if (Phi->getType()->getScalarType()->isPointerTy()) {
// Because we have added a PHI-use of the pointer value, it has now
// "escaped" from alias analysis' perspective. We need to inform
// AA of this.
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 82eb746467..8a2f093629 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -1430,7 +1430,8 @@ FindLoopCounter(Loop *L, const SCEV *BECount,
/// genLoopLimit - Help LinearFunctionTestReplace by generating a value that
/// holds the RHS of the new loop test.
static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
- SCEVExpander &Rewriter, ScalarEvolution *SE) {
+ SCEVExpander &Rewriter, ScalarEvolution *SE,
+ Type *IntPtrTy) {
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
assert(AR && AR->getLoop() == L && AR->isAffine() && "bad loop counter");
const SCEV *IVInit = AR->getStart();
@@ -1456,7 +1457,8 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
// We could handle pointer IVs other than i8*, but we need to compensate for
// gep index scaling. See canExpandBackedgeTakenCount comments.
assert(SE->getSizeOfExpr(
- cast<PointerType>(GEPBase->getType())->getElementType())->isOne()
+ cast<PointerType>(GEPBase->getType())->getElementType(),
+ IntPtrTy)->isOne()
&& "unit stride pointer IV must be i8*");
IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
@@ -1555,7 +1557,9 @@ LinearFunctionTestReplace(Loop *L,
CmpIndVar = IndVar;
}
- Value *ExitCnt = genLoopLimit(IndVar, IVCount, L, Rewriter, SE);
+ Type *IntPtrTy = TD ? TD->getIntPtrType(IndVar->getType()) :
+ IntegerType::getInt64Ty(IndVar->getContext());
+ Value *ExitCnt = genLoopLimit(IndVar, IVCount, L, Rewriter, SE, IntPtrTy);
assert(ExitCnt->getType()->isPointerTy() == IndVar->getType()->isPointerTy()
&& "genLoopLimit missed a cast");
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index a44e798f12..e4b40f3d3a 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -486,7 +486,9 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
// would be unsafe to do if there is anything else in the loop that may read
// or write to the aliased location. Check for any overlap by generating the
// base pointer and checking the region.
- unsigned AddrSpace = cast<PointerType>(DestPtr->getType())->getAddressSpace();
+ assert(DestPtr->getType()->isPointerTy()
+ && "Must be a pointer type.");
+ unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
Value *BasePtr =
Expander.expandCodeFor(Ev->getStart(), Builder.getInt8PtrTy(AddrSpace),
Preheader->getTerminator());
@@ -505,7 +507,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
// pointer size if it isn't already.
- Type *IntPtr = TD->getIntPtrType(DestPtr->getContext());
+ Type *IntPtr = TD->getIntPtrType(DestPtr->getType());
BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
@@ -611,7 +613,7 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
// pointer size if it isn't already.
- Type *IntPtr = TD->getIntPtrType(SI->getContext());
+ Type *IntPtr = TD->getIntPtrType(SI->getType());
BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index 71c62257e7..af3a880cb9 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -582,7 +582,8 @@ private:
P.Partitions.push_back(New);
}
- bool handleLoadOrStore(Type *Ty, Instruction &I, int64_t Offset) {
+ bool handleLoadOrStore(Type *Ty, Instruction &I, int64_t Offset,
+ bool IsVolatile) {
uint64_t Size = TD.getTypeStoreSize(Ty);
// If this memory access can be shown to *statically* extend outside the
@@ -603,7 +604,14 @@ private:
return true;
}
- insertUse(I, Offset, Size);
+ // We allow splitting of loads and stores where the type is an integer type
+ // and which cover the entire alloca. Such integer loads and stores
+ // often require decomposition into fine grained loads and stores.
+ bool IsSplittable = false;
+ if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
+ IsSplittable = !IsVolatile && ITy->getBitWidth() == AllocSize*8;
+
+ insertUse(I, Offset, Size, IsSplittable);
return true;
}
@@ -624,7 +632,7 @@ private:
bool visitLoadInst(LoadInst &LI) {
assert((!LI.isSimple() || LI.getType()->isSingleValueType()) &&
"All simple FCA loads should have been pre-split");
- return handleLoadOrStore(LI.getType(), LI, Offset);
+ return handleLoadOrStore(LI.getType(), LI, Offset, LI.isVolatile());
}
bool visitStoreInst(StoreInst &SI) {
@@ -634,7 +642,7 @@ private:
assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) &&
"All simple FCA stores should have been pre-split");
- return handleLoadOrStore(ValOp->getType(), SI, Offset);
+ return handleLoadOrStore(ValOp->getType(), SI, Offset, SI.isVolatile());
}
@@ -1173,6 +1181,21 @@ Type *AllocaPartitioning::getCommonType(iterator I) const {
UserTy = LI->getType();
} else if (StoreInst *SI = dyn_cast<StoreInst>(UI->U->getUser())) {
UserTy = SI->getValueOperand()->getType();
+ } else {
+ return 0; // Bail if we have weird uses.
+ }
+
+ if (IntegerType *ITy = dyn_cast<IntegerType>(UserTy)) {
+ // If the type is larger than the partition, skip it. We only encounter
+ // this for split integer operations where we want to use the type of the
+ // entity causing the split.
+ if (ITy->getBitWidth() > (I->EndOffset - I->BeginOffset)*8)
+ continue;
+
+ // If we have found an integer type use covering the alloca, use that
+ // regardless of the other types, as integers are often used for a "bucket
+ // of bits" type.
+ return ITy;
}
if (Ty && Ty != UserTy)
@@ -2138,6 +2161,14 @@ static bool isIntegerWideningViable(const DataLayout &TD,
if (SizeInBits != TD.getTypeStoreSizeInBits(AllocaTy))
return false;
+ // We need to ensure that an integer type with the appropriate bitwidth can
+ // be converted to the alloca type, whatever that is. We don't want to force
+ // the alloca itself to have an integer type if there is a more suitable one.
+ Type *IntTy = Type::getIntNTy(AllocaTy->getContext(), SizeInBits);
+ if (!canConvertValue(TD, AllocaTy, IntTy) ||
+ !canConvertValue(TD, IntTy, AllocaTy))
+ return false;
+
uint64_t Size = TD.getTypeStoreSize(AllocaTy);
// Check the uses to ensure the uses are (likely) promoteable integer uses.
@@ -2364,8 +2395,9 @@ private:
Value *getAdjustedAllocaPtr(IRBuilder<> &IRB, Type *PointerTy) {
assert(BeginOffset >= NewAllocaBeginOffset);
- unsigned AS = cast<PointerType>(PointerTy)->getAddressSpace();
- APInt Offset(TD.getPointerSizeInBits(AS), BeginOffset - NewAllocaBeginOffset);
+ assert(PointerTy->isPointerTy() &&
+ "Type must be pointer type!");
+ APInt Offset(TD.getTypeSizeInBits(PointerTy), BeginOffset - NewAllocaBeginOffset);
return getAdjustedPtr(IRB, TD, &NewAI, Offset, PointerTy, getName(""));
}
@@ -2460,6 +2492,50 @@ private:
if (VecTy)
return rewriteVectorizedLoadInst(IRB, LI, OldOp);
+
+ uint64_t Size = EndOffset - BeginOffset;
+ if (Size < TD.getTypeStoreSize(LI.getType())) {
+ assert(!LI.isVolatile());
+ assert(LI.getType()->isIntegerTy() &&
+ "Only integer type loads and stores are split");
+ assert(LI.getType()->getIntegerBitWidth() ==
+ TD.getTypeStoreSizeInBits(LI.getType()) &&
+ "Non-byte-multiple bit width");
+ assert(LI.getType()->getIntegerBitWidth() ==
+ TD.getTypeSizeInBits(OldAI.getAllocatedType()) &&
+ "Only alloca-wide loads can be split and recomposed");
+ IntegerType *NarrowTy = Type::getIntNTy(LI.getContext(), Size * 8);
+ bool IsConvertable = (BeginOffset - NewAllocaBeginOffset == 0) &&
+ canConvertValue(TD, NewAllocaTy, NarrowTy);
+ Value *V;
+ // Move the insertion point just past the load so that we can refer to it.
+ IRB.SetInsertPoint(llvm::next(BasicBlock::iterator(&LI)));
+ if (IsConvertable)
+ V = convertValue(TD, IRB,
+ IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ getName(".load")),
+ NarrowTy);
+ else
+ V = IRB.CreateAlignedLoad(
+ getAdjustedAllocaPtr(IRB, NarrowTy->getPointerTo()),
+ getPartitionTypeAlign(NarrowTy), getName(".load"));
+ // Create a placeholder value with the same type as LI to use as the
+ // basis for the new value. This allows us to replace the uses of LI with
+ // the computed value, and then replace the placeholder with LI, leaving
+ // LI only used for this computation.
+ Value *Placeholder
+ = IRB.CreateLoad(UndefValue::get(LI.getType()->getPointerTo()));
+ V = insertInteger(TD, IRB, Placeholder, V, BeginOffset,
+ getName(".insert"));
+ LI.replaceAllUsesWith(V);
+ Placeholder->replaceAllUsesWith(&LI);
+ cast<Instruction>(Placeholder)->eraseFromParent();
+ if (Pass.DeadSplitInsts.insert(&LI))
+ Pass.DeadInsts.push_back(&LI);
+ DEBUG(dbgs() << " to: " << *V << "\n");
+ return IsConvertable;
+ }
+
if (IntTy && LI.getType()->isIntegerTy())
return rewriteIntegerLoad(IRB, LI);
@@ -2539,6 +2615,39 @@ private:
if (VecTy)
return rewriteVectorizedStoreInst(IRB, SI, OldOp);
Type *ValueTy = SI.getValueOperand()->getType();
+
+ uint64_t Size = EndOffset - BeginOffset;
+ if (Size < TD.getTypeStoreSize(ValueTy)) {
+ assert(!SI.isVolatile());
+ assert(ValueTy->isIntegerTy() &&
+ "Only integer type loads and stores are split");
+ assert(ValueTy->getIntegerBitWidth() ==
+ TD.getTypeStoreSizeInBits(ValueTy) &&
+ "Non-byte-multiple bit width");
+ assert(ValueTy->getIntegerBitWidth() ==
+ TD.getTypeSizeInBits(OldAI.getAllocatedType()) &&
+ "Only alloca-wide stores can be split and recomposed");
+ IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), Size * 8);
+ Value *V = extractInteger(TD, IRB, SI.getValueOperand(), NarrowTy,
+ BeginOffset, getName(".extract"));
+ StoreInst *NewSI;
+ bool IsConvertable = (BeginOffset - NewAllocaBeginOffset == 0) &&
+ canConvertValue(TD, NarrowTy, NewAllocaTy);
+ if (IsConvertable)
+ NewSI = IRB.CreateAlignedStore(convertValue(TD, IRB, V, NewAllocaTy),
+ &NewAI, NewAI.getAlignment());
+ else
+ NewSI = IRB.CreateAlignedStore(
+ V, getAdjustedAllocaPtr(IRB, NarrowTy->getPointerTo()),
+ getPartitionTypeAlign(NarrowTy));
+ (void)NewSI;
+ if (Pass.DeadSplitInsts.insert(&SI))
+ Pass.DeadInsts.push_back(&SI);
+
+ DEBUG(dbgs() << " to: " << *NewSI << "\n");
+ return IsConvertable;
+ }
+
if (IntTy && ValueTy->isIntegerTy())
return rewriteIntegerStore(IRB, SI);
@@ -2687,9 +2796,8 @@ private:
= P.getMemTransferOffsets(II);
assert(OldPtr->getType()->isPointerTy() && "Must be a pointer type!");
- unsigned AS = cast<PointerType>(OldPtr->getType())->getAddressSpace();
// Compute the relative offset within the transfer.
- unsigned IntPtrWidth = TD.getPointerSizeInBits(AS);
+ unsigned IntPtrWidth = TD.getTypeSizeInBits(OldPtr->getType());
APInt RelOffset(IntPtrWidth, BeginOffset - (IsDest ? MTO.DestBegin
: MTO.SourceBegin));
@@ -3173,6 +3281,9 @@ static Type *getTypePartition(const DataLayout &TD, Type *Ty,
uint64_t Offset, uint64_t Size) {
if (Offset == 0 && TD.getTypeAllocSize(Ty) == Size)
return stripAggregateTypeWrapping(TD, Ty);
+ if (Offset > TD.getTypeAllocSize(Ty) ||
+ (TD.getTypeAllocSize(Ty) - Offset) < Size)
+ return 0;
if (SequentialType *SeqTy = dyn_cast<SequentialType>(Ty)) {
// We can't partition pointers...
@@ -3464,6 +3575,8 @@ void SROA::deleteDeadInstructions(SmallPtrSet<AllocaInst*, 4> &DeletedAllocas) {
Instruction *I = DeadInsts.pop_back_val();
DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n");
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
+
for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
if (Instruction *U = dyn_cast<Instruction>(*OI)) {
// Zero out the operand and see if it becomes trivially dead.
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index a46d09c320..a5446294e3 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -963,7 +963,7 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
if (SV->getType()->isFloatingPointTy() || SV->getType()->isVectorTy())
SV = Builder.CreateBitCast(SV, IntegerType::get(SV->getContext(),SrcWidth));
else if (SV->getType()->isPointerTy())
- SV = Builder.CreatePtrToInt(SV, TD.getIntPtrType(SV->getContext()));
+ SV = Builder.CreatePtrToInt(SV, TD.getIntPtrType(SV->getType()));
// Zero extend or truncate the value if needed.
if (SV->getType() != AllocaType) {
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index c82a00fc2c..f3448bcd87 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -165,9 +165,10 @@ struct StpCpyOpt: public LibCallOptimization {
uint64_t Len = GetStringLength(Src);
if (Len == 0) return 0;
- Value *LenV = ConstantInt::get(TD->getIntPtrType(*Context), Len);
+ Type *PT = FT->getParamType(0);
+ Value *LenV = ConstantInt::get(TD->getIntPtrType(PT), Len);
Value *DstEnd = B.CreateGEP(Dst,
- ConstantInt::get(TD->getIntPtrType(*Context),
+ ConstantInt::get(TD->getIntPtrType(PT),
Len - 1));
// We have enough information to now generate the memcpy call to do the
@@ -220,9 +221,10 @@ struct StrNCpyOpt : public LibCallOptimization {
// Let strncpy handle the zero padding
if (Len > SrcLen+1) return 0;
+ Type *PT = FT->getParamType(0);
// strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant]
B.CreateMemCpy(Dst, Src,
- ConstantInt::get(TD->getIntPtrType(*Context), Len), 1);
+ ConstantInt::get(TD->getIntPtrType(PT), Len), 1);
return Dst;
}
@@ -508,10 +510,11 @@ struct MemCpyOpt : public LibCallOptimization {
if (!TD) return 0;
FunctionType *FT = Callee->getFunctionType();
+ Type *PT = FT->getParamType(0);
if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy() ||
- FT->getParamType(2) != TD->getIntPtrType(*Context))
+ FT->getParamType(2) != TD->getIntPtrType(PT))
return 0;
// memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1)
@@ -530,10 +533,11 @@ struct MemMoveOpt : public LibCallOptimization {
if (!TD) return 0;
FunctionType *FT = Callee->getFunctionType();
+ Type *PT = FT->getParamType(0);
if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy() ||
- FT->getParamType(2) != TD->getIntPtrType(*Context))
+ FT->getParamType(2) != TD->getIntPtrType(PT))
return 0;
// memmove(x, y, n) -> llvm.memmove(x, y, n, 1)
@@ -552,10 +556,11 @@ struct MemSetOpt : public LibCallOptimization {
if (!TD) return 0;
FunctionType *FT = Callee->getFunctionType();
+ Type *PT = FT->getParamType(0);
if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isIntegerTy() ||
- FT->getParamType(2) != TD->getIntPtrType(*Context))
+ FT->getParamType(2) != TD->getIntPtrType(PT))
return 0;
// memset(p, v, n) -> llvm.memset(p, v, n, 1)
@@ -980,8 +985,9 @@ struct SPrintFOpt : public LibCallOptimization {
if (!TD) return 0;
// sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1)
+ Type *AT = CI->getArgOperand(0)->getType();
B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
- ConstantInt::get(TD->getIntPtrType(*Context), // Copy the
+ ConstantInt::get(TD->getIntPtrType(AT), // Copy the
FormatStr.size() + 1), 1); // nul byte.
return ConstantInt::get(CI->getType(), FormatStr.size());
}
@@ -1108,8 +1114,9 @@ struct FPutsOpt : public LibCallOptimization {
uint64_t Len = GetStringLength(CI->getArgOperand(0));
if (!Len) return 0;
// Known to have no uses (see above).
+ Type *PT = FT->getParamType(0);
return EmitFWrite(CI->getArgOperand(0),
- ConstantInt::get(TD->getIntPtrType(*Context), Len-1),
+ ConstantInt::get(TD->getIntPtrType(PT), Len-1),
CI->getArgOperand(1), B, TD, TLI);
}
};
@@ -1134,8 +1141,9 @@ struct FPrintFOpt : public LibCallOptimization {
// These optimizations require DataLayout.
if (!TD) return 0;
+ Type *AT = CI->getArgOperand(1)->getType();
Value *NewCI = EmitFWrite(CI->getArgOperand(1),
- ConstantInt::get(TD->getIntPtrType(*Context),
+ ConstantInt::get(TD->getIntPtrType(AT),
FormatStr.size()),
CI->getArgOperand(0), B, TD, TLI);
return NewCI ? ConstantInt::get(CI->getType(), FormatStr.size()) : 0;
diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp
index fa2faa2dad..bd28f10654 100644
--- a/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -46,9 +46,8 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD,
AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
ArrayRef<Attributes::AttrVal>(AVs, 2));
- LLVMContext &Context = B.GetInsertBlock()->getContext();
Constant *StrLen = M->getOrInsertFunction("strlen", AttrListPtr::get(AWI),
- TD->getIntPtrType(Context),
+ TD->getIntPtrType(Ptr->getType()),
B.getInt8PtrTy(),
NULL);
CallInst *CI = B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen");
@@ -73,11 +72,10 @@ Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B,
AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
ArrayRef<Attributes::AttrVal>(AVs, 2));
- LLVMContext &Context = B.GetInsertBlock()->getContext();
Constant *StrNLen = M->getOrInsertFunction("strnlen", AttrListPtr::get(AWI),
- TD->getIntPtrType(Context),
+ TD->getIntPtrType(Ptr->getType()),
B.getInt8PtrTy(),
- TD->getIntPtrType(Context),
+ TD->getIntPtrType(Ptr->getType()),
NULL);
CallInst *CI = B.CreateCall2(StrNLen, CastToCStr(Ptr, B), MaxLen, "strnlen");
if (const Function *F = dyn_cast<Function>(StrNLen->stripPointerCasts()))
@@ -126,12 +124,12 @@ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len,
AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
ArrayRef<Attributes::AttrVal>(AVs, 2));
- LLVMContext &Context = B.GetInsertBlock()->getContext();
Value *StrNCmp = M->getOrInsertFunction("strncmp", AttrListPtr::get(AWI),
B.getInt32Ty(),
B.getInt8PtrTy(),
B.getInt8PtrTy(),
- TD->getIntPtrType(Context), NULL);
+ TD->getIntPtrType(Ptr1->getType()),
+ NULL);
CallInst *CI = B.CreateCall3(StrNCmp, CastToCStr(Ptr1, B),
CastToCStr(Ptr2, B), Len, "strncmp");
@@ -201,14 +199,14 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
AttributeWithIndex AWI;
AWI = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
Attributes::NoUnwind);
- LLVMContext &Context = B.GetInsertBlock()->getContext();
Value *MemCpy = M->getOrInsertFunction("__memcpy_chk",
AttrListPtr::get(AWI),
B.getInt8PtrTy(),
B.getInt8PtrTy(),
B.getInt8PtrTy(),
- TD->getIntPtrType(Context),
- TD->getIntPtrType(Context), NULL);
+ TD->getIntPtrType(Dst->getType()),
+ TD->getIntPtrType(Src->getType()),
+ NULL);
Dst = CastToCStr(Dst, B);
Src = CastToCStr(Src, B);
CallInst *CI = B.CreateCall4(MemCpy, Dst, Src, Len, ObjSize);
@@ -230,12 +228,11 @@ Value *llvm::EmitMemChr(Value *Ptr, Value *Val,
Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind };
AWI = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
ArrayRef<Attributes::AttrVal>(AVs, 2));
- LLVMContext &Context = B.GetInsertBlock()->getContext();
Value *MemChr = M->getOrInsertFunction("memchr", AttrListPtr::get(AWI),
B.getInt8PtrTy(),
B.getInt8PtrTy(),
B.getInt32Ty(),
- TD->getIntPtrType(Context),
+ TD->getIntPtrType(Ptr->getType()),
NULL);
CallInst *CI = B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr");
@@ -260,12 +257,12 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2,
AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
ArrayRef<Attributes::AttrVal>(AVs, 2));
- LLVMContext &Context = B.GetInsertBlock()->getContext();
Value *MemCmp = M->getOrInsertFunction("memcmp", AttrListPtr::get(AWI),
B.getInt32Ty(),
B.getInt8PtrTy(),
B.getInt8PtrTy(),
- TD->getIntPtrType(Context), NULL);
+ TD->getIntPtrType(Ptr1->getType()),
+ NULL);
CallInst *CI = B.CreateCall3(MemCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B),
Len, "memcmp");
@@ -425,24 +422,24 @@ Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File,
AWI[1] = AttributeWithIndex::get(M->getContext(), 4, Attributes::NoCapture);
AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
Attributes::NoUnwind);
- LLVMContext &Context = B.GetInsertBlock()->getContext();
StringRef FWriteName = TLI->getName(LibFunc::fwrite);
Constant *F;
+ Type *PtrTy = Ptr->getType();
if (File->getType()->isPointerTy())
F = M->getOrInsertFunction(FWriteName, AttrListPtr::get(AWI),
- TD->getIntPtrType(Context),
+ TD->getIntPtrType(PtrTy),
B.getInt8PtrTy(),
- TD->getIntPtrType(Context),
- TD->getIntPtrType(Context),
+ TD->getIntPtrType(PtrTy),
+ TD->getIntPtrType(PtrTy),
File->getType(), NULL);
else
- F = M->getOrInsertFunction(FWriteName, TD->getIntPtrType(Context),
+ F = M->getOrInsertFunction(FWriteName, TD->getIntPtrType(PtrTy),
B.getInt8PtrTy(),
- TD->getIntPtrType(Context),
- TD->getIntPtrType(Context),
+ TD->getIntPtrType(PtrTy),
+ TD->getIntPtrType(PtrTy),
File->getType(), NULL);
CallInst *CI = B.CreateCall4(F, CastToCStr(Ptr, B), Size,
- ConstantInt::get(TD->getIntPtrType(Context), 1), File);
+ ConstantInt::get(TD->getIntPtrType(PtrTy), 1), File);
if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
CI->setCallingConv(Fn->getCallingConv());
@@ -464,12 +461,13 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const DataLayout *TD,
IRBuilder<> B(CI);
if (Name == "__memcpy_chk") {
+ Type *PT = FT->getParamType(0);
// Check if this has the right signature.
if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy() ||
- FT->getParamType(2) != TD->getIntPtrType(Context) ||
- FT->getParamType(3) != TD->getIntPtrType(Context))
+ FT->getParamType(2) != TD->getIntPtrType(PT) ||
+ FT->getParamType(3) != TD->getIntPtrType(PT))
return false;
if (isFoldable(3, 2, false)) {
@@ -488,11 +486,12 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const DataLayout *TD,
if (Name == "__memmove_chk") {
// Check if this has the right signature.
+ Type *PT = FT->getParamType(0);
if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy() ||
- FT->getParamType(2) != TD->getIntPtrType(Context) ||
- FT->getParamType(3) != TD->getIntPtrType(Context))
+ FT->getParamType(2) != TD->getIntPtrType(PT) ||
+ FT->getParamType(3) != TD->getIntPtrType(PT))
return false;
if (isFoldable(3, 2, false)) {
@@ -506,11 +505,12 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const DataLayout *TD,
if (Name == "__memset_chk") {
// Check if this has the right signature.
+ Type *PT = FT->getParamType(0);
if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isIntegerTy() ||
- FT->getParamType(2) != TD->getIntPtrType(Context) ||
- FT->getParamType(3) != TD->getIntPtrType(Context))
+ FT->getParamType(2) != TD->getIntPtrType(PT) ||
+ FT->getParamType(3) != TD->getIntPtrType(PT))
return false;
if (isFoldable(3, 2, false)) {
@@ -525,11 +525,12 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const DataLayout *TD,
if (Name == "__strcpy_chk" || Name == "__stpcpy_chk") {
// Check if this has the right signature.
+ Type *PT = FT->getParamType(0);
if (FT->getNumParams() != 3 ||
FT->getReturnType() != FT->getParamType(0) ||
FT->getParamType(0) != FT->getParamType(1) ||
FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
- FT->getParamType(2) != TD->getIntPtrType(Context))
+ FT->getParamType(2) != TD->getIntPtrType(PT))
return 0;
@@ -551,11 +552,12 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const DataLayout *TD,
if (Name == "__strncpy_chk" || Name == "__stpncpy_chk") {
// Check if this has the right signature.
+ Type *PT = FT->getParamType(0);
if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
FT->getParamType(0) != FT->getParamType(1) ||
FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
!FT->getParamType(2)->isIntegerTy() ||
- FT->getParamType(3) != TD->getIntPtrType(Context))
+ FT->getParamType(3) != TD->getIntPtrType(PT))
return false;
if (isFoldable(3, 2, false)) {
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 9729687a83..c09d982d65 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -806,8 +806,7 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
const DataLayout *TD) {
assert(V->getType()->isPointerTy() &&
"getOrEnforceKnownAlignment expects a pointer!");
- unsigned AS = cast<PointerType>(V->getType())->getAddressSpace();
- unsigned BitWidth = TD ? TD->getPointerSizeInBits(AS) : 64;
+ unsigned BitWidth = TD ? TD->getTypeSizeInBits(V->getType()) : 64;
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
ComputeMaskedBits(V, KnownZero, KnownOne, TD);
unsigned TrailZ = KnownZero.countTrailingOnes();
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index a008da67e9..870e2b2ade 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -392,7 +392,7 @@ static ConstantInt *GetConstantInt(Value *V, const DataLayout *TD) {
// This is some kind of pointer constant. Turn it into a pointer-sized
// ConstantInt if possible.
- IntegerType *PtrTy = TD->getIntPtrType(V->getContext());
+ IntegerType *PtrTy = TD->getIntPtrType(V->getType());
// Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
if (isa<ConstantPointerNull>(V))
@@ -532,9 +532,13 @@ Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) {
CV = ICI->getOperand(0);
// Unwrap any lossless ptrtoint cast.
- if (TD && CV && CV->getType() == TD->getIntPtrType(CV->getContext()))
- if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV))
+ if (TD && CV) {
+ PtrToIntInst *PTII = NULL;
+ if ((PTII = dyn_cast<PtrToIntInst>(CV)) &&
+ CV->getType() == TD->getIntPtrType(CV->getContext(),
+ PTII->getPointerAddressSpace()))
CV = PTII->getOperand(0);
+ }
return CV;
}
@@ -981,7 +985,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
// Convert pointer to int before we switch.
if (CV->getType()->isPointerTy()) {
assert(TD && "Cannot switch on pointer without DataLayout");
- CV = Builder.CreatePtrToInt(CV, TD->getIntPtrType(CV->getContext()),
+ CV = Builder.CreatePtrToInt(CV, TD->getIntPtrType(CV->getType()),
"magicptr");
}
@@ -2709,7 +2713,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *TD,
if (CompVal->getType()->isPointerTy()) {
assert(TD && "Cannot switch on pointer without DataLayout");
CompVal = Builder.CreatePtrToInt(CompVal,
- TD->getIntPtrType(CompVal->getContext()),
+ TD->getIntPtrType(CompVal->getType()),
"magicptr");
}
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index b15acdff63..162b29e829 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -102,14 +102,13 @@ struct MemCpyChkOpt : public InstFortifiedLibCallOptimization {
virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
this->CI = CI;
FunctionType *FT = Callee->getFunctionType();
- LLVMContext &Context = CI->getParent()->getContext();
// Check if this has the right signature.
if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy() ||
- FT->getParamType(2) != TD->getIntPtrType(Context) ||
- FT->getParamType(3) != TD->getIntPtrType(Context))
+ FT->getParamType(2) != TD->getIntPtrType(FT->getParamType(0)) ||
+ FT->getParamType(3) != TD->getIntPtrType(FT->getParamType(1)))
return 0;
if (isFoldable(3, 2, false)) {
@@ -125,14 +124,13 @@ struct MemMoveChkOpt : public InstFortifiedLibCallOptimization {
virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
this->CI = CI;
FunctionType *FT = Callee->getFunctionType();
- LLVMContext &Context = CI->getParent()->getContext();
// Check if this has the right signature.
if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy() ||
- FT->getParamType(2) != TD->getIntPtrType(Context) ||
- FT->getParamType(3) != TD->getIntPtrType(Context))
+ FT->getParamType(2) != TD->getIntPtrType(FT->getParamType(0)) ||
+ FT->getParamType(3) != TD->getIntPtrType(FT->getParamType(1)))
return 0;
if (isFoldable(3, 2, false)) {
@@ -148,14 +146,13 @@ struct MemSetChkOpt : public InstFortifiedLibCallOptimization {
virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
this->CI = CI;
FunctionType *FT = Callee->getFunctionType();
- LLVMContext &Context = CI->getParent()->getContext();
// Check if this has the right signature.
if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isIntegerTy() ||
- FT->getParamType(2) != TD->getIntPtrType(Context) ||
- FT->getParamType(3) != TD->getIntPtrType(Context))
+ FT->getParamType(2) != TD->getIntPtrType(FT->getParamType(0)) ||
+ FT->getParamType(3) != TD->getIntPtrType(FT->getParamType(0)))
return 0;
if (isFoldable(3, 2, false)) {
@@ -180,7 +177,7 @@ struct StrCpyChkOpt : public InstFortifiedLibCallOptimization {
FT->getReturnType() != FT->getParamType(0) ||
FT->getParamType(0) != FT->getParamType(1) ||
FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
- FT->getParamType(2) != TD->getIntPtrType(Context))
+ FT->getParamType(2) != TD->getIntPtrType(FT->getParamType(0)))
return 0;
Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
@@ -205,8 +202,8 @@ struct StrCpyChkOpt : public InstFortifiedLibCallOptimization {
Value *Ret =
EmitMemCpyChk(Dst, Src,
- ConstantInt::get(TD->getIntPtrType(Context), Len),
- CI->getArgOperand(2), B, TD, TLI);
+ ConstantInt::get(TD->getIntPtrType(Dst->getType()),
+ Len), CI->getArgOperand(2), B, TD, TLI);
return Ret;
}
return 0;
@@ -225,7 +222,7 @@ struct StrNCpyChkOpt : public InstFortifiedLibCallOptimization {
FT->getParamType(0) != FT->getParamType(1) ||
FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
!FT->getParamType(2)->isIntegerTy() ||
- FT->getParamType(3) != TD->getIntPtrType(Context))
+ FT->getParamType(3) != TD->getIntPtrType(FT->getParamType(0)))
return 0;
if (isFoldable(3, 2, false)) {
@@ -287,7 +284,8 @@ struct StrCatOpt : public LibCallOptimization {
// We have enough information to now generate the memcpy call to do the
// concatenation for us. Make a memcpy to copy the nul byte with align = 1.
B.CreateMemCpy(CpyDst, Src,
- ConstantInt::get(TD->getIntPtrType(*Context), Len + 1), 1);
+ ConstantInt::get(TD->getIntPtrType(Src->getType()),
+ Len + 1), 1);
return Dst;
}
};
@@ -359,8 +357,9 @@ struct StrChrOpt : public LibCallOptimization {
if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32))// memchr needs i32.
return 0;
+ Type *PT = FT->getParamType(0);
return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul.
- ConstantInt::get(TD->getIntPtrType(*Context), Len),
+ ConstantInt::get(TD->getIntPtrType(PT), Len),
B, TD, TLI);
}
@@ -454,8 +453,9 @@ struct StrCmpOpt : public LibCallOptimization {
// These optimizations require DataLayout.
if (!TD) return 0;
+ Type *PT = FT->getParamType(0);
return EmitMemCmp(Str1P, Str2P,
- ConstantInt::get(TD->getIntPtrType(*Context),
+ ConstantInt::get(TD->getIntPtrType(PT),
std::min(Len1, Len2)), B, TD, TLI);
}
@@ -537,7 +537,7 @@ struct StrCpyOpt : public LibCallOptimization {
// We have enough information to now generate the memcpy call to do the
// copy for us. Make a memcpy to copy the nul byte with align = 1.
B.CreateMemCpy(Dst, Src,
- ConstantInt::get(TD->getIntPtrType(*Context), Len), 1);
+ ConstantInt::get(TD->getIntPtrType(Dst->getType()), Len), 1);
return Dst;
}
};
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index f944d9b4fc..423c7a4911 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -18,10 +18,13 @@
//
// This pass has three parts:
// 1. The main loop pass that drives the different parts.
-// 2. LoopVectorizationLegality - A helper class that checks for the legality
+// 2. LoopVectorizationLegality - A unit that checks for the legality
// of the vectorization.
-// 3. SingleBlockLoopVectorizer - A helper class that performs the actual
+// 3. SingleBlockLoopVectorizer - A unit that performs the actual
// widening of instructions.
+// 4. LoopVectorizationCostModel - A unit that checks for the profitability
+// of vectorization. It decides on the optimal vector width, which
+// can be one, if vectorization is not profitable.
//===----------------------------------------------------------------------===//
//
// The reduction-variable vectorization is based on the paper:
@@ -51,13 +54,14 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/TargetTransformInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -67,13 +71,14 @@
using namespace llvm;
static cl::opt<unsigned>
-DefaultVectorizationFactor("default-loop-vectorize-width",
- cl::init(4), cl::Hidden,
- cl::desc("Set the default loop vectorization width"));
+VectorizationFactor("force-vector-width", cl::init(0), cl::Hidden,
+ cl::desc("Set the default vectorization width. Zero is autoselect."));
+
namespace {
-// Forward declaration.
+// Forward declarations.
class LoopVectorizationLegality;
+class LoopVectorizationCostModel;
/// SingleBlockLoopVectorizer vectorizes loops which contain only one basic
/// block to a specified vectorization factor (VF).
@@ -203,7 +208,10 @@ public:
enum ReductionKind {
NoReduction = -1, /// Not a reduction.
IntegerAdd = 0, /// Sum of numbers.
- IntegerMult = 1 /// Product of numbers.
+ IntegerMult = 1, /// Product of numbers.
+ IntegerOr = 2, /// Bitwise or logical OR of numbers.
+ IntegerAnd = 3, /// Bitwise or logical AND of numbers.
+ IntegerXor = 4 /// Bitwise or logical XOR of numbers.
};
/// This POD struct holds information about reduction variables.
@@ -229,11 +237,10 @@ public:
/// of the reductions that were found in the loop.
typedef DenseMap<PHINode*, ReductionDescriptor> ReductionList;
- /// Returns the maximum vectorization factor that we *can* use to vectorize
- /// this loop. This does not mean that it is profitable to vectorize this
- /// loop, only that it is legal to do so. This may be a large number. We
- /// can vectorize to any SIMD width below this number.
- unsigned getLoopMaxVF();
+ /// Returns true if it is legal to vectorize this loop.
+ /// This does not mean that it is profitable to vectorize this
+ /// loop, only that it is legal to do so.
+ bool canVectorize();
/// Returns the Induction variable.
PHINode *getInduction() {return Induction;}
@@ -259,10 +266,6 @@ private:
/// Returns true if BB is vectorizable
bool canVectorizeMemory(BasicBlock &BB);
- // Check if a pointer value is known to be disjoint.
- // Example: Alloca, Global, NoAlias.
- bool isIdentifiedSafeObject(Value* Val);
-
/// Returns True, if 'Phi' is the kind of reduction variable for type
/// 'Kind'. If this is a reduction variable, it adds it to ReductionList.
bool AddReductionVar(PHINode *Phi, ReductionKind Kind);
@@ -290,6 +293,48 @@ private:
SmallPtrSet<Value*, 4> AllowedExit;
};
+/// LoopVectorizationCostModel - estimates the expected speedups due to
+/// vectorization.
+/// In many cases vectorization is not profitable. This can happen because
+/// of a number of reasons. In this class we mainly attempt to predict
+/// the expected speedup/slowdowns due to the supported instruction set.
+/// We use the VectorTargetTransformInfo to query the different backends
+/// for the cost of different operations.
+class LoopVectorizationCostModel {
+public:
+ /// C'tor.
+ LoopVectorizationCostModel(Loop *Lp, ScalarEvolution *Se,
+ LoopVectorizationLegality *Leg,
+ const VectorTargetTransformInfo *Vtti):
+ TheLoop(Lp), SE(Se), Legal(Leg), VTTI(Vtti) { }
+
+ /// Returns the most profitable vectorization factor for the loop that is
+ /// smaller or equal to the VF argument. This method checks every power
+ /// of two up to VF.
+ unsigned findBestVectorizationFactor(unsigned VF = 4);
+
+private:
+ /// Returns the expected execution cost. The unit of the cost does
+ /// not matter because we use the 'cost' units to compare different
+ /// vector widths. The cost that is returned is *not* normalized by
+ /// the factor width.
+ unsigned expectedCost(unsigned VF);
+
+ /// Returns the execution time cost of an instruction for a given vector
+ /// width. Vector width of one means scalar.
+ unsigned getInstructionCost(Instruction *I, unsigned VF);
+
+ /// The loop that we evaluate.
+ Loop *TheLoop;
+ /// Scev analysis.
+ ScalarEvolution *SE;
+
+ /// Vectorization legality.
+ LoopVectorizationLegality *Legal;
+ /// Vector target information.
+ const VectorTargetTransformInfo *VTTI;
+};
+
struct LoopVectorize : public LoopPass {
static char ID; // Pass identification, replacement for typeid
@@ -300,6 +345,7 @@ struct LoopVectorize : public LoopPass {
ScalarEvolution *SE;
DataLayout *DL;
LoopInfo *LI;
+ TargetTransformInfo *TTI;
virtual bool runOnLoop(Loop *L, LPPassManager &LPM) {
// We only vectorize innermost loops.
@@ -309,25 +355,42 @@ struct LoopVectorize : public LoopPass {
SE = &getAnalysis<ScalarEvolution>();
DL = getAnalysisIfAvailable<DataLayout>();
LI = &getAnalysis<LoopInfo>();
+ TTI = getAnalysisIfAvailable<TargetTransformInfo>();
DEBUG(dbgs() << "LV: Checking a loop in \"" <<
L->getHeader()->getParent()->getName() << "\"\n");
// Check if it is legal to vectorize the loop.
LoopVectorizationLegality LVL(L, SE, DL);
- unsigned MaxVF = LVL.getLoopMaxVF();
-
- // Check that we can vectorize this loop using the chosen vectorization
- // width.
- if (MaxVF < DefaultVectorizationFactor) {
- DEBUG(dbgs() << "LV: non-vectorizable MaxVF ("<< MaxVF << ").\n");
+ if (!LVL.canVectorize()) {
+ DEBUG(dbgs() << "LV: Not vectorizing.\n");
return false;
}
- DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< MaxVF << ").\n");
+ // Select the preffered vectorization factor.
+ unsigned VF = 1;
+ if (VectorizationFactor == 0) {
+ const VectorTargetTransformInfo *VTTI = 0;
+ if (TTI)
+ VTTI = TTI->getVectorTargetTransformInfo();
+ // Use the cost model.
+ LoopVectorizationCostModel CM(L, SE, &LVL, VTTI);
+ VF = CM.findBestVectorizationFactor();
+
+ if (VF == 1) {
+ DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n");
+ return false;
+ }
+
+ } else {
+ // Use the user command flag.
+ VF = VectorizationFactor;
+ }
+
+ DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< VF << ").\n");
// If we decided that it is *legal* to vectorizer the loop then do it.
- SingleBlockLoopVectorizer LB(L, SE, LI, &LPM, DefaultVectorizationFactor);
+ SingleBlockLoopVectorizer LB(L, SE, LI, &LPM, VF);
LB.vectorize(&LVL);
DEBUG(verifyFunction(*L->getHeader()->getParent()));
@@ -660,6 +723,13 @@ void SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal
void
SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
+ //===------------------------------------------------===//
+ //
+ // Notice: any optimization or new instruction that go
+ // into the code below should be also be implemented in
+ // the cost-model.
+ //
+ //===------------------------------------------------===//
typedef SmallVector<PHINode*, 4> PhiVector;
BasicBlock &BB = *OrigLoop->getHeader();
Constant *Zero = ConstantInt::get(
@@ -914,14 +984,28 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
// Extract the first scalar.
Value *Scalar0 =
Builder.CreateExtractElement(NewPhi, Builder.getInt32(0));
- // Extract and sum the remaining vector elements.
+ // Extract and reduce the remaining vector elements.
for (unsigned i=1; i < VF; ++i) {
Value *Scalar1 =
Builder.CreateExtractElement(NewPhi, Builder.getInt32(i));
- if (RdxDesc.Kind == LoopVectorizationLegality::IntegerAdd) {
- Scalar0 = Builder.CreateAdd(Scalar0, Scalar1);
- } else {
- Scalar0 = Builder.CreateMul(Scalar0, Scalar1);
+ switch (RdxDesc.Kind) {
+ case LoopVectorizationLegality::IntegerAdd:
+ Scalar0 = Builder.CreateAdd(Scalar0, Scalar1);
+ break;
+ case LoopVectorizationLegality::IntegerMult:
+ Scalar0 = Builder.CreateMul(Scalar0, Scalar1);
+ break;
+ case LoopVectorizationLegality::IntegerOr:
+ Scalar0 = Builder.CreateOr(Scalar0, Scalar1);
+ break;
+ case LoopVectorizationLegality::IntegerAnd:
+ Scalar0 = Builder.CreateAnd(Scalar0, Scalar1);
+ break;
+ case LoopVectorizationLegality::IntegerXor:
+ Scalar0 = Builder.CreateXor(Scalar0, Scalar1);
+ break;
+ default:
+ llvm_unreachable("Unknown reduction operation");
}
}
@@ -961,18 +1045,18 @@ void SingleBlockLoopVectorizer::cleanup() {
SE->forgetLoop(OrigLoop);
}
-unsigned LoopVectorizationLegality::getLoopMaxVF() {
+bool LoopVectorizationLegality::canVectorize() {
if (!TheLoop->getLoopPreheader()) {
assert(false && "No preheader!!");
DEBUG(dbgs() << "LV: Loop not normalized." << "\n");
- return 1;
+ return false;
}
// We can only vectorize single basic block loops.
unsigned NumBlocks = TheLoop->getNumBlocks();
if (NumBlocks != 1) {
DEBUG(dbgs() << "LV: Too many blocks:" << NumBlocks << "\n");
- return 1;
+ return false;
}
// We need to have a loop header.
@@ -982,22 +1066,22 @@ unsigned LoopVectorizationLegality::getLoopMaxVF() {
// Go over each instruction and look at memory deps.
if (!canVectorizeBlock(*BB)) {
DEBUG(dbgs() << "LV: Can't vectorize this loop header\n");
- return 1;
+ return false;
}
// ScalarEvolution needs to be able to find the exit count.
const SCEV *ExitCount = SE->getExitCount(TheLoop, BB);
if (ExitCount == SE->getCouldNotCompute()) {
DEBUG(dbgs() << "LV: SCEV could not compute the loop exit count.\n");
- return 1;
+ return false;
}
DEBUG(dbgs() << "LV: We can vectorize this loop!\n");
// Okay! We can vectorize. At this point we don't have any other mem analysis
- // which may limit our maximum vectorization factor, so just return the
- // maximum SIMD size.
- return DefaultVectorizationFactor;
+ // which may limit our maximum vectorization factor, so just return true with
+ // no restrictions.
+ return true;
}
bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) {
@@ -1032,7 +1116,19 @@ bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) {
continue;
}
if (AddReductionVar(Phi, IntegerMult)) {
- DEBUG(dbgs() << "LV: Found an Mult reduction PHI."<< *Phi <<"\n");
+ DEBUG(dbgs() << "LV: Found a MUL reduction PHI."<< *Phi <<"\n");
+ continue;
+ }
+ if (AddReductionVar(Phi, IntegerOr)) {
+ DEBUG(dbgs() << "LV: Found an OR reduction PHI."<< *Phi <<"\n");
+ continue;
+ }
+ if (AddReductionVar(Phi, IntegerAnd)) {
+ DEBUG(dbgs() << "LV: Found an AND reduction PHI."<< *Phi <<"\n");
+ continue;
+ }
+ if (AddReductionVar(Phi, IntegerXor)) {
+ DEBUG(dbgs() << "LV: Found a XOR reduction PHI."<< *Phi <<"\n");
continue;
}
@@ -1178,7 +1274,7 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {
GetUnderlyingObjects(*I, TempObjects, DL);
for (ValueVector::iterator it=TempObjects.begin(), e=TempObjects.end();
it != e; ++it) {
- if (!isIdentifiedSafeObject(*it)) {
+ if (!isIdentifiedObject(*it)) {
DEBUG(dbgs() << "LV: Found an unidentified write ptr:"<< **it <<"\n");
return false;
}
@@ -1196,7 +1292,7 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {
GetUnderlyingObjects(*I, TempObjects, DL);
for (ValueVector::iterator it=TempObjects.begin(), e=TempObjects.end();
it != e; ++it) {
- if (!isIdentifiedSafeObject(*it)) {
+ if (!isIdentifiedObject(*it)) {
DEBUG(dbgs() << "LV: Found an unidentified read ptr:"<< **it <<"\n");
return false;
}
@@ -1213,19 +1309,6 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {
return true;
}
-/// Checks if the value is a Global variable or if it is an Arguments
-/// marked with the NoAlias attribute.
-bool LoopVectorizationLegality::isIdentifiedSafeObject(Value* Val) {
- assert(Val && "Invalid value");
- if (isa<GlobalValue>(Val))
- return true;
- if (isa<AllocaInst>(Val))
- return true;
- if (Argument *A = dyn_cast<Argument>(Val))
- return A->hasNoAliasAttr();
- return false;
-}
-
bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
ReductionKind Kind) {
if (Phi->getNumIncomingValues() != 2)
@@ -1319,6 +1402,12 @@ LoopVectorizationLegality::isReductionInstr(Instruction *I,
case Instruction::UDiv:
case Instruction::SDiv:
return Kind == IntegerMult;
+ case Instruction::And:
+ return Kind == IntegerAnd;
+ case Instruction::Or:
+ return Kind == IntegerOr;
+ case Instruction::Xor:
+ return Kind == IntegerXor;
}
}
@@ -1340,6 +1429,193 @@ bool LoopVectorizationLegality::isInductionVariable(PHINode *Phi) {
return true;
}
+unsigned
+LoopVectorizationCostModel::findBestVectorizationFactor(unsigned VF) {
+ if (!VTTI) {
+ DEBUG(dbgs() << "LV: No vector target information. Not vectorizing. \n");
+ return 1;
+ }
+
+ float Cost = expectedCost(1);
+ unsigned Width = 1;
+ DEBUG(dbgs() << "LV: Scalar loop costs: "<< (int)Cost << ".\n");
+ for (unsigned i=2; i <= VF; i*=2) {
+ // Notice that the vector loop needs to be executed less times, so
+ // we need to divide the cost of the vector loops by the width of
+ // the vector elements.
+ float VectorCost = expectedCost(i) / (float)i;
+ DEBUG(dbgs() << "LV: Vector loop of width "<< i << " costs: " <<
+ (int)VectorCost << ".\n");
+ if (VectorCost < Cost) {
+ Cost = VectorCost;
+ Width = i;
+ }
+ }
+
+ DEBUG(dbgs() << "LV: Selecting VF = : "<< Width << ".\n");
+ return Width;
+}
+
+unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) {
+ // We can only estimate the cost of single basic block loops.
+ assert(1 == TheLoop->getNumBlocks() && "Too many blocks in loop");
+
+ BasicBlock *BB = TheLoop->getHeader();
+ unsigned Cost = 0;
+
+ // For each instruction in the old loop.
+ for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
+ Instruction *Inst = it;
+ unsigned C = getInstructionCost(Inst, VF);
+ Cost += C;
+ DEBUG(dbgs() << "LV: Found an estimated cost of "<< C <<" for VF "<< VF <<
+ " For instruction: "<< *Inst << "\n");
+ }
+
+ return Cost;
+}
+
+unsigned
+LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
+ assert(VTTI && "Invalid vector target transformation info");
+ switch (I->getOpcode()) {
+ case Instruction::GetElementPtr:
+ return 0;
+ case Instruction::Br: {
+ return VTTI->getInstrCost(I->getOpcode());
+ }
+ case Instruction::PHI:
+ return 0;
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ Type *VTy = VectorType::get(I->getType(), VF);
+ return VTTI->getInstrCost(I->getOpcode(), VTy);
+ }
+ case Instruction::Select: {
+ SelectInst *SI = cast<SelectInst>(I);
+ Type *VTy = VectorType::get(I->getType(), VF);
+ const SCEV *CondSCEV = SE->getSCEV(SI->getCondition());
+ bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop));
+ Type *CondTy = SI->getCondition()->getType();
+ if (ScalarCond)
+ CondTy = VectorType::get(CondTy, VF);
+
+ return VTTI->getInstrCost(I->getOpcode(), VTy, CondTy);
+ }
+ case Instruction::ICmp:
+ case Instruction::FCmp: {
+ Type *VTy = VectorType::get(I->getOperand(0)->getType(), VF);
+ return VTTI->getInstrCost(I->getOpcode(), VTy);
+ }
+ case Instruction::Store: {
+ StoreInst *SI = cast<StoreInst>(I);
+ Type *VTy = VectorType::get(SI->getValueOperand()->getType(), VF);
+
+ // Scalarized stores.
+ if (!Legal->isConsecutiveGep(SI->getPointerOperand())) {
+ unsigned Cost = 0;
+ if (VF != 1) {
+ unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement,
+ VTy);
+ // The cost of extracting from the value vector and pointer vector.
+ Cost += VF * (ExtCost * 2);
+ }
+ // The cost of the scalar stores.
+ Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(),
+ VTy->getScalarType(),
+ SI->getAlignment(),
+ SI->getPointerAddressSpace());
+ return Cost;
+ }
+
+ // Wide stores.
+ return VTTI->getMemoryOpCost(I->getOpcode(), VTy, SI->getAlignment(),
+ SI->getPointerAddressSpace());
+ }
+ case Instruction::Load: {
+ LoadInst *LI = cast<LoadInst>(I);
+ Type *VTy = VectorType::get(I->getType(), VF);
+
+ // Scalarized loads.
+ if (!Legal->isConsecutiveGep(LI->getPointerOperand())) {
+ unsigned Cost = 0;
+ if (VF != 1) {
+ unsigned InCost = VTTI->getInstrCost(Instruction::InsertElement, VTy);
+ unsigned ExCost = VTTI->getInstrCost(Instruction::ExtractValue, VTy);
+
+ // The cost of inserting the loaded value into the result vector, and
+ // extracting from a vector of pointers.
+ Cost += VF * (InCost + ExCost);
+ }
+ // The cost of the scalar stores.
+ Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(), VTy->getScalarType(),
+ LI->getAlignment(),
+ LI->getPointerAddressSpace());
+ return Cost;
+ }
+
+ // Wide loads.
+ return VTTI->getMemoryOpCost(I->getOpcode(), VTy, LI->getAlignment(),
+ LI->getPointerAddressSpace());
+ }
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::BitCast: {
+ Type *SrcTy = VectorType::get(I->getOperand(0)->getType(), VF);
+ Type *DstTy = VectorType::get(I->getType(), VF);
+ return VTTI->getInstrCost(I->getOpcode(), DstTy, SrcTy);
+ }
+ default: {
+ // We are scalarizing the instruction. Return the cost of the scalar
+ // instruction, plus the cost of insert and extract into vector
+ // elements, times the vector width.
+ unsigned Cost = 0;
+ Type *Ty = I->getType();
+
+ if (!Ty->isVoidTy()) {
+ Type *VTy = VectorType::get(Ty, VF);
+ unsigned InsCost = VTTI->getInstrCost(Instruction::InsertElement, VTy);
+ unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement, VTy);
+ Cost += VF * (InsCost + ExtCost);
+ }
+
+ /// We don't have any information on the scalar instruction, but maybe
+ /// the target has.
+ /// TODO: This may be a target-specific intrinsic.
+ /// Need to add API for that.
+ Cost += VF * VTTI->getInstrCost(I->getOpcode(), Ty);
+
+ return Cost;
+ }
+ }// end of switch.
+}
+
+
} // namespace
char LoopVectorize::ID = 0;
diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp
index 5e23e6fc78..b72c17f667 100644
--- a/lib/VMCore/AsmWriter.cpp
+++ b/lib/VMCore/AsmWriter.cpp
@@ -74,6 +74,7 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out)
case CallingConv::X86_StdCall: Out << "x86_stdcallcc"; break;
case CallingConv::X86_FastCall: Out << "x86_fastcallcc"; break;
case CallingConv::X86_ThisCall: Out << "x86_thiscallcc"; break;
+ case CallingConv::Intel_OCL_BI: Out << "intel_ocl_bicc"; break;
case CallingConv::ARM_APCS: Out << "arm_apcscc"; break;
case CallingConv::ARM_AAPCS: Out << "arm_aapcscc"; break;
case CallingConv::ARM_AAPCS_VFP:Out << "arm_aapcs_vfpcc"; break;
diff --git a/lib/VMCore/DataLayout.cpp b/lib/VMCore/DataLayout.cpp
index e6994be257..104e5da057 100644
--- a/lib/VMCore/DataLayout.cpp
+++ b/lib/VMCore/DataLayout.cpp
@@ -524,6 +524,14 @@ std::string DataLayout::getStringRepresentation() const {
return OS.str();
}
+unsigned DataLayout::getPointerTypeSizeInBits(Type *Ty) const
+{
+ if (Ty->isPointerTy()) return getTypeSizeInBits(Ty);
+ if (Ty->isVectorTy()
+ && cast<VectorType>(Ty)->getElementType()->isPointerTy())
+ return getTypeSizeInBits(cast<VectorType>(Ty)->getElementType());
+ return getPointerSizeInBits(0);
+}
uint64_t DataLayout::getTypeSizeInBits(Type *Ty) const {
assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!");
@@ -660,13 +668,32 @@ unsigned DataLayout::getPreferredTypeAlignmentShift(Type *Ty) const {
return Log2_32(Align);
}
-/// getIntPtrType - Return an unsigned integer type that is the same size or
-/// greater to the host pointer size.
+/// getIntPtrType - Return an integer type that is the same size or
+/// greater to the pointer size for the address space.
IntegerType *DataLayout::getIntPtrType(LLVMContext &C,
unsigned AddressSpace) const {
return IntegerType::get(C, getPointerSizeInBits(AddressSpace));
}
+/// getIntPtrType - Return an integer type that is the same size or
+/// greater to the pointer size of the specific PointerType.
+IntegerType *DataLayout::getIntPtrType(Type *Ty) const {
+ LLVMContext &C = Ty->getContext();
+ // For pointers, we return the size for the specific address space.
+ if (Ty->isPointerTy()) return IntegerType::get(C, getTypeSizeInBits(Ty));
+ // For vector of pointers, we return the size of the address space
+ // of the pointer type.
+ if (Ty->isVectorTy() && cast<VectorType>(Ty)->getElementType()->isPointerTy())
+ return IntegerType::get(C,
+ getTypeSizeInBits(cast<VectorType>(Ty)->getElementType()));
+ // Otherwise return the address space for the default address space.
+ // An example of this occuring is that you want to get the IntPtr
+ // for all of the arguments in a function. However, the IntPtr
+ // for a non-pointer type cannot be determined by the type, so
+ // the default value is used.
+ return getIntPtrType(C, 0);
+}
+
uint64_t DataLayout::getIndexedOffset(Type *ptrTy,
ArrayRef<Value *> Indices) const {
diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp
index 13c4a5d257..e9b96d6cd2 100644
--- a/lib/VMCore/Instructions.cpp
+++ b/lib/VMCore/Instructions.cpp
@@ -2120,6 +2120,17 @@ bool CastInst::isNoopCast(Type *IntPtrTy) const {
return isNoopCast(getOpcode(), getOperand(0)->getType(), getType(), IntPtrTy);
}
+/// @brief Determine if a cast is a no-op
+bool CastInst::isNoopCast(const DataLayout &DL) const {
+ unsigned AS = 0;
+ if (getOpcode() == Instruction::PtrToInt)
+ AS = getOperand(0)->getType()->getPointerAddressSpace();
+ else if (getOpcode() == Instruction::IntToPtr)
+ AS = getType()->getPointerAddressSpace();
+ Type *IntPtrTy = DL.getIntPtrType(getContext(), AS);
+ return isNoopCast(getOpcode(), getOperand(0)->getType(), getType(), IntPtrTy);
+}
+
/// This function determines if a pair of casts can be eliminated and what
/// opcode should be used in the elimination. This assumes that there are two
/// instructions like this:
diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp
index 1a7a650989..54146e118c 100644
--- a/lib/VMCore/Type.cpp
+++ b/lib/VMCore/Type.cpp
@@ -233,7 +233,12 @@ unsigned Type::getVectorNumElements() const {
}
unsigned Type::getPointerAddressSpace() const {
- return cast<PointerType>(this)->getAddressSpace();
+ if (isPointerTy())
+ return cast<PointerType>(this)->getAddressSpace();
+ if (isVectorTy())
+ return getSequentialElementType()->getPointerAddressSpace();
+ llvm_unreachable("Should never reach here!");
+ return 0;
}
diff --git a/lib/VMCore/User.cpp b/lib/VMCore/User.cpp
index 5f35ce4b9a..e847ce6ee5 100644
--- a/lib/VMCore/User.cpp
+++ b/lib/VMCore/User.cpp
@@ -10,6 +10,7 @@
#include "llvm/Constant.h"
#include "llvm/GlobalValue.h"
#include "llvm/User.h"
+#include "llvm/Operator.h"
namespace llvm {
@@ -78,4 +79,12 @@ void User::operator delete(void *Usr) {
::operator delete(Storage);
}
+//===----------------------------------------------------------------------===//
+// Operator Class
+//===----------------------------------------------------------------------===//
+
+Operator::~Operator() {
+ llvm_unreachable("should never destroy an Operator");
+}
+
} // End llvm namespace
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index fd629b485a..eb40b09d29 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -705,6 +705,7 @@ void Verifier::visitFunction(Function &F) {
case CallingConv::Cold:
case CallingConv::X86_FastCall:
case CallingConv::X86_ThisCall:
+ case CallingConv::Intel_OCL_BI:
case CallingConv::PTX_Kernel:
case CallingConv::PTX_Device:
Assert1(!F.isVarArg(),
diff --git a/projects/CMakeLists.txt b/projects/CMakeLists.txt
index a9d2af6ad2..36751cd31d 100644
--- a/projects/CMakeLists.txt
+++ b/projects/CMakeLists.txt
@@ -14,8 +14,6 @@ endforeach(entry)
# Also add in the compiler-rt tree if present and we have a sufficiently
# recent version of CMake.
if(${CMAKE_VERSION} VERSION_GREATER 2.8.7 AND
- ${LLVM_BUILD_RUNTIME} AND
- IS_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/compiler-rt AND
- EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/compiler-rt/CMakeLists.txt)
- add_subdirectory(compiler-rt)
+ ${LLVM_BUILD_RUNTIME})
+ add_llvm_external_project(compiler-rt)
endif()
diff --git a/test/CodeGen/ARM/carry.ll b/test/CodeGen/ARM/carry.ll
index f84774d9b6..bf51cd627b 100644
--- a/test/CodeGen/ARM/carry.ll
+++ b/test/CodeGen/ARM/carry.ll
@@ -45,3 +45,16 @@ entry:
%0 = sub nsw i64 0, %x
ret i64 %0
}
+
+; rdar://12559385
+define i64 @f5(i32 %vi) {
+entry:
+; CHECK: f5:
+; CHECK: movw [[REG:r[0-9]+]], #36102
+; CHECK: sbc r{{[0-9]+}}, r{{[0-9]+}}, [[REG]]
+ %v0 = zext i32 %vi to i64
+ %v1 = xor i64 %v0, -155057456198619
+ %v4 = add i64 %v1, 155057456198619
+ %v5 = add i64 %v4, %v1
+ ret i64 %v5
+}
diff --git a/test/CodeGen/Mips/mips64-sret.ll b/test/CodeGen/Mips/mips64-sret.ll
index 498c5fe174..e26b0223b4 100644
--- a/test/CodeGen/Mips/mips64-sret.ll
+++ b/test/CodeGen/Mips/mips64-sret.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 -O0 < %s
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 -O3 < %s | FileCheck %s
%struct.S = type { [8 x i32] }
@@ -6,6 +6,8 @@
define void @f(%struct.S* noalias sret %agg.result) nounwind {
entry:
+; CHECK: daddu $2, $zero, $4
+
%0 = bitcast %struct.S* %agg.result to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.S* @g to i8*), i64 32, i32 4, i1 false)
ret void
diff --git a/test/CodeGen/PowerPC/jaggedstructs.ll b/test/CodeGen/PowerPC/jaggedstructs.ll
new file mode 100644
index 0000000000..62aa7cf929
--- /dev/null
+++ b/test/CodeGen/PowerPC/jaggedstructs.ll
@@ -0,0 +1,48 @@
+; RUN: llc -mcpu=pwr7 -O0 < %s | FileCheck %s
+
+; This tests receiving and re-passing parameters consisting of structures
+; of size 3, 5, 6, and 7. They are to be found/placed right-adjusted in
+; the parameter registers.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%struct.S3 = type { [3 x i8] }
+%struct.S5 = type { [5 x i8] }
+%struct.S6 = type { [6 x i8] }
+%struct.S7 = type { [7 x i8] }
+
+define void @test(%struct.S3* byval %s3, %struct.S5* byval %s5, %struct.S6* byval %s6, %struct.S7* byval %s7) nounwind {
+entry:
+ call void @check(%struct.S3* byval %s3, %struct.S5* byval %s5, %struct.S6* byval %s6, %struct.S7* byval %s7)
+ ret void
+}
+
+; CHECK: std 6, 216(1)
+; CHECK: std 5, 208(1)
+; CHECK: std 4, 200(1)
+; CHECK: std 3, 192(1)
+; CHECK: lbz {{[0-9]+}}, 199(1)
+; CHECK: stb {{[0-9]+}}, 55(1)
+; CHECK: lhz {{[0-9]+}}, 197(1)
+; CHECK: sth {{[0-9]+}}, 53(1)
+; CHECK: lbz {{[0-9]+}}, 207(1)
+; CHECK: stb {{[0-9]+}}, 63(1)
+; CHECK: lwz {{[0-9]+}}, 203(1)
+; CHECK: stw {{[0-9]+}}, 59(1)
+; CHECK: lhz {{[0-9]+}}, 214(1)
+; CHECK: sth {{[0-9]+}}, 70(1)
+; CHECK: lwz {{[0-9]+}}, 210(1)
+; CHECK: stw {{[0-9]+}}, 66(1)
+; CHECK: lbz {{[0-9]+}}, 223(1)
+; CHECK: stb {{[0-9]+}}, 79(1)
+; CHECK: lhz {{[0-9]+}}, 221(1)
+; CHECK: sth {{[0-9]+}}, 77(1)
+; CHECK: lwz {{[0-9]+}}, 217(1)
+; CHECK: stw {{[0-9]+}}, 73(1)
+; CHECK: ld 6, 72(1)
+; CHECK: ld 5, 64(1)
+; CHECK: ld 4, 56(1)
+; CHECK: ld 3, 48(1)
+
+declare void @check(%struct.S3* byval, %struct.S5* byval, %struct.S6* byval, %struct.S7* byval)
diff --git a/test/CodeGen/PowerPC/structsinregs.ll b/test/CodeGen/PowerPC/structsinregs.ll
index 43ba13b426..ef706af95d 100644
--- a/test/CodeGen/PowerPC/structsinregs.ll
+++ b/test/CodeGen/PowerPC/structsinregs.ll
@@ -188,17 +188,13 @@ entry:
%add13 = add nsw i32 %add11, %6
ret i32 %add13
-; CHECK: sldi 9, 9, 8
-; CHECK: sldi 8, 8, 16
-; CHECK: sldi 7, 7, 24
-; CHECK: sldi 5, 5, 40
-; CHECK: stw 6, 76(1)
-; CHECK: sth 4, 62(1)
-; CHECK: stb 3, 55(1)
; CHECK: std 9, 96(1)
; CHECK: std 8, 88(1)
; CHECK: std 7, 80(1)
+; CHECK: stw 6, 76(1)
; CHECK: std 5, 64(1)
+; CHECK: sth 4, 62(1)
+; CHECK: stb 3, 55(1)
; CHECK: lbz {{[0-9]+}}, 85(1)
; CHECK: lbz {{[0-9]+}}, 86(1)
; CHECK: lbz {{[0-9]+}}, 83(1)
diff --git a/test/CodeGen/Thumb2/carry.ll b/test/CodeGen/Thumb2/carry.ll
index de6f6e260d..85b4370fa5 100644
--- a/test/CodeGen/Thumb2/carry.ll
+++ b/test/CodeGen/Thumb2/carry.ll
@@ -20,3 +20,16 @@ entry:
%tmp2 = sub i64 %tmp1, %b
ret i64 %tmp2
}
+
+; rdar://12559385
+define i64 @f3(i32 %vi) {
+entry:
+; CHECK: f3:
+; CHECK: movw [[REG:r[0-9]+]], #36102
+; CHECK: sbcs r{{[0-9]+}}, [[REG]]
+ %v0 = zext i32 %vi to i64
+ %v1 = xor i64 %v0, -155057456198619
+ %v4 = add i64 %v1, 155057456198619
+ %v5 = add i64 %v4, %v1
+ ret i64 %v5
+}
diff --git a/test/CodeGen/X86/2012-01-18-vbitcast.ll b/test/CodeGen/X86/2012-01-18-vbitcast.ll
index 8a3ccc8dfd..3ce7db6e41 100644
--- a/test/CodeGen/X86/2012-01-18-vbitcast.ll
+++ b/test/CodeGen/X86/2012-01-18-vbitcast.ll
@@ -2,8 +2,8 @@
;CHECK: vcast
define <2 x i32> @vcast(<2 x float> %a, <2 x float> %b) {
-;CHECK: pshufd
-;CHECK: pshufd
+;CHECK: pmovzxdq
+;CHECK: pmovzxdq
%af = bitcast <2 x float> %a to <2 x i32>
%bf = bitcast <2 x float> %b to <2 x i32>
%x = sub <2 x i32> %af, %bf
diff --git a/test/CodeGen/X86/2012-03-15-build_vector_wl.ll b/test/CodeGen/X86/2012-03-15-build_vector_wl.ll
index fec17e9f4a..c4b307e5a5 100644
--- a/test/CodeGen/X86/2012-03-15-build_vector_wl.ll
+++ b/test/CodeGen/X86/2012-03-15-build_vector_wl.ll
@@ -4,7 +4,7 @@
define <4 x i8> @build_vector_again(<16 x i8> %in) nounwind readnone {
entry:
%out = shufflevector <16 x i8> %in, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK: shufb
+; CHECK: pmovzxbd
ret <4 x i8> %out
; CHECK: ret
}
diff --git a/test/CodeGen/X86/2012-07-10-extload64.ll b/test/CodeGen/X86/2012-07-10-extload64.ll
index 906b748fa4..4abdded38d 100644
--- a/test/CodeGen/X86/2012-07-10-extload64.ll
+++ b/test/CodeGen/X86/2012-07-10-extload64.ll
@@ -3,7 +3,7 @@
; CHECK: load_store
define void @load_store(<4 x i16>* %in) {
entry:
-; CHECK: movsd
+; CHECK: pmovzxwd
%A27 = load <4 x i16>* %in, align 4
%A28 = add <4 x i16> %A27, %A27
; CHECK: movlpd
@@ -27,6 +27,6 @@ define <2 x i32> @load_64(<2 x i32>* %ptr) {
BB:
%t = load <2 x i32>* %ptr
ret <2 x i32> %t
-;CHECK: movsd
+;CHECK: pmovzxdq
;CHECK: ret
}
diff --git a/test/CodeGen/X86/avx-intel-ocl.ll b/test/CodeGen/X86/avx-intel-ocl.ll
new file mode 100644
index 0000000000..1446b36a0f
--- /dev/null
+++ b/test/CodeGen/X86/avx-intel-ocl.ll
@@ -0,0 +1,107 @@
+; RUN: llc < %s -mtriple=i386-pc-win32 -mcpu=corei7-avx -mattr=+avx | FileCheck -check-prefix=WIN32 %s
+; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=corei7-avx -mattr=+avx | FileCheck -check-prefix=WIN64 %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck -check-prefix=NOT_WIN %s
+
+declare <16 x float> @func_float16_ptr(<16 x float>, <16 x float> *)
+declare <16 x float> @func_float16(<16 x float>, <16 x float>)
+; WIN64: testf16_inp
+; WIN64: vaddps {{.*}}, {{%ymm[0-1]}}
+; WIN64: vaddps {{.*}}, {{%ymm[0-1]}}
+; WIN64: leaq {{.*}}(%rsp), %rcx
+; WIN64: call
+; WIN64: ret
+
+; WIN32: testf16_inp
+; WIN32: movl %eax, (%esp)
+; WIN32: vaddps {{.*}}, {{%ymm[0-1]}}
+; WIN32: vaddps {{.*}}, {{%ymm[0-1]}}
+; WIN32: call
+; WIN32: ret
+
+; NOT_WIN: testf16_inp
+; NOT_WIN: vaddps {{.*}}, {{%ymm[0-1]}}
+; NOT_WIN: vaddps {{.*}}, {{%ymm[0-1]}}
+; NOT_WIN: leaq {{.*}}(%rsp), %rdi
+; NOT_WIN: call
+; NOT_WIN: ret
+
+;test calling conventions - input parameters
+define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind {
+ %y = alloca <16 x float>, align 16
+ %x = fadd <16 x float> %a, %b
+ %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y)
+ %2 = load <16 x float>* %y, align 16
+ %3 = fadd <16 x float> %2, %1
+ ret <16 x float> %3
+}
+
+;test calling conventions - preserved registers
+
+; preserved ymm6-ymm15
+; WIN64: testf16_regs
+; WIN64: call
+; WIN64: vaddps {{%ymm[6-7]}}, %ymm0, %ymm0
+; WIN64: vaddps {{%ymm[6-7]}}, %ymm1, %ymm1
+; WIN64: ret
+
+; preserved ymm8-ymm15
+; NOT_WIN: testf16_regs
+; NOT_WIN: call
+; NOT_WIN: vaddps {{%ymm[8-9]}}, %ymm0, %ymm0
+; NOT_WIN: vaddps {{%ymm[8-9]}}, %ymm1, %ymm1
+; NOT_WIN: ret
+
+define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind {
+ %y = alloca <16 x float>, align 16
+ %x = fadd <16 x float> %a, %b
+ %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y)
+ %2 = load <16 x float>* %y, align 16
+ %3 = fadd <16 x float> %1, %b
+ %4 = fadd <16 x float> %2, %3
+ ret <16 x float> %4
+}
+
+; test calling conventions - prolog and epilog
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
+; WIN64: call
+; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
+; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
+; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
+; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
+; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
+; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
+; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
+; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
+; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
+; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
+
+; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
+; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
+; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
+; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
+; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
+; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
+; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
+; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
+; NOT_WIN: call
+; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
+; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
+; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
+; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
+; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
+; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
+; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
+; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
+define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x float> %b) nounwind {
+ %c = call <16 x float> @func_float16(<16 x float> %a, <16 x float> %b)
+ ret <16 x float> %c
+}
diff --git a/test/CodeGen/X86/cvtv2f32.ll b/test/CodeGen/X86/cvtv2f32.ll
new file mode 100644
index 0000000000..466b096067
--- /dev/null
+++ b/test/CodeGen/X86/cvtv2f32.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -mtriple=i686-linux-pc -mcpu=corei7 | FileCheck %s
+
+define <2 x float> @foo(i32 %x, i32 %y, <2 x float> %v) {
+ %t1 = uitofp i32 %x to float
+ %t2 = insertelement <2 x float> undef, float %t1, i32 0
+ %t3 = uitofp i32 %y to float
+ %t4 = insertelement <2 x float> %t2, float %t3, i32 1
+ %t5 = fmul <2 x float> %v, %t4
+ ret <2 x float> %t5
+; CHECK: foo
+; CHECK: or
+; CHECK: subpd
+; CHECK: cvtpd2ps
+; CHECK: ret
+}
+
+define <2 x float> @bar(<2 x i32> %in) {
+ %r = uitofp <2 x i32> %in to <2 x float>
+ ret <2 x float> %r
+; CHECK: bar
+; CHECK: or
+; CHECK: subpd
+; CHECK: cvtpd2ps
+; CHECK: ret
+}
diff --git a/test/CodeGen/X86/fast-cc-callee-pops.ll b/test/CodeGen/X86/fast-cc-callee-pops.ll
index ea10897c73..2c5b80ac4a 100644
--- a/test/CodeGen/X86/fast-cc-callee-pops.ll
+++ b/test/CodeGen/X86/fast-cc-callee-pops.ll
@@ -2,12 +2,12 @@
; Check that a fastcc function pops its stack variables before returning.
-define x86_fastcallcc void @func(i64 %X, i64 %Y, float %G, double %Z) nounwind {
+define x86_fastcallcc void @func(i64 inreg %X, i64 %Y, float %G, double %Z) nounwind {
ret void
; CHECK: ret{{.*}}20
}
-define x86_thiscallcc void @func2(i32 %X, i64 %Y, float %G, double %Z) nounwind {
+define x86_thiscallcc void @func2(i32 inreg %X, i64 %Y, float %G, double %Z) nounwind {
ret void
; CHECK: ret{{.*}}20
}
diff --git a/test/CodeGen/X86/fast-cc-merge-stack-adj.ll b/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
index 14cb136f89..d591f9408b 100644
--- a/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
+++ b/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
@@ -3,7 +3,7 @@
target triple = "i686-pc-linux-gnu"
-declare x86_fastcallcc void @func(i32*, i64)
+declare x86_fastcallcc void @func(i32*, i64 inreg)
define x86_fastcallcc void @caller(i32, i64) {
%X = alloca i32 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/X86/fast-cc-pass-in-regs.ll b/test/CodeGen/X86/fast-cc-pass-in-regs.ll
index a96e5043fe..b60b68bd38 100644
--- a/test/CodeGen/X86/fast-cc-pass-in-regs.ll
+++ b/test/CodeGen/X86/fast-cc-pass-in-regs.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | FileCheck %s
; check that fastcc is passing stuff in regs.
-declare x86_fastcallcc i64 @callee(i64)
+declare x86_fastcallcc i64 @callee(i64 inreg)
define i64 @caller() {
%X = call x86_fastcallcc i64 @callee( i64 4294967299 ) ; <i64> [#uses=1]
@@ -9,7 +9,7 @@ define i64 @caller() {
ret i64 %X
}
-define x86_fastcallcc i64 @caller2(i64 %X) {
+define x86_fastcallcc i64 @caller2(i64 inreg %X) {
ret i64 %X
; CHECK: mov{{.*}}EAX, ECX
}
diff --git a/test/CodeGen/X86/ms-inline-asm.ll b/test/CodeGen/X86/ms-inline-asm.ll
index 59efa8d547..24d28adda8 100644
--- a/test/CodeGen/X86/ms-inline-asm.ll
+++ b/test/CodeGen/X86/ms-inline-asm.ll
@@ -38,3 +38,26 @@ entry:
; CHECK: .att_syntax
; CHECK: {{## InlineAsm End|#NO_APP}}
}
+
+%struct.t18_type = type { i32, i32 }
+
+define i32 @t18() nounwind {
+entry:
+ %foo = alloca %struct.t18_type, align 4
+ %a = getelementptr inbounds %struct.t18_type* %foo, i32 0, i32 0
+ store i32 1, i32* %a, align 4
+ %b = getelementptr inbounds %struct.t18_type* %foo, i32 0, i32 1
+ store i32 2, i32* %b, align 4
+ call void asm sideeffect inteldialect "lea ebx, foo\0A\09mov eax, [ebx].0\0A\09mov [ebx].4, ecx", "~{eax},~{dirflag},~{fpsr},~{flags}"() nounwind
+ %b1 = getelementptr inbounds %struct.t18_type* %foo, i32 0, i32 1
+ %0 = load i32* %b1, align 4
+ ret i32 %0
+; CHECK: t18
+; CHECK: {{## InlineAsm Start|#APP}}
+; CHECK: .intel_syntax
+; CHECK: lea ebx, foo
+; CHECK: mov eax, [ebx].0
+; CHECK: mov [ebx].4, ecx
+; CHECK: .att_syntax
+; CHECK: {{## InlineAsm End|#NO_APP}}
+}
diff --git a/test/CodeGen/X86/pointer-vector.ll b/test/CodeGen/X86/pointer-vector.ll
index 800fbedb4f..58423d1959 100644
--- a/test/CodeGen/X86/pointer-vector.ll
+++ b/test/CodeGen/X86/pointer-vector.ll
@@ -81,8 +81,7 @@ define <4 x i32*> @INT2PTR1(<4 x i8>* %p) nounwind {
entry:
%G = load <4 x i8>* %p
;CHECK: movl
-;CHECK: movd
-;CHECK: pshufb
+;CHECK: pmovzxbd
;CHECK: pand
%K = inttoptr <4 x i8> %G to <4 x i32*>
;CHECK: ret
@@ -105,7 +104,7 @@ define <2 x i32*> @BITCAST1(<2 x i8*>* %p) nounwind {
entry:
%G = load <2 x i8*>* %p
;CHECK: movl
-;CHECK: movsd
+;CHECK: pmovzxdq
%T = bitcast <2 x i8*> %G to <2 x i32*>
;CHECK: ret
ret <2 x i32*> %T
diff --git a/test/CodeGen/X86/pr14161.ll b/test/CodeGen/X86/pr14161.ll
new file mode 100644
index 0000000000..ff4532eac3
--- /dev/null
+++ b/test/CodeGen/X86/pr14161.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -mtriple=x86_64-linux-pc -mcpu=corei7 | FileCheck %s
+
+declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>)
+
+define <2 x i16> @good(<4 x i32>*, <4 x i8>*) {
+entry:
+ %2 = load <4 x i32>* %0, align 16
+ %3 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %2, <4 x i32> <i32 127, i32 127, i32 127, i32 127>)
+ %4 = extractelement <4 x i32> %3, i32 0
+ %5 = extractelement <4 x i32> %3, i32 1
+ %6 = extractelement <4 x i32> %3, i32 2
+ %7 = extractelement <4 x i32> %3, i32 3
+ %8 = bitcast i32 %4 to <2 x i16>
+ %9 = bitcast i32 %5 to <2 x i16>
+ ret <2 x i16> %8
+; CHECK: good
+; CHECK: pminud
+; CHECK-NEXT: pmovzxwq
+; CHECK: ret
+}
+
+define <2 x i16> @bad(<4 x i32>*, <4 x i8>*) {
+entry:
+ %2 = load <4 x i32>* %0, align 16
+ %3 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %2, <4 x i32> <i32 127, i32 127, i32 127, i32 127>)
+ %4 = extractelement <4 x i32> %3, i32 0
+ %5 = extractelement <4 x i32> %3, i32 1
+ %6 = extractelement <4 x i32> %3, i32 2
+ %7 = extractelement <4 x i32> %3, i32 3
+ %8 = bitcast i32 %4 to <2 x i16>
+ %9 = bitcast i32 %5 to <2 x i16>
+ ret <2 x i16> %9
+; CHECK: bad
+; CHECK: pminud
+; CHECK: pextrd
+; CHECK: pmovzxwq
+; CHECK: ret
+}
diff --git a/test/CodeGen/X86/promote.ll b/test/CodeGen/X86/promote.ll
index 8b30dc718b..283f48cd37 100644
--- a/test/CodeGen/X86/promote.ll
+++ b/test/CodeGen/X86/promote.ll
@@ -20,7 +20,7 @@ entry:
; CHECK: shuff_f
define i32 @shuff_f(<4 x i8>* %A) {
entry:
-; CHECK: pshufb
+; CHECK: pmovzxbd
; CHECK: paddd
; CHECK: pshufb
%0 = load <4 x i8>* %A, align 8
diff --git a/test/CodeGen/X86/sse-intel-ocl.ll b/test/CodeGen/X86/sse-intel-ocl.ll
new file mode 100644
index 0000000000..188505072f
--- /dev/null
+++ b/test/CodeGen/X86/sse-intel-ocl.ll
@@ -0,0 +1,93 @@
+; RUN: llc < %s -mtriple=i386-pc-win32 -mcpu=nehalem | FileCheck -check-prefix=WIN32 %s
+; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=nehalem | FileCheck -check-prefix=WIN64 %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=nehalem | FileCheck -check-prefix=NOT_WIN %s
+
+declare <16 x float> @func_float16_ptr(<16 x float>, <16 x float> *)
+declare <16 x float> @func_float16(<16 x float>, <16 x float>)
+; WIN64: testf16_inp
+; WIN64: addps {{.*}}, {{%xmm[0-3]}}
+; WIN64: addps {{.*}}, {{%xmm[0-3]}}
+; WIN64: addps {{.*}}, {{%xmm[0-3]}}
+; WIN64: addps {{.*}}, {{%xmm[0-3]}}
+; WIN64: leaq {{.*}}(%rsp), %rcx
+; WIN64: call
+; WIN64: ret
+
+; WIN32: testf16_inp
+; WIN32: movl %eax, (%esp)
+; WIN32: addps {{.*}}, {{%xmm[0-3]}}
+; WIN32: addps {{.*}}, {{%xmm[0-3]}}
+; WIN32: addps {{.*}}, {{%xmm[0-3]}}
+; WIN32: addps {{.*}}, {{%xmm[0-3]}}
+; WIN32: call
+; WIN32: ret
+
+; NOT_WIN: testf16_inp
+; NOT_WIN: addps {{.*}}, {{%xmm[0-3]}}
+; NOT_WIN: addps {{.*}}, {{%xmm[0-3]}}
+; NOT_WIN: addps {{.*}}, {{%xmm[0-3]}}
+; NOT_WIN: addps {{.*}}, {{%xmm[0-3]}}
+; NOT_WIN: leaq {{.*}}(%rsp), %rdi
+; NOT_WIN: call
+; NOT_WIN: ret
+
+;test calling conventions - input parameters
+define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind {
+ %y = alloca <16 x float>, align 16
+ %x = fadd <16 x float> %a, %b
+ %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y)
+ %2 = load <16 x float>* %y, align 16
+ %3 = fadd <16 x float> %2, %1
+ ret <16 x float> %3
+}
+
+;test calling conventions - preserved registers
+
+; preserved xmm6-xmm15
+; WIN64: testf16_regs
+; WIN64: call
+; WIN64: addps {{%xmm[6-9]}}, {{.*}}
+; WIN64: addps {{%xmm[6-9]}}, {{.*}}
+; WIN64: ret
+
+; preserved xmm8-xmm15
+; NOT_WIN: testf16_regs
+; NOT_WIN: call
+; NOT_WIN: addps {{%xmm([8-9]|1[0-1])}}, {{.*}}
+; NOT_WIN: addps {{%xmm([8-9]|1[0-1])}}, {{.*}}
+; NOT_WIN: addps {{%xmm([8-9]|1[0-1])}}, {{.*}}
+; NOT_WIN: addps {{%xmm([8-9]|1[0-1])}}, {{.*}}
+; NOT_WIN: ret
+
+define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind {
+ %y = alloca <16 x float>, align 16
+ %x = fadd <16 x float> %a, %b
+ %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y)
+ %2 = load <16 x float>* %y, align 16
+ %3 = fadd <16 x float> %1, %b
+ %4 = fadd <16 x float> %2, %3
+ ret <16 x float> %4
+}
+
+; test calling conventions - prolog and epilog
+; NOT_WIN: movaps {{%xmm([8-9]|1[0-5])}}, {{.*(%rsp).*}} ## 16-byte Spill
+; NOT_WIN: movaps {{%xmm([8-9]|1[0-5])}}, {{.*(%rsp).*}} ## 16-byte Spill
+; NOT_WIN: movaps {{%xmm([8-9]|1[0-5])}}, {{.*(%rsp).*}} ## 16-byte Spill
+; NOT_WIN: movaps {{%xmm([8-9]|1[0-5])}}, {{.*(%rsp).*}} ## 16-byte Spill
+; NOT_WIN: movaps {{%xmm([8-9]|1[0-5])}}, {{.*(%rsp).*}} ## 16-byte Spill
+; NOT_WIN: movaps {{%xmm([8-9]|1[0-5])}}, {{.*(%rsp).*}} ## 16-byte Spill
+; NOT_WIN: movaps {{%xmm([8-9]|1[0-5])}}, {{.*(%rsp).*}} ## 16-byte Spill
+; NOT_WIN: movaps {{%xmm([8-9]|1[0-5])}}, {{.*(%rsp).*}} ## 16-byte Spill
+; NOT_WIN: call
+; NOT_WIN: movaps {{.*(%rsp).*}}, {{%xmm([8-9]|1[0-5])}} ## 16-byte Reload
+; NOT_WIN: movaps {{.*(%rsp).*}}, {{%xmm([8-9]|1[0-5])}} ## 16-byte Reload
+; NOT_WIN: movaps {{.*(%rsp).*}}, {{%xmm([8-9]|1[0-5])}} ## 16-byte Reload
+; NOT_WIN: movaps {{.*(%rsp).*}}, {{%xmm([8-9]|1[0-5])}} ## 16-byte Reload
+; NOT_WIN: movaps {{.*(%rsp).*}}, {{%xmm([8-9]|1[0-5])}} ## 16-byte Reload
+; NOT_WIN: movaps {{.*(%rsp).*}}, {{%xmm([8-9]|1[0-5])}} ## 16-byte Reload
+; NOT_WIN: movaps {{.*(%rsp).*}}, {{%xmm([8-9]|1[0-5])}} ## 16-byte Reload
+; NOT_WIN: movaps {{.*(%rsp).*}}, {{%xmm([8-9]|1[0-5])}} ## 16-byte Reload
+define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x float> %b) nounwind {
+ %c = call <16 x float> @func_float16(<16 x float> %a, <16 x float> %b)
+ ret <16 x float> %c
+}
diff --git a/test/CodeGen/X86/trunc-ext-ld-st.ll b/test/CodeGen/X86/trunc-ext-ld-st.ll
index 9877d7be16..1d22a185de 100644
--- a/test/CodeGen/X86/trunc-ext-ld-st.ll
+++ b/test/CodeGen/X86/trunc-ext-ld-st.ll
@@ -2,8 +2,7 @@
;CHECK: load_2_i8
; A single 16-bit load
-;CHECK: movzwl
-;CHECK: pshufb
+;CHECK: pmovzxbq
;CHECK: paddq
;CHECK: pshufb
; A single 16-bit store
@@ -19,8 +18,7 @@ define void @load_2_i8(<2 x i8>* %A) {
;CHECK: load_2_i16
; Read 32-bits
-;CHECK: movd
-;CHECK: pshufb
+;CHECK: pmovzxwq
;CHECK: paddq
;CHECK: pshufb
;CHECK: movd
@@ -33,7 +31,7 @@ define void @load_2_i16(<2 x i16>* %A) {
}
;CHECK: load_2_i32
-;CHECK: pshufd
+;CHECK: pmovzxdq
;CHECK: paddq
;CHECK: pshufd
;CHECK: ret
@@ -45,8 +43,7 @@ define void @load_2_i32(<2 x i32>* %A) {
}
;CHECK: load_4_i8
-;CHECK: movd
-;CHECK: pshufb
+;CHECK: pmovzxbd
;CHECK: paddd
;CHECK: pshufb
;CHECK: ret
@@ -58,7 +55,7 @@ define void @load_4_i8(<4 x i8>* %A) {
}
;CHECK: load_4_i16
-;CHECK: punpcklwd
+;CHECK: pmovzxwd
;CHECK: paddd
;CHECK: pshufb
;CHECK: ret
@@ -70,7 +67,7 @@ define void @load_4_i16(<4 x i16>* %A) {
}
;CHECK: load_8_i8
-;CHECK: punpcklbw
+;CHECK: pmovzxbw
;CHECK: paddw
;CHECK: pshufb
;CHECK: ret
diff --git a/test/CodeGen/X86/vec_compare-2.ll b/test/CodeGen/X86/vec_compare-2.ll
index 46d6a23554..4da79538db 100644
--- a/test/CodeGen/X86/vec_compare-2.ll
+++ b/test/CodeGen/X86/vec_compare-2.ll
@@ -10,8 +10,7 @@ define void @blackDespeckle_wrapper(i8** %args_list, i64* %gtid, i64 %xend) {
entry:
; CHECK: cfi_def_cfa_offset
; CHECK-NOT: set
-; CHECK: punpcklwd
-; CHECK: pshufd
+; CHECK: pmovzxwq
; CHECK: pshufb
%shr.i = ashr <4 x i32> zeroinitializer, <i32 3, i32 3, i32 3, i32 3> ; <<4 x i32>> [#uses=1]
%cmp318.i = sext <4 x i1> zeroinitializer to <4 x i32> ; <<4 x i32>> [#uses=1]
diff --git a/test/CodeGen/X86/widen_load-2.ll b/test/CodeGen/X86/widen_load-2.ll
index 79aa000502..224898c1a3 100644
--- a/test/CodeGen/X86/widen_load-2.ll
+++ b/test/CodeGen/X86/widen_load-2.ll
@@ -170,7 +170,7 @@ define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp
; CHECK: rot
%i8vec3pack = type { <3 x i8>, i8 }
define %i8vec3pack @rot() nounwind {
-; CHECK: movd {{-?[0-9]+}}(%rsp), {{%xmm[0-9]}}
+; CHECK: pmovzxbd {{-?[0-9]+}}(%rsp), {{%xmm[0-9]}}
entry:
%X = alloca %i8vec3pack, align 4
%rot = alloca %i8vec3pack, align 4
diff --git a/test/MC/PowerPC/lit.local.cfg b/test/MC/PowerPC/lit.local.cfg
new file mode 100644
index 0000000000..88488cdd04
--- /dev/null
+++ b/test/MC/PowerPC/lit.local.cfg
@@ -0,0 +1,5 @@
+config.suffixes = ['.ll', '.c', '.cpp', '.s']
+
+targets = set(config.root.targets_to_build.split())
+if not 'PowerPC' in targets:
+ config.unsupported = True
diff --git a/test/MC/PowerPC/ppc64-relocs-01.ll b/test/MC/PowerPC/ppc64-relocs-01.ll
new file mode 100644
index 0000000000..5996af84f4
--- /dev/null
+++ b/test/MC/PowerPC/ppc64-relocs-01.ll
@@ -0,0 +1,66 @@
+;; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -O3 \
+;; RUN: -filetype=obj %s -o - | \
+;; RUN: elf-dump --dump-section-data | FileCheck %s
+
+;; FIXME: this file need to be in .s form, change when asm parse is done.
+
+@number64 = global i64 10, align 8
+
+define i64 @access_int64(i64 %a) nounwind readonly {
+entry:
+ %0 = load i64* @number64, align 8
+ %cmp = icmp eq i64 %0, %a
+ %conv1 = zext i1 %cmp to i64
+ ret i64 %conv1
+}
+
+declare double @sin(double) nounwind
+
+define double @test_branch24 (double %x) nounwind readonly {
+entry:
+ %add = call double @sin(double %x) nounwind
+ ret double %add
+}
+
+;; The relocations in .rela.text are the 'number64' load using a
+;; R_PPC64_TOC16_DS against the .toc and the 'sin' external function
+;; address using a R_PPC64_REL24
+;; CHECK: '.rela.text'
+;; CHECK: Relocation 0
+;; CHECK-NEXT: 'r_offset',
+;; CHECK-NEXT: 'r_sym', 0x00000006
+;; CHECK-NEXT: 'r_type', 0x0000003f
+;; CHECK: Relocation 1
+;; CHECK-NEXT: 'r_offset',
+;; CHECK-NEXT: 'r_sym', 0x0000000a
+;; CHECK-NEXT: 'r_type', 0x0000000a
+
+;; The .opd entry for the 'access_int64' function creates 2 relocations:
+;; 1. A R_PPC64_ADDR64 against the .text segment plus addend (the function
+; address itself);
+;; 2. And a R_PPC64_TOC against no symbol (the linker will replace for the
+;; module's TOC base).
+;; CHECK: '.rela.opd'
+;; CHECK: Relocation 0
+;; CHECK-NEXT: 'r_offset',
+;; CHECK-NEXT: 'r_sym', 0x00000002
+;; CHECK-NEXT: 'r_type', 0x00000026
+;; CHECK: Relocation 1
+;; CHECK-NEXT: 'r_offset',
+;; CHECK-NEXT: 'r_sym', 0x00000000
+;; CHECK-NEXT: 'r_type', 0x00000033
+
+;; Finally the TOC creates the relocation for the 'number64'.
+;; CHECK: '.rela.toc'
+;; CHECK: Relocation 0
+;; CHECK-NEXT: 'r_offset',
+;; CHECK-NEXT: 'r_sym', 0x00000008
+;; CHECK-NEXT: 'r_type', 0x00000026
+
+;; Check if the relocation references are for correct symbols.
+;; CHECK: Symbol 7
+;; CHECK-NEXT: 'access_int64'
+;; CHECK: Symbol 8
+;; CHECK-NEXT: 'number64'
+;; CHECK: Symbol 10
+;; CHECK-NEXT: 'sin'
diff --git a/test/MC/X86/x86-32-ms-inline-asm.s b/test/MC/X86/x86-32-ms-inline-asm.s
new file mode 100644
index 0000000000..a5e80b2c93
--- /dev/null
+++ b/test/MC/X86/x86-32-ms-inline-asm.s
@@ -0,0 +1,10 @@
+// RUN: llvm-mc -x86-asm-syntax=intel -triple i386-unknown-unknown --show-encoding %s | FileCheck %s
+
+mov eax, [ebx].0
+mov [ebx].4, ecx
+
+// CHECK: movl (%ebx), %eax
+// CHECK: encoding: [0x8b,0x03]
+// CHECK: movl %ecx, 4(%ebx)
+// CHECK: encoding: [0x89,0x4b,0x04]
+
diff --git a/test/Other/multi-pointer-size.ll b/test/Other/multi-pointer-size.ll
new file mode 100644
index 0000000000..95fa54b8f2
--- /dev/null
+++ b/test/Other/multi-pointer-size.ll
@@ -0,0 +1,43 @@
+; RUN: opt -instcombine %s | llvm-dis | FileCheck %s
+target datalayout = "e-p:32:32:32-p1:64:64:64-p2:8:8:8-p3:16:16:16--p4:96:96:96-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32"
+
+define i32 @test_as0(i32 addrspace(0)* %A) {
+entry:
+; CHECK: %arrayidx = getelementptr i32* %A, i32 1
+ %arrayidx = getelementptr i32 addrspace(0)* %A, i64 1
+ %y = load i32 addrspace(0)* %arrayidx, align 4
+ ret i32 %y
+}
+
+define i32 @test_as1(i32 addrspace(1)* %A) {
+entry:
+; CHECK: %arrayidx = getelementptr i32 addrspace(1)* %A, i64 1
+ %arrayidx = getelementptr i32 addrspace(1)* %A, i32 1
+ %y = load i32 addrspace(1)* %arrayidx, align 4
+ ret i32 %y
+}
+
+define i32 @test_as2(i32 addrspace(2)* %A) {
+entry:
+; CHECK: %arrayidx = getelementptr i32 addrspace(2)* %A, i8 1
+ %arrayidx = getelementptr i32 addrspace(2)* %A, i32 1
+ %y = load i32 addrspace(2)* %arrayidx, align 4
+ ret i32 %y
+}
+
+define i32 @test_as3(i32 addrspace(3)* %A) {
+entry:
+; CHECK: %arrayidx = getelementptr i32 addrspace(3)* %A, i16 1
+ %arrayidx = getelementptr i32 addrspace(3)* %A, i32 1
+ %y = load i32 addrspace(3)* %arrayidx, align 4
+ ret i32 %y
+}
+
+define i32 @test_as4(i32 addrspace(4)* %A) {
+entry:
+; CHECK: %arrayidx = getelementptr i32 addrspace(4)* %A, i96 1
+ %arrayidx = getelementptr i32 addrspace(4)* %A, i32 1
+ %y = load i32 addrspace(4)* %arrayidx, align 4
+ ret i32 %y
+}
+
diff --git a/test/Transforms/GVN/crash.ll b/test/Transforms/GVN/crash.ll
index 31eae256c6..4a8c8e4589 100644
--- a/test/Transforms/GVN/crash.ll
+++ b/test/Transforms/GVN/crash.ll
@@ -163,3 +163,39 @@ entry:
ret i8 %1
}
+
+; Test that a GEP in an unreachable block with the following form doesn't crash
+; GVN:
+;
+; %x = gep %some.type %x, ...
+
+%struct.type = type { i64, i32, i32 }
+
+define fastcc void @func() nounwind uwtable ssp align 2 {
+entry:
+ br label %reachable.bb
+
+;; Unreachable code.
+
+unreachable.bb:
+ %gep.val = getelementptr inbounds %struct.type* %gep.val, i64 1
+ br i1 undef, label %u2.bb, label %u1.bb
+
+u1.bb:
+ %tmp1 = getelementptr inbounds %struct.type* %gep.val, i64 0, i32 0
+ store i64 -1, i64* %tmp1, align 8
+ br label %unreachable.bb
+
+u2.bb:
+ %0 = load i32* undef, align 4
+ %conv.i.i.i.i.i = zext i32 %0 to i64
+ br label %u2.bb
+
+;; Reachable code.
+
+reachable.bb:
+ br label %r1.bb
+
+r1.bb:
+ br label %u2.bb
+}
diff --git a/test/Transforms/GVN/pr14166.ll b/test/Transforms/GVN/pr14166.ll
new file mode 100644
index 0000000000..9f47e46426
--- /dev/null
+++ b/test/Transforms/GVN/pr14166.ll
@@ -0,0 +1,27 @@
+; RUN: opt -gvn -S < %s | FileCheck %s
+target datalayout = "e-p:32:32:32"
+target triple = "i386-pc-linux-gnu"
+define <2 x i32> @test1() {
+ %v1 = alloca <2 x i32>
+ call void @anything(<2 x i32>* %v1)
+ %v2 = load <2 x i32>* %v1
+ %v3 = inttoptr <2 x i32> %v2 to <2 x i8*>
+ %v4 = bitcast <2 x i32>* %v1 to <2 x i8*>*
+ store <2 x i8*> %v3, <2 x i8*>* %v4
+ %v5 = load <2 x i32>* %v1
+ ret <2 x i32> %v5
+; CHECK: @test1
+; CHECK: %v1 = alloca <2 x i32>
+; CHECK: call void @anything(<2 x i32>* %v1)
+; CHECK: %v2 = load <2 x i32>* %v1
+; CHECK: %v3 = inttoptr <2 x i32> %v2 to <2 x i8*>
+; CHECK: %v4 = bitcast <2 x i32>* %v1 to <2 x i8*>*
+; CHECK: store <2 x i8*> %v3, <2 x i8*>* %v4
+; CHECK: %1 = ptrtoint <2 x i8*> %v3 to <2 x i32>
+; CHECK: %2 = bitcast <2 x i32> %1 to i64
+; CHECK: %3 = bitcast i64 %2 to <2 x i32>
+; CHECK: ret <2 x i32> %3
+}
+
+declare void @anything(<2 x i32>*)
+
diff --git a/test/Transforms/InstCombine/cast.ll b/test/Transforms/InstCombine/cast.ll
index 899ffddd5b..b4eb69d436 100644
--- a/test/Transforms/InstCombine/cast.ll
+++ b/test/Transforms/InstCombine/cast.ll
@@ -891,3 +891,12 @@ define double @test80([100 x double]* %p, i32 %i) {
ret double %l
; CHECK-NEXT: ret double
}
+
+define double @test81(double *%p, float %f) {
+ %i = fptosi float %f to i64
+ %q = bitcast double* %p to i8*
+ %pp = getelementptr i8* %q, i64 %i
+ %r = bitcast i8* %pp to double*
+ %l = load double* %r
+ ret double %l
+}
diff --git a/test/Transforms/InstCombine/constant-fold-gep-as-0.ll b/test/Transforms/InstCombine/constant-fold-gep-as-0.ll
new file mode 100644
index 0000000000..74fe316137
--- /dev/null
+++ b/test/Transforms/InstCombine/constant-fold-gep-as-0.ll
@@ -0,0 +1,235 @@
+; "PLAIN" - No optimizations. This tests the target-independent
+; constant folder.
+; RUN: opt -S -o - < %s | FileCheck --check-prefix=PLAIN %s
+
+target datalayout = "e-p:128:128:128-p1:32:32:32-p2:8:8:8-p3:16:16:16-p4:64:64:64-p5:96:96:96-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32"
+
+; PLAIN: ModuleID = '<stdin>'
+
+; The automatic constant folder in opt does not have targetdata access, so
+; it can't fold gep arithmetic, in general. However, the constant folder run
+; from instcombine and global opt can use targetdata.
+; PLAIN: @G8 = global i8 addrspace(1)* getelementptr (i8 addrspace(1)* inttoptr (i32 1 to i8 addrspace(1)*), i32 -1)
+@G8 = global i8 addrspace(1)* getelementptr (i8 addrspace(1)* inttoptr (i32 1 to i8 addrspace(1)*), i32 -1)
+; PLAIN: @G1 = global i1 addrspace(2)* getelementptr (i1 addrspace(2)* inttoptr (i8 1 to i1 addrspace(2)*), i8 -1)
+@G1 = global i1 addrspace(2)* getelementptr (i1 addrspace(2)* inttoptr (i8 1 to i1 addrspace(2)*), i8 -1)
+; PLAIN: @F8 = global i8 addrspace(1)* getelementptr (i8 addrspace(1)* inttoptr (i32 1 to i8 addrspace(1)*), i32 -2)
+@F8 = global i8 addrspace(1)* getelementptr (i8 addrspace(1)* inttoptr (i32 1 to i8 addrspace(1)*), i32 -2)
+; PLAIN: @F1 = global i1 addrspace(2)* getelementptr (i1 addrspace(2)* inttoptr (i8 1 to i1 addrspace(2)*), i8 -2)
+@F1 = global i1 addrspace(2)* getelementptr (i1 addrspace(2)* inttoptr (i8 1 to i1 addrspace(2)*), i8 -2)
+; PLAIN: @H8 = global i8 addrspace(1)* getelementptr (i8 addrspace(1)* null, i32 -1)
+@H8 = global i8 addrspace(1)* getelementptr (i8 addrspace(1)* inttoptr (i32 0 to i8 addrspace(1)*), i32 -1)
+; PLAIN: @H1 = global i1 addrspace(2)* getelementptr (i1 addrspace(2)* null, i8 -1)
+@H1 = global i1 addrspace(2)* getelementptr (i1 addrspace(2)* inttoptr (i8 0 to i1 addrspace(2)*), i8 -1)
+
+
+; The target-independent folder should be able to do some clever
+; simplifications on sizeof, alignof, and offsetof expressions. The
+; target-dependent folder should fold these down to constants.
+; PLAIN-X: @a = constant i64 mul (i64 ptrtoint (double addrspace(4)* getelementptr (double addrspace(4)* null, i32 1) to i64), i64 2310)
+@a = constant i64 mul (i64 3, i64 mul (i64 ptrtoint ({[7 x double], [7 x double]} addrspace(4)* getelementptr ({[7 x double], [7 x double]} addrspace(4)* null, i64 11) to i64), i64 5))
+
+; PLAIN-X: @b = constant i64 ptrtoint (double addrspace(4)* getelementptr ({ i1, double }* null, i64 0, i32 1) to i64)
+@b = constant i64 ptrtoint ([13 x double] addrspace(4)* getelementptr ({i1, [13 x double]} addrspace(4)* null, i64 0, i32 1) to i64)
+
+; PLAIN-X: @c = constant i64 mul nuw (i64 ptrtoint (double addrspace(4)* getelementptr (double addrspace(4)* null, i32 1) to i64), i64 2)
+@c = constant i64 ptrtoint (double addrspace(4)* getelementptr ({double, double, double, double} addrspace(4)* null, i64 0, i32 2) to i64)
+
+; PLAIN-X: @d = constant i64 mul nuw (i64 ptrtoint (double addrspace(4)* getelementptr (double addrspace(4)* null, i32 1) to i64), i64 11)
+@d = constant i64 ptrtoint (double addrspace(4)* getelementptr ([13 x double] addrspace(4)* null, i64 0, i32 11) to i64)
+
+; PLAIN-X: @e = constant i64 ptrtoint (double addrspace(4)* getelementptr ({ double, float, double, double }* null, i64 0, i32 2) to i64)
+@e = constant i64 ptrtoint (double addrspace(4)* getelementptr ({double, float, double, double} addrspace(4)* null, i64 0, i32 2) to i64)
+
+; PLAIN-X: @f = constant i64 1
+@f = constant i64 ptrtoint (<{ i16, i128 }> addrspace(4)* getelementptr ({i1, <{ i16, i128 }>} addrspace(4)* null, i64 0, i32 1) to i64)
+
+; PLAIN-X: @g = constant i64 ptrtoint (double addrspace(4)* getelementptr ({ i1, double }* null, i64 0, i32 1) to i64)
+@g = constant i64 ptrtoint ({double, double} addrspace(4)* getelementptr ({i1, {double, double}} addrspace(4)* null, i64 0, i32 1) to i64)
+
+; PLAIN-X: @h = constant i64 ptrtoint (i1 addrspace(2)* getelementptr (i1 addrspace(2)* null, i32 1) to i64)
+@h = constant i64 ptrtoint (double addrspace(4)* getelementptr (double addrspace(4)* null, i64 1) to i64)
+
+; PLAIN-X: @i = constant i64 ptrtoint (i1 addrspace(2)* getelementptr ({ i1, i1 addrspace(2)* }* null, i64 0, i32 1) to i64)
+@i = constant i64 ptrtoint (double addrspace(4)* getelementptr ({i1, double} addrspace(4)* null, i64 0, i32 1) to i64)
+
+; The target-dependent folder should cast GEP indices to integer-sized pointers.
+
+; PLAIN: @M = constant i64 addrspace(5)* getelementptr (i64 addrspace(5)* null, i32 1)
+; PLAIN: @N = constant i64 addrspace(5)* getelementptr ({ i64, i64 } addrspace(5)* null, i32 0, i32 1)
+; PLAIN: @O = constant i64 addrspace(5)* getelementptr ([2 x i64] addrspace(5)* null, i32 0, i32 1)
+
+@M = constant i64 addrspace(5)* getelementptr (i64 addrspace(5)* null, i32 1)
+@N = constant i64 addrspace(5)* getelementptr ({ i64, i64 } addrspace(5)* null, i32 0, i32 1)
+@O = constant i64 addrspace(5)* getelementptr ([2 x i64] addrspace(5)* null, i32 0, i32 1)
+
+; Fold GEP of a GEP. Very simple cases are folded.
+
+; PLAIN-X: @Y = global [3 x { i32, i32 }]addrspace(3)* getelementptr inbounds ([3 x { i32, i32 }]addrspace(3)* @ext, i64 2)
+@ext = external addrspace(3) global [3 x { i32, i32 }]
+@Y = global [3 x { i32, i32 }]addrspace(3)* getelementptr inbounds ([3 x { i32, i32 }]addrspace(3)* getelementptr inbounds ([3 x { i32, i32 }]addrspace(3)* @ext, i64 1), i64 1)
+
+; PLAIN-X: @Z = global i32addrspace(3)* getelementptr inbounds (i32addrspace(3)* getelementptr inbounds ([3 x { i32, i32 }]addrspace(3)* @ext, i64 0, i64 1, i32 0), i64 1)
+@Z = global i32addrspace(3)* getelementptr inbounds (i32addrspace(3)* getelementptr inbounds ([3 x { i32, i32 }]addrspace(3)* @ext, i64 0, i64 1, i32 0), i64 1)
+
+
+; Duplicate all of the above as function return values rather than
+; global initializers.
+
+; PLAIN: define i8 addrspace(1)* @goo8() nounwind {
+; PLAIN: %t = bitcast i8 addrspace(1)* getelementptr (i8 addrspace(1)* inttoptr (i32 1 to i8 addrspace(1)*), i32 -1) to i8 addrspace(1)*
+; PLAIN: ret i8 addrspace(1)* %t
+; PLAIN: }
+; PLAIN: define i1 addrspace(2)* @goo1() nounwind {
+; PLAIN: %t = bitcast i1 addrspace(2)* getelementptr (i1 addrspace(2)* inttoptr (i32 1 to i1 addrspace(2)*), i32 -1) to i1 addrspace(2)*
+; PLAIN: ret i1 addrspace(2)* %t
+; PLAIN: }
+; PLAIN: define i8 addrspace(1)* @foo8() nounwind {
+; PLAIN: %t = bitcast i8 addrspace(1)* getelementptr (i8 addrspace(1)* inttoptr (i32 1 to i8 addrspace(1)*), i32 -2) to i8 addrspace(1)*
+; PLAIN: ret i8 addrspace(1)* %t
+; PLAIN: }
+; PLAIN: define i1 addrspace(2)* @foo1() nounwind {
+; PLAIN: %t = bitcast i1 addrspace(2)* getelementptr (i1 addrspace(2)* inttoptr (i32 1 to i1 addrspace(2)*), i32 -2) to i1 addrspace(2)*
+; PLAIN: ret i1 addrspace(2)* %t
+; PLAIN: }
+; PLAIN: define i8 addrspace(1)* @hoo8() nounwind {
+; PLAIN: %t = bitcast i8 addrspace(1)* getelementptr (i8 addrspace(1)* null, i32 -1) to i8 addrspace(1)*
+; PLAIN: ret i8 addrspace(1)* %t
+; PLAIN: }
+; PLAIN: define i1 addrspace(2)* @hoo1() nounwind {
+; PLAIN: %t = bitcast i1 addrspace(2)* getelementptr (i1 addrspace(2)* null, i32 -1) to i1 addrspace(2)*
+; PLAIN: ret i1 addrspace(2)* %t
+; PLAIN: }
+define i8 addrspace(1)* @goo8() nounwind {
+ %t = bitcast i8 addrspace(1)* getelementptr (i8 addrspace(1)* inttoptr (i32 1 to i8 addrspace(1)*), i32 -1) to i8 addrspace(1)*
+ ret i8 addrspace(1)* %t
+}
+define i1 addrspace(2)* @goo1() nounwind {
+ %t = bitcast i1 addrspace(2)* getelementptr (i1 addrspace(2)* inttoptr (i32 1 to i1 addrspace(2)*), i32 -1) to i1 addrspace(2)*
+ ret i1 addrspace(2)* %t
+}
+define i8 addrspace(1)* @foo8() nounwind {
+ %t = bitcast i8 addrspace(1)* getelementptr (i8 addrspace(1)* inttoptr (i32 1 to i8 addrspace(1)*), i32 -2) to i8 addrspace(1)*
+ ret i8 addrspace(1)* %t
+}
+define i1 addrspace(2)* @foo1() nounwind {
+ %t = bitcast i1 addrspace(2)* getelementptr (i1 addrspace(2)* inttoptr (i32 1 to i1 addrspace(2)*), i32 -2) to i1 addrspace(2)*
+ ret i1 addrspace(2)* %t
+}
+define i8 addrspace(1)* @hoo8() nounwind {
+ %t = bitcast i8 addrspace(1)* getelementptr (i8 addrspace(1)* inttoptr (i32 0 to i8 addrspace(1)*), i32 -1) to i8 addrspace(1)*
+ ret i8 addrspace(1)* %t
+}
+define i1 addrspace(2)* @hoo1() nounwind {
+ %t = bitcast i1 addrspace(2)* getelementptr (i1 addrspace(2)* inttoptr (i32 0 to i1 addrspace(2)*), i32 -1) to i1 addrspace(2)*
+ ret i1 addrspace(2)* %t
+}
+
+; PLAIN-X: define i64 @fa() nounwind {
+; PLAIN-X: %t = bitcast i64 mul (i64 ptrtoint (double addrspace(4)* getelementptr (double addrspace(4)* null, i32 1) to i64), i64 2310) to i64
+; PLAIN-X: ret i64 %t
+; PLAIN-X: }
+; PLAIN-X: define i64 @fb() nounwind {
+; PLAIN-X: %t = bitcast i64 ptrtoint (double addrspace(4)* getelementptr ({ i1, double }* null, i64 0, i32 1) to i64) to i64
+; PLAIN-X: ret i64 %t
+; PLAIN-X: }
+; PLAIN-X: define i64 @fc() nounwind {
+; PLAIN-X: %t = bitcast i64 mul nuw (i64 ptrtoint (double addrspace(4)* getelementptr (double addrspace(4)* null, i32 1) to i64), i64 2) to i64
+; PLAIN-X: ret i64 %t
+; PLAIN-X: }
+; PLAIN-X: define i64 @fd() nounwind {
+; PLAIN-X: %t = bitcast i64 mul nuw (i64 ptrtoint (double addrspace(4)* getelementptr (double addrspace(4)* null, i32 1) to i64), i64 11) to i64
+; PLAIN-X: ret i64 %t
+; PLAIN-X: }
+; PLAIN-X: define i64 @fe() nounwind {
+; PLAIN-X: %t = bitcast i64 ptrtoint (double addrspace(4)* getelementptr ({ double, float, double, double }* null, i64 0, i32 2) to i64) to i64
+; PLAIN-X: ret i64 %t
+; PLAIN-X: }
+; PLAIN-X: define i64 @ff() nounwind {
+; PLAIN-X: %t = bitcast i64 1 to i64
+; PLAIN-X: ret i64 %t
+; PLAIN-X: }
+; PLAIN-X: define i64 @fg() nounwind {
+; PLAIN-X: %t = bitcast i64 ptrtoint (double addrspace(4)* getelementptr ({ i1, double }* null, i64 0, i32 1) to i64) to i64
+; PLAIN-X: ret i64 %t
+; PLAIN-X: }
+; PLAIN-X: define i64 @fh() nounwind {
+; PLAIN-X: %t = bitcast i64 ptrtoint (i1 addrspace(2)* getelementptr (i1 addrspace(2)* null, i32 1) to i64) to i64
+; PLAIN-X: ret i64 %t
+; PLAIN-X: }
+; PLAIN-X: define i64 @fi() nounwind {
+; PLAIN-X: %t = bitcast i64 ptrtoint (i1 addrspace(2)* getelementptr ({ i1, i1 addrspace(2)* }* null, i64 0, i32 1) to i64) to i64
+; PLAIN-X: ret i64 %t
+; PLAIN-X: }
+define i64 @fa() nounwind {
+ %t = bitcast i64 mul (i64 3, i64 mul (i64 ptrtoint ({[7 x double], [7 x double]}* getelementptr ({[7 x double], [7 x double]}* null, i64 11) to i64), i64 5)) to i64
+ ret i64 %t
+}
+define i64 @fb() nounwind {
+ %t = bitcast i64 ptrtoint ([13 x double] addrspace(4)* getelementptr ({i1, [13 x double]} addrspace(4)* null, i64 0, i32 1) to i64) to i64
+ ret i64 %t
+}
+define i64 @fc() nounwind {
+ %t = bitcast i64 ptrtoint (double addrspace(4)* getelementptr ({double, double, double, double} addrspace(4)* null, i64 0, i32 2) to i64) to i64
+ ret i64 %t
+}
+define i64 @fd() nounwind {
+ %t = bitcast i64 ptrtoint (double addrspace(4)* getelementptr ([13 x double] addrspace(4)* null, i64 0, i32 11) to i64) to i64
+ ret i64 %t
+}
+define i64 @fe() nounwind {
+ %t = bitcast i64 ptrtoint (double addrspace(4)* getelementptr ({double, float, double, double} addrspace(4)* null, i64 0, i32 2) to i64) to i64
+ ret i64 %t
+}
+define i64 @ff() nounwind {
+ %t = bitcast i64 ptrtoint (<{ i16, i128 }> addrspace(4)* getelementptr ({i1, <{ i16, i128 }>} addrspace(4)* null, i64 0, i32 1) to i64) to i64
+ ret i64 %t
+}
+define i64 @fg() nounwind {
+ %t = bitcast i64 ptrtoint ({double, double} addrspace(4)* getelementptr ({i1, {double, double}} addrspace(4)* null, i64 0, i32 1) to i64) to i64
+ ret i64 %t
+}
+define i64 @fh() nounwind {
+ %t = bitcast i64 ptrtoint (double addrspace(4)* getelementptr (double addrspace(4)* null, i32 1) to i64) to i64
+ ret i64 %t
+}
+define i64 @fi() nounwind {
+ %t = bitcast i64 ptrtoint (double addrspace(4)* getelementptr ({i1, double}addrspace(4)* null, i64 0, i32 1) to i64) to i64
+ ret i64 %t
+}
+
+; PLAIN: define i64* @fM() nounwind {
+; PLAIN: %t = bitcast i64* getelementptr (i64* null, i32 1) to i64*
+; PLAIN: ret i64* %t
+; PLAIN: }
+; PLAIN: define i64* @fN() nounwind {
+; PLAIN: %t = bitcast i64* getelementptr ({ i64, i64 }* null, i32 0, i32 1) to i64*
+; PLAIN: ret i64* %t
+; PLAIN: }
+; PLAIN: define i64* @fO() nounwind {
+; PLAIN: %t = bitcast i64* getelementptr ([2 x i64]* null, i32 0, i32 1) to i64*
+; PLAIN: ret i64* %t
+; PLAIN: }
+
+define i64* @fM() nounwind {
+ %t = bitcast i64* getelementptr (i64* null, i32 1) to i64*
+ ret i64* %t
+}
+define i64* @fN() nounwind {
+ %t = bitcast i64* getelementptr ({ i64, i64 }* null, i32 0, i32 1) to i64*
+ ret i64* %t
+}
+define i64* @fO() nounwind {
+ %t = bitcast i64* getelementptr ([2 x i64]* null, i32 0, i32 1) to i64*
+ ret i64* %t
+}
+
+; PLAIN: define i32 addrspace(1)* @fZ() nounwind {
+; PLAIN: %t = bitcast i32 addrspace(1)* getelementptr inbounds (i32 addrspace(1)* getelementptr inbounds ([3 x { i32, i32 }] addrspace(1)* @ext2, i64 0, i64 1, i32 0), i64 1) to i32 addrspace(1)*
+; PLAIN: ret i32 addrspace(1)* %t
+; PLAIN: }
+@ext2 = external addrspace(1) global [3 x { i32, i32 }]
+define i32 addrspace(1)* @fZ() nounwind {
+ %t = bitcast i32 addrspace(1)* getelementptr inbounds (i32 addrspace(1)* getelementptr inbounds ([3 x { i32, i32 }] addrspace(1)* @ext2, i64 0, i64 1, i32 0), i64 1) to i32 addrspace(1)*
+ ret i32 addrspace(1)* %t
+}
diff --git a/test/Transforms/LoopUnroll/pr14167.ll b/test/Transforms/LoopUnroll/pr14167.ll
new file mode 100644
index 0000000000..205ae44b72
--- /dev/null
+++ b/test/Transforms/LoopUnroll/pr14167.ll
@@ -0,0 +1,44 @@
+; RUN: opt < %s -S -loop-unroll -unroll-runtime | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-bgq-linux"
+
+define void @test1() nounwind {
+; Ensure that we don't crash when the trip count == -1.
+; CHECK: @test1
+entry:
+ br label %for.cond2.preheader
+
+for.cond2.preheader: ; preds = %for.end, %entry
+ br i1 false, label %middle.block, label %vector.ph
+
+vector.ph: ; preds = %for.cond2.preheader
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ br i1 undef, label %middle.block.loopexit, label %vector.body
+
+middle.block.loopexit: ; preds = %vector.body
+ br label %middle.block
+
+middle.block: ; preds = %middle.block.loopexit, %for.cond2.preheader
+ br i1 true, label %for.end, label %scalar.preheader
+
+scalar.preheader: ; preds = %middle.block
+ br label %for.body4
+
+for.body4: ; preds = %for.body4, %scalar.preheader
+ %indvars.iv = phi i64 [ 16000, %scalar.preheader ], [ %indvars.iv.next, %for.body4 ]
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, 16000
+ br i1 %exitcond, label %for.body4, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body4
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %middle.block
+ br i1 undef, label %for.cond2.preheader, label %for.end15
+
+for.end15: ; preds = %for.end
+ ret void
+}
diff --git a/test/Transforms/LoopVectorize/2012-10-20-infloop.ll b/test/Transforms/LoopVectorize/2012-10-20-infloop.ll
index 5caaffc8dd..0176c9a189 100644
--- a/test/Transforms/LoopVectorize/2012-10-20-infloop.ll
+++ b/test/Transforms/LoopVectorize/2012-10-20-infloop.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -dce
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce
; Check that we don't fall into an infinite loop.
define void @test() nounwind {
diff --git a/test/Transforms/LoopVectorize/cost-model.ll b/test/Transforms/LoopVectorize/cost-model.ll
new file mode 100644
index 0000000000..18abf2885e
--- /dev/null
+++ b/test/Transforms/LoopVectorize/cost-model.ll
@@ -0,0 +1,38 @@
+; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+@c = common global [2048 x i32] zeroinitializer, align 16
+@b = common global [2048 x i32] zeroinitializer, align 16
+@d = common global [2048 x i32] zeroinitializer, align 16
+@a = common global [2048 x i32] zeroinitializer, align 16
+
+;CHECK: cost_model_1
+;CHECK-NOT: <4 x i32>
+;CHECK: ret void
+define void @cost_model_1() nounwind uwtable noinline ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %0 = shl nsw i64 %indvars.iv, 1
+ %arrayidx = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %0
+ %1 = load i32* %arrayidx, align 8
+ %idxprom1 = sext i32 %1 to i64
+ %arrayidx2 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %idxprom1
+ %2 = load i32* %arrayidx2, align 4
+ %arrayidx4 = getelementptr inbounds [2048 x i32]* @d, i64 0, i64 %indvars.iv
+ %3 = load i32* %arrayidx4, align 4
+ %idxprom5 = sext i32 %3 to i64
+ %arrayidx6 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %idxprom5
+ store i32 %2, i32* %arrayidx6, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 256
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/test/Transforms/LoopVectorize/cpp-new-array.ll b/test/Transforms/LoopVectorize/cpp-new-array.ll
new file mode 100644
index 0000000000..26902eba9e
--- /dev/null
+++ b/test/Transforms/LoopVectorize/cpp-new-array.ll
@@ -0,0 +1,46 @@
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+;CHECK: @cpp_new_arrays
+;CHECK: insertelement <4 x i32>
+;CHECK: load <4 x float>
+;CHECK: fadd <4 x float>
+;CHECK: ret i32
+define i32 @cpp_new_arrays() uwtable ssp {
+entry:
+ %call = call noalias i8* @_Znwm(i64 4)
+ %0 = bitcast i8* %call to float*
+ store float 1.000000e+03, float* %0, align 4
+ %call1 = call noalias i8* @_Znwm(i64 4)
+ %1 = bitcast i8* %call1 to float*
+ store float 1.000000e+03, float* %1, align 4
+ %call3 = call noalias i8* @_Znwm(i64 4)
+ %2 = bitcast i8* %call3 to float*
+ store float 1.000000e+03, float* %2, align 4
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %idxprom = sext i32 %i.01 to i64
+ %arrayidx = getelementptr inbounds float* %0, i64 %idxprom
+ %3 = load float* %arrayidx, align 4
+ %idxprom5 = sext i32 %i.01 to i64
+ %arrayidx6 = getelementptr inbounds float* %1, i64 %idxprom5
+ %4 = load float* %arrayidx6, align 4
+ %add = fadd float %3, %4
+ %idxprom7 = sext i32 %i.01 to i64
+ %arrayidx8 = getelementptr inbounds float* %2, i64 %idxprom7
+ store float %add, float* %arrayidx8, align 4
+ %inc = add nsw i32 %i.01, 1
+ %cmp = icmp slt i32 %inc, 1000
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ %5 = load float* %2, align 4
+ %conv10 = fptosi float %5 to i32
+ ret i32 %conv10
+}
+
+declare noalias i8* @_Znwm(i64)
diff --git a/test/Transforms/LoopVectorize/gcc-examples.ll b/test/Transforms/LoopVectorize/gcc-examples.ll
index 6fb1792b2c..d8942ac861 100644
--- a/test/Transforms/LoopVectorize/gcc-examples.ll
+++ b/test/Transforms/LoopVectorize/gcc-examples.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/increment.ll b/test/Transforms/LoopVectorize/increment.ll
index e818d68562..069b7ea031 100644
--- a/test/Transforms/LoopVectorize/increment.ll
+++ b/test/Transforms/LoopVectorize/increment.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/induction_plus.ll b/test/Transforms/LoopVectorize/induction_plus.ll
index bd90113e52..b31bceb50d 100644
--- a/test/Transforms/LoopVectorize/induction_plus.ll
+++ b/test/Transforms/LoopVectorize/induction_plus.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -instcombine -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -instcombine -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/non-const-n.ll b/test/Transforms/LoopVectorize/non-const-n.ll
index 04c5c84a4f..7727b0a2dc 100644
--- a/test/Transforms/LoopVectorize/non-const-n.ll
+++ b/test/Transforms/LoopVectorize/non-const-n.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/read-only.ll b/test/Transforms/LoopVectorize/read-only.ll
index 4095ea68ef..b4d1bac132 100644
--- a/test/Transforms/LoopVectorize/read-only.ll
+++ b/test/Transforms/LoopVectorize/read-only.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/reduction.ll b/test/Transforms/LoopVectorize/reduction.ll
index 3e871b229b..746a08c3ea 100644
--- a/test/Transforms/LoopVectorize/reduction.ll
+++ b/test/Transforms/LoopVectorize/reduction.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
@@ -149,4 +149,83 @@ for.end: ; preds = %for.body, %entry
ret i32 %sum.0.lcssa
}
+;CHECK: @reduction_and
+;CHECK: and <4 x i32>
+;CHECK: ret i32
+define i32 @reduction_and(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly {
+entry:
+ %cmp7 = icmp sgt i32 %n, 0
+ br i1 %cmp7, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %result.08 = phi i32 [ %and, %for.body ], [ -1, %entry ]
+ %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds i32* %B, i64 %indvars.iv
+ %1 = load i32* %arrayidx2, align 4
+ %add = add nsw i32 %1, %0
+ %and = and i32 %add, %result.08
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+for.end: ; preds = %for.body, %entry
+ %result.0.lcssa = phi i32 [ -1, %entry ], [ %and, %for.body ]
+ ret i32 %result.0.lcssa
+}
+
+;CHECK: @reduction_or
+;CHECK: or <4 x i32>
+;CHECK: ret i32
+define i32 @reduction_or(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly {
+entry:
+ %cmp7 = icmp sgt i32 %n, 0
+ br i1 %cmp7, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %result.08 = phi i32 [ %or, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds i32* %B, i64 %indvars.iv
+ %1 = load i32* %arrayidx2, align 4
+ %add = add nsw i32 %1, %0
+ %or = or i32 %add, %result.08
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ %result.0.lcssa = phi i32 [ 0, %entry ], [ %or, %for.body ]
+ ret i32 %result.0.lcssa
+}
+
+;CHECK: @reduction_xor
+;CHECK: xor <4 x i32>
+;CHECK: ret i32
+define i32 @reduction_xor(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly {
+entry:
+ %cmp7 = icmp sgt i32 %n, 0
+ br i1 %cmp7, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %result.08 = phi i32 [ %xor, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds i32* %B, i64 %indvars.iv
+ %1 = load i32* %arrayidx2, align 4
+ %add = add nsw i32 %1, %0
+ %xor = xor i32 %add, %result.08
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ %result.0.lcssa = phi i32 [ 0, %entry ], [ %xor, %for.body ]
+ ret i32 %result.0.lcssa
+}
diff --git a/test/Transforms/LoopVectorize/scalar-select.ll b/test/Transforms/LoopVectorize/scalar-select.ll
index 8d5b6fd8af..e537bde31b 100644
--- a/test/Transforms/LoopVectorize/scalar-select.ll
+++ b/test/Transforms/LoopVectorize/scalar-select.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/SROA/basictest.ll b/test/Transforms/SROA/basictest.ll
index 03120f7a32..110950f76a 100644
--- a/test/Transforms/SROA/basictest.ll
+++ b/test/Transforms/SROA/basictest.ll
@@ -577,9 +577,17 @@ entry:
%ai = load i24* %aiptr
; CHCEK-NOT: store
; CHCEK-NOT: load
-; CHECK: %[[mask0:.*]] = and i24 undef, -256
-; CHECK-NEXT: %[[mask1:.*]] = and i24 %[[mask0]], -65281
-; CHECK-NEXT: %[[mask2:.*]] = and i24 %[[mask1]], 65535
+; CHECK: %[[ext2:.*]] = zext i8 0 to i24
+; CHECK-NEXT: %[[shift2:.*]] = shl i24 %[[ext2]], 16
+; CHECK-NEXT: %[[mask2:.*]] = and i24 undef, 65535
+; CHECK-NEXT: %[[insert2:.*]] = or i24 %[[mask2]], %[[shift2]]
+; CHECK-NEXT: %[[ext1:.*]] = zext i8 0 to i24
+; CHECK-NEXT: %[[shift1:.*]] = shl i24 %[[ext1]], 8
+; CHECK-NEXT: %[[mask1:.*]] = and i24 %[[insert2]], -65281
+; CHECK-NEXT: %[[insert1:.*]] = or i24 %[[mask1]], %[[shift1]]
+; CHECK-NEXT: %[[ext0:.*]] = zext i8 0 to i24
+; CHECK-NEXT: %[[mask0:.*]] = and i24 %[[insert1]], -256
+; CHECK-NEXT: %[[insert0:.*]] = or i24 %[[mask0]], %[[ext0]]
%biptr = bitcast [3 x i8]* %b to i24*
store i24 %ai, i24* %biptr
@@ -591,10 +599,10 @@ entry:
%b2 = load i8* %b2ptr
; CHCEK-NOT: store
; CHCEK-NOT: load
-; CHECK: %[[trunc0:.*]] = trunc i24 %[[mask2]] to i8
-; CHECK-NEXT: %[[shift1:.*]] = lshr i24 %[[mask2]], 8
+; CHECK: %[[trunc0:.*]] = trunc i24 %[[insert0]] to i8
+; CHECK-NEXT: %[[shift1:.*]] = lshr i24 %[[insert0]], 8
; CHECK-NEXT: %[[trunc1:.*]] = trunc i24 %[[shift1]] to i8
-; CHECK-NEXT: %[[shift2:.*]] = lshr i24 %[[mask2]], 16
+; CHECK-NEXT: %[[shift2:.*]] = lshr i24 %[[insert0]], 16
; CHECK-NEXT: %[[trunc2:.*]] = trunc i24 %[[shift2]] to i8
%bsum0 = add i8 %b0, %b1
@@ -1064,6 +1072,49 @@ entry:
ret void
}
+define i64 @PR14059.2({ float, float }* %phi) {
+; Check that SROA can split up alloca-wide integer loads and stores where the
+; underlying alloca has smaller components that are accessed independently. This
+; shows up particularly with ABI lowering patterns coming out of Clang that rely
+; on the particular register placement of a single large integer return value.
+; CHECK: @PR14059.2
+
+entry:
+ %retval = alloca { float, float }, align 4
+ ; CHECK-NOT: alloca
+
+ %0 = bitcast { float, float }* %retval to i64*
+ store i64 0, i64* %0
+ ; CHECK-NOT: store
+
+ %phi.realp = getelementptr inbounds { float, float }* %phi, i32 0, i32 0
+ %phi.real = load float* %phi.realp
+ %phi.imagp = getelementptr inbounds { float, float }* %phi, i32 0, i32 1
+ %phi.imag = load float* %phi.imagp
+ ; CHECK: %[[realp:.*]] = getelementptr inbounds { float, float }* %phi, i32 0, i32 0
+ ; CHECK-NEXT: %[[real:.*]] = load float* %[[realp]]
+ ; CHECK-NEXT: %[[imagp:.*]] = getelementptr inbounds { float, float }* %phi, i32 0, i32 1
+ ; CHECK-NEXT: %[[imag:.*]] = load float* %[[imagp]]
+
+ %real = getelementptr inbounds { float, float }* %retval, i32 0, i32 0
+ %imag = getelementptr inbounds { float, float }* %retval, i32 0, i32 1
+ store float %phi.real, float* %real
+ store float %phi.imag, float* %imag
+ ; CHECK-NEXT: %[[imag_convert:.*]] = bitcast float %[[imag]] to i32
+ ; CHECK-NEXT: %[[imag_ext:.*]] = zext i32 %[[imag_convert]] to i64
+ ; CHECK-NEXT: %[[imag_shift:.*]] = shl i64 %[[imag_ext]], 32
+ ; CHECK-NEXT: %[[imag_mask:.*]] = and i64 undef, 4294967295
+ ; CHECK-NEXT: %[[imag_insert:.*]] = or i64 %[[imag_mask]], %[[imag_shift]]
+ ; CHECK-NEXT: %[[real_convert:.*]] = bitcast float %[[real]] to i32
+ ; CHECK-NEXT: %[[real_ext:.*]] = zext i32 %[[real_convert]] to i64
+ ; CHECK-NEXT: %[[real_mask:.*]] = and i64 %[[imag_insert]], -4294967296
+ ; CHECK-NEXT: %[[real_insert:.*]] = or i64 %[[real_mask]], %[[real_ext]]
+
+ %1 = load i64* %0, align 1
+ ret i64 %1
+ ; CHECK-NEXT: ret i64 %[[real_insert]]
+}
+
define void @PR14105({ [16 x i8] }* %ptr) {
; Ensure that when rewriting the GEP index '-1' for this alloca we preserve is
; sign as negative. We use a volatile memcpy to ensure promotion never actually
diff --git a/test/Transforms/SROA/big-endian.ll b/test/Transforms/SROA/big-endian.ll
index 532f8690cf..ce82d1f30b 100644
--- a/test/Transforms/SROA/big-endian.ll
+++ b/test/Transforms/SROA/big-endian.ll
@@ -26,9 +26,17 @@ entry:
%ai = load i24* %aiptr
; CHCEK-NOT: store
; CHCEK-NOT: load
-; CHECK: %[[mask0:.*]] = and i24 undef, 65535
-; CHECK-NEXT: %[[mask1:.*]] = and i24 %[[mask0]], -65281
-; CHECK-NEXT: %[[mask2:.*]] = and i24 %[[mask1]], -256
+; CHECK: %[[ext2:.*]] = zext i8 0 to i24
+; CHECK-NEXT: %[[mask2:.*]] = and i24 undef, -256
+; CHECK-NEXT: %[[insert2:.*]] = or i24 %[[mask2]], %[[ext2]]
+; CHECK-NEXT: %[[ext1:.*]] = zext i8 0 to i24
+; CHECK-NEXT: %[[shift1:.*]] = shl i24 %[[ext1]], 8
+; CHECK-NEXT: %[[mask1:.*]] = and i24 %[[insert2]], -65281
+; CHECK-NEXT: %[[insert1:.*]] = or i24 %[[mask1]], %[[shift1]]
+; CHECK-NEXT: %[[ext0:.*]] = zext i8 0 to i24
+; CHECK-NEXT: %[[shift0:.*]] = shl i24 %[[ext0]], 16
+; CHECK-NEXT: %[[mask0:.*]] = and i24 %[[insert1]], 65535
+; CHECK-NEXT: %[[insert0:.*]] = or i24 %[[mask0]], %[[shift0]]
%biptr = bitcast [3 x i8]* %b to i24*
store i24 %ai, i24* %biptr
@@ -40,11 +48,11 @@ entry:
%b2 = load i8* %b2ptr
; CHCEK-NOT: store
; CHCEK-NOT: load
-; CHECK: %[[shift0:.*]] = lshr i24 %[[mask2]], 16
+; CHECK: %[[shift0:.*]] = lshr i24 %[[insert0]], 16
; CHECK-NEXT: %[[trunc0:.*]] = trunc i24 %[[shift0]] to i8
-; CHECK-NEXT: %[[shift1:.*]] = lshr i24 %[[mask2]], 8
+; CHECK-NEXT: %[[shift1:.*]] = lshr i24 %[[insert0]], 8
; CHECK-NEXT: %[[trunc1:.*]] = trunc i24 %[[shift1]] to i8
-; CHECK-NEXT: %[[trunc2:.*]] = trunc i24 %[[mask2]] to i8
+; CHECK-NEXT: %[[trunc2:.*]] = trunc i24 %[[insert0]] to i8
%bsum0 = add i8 %b0, %b1
%bsum1 = add i8 %bsum0, %b2
@@ -74,27 +82,26 @@ entry:
%a0i16ptr = bitcast i8* %a0ptr to i16*
store i16 1, i16* %a0i16ptr
-; CHECK: %[[mask:.*]] = and i56 undef, 1099511627775
-; CHECK-NEXT: %[[or:.*]] = or i56 %[[mask]], 1099511627776
+; CHECK: %[[mask0:.*]] = and i16 1, -16
%a1i4ptr = bitcast i8* %a1ptr to i4*
store i4 1, i4* %a1i4ptr
-; CHECK: %[[mask:.*]] = and i56 %[[or]], -16492674416641
-; CHECK-NEXT: %[[or:.*]] = or i56 %[[mask]], 1099511627776
+; CHECK-NEXT: %[[insert0:.*]] = or i16 %[[mask0]], 1
store i8 1, i8* %a2ptr
-; CHECK-NEXT: %[[mask:.*]] = and i56 %[[or]], -1095216660481
-; CHECK-NEXT: %[[or:.*]] = or i56 %[[mask]], 4294967296
+; CHECK-NEXT: %[[mask1:.*]] = and i40 undef, 4294967295
+; CHECK-NEXT: %[[insert1:.*]] = or i40 %[[mask1]], 4294967296
%a3i24ptr = bitcast i8* %a3ptr to i24*
store i24 1, i24* %a3i24ptr
-; CHECK-NEXT: %[[mask:.*]] = and i56 %[[or]], -4294967041
-; CHECK-NEXT: %[[or:.*]] = or i56 %[[mask]], 256
+; CHECK-NEXT: %[[mask2:.*]] = and i40 %[[insert1]], -4294967041
+; CHECK-NEXT: %[[insert2:.*]] = or i40 %[[mask2]], 256
%a2i40ptr = bitcast i8* %a2ptr to i40*
store i40 1, i40* %a2i40ptr
-; CHECK-NEXT: %[[mask:.*]] = and i56 %[[or]], -1099511627776
-; CHECK-NEXT: %[[or:.*]] = or i56 %[[mask]], 1
+; CHECK-NEXT: %[[ext3:.*]] = zext i40 1 to i56
+; CHECK-NEXT: %[[mask3:.*]] = and i56 undef, -1099511627776
+; CHECK-NEXT: %[[insert3:.*]] = or i56 %[[mask3]], %[[ext3]]
; CHCEK-NOT: store
; CHCEK-NOT: load
@@ -103,6 +110,10 @@ entry:
%ai = load i56* %aiptr
%ret = zext i56 %ai to i64
ret i64 %ret
-; CHECK: %[[ret:.*]] = zext i56 %[[or]] to i64
+; CHECK-NEXT: %[[ext4:.*]] = zext i16 %[[insert0]] to i56
+; CHECK-NEXT: %[[shift4:.*]] = shl i56 %[[ext4]], 40
+; CHECK-NEXT: %[[mask4:.*]] = and i56 %[[insert3]], 1099511627775
+; CHECK-NEXT: %[[insert4:.*]] = or i56 %[[mask4]], %[[shift4]]
+; CHECK-NEXT: %[[ret:.*]] = zext i56 %[[insert4]] to i64
; CHECK-NEXT: ret i64 %[[ret]]
}
diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp
index 751c8e7da0..0390bc470a 100644
--- a/tools/opt/opt.cpp
+++ b/tools/opt/opt.cpp
@@ -39,6 +39,7 @@
#include "llvm/Support/SystemUtils.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/TargetSelect.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/LinkAllPasses.h"
#include "llvm/LinkAllVMCore.h"
@@ -564,6 +565,9 @@ int main(int argc, char **argv) {
llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
LLVMContext &Context = getGlobalContext();
+ InitializeAllTargets();
+ InitializeAllTargetMCs();
+
// Initialize passes
PassRegistry &Registry = *PassRegistry::getPassRegistry();
initializeCore(Registry);
diff --git a/unittests/ADT/DenseMapTest.cpp b/unittests/ADT/DenseMapTest.cpp
index 75e7006434..15eb6988f6 100644
--- a/unittests/ADT/DenseMapTest.cpp
+++ b/unittests/ADT/DenseMapTest.cpp
@@ -330,4 +330,37 @@ TEST(DenseMapCustomTest, FindAsTest) {
EXPECT_TRUE(map.find_as("d") == map.end());
}
+struct ContiguousDenseMapInfo {
+ static inline unsigned getEmptyKey() { return ~0; }
+ static inline unsigned getTombstoneKey() { return ~0U - 1; }
+ static unsigned getHashValue(const unsigned& Val) { return Val; }
+ static bool isEqual(const unsigned& LHS, const unsigned& RHS) {
+ return LHS == RHS;
+ }
+};
+
+// Test that filling a small dense map with exactly the number of elements in
+// the map grows to have enough space for an empty bucket.
+TEST(DenseMapCustomTest, SmallDenseMapGrowTest) {
+ SmallDenseMap<unsigned, unsigned, 32, ContiguousDenseMapInfo> map;
+ // Add some number of elements, then delete a few to leave us some tombstones.
+ // If we just filled the map with 32 elements we'd grow because of not enough
+ // tombstones which masks the issue here.
+ for (unsigned i = 0; i < 20; ++i)
+ map[i] = i + 1;
+ for (unsigned i = 0; i < 10; ++i)
+ map.erase(i);
+ for (unsigned i = 20; i < 32; ++i)
+ map[i] = i + 1;
+
+ // Size tests
+ EXPECT_EQ(22u, map.size());
+
+ // Try to find an element which doesn't exist. There was a bug in
+ // SmallDenseMap which led to a map with num elements == small capacity not
+ // having an empty bucket any more. Finding an element not in the map would
+ // therefore never terminate.
+ EXPECT_TRUE(map.find(32) == map.end());
+}
+
}
diff --git a/utils/TableGen/CMakeLists.txt b/utils/TableGen/CMakeLists.txt
index b8a6daf9e0..116fa57522 100644
--- a/utils/TableGen/CMakeLists.txt
+++ b/utils/TableGen/CMakeLists.txt
@@ -9,6 +9,7 @@ add_tablegen(llvm-tblgen LLVM
CodeEmitterGen.cpp
CodeGenDAGPatterns.cpp
CodeGenInstruction.cpp
+ CodeGenMapTable.cpp
CodeGenRegisters.cpp
CodeGenSchedule.cpp
CodeGenTarget.cpp
diff --git a/utils/TableGen/CodeGenMapTable.cpp b/utils/TableGen/CodeGenMapTable.cpp
new file mode 100644
index 0000000000..4bfd1ba798
--- /dev/null
+++ b/utils/TableGen/CodeGenMapTable.cpp
@@ -0,0 +1,608 @@
+//===- CodeGenMapTable.cpp - Instruction Mapping Table Generator ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// CodeGenMapTable provides functionality for the TabelGen to create
+// relation mapping between instructions. Relation models are defined using
+// InstrMapping as a base class. This file implements the functionality which
+// parses these definitions and generates relation maps using the information
+// specified there. These maps are emitted as tables in the XXXGenInstrInfo.inc
+// file along with the functions to query them.
+//
+// A relationship model to relate non-predicate instructions with their
+// predicated true/false forms can be defined as follows:
+//
+// def getPredOpcode : InstrMapping {
+// let FilterClass = "PredRel";
+// let RowFields = ["BaseOpcode"];
+// let ColFields = ["PredSense"];
+// let KeyCol = ["none"];
+// let ValueCols = [["true"], ["false"]]; }
+//
+// CodeGenMapTable parses this map and generates a table in XXXGenInstrInfo.inc
+// file that contains the instructions modeling this relationship. This table
+// is defined in the function
+// "int getPredOpcode(uint16_t Opcode, enum PredSense inPredSense)"
+// that can be used to retrieve the predicated form of the instruction by
+// passing its opcode value and the predicate sense (true/false) of the desired
+// instruction as arguments.
+//
+// Short description of the algorithm:
+//
+// 1) Iterate through all the records that derive from "InstrMapping" class.
+// 2) For each record, filter out instructions based on the FilterClass value.
+// 3) Iterate through this set of instructions and insert them into
+// RowInstrMap map based on their RowFields values. RowInstrMap is keyed by the
+// vector of RowFields values and contains vectors of Records (instructions) as
+// values. RowFields is a list of fields that are required to have the same
+// values for all the instructions appearing in the same row of the relation
+// table. All the instructions in a given row of the relation table have some
+// sort of relationship with the key instruction defined by the corresponding
+// relationship model.
+//
+// Ex: RowInstrMap(RowVal1, RowVal2, ...) -> [Instr1, Instr2, Instr3, ... ]
+// Here Instr1, Instr2, Instr3 have same values (RowVal1, RowVal2) for
+// RowFields. These groups of instructions are later matched against ValueCols
+// to determine the column they belong to, if any.
+//
+// While building the RowInstrMap map, collect all the key instructions in
+// KeyInstrVec. These are the instructions having the same values as KeyCol
+// for all the fields listed in ColFields.
+//
+// For Example:
+//
+// Relate non-predicate instructions with their predicated true/false forms.
+//
+// def getPredOpcode : InstrMapping {
+// let FilterClass = "PredRel";
+// let RowFields = ["BaseOpcode"];
+// let ColFields = ["PredSense"];
+// let KeyCol = ["none"];
+// let ValueCols = [["true"], ["false"]]; }
+//
+// Here, only instructions that have "none" as PredSense will be selected as key
+// instructions.
+//
+// 4) For each key instruction, get the group of instructions that share the
+// same key-value as the key instruction from RowInstrMap. Iterate over the list
+// of columns in ValueCols (it is defined as a list<list<string> >. Therefore,
+// it can specify multi-column relationships). For each column, find the
+// instruction from the group that matches all the values for the column.
+// Multiple matches are not allowed.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CodeGenTarget.h"
+#include "llvm/Support/Format.h"
+using namespace llvm;
+typedef std::map<std::string, std::vector<Record*> > InstrRelMapTy;
+
+typedef std::map<std::vector<Init*>, std::vector<Record*> > RowInstrMapTy;
+
+namespace {
+
+//===----------------------------------------------------------------------===//
+// This class is used to represent InstrMapping class defined in Target.td file.
+class InstrMap {
+private:
+ std::string Name;
+ std::string FilterClass;
+ ListInit *RowFields;
+ ListInit *ColFields;
+ ListInit *KeyCol;
+ std::vector<ListInit*> ValueCols;
+
+public:
+ InstrMap(Record* MapRec) {
+ Name = MapRec->getName();
+
+ // FilterClass - It's used to reduce the search space only to the
+ // instructions that define the kind of relationship modeled by
+ // this InstrMapping object/record.
+ const RecordVal *Filter = MapRec->getValue("FilterClass");
+ FilterClass = Filter->getValue()->getAsUnquotedString();
+
+ // List of fields/attributes that need to be same across all the
+ // instructions in a row of the relation table.
+ RowFields = MapRec->getValueAsListInit("RowFields");
+
+ // List of fields/attributes that are constant across all the instruction
+ // in a column of the relation table. Ex: ColFields = 'predSense'
+ ColFields = MapRec->getValueAsListInit("ColFields");
+
+ // Values for the fields/attributes listed in 'ColFields'.
+ // Ex: KeyCol = 'noPred' -- key instruction is non predicated
+ KeyCol = MapRec->getValueAsListInit("KeyCol");
+
+ // List of values for the fields/attributes listed in 'ColFields', one for
+ // each column in the relation table.
+ //
+ // Ex: ValueCols = [['true'],['false']] -- it results two columns in the
+ // table. First column requires all the instructions to have predSense
+ // set to 'true' and second column requires it to be 'false'.
+ ListInit *ColValList = MapRec->getValueAsListInit("ValueCols");
+
+ // Each instruction map must specify at least one column for it to be valid.
+ if (ColValList->getSize() == 0)
+ throw "InstrMapping record `" + MapRec->getName() + "' has empty " +
+ "`ValueCols' field!";
+
+ for (unsigned i = 0, e = ColValList->getSize(); i < e; i++) {
+ ListInit *ColI = dyn_cast<ListInit>(ColValList->getElement(i));
+
+ // Make sure that all the sub-lists in 'ValueCols' have same number of
+ // elements as the fields in 'ColFields'.
+ if (ColI->getSize() == ColFields->getSize())
+ ValueCols.push_back(ColI);
+ else {
+ throw "Record `" + MapRec->getName() + "', field `" + "ValueCols" +
+ "' entries don't match with the entries in 'ColFields'!";
+ }
+ }
+ }
+
+ std::string getName() const {
+ return Name;
+ }
+
+ std::string getFilterClass() {
+ return FilterClass;
+ }
+
+ ListInit *getRowFields() const {
+ return RowFields;
+ }
+
+ ListInit *getColFields() const {
+ return ColFields;
+ }
+
+ ListInit *getKeyCol() const {
+ return KeyCol;
+ }
+
+ const std::vector<ListInit*> &getValueCols() const {
+ return ValueCols;
+ }
+};
+} // End anonymous namespace.
+
+
+//===----------------------------------------------------------------------===//
+// class MapTableEmitter : It builds the instruction relation maps using
+// the information provided in InstrMapping records. It outputs these
+// relationship maps as tables into XXXGenInstrInfo.inc file along with the
+// functions to query them.
+
+namespace {
+class MapTableEmitter {
+private:
+// std::string TargetName;
+ const CodeGenTarget &Target;
+ // InstrMapDesc - InstrMapping record to be processed.
+ InstrMap InstrMapDesc;
+
+ // InstrDefs - list of instructions filtered using FilterClass defined
+ // in InstrMapDesc.
+ std::vector<Record*> InstrDefs;
+
+ // RowInstrMap - maps RowFields values to the instructions. It's keyed by the
+ // values of the row fields and contains vector of records as values.
+ RowInstrMapTy RowInstrMap;
+
+ // KeyInstrVec - list of key instructions.
+ std::vector<Record*> KeyInstrVec;
+ DenseMap<Record*, std::vector<Record*> > MapTable;
+
+public:
+ MapTableEmitter(CodeGenTarget &Target, RecordKeeper &Records, Record *IMRec):
+ Target(Target), InstrMapDesc(IMRec) {
+ const std::string FilterClass = InstrMapDesc.getFilterClass();
+ InstrDefs = Records.getAllDerivedDefinitions(FilterClass);
+ }
+
+ void buildRowInstrMap();
+
+ // Returns true if an instruction is a key instruction, i.e., its ColFields
+ // have same values as KeyCol.
+ bool isKeyColInstr(Record* CurInstr);
+
+ // Find column instruction corresponding to a key instruction based on the
+ // constraints for that column.
+ Record *getInstrForColumn(Record *KeyInstr, ListInit *CurValueCol);
+
+ // Find column instructions for each key instruction based
+ // on ValueCols and store them into MapTable.
+ void buildMapTable();
+
+ void emitBinSearch(raw_ostream &OS, unsigned TableSize);
+ void emitTablesWithFunc(raw_ostream &OS);
+ unsigned emitBinSearchTable(raw_ostream &OS);
+
+ // Lookup functions to query binary search tables.
+ void emitMapFuncBody(raw_ostream &OS, unsigned TableSize);
+
+};
+} // End anonymous namespace.
+
+
+//===----------------------------------------------------------------------===//
+// Process all the instructions that model this relation (alreday present in
+// InstrDefs) and insert them into RowInstrMap which is keyed by the values of
+// the fields listed as RowFields. It stores vectors of records as values.
+// All the related instructions have the same values for the RowFields thus are
+// part of the same key-value pair.
+//===----------------------------------------------------------------------===//
+
+void MapTableEmitter::buildRowInstrMap() {
+ for (unsigned i = 0, e = InstrDefs.size(); i < e; i++) {
+ std::vector<Record*> InstrList;
+ Record *CurInstr = InstrDefs[i];
+ std::vector<Init*> KeyValue;
+ ListInit *RowFields = InstrMapDesc.getRowFields();
+ for (unsigned j = 0, endRF = RowFields->getSize(); j < endRF; j++) {
+ Init *RowFieldsJ = RowFields->getElement(j);
+ Init *CurInstrVal = CurInstr->getValue(RowFieldsJ)->getValue();
+ KeyValue.push_back(CurInstrVal);
+ }
+
+ // Collect key instructions into KeyInstrVec. Later, these instructions are
+ // processed to assign column position to the instructions sharing
+ // their KeyValue in RowInstrMap.
+ if (isKeyColInstr(CurInstr))
+ KeyInstrVec.push_back(CurInstr);
+
+ RowInstrMap[KeyValue].push_back(CurInstr);
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Return true if an instruction is a KeyCol instruction.
+//===----------------------------------------------------------------------===//
+
+bool MapTableEmitter::isKeyColInstr(Record* CurInstr) {
+ ListInit *ColFields = InstrMapDesc.getColFields();
+ ListInit *KeyCol = InstrMapDesc.getKeyCol();
+
+ // Check if the instruction is a KeyCol instruction.
+ bool MatchFound = true;
+ for (unsigned j = 0, endCF = ColFields->getSize();
+ (j < endCF) && MatchFound; j++) {
+ RecordVal *ColFieldName = CurInstr->getValue(ColFields->getElement(j));
+ std::string CurInstrVal = ColFieldName->getValue()->getAsUnquotedString();
+ std::string KeyColValue = KeyCol->getElement(j)->getAsUnquotedString();
+ MatchFound = (CurInstrVal == KeyColValue);
+ }
+ return MatchFound;
+}
+
+//===----------------------------------------------------------------------===//
+// Build a map to link key instructions with the column instructions arranged
+// according to their column positions.
+//===----------------------------------------------------------------------===//
+
+void MapTableEmitter::buildMapTable() {
+ // Find column instructions for a given key based on the ColField
+ // constraints.
+ const std::vector<ListInit*> &ValueCols = InstrMapDesc.getValueCols();
+ unsigned NumOfCols = ValueCols.size();
+ for (unsigned j = 0, endKI = KeyInstrVec.size(); j < endKI; j++) {
+ Record *CurKeyInstr = KeyInstrVec[j];
+ std::vector<Record*> ColInstrVec(NumOfCols);
+
+ // Find the column instruction based on the constraints for the column.
+ for (unsigned ColIdx = 0; ColIdx < NumOfCols; ColIdx++) {
+ ListInit *CurValueCol = ValueCols[ColIdx];
+ Record *ColInstr = getInstrForColumn(CurKeyInstr, CurValueCol);
+ ColInstrVec[ColIdx] = ColInstr;
+ }
+ MapTable[CurKeyInstr] = ColInstrVec;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Find column instruction based on the constraints for that column.
+//===----------------------------------------------------------------------===//
+
+Record *MapTableEmitter::getInstrForColumn(Record *KeyInstr,
+ ListInit *CurValueCol) {
+ ListInit *RowFields = InstrMapDesc.getRowFields();
+ std::vector<Init*> KeyValue;
+
+ // Construct KeyValue using KeyInstr's values for RowFields.
+ for (unsigned j = 0, endRF = RowFields->getSize(); j < endRF; j++) {
+ Init *RowFieldsJ = RowFields->getElement(j);
+ Init *KeyInstrVal = KeyInstr->getValue(RowFieldsJ)->getValue();
+ KeyValue.push_back(KeyInstrVal);
+ }
+
+ // Get all the instructions that share the same KeyValue as the KeyInstr
+ // in RowInstrMap. We search through these instructions to find a match
+ // for the current column, i.e., the instruction which has the same values
+ // as CurValueCol for all the fields in ColFields.
+ const std::vector<Record*> &RelatedInstrVec = RowInstrMap[KeyValue];
+
+ ListInit *ColFields = InstrMapDesc.getColFields();
+ Record *MatchInstr = NULL;
+
+ for (unsigned i = 0, e = RelatedInstrVec.size(); i < e; i++) {
+ bool MatchFound = true;
+ Record *CurInstr = RelatedInstrVec[i];
+ for (unsigned j = 0, endCF = ColFields->getSize();
+ (j < endCF) && MatchFound; j++) {
+ Init *ColFieldJ = ColFields->getElement(j);
+ Init *CurInstrInit = CurInstr->getValue(ColFieldJ)->getValue();
+ std::string CurInstrVal = CurInstrInit->getAsUnquotedString();
+ Init *ColFieldJVallue = CurValueCol->getElement(j);
+ MatchFound = (CurInstrVal == ColFieldJVallue->getAsUnquotedString());
+ }
+
+ if (MatchFound) {
+ if (MatchInstr) // Already had a match
+ // Error if multiple matches are found for a column.
+ throw "Multiple matches found for `" + KeyInstr->getName() +
+ "', for the relation `" + InstrMapDesc.getName();
+ else
+ MatchInstr = CurInstr;
+ }
+ }
+ return MatchInstr;
+}
+
+//===----------------------------------------------------------------------===//
+// Emit one table per relation. Only instructions with a valid relation of a
+// given type are included in the table sorted by their enum values (opcodes).
+// Binary search is used for locating instructions in the table.
+//===----------------------------------------------------------------------===//
+
+unsigned MapTableEmitter::emitBinSearchTable(raw_ostream &OS) {
+
+ const std::vector<const CodeGenInstruction*> &NumberedInstructions =
+ Target.getInstructionsByEnumValue();
+ std::string TargetName = Target.getName();
+ const std::vector<ListInit*> &ValueCols = InstrMapDesc.getValueCols();
+ unsigned NumCol = ValueCols.size();
+ unsigned TotalNumInstr = NumberedInstructions.size();
+ unsigned TableSize = 0;
+
+ OS << "static const uint16_t "<<InstrMapDesc.getName();
+ // Number of columns in the table are NumCol+1 because key instructions are
+ // emitted as first column.
+ OS << "Table[]["<< NumCol+1 << "] = {\n";
+ for (unsigned i = 0; i < TotalNumInstr; i++) {
+ Record *CurInstr = NumberedInstructions[i]->TheDef;
+ std::vector<Record*> ColInstrs = MapTable[CurInstr];
+ std::string OutStr("");
+ unsigned RelExists = 0;
+ if (ColInstrs.size()) {
+ for (unsigned j = 0; j < NumCol; j++) {
+ if (ColInstrs[j] != NULL) {
+ RelExists = 1;
+ OutStr += ", ";
+ OutStr += TargetName;
+ OutStr += "::";
+ OutStr += ColInstrs[j]->getName();
+ } else { OutStr += ", -1";}
+ }
+
+ if (RelExists) {
+ OS << " { " << TargetName << "::" << CurInstr->getName();
+ OS << OutStr <<" },\n";
+ TableSize++;
+ }
+ }
+ }
+ if (!TableSize) {
+ OS << " { " << TargetName << "::" << "INSTRUCTION_LIST_END, ";
+ OS << TargetName << "::" << "INSTRUCTION_LIST_END }";
+ }
+ OS << "}; // End of " << InstrMapDesc.getName() << "Table\n\n";
+ return TableSize;
+}
+
+//===----------------------------------------------------------------------===//
+// Emit binary search algorithm as part of the functions used to query
+// relation tables.
+//===----------------------------------------------------------------------===//
+
+void MapTableEmitter::emitBinSearch(raw_ostream &OS, unsigned TableSize) {
+ OS << " unsigned mid;\n";
+ OS << " unsigned start = 0;\n";
+ OS << " unsigned end = " << TableSize << ";\n";
+ OS << " while (start < end) {\n";
+ OS << " mid = start + (end - start)/2;\n";
+ OS << " if (Opcode == " << InstrMapDesc.getName() << "Table[mid][0]) {\n";
+ OS << " break;\n";
+ OS << " }\n";
+ OS << " if (Opcode < " << InstrMapDesc.getName() << "Table[mid][0])\n";
+ OS << " end = mid;\n";
+ OS << " else\n";
+ OS << " start = mid + 1;\n";
+ OS << " }\n";
+ OS << " if (start == end)\n";
+ OS << " return -1; // Instruction doesn't exist in this table.\n\n";
+}
+
+//===----------------------------------------------------------------------===//
+// Emit functions to query relation tables.
+//===----------------------------------------------------------------------===//
+
+void MapTableEmitter::emitMapFuncBody(raw_ostream &OS,
+ unsigned TableSize) {
+
+ ListInit *ColFields = InstrMapDesc.getColFields();
+ const std::vector<ListInit*> &ValueCols = InstrMapDesc.getValueCols();
+
+ // Emit binary search algorithm to locate instructions in the
+ // relation table. If found, return opcode value from the appropriate column
+ // of the table.
+ emitBinSearch(OS, TableSize);
+
+ if (ValueCols.size() > 1) {
+ for (unsigned i = 0, e = ValueCols.size(); i < e; i++) {
+ ListInit *ColumnI = ValueCols[i];
+ for (unsigned j = 0, ColSize = ColumnI->getSize(); j < ColSize; j++) {
+ std::string ColName = ColFields->getElement(j)->getAsUnquotedString();
+ OS << " if (in" << ColName;
+ OS << " == ";
+ OS << ColName << "_" << ColumnI->getElement(j)->getAsUnquotedString();
+ if (j < ColumnI->getSize() - 1) OS << " && ";
+ else OS << ")\n";
+ }
+ OS << " return " << InstrMapDesc.getName();
+ OS << "Table[mid]["<<i+1<<"];\n";
+ }
+ OS << " return -1;";
+ }
+ else
+ OS << " return " << InstrMapDesc.getName() << "Table[mid][1];\n";
+
+ OS <<"}\n\n";
+}
+
+//===----------------------------------------------------------------------===//
+// Emit relation tables and the functions to query them.
+//===----------------------------------------------------------------------===//
+
+void MapTableEmitter::emitTablesWithFunc(raw_ostream &OS) {
+
+ // Emit function name and the input parameters : mostly opcode value of the
+ // current instruction. However, if a table has multiple columns (more than 2
+ // since first column is used for the key instructions), then we also need
+ // to pass another input to indicate the column to be selected.
+
+ ListInit *ColFields = InstrMapDesc.getColFields();
+ const std::vector<ListInit*> &ValueCols = InstrMapDesc.getValueCols();
+ OS << "// "<< InstrMapDesc.getName() << "\n";
+ OS << "int "<< InstrMapDesc.getName() << "(uint16_t Opcode";
+ if (ValueCols.size() > 1) {
+ for (unsigned i = 0, e = ColFields->getSize(); i < e; i++) {
+ std::string ColName = ColFields->getElement(i)->getAsUnquotedString();
+ OS << ", enum " << ColName << " in" << ColName << ") {\n";
+ }
+ } else { OS << ") {\n"; }
+
+ // Emit map table.
+ unsigned TableSize = emitBinSearchTable(OS);
+
+ // Emit rest of the function body.
+ emitMapFuncBody(OS, TableSize);
+}
+
+//===----------------------------------------------------------------------===//
+// Emit enums for the column fields across all the instruction maps.
+//===----------------------------------------------------------------------===//
+
+static void emitEnums(raw_ostream &OS, RecordKeeper &Records) {
+
+ std::vector<Record*> InstrMapVec;
+ InstrMapVec = Records.getAllDerivedDefinitions("InstrMapping");
+ std::map<std::string, std::vector<Init*> > ColFieldValueMap;
+
+ // Iterate over all InstrMapping records and create a map between column
+ // fields and their possible values across all records.
+ for (unsigned i = 0, e = InstrMapVec.size(); i < e; i++) {
+ Record *CurMap = InstrMapVec[i];
+ ListInit *ColFields;
+ ColFields = CurMap->getValueAsListInit("ColFields");
+ ListInit *List = CurMap->getValueAsListInit("ValueCols");
+ std::vector<ListInit*> ValueCols;
+ unsigned ListSize = List->getSize();
+
+ for (unsigned j = 0; j < ListSize; j++) {
+ ListInit *ListJ = dyn_cast<ListInit>(List->getElement(j));
+
+ if (ListJ->getSize() != ColFields->getSize()) {
+ throw "Record `" + CurMap->getName() + "', field `" + "ValueCols" +
+ "' entries don't match with the entries in 'ColFields' !";
+ }
+ ValueCols.push_back(ListJ);
+ }
+
+ for (unsigned j = 0, endCF = ColFields->getSize(); j < endCF; j++) {
+ for (unsigned k = 0; k < ListSize; k++){
+ std::string ColName = ColFields->getElement(j)->getAsUnquotedString();
+ ColFieldValueMap[ColName].push_back((ValueCols[k])->getElement(j));
+ }
+ }
+ }
+
+ for (std::map<std::string, std::vector<Init*> >::iterator
+ II = ColFieldValueMap.begin(), IE = ColFieldValueMap.end();
+ II != IE; II++) {
+ std::vector<Init*> FieldValues = (*II).second;
+ unsigned FieldSize = FieldValues.size();
+
+ // Delete duplicate entries from ColFieldValueMap
+ for (unsigned i = 0; i < FieldSize - 1; i++) {
+ Init *CurVal = FieldValues[i];
+ for (unsigned j = i+1; j < FieldSize; j++) {
+ if (CurVal == FieldValues[j]) {
+ FieldValues.erase(FieldValues.begin()+j);
+ }
+ }
+ }
+
+ // Emit enumerated values for the column fields.
+ OS << "enum " << (*II).first << " {\n";
+ for (unsigned i = 0; i < FieldSize; i++) {
+ OS << "\t" << (*II).first << "_" << FieldValues[i]->getAsUnquotedString();
+ if (i != FieldValues.size() - 1)
+ OS << ",\n";
+ else
+ OS << "\n};\n\n";
+ }
+ }
+}
+
+namespace llvm {
+//===----------------------------------------------------------------------===//
+// Parse 'InstrMapping' records and use the information to form relationship
+// between instructions. These relations are emitted as a tables along with the
+// functions to query them.
+//===----------------------------------------------------------------------===//
+void EmitMapTable(RecordKeeper &Records, raw_ostream &OS) {
+ CodeGenTarget Target(Records);
+ std::string TargetName = Target.getName();
+ std::vector<Record*> InstrMapVec;
+ InstrMapVec = Records.getAllDerivedDefinitions("InstrMapping");
+
+ if (!InstrMapVec.size())
+ return;
+
+ OS << "#ifdef GET_INSTRMAP_INFO\n";
+ OS << "#undef GET_INSTRMAP_INFO\n";
+ OS << "namespace llvm {\n\n";
+ OS << "namespace " << TargetName << " {\n\n";
+
+ // Emit coulumn field names and their values as enums.
+ emitEnums(OS, Records);
+
+ // Iterate over all instruction mapping records and construct relationship
+ // maps based on the information specified there.
+ //
+ for (unsigned i = 0, e = InstrMapVec.size(); i < e; i++) {
+ MapTableEmitter IMap(Target, Records, InstrMapVec[i]);
+
+ // Build RowInstrMap to group instructions based on their values for
+ // RowFields. In the process, also collect key instructions into
+ // KeyInstrVec.
+ IMap.buildRowInstrMap();
+
+ // Build MapTable to map key instructions with the corresponding column
+ // instructions.
+ IMap.buildMapTable();
+
+ // Emit map tables and the functions to query them.
+ IMap.emitTablesWithFunc(OS);
+ }
+ OS << "} // End " << TargetName << " namespace\n";
+ OS << "} // End llvm namespace\n";
+ OS << "#endif // GET_INSTRMAP_INFO\n\n";
+}
+
+} // End llvm namespace
diff --git a/utils/TableGen/CodeGenRegisters.cpp b/utils/TableGen/CodeGenRegisters.cpp
index f195b4e3fa..10064fdd16 100644
--- a/utils/TableGen/CodeGenRegisters.cpp
+++ b/utils/TableGen/CodeGenRegisters.cpp
@@ -600,7 +600,7 @@ struct TupleExpander : SetTheory::Expander {
unsigned Length = ~0u;
SmallVector<SetTheory::RecSet, 4> Lists(Dim);
for (unsigned i = 0; i != Dim; ++i) {
- ST.evaluate(SubRegs->getElement(i), Lists[i]);
+ ST.evaluate(SubRegs->getElement(i), Lists[i], Def->getLoc());
Length = std::min(Length, unsigned(Lists[i].size()));
}
@@ -728,7 +728,7 @@ CodeGenRegisterClass::CodeGenRegisterClass(CodeGenRegBank &RegBank, Record *R)
// Alternative allocation orders may be subsets.
SetTheory::RecSet Order;
for (unsigned i = 0, e = AltOrders->size(); i != e; ++i) {
- RegBank.getSets().evaluate(AltOrders->getElement(i), Order);
+ RegBank.getSets().evaluate(AltOrders->getElement(i), Order, R->getLoc());
Orders[1 + i].append(Order.begin(), Order.end());
// Verify that all altorder members are regclass members.
while (!Order.empty()) {
diff --git a/utils/TableGen/CodeGenSchedule.cpp b/utils/TableGen/CodeGenSchedule.cpp
index fc101eec61..1cca3e3f85 100644
--- a/utils/TableGen/CodeGenSchedule.cpp
+++ b/utils/TableGen/CodeGenSchedule.cpp
@@ -38,8 +38,9 @@ static void dumpIdxVec(const SmallVectorImpl<unsigned> &V) {
// (instrs a, b, ...) Evaluate and union all arguments. Identical to AddOp.
struct InstrsOp : public SetTheory::Operator {
- void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts) {
- ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts);
+ void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts,
+ ArrayRef<SMLoc> Loc) {
+ ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts, Loc);
}
};
@@ -55,13 +56,15 @@ struct InstRegexOp : public SetTheory::Operator {
const CodeGenTarget &Target;
InstRegexOp(const CodeGenTarget &t): Target(t) {}
- void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts) {
+ void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts,
+ ArrayRef<SMLoc> Loc) {
SmallVector<Regex*, 4> RegexList;
for (DagInit::const_arg_iterator
AI = Expr->arg_begin(), AE = Expr->arg_end(); AI != AE; ++AI) {
StringInit *SI = dyn_cast<StringInit>(*AI);
if (!SI)
- throw "instregex requires pattern string: " + Expr->getAsString();
+ throw TGError(Loc, "instregex requires pattern string: "
+ + Expr->getAsString());
std::string pat = SI->getValue();
// Implement a python-style prefix match.
if (pat[0] != '^') {
diff --git a/utils/TableGen/InstrInfoEmitter.cpp b/utils/TableGen/InstrInfoEmitter.cpp
index e447c16b16..8e670e3cbc 100644
--- a/utils/TableGen/InstrInfoEmitter.cpp
+++ b/utils/TableGen/InstrInfoEmitter.cpp
@@ -16,6 +16,7 @@
#include "CodeGenDAGPatterns.h"
#include "CodeGenSchedule.h"
#include "CodeGenTarget.h"
+#include "TableGenBackends.h"
#include "SequenceToOffsetTable.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/TableGen/Record.h"
@@ -415,6 +416,7 @@ namespace llvm {
void EmitInstrInfo(RecordKeeper &RK, raw_ostream &OS) {
InstrInfoEmitter(RK).run(OS);
+ EmitMapTable(RK, OS);
}
} // End llvm namespace
diff --git a/utils/TableGen/SetTheory.cpp b/utils/TableGen/SetTheory.cpp
index 5b760e7a23..33a8f0e337 100644
--- a/utils/TableGen/SetTheory.cpp
+++ b/utils/TableGen/SetTheory.cpp
@@ -27,20 +27,20 @@ typedef SetTheory::RecVec RecVec;
// (add a, b, ...) Evaluate and union all arguments.
struct AddOp : public SetTheory::Operator {
- void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts) {
- ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts);
+ void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts, ArrayRef<SMLoc> Loc) {
+ ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts, Loc);
}
};
// (sub Add, Sub, ...) Set difference.
struct SubOp : public SetTheory::Operator {
- void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts) {
+ void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts, ArrayRef<SMLoc> Loc) {
if (Expr->arg_size() < 2)
- throw "Set difference needs at least two arguments: " +
- Expr->getAsString();
+ throw TGError(Loc, "Set difference needs at least two arguments: " +
+ Expr->getAsString());
RecSet Add, Sub;
- ST.evaluate(*Expr->arg_begin(), Add);
- ST.evaluate(Expr->arg_begin() + 1, Expr->arg_end(), Sub);
+ ST.evaluate(*Expr->arg_begin(), Add, Loc);
+ ST.evaluate(Expr->arg_begin() + 1, Expr->arg_end(), Sub, Loc);
for (RecSet::iterator I = Add.begin(), E = Add.end(); I != E; ++I)
if (!Sub.count(*I))
Elts.insert(*I);
@@ -49,12 +49,13 @@ struct SubOp : public SetTheory::Operator {
// (and S1, S2) Set intersection.
struct AndOp : public SetTheory::Operator {
- void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts) {
+ void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts, ArrayRef<SMLoc> Loc) {
if (Expr->arg_size() != 2)
- throw "Set intersection requires two arguments: " + Expr->getAsString();
+ throw TGError(Loc, "Set intersection requires two arguments: " +
+ Expr->getAsString());
RecSet S1, S2;
- ST.evaluate(Expr->arg_begin()[0], S1);
- ST.evaluate(Expr->arg_begin()[1], S2);
+ ST.evaluate(Expr->arg_begin()[0], S1, Loc);
+ ST.evaluate(Expr->arg_begin()[1], S2, Loc);
for (RecSet::iterator I = S1.begin(), E = S1.end(); I != E; ++I)
if (S2.count(*I))
Elts.insert(*I);
@@ -65,17 +66,19 @@ struct AndOp : public SetTheory::Operator {
struct SetIntBinOp : public SetTheory::Operator {
virtual void apply2(SetTheory &ST, DagInit *Expr,
RecSet &Set, int64_t N,
- RecSet &Elts) =0;
+ RecSet &Elts, ArrayRef<SMLoc> Loc) =0;
- void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts) {
+ void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts, ArrayRef<SMLoc> Loc) {
if (Expr->arg_size() != 2)
- throw "Operator requires (Op Set, Int) arguments: " + Expr->getAsString();
+ throw TGError(Loc, "Operator requires (Op Set, Int) arguments: " +
+ Expr->getAsString());
RecSet Set;
- ST.evaluate(Expr->arg_begin()[0], Set);
+ ST.evaluate(Expr->arg_begin()[0], Set, Loc);
IntInit *II = dyn_cast<IntInit>(Expr->arg_begin()[1]);
if (!II)
- throw "Second argument must be an integer: " + Expr->getAsString();
- apply2(ST, Expr, Set, II->getValue(), Elts);
+ throw TGError(Loc, "Second argument must be an integer: " +
+ Expr->getAsString());
+ apply2(ST, Expr, Set, II->getValue(), Elts, Loc);
}
};
@@ -83,9 +86,10 @@ struct SetIntBinOp : public SetTheory::Operator {
struct ShlOp : public SetIntBinOp {
void apply2(SetTheory &ST, DagInit *Expr,
RecSet &Set, int64_t N,
- RecSet &Elts) {
+ RecSet &Elts, ArrayRef<SMLoc> Loc) {
if (N < 0)
- throw "Positive shift required: " + Expr->getAsString();
+ throw TGError(Loc, "Positive shift required: " +
+ Expr->getAsString());
if (unsigned(N) < Set.size())
Elts.insert(Set.begin() + N, Set.end());
}
@@ -95,9 +99,10 @@ struct ShlOp : public SetIntBinOp {
struct TruncOp : public SetIntBinOp {
void apply2(SetTheory &ST, DagInit *Expr,
RecSet &Set, int64_t N,
- RecSet &Elts) {
+ RecSet &Elts, ArrayRef<SMLoc> Loc) {
if (N < 0)
- throw "Positive length required: " + Expr->getAsString();
+ throw TGError(Loc, "Positive length required: " +
+ Expr->getAsString());
if (unsigned(N) > Set.size())
N = Set.size();
Elts.insert(Set.begin(), Set.begin() + N);
@@ -112,7 +117,7 @@ struct RotOp : public SetIntBinOp {
void apply2(SetTheory &ST, DagInit *Expr,
RecSet &Set, int64_t N,
- RecSet &Elts) {
+ RecSet &Elts, ArrayRef<SMLoc> Loc) {
if (Reverse)
N = -N;
// N > 0 -> rotate left, N < 0 -> rotate right.
@@ -131,9 +136,10 @@ struct RotOp : public SetIntBinOp {
struct DecimateOp : public SetIntBinOp {
void apply2(SetTheory &ST, DagInit *Expr,
RecSet &Set, int64_t N,
- RecSet &Elts) {
+ RecSet &Elts, ArrayRef<SMLoc> Loc) {
if (N <= 0)
- throw "Positive stride required: " + Expr->getAsString();
+ throw TGError(Loc, "Positive stride required: " +
+ Expr->getAsString());
for (unsigned I = 0; I < Set.size(); I += N)
Elts.insert(Set[I]);
}
@@ -141,12 +147,12 @@ struct DecimateOp : public SetIntBinOp {
// (interleave S1, S2, ...) Interleave elements of the arguments.
struct InterleaveOp : public SetTheory::Operator {
- void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts) {
+ void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts, ArrayRef<SMLoc> Loc) {
// Evaluate the arguments individually.
SmallVector<RecSet, 4> Args(Expr->getNumArgs());
unsigned MaxSize = 0;
for (unsigned i = 0, e = Expr->getNumArgs(); i != e; ++i) {
- ST.evaluate(Expr->getArg(i), Args[i]);
+ ST.evaluate(Expr->getArg(i), Args[i], Loc);
MaxSize = std::max(MaxSize, unsigned(Args[i].size()));
}
// Interleave arguments into Elts.
@@ -159,38 +165,38 @@ struct InterleaveOp : public SetTheory::Operator {
// (sequence "Format", From, To) Generate a sequence of records by name.
struct SequenceOp : public SetTheory::Operator {
- void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts) {
+ void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts, ArrayRef<SMLoc> Loc) {
int Step = 1;
if (Expr->arg_size() > 4)
- throw "Bad args to (sequence \"Format\", From, To): " +
- Expr->getAsString();
+ throw TGError(Loc, "Bad args to (sequence \"Format\", From, To): " +
+ Expr->getAsString());
else if (Expr->arg_size() == 4) {
if (IntInit *II = dyn_cast<IntInit>(Expr->arg_begin()[3])) {
Step = II->getValue();
} else
- throw "Stride must be an integer: " + Expr->getAsString();
+ throw TGError(Loc, "Stride must be an integer: " + Expr->getAsString());
}
std::string Format;
if (StringInit *SI = dyn_cast<StringInit>(Expr->arg_begin()[0]))
Format = SI->getValue();
else
- throw "Format must be a string: " + Expr->getAsString();
+ throw TGError(Loc, "Format must be a string: " + Expr->getAsString());
int64_t From, To;
if (IntInit *II = dyn_cast<IntInit>(Expr->arg_begin()[1]))
From = II->getValue();
else
- throw "From must be an integer: " + Expr->getAsString();
+ throw TGError(Loc, "From must be an integer: " + Expr->getAsString());
if (From < 0 || From >= (1 << 30))
- throw "From out of range";
+ throw TGError(Loc, "From out of range");
if (IntInit *II = dyn_cast<IntInit>(Expr->arg_begin()[2]))
To = II->getValue();
else
- throw "From must be an integer: " + Expr->getAsString();
+ throw TGError(Loc, "From must be an integer: " + Expr->getAsString());
if (To < 0 || To >= (1 << 30))
- throw "To out of range";
+ throw TGError(Loc, "To out of range");
RecordKeeper &Records =
cast<DefInit>(Expr->getOperator())->getDef()->getRecords();
@@ -206,7 +212,8 @@ struct SequenceOp : public SetTheory::Operator {
OS << format(Format.c_str(), unsigned(From));
Record *Rec = Records.getDef(OS.str());
if (!Rec)
- throw "No def named '" + Name + "': " + Expr->getAsString();
+ throw TGError(Loc, "No def named '" + Name + "': " +
+ Expr->getAsString());
// Try to reevaluate Rec in case it is a set.
if (const RecVec *Result = ST.expand(Rec))
Elts.insert(Result->begin(), Result->end());
@@ -225,7 +232,7 @@ struct FieldExpander : public SetTheory::Expander {
FieldExpander(StringRef fn) : FieldName(fn) {}
void expand(SetTheory &ST, Record *Def, RecSet &Elts) {
- ST.evaluate(Def->getValueInit(FieldName), Elts);
+ ST.evaluate(Def->getValueInit(FieldName), Elts, Def->getLoc());
}
};
} // end anonymous namespace
@@ -259,7 +266,7 @@ void SetTheory::addFieldExpander(StringRef ClassName, StringRef FieldName) {
addExpander(ClassName, new FieldExpander(FieldName));
}
-void SetTheory::evaluate(Init *Expr, RecSet &Elts) {
+void SetTheory::evaluate(Init *Expr, RecSet &Elts, ArrayRef<SMLoc> Loc) {
// A def in a list can be a just an element, or it may expand.
if (DefInit *Def = dyn_cast<DefInit>(Expr)) {
if (const RecVec *Result = expand(Def->getDef()))
@@ -270,19 +277,19 @@ void SetTheory::evaluate(Init *Expr, RecSet &Elts) {
// Lists simply expand.
if (ListInit *LI = dyn_cast<ListInit>(Expr))
- return evaluate(LI->begin(), LI->end(), Elts);
+ return evaluate(LI->begin(), LI->end(), Elts, Loc);
// Anything else must be a DAG.
DagInit *DagExpr = dyn_cast<DagInit>(Expr);
if (!DagExpr)
- throw "Invalid set element: " + Expr->getAsString();
+ throw TGError(Loc, "Invalid set element: " + Expr->getAsString());
DefInit *OpInit = dyn_cast<DefInit>(DagExpr->getOperator());
if (!OpInit)
- throw "Bad set expression: " + Expr->getAsString();
+ throw TGError(Loc, "Bad set expression: " + Expr->getAsString());
Operator *Op = Operators.lookup(OpInit->getDef()->getName());
if (!Op)
- throw "Unknown set operator: " + Expr->getAsString();
- Op->apply(*this, DagExpr, Elts);
+ throw TGError(Loc, "Unknown set operator: " + Expr->getAsString());
+ Op->apply(*this, DagExpr, Elts, Loc);
}
const RecVec *SetTheory::expand(Record *Set) {
@@ -292,23 +299,19 @@ const RecVec *SetTheory::expand(Record *Set) {
return &I->second;
// This is the first time we see Set. Find a suitable expander.
- try {
- const std::vector<Record*> &SC = Set->getSuperClasses();
- for (unsigned i = 0, e = SC.size(); i != e; ++i) {
- // Skip unnamed superclasses.
- if (!dyn_cast<StringInit>(SC[i]->getNameInit()))
- continue;
- if (Expander *Exp = Expanders.lookup(SC[i]->getName())) {
- // This breaks recursive definitions.
- RecVec &EltVec = Expansions[Set];
- RecSet Elts;
- Exp->expand(*this, Set, Elts);
- EltVec.assign(Elts.begin(), Elts.end());
- return &EltVec;
- }
+ const std::vector<Record*> &SC = Set->getSuperClasses();
+ for (unsigned i = 0, e = SC.size(); i != e; ++i) {
+ // Skip unnamed superclasses.
+ if (!dyn_cast<StringInit>(SC[i]->getNameInit()))
+ continue;
+ if (Expander *Exp = Expanders.lookup(SC[i]->getName())) {
+ // This breaks recursive definitions.
+ RecVec &EltVec = Expansions[Set];
+ RecSet Elts;
+ Exp->expand(*this, Set, Elts);
+ EltVec.assign(Elts.begin(), Elts.end());
+ return &EltVec;
}
- } catch (const std::string &Error) {
- throw TGError(Set->getLoc(), Error);
}
// Set is not expandable.
diff --git a/utils/TableGen/SetTheory.h b/utils/TableGen/SetTheory.h
index b394058f4c..122372ab33 100644
--- a/utils/TableGen/SetTheory.h
+++ b/utils/TableGen/SetTheory.h
@@ -49,6 +49,7 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/Support/SourceMgr.h"
#include <map>
#include <vector>
@@ -72,7 +73,8 @@ public:
/// apply - Apply this operator to Expr's arguments and insert the result
/// in Elts.
- virtual void apply(SetTheory&, DagInit *Expr, RecSet &Elts) =0;
+ virtual void apply(SetTheory&, DagInit *Expr, RecSet &Elts,
+ ArrayRef<SMLoc> Loc) =0;
};
/// Expander - A callback function that can transform a Record representing a
@@ -119,13 +121,13 @@ public:
void addOperator(StringRef Name, Operator*);
/// evaluate - Evaluate Expr and append the resulting set to Elts.
- void evaluate(Init *Expr, RecSet &Elts);
+ void evaluate(Init *Expr, RecSet &Elts, ArrayRef<SMLoc> Loc);
/// evaluate - Evaluate a sequence of Inits and append to Elts.
template<typename Iter>
- void evaluate(Iter begin, Iter end, RecSet &Elts) {
+ void evaluate(Iter begin, Iter end, RecSet &Elts, ArrayRef<SMLoc> Loc) {
while (begin != end)
- evaluate(*begin++, Elts);
+ evaluate(*begin++, Elts, Loc);
}
/// expand - Expand a record into a set of elements if possible. Return a
diff --git a/utils/TableGen/TableGenBackends.h b/utils/TableGen/TableGenBackends.h
index 2c00c40cfe..f0d25d8a2c 100644
--- a/utils/TableGen/TableGenBackends.h
+++ b/utils/TableGen/TableGenBackends.h
@@ -74,5 +74,6 @@ void EmitInstrInfo(RecordKeeper &RK, raw_ostream &OS);
void EmitPseudoLowering(RecordKeeper &RK, raw_ostream &OS);
void EmitRegisterInfo(RecordKeeper &RK, raw_ostream &OS);
void EmitSubtarget(RecordKeeper &RK, raw_ostream &OS);
+void EmitMapTable(RecordKeeper &RK, raw_ostream &OS);
} // End llvm namespace