aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CMakeLists.txt23
-rw-r--r--Makefile6
-rw-r--r--autoconf/configure.ac43
-rwxr-xr-xconfigure326
-rw-r--r--docs/CMake.rst5
-rw-r--r--docs/HowToBuildOnARM.rst34
-rw-r--r--docs/HowToSubmitABug.html345
-rw-r--r--docs/HowToSubmitABug.rst233
-rw-r--r--docs/index.rst46
-rw-r--r--docs/userguides.rst10
-rw-r--r--include/llvm/ADT/PackedVector.h27
-rw-r--r--include/llvm/ADT/SparseBitVector.h16
-rw-r--r--include/llvm/Attributes.h349
-rw-r--r--include/llvm/CodeGen/MachineModuleInfoImpls.h4
-rw-r--r--include/llvm/CodeGen/ValueTypes.h23
-rw-r--r--include/llvm/Config/config.h.cmake3
-rw-r--r--include/llvm/Function.h30
-rw-r--r--include/llvm/IRBuilder.h58
-rw-r--r--include/llvm/MC/MCExpr.h2
-rw-r--r--include/llvm/MC/MCInstrDesc.h2
-rw-r--r--include/llvm/MC/MCTargetAsmParser.h2
-rw-r--r--include/llvm/Object/MachOFormat.h5
-rw-r--r--include/llvm/Operator.h10
-rw-r--r--include/llvm/Support/TargetFolder.h9
-rw-r--r--include/llvm/Target/TargetData.h8
-rw-r--r--include/llvm/Target/TargetLowering.h15
-rw-r--r--include/llvm/Transforms/Utils/IntegerDivision.h10
-rw-r--r--include/llvm/Transforms/Utils/ValueMapper.h2
-rw-r--r--lib/Analysis/CodeMetrics.cpp2
-rw-r--r--lib/Analysis/IPA/CallGraph.cpp9
-rw-r--r--lib/Analysis/InlineCost.cpp6
-rw-r--r--lib/Analysis/Lint.cpp48
-rw-r--r--lib/Analysis/MemoryDependenceAnalysis.cpp2
-rw-r--r--lib/AsmParser/LLParser.cpp63
-rw-r--r--lib/Bitcode/Reader/BitcodeReader.cpp2
-rw-r--r--lib/Bitcode/Writer/BitcodeWriter.cpp2
-rw-r--r--lib/CodeGen/AsmPrinter/AsmPrinter.cpp7
-rw-r--r--lib/CodeGen/BranchFolding.cpp2
-rw-r--r--lib/CodeGen/CodePlacementOpt.cpp2
-rw-r--r--lib/CodeGen/LiveInterval.cpp2
-rw-r--r--lib/CodeGen/LiveRangeEdit.cpp2
-rw-r--r--lib/CodeGen/MachineBlockPlacement.cpp2
-rw-r--r--lib/CodeGen/MachineFunction.cpp4
-rw-r--r--lib/CodeGen/MachineModuleInfoImpls.cpp4
-rw-r--r--lib/CodeGen/PrologEpilogInserter.cpp6
-rw-r--r--lib/CodeGen/RegisterCoalescer.cpp8
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp74
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAG.cpp6
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp125
-rw-r--r--lib/CodeGen/SelectionDAG/TargetLowering.cpp1
-rw-r--r--lib/CodeGen/StackProtector.cpp4
-rw-r--r--lib/CodeGen/TailDuplication.cpp2
-rw-r--r--lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt9
-rw-r--r--lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp22
-rw-r--r--lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h (renamed from include/llvm/ExecutionEngine/IntelJITEventsWrapper.h)2
-rw-r--r--lib/ExecutionEngine/IntelJITEvents/Makefile5
-rw-r--r--lib/ExecutionEngine/IntelJITEvents/ittnotify_config.h449
-rw-r--r--lib/ExecutionEngine/IntelJITEvents/ittnotify_types.h63
-rw-r--r--lib/ExecutionEngine/IntelJITEvents/jitprofiling.c476
-rw-r--r--lib/ExecutionEngine/IntelJITEvents/jitprofiling.h254
-rw-r--r--lib/MC/MCExpr.cpp2
-rw-r--r--lib/Support/APFloat.cpp2
-rw-r--r--lib/Support/Errno.cpp11
-rw-r--r--lib/Support/Host.cpp2
-rw-r--r--lib/Support/Unix/Path.inc17
-rw-r--r--lib/Support/Unix/Signals.inc2
-rw-r--r--lib/Support/Windows/PathV2.inc2
-rw-r--r--lib/Support/YAMLParser.cpp7
-rw-r--r--lib/Target/ARM/ARM.h1
-rw-r--r--lib/Target/ARM/ARM.td22
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.cpp469
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.h9
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.cpp2
-rw-r--r--lib/Target/ARM/ARMFastISel.cpp45
-rw-r--r--lib/Target/ARM/ARMFrameLowering.cpp2
-rw-r--r--lib/Target/ARM/ARMISelDAGToDAG.cpp21
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp14
-rw-r--r--lib/Target/ARM/ARMInstrFormats.td21
-rw-r--r--lib/Target/ARM/ARMInstrInfo.cpp62
-rw-r--r--lib/Target/ARM/ARMInstrInfo.td68
-rw-r--r--lib/Target/ARM/ARMInstrNEON.td32
-rw-r--r--lib/Target/ARM/ARMInstrThumb2.td26
-rw-r--r--lib/Target/ARM/ARMInstrVFP.td4
-rw-r--r--lib/Target/ARM/ARMMachineFunctionInfo.h12
-rw-r--r--lib/Target/ARM/ARMRegisterInfo.td9
-rw-r--r--lib/Target/ARM/ARMSchedule.td2
-rw-r--r--lib/Target/ARM/ARMScheduleSwift.td1085
-rw-r--r--lib/Target/ARM/ARMSubtarget.cpp6
-rw-r--r--lib/Target/ARM/ARMSubtarget.h12
-rw-r--r--lib/Target/ARM/ARMTargetMachine.cpp5
-rw-r--r--lib/Target/ARM/AsmParser/ARMAsmParser.cpp24
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp9
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp8
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp49
-rw-r--r--lib/Target/ARM/MLxExpansionPass.cpp68
-rw-r--r--lib/Target/Hexagon/HexagonISelLowering.cpp2
-rw-r--r--lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp11
-rw-r--r--lib/Target/Mips/AsmParser/MipsAsmParser.cpp21
-rw-r--r--lib/Target/Mips/CMakeLists.txt1
-rw-r--r--lib/Target/Mips/Disassembler/MipsDisassembler.cpp29
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h6
-rw-r--r--lib/Target/Mips/Makefile2
-rw-r--r--lib/Target/Mips/Mips16InstrInfo.cpp21
-rw-r--r--lib/Target/Mips/Mips16InstrInfo.td99
-rw-r--r--lib/Target/Mips/Mips16RegisterInfo.cpp12
-rw-r--r--lib/Target/Mips/MipsAsmPrinter.cpp11
-rw-r--r--lib/Target/Mips/MipsAsmPrinter.h8
-rw-r--r--lib/Target/Mips/MipsDSPInstrFormats.td284
-rw-r--r--lib/Target/Mips/MipsDSPInstrInfo.td1299
-rw-r--r--lib/Target/Mips/MipsISelLowering.cpp213
-rw-r--r--lib/Target/Mips/MipsISelLowering.h4
-rw-r--r--lib/Target/Mips/MipsInstrInfo.td5
-rw-r--r--lib/Target/Mips/MipsMCInstLower.h2
-rw-r--r--lib/Target/Mips/MipsMachineFunction.h16
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.td43
-rw-r--r--lib/Target/Mips/MipsSERegisterInfo.cpp11
-rw-r--r--lib/Target/Mips/MipsSubtarget.cpp3
-rw-r--r--lib/Target/PowerPC/PPCFrameLowering.cpp4
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp10
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.cpp2
-rw-r--r--lib/Target/TargetData.cpp2
-rw-r--r--lib/Target/X86/AsmParser/X86AsmParser.cpp23
-rw-r--r--lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp7
-rw-r--r--lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp7
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp5
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp48
-rw-r--r--lib/Target/X86/X86FrameLowering.cpp2
-rw-r--r--lib/Target/X86/X86ISelDAGToDAG.cpp149
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp100
-rw-r--r--lib/Target/X86/X86ISelLowering.h5
-rw-r--r--lib/Target/X86/X86InstrCompiler.td57
-rw-r--r--lib/Target/X86/X86InstrInfo.cpp18
-rw-r--r--lib/Target/X86/X86InstrInfo.td12
-rw-r--r--lib/Target/X86/X86InstrShiftRotate.td78
-rw-r--r--lib/Target/X86/X86RegisterInfo.cpp2
-rw-r--r--lib/Target/X86/X86Subtarget.h1
-rw-r--r--lib/Transforms/IPO/ArgumentPromotion.cpp20
-rw-r--r--lib/Transforms/IPO/DeadArgumentElimination.cpp6
-rw-r--r--lib/Transforms/IPO/GlobalOpt.cpp4
-rw-r--r--lib/Transforms/IPO/InlineAlways.cpp4
-rw-r--r--lib/Transforms/IPO/Inliner.cpp12
-rw-r--r--lib/Transforms/IPO/PassManagerBuilder.cpp7
-rw-r--r--lib/Transforms/InstCombine/InstCombineCalls.cpp6
-rw-r--r--lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp42
-rw-r--r--lib/Transforms/InstCombine/InstCombineSelect.cpp24
-rw-r--r--lib/Transforms/Instrumentation/AddressSanitizer.cpp2
-rw-r--r--lib/Transforms/Scalar/CodeGenPrepare.cpp7
-rw-r--r--lib/Transforms/Scalar/CorrelatedValuePropagation.cpp5
-rw-r--r--lib/Transforms/Scalar/DeadStoreElimination.cpp126
-rw-r--r--lib/Transforms/Scalar/LoopUnrollPass.cpp2
-rw-r--r--lib/Transforms/Scalar/LoopUnswitch.cpp2
-rw-r--r--lib/Transforms/Scalar/SROA.cpp161
-rw-r--r--lib/Transforms/Utils/IntegerDivision.cpp122
-rw-r--r--lib/Transforms/Utils/SimplifyCFG.cpp280
-rw-r--r--lib/Transforms/Utils/ValueMapper.cpp2
-rw-r--r--lib/VMCore/Attributes.cpp41
-rw-r--r--lib/VMCore/AttributesImpl.h40
-rw-r--r--lib/VMCore/Function.cpp10
-rw-r--r--lib/VMCore/IRBuilder.cpp6
-rw-r--r--lib/VMCore/LLVMContextImpl.cpp7
-rw-r--r--lib/VMCore/LLVMContextImpl.h6
-rw-r--r--lib/VMCore/ValueTypes.cpp14
-rw-r--r--lib/VMCore/Verifier.cpp2
-rw-r--r--test/Analysis/CallGraph/do-nothing-intrinsic.ll13
-rw-r--r--test/CodeGen/ARM/2010-12-07-PEIBug.ll2
-rw-r--r--test/CodeGen/ARM/2012-05-04-vmov.ll11
-rw-r--r--test/CodeGen/ARM/2012-05-10-PreferVMOVtoVDUP32.ll14
-rw-r--r--test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv.ll11
-rw-r--r--test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv2.ll11
-rw-r--r--test/CodeGen/ARM/atomicrmw_minmax.ll21
-rw-r--r--test/CodeGen/ARM/avoid-cpsr-rmw.ll1
-rw-r--r--test/CodeGen/ARM/call-noret.ll39
-rw-r--r--test/CodeGen/ARM/div.ll17
-rw-r--r--test/CodeGen/ARM/domain-conv-vmovs.ll4
-rw-r--r--test/CodeGen/ARM/fabss.ll4
-rw-r--r--test/CodeGen/ARM/fadds.ll8
-rw-r--r--test/CodeGen/ARM/fast-isel-pic.ll18
-rw-r--r--test/CodeGen/ARM/fdivs.ll8
-rw-r--r--test/CodeGen/ARM/fmuls.ll8
-rw-r--r--test/CodeGen/ARM/fp_convert.ll4
-rw-r--r--test/CodeGen/ARM/fsubs.ll6
-rw-r--r--test/CodeGen/ARM/ifcvt1.ll12
-rw-r--r--test/CodeGen/ARM/ifcvt12.ll15
-rw-r--r--test/CodeGen/ARM/ifcvt5.ll12
-rw-r--r--test/CodeGen/ARM/ldr_post.ll1
-rw-r--r--test/CodeGen/ARM/ldr_pre.ll1
-rw-r--r--test/CodeGen/ARM/mls.ll12
-rw-r--r--test/CodeGen/ARM/neon-fma.ll22
-rw-r--r--test/CodeGen/ARM/neon_ld2.ll27
-rw-r--r--test/CodeGen/ARM/opt-shuff-tstore.ll4
-rw-r--r--test/CodeGen/ARM/reg_sequence.ll1
-rw-r--r--test/CodeGen/ARM/subreg-remat.ll4
-rw-r--r--test/CodeGen/Mips/dsp-r1.ll1241
-rw-r--r--test/CodeGen/Mips/dsp-r2.ll568
-rw-r--r--test/CodeGen/Mips/vector-load-store.ll27
-rw-r--r--test/CodeGen/PowerPC/pr13891.ll27
-rw-r--r--test/CodeGen/Thumb2/cortex-fp.ll6
-rw-r--r--test/CodeGen/Thumb2/div.ll10
-rw-r--r--test/CodeGen/Thumb2/thumb2-mla.ll7
-rw-r--r--test/CodeGen/Thumb2/thumb2-smla.ll4
-rw-r--r--test/CodeGen/X86/2012-09-28-CGPBug.ll53
-rw-r--r--test/CodeGen/X86/atomic-minmax-i6432.ll51
-rw-r--r--test/CodeGen/X86/atomic6432.ll1
-rw-r--r--test/CodeGen/X86/crash.ll35
-rw-r--r--test/CodeGen/X86/jump_sign.ll28
-rw-r--r--test/CodeGen/X86/mulx32.ll22
-rw-r--r--test/CodeGen/X86/mulx64.ll22
-rw-r--r--test/CodeGen/X86/phys_subreg_coalesce-3.ll2
-rw-r--r--test/CodeGen/X86/pic_jumptable.ll12
-rw-r--r--test/CodeGen/X86/ptr-rotate.ll2
-rw-r--r--test/CodeGen/X86/rot32.ll29
-rw-r--r--test/CodeGen/X86/rot64.ll31
-rw-r--r--test/CodeGen/X86/rotate2.ll2
-rw-r--r--test/CodeGen/X86/shift-bmi2.ll178
-rw-r--r--test/CodeGen/X86/tailcall-64.ll29
-rw-r--r--test/CodeGen/X86/targetLoweringGeneric.ll2
-rw-r--r--test/DebugInfo/bug_null_debuginfo.ll3
-rw-r--r--test/MC/ARM/arm-arithmetic-aliases.s4
-rw-r--r--test/MC/MachO/ARM/long-call-branch-island-relocation.s43
-rw-r--r--test/MC/MachO/i386-large-relocations.s36
-rw-r--r--test/Other/lint.ll25
-rw-r--r--test/Transforms/CorrelatedValuePropagation/crash.ll25
-rw-r--r--test/Transforms/DeadStoreElimination/libcalls.ll70
-rw-r--r--test/Transforms/GlobalOpt/load-store-global.ll25
-rw-r--r--test/Transforms/InstCombine/2012-09-24-MemcpyFromGlobalCrash.ll19
-rw-r--r--test/Transforms/InstCombine/memcpy_chk-1.ll60
-rw-r--r--test/Transforms/InstCombine/memcpy_chk-2.ll24
-rw-r--r--test/Transforms/InstCombine/memmove_chk-1.ll60
-rw-r--r--test/Transforms/InstCombine/memmove_chk-2.ll24
-rw-r--r--test/Transforms/InstCombine/memset_chk-1.ll61
-rw-r--r--test/Transforms/InstCombine/memset_chk-2.ll20
-rw-r--r--test/Transforms/InstCombine/memset_chk.ll18
-rw-r--r--test/Transforms/InstCombine/strcpy_chk-1.ll88
-rw-r--r--test/Transforms/InstCombine/strcpy_chk-2.ll21
-rw-r--r--test/Transforms/InstCombine/strcpy_chk.ll13
-rw-r--r--test/Transforms/InstCombine/strncpy_chk-1.ll66
-rw-r--r--test/Transforms/InstCombine/strncpy_chk-2.ll21
-rw-r--r--test/Transforms/InstCombine/vec_demanded_elts.ll2
-rw-r--r--test/Transforms/InstCombine/vec_shuffle.ll43
-rw-r--r--test/Transforms/LoopUnroll/pr11361.ll2
-rw-r--r--test/Transforms/LoopUnswitch/2011-06-02-CritSwitch.ll2
-rw-r--r--test/Transforms/PhaseOrdering/gdce.ll106
-rw-r--r--test/Transforms/SROA/alignment.ll85
-rw-r--r--test/Transforms/SROA/basictest.ll42
-rw-r--r--test/Transforms/SROA/phi-and-select.ll45
-rw-r--r--test/Transforms/SimplifyCFG/switch_to_lookup_table.ll125
-rw-r--r--tools/lli/CMakeLists.txt2
-rw-r--r--unittests/ADT/APFloatTest.cpp17
-rw-r--r--unittests/ExecutionEngine/JIT/CMakeLists.txt2
-rw-r--r--unittests/ExecutionEngine/JIT/IntelJITEventListenerTest.cpp7
-rw-r--r--unittests/Transforms/Utils/CMakeLists.txt1
-rw-r--r--unittests/Transforms/Utils/IntegerDivision.cpp142
-rw-r--r--utils/TableGen/AsmMatcherEmitter.cpp16
253 files changed, 11290 insertions, 1813 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index df781f52cd..d3edc02198 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -115,6 +115,11 @@ if(LLVM_ENABLE_TIMESTAMPS)
set(ENABLE_TIMESTAMPS 1)
endif()
+option(LLVM_ENABLE_BACKTRACES "Enable embedding backtraces on crash." ON)
+if(LLVM_ENABLE_BACKTRACES)
+ set(ENABLE_BACKTRACES 1)
+endif()
+
option(LLVM_ENABLE_FFI "Use libffi to call external functions from the interpreter" OFF)
set(FFI_LIBRARY_DIR "" CACHE PATH "Additional directory, where CMake should search for libffi.so")
set(FFI_INCLUDE_DIR "" CACHE PATH "Additional directory, where CMake should search for ffi.h or ffi/ffi.h")
@@ -172,23 +177,7 @@ option(LLVM_USE_INTEL_JITEVENTS
if( LLVM_USE_INTEL_JITEVENTS )
# Verify we are on a supported platform
- if( CMAKE_SYSTEM_NAME MATCHES "Windows" OR CMAKE_SYSTEM_NAME MATCHES "Linux" )
- # Directory where Intel Parallel Amplifier XE 2011 is installed.
- if ( WIN32 )
- set(LLVM_INTEL_JITEVENTS_DIR $ENV{VTUNE_AMPLIFIER_XE_2011_DIR})
- else ( WIN32 )
- set(LLVM_INTEL_JITEVENTS_DIR "/opt/intel/vtune_amplifier_xe_2011")
- endif ( WIN32 )
-
- # Set include and library search paths for Intel JIT Events API
- set(LLVM_INTEL_JITEVENTS_INCDIR "${LLVM_INTEL_JITEVENTS_DIR}/include")
-
- if ( CMAKE_SIZEOF_VOID_P EQUAL 8 )
- set(LLVM_INTEL_JITEVENTS_LIBDIR "${LLVM_INTEL_JITEVENTS_DIR}/lib64")
- else ( CMAKE_SIZEOF_VOID_P EQUAL 8 )
- set(LLVM_INTEL_JITEVENTS_LIBDIR "${LLVM_INTEL_JITEVENTS_DIR}/lib32")
- endif ( CMAKE_SIZEOF_VOID_P EQUAL 8 )
- else()
+ if( NOT CMAKE_SYSTEM_NAME MATCHES "Windows" AND NOT CMAKE_SYSTEM_NAME MATCHES "Linux" )
message(FATAL_ERROR
"Intel JIT API support is available on Linux and Windows only.")
endif()
diff --git a/Makefile b/Makefile
index 870fa492a0..49e4b24ebf 100644
--- a/Makefile
+++ b/Makefile
@@ -117,16 +117,18 @@ cross-compile-build-tools:
unset CFLAGS ; \
unset CXXFLAGS ; \
unset SDKROOT ; \
+ unset UNIVERSAL_SDK_PATH ; \
$(PROJ_SRC_DIR)/configure --build=$(BUILD_TRIPLE) \
--host=$(BUILD_TRIPLE) --target=$(BUILD_TRIPLE) \
--disable-polly ; \
cd .. ; \
fi; \
- (unset SDKROOT; \
- $(MAKE) -C BuildTools \
+ ($(MAKE) -C BuildTools \
BUILD_DIRS_ONLY=1 \
NACL_SANDBOX=0 \
UNIVERSAL= \
+ UNIVERSAL_SDK_PATH= \
+ SDKROOT= \
TARGET_NATIVE_ARCH="$(TARGET_NATIVE_ARCH)" \
TARGETS_TO_BUILD="$(TARGETS_TO_BUILD)" \
ENABLE_OPTIMIZED=$(ENABLE_OPTIMIZED) \
diff --git a/autoconf/configure.ac b/autoconf/configure.ac
index 354d3d7322..12dd5ea783 100644
--- a/autoconf/configure.ac
+++ b/autoconf/configure.ac
@@ -1295,46 +1295,23 @@ AC_DEFINE_UNQUOTED([LLVM_USE_OPROFILE],$USE_OPROFILE,
dnl Enable support for Intel JIT Events API.
AC_ARG_WITH(intel-jitevents,
- AS_HELP_STRING([--with-intel-jitevents=<vtune-amplifier-dir>],
- [Specify location of run-time support library for Intel JIT API (default=/opt/intel/vtune_amplifier_xe_2011)]),
+ AS_HELP_STRING([--with-intel-jitevents Notify Intel JIT profiling API of generated code]),
[
+ case "$withval" in
+ yes) AC_SUBST(USE_INTEL_JITEVENTS,[1]);;
+ no) AC_SUBST(USE_INTEL_JITEVENTS,[0]);;
+ *) AC_MSG_ERROR([Invalid setting for --with-intel-jitevents. Use "yes" or "no"]);;
+ esac
+
case $llvm_cv_os_type in
Linux|Win32|Cygwin|MingW) ;;
- *)
- AC_MSG_ERROR([
- Intel JIT API support is available on Linux and Windows only."]) ;;
+ *) AC_MSG_ERROR([Intel JIT API support is available on Linux and Windows only.]);;
esac
- AC_SUBST(USE_INTEL_JITEVENTS, [1])
case "$llvm_cv_target_arch" in
- x86) llvm_intel_jitevents_archdir="lib32";;
- x86_64) llvm_intel_jitevents_archdir="lib64";;
- *) echo "Target architecture $llvm_cv_target_arch does not support Intel JIT Events API"
- exit -1;;
- esac
- INTEL_JITEVENTS_INCDIR="/opt/intel/vtune_amplifier_xe_2011/include"
- INTEL_JITEVENTS_LIBDIR="/opt/intel/vtune_amplifier_xe_2011/$llvm_intel_jitevents_archdir"
- case "$withval" in
- /* | [[A-Za-z]]:[[\\/]]*) INTEL_JITEVENTS_INCDIR=$withval/include
- INTEL_JITEVENTS_LIBDIR=$withval/$llvm_intel_jitevents_archdir ;;
- *) ;;
+ x86|x86_64) ;;
+ *) AC_MSG_ERROR([Target architecture $llvm_cv_target_arch does not support Intel JIT Events API.]);;
esac
-
- AC_SUBST(INTEL_JITEVENTS_INCDIR)
- AC_SUBST(INTEL_JITEVENTS_LIBDIR)
-
- LIBS="$LIBS -L${INTEL_JITEVENTS_LIBDIR}"
- CPPFLAGS="$CPPFLAGS -I$INTEL_JITEVENTS_INCDIR"
-
- AC_SEARCH_LIBS(iJIT_IsProfilingActive, jitprofiling, [], [
- echo "Error! Cannot find libjitprofiling.a. Please check path specified in flag --with-intel-jitevents"
- exit -1
- ])
- AC_CHECK_HEADER([jitprofiling.h], [], [
- echo "Error! Cannot find jitprofiling.h. Please check path specified in flag --with-intel-jitevents"
- exit -1
- ])
-
],
[
AC_SUBST(USE_INTEL_JITEVENTS, [0])
diff --git a/configure b/configure
index e844511d5a..70c4a6a65a 100755
--- a/configure
+++ b/configure
@@ -766,8 +766,6 @@ COVERED_SWITCH_DEFAULT
USE_UDIS86
USE_OPROFILE
USE_INTEL_JITEVENTS
-INTEL_JITEVENTS_INCDIR
-INTEL_JITEVENTS_LIBDIR
XML2CONFIG
LIBXML2_LIBS
LIBXML2_INC
@@ -1462,10 +1460,8 @@ Optional Packages:
--with-udis86=<path> Use udis86 external x86 disassembler library
--with-oprofile=<prefix>
Tell OProfile >= 0.9.4 how to symbolize JIT output
- --with-intel-jitevents=<vtune-amplifier-dir>
- Specify location of run-time support library for
- Intel JIT API
- (default=/opt/intel/vtune_amplifier_xe_2011)
+ --with-intel-jitevents Notify Intel JIT profiling API of generated code
+
Some influential environment variables:
CC C compiler command
@@ -10321,7 +10317,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<EOF
-#line 10319 "configure"
+#line 10315 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -13581,308 +13577,30 @@ _ACEOF
# Check whether --with-intel-jitevents was given.
if test "${with_intel_jitevents+set}" = set; then
withval=$with_intel_jitevents;
+ case "$withval" in
+ yes) USE_INTEL_JITEVENTS=1
+;;
+ no) USE_INTEL_JITEVENTS=0
+;;
+ *) { { echo "$as_me:$LINENO: error: Invalid setting for --with-intel-jitevents. Use \"yes\" or \"no\"" >&5
+echo "$as_me: error: Invalid setting for --with-intel-jitevents. Use \"yes\" or \"no\"" >&2;}
+ { (exit 1); exit 1; }; };;
+ esac
+
case $llvm_cv_os_type in
Linux|Win32|Cygwin|MingW) ;;
- *)
- { { echo "$as_me:$LINENO: error:
- Intel JIT API support is available on Linux and Windows only.\"" >&5
-echo "$as_me: error:
- Intel JIT API support is available on Linux and Windows only.\"" >&2;}
- { (exit 1); exit 1; }; } ;;
+ *) { { echo "$as_me:$LINENO: error: Intel JIT API support is available on Linux and Windows only." >&5
+echo "$as_me: error: Intel JIT API support is available on Linux and Windows only." >&2;}
+ { (exit 1); exit 1; }; };;
esac
- USE_INTEL_JITEVENTS=1
-
case "$llvm_cv_target_arch" in
- x86) llvm_intel_jitevents_archdir="lib32";;
- x86_64) llvm_intel_jitevents_archdir="lib64";;
- *) echo "Target architecture $llvm_cv_target_arch does not support Intel JIT Events API"
- exit -1;;
- esac
- INTEL_JITEVENTS_INCDIR="/opt/intel/vtune_amplifier_xe_2011/include"
- INTEL_JITEVENTS_LIBDIR="/opt/intel/vtune_amplifier_xe_2011/$llvm_intel_jitevents_archdir"
- case "$withval" in
- /* | [A-Za-z]:[\\/]*) INTEL_JITEVENTS_INCDIR=$withval/include
- INTEL_JITEVENTS_LIBDIR=$withval/$llvm_intel_jitevents_archdir ;;
- *) ;;
+ x86|x86_64) ;;
+ *) { { echo "$as_me:$LINENO: error: Target architecture $llvm_cv_target_arch does not support Intel JIT Events API." >&5
+echo "$as_me: error: Target architecture $llvm_cv_target_arch does not support Intel JIT Events API." >&2;}
+ { (exit 1); exit 1; }; };;
esac
-
-
-
- LIBS="$LIBS -L${INTEL_JITEVENTS_LIBDIR}"
- CPPFLAGS="$CPPFLAGS -I$INTEL_JITEVENTS_INCDIR"
-
- { echo "$as_me:$LINENO: checking for library containing iJIT_IsProfilingActive" >&5
-echo $ECHO_N "checking for library containing iJIT_IsProfilingActive... $ECHO_C" >&6; }
-if test "${ac_cv_search_iJIT_IsProfilingActive+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- ac_func_search_save_LIBS=$LIBS
-cat >conftest.$ac_ext <<_ACEOF
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-
-/* Override any GCC internal prototype to avoid an error.
- Use char because int might match the return type of a GCC
- builtin and then its argument prototype would still apply. */
-#ifdef __cplusplus
-extern "C"
-#endif
-char iJIT_IsProfilingActive ();
-int
-main ()
-{
-return iJIT_IsProfilingActive ();
- ;
- return 0;
-}
-_ACEOF
-for ac_lib in '' jitprofiling; do
- if test -z "$ac_lib"; then
- ac_res="none required"
- else
- ac_res=-l$ac_lib
- LIBS="-l$ac_lib $ac_func_search_save_LIBS"
- fi
- rm -f conftest.$ac_objext conftest$ac_exeext
-if { (ac_try="$ac_link"
-case "(($ac_try" in
- *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
- *) ac_try_echo=$ac_try;;
-esac
-eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
- (eval "$ac_link") 2>conftest.er1
- ac_status=$?
- grep -v '^ *+' conftest.er1 >conftest.err
- rm -f conftest.er1
- cat conftest.err >&5
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
- { (case "(($ac_try" in
- *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
- *) ac_try_echo=$ac_try;;
-esac
-eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
- (eval "$ac_try") 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; } &&
- { ac_try='test -s conftest$ac_exeext'
- { (case "(($ac_try" in
- *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
- *) ac_try_echo=$ac_try;;
-esac
-eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
- (eval "$ac_try") 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_cv_search_iJIT_IsProfilingActive=$ac_res
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-
-fi
-
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext
- if test "${ac_cv_search_iJIT_IsProfilingActive+set}" = set; then
- break
-fi
-done
-if test "${ac_cv_search_iJIT_IsProfilingActive+set}" = set; then
- :
-else
- ac_cv_search_iJIT_IsProfilingActive=no
-fi
-rm conftest.$ac_ext
-LIBS=$ac_func_search_save_LIBS
-fi
-{ echo "$as_me:$LINENO: result: $ac_cv_search_iJIT_IsProfilingActive" >&5
-echo "${ECHO_T}$ac_cv_search_iJIT_IsProfilingActive" >&6; }
-ac_res=$ac_cv_search_iJIT_IsProfilingActive
-if test "$ac_res" != no; then
- test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
-
-else
-
- echo "Error! Cannot find libjitprofiling.a. Please check path specified in flag --with-intel-jitevents"
- exit -1
-
-fi
-
- if test "${ac_cv_header_jitprofiling_h+set}" = set; then
- { echo "$as_me:$LINENO: checking for jitprofiling.h" >&5
-echo $ECHO_N "checking for jitprofiling.h... $ECHO_C" >&6; }
-if test "${ac_cv_header_jitprofiling_h+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-fi
-{ echo "$as_me:$LINENO: result: $ac_cv_header_jitprofiling_h" >&5
-echo "${ECHO_T}$ac_cv_header_jitprofiling_h" >&6; }
-else
- # Is the header compilable?
-{ echo "$as_me:$LINENO: checking jitprofiling.h usability" >&5
-echo $ECHO_N "checking jitprofiling.h usability... $ECHO_C" >&6; }
-cat >conftest.$ac_ext <<_ACEOF
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-#include <jitprofiling.h>
-_ACEOF
-rm -f conftest.$ac_objext
-if { (ac_try="$ac_compile"
-case "(($ac_try" in
- *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
- *) ac_try_echo=$ac_try;;
-esac
-eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
- (eval "$ac_compile") 2>conftest.er1
- ac_status=$?
- grep -v '^ *+' conftest.er1 >conftest.err
- rm -f conftest.er1
- cat conftest.err >&5
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
- { (case "(($ac_try" in
- *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
- *) ac_try_echo=$ac_try;;
-esac
-eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
- (eval "$ac_try") 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; } &&
- { ac_try='test -s conftest.$ac_objext'
- { (case "(($ac_try" in
- *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
- *) ac_try_echo=$ac_try;;
-esac
-eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
- (eval "$ac_try") 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_header_compiler=yes
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
- ac_header_compiler=no
-fi
-
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5
-echo "${ECHO_T}$ac_header_compiler" >&6; }
-
-# Is the header present?
-{ echo "$as_me:$LINENO: checking jitprofiling.h presence" >&5
-echo $ECHO_N "checking jitprofiling.h presence... $ECHO_C" >&6; }
-cat >conftest.$ac_ext <<_ACEOF
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-#include <jitprofiling.h>
-_ACEOF
-if { (ac_try="$ac_cpp conftest.$ac_ext"
-case "(($ac_try" in
- *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
- *) ac_try_echo=$ac_try;;
-esac
-eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
- (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
- ac_status=$?
- grep -v '^ *+' conftest.er1 >conftest.err
- rm -f conftest.er1
- cat conftest.err >&5
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } >/dev/null; then
- if test -s conftest.err; then
- ac_cpp_err=$ac_c_preproc_warn_flag
- ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
- else
- ac_cpp_err=
- fi
-else
- ac_cpp_err=yes
-fi
-if test -z "$ac_cpp_err"; then
- ac_header_preproc=yes
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
- ac_header_preproc=no
-fi
-
-rm -f conftest.err conftest.$ac_ext
-{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5
-echo "${ECHO_T}$ac_header_preproc" >&6; }
-
-# So? What about this header?
-case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in
- yes:no: )
- { echo "$as_me:$LINENO: WARNING: jitprofiling.h: accepted by the compiler, rejected by the preprocessor!" >&5
-echo "$as_me: WARNING: jitprofiling.h: accepted by the compiler, rejected by the preprocessor!" >&2;}
- { echo "$as_me:$LINENO: WARNING: jitprofiling.h: proceeding with the compiler's result" >&5
-echo "$as_me: WARNING: jitprofiling.h: proceeding with the compiler's result" >&2;}
- ac_header_preproc=yes
- ;;
- no:yes:* )
- { echo "$as_me:$LINENO: WARNING: jitprofiling.h: present but cannot be compiled" >&5
-echo "$as_me: WARNING: jitprofiling.h: present but cannot be compiled" >&2;}
- { echo "$as_me:$LINENO: WARNING: jitprofiling.h: check for missing prerequisite headers?" >&5
-echo "$as_me: WARNING: jitprofiling.h: check for missing prerequisite headers?" >&2;}
- { echo "$as_me:$LINENO: WARNING: jitprofiling.h: see the Autoconf documentation" >&5
-echo "$as_me: WARNING: jitprofiling.h: see the Autoconf documentation" >&2;}
- { echo "$as_me:$LINENO: WARNING: jitprofiling.h: section \"Present But Cannot Be Compiled\"" >&5
-echo "$as_me: WARNING: jitprofiling.h: section \"Present But Cannot Be Compiled\"" >&2;}
- { echo "$as_me:$LINENO: WARNING: jitprofiling.h: proceeding with the preprocessor's result" >&5
-echo "$as_me: WARNING: jitprofiling.h: proceeding with the preprocessor's result" >&2;}
- { echo "$as_me:$LINENO: WARNING: jitprofiling.h: in the future, the compiler will take precedence" >&5
-echo "$as_me: WARNING: jitprofiling.h: in the future, the compiler will take precedence" >&2;}
- ( cat <<\_ASBOX
-## ------------------------------------ ##
-## Report this to http://llvm.org/bugs/ ##
-## ------------------------------------ ##
-_ASBOX
- ) | sed "s/^/$as_me: WARNING: /" >&2
- ;;
-esac
-{ echo "$as_me:$LINENO: checking for jitprofiling.h" >&5
-echo $ECHO_N "checking for jitprofiling.h... $ECHO_C" >&6; }
-if test "${ac_cv_header_jitprofiling_h+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- ac_cv_header_jitprofiling_h=$ac_header_preproc
-fi
-{ echo "$as_me:$LINENO: result: $ac_cv_header_jitprofiling_h" >&5
-echo "${ECHO_T}$ac_cv_header_jitprofiling_h" >&6; }
-
-fi
-if test $ac_cv_header_jitprofiling_h = yes; then
- :
-else
-
- echo "Error! Cannot find jitprofiling.h. Please check path specified in flag --with-intel-jitevents"
- exit -1
-
-fi
-
-
-
-
else
USE_INTEL_JITEVENTS=0
@@ -22313,8 +22031,6 @@ COVERED_SWITCH_DEFAULT!$COVERED_SWITCH_DEFAULT$ac_delim
USE_UDIS86!$USE_UDIS86$ac_delim
USE_OPROFILE!$USE_OPROFILE$ac_delim
USE_INTEL_JITEVENTS!$USE_INTEL_JITEVENTS$ac_delim
-INTEL_JITEVENTS_INCDIR!$INTEL_JITEVENTS_INCDIR$ac_delim
-INTEL_JITEVENTS_LIBDIR!$INTEL_JITEVENTS_LIBDIR$ac_delim
XML2CONFIG!$XML2CONFIG$ac_delim
LIBXML2_LIBS!$LIBXML2_LIBS$ac_delim
LIBXML2_INC!$LIBXML2_INC$ac_delim
@@ -22343,7 +22059,7 @@ LIBOBJS!$LIBOBJS$ac_delim
LTLIBOBJS!$LTLIBOBJS$ac_delim
_ACEOF
- if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 95; then
+ if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 93; then
break
elif $ac_last_try; then
{ { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5
diff --git a/docs/CMake.rst b/docs/CMake.rst
index e1761c5b1d..7f0420c446 100644
--- a/docs/CMake.rst
+++ b/docs/CMake.rst
@@ -273,11 +273,6 @@ LLVM-specific variables
**LLVM_USE_INTEL_JITEVENTS**:BOOL
Enable building support for Intel JIT Events API. Defaults to OFF
-**LLVM_INTEL_JITEVENTS_DIR**:PATH
- Path to installation of Intel(R) VTune(TM) Amplifier XE 2011, used to locate
- the ``jitprofiling`` library. Default = ``%VTUNE_AMPLIFIER_XE_2011_DIR%``
- (Windows) | ``/opt/intel/vtune_amplifier_xe_2011`` (Linux)
-
Executing the test suite
========================
diff --git a/docs/HowToBuildOnARM.rst b/docs/HowToBuildOnARM.rst
new file mode 100644
index 0000000000..6f9ac4adc0
--- /dev/null
+++ b/docs/HowToBuildOnARM.rst
@@ -0,0 +1,34 @@
+.. _how_to_build_on_arm:
+
+===================================================================
+How To Build On ARM
+===================================================================
+
+.. sectionauthor:: Wei-Ren Chen (陳韋任) <chenwj@iis.sinica.edu.tw>
+
+Introduction
+============
+
+This document contains information about building/testing LLVM and
+Clang on ARM.
+
+Notes On Building LLVM/Clang on ARM
+=====================================
+Here are some notes on building/testing LLVM/Clang on ARM. Note that
+ARM encompasses a wide variety of CPUs; this advice is primarily based
+on the ARMv6 and ARMv7 architectures and may be inapplicable to older chips.
+
+#. If you are building LLVM/Clang on an ARM board with 1G of memory or less,
+ please use ``gold`` rather then GNU ``ld``.
+ Building LLVM/Clang with ``--enable-optimized``
+ is prefered since it consumes less memory. Otherwise, the building
+ process will very likely fail due to insufficient memory. In any
+ case it is probably a good idea to set up a swap partition.
+
+#. If you want to run ``make
+ check-all`` after building LLVM/Clang, to avoid false alarms (eg, ARCMT
+ failure) please use the following configuration:
+
+ .. code-block:: bash
+
+ $ ../$LLVM_SRC_DIR/configure --with-abi=aapcs
diff --git a/docs/HowToSubmitABug.html b/docs/HowToSubmitABug.html
deleted file mode 100644
index 39f8385129..0000000000
--- a/docs/HowToSubmitABug.html
+++ /dev/null
@@ -1,345 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
- "http://www.w3.org/TR/html4/strict.dtd">
-<html>
-<head>
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
- <title>How to submit an LLVM bug report</title>
- <link rel="stylesheet" href="_static/llvm.css" type="text/css">
-</head>
-<body>
-
-<h1>
- How to submit an LLVM bug report
-</h1>
-
-<table class="layout" style="width: 90%" >
-<tr class="layout">
- <td class="left">
-<ol>
- <li><a href="#introduction">Introduction - Got bugs?</a></li>
- <li><a href="#crashers">Crashing Bugs</a>
- <ul>
- <li><a href="#front-end">Front-end bugs</a>
- <li><a href="#ct_optimizer">Compile-time optimization bugs</a>
- <li><a href="#ct_codegen">Code generator bugs</a>
- </ul></li>
- <li><a href="#miscompilations">Miscompilations</a></li>
- <li><a href="#codegen">Incorrect code generation (JIT and LLC)</a></li>
-</ol>
-<div class="doc_author">
- <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a> and
- <a href="http://misha.brukman.net">Misha Brukman</a></p>
-</div>
-</td>
-</tr>
-</table>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="introduction">Introduction - Got bugs?</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>If you're working with LLVM and run into a bug, we definitely want to know
-about it. This document describes what you can do to increase the odds of
-getting it fixed quickly.</p>
-
-<p>Basically you have to do two things at a minimum. First, decide whether the
-bug <a href="#crashers">crashes the compiler</a> (or an LLVM pass), or if the
-compiler is <a href="#miscompilations">miscompiling</a> the program (i.e., the
-compiler successfully produces an executable, but it doesn't run right). Based
-on
-what type of bug it is, follow the instructions in the linked section to narrow
-down the bug so that the person who fixes it will be able to find the problem
-more easily.</p>
-
-<p>Once you have a reduced test-case, go to <a
-href="http://llvm.org/bugs/enter_bug.cgi">the LLVM Bug Tracking
-System</a> and fill out the form with the necessary details (note that you don't
-need to pick a category, just use the "new-bugs" category if you're not sure).
-The bug description should contain the following
-information:</p>
-
-<ul>
- <li>All information necessary to reproduce the problem.</li>
- <li>The reduced test-case that triggers the bug.</li>
- <li>The location where you obtained LLVM (if not from our Subversion
- repository).</li>
-</ul>
-
-<p>Thanks for helping us make LLVM better!</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="crashers">Crashing Bugs</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>More often than not, bugs in the compiler cause it to crash&mdash;often due
-to an assertion failure of some sort. The most important
-piece of the puzzle is to figure out if it is crashing in the GCC front-end
-or if it is one of the LLVM libraries (e.g. the optimizer or code generator)
-that has problems.</p>
-
-<p>To figure out which component is crashing (the front-end,
-optimizer or code generator), run the
-<tt><b>llvm-gcc</b></tt> command line as you were when the crash occurred, but
-with the following extra command line options:</p>
-
-<ul>
- <li><tt><b>-O0 -emit-llvm</b></tt>: If <tt>llvm-gcc</tt> still crashes when
- passed these options (which disable the optimizer and code generator), then
- the crash is in the front-end. Jump ahead to the section on <a
- href="#front-end">front-end bugs</a>.</li>
-
- <li><tt><b>-emit-llvm</b></tt>: If <tt>llvm-gcc</tt> crashes with this option
- (which disables the code generator), you found an optimizer bug. Jump ahead
- to <a href="#ct_optimizer"> compile-time optimization bugs</a>.</li>
-
- <li>Otherwise, you have a code generator crash. Jump ahead to <a
- href="#ct_codegen">code generator bugs</a>.</li>
-
-</ul>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="front-end">Front-end bugs</a>
-</h3>
-
-<div>
-
-<p>If the problem is in the front-end, you should re-run the same
-<tt>llvm-gcc</tt> command that resulted in the crash, but add the
-<tt>-save-temps</tt> option. The compiler will crash again, but it will leave
-behind a <tt><i>foo</i>.i</tt> file (containing preprocessed C source code) and
-possibly <tt><i>foo</i>.s</tt> for each
-compiled <tt><i>foo</i>.c</tt> file. Send us the <tt><i>foo</i>.i</tt> file,
-along with the options you passed to llvm-gcc, and a brief description of the
-error it caused.</p>
-
-<p>The <a href="http://delta.tigris.org/">delta</a> tool helps to reduce the
-preprocessed file down to the smallest amount of code that still replicates the
-problem. You're encouraged to use delta to reduce the code to make the
-developers' lives easier. <a
-href="http://gcc.gnu.org/wiki/A_guide_to_testcase_reduction">This website</a>
-has instructions on the best way to use delta.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="ct_optimizer">Compile-time optimization bugs</a>
-</h3>
-
-<div>
-
-<p>If you find that a bug crashes in the optimizer, compile your test-case to a
-<tt>.bc</tt> file by passing "<tt><b>-emit-llvm -O0 -c -o foo.bc</b></tt>".
-Then run:</p>
-
-<div class="doc_code">
-<p><tt><b>opt</b> -std-compile-opts -debug-pass=Arguments foo.bc
- -disable-output</tt></p>
-</div>
-
-<p>This command should do two things: it should print out a list of passes, and
-then it should crash in the same way as llvm-gcc. If it doesn't crash, please
-follow the instructions for a <a href="#front-end">front-end bug</a>.</p>
-
-<p>If this does crash, then you should be able to debug this with the following
-bugpoint command:</p>
-
-<div class="doc_code">
-<p><tt><b>bugpoint</b> foo.bc &lt;list of passes printed by
-<b>opt</b>&gt;</tt></p>
-</div>
-
-<p>Please run this, then file a bug with the instructions and reduced .bc files
-that bugpoint emits. If something goes wrong with bugpoint, please submit the
-"foo.bc" file and the list of passes printed by <b>opt</b>.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="ct_codegen">Code generator bugs</a>
-</h3>
-
-<div>
-
-<p>If you find a bug that crashes llvm-gcc in the code generator, compile your
-source file to a .bc file by passing "<tt><b>-emit-llvm -c -o foo.bc</b></tt>"
-to llvm-gcc (in addition to the options you already pass). Once your have
-foo.bc, one of the following commands should fail:</p>
-
-<ol>
-<li><tt><b>llc</b> foo.bc</tt></li>
-<li><tt><b>llc</b> foo.bc -relocation-model=pic</tt></li>
-<li><tt><b>llc</b> foo.bc -relocation-model=static</tt></li>
-</ol>
-
-<p>If none of these crash, please follow the instructions for a
-<a href="#front-end">front-end bug</a>. If one of these do crash, you should
-be able to reduce this with one of the following bugpoint command lines (use
-the one corresponding to the command above that failed):</p>
-
-<ol>
-<li><tt><b>bugpoint</b> -run-llc foo.bc</tt></li>
-<li><tt><b>bugpoint</b> -run-llc foo.bc --tool-args
- -relocation-model=pic</tt></li>
-<li><tt><b>bugpoint</b> -run-llc foo.bc --tool-args
- -relocation-model=static</tt></li>
-</ol>
-
-<p>Please run this, then file a bug with the instructions and reduced .bc file
-that bugpoint emits. If something goes wrong with bugpoint, please submit the
-"foo.bc" file and the option that llc crashes with.</p>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="miscompilations">Miscompilations</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>If llvm-gcc successfully produces an executable, but that executable doesn't
-run right, this is either a bug in the code or a bug in the
-compiler. The first thing to check is to make sure it is not using undefined
-behavior (e.g. reading a variable before it is defined). In particular, check
-to see if the program <a href="http://valgrind.org/">valgrind</a>s clean,
-passes purify, or some other memory checker tool. Many of the "LLVM bugs" that
-we have chased down ended up being bugs in the program being compiled, not
- LLVM.</p>
-
-<p>Once you determine that the program itself is not buggy, you should choose
-which code generator you wish to compile the program with (e.g. LLC or the JIT)
-and optionally a series of LLVM passes to run. For example:</p>
-
-<div class="doc_code">
-<p><tt>
-<b>bugpoint</b> -run-llc [... optzn passes ...] file-to-test.bc --args -- [program arguments]</tt></p>
-</div>
-
-<p><tt>bugpoint</tt> will try to narrow down your list of passes to the one pass
-that causes an error, and simplify the bitcode file as much as it can to assist
-you. It will print a message letting you know how to reproduce the resulting
-error.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="codegen">Incorrect code generation</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Similarly to debugging incorrect compilation by mis-behaving passes, you can
-debug incorrect code generation by either LLC or the JIT, using
-<tt>bugpoint</tt>. The process <tt>bugpoint</tt> follows in this case is to try
-to narrow the code down to a function that is miscompiled by one or the other
-method, but since for correctness, the entire program must be run,
-<tt>bugpoint</tt> will compile the code it deems to not be affected with the C
-Backend, and then link in the shared object it generates.</p>
-
-<p>To debug the JIT:</p>
-
-<div class="doc_code">
-<pre>
-bugpoint -run-jit -output=[correct output file] [bitcode file] \
- --tool-args -- [arguments to pass to lli] \
- --args -- [program arguments]
-</pre>
-</div>
-
-<p>Similarly, to debug the LLC, one would run:</p>
-
-<div class="doc_code">
-<pre>
-bugpoint -run-llc -output=[correct output file] [bitcode file] \
- --tool-args -- [arguments to pass to llc] \
- --args -- [program arguments]
-</pre>
-</div>
-
-<p><b>Special note:</b> if you are debugging MultiSource or SPEC tests that
-already exist in the <tt>llvm/test</tt> hierarchy, there is an easier way to
-debug the JIT, LLC, and CBE, using the pre-written Makefile targets, which
-will pass the program options specified in the Makefiles:</p>
-
-<div class="doc_code">
-<p><tt>
-cd llvm/test/../../program<br>
-make bugpoint-jit
-</tt></p>
-</div>
-
-<p>At the end of a successful <tt>bugpoint</tt> run, you will be presented
-with two bitcode files: a <em>safe</em> file which can be compiled with the C
-backend and the <em>test</em> file which either LLC or the JIT
-mis-codegenerates, and thus causes the error.</p>
-
-<p>To reproduce the error that <tt>bugpoint</tt> found, it is sufficient to do
-the following:</p>
-
-<ol>
-
-<li><p>Regenerate the shared object from the safe bitcode file:</p>
-
-<div class="doc_code">
-<p><tt>
-<b>llc</b> -march=c safe.bc -o safe.c<br>
-<b>gcc</b> -shared safe.c -o safe.so
-</tt></p>
-</div></li>
-
-<li><p>If debugging LLC, compile test bitcode native and link with the shared
- object:</p>
-
-<div class="doc_code">
-<p><tt>
-<b>llc</b> test.bc -o test.s<br>
-<b>gcc</b> test.s safe.so -o test.llc<br>
-./test.llc [program options]
-</tt></p>
-</div></li>
-
-<li><p>If debugging the JIT, load the shared object and supply the test
- bitcode:</p>
-
-<div class="doc_code">
-<p><tt><b>lli</b> -load=safe.so test.bc [program options]</tt></p>
-</div></li>
-
-</ol>
-
-</div>
-
-<!-- *********************************************************************** -->
-<hr>
-<address>
- <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
- src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a>
- <a href="http://validator.w3.org/check/referer"><img
- src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
-
- <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
- <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a>
- <br>
- Last modified: $Date$
-</address>
-
-</body>
-</html>
diff --git a/docs/HowToSubmitABug.rst b/docs/HowToSubmitABug.rst
new file mode 100644
index 0000000000..ff2d649ce3
--- /dev/null
+++ b/docs/HowToSubmitABug.rst
@@ -0,0 +1,233 @@
+.. _how-to-submit-a-bug-report:
+
+================================
+How to submit an LLVM bug report
+================================
+
+.. sectionauthor:: Chris Lattner <sabre@nondot.org> and Misha Brukman <http://misha.brukman.net>
+
+Introduction - Got bugs?
+========================
+
+
+If you're working with LLVM and run into a bug, we definitely want to know
+about it. This document describes what you can do to increase the odds of
+getting it fixed quickly.
+
+Basically you have to do two things at a minimum. First, decide whether
+the bug `crashes the compiler`_ (or an LLVM pass), or if the
+compiler is `miscompiling`_ the program (i.e., the
+compiler successfully produces an executable, but it doesn't run right).
+Based on what type of bug it is, follow the instructions in the linked
+section to narrow down the bug so that the person who fixes it will be able
+to find the problem more easily.
+
+Once you have a reduced test-case, go to `the LLVM Bug Tracking System
+<http://llvm.org/bugs/enter_bug.cgi>`_ and fill out the form with the
+necessary details (note that you don't need to pick a category, just use
+the "new-bugs" category if you're not sure). The bug description should
+contain the following information:
+
+* All information necessary to reproduce the problem.
+* The reduced test-case that triggers the bug.
+* The location where you obtained LLVM (if not from our Subversion
+ repository).
+
+Thanks for helping us make LLVM better!
+
+.. _crashes the compiler:
+
+Crashing Bugs
+=============
+
+More often than not, bugs in the compiler cause it to crash---often due to
+an assertion failure of some sort. The most important piece of the puzzle
+is to figure out if it is crashing in the GCC front-end or if it is one of
+the LLVM libraries (e.g. the optimizer or code generator) that has
+problems.
+
+To figure out which component is crashing (the front-end, optimizer or code
+generator), run the ``llvm-gcc`` command line as you were when the crash
+occurred, but with the following extra command line options:
+
+* ``-O0 -emit-llvm``: If ``llvm-gcc`` still crashes when passed these
+ options (which disable the optimizer and code generator), then the crash
+ is in the front-end. Jump ahead to the section on :ref:`front-end bugs
+ <front-end>`.
+
+* ``-emit-llvm``: If ``llvm-gcc`` crashes with this option (which disables
+ the code generator), you found an optimizer bug. Jump ahead to
+ `compile-time optimization bugs`_.
+
+* Otherwise, you have a code generator crash. Jump ahead to `code
+ generator bugs`_.
+
+.. _front-end bug:
+.. _front-end:
+
+Front-end bugs
+--------------
+
+If the problem is in the front-end, you should re-run the same ``llvm-gcc``
+command that resulted in the crash, but add the ``-save-temps`` option.
+The compiler will crash again, but it will leave behind a ``foo.i`` file
+(containing preprocessed C source code) and possibly ``foo.s`` for each
+compiled ``foo.c`` file. Send us the ``foo.i`` file, along with the options
+you passed to ``llvm-gcc``, and a brief description of the error it caused.
+
+The `delta <http://delta.tigris.org/>`_ tool helps to reduce the
+preprocessed file down to the smallest amount of code that still replicates
+the problem. You're encouraged to use delta to reduce the code to make the
+developers' lives easier. `This website
+<http://gcc.gnu.org/wiki/A_guide_to_testcase_reduction>`_ has instructions
+on the best way to use delta.
+
+.. _compile-time optimization bugs:
+
+Compile-time optimization bugs
+------------------------------
+
+If you find that a bug crashes in the optimizer, compile your test-case to a
+``.bc`` file by passing "``-emit-llvm -O0 -c -o foo.bc``".
+Then run:
+
+.. code-block:: bash
+
+ opt -std-compile-opts -debug-pass=Arguments foo.bc -disable-output
+
+This command should do two things: it should print out a list of passes, and
+then it should crash in the same way as llvm-gcc. If it doesn't crash, please
+follow the instructions for a `front-end bug`_.
+
+If this does crash, then you should be able to debug this with the following
+bugpoint command:
+
+.. code-block:: bash
+
+ bugpoint foo.bc <list of passes printed by opt>
+
+Please run this, then file a bug with the instructions and reduced .bc
+files that bugpoint emits. If something goes wrong with bugpoint, please
+submit the "foo.bc" file and the list of passes printed by ``opt``.
+
+.. _code generator bugs:
+
+Code generator bugs
+-------------------
+
+If you find a bug that crashes llvm-gcc in the code generator, compile your
+source file to a .bc file by passing "``-emit-llvm -c -o foo.bc``" to
+llvm-gcc (in addition to the options you already pass). Once your have
+foo.bc, one of the following commands should fail:
+
+#. ``llc foo.bc``
+#. ``llc foo.bc -relocation-model=pic``
+#. ``llc foo.bc -relocation-model=static``
+
+If none of these crash, please follow the instructions for a `front-end
+bug`_. If one of these do crash, you should be able to reduce this with
+one of the following bugpoint command lines (use the one corresponding to
+the command above that failed):
+
+#. ``bugpoint -run-llc foo.bc``
+#. ``bugpoint -run-llc foo.bc --tool-args -relocation-model=pic``
+#. ``bugpoint -run-llc foo.bc --tool-args -relocation-model=static``
+
+Please run this, then file a bug with the instructions and reduced .bc file
+that bugpoint emits. If something goes wrong with bugpoint, please submit
+the "foo.bc" file and the option that llc crashes with.
+
+.. _miscompiling:
+
+Miscompilations
+===============
+
+If llvm-gcc successfully produces an executable, but that executable
+doesn't run right, this is either a bug in the code or a bug in the
+compiler. The first thing to check is to make sure it is not using
+undefined behavior (e.g. reading a variable before it is defined). In
+particular, check to see if the program `valgrind
+<http://valgrind.org/>`_'s clean, passes purify, or some other memory
+checker tool. Many of the "LLVM bugs" that we have chased down ended up
+being bugs in the program being compiled, not LLVM.
+
+Once you determine that the program itself is not buggy, you should choose
+which code generator you wish to compile the program with (e.g. LLC or the JIT)
+and optionally a series of LLVM passes to run. For example:
+
+.. code-block:: bash
+
+ bugpoint -run-llc [... optzn passes ...] file-to-test.bc --args -- [program arguments]
+
+bugpoint will try to narrow down your list of passes to the one pass that
+causes an error, and simplify the bitcode file as much as it can to assist
+you. It will print a message letting you know how to reproduce the
+resulting error.
+
+Incorrect code generation
+=========================
+
+Similarly to debugging incorrect compilation by mis-behaving passes, you
+can debug incorrect code generation by either LLC or the JIT, using
+``bugpoint``. The process ``bugpoint`` follows in this case is to try to
+narrow the code down to a function that is miscompiled by one or the other
+method, but since for correctness, the entire program must be run,
+``bugpoint`` will compile the code it deems to not be affected with the C
+Backend, and then link in the shared object it generates.
+
+To debug the JIT:
+
+.. code-block:: bash
+
+ bugpoint -run-jit -output=[correct output file] [bitcode file] \
+ --tool-args -- [arguments to pass to lli] \
+ --args -- [program arguments]
+
+Similarly, to debug the LLC, one would run:
+
+.. code-block:: bash
+
+ bugpoint -run-llc -output=[correct output file] [bitcode file] \
+ --tool-args -- [arguments to pass to llc] \
+ --args -- [program arguments]
+
+**Special note:** if you are debugging MultiSource or SPEC tests that
+already exist in the ``llvm/test`` hierarchy, there is an easier way to
+debug the JIT, LLC, and CBE, using the pre-written Makefile targets, which
+will pass the program options specified in the Makefiles:
+
+.. code-block:: bash
+
+ cd llvm/test/../../program
+ make bugpoint-jit
+
+At the end of a successful ``bugpoint`` run, you will be presented
+with two bitcode files: a *safe* file which can be compiled with the C
+backend and the *test* file which either LLC or the JIT
+mis-codegenerates, and thus causes the error.
+
+To reproduce the error that ``bugpoint`` found, it is sufficient to do
+the following:
+
+#. Regenerate the shared object from the safe bitcode file:
+
+ .. code-block:: bash
+
+ llc -march=c safe.bc -o safe.c
+ gcc -shared safe.c -o safe.so
+
+#. If debugging LLC, compile test bitcode native and link with the shared
+ object:
+
+ .. code-block:: bash
+
+ llc test.bc -o test.s
+ gcc test.s safe.so -o test.llc
+ ./test.llc [program options]
+
+#. If debugging the JIT, load the shared object and supply the test
+ bitcode:
+
+ .. code-block:: bash
+
+ lli -load=safe.so test.bc [program options]
diff --git a/docs/index.rst b/docs/index.rst
index 53d3e7c01b..50f76a3e3f 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -15,43 +15,43 @@ research projects.
Similarly, documentation is broken down into several high-level groupings
targeted at different audiences:
- * **Design & Overview**
+* **Design & Overview**
- Several introductory papers and presentations are available at
- :ref:`design_and_overview`.
+ Several introductory papers and presentations are available at
+ :ref:`design_and_overview`.
- * **Publications**
+* **Publications**
- The list of `publications <http://llvm.org/pubs>`_ based on LLVM.
+ The list of `publications <http://llvm.org/pubs>`_ based on LLVM.
- * **User Guides**
+* **User Guides**
- Those new to the LLVM system should first vist the :ref:`userguides`.
+ Those new to the LLVM system should first vist the :ref:`userguides`.
- NOTE: If you are a user who is only interested in using LLVM-based
- compilers, you should look into `Clang <http://clang.llvm.org>`_ or
- `DragonEgg <http://dragonegg.llvm.org>`_ instead. The documentation here is
- intended for users who have a need to work with the intermediate LLVM
- representation.
+ NOTE: If you are a user who is only interested in using LLVM-based
+ compilers, you should look into `Clang <http://clang.llvm.org>`_ or
+ `DragonEgg <http://dragonegg.llvm.org>`_ instead. The documentation here is
+ intended for users who have a need to work with the intermediate LLVM
+ representation.
- * **API Clients**
+* **API Clients**
- Developers of applications which use LLVM as a library should visit the
- :ref:`programming`.
+ Developers of applications which use LLVM as a library should visit the
+ :ref:`programming`.
- * **Subsystems**
+* **Subsystems**
- API clients and LLVM developers may be interested in the
- :ref:`subsystems` documentation.
+ API clients and LLVM developers may be interested in the
+ :ref:`subsystems` documentation.
- * **Development Process**
+* **Development Process**
- Additional documentation on the LLVM project can be found at
- :ref:`development_process`.
+ Additional documentation on the LLVM project can be found at
+ :ref:`development_process`.
- * **Mailing Lists**
+* **Mailing Lists**
- For more information, consider consulting the LLVM :ref:`mailing_lists`.
+ For more information, consider consulting the LLVM :ref:`mailing_lists`.
.. toctree::
:maxdepth: 2
diff --git a/docs/userguides.rst b/docs/userguides.rst
index c7197ef628..c5dd979224 100644
--- a/docs/userguides.rst
+++ b/docs/userguides.rst
@@ -7,6 +7,7 @@ User Guides
:hidden:
CMake
+ HowToBuildOnARM
CommandGuide/index
DeveloperPolicy
GettingStartedVS
@@ -15,6 +16,7 @@ User Guides
Packaging
HowToAddABuilder
yaml2obj
+ HowToSubmitABug
* `The LLVM Getting Started Guide <GettingStarted.html>`_
@@ -26,7 +28,11 @@ User Guides
An addendum to the main Getting Started guide for those using the `CMake
build system <http://www.cmake.org>`_.
-
+
+* :ref:`how_to_build_on_arm`
+
+ Notes on building and testing LLVM/Clang on ARM.
+
* `Getting Started with the LLVM System using Microsoft Visual Studio
<GettingStartedVS.html>`_
@@ -59,7 +65,7 @@ User Guides
This describes new features, known bugs, and other limitations.
-* `How to Submit A Bug Report <HowToSubmitABug.html>`_
+* :ref:`how-to-submit-a-bug-report`
Instructions for properly submitting information about any bugs you run into
in the LLVM system.
diff --git a/include/llvm/ADT/PackedVector.h b/include/llvm/ADT/PackedVector.h
index 2eaddc2b4e..1ae2a77e7e 100644
--- a/include/llvm/ADT/PackedVector.h
+++ b/include/llvm/ADT/PackedVector.h
@@ -19,32 +19,32 @@
namespace llvm {
-template <typename T, unsigned BitNum, bool isSigned>
+template <typename T, unsigned BitNum, typename BitVectorTy, bool isSigned>
class PackedVectorBase;
// This won't be necessary if we can specialize members without specializing
// the parent template.
-template <typename T, unsigned BitNum>
-class PackedVectorBase<T, BitNum, false> {
+template <typename T, unsigned BitNum, typename BitVectorTy>
+class PackedVectorBase<T, BitNum, BitVectorTy, false> {
protected:
- static T getValue(const llvm::BitVector &Bits, unsigned Idx) {
+ static T getValue(const BitVectorTy &Bits, unsigned Idx) {
T val = T();
for (unsigned i = 0; i != BitNum; ++i)
val = T(val | ((Bits[(Idx << (BitNum-1)) + i] ? 1UL : 0UL) << i));
return val;
}
- static void setValue(llvm::BitVector &Bits, unsigned Idx, T val) {
+ static void setValue(BitVectorTy &Bits, unsigned Idx, T val) {
assert((val >> BitNum) == 0 && "value is too big");
for (unsigned i = 0; i != BitNum; ++i)
Bits[(Idx << (BitNum-1)) + i] = val & (T(1) << i);
}
};
-template <typename T, unsigned BitNum>
-class PackedVectorBase<T, BitNum, true> {
+template <typename T, unsigned BitNum, typename BitVectorTy>
+class PackedVectorBase<T, BitNum, BitVectorTy, true> {
protected:
- static T getValue(const llvm::BitVector &Bits, unsigned Idx) {
+ static T getValue(const BitVectorTy &Bits, unsigned Idx) {
T val = T();
for (unsigned i = 0; i != BitNum-1; ++i)
val = T(val | ((Bits[(Idx << (BitNum-1)) + i] ? 1UL : 0UL) << i));
@@ -53,7 +53,7 @@ protected:
return val;
}
- static void setValue(llvm::BitVector &Bits, unsigned Idx, T val) {
+ static void setValue(BitVectorTy &Bits, unsigned Idx, T val) {
if (val < 0) {
val = ~val;
Bits.set((Idx << (BitNum-1)) + BitNum-1);
@@ -71,11 +71,12 @@ protected:
/// @endcode
/// will create a vector accepting values -2, -1, 0, 1. Any other value will hit
/// an assertion.
-template <typename T, unsigned BitNum>
-class PackedVector : public PackedVectorBase<T, BitNum,
+template <typename T, unsigned BitNum, typename BitVectorTy = BitVector>
+class PackedVector : public PackedVectorBase<T, BitNum, BitVectorTy,
std::numeric_limits<T>::is_signed> {
- llvm::BitVector Bits;
- typedef PackedVectorBase<T, BitNum, std::numeric_limits<T>::is_signed> base;
+ BitVectorTy Bits;
+ typedef PackedVectorBase<T, BitNum, BitVectorTy,
+ std::numeric_limits<T>::is_signed> base;
public:
class reference {
diff --git a/include/llvm/ADT/SparseBitVector.h b/include/llvm/ADT/SparseBitVector.h
index 791f1082c2..306e92832f 100644
--- a/include/llvm/ADT/SparseBitVector.h
+++ b/include/llvm/ADT/SparseBitVector.h
@@ -262,6 +262,22 @@ public:
}
};
+template <unsigned ElementSize>
+struct ilist_traits<SparseBitVectorElement<ElementSize> >
+ : public ilist_default_traits<SparseBitVectorElement<ElementSize> > {
+ typedef SparseBitVectorElement<ElementSize> Element;
+
+ Element *createSentinel() const { return static_cast<Element *>(&Sentinel); }
+ static void destroySentinel(Element *) {}
+
+ Element *provideInitialHead() const { return createSentinel(); }
+ Element *ensureHead(Element *) const { return createSentinel(); }
+ static void noteHead(Element *, Element *) {}
+
+private:
+ mutable ilist_half_node<Element> Sentinel;
+};
+
template <unsigned ElementSize = 128>
class SparseBitVector {
typedef ilist<SparseBitVectorElement<ElementSize> > ElementList;
diff --git a/include/llvm/Attributes.h b/include/llvm/Attributes.h
index 9dc2c1aa57..c9589603f9 100644
--- a/include/llvm/Attributes.h
+++ b/include/llvm/Attributes.h
@@ -22,6 +22,7 @@
namespace llvm {
+class LLVMContext;
class Type;
namespace Attribute {
@@ -96,16 +97,160 @@ DECLARE_LLVM_ATTRIBUTE(AddressSafety,1ULL<<32) ///< Address safety checking is o
#undef DECLARE_LLVM_ATTRIBUTE
+/// Note that uwtable is about the ABI or the user mandating an entry in the
+/// unwind table. The nounwind attribute is about an exception passing by the
+/// function.
+/// In a theoretical system that uses tables for profiling and sjlj for
+/// exceptions, they would be fully independent. In a normal system that
+/// uses tables for both, the semantics are:
+/// nil = Needs an entry because an exception might pass by.
+/// nounwind = No need for an entry
+/// uwtable = Needs an entry because the ABI says so and because
+/// an exception might pass by.
+/// uwtable + nounwind = Needs an entry because the ABI says so.
+
+/// @brief Attributes that only apply to function parameters.
+const AttrConst ParameterOnly = {ByVal_i | Nest_i |
+ StructRet_i | NoCapture_i};
+
+/// @brief Attributes that may be applied to the function itself. These cannot
+/// be used on return values or function parameters.
+const AttrConst FunctionOnly = {NoReturn_i | NoUnwind_i | ReadNone_i |
+ ReadOnly_i | NoInline_i | AlwaysInline_i | OptimizeForSize_i |
+ StackProtect_i | StackProtectReq_i | NoRedZone_i | NoImplicitFloat_i |
+ Naked_i | InlineHint_i | StackAlignment_i |
+ UWTable_i | NonLazyBind_i | ReturnsTwice_i | AddressSafety_i};
+
+/// @brief Parameter attributes that do not apply to vararg call arguments.
+const AttrConst VarArgsIncompatible = {StructRet_i};
+
+/// @brief Attributes that are mutually incompatible.
+const AttrConst MutuallyIncompatible[5] = {
+ {ByVal_i | Nest_i | StructRet_i},
+ {ByVal_i | Nest_i | InReg_i },
+ {ZExt_i | SExt_i},
+ {ReadNone_i | ReadOnly_i},
+ {NoInline_i | AlwaysInline_i}
+};
+
} // namespace Attribute
+/// AttributeImpl - The internal representation of the Attributes class. This is
+/// uniquified.
+class AttributesImpl;
+
/// Attributes - A bitset of attributes.
class Attributes {
// Currently, we need less than 64 bits.
uint64_t Bits;
+
+ explicit Attributes(AttributesImpl *A);
public:
- Attributes() : Bits(0) { }
- explicit Attributes(uint64_t Val) : Bits(Val) { }
- /*implicit*/ Attributes(Attribute::AttrConst Val) : Bits(Val.v) { }
+ Attributes() : Bits(0) {}
+ explicit Attributes(uint64_t Val) : Bits(Val) {}
+ /*implicit*/ Attributes(Attribute::AttrConst Val) : Bits(Val.v) {}
+
+ class Builder {
+ friend class Attributes;
+ uint64_t Bits;
+ public:
+ Builder() : Bits(0) {}
+ Builder(const Attributes &A) : Bits(A.Bits) {}
+
+ void addZExtAttr() {
+ Bits |= Attribute::ZExt_i;
+ }
+ void addSExtAttr() {
+ Bits |= Attribute::SExt_i;
+ }
+ void addNoReturnAttr() {
+ Bits |= Attribute::NoReturn_i;
+ }
+ void addInRegAttr() {
+ Bits |= Attribute::InReg_i;
+ }
+ void addStructRetAttr() {
+ Bits |= Attribute::StructRet_i;
+ }
+ void addNoUnwindAttr() {
+ Bits |= Attribute::NoUnwind_i;
+ }
+ void addNoAliasAttr() {
+ Bits |= Attribute::NoAlias_i;
+ }
+ void addByValAttr() {
+ Bits |= Attribute::ByVal_i;
+ }
+ void addNestAttr() {
+ Bits |= Attribute::Nest_i;
+ }
+ void addReadNoneAttr() {
+ Bits |= Attribute::ReadNone_i;
+ }
+ void addReadOnlyAttr() {
+ Bits |= Attribute::ReadOnly_i;
+ }
+ void addNoInlineAttr() {
+ Bits |= Attribute::NoInline_i;
+ }
+ void addAlwaysInlineAttr() {
+ Bits |= Attribute::AlwaysInline_i;
+ }
+ void addOptimizeForSizeAttr() {
+ Bits |= Attribute::OptimizeForSize_i;
+ }
+ void addStackProtectAttr() {
+ Bits |= Attribute::StackProtect_i;
+ }
+ void addStackProtectReqAttr() {
+ Bits |= Attribute::StackProtectReq_i;
+ }
+ void addNoCaptureAttr() {
+ Bits |= Attribute::NoCapture_i;
+ }
+ void addNoRedZoneAttr() {
+ Bits |= Attribute::NoRedZone_i;
+ }
+ void addNoImplicitFloatAttr() {
+ Bits |= Attribute::NoImplicitFloat_i;
+ }
+ void addNakedAttr() {
+ Bits |= Attribute::Naked_i;
+ }
+ void addInlineHintAttr() {
+ Bits |= Attribute::InlineHint_i;
+ }
+ void addReturnsTwiceAttr() {
+ Bits |= Attribute::ReturnsTwice_i;
+ }
+ void addUWTableAttr() {
+ Bits |= Attribute::UWTable_i;
+ }
+ void addNonLazyBindAttr() {
+ Bits |= Attribute::NonLazyBind_i;
+ }
+ void addAddressSafetyAttr() {
+ Bits |= Attribute::AddressSafety_i;
+ }
+ void addAlignmentAttr(unsigned Align) {
+ if (Align == 0) return;
+ assert(isPowerOf2_32(Align) && "Alignment must be a power of two.");
+ assert(Align <= 0x40000000 && "Alignment too large.");
+ Bits |= (Log2_32(Align) + 1) << 16;
+ }
+ void addStackAlignmentAttr(unsigned Align) {
+ // Default alignment, allow the target to define how to align it.
+ if (Align == 0) return;
+
+ assert(isPowerOf2_32(Align) && "Alignment must be a power of two.");
+ assert(Align <= 0x100 && "Alignment too large.");
+ Bits |= (Log2_32(Align) + 1) << 26;
+ }
+ };
+
+ /// get - Return a uniquified Attributes object. This takes the uniquified
+ /// value from the Builder and wraps it in the Attributes class.
+ static Attributes get(LLVMContext &Context, Builder &B);
// Attribute query methods.
// FIXME: StackAlignment & Alignment attributes have no predicate methods.
@@ -198,20 +343,12 @@ public:
return Bits & Attribute::AddressSafety_i;
}
- uint64_t getRawAlignment() const {
- return Bits & Attribute::Alignment_i;
- }
- uint64_t getRawStackAlignment() const {
- return Bits & Attribute::StackAlignment_i;
- }
-
/// This returns the alignment field of an attribute as a byte alignment
/// value.
unsigned getAlignment() const {
if (!hasAlignmentAttr())
return 0;
-
- return 1U << ((getRawAlignment() >> 16) - 1);
+ return 1U << (((Bits & Attribute::Alignment_i) >> 16) - 1);
}
/// This returns the stack alignment field of an attribute as a byte alignment
@@ -219,32 +356,7 @@ public:
unsigned getStackAlignment() const {
if (!hasStackAlignmentAttr())
return 0;
-
- return 1U << ((getRawStackAlignment() >> 26) - 1);
- }
-
- /// This turns an int alignment (a power of 2, normally) into the form used
- /// internally in Attributes.
- static Attributes constructAlignmentFromInt(unsigned i) {
- // Default alignment, allow the target to define how to align it.
- if (i == 0)
- return Attribute::None;
-
- assert(isPowerOf2_32(i) && "Alignment must be a power of two.");
- assert(i <= 0x40000000 && "Alignment too large.");
- return Attributes((Log2_32(i)+1) << 16);
- }
-
- /// This turns an int stack alignment (which must be a power of 2) into the
- /// form used internally in Attributes.
- static Attributes constructStackAlignmentFromInt(unsigned i) {
- // Default alignment, allow the target to define how to align it.
- if (i == 0)
- return Attribute::None;
-
- assert(isPowerOf2_32(i) && "Alignment must be a power of two.");
- assert(i <= 0x100 && "Alignment too large.");
- return Attributes((Log2_32(i)+1) << 26);
+ return 1U << (((Bits & Attribute::StackAlignment_i) >> 26) - 1);
}
// This is a "safe bool() operator".
@@ -276,107 +388,86 @@ public:
Attributes operator ~ () const { return Attributes(~Bits); }
uint64_t Raw() const { return Bits; }
- /// The set of Attributes set in Attributes is converted to a string of
- /// equivalent mnemonics. This is, presumably, for writing out the mnemonics
- /// for the assembly writer.
- /// @brief Convert attribute bits to text
- std::string getAsString() const;
-};
-
-namespace Attribute {
+ /// This turns an int alignment (a power of 2, normally) into the form used
+ /// internally in Attributes.
+ static Attributes constructAlignmentFromInt(unsigned i) {
+ // Default alignment, allow the target to define how to align it.
+ if (i == 0)
+ return Attribute::None;
-/// Note that uwtable is about the ABI or the user mandating an entry in the
-/// unwind table. The nounwind attribute is about an exception passing by the
-/// function.
-/// In a theoretical system that uses tables for profiling and sjlj for
-/// exceptions, they would be fully independent. In a normal system that
-/// uses tables for both, the semantics are:
-/// nil = Needs an entry because an exception might pass by.
-/// nounwind = No need for an entry
-/// uwtable = Needs an entry because the ABI says so and because
-/// an exception might pass by.
-/// uwtable + nounwind = Needs an entry because the ABI says so.
+ assert(isPowerOf2_32(i) && "Alignment must be a power of two.");
+ assert(i <= 0x40000000 && "Alignment too large.");
+ return Attributes((Log2_32(i)+1) << 16);
+ }
-/// @brief Attributes that only apply to function parameters.
-const AttrConst ParameterOnly = {ByVal_i | Nest_i |
- StructRet_i | NoCapture_i};
+ /// This turns an int stack alignment (which must be a power of 2) into the
+ /// form used internally in Attributes.
+ static Attributes constructStackAlignmentFromInt(unsigned i) {
+ // Default alignment, allow the target to define how to align it.
+ if (i == 0)
+ return Attribute::None;
-/// @brief Attributes that may be applied to the function itself. These cannot
-/// be used on return values or function parameters.
-const AttrConst FunctionOnly = {NoReturn_i | NoUnwind_i | ReadNone_i |
- ReadOnly_i | NoInline_i | AlwaysInline_i | OptimizeForSize_i |
- StackProtect_i | StackProtectReq_i | NoRedZone_i | NoImplicitFloat_i |
- Naked_i | InlineHint_i | StackAlignment_i |
- UWTable_i | NonLazyBind_i | ReturnsTwice_i | AddressSafety_i};
+ assert(isPowerOf2_32(i) && "Alignment must be a power of two.");
+ assert(i <= 0x100 && "Alignment too large.");
+ return Attributes((Log2_32(i)+1) << 26);
+ }
-/// @brief Parameter attributes that do not apply to vararg call arguments.
-const AttrConst VarArgsIncompatible = {StructRet_i};
+ /// @brief Which attributes cannot be applied to a type.
+ static Attributes typeIncompatible(Type *Ty);
+
+ /// This returns an integer containing an encoding of all the LLVM attributes
+ /// found in the given attribute bitset. Any change to this encoding is a
+ /// breaking change to bitcode compatibility.
+ static uint64_t encodeLLVMAttributesForBitcode(Attributes Attrs) {
+ // FIXME: It doesn't make sense to store the alignment information as an
+ // expanded out value, we should store it as a log2 value. However, we
+ // can't just change that here without breaking bitcode compatibility. If
+ // this ever becomes a problem in practice, we should introduce new tag
+ // numbers in the bitcode file and have those tags use a more efficiently
+ // encoded alignment field.
+
+ // Store the alignment in the bitcode as a 16-bit raw value instead of a
+ // 5-bit log2 encoded value. Shift the bits above the alignment up by 11
+ // bits.
+ uint64_t EncodedAttrs = Attrs.Raw() & 0xffff;
+ if (Attrs.hasAlignmentAttr())
+ EncodedAttrs |= (1ULL << 16) <<
+ (((Attrs.Bits & Attribute::Alignment_i) - 1) >> 16);
+ EncodedAttrs |= (Attrs.Raw() & (0xfffULL << 21)) << 11;
+ return EncodedAttrs;
+ }
+
+ /// This returns an attribute bitset containing the LLVM attributes that have
+ /// been decoded from the given integer. This function must stay in sync with
+ /// 'encodeLLVMAttributesForBitcode'.
+ static Attributes decodeLLVMAttributesForBitcode(uint64_t EncodedAttrs) {
+ // The alignment is stored as a 16-bit raw value from bits 31--16. We shift
+ // the bits above 31 down by 11 bits.
+ unsigned Alignment = (EncodedAttrs & (0xffffULL << 16)) >> 16;
+ assert((!Alignment || isPowerOf2_32(Alignment)) &&
+ "Alignment must be a power of two.");
+
+ Attributes Attrs(EncodedAttrs & 0xffff);
+ if (Alignment)
+ Attrs |= Attributes::constructAlignmentFromInt(Alignment);
+ Attrs |= Attributes((EncodedAttrs & (0xfffULL << 32)) >> 11);
+ return Attrs;
+ }
-/// @brief Attributes that are mutually incompatible.
-const AttrConst MutuallyIncompatible[5] = {
- {ByVal_i | Nest_i | StructRet_i},
- {ByVal_i | Nest_i | InReg_i },
- {ZExt_i | SExt_i},
- {ReadNone_i | ReadOnly_i},
- {NoInline_i | AlwaysInline_i}
+ /// The set of Attributes set in Attributes is converted to a string of
+ /// equivalent mnemonics. This is, presumably, for writing out the mnemonics
+ /// for the assembly writer.
+ /// @brief Convert attribute bits to text
+ std::string getAsString() const;
};
-/// @brief Which attributes cannot be applied to a type.
-Attributes typeIncompatible(Type *Ty);
-
-/// This returns an integer containing an encoding of all the
-/// LLVM attributes found in the given attribute bitset. Any
-/// change to this encoding is a breaking change to bitcode
-/// compatibility.
-inline uint64_t encodeLLVMAttributesForBitcode(Attributes Attrs) {
- // FIXME: It doesn't make sense to store the alignment information as an
- // expanded out value, we should store it as a log2 value. However, we can't
- // just change that here without breaking bitcode compatibility. If this ever
- // becomes a problem in practice, we should introduce new tag numbers in the
- // bitcode file and have those tags use a more efficiently encoded alignment
- // field.
-
- // Store the alignment in the bitcode as a 16-bit raw value instead of a
- // 5-bit log2 encoded value. Shift the bits above the alignment up by
- // 11 bits.
-
- uint64_t EncodedAttrs = Attrs.Raw() & 0xffff;
- if (Attrs.hasAlignmentAttr())
- EncodedAttrs |= (1ull << 16) <<
- ((Attrs.getRawAlignment() - 1) >> 16);
- EncodedAttrs |= (Attrs.Raw() & (0xfffull << 21)) << 11;
-
- return EncodedAttrs;
-}
-
-/// This returns an attribute bitset containing the LLVM attributes
-/// that have been decoded from the given integer. This function
-/// must stay in sync with 'encodeLLVMAttributesForBitcode'.
-inline Attributes decodeLLVMAttributesForBitcode(uint64_t EncodedAttrs) {
- // The alignment is stored as a 16-bit raw value from bits 31--16.
- // We shift the bits above 31 down by 11 bits.
-
- unsigned Alignment = (EncodedAttrs & (0xffffull << 16)) >> 16;
- assert((!Alignment || isPowerOf2_32(Alignment)) &&
- "Alignment must be a power of two.");
-
- Attributes Attrs(EncodedAttrs & 0xffff);
- if (Alignment)
- Attrs |= Attributes::constructAlignmentFromInt(Alignment);
- Attrs |= Attributes((EncodedAttrs & (0xfffull << 32)) >> 11);
-
- return Attrs;
-}
-
-} // end namespace Attribute
-
/// This is just a pair of values to associate a set of attributes
/// with an index.
struct AttributeWithIndex {
- Attributes Attrs; ///< The attributes that are set, or'd together.
- unsigned Index; ///< Index of the parameter for which the attributes apply.
- ///< Index 0 is used for return value attributes.
- ///< Index ~0U is used for function attributes.
+ Attributes Attrs; ///< The attributes that are set, or'd together.
+ unsigned Index; ///< Index of the parameter for which the attributes apply.
+ ///< Index 0 is used for return value attributes.
+ ///< Index ~0U is used for function attributes.
static AttributeWithIndex get(unsigned Idx, Attributes Attrs) {
AttributeWithIndex P;
diff --git a/include/llvm/CodeGen/MachineModuleInfoImpls.h b/include/llvm/CodeGen/MachineModuleInfoImpls.h
index 9401ffd199..7afc7eb6b3 100644
--- a/include/llvm/CodeGen/MachineModuleInfoImpls.h
+++ b/include/llvm/CodeGen/MachineModuleInfoImpls.h
@@ -38,7 +38,7 @@ namespace llvm {
/// this GV is external.
DenseMap<MCSymbol*, StubValueTy> HiddenGVStubs;
- virtual void Anchor(); // Out of line virtual method.
+ virtual void anchor(); // Out of line virtual method.
public:
MachineModuleInfoMachO(const MachineModuleInfo &) {}
@@ -76,7 +76,7 @@ namespace llvm {
/// mode.
DenseMap<MCSymbol*, StubValueTy> GVStubs;
- virtual void Anchor(); // Out of line virtual method.
+ virtual void anchor(); // Out of line virtual method.
public:
MachineModuleInfoELF(const MachineModuleInfo &) {}
diff --git a/include/llvm/CodeGen/ValueTypes.h b/include/llvm/CodeGen/ValueTypes.h
index 2d92d025b4..240199291a 100644
--- a/include/llvm/CodeGen/ValueTypes.h
+++ b/include/llvm/CodeGen/ValueTypes.h
@@ -181,6 +181,18 @@ namespace llvm {
SimpleTy <= MVT::LAST_VECTOR_VALUETYPE);
}
+ /// is16BitVector - Return true if this is a 16-bit vector type.
+ bool is16BitVector() const {
+ return (SimpleTy == MVT::v2i8 || SimpleTy == MVT::v1i16 ||
+ SimpleTy == MVT::v16i1);
+ }
+
+ /// is32BitVector - Return true if this is a 32-bit vector type.
+ bool is32BitVector() const {
+ return (SimpleTy == MVT::v4i8 || SimpleTy == MVT::v2i16 ||
+ SimpleTy == MVT::v1i32);
+ }
+
/// is64BitVector - Return true if this is a 64-bit vector type.
bool is64BitVector() const {
return (SimpleTy == MVT::v8i8 || SimpleTy == MVT::v4i16 ||
@@ -563,19 +575,12 @@ namespace llvm {
/// is16BitVector - Return true if this is a 16-bit vector type.
bool is16BitVector() const {
- if (!isSimple())
- return isExtended16BitVector();
-
- return (V == MVT::v2i8 || V==MVT::v1i16 || V == MVT::v16i1);
+ return isSimple() ? V.is16BitVector() : isExtended16BitVector();
}
/// is32BitVector - Return true if this is a 32-bit vector type.
bool is32BitVector() const {
- if (!isSimple())
- return isExtended32BitVector();
-
- return (V == MVT::v4i8 || V==MVT::v2i16
- || V==MVT::v1i32);
+ return isSimple() ? V.is32BitVector() : isExtended32BitVector();
}
/// is64BitVector - Return true if this is a 64-bit vector type.
diff --git a/include/llvm/Config/config.h.cmake b/include/llvm/Config/config.h.cmake
index eb20b6470b..ac760f911a 100644
--- a/include/llvm/Config/config.h.cmake
+++ b/include/llvm/Config/config.h.cmake
@@ -17,6 +17,9 @@
/* Default <path> to all compiler invocations for --sysroot=<path>. */
#undef DEFAULT_SYSROOT
+/* Define if you want backtraces on crash */
+#cmakedefine ENABLE_BACKTRACES
+
/* Define if position independent code is enabled */
#cmakedefine ENABLE_PIC
diff --git a/include/llvm/Function.h b/include/llvm/Function.h
index fbd2594a45..fa6d0d3f5b 100644
--- a/include/llvm/Function.h
+++ b/include/llvm/Function.h
@@ -168,10 +168,10 @@ public:
///
void setAttributes(const AttrListPtr &attrs) { AttributeList = attrs; }
- /// hasFnAttr - Return true if this function has the given attribute.
- bool hasFnAttr(Attributes N) const {
- // Function Attributes are stored at ~0 index
- return AttributeList.paramHasAttr(~0U, N);
+ /// getFnAttributes - Return the function attributes for querying.
+ ///
+ Attributes getFnAttributes() const {
+ return AttributeList.getFnAttributes();
}
/// addFnAttr - Add function attributes to this function.
@@ -195,6 +195,11 @@ public:
void setGC(const char *Str);
void clearGC();
+ /// getParamAttributes - Return the parameter attributes for querying.
+ Attributes getParamAttributes(unsigned Idx) const {
+ return AttributeList.getParamAttributes(Idx);
+ }
+
/// @brief Determine whether the function has the given attribute.
bool paramHasAttr(unsigned i, Attributes attr) const {
return AttributeList.paramHasAttr(i, attr);
@@ -213,7 +218,7 @@ public:
/// @brief Determine if the function does not access memory.
bool doesNotAccessMemory() const {
- return hasFnAttr(Attribute::ReadNone);
+ return getFnAttributes().hasReadNoneAttr();
}
void setDoesNotAccessMemory(bool DoesNotAccessMemory = true) {
if (DoesNotAccessMemory) addFnAttr(Attribute::ReadNone);
@@ -222,7 +227,7 @@ public:
/// @brief Determine if the function does not access or only reads memory.
bool onlyReadsMemory() const {
- return doesNotAccessMemory() || hasFnAttr(Attribute::ReadOnly);
+ return doesNotAccessMemory() || getFnAttributes().hasReadOnlyAttr();
}
void setOnlyReadsMemory(bool OnlyReadsMemory = true) {
if (OnlyReadsMemory) addFnAttr(Attribute::ReadOnly);
@@ -231,7 +236,7 @@ public:
/// @brief Determine if the function cannot return.
bool doesNotReturn() const {
- return hasFnAttr(Attribute::NoReturn);
+ return getFnAttributes().hasNoReturnAttr();
}
void setDoesNotReturn(bool DoesNotReturn = true) {
if (DoesNotReturn) addFnAttr(Attribute::NoReturn);
@@ -240,7 +245,7 @@ public:
/// @brief Determine if the function cannot unwind.
bool doesNotThrow() const {
- return hasFnAttr(Attribute::NoUnwind);
+ return getFnAttributes().hasNoUnwindAttr();
}
void setDoesNotThrow(bool DoesNotThrow = true) {
if (DoesNotThrow) addFnAttr(Attribute::NoUnwind);
@@ -250,7 +255,7 @@ public:
/// @brief True if the ABI mandates (or the user requested) that this
/// function be in a unwind table.
bool hasUWTable() const {
- return hasFnAttr(Attribute::UWTable);
+ return getFnAttributes().hasUWTableAttr();
}
void setHasUWTable(bool HasUWTable = true) {
if (HasUWTable)
@@ -267,13 +272,14 @@ public:
/// @brief Determine if the function returns a structure through first
/// pointer argument.
bool hasStructRetAttr() const {
- return paramHasAttr(1, Attribute::StructRet);
+ return getParamAttributes(1).hasStructRetAttr();
}
/// @brief Determine if the parameter does not alias other parameters.
/// @param n The parameter to check. 1 is the first parameter, 0 is the return
bool doesNotAlias(unsigned n) const {
- return paramHasAttr(n, Attribute::NoAlias);
+ return n != 0 ? getParamAttributes(n).hasNoAliasAttr() :
+ AttributeList.getRetAttributes().hasNoAliasAttr();
}
void setDoesNotAlias(unsigned n, bool DoesNotAlias = true) {
if (DoesNotAlias) addAttribute(n, Attribute::NoAlias);
@@ -283,7 +289,7 @@ public:
/// @brief Determine if the parameter can be captured.
/// @param n The parameter to check. 1 is the first parameter, 0 is the return
bool doesNotCapture(unsigned n) const {
- return paramHasAttr(n, Attribute::NoCapture);
+ return getParamAttributes(n).hasNoCaptureAttr();
}
void setDoesNotCapture(unsigned n, bool DoesNotCapture = true) {
if (DoesNotCapture) addAttribute(n, Attribute::NoCapture);
diff --git a/include/llvm/IRBuilder.h b/include/llvm/IRBuilder.h
index ae82c25e3d..46720983e4 100644
--- a/include/llvm/IRBuilder.h
+++ b/include/llvm/IRBuilder.h
@@ -285,12 +285,15 @@ public:
/// If the pointers aren't i8*, they will be converted. If a TBAA tag is
/// specified, it will be added to the instruction.
CallInst *CreateMemCpy(Value *Dst, Value *Src, uint64_t Size, unsigned Align,
- bool isVolatile = false, MDNode *TBAATag = 0) {
- return CreateMemCpy(Dst, Src, getInt64(Size), Align, isVolatile, TBAATag);
+ bool isVolatile = false, MDNode *TBAATag = 0,
+ MDNode *TBAAStructTag = 0) {
+ return CreateMemCpy(Dst, Src, getInt64(Size), Align, isVolatile, TBAATag,
+ TBAAStructTag);
}
CallInst *CreateMemCpy(Value *Dst, Value *Src, Value *Size, unsigned Align,
- bool isVolatile = false, MDNode *TBAATag = 0);
+ bool isVolatile = false, MDNode *TBAATag = 0,
+ MDNode *TBAAStructTag = 0);
/// CreateMemMove - Create and insert a memmove between the specified
/// pointers. If the pointers aren't i8*, they will be converted. If a TBAA
@@ -810,6 +813,31 @@ public:
StoreInst *CreateStore(Value *Val, Value *Ptr, bool isVolatile = false) {
return Insert(new StoreInst(Val, Ptr, isVolatile));
}
+ // Provided to resolve 'CreateAlignedLoad(Ptr, Align, "...")' correctly,
+ // instead of converting the string to 'bool' for the isVolatile parameter.
+ LoadInst *CreateAlignedLoad(Value *Ptr, unsigned Align, const char *Name) {
+ LoadInst *LI = CreateLoad(Ptr, Name);
+ LI->setAlignment(Align);
+ return LI;
+ }
+ LoadInst *CreateAlignedLoad(Value *Ptr, unsigned Align,
+ const Twine &Name = "") {
+ LoadInst *LI = CreateLoad(Ptr, Name);
+ LI->setAlignment(Align);
+ return LI;
+ }
+ LoadInst *CreateAlignedLoad(Value *Ptr, unsigned Align, bool isVolatile,
+ const Twine &Name = "") {
+ LoadInst *LI = CreateLoad(Ptr, isVolatile, Name);
+ LI->setAlignment(Align);
+ return LI;
+ }
+ StoreInst *CreateAlignedStore(Value *Val, Value *Ptr, unsigned Align,
+ bool isVolatile = false) {
+ StoreInst *SI = CreateStore(Val, Ptr, isVolatile);
+ SI->setAlignment(Align);
+ return SI;
+ }
FenceInst *CreateFence(AtomicOrdering Ordering,
SynchronizationScope SynchScope = CrossThread) {
return Insert(new FenceInst(Context, Ordering, SynchScope));
@@ -970,6 +998,30 @@ public:
Value *CreateSExt(Value *V, Type *DestTy, const Twine &Name = "") {
return CreateCast(Instruction::SExt, V, DestTy, Name);
}
+ /// CreateZExtOrTrunc - Create a ZExt or Trunc from the integer value V to
+ /// DestTy. Return the value untouched if the type of V is already DestTy.
+ Value *CreateZExtOrTrunc(Value *V, IntegerType *DestTy,
+ const Twine &Name = "") {
+ assert(isa<IntegerType>(V->getType()) && "Can only zero extend integers!");
+ IntegerType *IntTy = cast<IntegerType>(V->getType());
+ if (IntTy->getBitWidth() < DestTy->getBitWidth())
+ return CreateZExt(V, DestTy, Name);
+ if (IntTy->getBitWidth() > DestTy->getBitWidth())
+ return CreateTrunc(V, DestTy, Name);
+ return V;
+ }
+ /// CreateSExtOrTrunc - Create a SExt or Trunc from the integer value V to
+ /// DestTy. Return the value untouched if the type of V is already DestTy.
+ Value *CreateSExtOrTrunc(Value *V, IntegerType *DestTy,
+ const Twine &Name = "") {
+ assert(isa<IntegerType>(V->getType()) && "Can only sign extend integers!");
+ IntegerType *IntTy = cast<IntegerType>(V->getType());
+ if (IntTy->getBitWidth() < DestTy->getBitWidth())
+ return CreateSExt(V, DestTy, Name);
+ if (IntTy->getBitWidth() > DestTy->getBitWidth())
+ return CreateTrunc(V, DestTy, Name);
+ return V;
+ }
Value *CreateFPToUI(Value *V, Type *DestTy, const Twine &Name = ""){
return CreateCast(Instruction::FPToUI, V, DestTy, Name);
}
diff --git a/include/llvm/MC/MCExpr.h b/include/llvm/MC/MCExpr.h
index f36db3c05a..5032887248 100644
--- a/include/llvm/MC/MCExpr.h
+++ b/include/llvm/MC/MCExpr.h
@@ -446,7 +446,7 @@ public:
/// NOTE: All subclasses are required to have trivial destructors because
/// MCExprs are bump pointer allocated and not destructed.
class MCTargetExpr : public MCExpr {
- virtual void Anchor();
+ virtual void anchor();
protected:
MCTargetExpr() : MCExpr(Target) {}
virtual ~MCTargetExpr() {}
diff --git a/include/llvm/MC/MCInstrDesc.h b/include/llvm/MC/MCInstrDesc.h
index dbf16d8700..02383f8bc6 100644
--- a/include/llvm/MC/MCInstrDesc.h
+++ b/include/llvm/MC/MCInstrDesc.h
@@ -1,4 +1,4 @@
-//===-- llvm/Mc/McInstrDesc.h - Instruction Descriptors -*- C++ -*-===//
+//===-- llvm/MC/MCInstrDesc.h - Instruction Descriptors -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/include/llvm/MC/MCTargetAsmParser.h b/include/llvm/MC/MCTargetAsmParser.h
index a771ed7a9d..2b5a672d6d 100644
--- a/include/llvm/MC/MCTargetAsmParser.h
+++ b/include/llvm/MC/MCTargetAsmParser.h
@@ -115,7 +115,7 @@ public:
return Match_Success;
}
- virtual unsigned getMCInstOperandNum(unsigned Kind, MCInst &Inst,
+ virtual unsigned getMCInstOperandNum(unsigned Kind,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
unsigned OperandNum,
unsigned &NumMCOperands) = 0;
diff --git a/include/llvm/Object/MachOFormat.h b/include/llvm/Object/MachOFormat.h
index e4bfcc67fe..c0f700d3c8 100644
--- a/include/llvm/Object/MachOFormat.h
+++ b/include/llvm/Object/MachOFormat.h
@@ -61,7 +61,10 @@ namespace mach {
CSARM_V6 = 6,
CSARM_V5TEJ = 7,
CSARM_XSCALE = 8,
- CSARM_V7 = 9
+ CSARM_V7 = 9,
+ CSARM_V7F = 10,
+ CSARM_V7S = 11,
+ CSARM_V7K = 12
};
/// \brief PowerPC Machine Subtypes.
diff --git a/include/llvm/Operator.h b/include/llvm/Operator.h
index cf6d8e2c37..459df2b289 100644
--- a/include/llvm/Operator.h
+++ b/include/llvm/Operator.h
@@ -35,7 +35,9 @@ private:
void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
void *operator new(size_t s) LLVM_DELETED_FUNCTION;
Operator() LLVM_DELETED_FUNCTION;
- ~Operator() LLVM_DELETED_FUNCTION;
+ // NOTE: cannot use LLVM_DELETED_FUNCTION because gcc errors when deleting
+ // an override of a non-deleted function.
+ ~Operator();
public:
/// getOpcode - Return the opcode for this Instruction or ConstantExpr.
@@ -77,7 +79,7 @@ public:
};
private:
- ~OverflowingBinaryOperator() LLVM_DELETED_FUNCTION;
+ ~OverflowingBinaryOperator(); // DO NOT IMPLEMENT
friend class BinaryOperator;
friend class ConstantExpr;
@@ -131,7 +133,7 @@ public:
};
private:
- ~PossiblyExactOperator() LLVM_DELETED_FUNCTION;
+ ~PossiblyExactOperator(); // DO NOT IMPLEMENT
friend class BinaryOperator;
friend class ConstantExpr;
@@ -168,7 +170,7 @@ public:
/// information about relaxed accuracy requirements attached to them.
class FPMathOperator : public Operator {
private:
- ~FPMathOperator() LLVM_DELETED_FUNCTION;
+ ~FPMathOperator(); // DO NOT IMPLEMENT
public:
diff --git a/include/llvm/Support/TargetFolder.h b/include/llvm/Support/TargetFolder.h
index c65faa6621..a02db2fe66 100644
--- a/include/llvm/Support/TargetFolder.h
+++ b/include/llvm/Support/TargetFolder.h
@@ -177,7 +177,14 @@ public:
return Fold(ConstantExpr::getIntegerCast(C, DestTy, isSigned));
}
Constant *CreatePointerCast(Constant *C, Type *DestTy) const {
- return ConstantExpr::getPointerCast(C, DestTy);
+ if (C->getType() == DestTy)
+ return C; // avoid calling Fold
+ return Fold(ConstantExpr::getPointerCast(C, DestTy));
+ }
+ Constant *CreateFPCast(Constant *C, Type *DestTy) const {
+ if (C->getType() == DestTy)
+ return C; // avoid calling Fold
+ return Fold(ConstantExpr::getFPCast(C, DestTy));
}
Constant *CreateBitCast(Constant *C, Type *DestTy) const {
return CreateCast(Instruction::BitCast, C, DestTy);
diff --git a/include/llvm/Target/TargetData.h b/include/llvm/Target/TargetData.h
index 4f94ab751c..c97af7db68 100644
--- a/include/llvm/Target/TargetData.h
+++ b/include/llvm/Target/TargetData.h
@@ -53,10 +53,10 @@ enum AlignTypeEnum {
/// @note The unusual order of elements in the structure attempts to reduce
/// padding and make the structure slightly more cache friendly.
struct TargetAlignElem {
- AlignTypeEnum AlignType : 8; ///< Alignment type (AlignTypeEnum)
- unsigned ABIAlign; ///< ABI alignment for this type/bitw
- unsigned PrefAlign; ///< Pref. alignment for this type/bitw
- uint32_t TypeBitWidth; ///< Type bit width
+ uint32_t AlignType : 8; ///< Alignment type (AlignTypeEnum)
+ uint32_t TypeBitWidth : 24; ///< Type bit width
+ uint32_t ABIAlign : 16; ///< ABI alignment for this type/bitw
+ uint32_t PrefAlign : 16; ///< Pref. alignment for this type/bitw
/// Initializer
static TargetAlignElem get(AlignTypeEnum align_type, unsigned abi_align,
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index c1043aad37..a78ef58b88 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -724,6 +724,12 @@ public:
return SupportJumpTables;
}
+ /// getMinimumJumpTableEntries - return integer threshold on number of
+ /// blocks to use jump tables rather than if sequence.
+ int getMinimumJumpTableEntries() const {
+ return MinimumJumpTableEntries;
+ }
+
/// getStackPointerRegisterToSaveRestore - If a physical register, this
/// specifies the register that llvm.savestack/llvm.restorestack should save
/// and restore.
@@ -1044,6 +1050,12 @@ protected:
SupportJumpTables = Val;
}
+ /// setMinimumJumpTableEntries - Indicate the number of blocks to generate
+ /// jump tables rather than if sequence.
+ void setMinimumJumpTableEntries(int Val) {
+ MinimumJumpTableEntries = Val;
+ }
+
/// setStackPointerRegisterToSaveRestore - If set to a physical register, this
/// specifies the register that llvm.savestack/llvm.restorestack should save
/// and restore.
@@ -1838,6 +1850,9 @@ private:
/// If it's not true, then each jumptable must be lowered into if-then-else's.
bool SupportJumpTables;
+ /// MinimumJumpTableEntries - Number of blocks threshold to use jump tables.
+ int MinimumJumpTableEntries;
+
/// BooleanContents - Information about the contents of the high-bits in
/// boolean values held in a type wider than i1. See getBooleanContents.
BooleanContent BooleanContents;
diff --git a/include/llvm/Transforms/Utils/IntegerDivision.h b/include/llvm/Transforms/Utils/IntegerDivision.h
index 8d3f53e6f9..cecc8075de 100644
--- a/include/llvm/Transforms/Utils/IntegerDivision.h
+++ b/include/llvm/Transforms/Utils/IntegerDivision.h
@@ -23,6 +23,16 @@ namespace llvm {
namespace llvm {
+ /// Generate code to calculate the remainder of two integers, replacing Rem
+ /// with the generated code. This currently generates code using the udiv
+ /// expansion, but future work includes generating more specialized code,
+ /// e.g. when more information about the operands are known. Currently only
+ /// implements 32bit scalar division (due to udiv's limitation), but future
+ /// work is removing this limitation.
+ ///
+ /// @brief Replace Rem with generated code.
+ bool expandRemainder(BinaryOperator *Rem);
+
/// Generate code to divide two integers, replacing Div with the generated
/// code. This currently generates code similarly to compiler-rt's
/// implementations, but future work includes generating more specialized code
diff --git a/include/llvm/Transforms/Utils/ValueMapper.h b/include/llvm/Transforms/Utils/ValueMapper.h
index 8594707a84..5390c5e8ed 100644
--- a/include/llvm/Transforms/Utils/ValueMapper.h
+++ b/include/llvm/Transforms/Utils/ValueMapper.h
@@ -25,7 +25,7 @@ namespace llvm {
/// ValueMapTypeRemapper - This is a class that can be implemented by clients
/// to remap types when cloning constants and instructions.
class ValueMapTypeRemapper {
- virtual void Anchor(); // Out of line method.
+ virtual void anchor(); // Out of line method.
public:
virtual ~ValueMapTypeRemapper() {}
diff --git a/lib/Analysis/CodeMetrics.cpp b/lib/Analysis/CodeMetrics.cpp
index acda34ba14..9a1ca63c1c 100644
--- a/lib/Analysis/CodeMetrics.cpp
+++ b/lib/Analysis/CodeMetrics.cpp
@@ -196,7 +196,7 @@ void CodeMetrics::analyzeFunction(Function *F, const TargetData *TD) {
// as volatile if they are live across a setjmp call, and they probably
// won't do this in callers.
exposesReturnsTwice = F->callsFunctionThatReturnsTwice() &&
- !F->hasFnAttr(Attribute::ReturnsTwice);
+ !F->getFnAttributes().hasReturnsTwiceAttr();
// Look at the size of the callee.
for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
diff --git a/lib/Analysis/IPA/CallGraph.cpp b/lib/Analysis/IPA/CallGraph.cpp
index 17631ddb30..dec0eced27 100644
--- a/lib/Analysis/IPA/CallGraph.cpp
+++ b/lib/Analysis/IPA/CallGraph.cpp
@@ -141,12 +141,13 @@ private:
for (BasicBlock::iterator II = BB->begin(), IE = BB->end();
II != IE; ++II) {
CallSite CS(cast<Value>(II));
- if (CS && !isa<IntrinsicInst>(II)) {
+ if (CS) {
const Function *Callee = CS.getCalledFunction();
- if (Callee)
- Node->addCalledFunction(CS, getOrInsertFunction(Callee));
- else
+ if (!Callee)
+ // Indirect calls of intrinsics are not allowed so no need to check.
Node->addCalledFunction(CS, CallsExternalNode);
+ else if (!Callee->isIntrinsic())
+ Node->addCalledFunction(CS, getOrInsertFunction(Callee));
}
}
}
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index 1a94665096..7ecc06bbb2 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -128,7 +128,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
public:
CallAnalyzer(const TargetData *TD, Function &Callee, int Threshold)
: TD(TD), F(Callee), Threshold(Threshold), Cost(0),
- AlwaysInline(F.hasFnAttr(Attribute::AlwaysInline)),
+ AlwaysInline(F.getFnAttributes().hasAlwaysInlineAttr()),
IsCallerRecursive(false), IsRecursiveCall(false),
ExposesReturnsTwice(false), HasDynamicAlloca(false), AllocatedSize(0),
NumInstructions(0), NumVectorInstructions(0),
@@ -613,7 +613,7 @@ bool CallAnalyzer::visitStore(StoreInst &I) {
bool CallAnalyzer::visitCallSite(CallSite CS) {
if (CS.isCall() && cast<CallInst>(CS.getInstruction())->canReturnTwice() &&
- !F.hasFnAttr(Attribute::ReturnsTwice)) {
+ !F.getFnAttributes().hasReturnsTwiceAttr()) {
// This aborts the entire analysis.
ExposesReturnsTwice = true;
return false;
@@ -1043,7 +1043,7 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, Function *Callee,
// something else. Don't inline functions marked noinline or call sites
// marked noinline.
if (!Callee || Callee->mayBeOverridden() ||
- Callee->hasFnAttr(Attribute::NoInline) || CS.isNoInline())
+ Callee->getFnAttributes().hasNoInlineAttr() || CS.isNoInline())
return llvm::InlineCost::getNever();
DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp
index 83bdf5286a..7bd945733b 100644
--- a/lib/Analysis/Lint.cpp
+++ b/lib/Analysis/Lint.cpp
@@ -411,14 +411,50 @@ void Lint::visitMemoryReference(Instruction &I,
"Undefined behavior: Branch to non-blockaddress", &I);
}
+ // Check for buffer overflows and misalignment.
if (TD) {
- if (Align == 0 && Ty) Align = TD->getABITypeAlignment(Ty);
+ // Only handles memory references that read/write something simple like an
+ // alloca instruction or a global variable.
+ int64_t Offset = 0;
+ if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, *TD)) {
+ // OK, so the access is to a constant offset from Ptr. Check that Ptr is
+ // something we can handle and if so extract the size of this base object
+ // along with its alignment.
+ uint64_t BaseSize = AliasAnalysis::UnknownSize;
+ unsigned BaseAlign = 0;
+
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(Base)) {
+ Type *ATy = AI->getAllocatedType();
+ if (!AI->isArrayAllocation() && ATy->isSized())
+ BaseSize = TD->getTypeAllocSize(ATy);
+ BaseAlign = AI->getAlignment();
+ if (BaseAlign == 0 && ATy->isSized())
+ BaseAlign = TD->getABITypeAlignment(ATy);
+ } else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) {
+ // If the global may be defined differently in another compilation unit
+ // then don't warn about funky memory accesses.
+ if (GV->hasDefinitiveInitializer()) {
+ Type *GTy = GV->getType()->getElementType();
+ if (GTy->isSized())
+ BaseSize = TD->getTypeAllocSize(GTy);
+ BaseAlign = GV->getAlignment();
+ if (BaseAlign == 0 && GTy->isSized())
+ BaseAlign = TD->getABITypeAlignment(GTy);
+ }
+ }
- if (Align != 0) {
- unsigned BitWidth = TD->getTypeSizeInBits(Ptr->getType());
- APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- ComputeMaskedBits(Ptr, KnownZero, KnownOne, TD);
- Assert1(!(KnownOne & APInt::getLowBitsSet(BitWidth, Log2_32(Align))),
+ // Accesses from before the start or after the end of the object are not
+ // defined.
+ Assert1(Size == AliasAnalysis::UnknownSize ||
+ BaseSize == AliasAnalysis::UnknownSize ||
+ (Offset >= 0 && Offset + Size <= BaseSize),
+ "Undefined behavior: Buffer overflow", &I);
+
+ // Accesses that say that the memory is more aligned than it is are not
+ // defined.
+ if (Align == 0 && Ty && Ty->isSized())
+ Align = TD->getABITypeAlignment(Ty);
+ Assert1(!BaseAlign || Align <= MinAlign(BaseAlign, Offset),
"Undefined behavior: Memory reference address is misaligned", &I);
}
}
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index 5736c3569d..9ce9f8c801 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -327,7 +327,7 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
return 0;
if (LIOffs+NewLoadByteSize > MemLocEnd &&
- LI->getParent()->getParent()->hasFnAttr(Attribute::AddressSafety)) {
+ LI->getParent()->getParent()->getFnAttributes().hasAddressSafetyAttr()){
// We will be reading past the location accessed by the original program.
// While this is safe in a regular build, Address Safety analysis tools
// may start reporting false warnings. So, don't do widening.
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index eedec8383a..66a8e17e11 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -919,23 +919,13 @@ bool LLParser::ParseOptionalAddrSpace(unsigned &AddrSpace) {
bool LLParser::ParseOptionalAttrs(Attributes &Attrs, unsigned AttrKind) {
Attrs = Attribute::None;
LocTy AttrLoc = Lex.getLoc();
+ bool HaveError = false;
while (1) {
- switch (Lex.getKind()) {
+ lltok::Kind Token = Lex.getKind();
+ switch (Token) {
default: // End of attributes.
- if (AttrKind != 2 && (Attrs & Attribute::FunctionOnly))
- return Error(AttrLoc, "invalid use of function-only attribute");
-
- // As a hack, we allow "align 2" on functions as a synonym for
- // "alignstack 2".
- if (AttrKind == 2 &&
- (Attrs & ~(Attribute::FunctionOnly | Attribute::Alignment)))
- return Error(AttrLoc, "invalid use of attribute on a function");
-
- if (AttrKind != 0 && (Attrs & Attribute::ParameterOnly))
- return Error(AttrLoc, "invalid use of parameter-only attribute");
-
- return false;
+ return HaveError;
case lltok::kw_zeroext: Attrs |= Attribute::ZExt; break;
case lltok::kw_signext: Attrs |= Attribute::SExt; break;
case lltok::kw_inreg: Attrs |= Attribute::InReg; break;
@@ -980,6 +970,51 @@ bool LLParser::ParseOptionalAttrs(Attributes &Attrs, unsigned AttrKind) {
}
}
+
+ // Perform some error checking.
+ switch (Token) {
+ default:
+ if (AttrKind == 2)
+ HaveError |= Error(AttrLoc, "invalid use of attribute on a function");
+ break;
+ case lltok::kw_align:
+ // As a hack, we allow "align 2" on functions as a synonym for
+ // "alignstack 2".
+ break;
+
+ // Parameter Only:
+ case lltok::kw_sret:
+ case lltok::kw_nocapture:
+ case lltok::kw_byval:
+ case lltok::kw_nest:
+ if (AttrKind != 0)
+ HaveError |= Error(AttrLoc, "invalid use of parameter-only attribute");
+ break;
+
+ // Function Only:
+ case lltok::kw_noreturn:
+ case lltok::kw_nounwind:
+ case lltok::kw_readnone:
+ case lltok::kw_readonly:
+ case lltok::kw_noinline:
+ case lltok::kw_alwaysinline:
+ case lltok::kw_optsize:
+ case lltok::kw_ssp:
+ case lltok::kw_sspreq:
+ case lltok::kw_noredzone:
+ case lltok::kw_noimplicitfloat:
+ case lltok::kw_naked:
+ case lltok::kw_inlinehint:
+ case lltok::kw_alignstack:
+ case lltok::kw_uwtable:
+ case lltok::kw_nonlazybind:
+ case lltok::kw_returns_twice:
+ case lltok::kw_address_safety:
+ if (AttrKind != 2)
+ HaveError |= Error(AttrLoc, "invalid use of function-only attribute");
+ break;
+ }
+
Lex.Lex();
}
}
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index 4a11223711..c3bffc5d63 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -477,7 +477,7 @@ bool BitcodeReader::ParseAttributeBlock() {
for (unsigned i = 0, e = Record.size(); i != e; i += 2) {
Attributes ReconstitutedAttr =
- Attribute::decodeLLVMAttributesForBitcode(Record[i+1]);
+ Attributes::decodeLLVMAttributesForBitcode(Record[i+1]);
Record[i+1] = ReconstitutedAttr.Raw();
}
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index 94ebe190d4..b3f1bb13a9 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -177,7 +177,7 @@ static void WriteAttributeTable(const ValueEnumerator &VE,
for (unsigned i = 0, e = A.getNumSlots(); i != e; ++i) {
const AttributeWithIndex &PAWI = A.getSlot(i);
Record.push_back(PAWI.Index);
- Record.push_back(Attribute::encodeLLVMAttributesForBitcode(PAWI.Attrs));
+ Record.push_back(Attributes::encodeLLVMAttributesForBitcode(PAWI.Attrs));
}
Stream.EmitRecord(bitc::PARAMATTR_CODE_ENTRY, Record);
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 55aa4ee665..d506d7e507 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1140,6 +1140,11 @@ void AsmPrinter::EmitJumpTableInfo() {
EmitAlignment(Log2_32(MJTI->getEntryAlignment(*TM.getTargetData())));
+ // Jump tables in code sections are marked with a data_region directive
+ // where that's supported.
+ if (!JTInDiffSection)
+ OutStreamer.EmitDataRegion(MCDR_DataRegionJT32);
+
for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) {
const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
@@ -1180,6 +1185,8 @@ void AsmPrinter::EmitJumpTableInfo() {
for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii)
EmitJumpTableEntry(MJTI, JTBBs[ii], JTI);
}
+ if (!JTInDiffSection)
+ OutStreamer.EmitDataRegion(MCDR_DataRegionEnd);
}
/// EmitJumpTableEntry - Emit a jump table entry for the specified MBB to the
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index efe022b074..5494c0f784 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -590,7 +590,7 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1,
// instructions that would be deleted in the merge.
MachineFunction *MF = MBB1->getParent();
if (EffectiveTailLen >= 2 &&
- MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize) &&
+ MF->getFunction()->getFnAttributes().hasOptimizeForSizeAttr() &&
(I1 == MBB1->begin() || I2 == MBB2->begin()))
return true;
diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp
index 99233dfc2e..1009a1e29c 100644
--- a/lib/CodeGen/CodePlacementOpt.cpp
+++ b/lib/CodeGen/CodePlacementOpt.cpp
@@ -373,7 +373,7 @@ bool CodePlacementOpt::OptimizeIntraLoopEdges(MachineFunction &MF) {
///
bool CodePlacementOpt::AlignLoops(MachineFunction &MF) {
const Function *F = MF.getFunction();
- if (F->hasFnAttr(Attribute::OptimizeForSize))
+ if (F->getFnAttributes().hasOptimizeForSizeAttr())
return false;
unsigned Align = TLI->getPrefLoopAlignment();
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index f4ebcd6fa4..c3bf2d234c 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -427,7 +427,7 @@ void LiveInterval::join(LiveInterval &Other,
// If we have to apply a mapping to our base interval assignment, rewrite it
// now.
- if (MustMapCurValNos) {
+ if (MustMapCurValNos && !empty()) {
// Map the first live range.
iterator OutIt = begin();
diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp
index b4ce9aa8c1..82710414b3 100644
--- a/lib/CodeGen/LiveRangeEdit.cpp
+++ b/lib/CodeGen/LiveRangeEdit.cpp
@@ -87,7 +87,7 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
// We can't remat physreg uses, unless it is a constant.
if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
- if (MRI.isConstantPhysReg(MO.getReg(), VRM->getMachineFunction()))
+ if (MRI.isConstantPhysReg(MO.getReg(), *OrigMI->getParent()->getParent()))
continue;
return false;
}
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp
index 9a8cc48172..1f1ce671f5 100644
--- a/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -1013,7 +1013,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
// exclusively on the loop info here so that we can align backedges in
// unnatural CFGs and backedges that were introduced purely because of the
// loop rotations done during this layout pass.
- if (F.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+ if (F.getFunction()->getFnAttributes().hasOptimizeForSizeAttr())
return;
unsigned Align = TLI->getPrefLoopAlignment();
if (!Align)
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 304e39e159..34fbfe20f4 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -59,13 +59,13 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM,
RegInfo = 0;
MFInfo = 0;
FrameInfo = new (Allocator) MachineFrameInfo(*TM.getFrameLowering());
- if (Fn->hasFnAttr(Attribute::StackAlignment))
+ if (Fn->getFnAttributes().hasStackAlignmentAttr())
FrameInfo->ensureMaxAlignment(Fn->getAttributes().
getFnAttributes().getStackAlignment());
ConstantPool = new (Allocator) MachineConstantPool(TM.getTargetData());
Alignment = TM.getTargetLowering()->getMinFunctionAlignment();
// FIXME: Shouldn't use pref alignment if explicit alignment is set on Fn.
- if (!Fn->hasFnAttr(Attribute::OptimizeForSize))
+ if (!Fn->getFnAttributes().hasOptimizeForSizeAttr())
Alignment = std::max(Alignment,
TM.getTargetLowering()->getPrefFunctionAlignment());
FunctionNumber = FunctionNum;
diff --git a/lib/CodeGen/MachineModuleInfoImpls.cpp b/lib/CodeGen/MachineModuleInfoImpls.cpp
index 5ab56c09f5..a1c7e9f5fb 100644
--- a/lib/CodeGen/MachineModuleInfoImpls.cpp
+++ b/lib/CodeGen/MachineModuleInfoImpls.cpp
@@ -21,8 +21,8 @@ using namespace llvm;
//===----------------------------------------------------------------------===//
// Out of line virtual method.
-void MachineModuleInfoMachO::Anchor() {}
-void MachineModuleInfoELF::Anchor() {}
+void MachineModuleInfoMachO::anchor() {}
+void MachineModuleInfoELF::anchor() {}
static int SortSymbolPair(const void *LHS, const void *RHS) {
typedef std::pair<MCSymbol*, MachineModuleInfoImpl::StubValueTy> PairTy;
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index c791ffb28c..3a4125475e 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -96,7 +96,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
placeCSRSpillsAndRestores(Fn);
// Add the code to save and restore the callee saved registers
- if (!F->hasFnAttr(Attribute::Naked))
+ if (!F->getFnAttributes().hasNakedAttr())
insertCSRSpillsAndRestores(Fn);
// Allow the target machine to make final modifications to the function
@@ -111,7 +111,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
// called functions. Because of this, calculateCalleeSavedRegisters()
// must be called before this function in order to set the AdjustsStack
// and MaxCallFrameSize variables.
- if (!F->hasFnAttr(Attribute::Naked))
+ if (!F->getFnAttributes().hasNakedAttr())
insertPrologEpilogCode(Fn);
// Replace all MO_FrameIndex operands with physical register references
@@ -221,7 +221,7 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
return;
// In Naked functions we aren't going to save any registers.
- if (Fn.getFunction()->hasFnAttr(Attribute::Naked))
+ if (Fn.getFunction()->getFnAttributes().hasNakedAttr())
return;
std::vector<CalleeSavedInfo> CSI;
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index dd0f548867..f45072f1ac 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -70,7 +70,7 @@ VerifyCoalescing("verify-coalescing",
// Temporary option for testing new coalescer algo.
static cl::opt<bool>
-NewCoalescer("new-coalescer", cl::Hidden,
+NewCoalescer("new-coalescer", cl::Hidden, cl::init(true),
cl::desc("Use new coalescer algorithm"));
namespace {
@@ -1732,6 +1732,12 @@ void JoinVals::pruneValues(JoinVals &Other,
case CR_Replace:
// This value takes precedence over the value in Other.LI.
LIS->pruneValue(&Other.LI, Def, &EndPoints);
+ // Remove <def,read-undef> flags. This def is now a partial redef.
+ if (!Def.isBlock())
+ for (MIOperands MO(Indexes->getInstructionFromIndex(Def));
+ MO.isValid(); ++MO)
+ if (MO->isReg() && MO->isDef() && MO->getReg() == LI.reg)
+ MO->setIsUndef(false);
DEBUG(dbgs() << "\t\tpruned " << PrintReg(Other.LI.reg) << " at " << Def
<< ": " << Other.LI << '\n');
break;
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 0107ded953..d115991858 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2999,7 +2999,7 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT));
if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
return DAG.getNode(ISD::ROTL, N->getDebugLoc(), VT, BSwap, ShAmt);
- else if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
+ if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
return DAG.getNode(ISD::ROTR, N->getDebugLoc(), VT, BSwap, ShAmt);
return DAG.getNode(ISD::OR, N->getDebugLoc(), VT,
DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, BSwap, ShAmt),
@@ -3217,11 +3217,8 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) {
if ((LShVal + RShVal) != OpSizeInBits)
return 0;
- SDValue Rot;
- if (HasROTL)
- Rot = DAG.getNode(ISD::ROTL, DL, VT, LHSShiftArg, LHSShiftAmt);
- else
- Rot = DAG.getNode(ISD::ROTR, DL, VT, LHSShiftArg, RHSShiftAmt);
+ SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
+ LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
// If there is an AND of either shifted operand, apply it to the result.
if (LHSMask.getNode() || RHSMask.getNode()) {
@@ -3254,12 +3251,8 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) {
if (ConstantSDNode *SUBC =
dyn_cast<ConstantSDNode>(RHSShiftAmt.getOperand(0))) {
if (SUBC->getAPIntValue() == OpSizeInBits) {
- if (HasROTL)
- return DAG.getNode(ISD::ROTL, DL, VT,
- LHSShiftArg, LHSShiftAmt).getNode();
- else
- return DAG.getNode(ISD::ROTR, DL, VT,
- LHSShiftArg, RHSShiftAmt).getNode();
+ return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
+ HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode();
}
}
}
@@ -3271,25 +3264,21 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) {
if (ConstantSDNode *SUBC =
dyn_cast<ConstantSDNode>(LHSShiftAmt.getOperand(0))) {
if (SUBC->getAPIntValue() == OpSizeInBits) {
- if (HasROTR)
- return DAG.getNode(ISD::ROTR, DL, VT,
- LHSShiftArg, RHSShiftAmt).getNode();
- else
- return DAG.getNode(ISD::ROTL, DL, VT,
- LHSShiftArg, LHSShiftAmt).getNode();
+ return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, LHSShiftArg,
+ HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode();
}
}
}
// Look for sign/zext/any-extended or truncate cases:
- if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND
- || LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND
- || LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND
- || LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
- (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND
- || RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND
- || RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND
- || RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
+ if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
+ LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
+ LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
+ LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
+ (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
+ RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
+ RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
+ RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
SDValue LExtOp0 = LHSShiftAmt.getOperand(0);
SDValue RExtOp0 = RHSShiftAmt.getOperand(0);
if (RExtOp0.getOpcode() == ISD::SUB &&
@@ -4428,20 +4417,18 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// If the desired elements are smaller or larger than the source
// elements we can use a matching integer vector type and then
// truncate/sign extend
- else {
- EVT MatchingElementType =
- EVT::getIntegerVT(*DAG.getContext(),
- N0VT.getScalarType().getSizeInBits());
- EVT MatchingVectorType =
- EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
- N0VT.getVectorNumElements());
+ EVT MatchingElementType =
+ EVT::getIntegerVT(*DAG.getContext(),
+ N0VT.getScalarType().getSizeInBits());
+ EVT MatchingVectorType =
+ EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
+ N0VT.getVectorNumElements());
- if (SVT == MatchingVectorType) {
- SDValue VsetCC = DAG.getSetCC(N->getDebugLoc(), MatchingVectorType,
- N0.getOperand(0), N0.getOperand(1),
- cast<CondCodeSDNode>(N0.getOperand(2))->get());
- return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
- }
+ if (SVT == MatchingVectorType) {
+ SDValue VsetCC = DAG.getSetCC(N->getDebugLoc(), MatchingVectorType,
+ N0.getOperand(0), N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
}
}
@@ -5251,13 +5238,12 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// if the source is smaller than the dest, we still need an extend
return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
N0.getOperand(0));
- else if (N0.getOperand(0).getValueType().bitsGT(VT))
+ if (N0.getOperand(0).getValueType().bitsGT(VT))
// if the source is larger than the dest, than we just need the truncate
return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0));
- else
- // if the source and dest are the same type, we can drop both the extend
- // and the truncate.
- return N0.getOperand(0);
+ // if the source and dest are the same type, we can drop both the extend
+ // and the truncate.
+ return N0.getOperand(0);
}
// Fold extract-and-trunc into a narrow extract. For example:
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index bd33479b94..a48a6256e5 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -3521,7 +3521,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+ bool OptSize = MF.getFunction()->getFnAttributes().hasOptimizeForSizeAttr();
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
@@ -3614,7 +3614,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+ bool OptSize = MF.getFunction()->getFnAttributes().hasOptimizeForSizeAttr();
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
@@ -3692,7 +3692,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+ bool OptSize = MF.getFunction()->getFnAttributes().hasOptimizeForSizeAttr();
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 4f6ff08407..65becbe44f 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -89,7 +89,7 @@ static const unsigned MaxParallelChains = 64;
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
const SDValue *Parts, unsigned NumParts,
- EVT PartVT, EVT ValueVT);
+ EVT PartVT, EVT ValueVT, const Value *V);
/// getCopyFromParts - Create a value that contains the specified legal parts
/// combined into the value they represent. If the parts combine to a type
@@ -99,9 +99,11 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
const SDValue *Parts,
unsigned NumParts, EVT PartVT, EVT ValueVT,
+ const Value *V,
ISD::NodeType AssertOp = ISD::DELETED_NODE) {
if (ValueVT.isVector())
- return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT);
+ return getCopyFromPartsVector(DAG, DL, Parts, NumParts,
+ PartVT, ValueVT, V);
assert(NumParts > 0 && "No parts to assemble!");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -125,9 +127,9 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
if (RoundParts > 2) {
Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2,
- PartVT, HalfVT);
+ PartVT, HalfVT, V);
Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2,
- RoundParts / 2, PartVT, HalfVT);
+ RoundParts / 2, PartVT, HalfVT, V);
} else {
Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]);
Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]);
@@ -143,7 +145,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
unsigned OddParts = NumParts - RoundParts;
EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
Hi = getCopyFromParts(DAG, DL,
- Parts + RoundParts, OddParts, PartVT, OddVT);
+ Parts + RoundParts, OddParts, PartVT, OddVT, V);
// Combine the round and odd parts.
Lo = Val;
@@ -172,7 +174,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
!PartVT.isVector() && "Unexpected split");
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
- Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT);
+ Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V);
}
}
@@ -210,14 +212,14 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
llvm_unreachable("Unknown mismatch!");
}
-/// getCopyFromParts - Create a value that contains the specified legal parts
-/// combined into the value they represent. If the parts combine to a type
-/// larger then ValueVT then AssertOp can be used to specify whether the extra
-/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
-/// (ISD::AssertSext).
+/// getCopyFromPartsVector - Create a value that contains the specified legal
+/// parts combined into the value they represent. If the parts combine to a
+/// type larger then ValueVT then AssertOp can be used to specify whether the
+/// extra bits are known to be zero (ISD::AssertZext) or sign extended from
+/// ValueVT (ISD::AssertSext).
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
const SDValue *Parts, unsigned NumParts,
- EVT PartVT, EVT ValueVT) {
+ EVT PartVT, EVT ValueVT, const Value *V) {
assert(ValueVT.isVector() && "Not a vector value");
assert(NumParts > 0 && "No parts to assemble!");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -243,7 +245,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
// as appropriate.
for (unsigned i = 0; i != NumParts; ++i)
Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1,
- PartVT, IntermediateVT);
+ PartVT, IntermediateVT, V);
} else if (NumParts > 0) {
// If the intermediate type was expanded, build the intermediate
// operands from the parts.
@@ -252,7 +254,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
unsigned Factor = NumParts / NumIntermediates;
for (unsigned i = 0; i != NumIntermediates; ++i)
Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor,
- PartVT, IntermediateVT);
+ PartVT, IntermediateVT, V);
}
// Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
@@ -300,8 +302,19 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
// Handle cases such as i8 -> <1 x i1>
- assert(ValueVT.getVectorNumElements() == 1 &&
- "Only trivial scalar-to-vector conversions should get here!");
+ if (ValueVT.getVectorNumElements() != 1) {
+ LLVMContext &Ctx = *DAG.getContext();
+ Twine ErrMsg("non-trivial scalar-to-vector conversion");
+ if (const Instruction *I = dyn_cast_or_null<Instruction>(V)) {
+ if (const CallInst *CI = dyn_cast<CallInst>(I))
+ if (isa<InlineAsm>(CI->getCalledValue()))
+ ErrMsg = ErrMsg + ", possible invalid constraint for vector type";
+ Ctx.emitError(I, ErrMsg);
+ } else {
+ Ctx.emitError(ErrMsg);
+ }
+ report_fatal_error("Cannot handle scalar-to-vector conversion!");
+ }
if (ValueVT.getVectorNumElements() == 1 &&
ValueVT.getVectorElementType() != PartVT) {
@@ -313,25 +326,22 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val);
}
-
-
-
static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc dl,
SDValue Val, SDValue *Parts, unsigned NumParts,
- EVT PartVT);
+ EVT PartVT, const Value *V);
/// getCopyToParts - Create a series of nodes that contain the specified value
/// split into legal parts. If the parts contain more bits than Val, then, for
/// integers, ExtendKind can be used to specify how to generate the extra bits.
static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
SDValue Val, SDValue *Parts, unsigned NumParts,
- EVT PartVT,
+ EVT PartVT, const Value *V,
ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
EVT ValueVT = Val.getValueType();
// Handle the vector case separately.
if (ValueVT.isVector())
- return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT);
+ return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned PartBits = PartVT.getSizeInBits();
@@ -383,7 +393,19 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
"Failed to tile the value with PartVT!");
if (NumParts == 1) {
- assert(PartVT == ValueVT && "Type conversion failed!");
+ if (PartVT != ValueVT) {
+ LLVMContext &Ctx = *DAG.getContext();
+ Twine ErrMsg("scalar-to-vector conversion failed");
+ if (const Instruction *I = dyn_cast_or_null<Instruction>(V)) {
+ if (const CallInst *CI = dyn_cast<CallInst>(I))
+ if (isa<InlineAsm>(CI->getCalledValue()))
+ ErrMsg = ErrMsg + ", possible invalid constraint for vector type";
+ Ctx.emitError(I, ErrMsg);
+ } else {
+ Ctx.emitError(ErrMsg);
+ }
+ }
+
Parts[0] = Val;
return;
}
@@ -398,7 +420,7 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
unsigned OddParts = NumParts - RoundParts;
SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val,
DAG.getIntPtrConstant(RoundBits));
- getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT);
+ getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V);
if (TLI.isBigEndian())
// The odd parts were reversed by getCopyToParts - unreverse them.
@@ -444,7 +466,7 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
/// value split into legal parts.
static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
SDValue Val, SDValue *Parts, unsigned NumParts,
- EVT PartVT) {
+ EVT PartVT, const Value *V) {
EVT ValueVT = Val.getValueType();
assert(ValueVT.isVector() && "Not a vector");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -530,7 +552,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
// If the register was not expanded, promote or copy the value,
// as appropriate.
for (unsigned i = 0; i != NumParts; ++i)
- getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT);
+ getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT, V);
} else if (NumParts > 0) {
// If the intermediate type was expanded, split each the value into
// legal parts.
@@ -538,13 +560,10 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
"Must expand into a divisible number of parts!");
unsigned Factor = NumParts / NumIntermediates;
for (unsigned i = 0; i != NumIntermediates; ++i)
- getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT);
+ getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT, V);
}
}
-
-
-
namespace {
/// RegsForValue - This struct represents the registers (physical or virtual)
/// that a particular set of values is assigned, and the type information
@@ -622,14 +641,15 @@ namespace {
/// If the Flag pointer is NULL, no flag is used.
SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo,
DebugLoc dl,
- SDValue &Chain, SDValue *Flag) const;
+ SDValue &Chain, SDValue *Flag,
+ const Value *V = 0) const;
/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
/// specified value into the registers specified by this object. This uses
/// Chain/Flag as the input and updates them for the output Chain/Flag.
/// If the Flag pointer is NULL, no flag is used.
void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
- SDValue &Chain, SDValue *Flag) const;
+ SDValue &Chain, SDValue *Flag, const Value *V) const;
/// AddInlineAsmOperands - Add this value to the specified inlineasm node
/// operand list. This adds the code marker, matching input operand index
@@ -648,7 +668,8 @@ namespace {
SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
FunctionLoweringInfo &FuncInfo,
DebugLoc dl,
- SDValue &Chain, SDValue *Flag) const {
+ SDValue &Chain, SDValue *Flag,
+ const Value *V) const {
// A Value with type {} or [0 x %t] needs no registers.
if (ValueVTs.empty())
return SDValue();
@@ -722,7 +743,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
}
Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(),
- NumRegs, RegisterVT, ValueVT);
+ NumRegs, RegisterVT, ValueVT, V);
Part += NumRegs;
Parts.clear();
}
@@ -737,7 +758,8 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
/// Chain/Flag as the input and updates them for the output Chain/Flag.
/// If the Flag pointer is NULL, no flag is used.
void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
- SDValue &Chain, SDValue *Flag) const {
+ SDValue &Chain, SDValue *Flag,
+ const Value *V) const {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Get the list of the values's legal parts.
@@ -749,7 +771,7 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
EVT RegisterVT = RegVTs[Value];
getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value),
- &Parts[Part], NumParts, RegisterVT);
+ &Parts[Part], NumParts, RegisterVT, V);
Part += NumParts;
}
@@ -994,7 +1016,7 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) {
unsigned InReg = It->second;
RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType());
SDValue Chain = DAG.getEntryNode();
- N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL);
+ N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL, V);
resolveDanglingDebugInfo(V, N);
return N;
}
@@ -1149,7 +1171,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
unsigned InReg = FuncInfo.InitializeRegForValue(Inst);
RegsForValue RFV(*DAG.getContext(), TLI, InReg, Inst->getType());
SDValue Chain = DAG.getEntryNode();
- return RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL);
+ return RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL, V);
}
llvm_unreachable("Can't get register for value!");
@@ -1218,7 +1240,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
SmallVector<SDValue, 4> Parts(NumParts);
getCopyToParts(DAG, getCurDebugLoc(),
SDValue(RetOp.getNode(), RetOp.getResNo() + j),
- &Parts[0], NumParts, PartVT, ExtendKind);
+ &Parts[0], NumParts, PartVT, &I, ExtendKind);
// 'inreg' on function refers to return value
ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
@@ -2093,7 +2115,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR,
for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I)
TSize += I->size();
- if (!areJTsAllowed(TLI) || TSize.ult(4))
+ if (!areJTsAllowed(TLI) || TSize.ult(TLI.getMinimumJumpTableEntries()))
return false;
APInt Range = ComputeRange(First, Last);
@@ -2565,9 +2587,10 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
if (handleSmallSwitchRange(CR, WorkList, SV, Default, SwitchMBB))
continue;
- // If the switch has more than 5 blocks, and at least 40% dense, and the
+ // If the switch has more than N blocks, and is at least 40% dense, and the
// target supports indirect branches, then emit a jump table rather than
// lowering the switch to a binary tree of conditional branches.
+ // N defaults to 4 and is controlled via TLS.getMinimumJumpTableEntries().
if (handleJTSwitchCase(CR, WorkList, SV, Default, SwitchMBB))
continue;
@@ -4377,7 +4400,7 @@ static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS,
return DAG.getConstantFP(1.0, LHS.getValueType());
const Function *F = DAG.getMachineFunction().getFunction();
- if (!F->hasFnAttr(Attribute::OptimizeForSize) ||
+ if (!F->getFnAttributes().hasOptimizeForSizeAttr() ||
// If optimizing for size, don't insert too many multiplies. This
// inserts up to 5 multiplies.
CountPopulation_32(Val)+Log2_32(Val) < 7) {
@@ -6244,7 +6267,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// Use the produced MatchedRegs object to
MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
- Chain, &Flag);
+ Chain, &Flag, CS.getInstruction());
MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse,
true, OpInfo.getMatchedOperand(),
DAG, AsmNodeOperands);
@@ -6326,7 +6349,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
}
OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
- Chain, &Flag);
+ Chain, &Flag, CS.getInstruction());
OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0,
DAG, AsmNodeOperands);
@@ -6357,7 +6380,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// and set it as the value of the call.
if (!RetValRegs.Regs.empty()) {
SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(),
- Chain, &Flag);
+ Chain, &Flag, CS.getInstruction());
// FIXME: Why don't we do this for inline asms with MRVs?
if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) {
@@ -6397,7 +6420,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
RegsForValue &OutRegs = IndirectStoresToEmit[i].first;
const Value *Ptr = IndirectStoresToEmit[i].second;
SDValue OutVal = OutRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(),
- Chain, &Flag);
+ Chain, &Flag, IA);
StoresToEmit.push_back(std::make_pair(OutVal, Ptr));
}
@@ -6515,7 +6538,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
ExtendKind = ISD::ZERO_EXTEND;
getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts,
- PartVT, ExtendKind);
+ PartVT, CLI.CS ? CLI.CS->getInstruction() : 0, ExtendKind);
for (unsigned j = 0; j != NumParts; ++j) {
// if it isn't first piece, alignment must be 1
@@ -6596,7 +6619,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg],
- NumRegs, RegisterVT, VT,
+ NumRegs, RegisterVT, VT, NULL,
AssertOp));
CurReg += NumRegs;
}
@@ -6635,7 +6658,7 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
RegsForValue RFV(V->getContext(), TLI, Reg, V->getType());
SDValue Chain = DAG.getEntryNode();
- RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0);
+ RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0, V);
PendingExports.push_back(Chain);
}
@@ -6777,7 +6800,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
EVT RegVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
ISD::NodeType AssertOp = ISD::DELETED_NODE;
SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1,
- RegVT, VT, AssertOp);
+ RegVT, VT, NULL, AssertOp);
MachineFunction& MF = SDB->DAG.getMachineFunction();
MachineRegisterInfo& RegInfo = MF.getRegInfo();
@@ -6818,7 +6841,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i],
NumParts, PartVT, VT,
- AssertOp));
+ NULL, AssertOp));
}
i += NumParts;
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 56f3a45c9a..be3ecf34f7 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -613,6 +613,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
ShouldFoldAtomicFences = false;
InsertFencesForAtomic = false;
SupportJumpTables = true;
+ MinimumJumpTableEntries = 4;
InitLibcallNames(LibcallRoutineNames);
InitCmpLibcallCCs(CmpLibcallCCs);
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
index a04ac3fbc1..a58c144659 100644
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
@@ -137,10 +137,10 @@ bool StackProtector::ContainsProtectableArray(Type *Ty, bool InStruct) const {
/// add a guard variable to functions that call alloca, and functions with
/// buffers larger than SSPBufferSize bytes.
bool StackProtector::RequiresStackProtector() const {
- if (F->hasFnAttr(Attribute::StackProtectReq))
+ if (F->getFnAttributes().hasStackProtectReqAttr())
return true;
- if (!F->hasFnAttr(Attribute::StackProtect))
+ if (!F->getFnAttributes().hasStackProtectAttr())
return false;
for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp
index a813fa65ac..230ea038e2 100644
--- a/lib/CodeGen/TailDuplication.cpp
+++ b/lib/CodeGen/TailDuplication.cpp
@@ -552,7 +552,7 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF,
// compensate for the duplication.
unsigned MaxDuplicateCount;
if (TailDuplicateSize.getNumOccurrences() == 0 &&
- MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+ MF.getFunction()->getFnAttributes().hasOptimizeForSizeAttr())
MaxDuplicateCount = 1;
else
MaxDuplicateCount = TailDuplicateSize;
diff --git a/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt b/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt
index 7d67d0d8be..348308897d 100644
--- a/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt
+++ b/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt
@@ -1,11 +1,6 @@
-
-include_directories( ${LLVM_INTEL_JITEVENTS_INCDIR} ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
-set(system_libs
- ${system_libs}
- jitprofiling
- )
+include_directories( ${CMAKE_CURRENT_SOURCE_DIR}/.. )
add_llvm_library(LLVMIntelJITEvents
IntelJITEventListener.cpp
+ jitprofiling.c
)
diff --git a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
index c11c17eac7..23f8607322 100644
--- a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
+++ b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
@@ -22,12 +22,12 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/ExecutionEngine/IntelJITEventsWrapper.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Errno.h"
#include "llvm/Support/ValueHandle.h"
#include "EventListenerCommon.h"
+#include "IntelJITEventsWrapper.h"
using namespace llvm;
using namespace llvm::jitprofiling;
@@ -37,13 +37,13 @@ namespace {
class IntelJITEventListener : public JITEventListener {
typedef DenseMap<void*, unsigned int> MethodIDMap;
- IntelJITEventsWrapper& Wrapper;
+ OwningPtr<IntelJITEventsWrapper> Wrapper;
MethodIDMap MethodIDs;
FilenameCache Filenames;
public:
- IntelJITEventListener(IntelJITEventsWrapper& libraryWrapper)
- : Wrapper(libraryWrapper) {
+ IntelJITEventListener(IntelJITEventsWrapper* libraryWrapper) {
+ Wrapper.reset(libraryWrapper);
}
~IntelJITEventListener() {
@@ -94,7 +94,7 @@ static iJIT_Method_Load FunctionDescToIntelJITFormat(
void IntelJITEventListener::NotifyFunctionEmitted(
const Function &F, void *FnStart, size_t FnSize,
const EmittedFunctionDetails &Details) {
- iJIT_Method_Load FunctionMessage = FunctionDescToIntelJITFormat(Wrapper,
+ iJIT_Method_Load FunctionMessage = FunctionDescToIntelJITFormat(*Wrapper,
F.getName().data(),
reinterpret_cast<uint64_t>(FnStart),
FnSize);
@@ -151,15 +151,15 @@ void IntelJITEventListener::NotifyFunctionEmitted(
FunctionMessage.line_number_table = 0;
}
- Wrapper.iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED,
- &FunctionMessage);
+ Wrapper->iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED,
+ &FunctionMessage);
MethodIDs[FnStart] = FunctionMessage.method_id;
}
void IntelJITEventListener::NotifyFreeingMachineCode(void *FnStart) {
MethodIDMap::iterator I = MethodIDs.find(FnStart);
if (I != MethodIDs.end()) {
- Wrapper.iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_UNLOAD_START, &I->second);
+ Wrapper->iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_UNLOAD_START, &I->second);
MethodIDs.erase(I);
}
}
@@ -168,15 +168,13 @@ void IntelJITEventListener::NotifyFreeingMachineCode(void *FnStart) {
namespace llvm {
JITEventListener *JITEventListener::createIntelJITEventListener() {
- static OwningPtr<IntelJITEventsWrapper> JITProfilingWrapper(
- new IntelJITEventsWrapper);
- return new IntelJITEventListener(*JITProfilingWrapper);
+ return new IntelJITEventListener(new IntelJITEventsWrapper);
}
// for testing
JITEventListener *JITEventListener::createIntelJITEventListener(
IntelJITEventsWrapper* TestImpl) {
- return new IntelJITEventListener(*TestImpl);
+ return new IntelJITEventListener(TestImpl);
}
} // namespace llvm
diff --git a/include/llvm/ExecutionEngine/IntelJITEventsWrapper.h b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h
index ca87342029..7ab08e15a8 100644
--- a/include/llvm/ExecutionEngine/IntelJITEventsWrapper.h
+++ b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h
@@ -18,7 +18,7 @@
#ifndef INTEL_JIT_EVENTS_WRAPPER_H
#define INTEL_JIT_EVENTS_WRAPPER_H
-#include <jitprofiling.h>
+#include "jitprofiling.h"
namespace llvm {
diff --git a/lib/ExecutionEngine/IntelJITEvents/Makefile b/lib/ExecutionEngine/IntelJITEvents/Makefile
index ba75ac6f64..dcf3126cc5 100644
--- a/lib/ExecutionEngine/IntelJITEvents/Makefile
+++ b/lib/ExecutionEngine/IntelJITEvents/Makefile
@@ -11,7 +11,8 @@ LIBRARYNAME = LLVMIntelJITEvents
include $(LEVEL)/Makefile.config
-SOURCES := IntelJITEventListener.cpp
-CPPFLAGS += -I$(INTEL_JITEVENTS_INCDIR) -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+SOURCES := IntelJITEventListener.cpp \
+ jitprofiling.c
+CPPFLAGS += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
include $(LLVM_SRC_ROOT)/Makefile.rules
diff --git a/lib/ExecutionEngine/IntelJITEvents/ittnotify_config.h b/lib/ExecutionEngine/IntelJITEvents/ittnotify_config.h
new file mode 100644
index 0000000000..238065fe0a
--- /dev/null
+++ b/lib/ExecutionEngine/IntelJITEvents/ittnotify_config.h
@@ -0,0 +1,449 @@
+/*===-- ittnotify_config.h - JIT Profiling API internal config-----*- C -*-===*
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===*
+ *
+ * This file provides Intel(R) Performance Analyzer JIT (Just-In-Time)
+ * Profiling API internal config.
+ *
+ *===----------------------------------------------------------------------===*/
+#ifndef _ITTNOTIFY_CONFIG_H_
+#define _ITTNOTIFY_CONFIG_H_
+
+/** @cond exclude_from_documentation */
+#ifndef ITT_OS_WIN
+# define ITT_OS_WIN 1
+#endif /* ITT_OS_WIN */
+
+#ifndef ITT_OS_LINUX
+# define ITT_OS_LINUX 2
+#endif /* ITT_OS_LINUX */
+
+#ifndef ITT_OS_MAC
+# define ITT_OS_MAC 3
+#endif /* ITT_OS_MAC */
+
+#ifndef ITT_OS
+# if defined WIN32 || defined _WIN32
+# define ITT_OS ITT_OS_WIN
+# elif defined( __APPLE__ ) && defined( __MACH__ )
+# define ITT_OS ITT_OS_MAC
+# else
+# define ITT_OS ITT_OS_LINUX
+# endif
+#endif /* ITT_OS */
+
+#ifndef ITT_PLATFORM_WIN
+# define ITT_PLATFORM_WIN 1
+#endif /* ITT_PLATFORM_WIN */
+
+#ifndef ITT_PLATFORM_POSIX
+# define ITT_PLATFORM_POSIX 2
+#endif /* ITT_PLATFORM_POSIX */
+
+#ifndef ITT_PLATFORM
+# if ITT_OS==ITT_OS_WIN
+# define ITT_PLATFORM ITT_PLATFORM_WIN
+# else
+# define ITT_PLATFORM ITT_PLATFORM_POSIX
+# endif /* _WIN32 */
+#endif /* ITT_PLATFORM */
+
+#if defined(_UNICODE) && !defined(UNICODE)
+#define UNICODE
+#endif
+
+#include <stddef.h>
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#include <tchar.h>
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#include <stdint.h>
+#if defined(UNICODE) || defined(_UNICODE)
+#include <wchar.h>
+#endif /* UNICODE || _UNICODE */
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+#ifndef CDECL
+# if ITT_PLATFORM==ITT_PLATFORM_WIN
+# define CDECL __cdecl
+# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+# if defined _M_X64 || defined _M_AMD64 || defined __x86_64__
+# define CDECL /* not actual on x86_64 platform */
+# else /* _M_X64 || _M_AMD64 || __x86_64__ */
+# define CDECL __attribute__ ((cdecl))
+# endif /* _M_X64 || _M_AMD64 || __x86_64__ */
+# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* CDECL */
+
+#ifndef STDCALL
+# if ITT_PLATFORM==ITT_PLATFORM_WIN
+# define STDCALL __stdcall
+# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+# if defined _M_X64 || defined _M_AMD64 || defined __x86_64__
+# define STDCALL /* not supported on x86_64 platform */
+# else /* _M_X64 || _M_AMD64 || __x86_64__ */
+# define STDCALL __attribute__ ((stdcall))
+# endif /* _M_X64 || _M_AMD64 || __x86_64__ */
+# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* STDCALL */
+
+#define ITTAPI CDECL
+#define LIBITTAPI CDECL
+
+/* TODO: Temporary for compatibility! */
+#define ITTAPI_CALL CDECL
+#define LIBITTAPI_CALL CDECL
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+/* use __forceinline (VC++ specific) */
+#define ITT_INLINE __forceinline
+#define ITT_INLINE_ATTRIBUTE /* nothing */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+/*
+ * Generally, functions are not inlined unless optimization is specified.
+ * For functions declared inline, this attribute inlines the function even
+ * if no optimization level was specified.
+ */
+#ifdef __STRICT_ANSI__
+#define ITT_INLINE static
+#else /* __STRICT_ANSI__ */
+#define ITT_INLINE static inline
+#endif /* __STRICT_ANSI__ */
+#define ITT_INLINE_ATTRIBUTE __attribute__ ((always_inline))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+/** @endcond */
+
+#ifndef ITT_ARCH_IA32
+# define ITT_ARCH_IA32 1
+#endif /* ITT_ARCH_IA32 */
+
+#ifndef ITT_ARCH_IA32E
+# define ITT_ARCH_IA32E 2
+#endif /* ITT_ARCH_IA32E */
+
+#ifndef ITT_ARCH_IA64
+# define ITT_ARCH_IA64 3
+#endif /* ITT_ARCH_IA64 */
+
+#ifndef ITT_ARCH
+# if defined _M_X64 || defined _M_AMD64 || defined __x86_64__
+# define ITT_ARCH ITT_ARCH_IA32E
+# elif defined _M_IA64 || defined __ia64
+# define ITT_ARCH ITT_ARCH_IA64
+# else
+# define ITT_ARCH ITT_ARCH_IA32
+# endif
+#endif
+
+#ifdef __cplusplus
+# define ITT_EXTERN_C extern "C"
+#else
+# define ITT_EXTERN_C /* nothing */
+#endif /* __cplusplus */
+
+#define ITT_TO_STR_AUX(x) #x
+#define ITT_TO_STR(x) ITT_TO_STR_AUX(x)
+
+#define __ITT_BUILD_ASSERT(expr, suffix) do { \
+ static char __itt_build_check_##suffix[(expr) ? 1 : -1]; \
+ __itt_build_check_##suffix[0] = 0; \
+} while(0)
+#define _ITT_BUILD_ASSERT(expr, suffix) __ITT_BUILD_ASSERT((expr), suffix)
+#define ITT_BUILD_ASSERT(expr) _ITT_BUILD_ASSERT((expr), __LINE__)
+
+#define ITT_MAGIC { 0xED, 0xAB, 0xAB, 0xEC, 0x0D, 0xEE, 0xDA, 0x30 }
+
+/* Replace with snapshot date YYYYMMDD for promotion build. */
+#define API_VERSION_BUILD 20111111
+
+#ifndef API_VERSION_NUM
+#define API_VERSION_NUM 0.0.0
+#endif /* API_VERSION_NUM */
+
+#define API_VERSION "ITT-API-Version " ITT_TO_STR(API_VERSION_NUM) \
+ " (" ITT_TO_STR(API_VERSION_BUILD) ")"
+
+/* OS communication functions */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#include <windows.h>
+typedef HMODULE lib_t;
+typedef DWORD TIDT;
+typedef CRITICAL_SECTION mutex_t;
+#define MUTEX_INITIALIZER { 0 }
+#define strong_alias(name, aliasname) /* empty for Windows */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#include <dlfcn.h>
+#if defined(UNICODE) || defined(_UNICODE)
+#include <wchar.h>
+#endif /* UNICODE */
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE 1 /* need for PTHREAD_MUTEX_RECURSIVE */
+#endif /* _GNU_SOURCE */
+#include <pthread.h>
+typedef void* lib_t;
+typedef pthread_t TIDT;
+typedef pthread_mutex_t mutex_t;
+#define MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER
+#define _strong_alias(name, aliasname) \
+ extern __typeof (name) aliasname __attribute__ ((alias (#name)));
+#define strong_alias(name, aliasname) _strong_alias(name, aliasname)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_get_proc(lib, name) GetProcAddress(lib, name)
+#define __itt_mutex_init(mutex) InitializeCriticalSection(mutex)
+#define __itt_mutex_lock(mutex) EnterCriticalSection(mutex)
+#define __itt_mutex_unlock(mutex) LeaveCriticalSection(mutex)
+#define __itt_load_lib(name) LoadLibraryA(name)
+#define __itt_unload_lib(handle) FreeLibrary(handle)
+#define __itt_system_error() (int)GetLastError()
+#define __itt_fstrcmp(s1, s2) lstrcmpA(s1, s2)
+#define __itt_fstrlen(s) lstrlenA(s)
+#define __itt_fstrcpyn(s1, s2, l) lstrcpynA(s1, s2, l)
+#define __itt_fstrdup(s) _strdup(s)
+#define __itt_thread_id() GetCurrentThreadId()
+#define __itt_thread_yield() SwitchToThread()
+#ifndef ITT_SIMPLE_INIT
+ITT_INLINE long
+__itt_interlocked_increment(volatile long* ptr) ITT_INLINE_ATTRIBUTE;
+ITT_INLINE long __itt_interlocked_increment(volatile long* ptr)
+{
+ return InterlockedIncrement(ptr);
+}
+#endif /* ITT_SIMPLE_INIT */
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+#define __itt_get_proc(lib, name) dlsym(lib, name)
+#define __itt_mutex_init(mutex) {\
+ pthread_mutexattr_t mutex_attr; \
+ int error_code = pthread_mutexattr_init(&mutex_attr); \
+ if (error_code) \
+ __itt_report_error(__itt_error_system, "pthread_mutexattr_init", \
+ error_code); \
+ error_code = pthread_mutexattr_settype(&mutex_attr, \
+ PTHREAD_MUTEX_RECURSIVE); \
+ if (error_code) \
+ __itt_report_error(__itt_error_system, "pthread_mutexattr_settype", \
+ error_code); \
+ error_code = pthread_mutex_init(mutex, &mutex_attr); \
+ if (error_code) \
+ __itt_report_error(__itt_error_system, "pthread_mutex_init", \
+ error_code); \
+ error_code = pthread_mutexattr_destroy(&mutex_attr); \
+ if (error_code) \
+ __itt_report_error(__itt_error_system, "pthread_mutexattr_destroy", \
+ error_code); \
+}
+#define __itt_mutex_lock(mutex) pthread_mutex_lock(mutex)
+#define __itt_mutex_unlock(mutex) pthread_mutex_unlock(mutex)
+#define __itt_load_lib(name) dlopen(name, RTLD_LAZY)
+#define __itt_unload_lib(handle) dlclose(handle)
+#define __itt_system_error() errno
+#define __itt_fstrcmp(s1, s2) strcmp(s1, s2)
+#define __itt_fstrlen(s) strlen(s)
+#define __itt_fstrcpyn(s1, s2, l) strncpy(s1, s2, l)
+#define __itt_fstrdup(s) strdup(s)
+#define __itt_thread_id() pthread_self()
+#define __itt_thread_yield() sched_yield()
+#if ITT_ARCH==ITT_ARCH_IA64
+#ifdef __INTEL_COMPILER
+#define __TBB_machine_fetchadd4(addr, val) __fetchadd4_acq((void *)addr, val)
+#else /* __INTEL_COMPILER */
+/* TODO: Add Support for not Intel compilers for IA64 */
+#endif /* __INTEL_COMPILER */
+#else /* ITT_ARCH!=ITT_ARCH_IA64 */
+ITT_INLINE long
+__TBB_machine_fetchadd4(volatile void* ptr, long addend) ITT_INLINE_ATTRIBUTE;
+ITT_INLINE long __TBB_machine_fetchadd4(volatile void* ptr, long addend)
+{
+ long result;
+ __asm__ __volatile__("lock\nxadd %0,%1"
+ : "=r"(result),"=m"(*(long*)ptr)
+ : "0"(addend), "m"(*(long*)ptr)
+ : "memory");
+ return result;
+}
+#endif /* ITT_ARCH==ITT_ARCH_IA64 */
+#ifndef ITT_SIMPLE_INIT
+ITT_INLINE long
+__itt_interlocked_increment(volatile long* ptr) ITT_INLINE_ATTRIBUTE;
+ITT_INLINE long __itt_interlocked_increment(volatile long* ptr)
+{
+ return __TBB_machine_fetchadd4(ptr, 1) + 1L;
+}
+#endif /* ITT_SIMPLE_INIT */
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+typedef enum {
+ __itt_collection_normal = 0,
+ __itt_collection_paused = 1
+} __itt_collection_state;
+
+typedef enum {
+ __itt_thread_normal = 0,
+ __itt_thread_ignored = 1
+} __itt_thread_state;
+
+#pragma pack(push, 8)
+
+typedef struct ___itt_thread_info
+{
+ const char* nameA; /*!< Copy of original name in ASCII. */
+#if defined(UNICODE) || defined(_UNICODE)
+ const wchar_t* nameW; /*!< Copy of original name in UNICODE. */
+#else /* UNICODE || _UNICODE */
+ void* nameW;
+#endif /* UNICODE || _UNICODE */
+ TIDT tid;
+ __itt_thread_state state; /*!< Thread state (paused or normal) */
+ int extra1; /*!< Reserved to the runtime */
+ void* extra2; /*!< Reserved to the runtime */
+ struct ___itt_thread_info* next;
+} __itt_thread_info;
+
+#include "ittnotify_types.h" /* For __itt_group_id definition */
+
+typedef struct ___itt_api_info_20101001
+{
+ const char* name;
+ void** func_ptr;
+ void* init_func;
+ __itt_group_id group;
+} __itt_api_info_20101001;
+
+typedef struct ___itt_api_info
+{
+ const char* name;
+ void** func_ptr;
+ void* init_func;
+ void* null_func;
+ __itt_group_id group;
+} __itt_api_info;
+
+struct ___itt_domain;
+struct ___itt_string_handle;
+
+typedef struct ___itt_global
+{
+ unsigned char magic[8];
+ unsigned long version_major;
+ unsigned long version_minor;
+ unsigned long version_build;
+ volatile long api_initialized;
+ volatile long mutex_initialized;
+ volatile long atomic_counter;
+ mutex_t mutex;
+ lib_t lib;
+ void* error_handler;
+ const char** dll_path_ptr;
+ __itt_api_info* api_list_ptr;
+ struct ___itt_global* next;
+ /* Joinable structures below */
+ __itt_thread_info* thread_list;
+ struct ___itt_domain* domain_list;
+ struct ___itt_string_handle* string_list;
+ __itt_collection_state state;
+} __itt_global;
+
+#pragma pack(pop)
+
+#define NEW_THREAD_INFO_W(gptr,h,h_tail,t,s,n) { \
+ h = (__itt_thread_info*)malloc(sizeof(__itt_thread_info)); \
+ if (h != NULL) { \
+ h->tid = t; \
+ h->nameA = NULL; \
+ h->nameW = n ? _wcsdup(n) : NULL; \
+ h->state = s; \
+ h->extra1 = 0; /* reserved */ \
+ h->extra2 = NULL; /* reserved */ \
+ h->next = NULL; \
+ if (h_tail == NULL) \
+ (gptr)->thread_list = h; \
+ else \
+ h_tail->next = h; \
+ } \
+}
+
+#define NEW_THREAD_INFO_A(gptr,h,h_tail,t,s,n) { \
+ h = (__itt_thread_info*)malloc(sizeof(__itt_thread_info)); \
+ if (h != NULL) { \
+ h->tid = t; \
+ h->nameA = n ? __itt_fstrdup(n) : NULL; \
+ h->nameW = NULL; \
+ h->state = s; \
+ h->extra1 = 0; /* reserved */ \
+ h->extra2 = NULL; /* reserved */ \
+ h->next = NULL; \
+ if (h_tail == NULL) \
+ (gptr)->thread_list = h; \
+ else \
+ h_tail->next = h; \
+ } \
+}
+
+#define NEW_DOMAIN_W(gptr,h,h_tail,name) { \
+ h = (__itt_domain*)malloc(sizeof(__itt_domain)); \
+ if (h != NULL) { \
+ h->flags = 0; /* domain is disabled by default */ \
+ h->nameA = NULL; \
+ h->nameW = name ? _wcsdup(name) : NULL; \
+ h->extra1 = 0; /* reserved */ \
+ h->extra2 = NULL; /* reserved */ \
+ h->next = NULL; \
+ if (h_tail == NULL) \
+ (gptr)->domain_list = h; \
+ else \
+ h_tail->next = h; \
+ } \
+}
+
+#define NEW_DOMAIN_A(gptr,h,h_tail,name) { \
+ h = (__itt_domain*)malloc(sizeof(__itt_domain)); \
+ if (h != NULL) { \
+ h->flags = 0; /* domain is disabled by default */ \
+ h->nameA = name ? __itt_fstrdup(name) : NULL; \
+ h->nameW = NULL; \
+ h->extra1 = 0; /* reserved */ \
+ h->extra2 = NULL; /* reserved */ \
+ h->next = NULL; \
+ if (h_tail == NULL) \
+ (gptr)->domain_list = h; \
+ else \
+ h_tail->next = h; \
+ } \
+}
+
+#define NEW_STRING_HANDLE_W(gptr,h,h_tail,name) { \
+ h = (__itt_string_handle*)malloc(sizeof(__itt_string_handle)); \
+ if (h != NULL) { \
+ h->strA = NULL; \
+ h->strW = name ? _wcsdup(name) : NULL; \
+ h->extra1 = 0; /* reserved */ \
+ h->extra2 = NULL; /* reserved */ \
+ h->next = NULL; \
+ if (h_tail == NULL) \
+ (gptr)->string_list = h; \
+ else \
+ h_tail->next = h; \
+ } \
+}
+
+#define NEW_STRING_HANDLE_A(gptr,h,h_tail,name) { \
+ h = (__itt_string_handle*)malloc(sizeof(__itt_string_handle)); \
+ if (h != NULL) { \
+ h->strA = name ? __itt_fstrdup(name) : NULL; \
+ h->strW = NULL; \
+ h->extra1 = 0; /* reserved */ \
+ h->extra2 = NULL; /* reserved */ \
+ h->next = NULL; \
+ if (h_tail == NULL) \
+ (gptr)->string_list = h; \
+ else \
+ h_tail->next = h; \
+ } \
+}
+
+#endif /* _ITTNOTIFY_CONFIG_H_ */
diff --git a/lib/ExecutionEngine/IntelJITEvents/ittnotify_types.h b/lib/ExecutionEngine/IntelJITEvents/ittnotify_types.h
new file mode 100644
index 0000000000..5d502ba8e8
--- /dev/null
+++ b/lib/ExecutionEngine/IntelJITEvents/ittnotify_types.h
@@ -0,0 +1,63 @@
+//===-- ittnotify_types.h - Intel(R) Performance Analyzer JIT (Just-In-Time) Profiling API internal types. ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#ifndef _ITTNOTIFY_TYPES_H_
+#define _ITTNOTIFY_TYPES_H_
+
+typedef enum ___itt_group_id
+{
+ __itt_group_none = 0,
+ __itt_group_legacy = 1<<0,
+ __itt_group_control = 1<<1,
+ __itt_group_thread = 1<<2,
+ __itt_group_mark = 1<<3,
+ __itt_group_sync = 1<<4,
+ __itt_group_fsync = 1<<5,
+ __itt_group_jit = 1<<6,
+ __itt_group_model = 1<<7,
+ __itt_group_splitter_min = 1<<7,
+ __itt_group_counter = 1<<8,
+ __itt_group_frame = 1<<9,
+ __itt_group_stitch = 1<<10,
+ __itt_group_heap = 1<<11,
+ __itt_group_splitter_max = 1<<12,
+ __itt_group_structure = 1<<12,
+ __itt_group_suppress = 1<<13,
+ __itt_group_all = -1
+} __itt_group_id;
+
+#pragma pack(push, 8)
+
+typedef struct ___itt_group_list
+{
+ __itt_group_id id;
+ const char* name;
+} __itt_group_list;
+
+#pragma pack(pop)
+
+#define ITT_GROUP_LIST(varname) \
+ static __itt_group_list varname[] = { \
+ { __itt_group_all, "all" }, \
+ { __itt_group_control, "control" }, \
+ { __itt_group_thread, "thread" }, \
+ { __itt_group_mark, "mark" }, \
+ { __itt_group_sync, "sync" }, \
+ { __itt_group_fsync, "fsync" }, \
+ { __itt_group_jit, "jit" }, \
+ { __itt_group_model, "model" }, \
+ { __itt_group_counter, "counter" }, \
+ { __itt_group_frame, "frame" }, \
+ { __itt_group_stitch, "stitch" }, \
+ { __itt_group_heap, "heap" }, \
+ { __itt_group_structure, "structure" }, \
+ { __itt_group_suppress, "suppress" }, \
+ { __itt_group_none, NULL } \
+ }
+
+#endif /* _ITTNOTIFY_TYPES_H_ */
diff --git a/lib/ExecutionEngine/IntelJITEvents/jitprofiling.c b/lib/ExecutionEngine/IntelJITEvents/jitprofiling.c
new file mode 100644
index 0000000000..9b0dafbdca
--- /dev/null
+++ b/lib/ExecutionEngine/IntelJITEvents/jitprofiling.c
@@ -0,0 +1,476 @@
+/*===-- jitprofiling.c - JIT (Just-In-Time) Profiling API----------*- C -*-===*
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===*
+ *
+ * This file provides Intel(R) Performance Analyzer JIT (Just-In-Time)
+ * Profiling API implementation.
+ *
+ *===----------------------------------------------------------------------===*/
+#include "ittnotify_config.h"
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#include <windows.h>
+#pragma optimize("", off)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#include <pthread.h>
+#include <dlfcn.h>
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#include <malloc.h>
+#include <stdlib.h>
+
+#include "jitprofiling.h"
+
+static const char rcsid[] = "\n@(#) $Revision: 243501 $\n";
+
+#define DLL_ENVIRONMENT_VAR "VS_PROFILER"
+
+#ifndef NEW_DLL_ENVIRONMENT_VAR
+#if ITT_ARCH==ITT_ARCH_IA32
+#define NEW_DLL_ENVIRONMENT_VAR "INTEL_JIT_PROFILER32"
+#else
+#define NEW_DLL_ENVIRONMENT_VAR "INTEL_JIT_PROFILER64"
+#endif
+#endif /* NEW_DLL_ENVIRONMENT_VAR */
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define DEFAULT_DLLNAME "JitPI.dll"
+HINSTANCE m_libHandle = NULL;
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define DEFAULT_DLLNAME "libJitPI.so"
+void* m_libHandle = NULL;
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/* default location of JIT profiling agent on Android */
+#define ANDROID_JIT_AGENT_PATH "/data/intel/libittnotify.so"
+
+/* the function pointers */
+typedef unsigned int(*TPInitialize)(void);
+static TPInitialize FUNC_Initialize=NULL;
+
+typedef unsigned int(*TPNotify)(unsigned int, void*);
+static TPNotify FUNC_NotifyEvent=NULL;
+
+static iJIT_IsProfilingActiveFlags executionMode = iJIT_NOTHING_RUNNING;
+
+/* end collector dll part. */
+
+/* loadiJIT_Funcs() : this function is called just in the beginning
+ * and is responsible to load the functions from BistroJavaCollector.dll
+ * result:
+ * on success: the functions loads, iJIT_DLL_is_missing=0, return value = 1
+ * on failure: the functions are NULL, iJIT_DLL_is_missing=1, return value = 0
+ */
+static int loadiJIT_Funcs(void);
+
+/* global representing whether the BistroJavaCollector can't be loaded */
+static int iJIT_DLL_is_missing = 0;
+
+/* Virtual stack - the struct is used as a virtual stack for each thread.
+ * Every thread initializes with a stack of size INIT_TOP_STACK.
+ * Every method entry decreases from the current stack point,
+ * and when a thread stack reaches its top of stack (return from the global
+ * function), the top of stack and the current stack increase. Notice that
+ * when returning from a function the stack pointer is the address of
+ * the function return.
+*/
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+static DWORD threadLocalStorageHandle = 0;
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+static pthread_key_t threadLocalStorageHandle = (pthread_key_t)0;
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+#define INIT_TOP_Stack 10000
+
+typedef struct
+{
+ unsigned int TopStack;
+ unsigned int CurrentStack;
+} ThreadStack, *pThreadStack;
+
+/* end of virtual stack. */
+
+/*
+ * The function for reporting virtual-machine related events to VTune.
+ * Note: when reporting iJVM_EVENT_TYPE_ENTER_NIDS, there is no need to fill
+ * in the stack_id field in the iJIT_Method_NIDS structure, as VTune fills it.
+ * The return value in iJVM_EVENT_TYPE_ENTER_NIDS &&
+ * iJVM_EVENT_TYPE_LEAVE_NIDS events will be 0 in case of failure.
+ * in iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED event
+ * it will be -1 if EventSpecificData == 0 otherwise it will be 0.
+*/
+
+ITT_EXTERN_C int JITAPI
+iJIT_NotifyEvent(iJIT_JVM_EVENT event_type, void *EventSpecificData)
+{
+ int ReturnValue;
+
+ /*
+ * This section is for debugging outside of VTune.
+ * It creates the environment variables that indicates call graph mode.
+ * If running outside of VTune remove the remark.
+ *
+ *
+ * static int firstTime = 1;
+ * char DoCallGraph[12] = "DoCallGraph";
+ * if (firstTime)
+ * {
+ * firstTime = 0;
+ * SetEnvironmentVariable( "BISTRO_COLLECTORS_DO_CALLGRAPH", DoCallGraph);
+ * }
+ *
+ * end of section.
+ */
+
+ /* initialization part - the functions have not been loaded yet. This part
+ * will load the functions, and check if we are in Call Graph mode.
+ * (for special treatment).
+ */
+ if (!FUNC_NotifyEvent)
+ {
+ if (iJIT_DLL_is_missing)
+ return 0;
+
+ /* load the Function from the DLL */
+ if (!loadiJIT_Funcs())
+ return 0;
+
+ /* Call Graph initialization. */
+ }
+
+ /* If the event is method entry/exit, check that in the current mode
+ * VTune is allowed to receive it
+ */
+ if ((event_type == iJVM_EVENT_TYPE_ENTER_NIDS ||
+ event_type == iJVM_EVENT_TYPE_LEAVE_NIDS) &&
+ (executionMode != iJIT_CALLGRAPH_ON))
+ {
+ return 0;
+ }
+ /* This section is performed when method enter event occurs.
+ * It updates the virtual stack, or creates it if this is the first
+ * method entry in the thread. The stack pointer is decreased.
+ */
+ if (event_type == iJVM_EVENT_TYPE_ENTER_NIDS)
+ {
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ pThreadStack threadStack =
+ (pThreadStack)TlsGetValue (threadLocalStorageHandle);
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ pThreadStack threadStack =
+ (pThreadStack)pthread_getspecific(threadLocalStorageHandle);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+ /* check for use of reserved method IDs */
+ if ( ((piJIT_Method_NIDS) EventSpecificData)->method_id <= 999 )
+ return 0;
+
+ if (!threadStack)
+ {
+ /* initialize the stack. */
+ threadStack = (pThreadStack) calloc (sizeof(ThreadStack), 1);
+ threadStack->TopStack = INIT_TOP_Stack;
+ threadStack->CurrentStack = INIT_TOP_Stack;
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ TlsSetValue(threadLocalStorageHandle,(void*)threadStack);
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ pthread_setspecific(threadLocalStorageHandle,(void*)threadStack);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ }
+
+ /* decrease the stack. */
+ ((piJIT_Method_NIDS) EventSpecificData)->stack_id =
+ (threadStack->CurrentStack)--;
+ }
+
+ /* This section is performed when method leave event occurs
+ * It updates the virtual stack.
+ * Increases the stack pointer.
+ * If the stack pointer reached the top (left the global function)
+ * increase the pointer and the top pointer.
+ */
+ if (event_type == iJVM_EVENT_TYPE_LEAVE_NIDS)
+ {
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ pThreadStack threadStack =
+ (pThreadStack)TlsGetValue (threadLocalStorageHandle);
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ pThreadStack threadStack =
+ (pThreadStack)pthread_getspecific(threadLocalStorageHandle);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+ /* check for use of reserved method IDs */
+ if ( ((piJIT_Method_NIDS) EventSpecificData)->method_id <= 999 )
+ return 0;
+
+ if (!threadStack)
+ {
+ /* Error: first report in this thread is method exit */
+ exit (1);
+ }
+
+ ((piJIT_Method_NIDS) EventSpecificData)->stack_id =
+ ++(threadStack->CurrentStack) + 1;
+
+ if (((piJIT_Method_NIDS) EventSpecificData)->stack_id
+ > threadStack->TopStack)
+ ((piJIT_Method_NIDS) EventSpecificData)->stack_id =
+ (unsigned int)-1;
+ }
+
+ if (event_type == iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED)
+ {
+ /* check for use of reserved method IDs */
+ if ( ((piJIT_Method_Load) EventSpecificData)->method_id <= 999 )
+ return 0;
+ }
+
+ ReturnValue = (int)FUNC_NotifyEvent(event_type, EventSpecificData);
+
+ return ReturnValue;
+}
+
+/* The new mode call back routine */
+ITT_EXTERN_C void JITAPI
+iJIT_RegisterCallbackEx(void *userdata, iJIT_ModeChangedEx
+ NewModeCallBackFuncEx)
+{
+ /* is it already missing... or the load of functions from the DLL failed */
+ if (iJIT_DLL_is_missing || !loadiJIT_Funcs())
+ {
+ /* then do not bother with notifications */
+ NewModeCallBackFuncEx(userdata, iJIT_NO_NOTIFICATIONS);
+ /* Error: could not load JIT functions. */
+ return;
+ }
+ /* nothing to do with the callback */
+}
+
+/*
+ * This function allows the user to query in which mode, if at all,
+ *VTune is running
+ */
+ITT_EXTERN_C iJIT_IsProfilingActiveFlags JITAPI iJIT_IsProfilingActive()
+{
+ if (!iJIT_DLL_is_missing)
+ {
+ loadiJIT_Funcs();
+ }
+
+ return executionMode;
+}
+
+/* this function loads the collector dll (BistroJavaCollector)
+ * and the relevant functions.
+ * on success: all functions load, iJIT_DLL_is_missing = 0, return value = 1
+ * on failure: all functions are NULL, iJIT_DLL_is_missing = 1, return value = 0
+ */
+static int loadiJIT_Funcs()
+{
+ static int bDllWasLoaded = 0;
+ char *dllName = (char*)rcsid; /* !! Just to avoid unused code elimination */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ DWORD dNameLength = 0;
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+ if(bDllWasLoaded)
+ {
+ /* dll was already loaded, no need to do it for the second time */
+ return 1;
+ }
+
+ /* Assumes that the DLL will not be found */
+ iJIT_DLL_is_missing = 1;
+ FUNC_NotifyEvent = NULL;
+
+ if (m_libHandle)
+ {
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ FreeLibrary(m_libHandle);
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ dlclose(m_libHandle);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ m_libHandle = NULL;
+ }
+
+ /* Try to get the dll name from the environment */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ dNameLength = GetEnvironmentVariableA(NEW_DLL_ENVIRONMENT_VAR, NULL, 0);
+ if (dNameLength)
+ {
+ DWORD envret = 0;
+ dllName = (char*)malloc(sizeof(char) * (dNameLength + 1));
+ envret = GetEnvironmentVariableA(NEW_DLL_ENVIRONMENT_VAR,
+ dllName, dNameLength);
+ if (envret)
+ {
+ /* Try to load the dll from the PATH... */
+ m_libHandle = LoadLibraryExA(dllName,
+ NULL, LOAD_WITH_ALTERED_SEARCH_PATH);
+ }
+ free(dllName);
+ } else {
+ /* Try to use old VS_PROFILER variable */
+ dNameLength = GetEnvironmentVariableA(DLL_ENVIRONMENT_VAR, NULL, 0);
+ if (dNameLength)
+ {
+ DWORD envret = 0;
+ dllName = (char*)malloc(sizeof(char) * (dNameLength + 1));
+ envret = GetEnvironmentVariableA(DLL_ENVIRONMENT_VAR,
+ dllName, dNameLength);
+ if (envret)
+ {
+ /* Try to load the dll from the PATH... */
+ m_libHandle = LoadLibraryA(dllName);
+ }
+ free(dllName);
+ }
+ }
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ dllName = getenv(NEW_DLL_ENVIRONMENT_VAR);
+ if (!dllName)
+ dllName = getenv(DLL_ENVIRONMENT_VAR);
+#ifdef ANDROID
+ if (!dllName)
+ dllName = ANDROID_JIT_AGENT_PATH;
+#endif
+ if (dllName)
+ {
+ /* Try to load the dll from the PATH... */
+ m_libHandle = dlopen(dllName, RTLD_LAZY);
+ }
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+ if (!m_libHandle)
+ {
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ m_libHandle = LoadLibraryA(DEFAULT_DLLNAME);
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ m_libHandle = dlopen(DEFAULT_DLLNAME, RTLD_LAZY);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ }
+
+ /* if the dll wasn't loaded - exit. */
+ if (!m_libHandle)
+ {
+ iJIT_DLL_is_missing = 1; /* don't try to initialize
+ * JIT agent the second time
+ */
+ return 0;
+ }
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ FUNC_NotifyEvent = (TPNotify)GetProcAddress(m_libHandle, "NotifyEvent");
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ FUNC_NotifyEvent = (TPNotify)dlsym(m_libHandle, "NotifyEvent");
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ if (!FUNC_NotifyEvent)
+ {
+ FUNC_Initialize = NULL;
+ return 0;
+ }
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ FUNC_Initialize = (TPInitialize)GetProcAddress(m_libHandle, "Initialize");
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ FUNC_Initialize = (TPInitialize)dlsym(m_libHandle, "Initialize");
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ if (!FUNC_Initialize)
+ {
+ FUNC_NotifyEvent = NULL;
+ return 0;
+ }
+
+ executionMode = (iJIT_IsProfilingActiveFlags)FUNC_Initialize();
+
+ bDllWasLoaded = 1;
+ iJIT_DLL_is_missing = 0; /* DLL is ok. */
+
+ /*
+ * Call Graph mode: init the thread local storage
+ * (need to store the virtual stack there).
+ */
+ if ( executionMode == iJIT_CALLGRAPH_ON )
+ {
+ /* Allocate a thread local storage slot for the thread "stack" */
+ if (!threadLocalStorageHandle)
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ threadLocalStorageHandle = TlsAlloc();
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ pthread_key_create(&threadLocalStorageHandle, NULL);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ }
+
+ return 1;
+}
+
+/*
+ * This function should be called by the user whenever a thread ends,
+ * to free the thread "virtual stack" storage
+ */
+ITT_EXTERN_C void JITAPI FinalizeThread()
+{
+ if (threadLocalStorageHandle)
+ {
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ pThreadStack threadStack =
+ (pThreadStack)TlsGetValue (threadLocalStorageHandle);
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ pThreadStack threadStack =
+ (pThreadStack)pthread_getspecific(threadLocalStorageHandle);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ if (threadStack)
+ {
+ free (threadStack);
+ threadStack = NULL;
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ TlsSetValue (threadLocalStorageHandle, threadStack);
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ pthread_setspecific(threadLocalStorageHandle, threadStack);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ }
+ }
+}
+
+/*
+ * This function should be called by the user when the process ends,
+ * to free the local storage index
+*/
+ITT_EXTERN_C void JITAPI FinalizeProcess()
+{
+ if (m_libHandle)
+ {
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ FreeLibrary(m_libHandle);
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ dlclose(m_libHandle);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ m_libHandle = NULL;
+ }
+
+ if (threadLocalStorageHandle)
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ TlsFree (threadLocalStorageHandle);
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ pthread_key_delete(threadLocalStorageHandle);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+}
+
+/*
+ * This function should be called by the user for any method once.
+ * The function will return a unique method ID, the user should maintain
+ * the ID for each method
+ */
+ITT_EXTERN_C unsigned int JITAPI iJIT_GetNewMethodID()
+{
+ static unsigned int methodID = 0x100000;
+
+ if (methodID == 0)
+ return 0; /* ERROR : this is not a valid value */
+
+ return methodID++;
+}
diff --git a/lib/ExecutionEngine/IntelJITEvents/jitprofiling.h b/lib/ExecutionEngine/IntelJITEvents/jitprofiling.h
new file mode 100644
index 0000000000..f33fb83ba9
--- /dev/null
+++ b/lib/ExecutionEngine/IntelJITEvents/jitprofiling.h
@@ -0,0 +1,254 @@
+/*===-- jitprofiling.h - JIT Profiling API-------------------------*- C -*-===*
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===*
+ *
+ * This file provides Intel(R) Performance Analyzer JIT (Just-In-Time)
+ * Profiling API declaration.
+ *
+ *===----------------------------------------------------------------------===*/
+#ifndef __JITPROFILING_H__
+#define __JITPROFILING_H__
+
+/*
+ * Various constants used by functions
+ */
+
+/* event notification */
+typedef enum iJIT_jvm_event
+{
+
+ /* shutdown */
+
+ /*
+ * Program exiting EventSpecificData NA
+ */
+ iJVM_EVENT_TYPE_SHUTDOWN = 2,
+
+ /* JIT profiling */
+
+ /*
+ * issued after method code jitted into memory but before code is executed
+ * EventSpecificData is an iJIT_Method_Load
+ */
+ iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED=13,
+
+ /* issued before unload. Method code will no longer be executed, but code
+ * and info are still in memory. The VTune profiler may capture method
+ * code only at this point EventSpecificData is iJIT_Method_Id
+ */
+ iJVM_EVENT_TYPE_METHOD_UNLOAD_START,
+
+ /* Method Profiling */
+
+ /* method name, Id and stack is supplied
+ * issued when a method is about to be entered EventSpecificData is
+ * iJIT_Method_NIDS
+ */
+ iJVM_EVENT_TYPE_ENTER_NIDS = 19,
+
+ /* method name, Id and stack is supplied
+ * issued when a method is about to be left EventSpecificData is
+ * iJIT_Method_NIDS
+ */
+ iJVM_EVENT_TYPE_LEAVE_NIDS
+} iJIT_JVM_EVENT;
+
+typedef enum _iJIT_ModeFlags
+{
+ /* No need to Notify VTune, since VTune is not running */
+ iJIT_NO_NOTIFICATIONS = 0x0000,
+
+ /* when turned on the jit must call
+ * iJIT_NotifyEvent
+ * (
+ * iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED,
+ * )
+ * for all the method already jitted
+ */
+ iJIT_BE_NOTIFY_ON_LOAD = 0x0001,
+
+ /* when turned on the jit must call
+ * iJIT_NotifyEvent
+ * (
+ * iJVM_EVENT_TYPE_METHOD_UNLOAD_FINISHED,
+ * ) for all the method that are unloaded
+ */
+ iJIT_BE_NOTIFY_ON_UNLOAD = 0x0002,
+
+ /* when turned on the jit must instrument all
+ * the currently jited code with calls on
+ * method entries
+ */
+ iJIT_BE_NOTIFY_ON_METHOD_ENTRY = 0x0004,
+
+ /* when turned on the jit must instrument all
+ * the currently jited code with calls
+ * on method exit
+ */
+ iJIT_BE_NOTIFY_ON_METHOD_EXIT = 0x0008
+
+} iJIT_ModeFlags;
+
+
+ /* Flags used by iJIT_IsProfilingActive() */
+typedef enum _iJIT_IsProfilingActiveFlags
+{
+ /* No profiler is running. Currently not used */
+ iJIT_NOTHING_RUNNING = 0x0000,
+
+ /* Sampling is running. This is the default value
+ * returned by iJIT_IsProfilingActive()
+ */
+ iJIT_SAMPLING_ON = 0x0001,
+
+ /* Call Graph is running */
+ iJIT_CALLGRAPH_ON = 0x0002
+
+} iJIT_IsProfilingActiveFlags;
+
+/* Enumerator for the environment of methods*/
+typedef enum _iJDEnvironmentType
+{
+ iJDE_JittingAPI = 2
+} iJDEnvironmentType;
+
+/**********************************
+ * Data structures for the events *
+ **********************************/
+
+/* structure for the events:
+ * iJVM_EVENT_TYPE_METHOD_UNLOAD_START
+ */
+
+typedef struct _iJIT_Method_Id
+{
+ /* Id of the method (same as the one passed in
+ * the iJIT_Method_Load struct
+ */
+ unsigned int method_id;
+
+} *piJIT_Method_Id, iJIT_Method_Id;
+
+
+/* structure for the events:
+ * iJVM_EVENT_TYPE_ENTER_NIDS,
+ * iJVM_EVENT_TYPE_LEAVE_NIDS,
+ * iJVM_EVENT_TYPE_EXCEPTION_OCCURRED_NIDS
+ */
+
+typedef struct _iJIT_Method_NIDS
+{
+ /* unique method ID */
+ unsigned int method_id;
+
+ /* NOTE: no need to fill this field, it's filled by VTune */
+ unsigned int stack_id;
+
+ /* method name (just the method, without the class) */
+ char* method_name;
+} *piJIT_Method_NIDS, iJIT_Method_NIDS;
+
+/* structures for the events:
+ * iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED
+ */
+
+typedef struct _LineNumberInfo
+{
+ /* x86 Offset from the begining of the method*/
+ unsigned int Offset;
+
+ /* source line number from the begining of the source file */
+ unsigned int LineNumber;
+
+} *pLineNumberInfo, LineNumberInfo;
+
+typedef struct _iJIT_Method_Load
+{
+ /* unique method ID - can be any unique value, (except 0 - 999) */
+ unsigned int method_id;
+
+ /* method name (can be with or without the class and signature, in any case
+ * the class name will be added to it)
+ */
+ char* method_name;
+
+ /* virtual address of that method - This determines the method range for the
+ * iJVM_EVENT_TYPE_ENTER/LEAVE_METHOD_ADDR events
+ */
+ void* method_load_address;
+
+ /* Size in memory - Must be exact */
+ unsigned int method_size;
+
+ /* Line Table size in number of entries - Zero if none */
+ unsigned int line_number_size;
+
+ /* Pointer to the begining of the line numbers info array */
+ pLineNumberInfo line_number_table;
+
+ /* unique class ID */
+ unsigned int class_id;
+
+ /* class file name */
+ char* class_file_name;
+
+ /* source file name */
+ char* source_file_name;
+
+ /* bits supplied by the user for saving in the JIT file */
+ void* user_data;
+
+ /* the size of the user data buffer */
+ unsigned int user_data_size;
+
+ /* NOTE: no need to fill this field, it's filled by VTune */
+ iJDEnvironmentType env;
+
+} *piJIT_Method_Load, iJIT_Method_Load;
+
+/* API Functions */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef CDECL
+# if defined WIN32 || defined _WIN32
+# define CDECL __cdecl
+# else /* defined WIN32 || defined _WIN32 */
+# if defined _M_X64 || defined _M_AMD64 || defined __x86_64__
+# define CDECL /* not actual on x86_64 platform */
+# else /* _M_X64 || _M_AMD64 || __x86_64__ */
+# define CDECL __attribute__ ((cdecl))
+# endif /* _M_X64 || _M_AMD64 || __x86_64__ */
+# endif /* defined WIN32 || defined _WIN32 */
+#endif /* CDECL */
+
+#define JITAPI CDECL
+
+/* called when the settings are changed with new settings */
+typedef void (*iJIT_ModeChangedEx)(void *UserData, iJIT_ModeFlags Flags);
+
+int JITAPI iJIT_NotifyEvent(iJIT_JVM_EVENT event_type, void *EventSpecificData);
+
+/* The new mode call back routine */
+void JITAPI iJIT_RegisterCallbackEx(void *userdata,
+ iJIT_ModeChangedEx NewModeCallBackFuncEx);
+
+iJIT_IsProfilingActiveFlags JITAPI iJIT_IsProfilingActive(void);
+
+void JITAPI FinalizeThread(void);
+
+void JITAPI FinalizeProcess(void);
+
+unsigned int JITAPI iJIT_GetNewMethodID(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __JITPROFILING_H__ */
diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp
index 8fed48cef2..ffa79761f2 100644
--- a/lib/MC/MCExpr.cpp
+++ b/lib/MC/MCExpr.cpp
@@ -267,7 +267,7 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) {
/* *** */
-void MCTargetExpr::Anchor() {}
+void MCTargetExpr::anchor() {}
/* *** */
diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp
index f143e6d0ad..d07a3c9e7f 100644
--- a/lib/Support/APFloat.cpp
+++ b/lib/Support/APFloat.cpp
@@ -1775,7 +1775,7 @@ APFloat::opStatus APFloat::roundToIntegral(roundingMode rounding_mode) {
// If the exponent is large enough, we know that this value is already
// integral, and the arithmetic below would potentially cause it to saturate
// to +/-Inf. Bail out early instead.
- if (exponent+1 >= (int)semanticsPrecision(*semantics))
+ if (category == fcNormal && exponent+1 >= (int)semanticsPrecision(*semantics))
return opOK;
// The algorithm here is quite simple: we add 2^(p-1), where p is the
diff --git a/lib/Support/Errno.cpp b/lib/Support/Errno.cpp
index dd218f6099..00be43b750 100644
--- a/lib/Support/Errno.cpp
+++ b/lib/Support/Errno.cpp
@@ -13,6 +13,7 @@
#include "llvm/Support/Errno.h"
#include "llvm/Config/config.h" // Get autoconf configuration settings
+#include "llvm/Support/raw_ostream.h"
#if HAVE_STRING_H
#include <string.h>
@@ -39,7 +40,7 @@ std::string StrError(int errnum) {
const int MaxErrStrLen = 2000;
char buffer[MaxErrStrLen];
buffer[0] = '\0';
- char* str = buffer;
+ std::string str;
#ifdef HAVE_STRERROR_R
// strerror_r is thread-safe.
if (errnum)
@@ -49,6 +50,7 @@ std::string StrError(int errnum) {
str = strerror_r(errnum,buffer,MaxErrStrLen-1);
# else
strerror_r(errnum,buffer,MaxErrStrLen-1);
+ str = buffer;
# endif
#elif HAVE_DECL_STRERROR_S // "Windows Secure API"
if (errnum)
@@ -58,12 +60,13 @@ std::string StrError(int errnum) {
// the buffer as fast as possible to minimize impact
// of collision of strerror in multiple threads.
if (errnum)
- strncpy(buffer,strerror(errnum),MaxErrStrLen-1);
- buffer[MaxErrStrLen-1] = '\0';
+ str = strerror(errnum);
#else
// Strange that this system doesn't even have strerror
// but, oh well, just use a generic message
- sprintf(buffer, "Error #%d", errnum);
+ raw_string_ostream stream(str);
+ stream << "Error #" << errnum;
+ stream.flush();
#endif
return str;
}
diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp
index a13b9e2f87..9ee3f2db92 100644
--- a/lib/Support/Host.cpp
+++ b/lib/Support/Host.cpp
@@ -234,6 +234,8 @@ std::string sys::getHostCPUName() {
case 37: // Intel Core i7, laptop version.
case 44: // Intel Core i7 processor and Intel Xeon processor. All
// processors are manufactured using the 32 nm process.
+ case 46: // Nehalem EX
+ case 47: // Westmere EX
return "corei7";
// SandyBridge:
diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc
index f70e60d3f5..b82371a7b6 100644
--- a/lib/Support/Unix/Path.inc
+++ b/lib/Support/Unix/Path.inc
@@ -267,7 +267,8 @@ Path::GetCurrentDirectory() {
}
#if defined(__FreeBSD__) || defined (__NetBSD__) || defined(__Bitrig__) || \
- defined(__OpenBSD__) || defined(__minix) || defined(__FreeBSD_kernel__)
+ defined(__OpenBSD__) || defined(__minix) || defined(__FreeBSD_kernel__) || \
+ defined(__linux__) || defined(__CYGWIN__)
static int
test_dir(char buf[PATH_MAX], char ret[PATH_MAX],
const char *dir, const char *bin)
@@ -345,9 +346,17 @@ Path Path::GetMainExecutable(const char *argv0, void *MainAddr) {
return Path(exe_path);
#elif defined(__linux__) || defined(__CYGWIN__)
char exe_path[MAXPATHLEN];
- ssize_t len = readlink("/proc/self/exe", exe_path, sizeof(exe_path));
- if (len >= 0)
- return Path(StringRef(exe_path, len));
+ StringRef aPath("/proc/self/exe");
+ if (sys::fs::exists(aPath)) {
+ // /proc is not always mounted under Linux (chroot for example).
+ ssize_t len = readlink(aPath.str().c_str(), exe_path, sizeof(exe_path));
+ if (len >= 0)
+ return Path(StringRef(exe_path, len));
+ } else {
+ // Fall back to the classical detection.
+ if (getprogpath(exe_path, argv0) != NULL)
+ return Path(exe_path);
+ }
#elif defined(HAVE_DLFCN_H)
// Use dladdr to get executable path if available.
Dl_info DLInfo;
diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc
index e05e81acaf..6d874ea0d0 100644
--- a/lib/Support/Unix/Signals.inc
+++ b/lib/Support/Unix/Signals.inc
@@ -249,7 +249,7 @@ void llvm::sys::AddSignalHandler(void (*FnPtr)(void *), void *Cookie) {
// On glibc systems we have the 'backtrace' function, which works nicely, but
// doesn't demangle symbols.
static void PrintStackTrace(void *) {
-#if defined(HAVE_BACKTRACE) && defined(ENABLE_BACKTRACE)
+#if defined(HAVE_BACKTRACE) && defined(ENABLE_BACKTRACES)
static void* StackTrace[256];
// Use backtrace() to output a backtrace on Linux systems with glibc.
int depth = backtrace(StackTrace,
diff --git a/lib/Support/Windows/PathV2.inc b/lib/Support/Windows/PathV2.inc
index 696768ba9d..3dfac66b77 100644
--- a/lib/Support/Windows/PathV2.inc
+++ b/lib/Support/Windows/PathV2.inc
@@ -794,7 +794,7 @@ mapped_file_region::mapped_file_region(const Twine &path,
SmallVector<wchar_t, 128> path_utf16;
// Convert path to UTF-16.
- if (ec = UTF8ToUTF16(path.toStringRef(path_storage), path_utf16))
+ if ((ec = UTF8ToUTF16(path.toStringRef(path_storage), path_utf16)))
return;
// Get file handle for creating a file mapping.
diff --git a/lib/Support/YAMLParser.cpp b/lib/Support/YAMLParser.cpp
index 7c353c89bb..34df636a72 100644
--- a/lib/Support/YAMLParser.cpp
+++ b/lib/Support/YAMLParser.cpp
@@ -903,6 +903,7 @@ bool Scanner::consume(uint32_t Expected) {
void Scanner::skip(uint32_t Distance) {
Current += Distance;
Column += Distance;
+ assert(Current <= End && "Skipped past the end");
}
bool Scanner::isBlankOrBreak(StringRef::iterator Position) {
@@ -1239,6 +1240,12 @@ bool Scanner::scanFlowScalar(bool IsDoubleQuoted) {
}
}
}
+
+ if (Current == End) {
+ setError("Expected quote at end of scalar", Current);
+ return false;
+ }
+
skip(1); // Skip ending quote.
Token T;
T.Kind = Token::TK_Scalar;
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index 9a8cab8ecc..0ac92f1ee8 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -40,6 +40,7 @@ FunctionPass *createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM,
FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
FunctionPass *createARMExpandPseudoPass();
+FunctionPass *createARMGlobalBaseRegPass();
FunctionPass *createARMGlobalMergePass(const TargetLowering* tli);
FunctionPass *createARMConstantIslandPass();
FunctionPass *createMLxExpansionPass();
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 38509a3400..00bf1b85ec 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -32,9 +32,6 @@ def FeatureVFP2 : SubtargetFeature<"vfp2", "HasVFPv2", "true",
def FeatureVFP3 : SubtargetFeature<"vfp3", "HasVFPv3", "true",
"Enable VFP3 instructions",
[FeatureVFP2]>;
-def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true",
- "Enable VFP4 instructions",
- [FeatureVFP3]>;
def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
"Enable NEON instructions",
[FeatureVFP3]>;
@@ -44,10 +41,16 @@ def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true",
"Does not support ARM mode execution">;
def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true",
"Enable half-precision floating point">;
+def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true",
+ "Enable VFP4 instructions",
+ [FeatureVFP3, FeatureFP16]>;
def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true",
"Restrict VFP3 to 16 double registers">;
def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true",
"Enable divide instructions">;
+def FeatureHWDivARM : SubtargetFeature<"hwdiv-arm",
+ "HasHardwareDivideInARM", "true",
+ "Enable divide instructions in ARM mode">;
def FeatureT2XtPk : SubtargetFeature<"t2xtpk", "HasT2ExtractPack", "true",
"Enable Thumb2 extract and pack instructions">;
def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true",
@@ -139,6 +142,13 @@ def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9",
[FeatureVMLxForwarding,
FeatureT2XtPk, FeatureFP16,
FeatureAvoidPartialCPSR]>;
+def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift",
+ "Swift ARM processors",
+ [FeatureNEONForFP, FeatureT2XtPk,
+ FeatureVFP4, FeatureMP, FeatureHWDiv,
+ FeatureHWDivARM, FeatureAvoidPartialCPSR,
+ FeatureHasSlowFPVMLx]>;
+
// FIXME: It has not been determined if A15 has these features.
def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15",
"Cortex-A15 ARM processors",
@@ -240,6 +250,12 @@ def : ProcNoItin<"cortex-m4", [HasV7Ops,
FeatureT2XtPk, FeatureVFP4,
FeatureVFPOnlySP, FeatureMClass]>;
+// Swift uArch Processors.
+def : ProcessorModel<"swift", SwiftModel,
+ [ProcSwift, HasV7Ops, FeatureNEON,
+ FeatureDB, FeatureDSPThumb2,
+ FeatureHasRAS]>;
+
//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index c08294918e..42b6bc3cdc 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -49,6 +49,11 @@ static cl::opt<bool>
WidenVMOVS("widen-vmovs", cl::Hidden, cl::init(true),
cl::desc("Widen ARM vmovs to vmovd when possible"));
+static cl::opt<unsigned>
+SwiftPartialUpdateClearance("swift-partial-update-clearance",
+ cl::Hidden, cl::init(12),
+ cl::desc("Clearance before partial register updates"));
+
/// ARM_MLxEntry - Record information about MLA / MLS instructions.
struct ARM_MLxEntry {
uint16_t MLxOpc; // MLA / MLS opcode
@@ -1389,7 +1394,6 @@ bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
case ARM::VLDRD:
case ARM::VLDRS:
case ARM::t2LDRi8:
- case ARM::t2LDRDi8:
case ARM::t2LDRSHi8:
case ARM::t2LDRi12:
case ARM::t2LDRSHi12:
@@ -1528,6 +1532,14 @@ isProfitableToIfCvt(MachineBasicBlock &TMBB,
return (TCycles + FCycles + TExtra + FExtra) <= UnpredCost;
}
+bool
+ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
+ MachineBasicBlock &FMBB) const {
+ // Reduce false anti-dependencies to let Swift's out-of-order execution
+ // engine do its thing.
+ return Subtarget.isSwift();
+}
+
/// getInstrPredicate - If instruction is predicated, returns its predicate
/// condition, otherwise returns AL. It also returns the condition code
/// register by reference.
@@ -2344,6 +2356,229 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI,
return true;
}
+static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
+ const MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ default: {
+ const MCInstrDesc &Desc = MI->getDesc();
+ int UOps = ItinData->getNumMicroOps(Desc.getSchedClass());
+ assert(UOps >= 0 && "bad # UOps");
+ return UOps;
+ }
+
+ case ARM::LDRrs:
+ case ARM::LDRBrs:
+ case ARM::STRrs:
+ case ARM::STRBrs: {
+ unsigned ShOpVal = MI->getOperand(3).getImm();
+ bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (!isSub &&
+ (ShImm == 0 ||
+ ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
+ ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
+ return 1;
+ return 2;
+ }
+
+ case ARM::LDRH:
+ case ARM::STRH: {
+ if (!MI->getOperand(2).getReg())
+ return 1;
+
+ unsigned ShOpVal = MI->getOperand(3).getImm();
+ bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (!isSub &&
+ (ShImm == 0 ||
+ ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
+ ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
+ return 1;
+ return 2;
+ }
+
+ case ARM::LDRSB:
+ case ARM::LDRSH:
+ return (ARM_AM::getAM3Op(MI->getOperand(3).getImm()) == ARM_AM::sub) ? 3:2;
+
+ case ARM::LDRSB_POST:
+ case ARM::LDRSH_POST: {
+ unsigned Rt = MI->getOperand(0).getReg();
+ unsigned Rm = MI->getOperand(3).getReg();
+ return (Rt == Rm) ? 4 : 3;
+ }
+
+ case ARM::LDR_PRE_REG:
+ case ARM::LDRB_PRE_REG: {
+ unsigned Rt = MI->getOperand(0).getReg();
+ unsigned Rm = MI->getOperand(3).getReg();
+ if (Rt == Rm)
+ return 3;
+ unsigned ShOpVal = MI->getOperand(4).getImm();
+ bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (!isSub &&
+ (ShImm == 0 ||
+ ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
+ ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
+ return 2;
+ return 3;
+ }
+
+ case ARM::STR_PRE_REG:
+ case ARM::STRB_PRE_REG: {
+ unsigned ShOpVal = MI->getOperand(4).getImm();
+ bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (!isSub &&
+ (ShImm == 0 ||
+ ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
+ ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
+ return 2;
+ return 3;
+ }
+
+ case ARM::LDRH_PRE:
+ case ARM::STRH_PRE: {
+ unsigned Rt = MI->getOperand(0).getReg();
+ unsigned Rm = MI->getOperand(3).getReg();
+ if (!Rm)
+ return 2;
+ if (Rt == Rm)
+ return 3;
+ return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub)
+ ? 3 : 2;
+ }
+
+ case ARM::LDR_POST_REG:
+ case ARM::LDRB_POST_REG:
+ case ARM::LDRH_POST: {
+ unsigned Rt = MI->getOperand(0).getReg();
+ unsigned Rm = MI->getOperand(3).getReg();
+ return (Rt == Rm) ? 3 : 2;
+ }
+
+ case ARM::LDR_PRE_IMM:
+ case ARM::LDRB_PRE_IMM:
+ case ARM::LDR_POST_IMM:
+ case ARM::LDRB_POST_IMM:
+ case ARM::STRB_POST_IMM:
+ case ARM::STRB_POST_REG:
+ case ARM::STRB_PRE_IMM:
+ case ARM::STRH_POST:
+ case ARM::STR_POST_IMM:
+ case ARM::STR_POST_REG:
+ case ARM::STR_PRE_IMM:
+ return 2;
+
+ case ARM::LDRSB_PRE:
+ case ARM::LDRSH_PRE: {
+ unsigned Rm = MI->getOperand(3).getReg();
+ if (Rm == 0)
+ return 3;
+ unsigned Rt = MI->getOperand(0).getReg();
+ if (Rt == Rm)
+ return 4;
+ unsigned ShOpVal = MI->getOperand(4).getImm();
+ bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (!isSub &&
+ (ShImm == 0 ||
+ ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
+ ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
+ return 3;
+ return 4;
+ }
+
+ case ARM::LDRD: {
+ unsigned Rt = MI->getOperand(0).getReg();
+ unsigned Rn = MI->getOperand(2).getReg();
+ unsigned Rm = MI->getOperand(3).getReg();
+ if (Rm)
+ return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) ?4:3;
+ return (Rt == Rn) ? 3 : 2;
+ }
+
+ case ARM::STRD: {
+ unsigned Rm = MI->getOperand(3).getReg();
+ if (Rm)
+ return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) ?4:3;
+ return 2;
+ }
+
+ case ARM::LDRD_POST:
+ case ARM::t2LDRD_POST:
+ return 3;
+
+ case ARM::STRD_POST:
+ case ARM::t2STRD_POST:
+ return 4;
+
+ case ARM::LDRD_PRE: {
+ unsigned Rt = MI->getOperand(0).getReg();
+ unsigned Rn = MI->getOperand(3).getReg();
+ unsigned Rm = MI->getOperand(4).getReg();
+ if (Rm)
+ return (ARM_AM::getAM3Op(MI->getOperand(5).getImm()) == ARM_AM::sub) ?5:4;
+ return (Rt == Rn) ? 4 : 3;
+ }
+
+ case ARM::t2LDRD_PRE: {
+ unsigned Rt = MI->getOperand(0).getReg();
+ unsigned Rn = MI->getOperand(3).getReg();
+ return (Rt == Rn) ? 4 : 3;
+ }
+
+ case ARM::STRD_PRE: {
+ unsigned Rm = MI->getOperand(4).getReg();
+ if (Rm)
+ return (ARM_AM::getAM3Op(MI->getOperand(5).getImm()) == ARM_AM::sub) ?5:4;
+ return 3;
+ }
+
+ case ARM::t2STRD_PRE:
+ return 3;
+
+ case ARM::t2LDR_POST:
+ case ARM::t2LDRB_POST:
+ case ARM::t2LDRB_PRE:
+ case ARM::t2LDRSBi12:
+ case ARM::t2LDRSBi8:
+ case ARM::t2LDRSBpci:
+ case ARM::t2LDRSBs:
+ case ARM::t2LDRH_POST:
+ case ARM::t2LDRH_PRE:
+ case ARM::t2LDRSBT:
+ case ARM::t2LDRSB_POST:
+ case ARM::t2LDRSB_PRE:
+ case ARM::t2LDRSH_POST:
+ case ARM::t2LDRSH_PRE:
+ case ARM::t2LDRSHi12:
+ case ARM::t2LDRSHi8:
+ case ARM::t2LDRSHpci:
+ case ARM::t2LDRSHs:
+ return 2;
+
+ case ARM::t2LDRDi8: {
+ unsigned Rt = MI->getOperand(0).getReg();
+ unsigned Rn = MI->getOperand(2).getReg();
+ return (Rt == Rn) ? 3 : 2;
+ }
+
+ case ARM::t2STRB_POST:
+ case ARM::t2STRB_PRE:
+ case ARM::t2STRBs:
+ case ARM::t2STRDi8:
+ case ARM::t2STRH_POST:
+ case ARM::t2STRH_PRE:
+ case ARM::t2STRHs:
+ case ARM::t2STR_POST:
+ case ARM::t2STR_PRE:
+ case ARM::t2STRs:
+ return 2;
+ }
+}
+
// Return the number of 32-bit words loaded by LDM or stored by STM. If this
// can't be easily determined return 0 (missing MachineMemOperand).
//
@@ -2384,8 +2619,12 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
const MCInstrDesc &Desc = MI->getDesc();
unsigned Class = Desc.getSchedClass();
int ItinUOps = ItinData->getNumMicroOps(Class);
- if (ItinUOps >= 0)
+ if (ItinUOps >= 0) {
+ if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore()))
+ return getNumMicroOpsSwiftLdSt(ItinData, MI);
+
return ItinUOps;
+ }
unsigned Opc = MI->getOpcode();
switch (Opc) {
@@ -2454,7 +2693,43 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
case ARM::t2STMIA_UPD:
case ARM::t2STMDB_UPD: {
unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1;
- if (Subtarget.isCortexA8()) {
+ if (Subtarget.isSwift()) {
+ // rdar://8402126
+ int UOps = 1 + NumRegs; // One for address computation, one for each ld / st.
+ switch (Opc) {
+ default: break;
+ case ARM::VLDMDIA_UPD:
+ case ARM::VLDMDDB_UPD:
+ case ARM::VLDMSIA_UPD:
+ case ARM::VLDMSDB_UPD:
+ case ARM::VSTMDIA_UPD:
+ case ARM::VSTMDDB_UPD:
+ case ARM::VSTMSIA_UPD:
+ case ARM::VSTMSDB_UPD:
+ case ARM::LDMIA_UPD:
+ case ARM::LDMDA_UPD:
+ case ARM::LDMDB_UPD:
+ case ARM::LDMIB_UPD:
+ case ARM::STMIA_UPD:
+ case ARM::STMDA_UPD:
+ case ARM::STMDB_UPD:
+ case ARM::STMIB_UPD:
+ case ARM::tLDMIA_UPD:
+ case ARM::tSTMIA_UPD:
+ case ARM::t2LDMIA_UPD:
+ case ARM::t2LDMDB_UPD:
+ case ARM::t2STMIA_UPD:
+ case ARM::t2STMDB_UPD:
+ ++UOps; // One for base register writeback.
+ break;
+ case ARM::LDMIA_RET:
+ case ARM::tPOP_RET:
+ case ARM::t2LDMIA_RET:
+ UOps += 2; // One for base reg wb, one for write to pc.
+ break;
+ }
+ return UOps;
+ } else if (Subtarget.isCortexA8()) {
if (NumRegs < 4)
return 2;
// 4 registers would be issued: 2, 2.
@@ -2463,7 +2738,7 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
if (NumRegs % 2)
++A8UOps;
return A8UOps;
- } else if (Subtarget.isLikeA9()) {
+ } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
int A9UOps = (NumRegs / 2);
// If there are odd number of registers or if it's not 64-bit aligned,
// then it takes an extra AGU (Address Generation Unit) cycle.
@@ -2496,7 +2771,7 @@ ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
DefCycle = RegNo / 2 + 1;
if (RegNo % 2)
++DefCycle;
- } else if (Subtarget.isLikeA9()) {
+ } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
DefCycle = RegNo;
bool isSLoad = false;
@@ -2540,7 +2815,7 @@ ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
DefCycle = 1;
// Result latency is issue cycle + 2: E2.
DefCycle += 2;
- } else if (Subtarget.isLikeA9()) {
+ } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
DefCycle = (RegNo / 2);
// If there are odd number of registers or if it's not 64-bit aligned,
// then it takes an extra AGU (Address Generation Unit) cycle.
@@ -2571,7 +2846,7 @@ ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
UseCycle = RegNo / 2 + 1;
if (RegNo % 2)
++UseCycle;
- } else if (Subtarget.isLikeA9()) {
+ } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
UseCycle = RegNo;
bool isSStore = false;
@@ -2612,7 +2887,7 @@ ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
UseCycle = 2;
// Read in E3.
UseCycle += 2;
- } else if (Subtarget.isLikeA9()) {
+ } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
UseCycle = (RegNo / 2);
// If there are odd number of registers or if it's not 64-bit aligned,
// then it takes an extra AGU (Address Generation Unit) cycle.
@@ -2822,6 +3097,37 @@ static int adjustDefLatency(const ARMSubtarget &Subtarget,
break;
}
}
+ } else if (Subtarget.isSwift()) {
+ // FIXME: Properly handle all of the latency adjustments for address
+ // writeback.
+ switch (DefMCID->getOpcode()) {
+ default: break;
+ case ARM::LDRrs:
+ case ARM::LDRBrs: {
+ unsigned ShOpVal = DefMI->getOperand(3).getImm();
+ bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (!isSub &&
+ (ShImm == 0 ||
+ ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
+ ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
+ Adjust -= 2;
+ else if (!isSub &&
+ ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
+ --Adjust;
+ break;
+ }
+ case ARM::t2LDRs:
+ case ARM::t2LDRBs:
+ case ARM::t2LDRHs:
+ case ARM::t2LDRSHs: {
+ // Thumb2 mode: lsl only.
+ unsigned ShAmt = DefMI->getOperand(3).getImm();
+ if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3)
+ Adjust -= 2;
+ break;
+ }
+ }
}
if (DefAlign < 8 && Subtarget.isLikeA9()) {
@@ -2998,7 +3304,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
// instructions).
if (Latency > 0 && Subtarget.isThumb2()) {
const MachineFunction *MF = DefMI->getParent()->getParent();
- if (MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+ if (MF->getFunction()->getFnAttributes().hasOptimizeForSizeAttr())
--Latency;
}
return Latency;
@@ -3048,7 +3354,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
if (!UseNode->isMachineOpcode()) {
int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
- if (Subtarget.isLikeA9())
+ if (Subtarget.isLikeA9() || Subtarget.isSwift())
return Latency <= 2 ? 1 : Latency - 1;
else
return Latency <= 3 ? 1 : Latency - 2;
@@ -3092,6 +3398,33 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
break;
}
}
+ } else if (DefIdx == 0 && Latency > 2 && Subtarget.isSwift()) {
+ // FIXME: Properly handle all of the latency adjustments for address
+ // writeback.
+ switch (DefMCID.getOpcode()) {
+ default: break;
+ case ARM::LDRrs:
+ case ARM::LDRBrs: {
+ unsigned ShOpVal =
+ cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (ShImm == 0 ||
+ ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
+ ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
+ Latency -= 2;
+ else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
+ --Latency;
+ break;
+ }
+ case ARM::t2LDRs:
+ case ARM::t2LDRBs:
+ case ARM::t2LDRHs:
+ case ARM::t2LDRSHs: {
+ // Thumb2 mode: lsl 0-3 only.
+ Latency -= 2;
+ break;
+ }
+ }
}
if (DefAlign < 8 && Subtarget.isLikeA9())
@@ -3660,6 +3993,122 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
}
+//===----------------------------------------------------------------------===//
+// Partial register updates
+//===----------------------------------------------------------------------===//
+//
+// Swift renames NEON registers with 64-bit granularity. That means any
+// instruction writing an S-reg implicitly reads the containing D-reg. The
+// problem is mostly avoided by translating f32 operations to v2f32 operations
+// on D-registers, but f32 loads are still a problem.
+//
+// These instructions can load an f32 into a NEON register:
+//
+// VLDRS - Only writes S, partial D update.
+// VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops.
+// VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
+//
+// FCONSTD can be used as a dependency-breaking instruction.
+
+
+unsigned ARMBaseInstrInfo::
+getPartialRegUpdateClearance(const MachineInstr *MI,
+ unsigned OpNum,
+ const TargetRegisterInfo *TRI) const {
+ // Only Swift has partial register update problems.
+ if (!SwiftPartialUpdateClearance || !Subtarget.isSwift())
+ return 0;
+
+ assert(TRI && "Need TRI instance");
+
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ if (MO.readsReg())
+ return 0;
+ unsigned Reg = MO.getReg();
+ int UseOp = -1;
+
+ switch(MI->getOpcode()) {
+ // Normal instructions writing only an S-register.
+ case ARM::VLDRS:
+ case ARM::FCONSTS:
+ case ARM::VMOVSR:
+ // rdar://problem/8791586
+ case ARM::VMOVv8i8:
+ case ARM::VMOVv4i16:
+ case ARM::VMOVv2i32:
+ case ARM::VMOVv2f32:
+ case ARM::VMOVv1i64:
+ UseOp = MI->findRegisterUseOperandIdx(Reg, false, TRI);
+ break;
+
+ // Explicitly reads the dependency.
+ case ARM::VLD1LNd32:
+ UseOp = 1;
+ break;
+ default:
+ return 0;
+ }
+
+ // If this instruction actually reads a value from Reg, there is no unwanted
+ // dependency.
+ if (UseOp != -1 && MI->getOperand(UseOp).readsReg())
+ return 0;
+
+ // We must be able to clobber the whole D-reg.
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ // Virtual register must be a foo:ssub_0<def,undef> operand.
+ if (!MO.getSubReg() || MI->readsVirtualRegister(Reg))
+ return 0;
+ } else if (ARM::SPRRegClass.contains(Reg)) {
+ // Physical register: MI must define the full D-reg.
+ unsigned DReg = TRI->getMatchingSuperReg(Reg, ARM::ssub_0,
+ &ARM::DPRRegClass);
+ if (!DReg || !MI->definesRegister(DReg, TRI))
+ return 0;
+ }
+
+ // MI has an unwanted D-register dependency.
+ // Avoid defs in the previous N instructrions.
+ return SwiftPartialUpdateClearance;
+}
+
+// Break a partial register dependency after getPartialRegUpdateClearance
+// returned non-zero.
+void ARMBaseInstrInfo::
+breakPartialRegDependency(MachineBasicBlock::iterator MI,
+ unsigned OpNum,
+ const TargetRegisterInfo *TRI) const {
+ assert(MI && OpNum < MI->getDesc().getNumDefs() && "OpNum is not a def");
+ assert(TRI && "Need TRI instance");
+
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ unsigned Reg = MO.getReg();
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ "Can't break virtual register dependencies.");
+ unsigned DReg = Reg;
+
+ // If MI defines an S-reg, find the corresponding D super-register.
+ if (ARM::SPRRegClass.contains(Reg)) {
+ DReg = ARM::D0 + (Reg - ARM::S0) / 2;
+ assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken");
+ }
+
+ assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps");
+ assert(MI->definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg");
+
+ // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines
+ // the full D-register by loading the same value to both lanes. The
+ // instruction is micro-coded with 2 uops, so don't do this until we can
+ // properly schedule micro-coded instuctions. The dispatcher stalls cause
+ // too big regressions.
+
+ // Insert the dependency-breaking FCONSTD before MI.
+ // 96 is the encoding of 0.5, but the actual value doesn't matter here.
+ AddDefaultPred(BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ get(ARM::FCONSTD), DReg).addImm(96));
+ MI->addRegisterKilled(DReg, TRI, true);
+}
+
bool ARMBaseInstrInfo::hasNOP() const {
return (Subtarget.getFeatureBits() & ARM::HasV6T2Ops) != 0;
}
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 304ccc087c..8f4f47b34f 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -182,10 +182,13 @@ public:
virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
unsigned NumCycles,
const BranchProbability
- &Probability) const {
+ &Probability) const {
return NumCycles == 1;
}
+ virtual bool isProfitableToUnpredicate(MachineBasicBlock &TMBB,
+ MachineBasicBlock &FMBB) const;
+
/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2 if having two register operands, and the value it
/// compares against in CmpValue. Return true if the comparison instruction
@@ -235,6 +238,10 @@ public:
getExecutionDomain(const MachineInstr *MI) const;
void setExecutionDomain(MachineInstr *MI, unsigned Domain) const;
+ unsigned getPartialRegUpdateClearance(const MachineInstr*, unsigned,
+ const TargetRegisterInfo*) const;
+ void breakPartialRegDependency(MachineBasicBlock::iterator, unsigned,
+ const TargetRegisterInfo *TRI) const;
/// Get the number of addresses by LDM or VLDM or zero for unknown.
unsigned getNumLDMAddresses(const MachineInstr *MI) const;
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 277dd57ef2..1cba45c3a5 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -566,7 +566,7 @@ needsStackRealignment(const MachineFunction &MF) const {
const Function *F = MF.getFunction();
unsigned StackAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
- F->hasFnAttr(Attribute::StackAlignment));
+ F->getFnAttributes().hasStackAlignmentAttr());
return requiresRealignment && canRealignStack(MF);
}
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index d6ef3f333b..6b49e37e87 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -194,6 +194,7 @@ class ARMFastISel : public FastISel {
unsigned ARMMoveToFPReg(EVT VT, unsigned SrcReg);
unsigned ARMMoveToIntReg(EVT VT, unsigned SrcReg);
unsigned ARMSelectCallOp(bool UseReg);
+ unsigned ARMLowerPICELF(const GlobalValue *GV, unsigned Align, EVT VT);
// Call handling routines.
private:
@@ -648,6 +649,9 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) {
Align = TD.getTypeAllocSize(GV->getType());
}
+ if (Subtarget->isTargetELF() && RelocM == Reloc::PIC_)
+ return ARMLowerPICELF(GV, Align, VT);
+
// Grab index.
unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 :
(Subtarget->isThumb() ? 4 : 8);
@@ -2801,6 +2805,47 @@ bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
return true;
}
+unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
+ unsigned Align, EVT VT) {
+ bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
+ ARMConstantPoolConstant *CPV =
+ ARMConstantPoolConstant::Create(GV, UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
+ unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);
+
+ unsigned Opc;
+ unsigned DestReg1 = createResultReg(TLI.getRegClassFor(VT));
+ // Load value.
+ if (isThumb2) {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::t2LDRpci), DestReg1)
+ .addConstantPoolIndex(Idx));
+ Opc = UseGOTOFF ? ARM::t2ADDrr : ARM::t2LDRs;
+ } else {
+ // The extra immediate is for addrmode2.
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(ARM::LDRcp), DestReg1)
+ .addConstantPoolIndex(Idx).addImm(0));
+ Opc = UseGOTOFF ? ARM::ADDrr : ARM::LDRrs;
+ }
+
+ unsigned GlobalBaseReg = AFI->getGlobalBaseReg();
+ if (GlobalBaseReg == 0) {
+ GlobalBaseReg = MRI.createVirtualRegister(TLI.getRegClassFor(VT));
+ AFI->setGlobalBaseReg(GlobalBaseReg);
+ }
+
+ unsigned DestReg2 = createResultReg(TLI.getRegClassFor(VT));
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(Opc), DestReg2)
+ .addReg(DestReg1)
+ .addReg(GlobalBaseReg);
+ if (!UseGOTOFF)
+ MIB.addImm(0);
+ AddOptionalDefs(MIB);
+
+ return DestReg2;
+}
+
namespace llvm {
FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) {
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 2cedf3172c..52374ec4c1 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -1233,7 +1233,7 @@ static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) {
return;
// Naked functions don't spill callee-saved registers.
- if (MF.getFunction()->hasFnAttr(Attribute::Naked))
+ if (MF.getFunction()->getFnAttributes().hasNakedAttr())
return;
// We are planning to use NEON instructions vst1 / vld1.
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index a44e2a220a..90ae94b3b2 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -347,7 +347,9 @@ bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
if (!CheckVMLxHazard)
return true;
- if (!Subtarget->isCortexA8() && !Subtarget->isLikeA9())
+
+ if (!Subtarget->isCortexA8() && !Subtarget->isLikeA9() &&
+ !Subtarget->isSwift())
return true;
if (!N->hasOneUse())
@@ -385,12 +387,13 @@ bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
ARM_AM::ShiftOpc ShOpcVal,
unsigned ShAmt) {
- if (!Subtarget->isLikeA9())
+ if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
return true;
if (Shift.hasOneUse())
return true;
// R << 2 is free.
- return ShOpcVal == ARM_AM::lsl && ShAmt == 2;
+ return ShOpcVal == ARM_AM::lsl &&
+ (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
}
bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
@@ -518,7 +521,7 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
return false;
// @LOCALMOD-END
if (N.getOpcode() == ISD::MUL &&
- (!Subtarget->isLikeA9() || N.hasOneUse())) {
+ ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
// X * [3,5,9] -> X + X * [2,4,8] etc.
int RHSC = (int)RHS->getZExtValue();
@@ -582,7 +585,8 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
// Try matching (R shl C) + (R).
if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
- !(Subtarget->isLikeA9() || N.getOperand(0).hasOneUse())) {
+ !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
+ N.getOperand(0).hasOneUse())) {
ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
if (ShOpcVal != ARM_AM::no_shift) {
// Check to see if the RHS of the shift is a constant, if not, we can't
@@ -630,7 +634,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDNode *Op,
// @LOCALMOD-END
if (N.getOpcode() == ISD::MUL &&
- (!Subtarget->isLikeA9() || N.hasOneUse())) {
+ (!(Subtarget->isLikeA9() || Subtarget->isSwift()) || N.hasOneUse())) {
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
// X * [3,5,9] -> X + X * [2,4,8] etc.
int RHSC = (int)RHS->getZExtValue();
@@ -697,7 +701,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDNode *Op,
}
}
- if (Subtarget->isLikeA9() && !N.hasOneUse()) {
+ if ((Subtarget->isLikeA9() || Subtarget->isSwift()) && !N.hasOneUse()) {
// Compute R +/- (R << N) and reuse it.
Base = N;
Offset = CurDAG->getRegister(0, MVT::i32);
@@ -753,7 +757,8 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDNode *Op,
// Try matching (R shl C) + (R).
if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
- !(Subtarget->isLikeA9() || N.getOperand(0).hasOneUse())) {
+ !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
+ N.getOperand(0).hasOneUse())) {
ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
if (ShOpcVal != ARM_AM::no_shift) {
// Check to see if the RHS of the shift is a constant, if not, we can't
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 2e7588b29f..556dacffcc 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -645,9 +645,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
if (!Subtarget->hasV6Ops())
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
- // These are expanded into libcalls.
- if (!Subtarget->hasDivide() || !Subtarget->isThumb2()) {
- // v7M has a hardware divider
+ if (!(Subtarget->hasDivide() && Subtarget->isThumb2()) &&
+ !(Subtarget->hasDivideInARMMode() && !Subtarget->isThumb())) {
+ // These are expanded into libcalls if the cpu doesn't have HW divider.
setOperationAction(ISD::SDIV, MVT::i32, Expand);
setOperationAction(ISD::UDIV, MVT::i32, Expand);
}
@@ -5873,7 +5873,7 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
// ldrex dest, ptr
// (sign extend dest, if required)
// cmp dest, incr
- // cmov.cond scratch2, dest, incr
+ // cmov.cond scratch2, incr, dest
// strex scratch, scratch2, ptr
// cmp scratch, #0
// bne- loopMBB
@@ -5896,7 +5896,7 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
.addReg(oldval).addReg(incr));
BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr), scratch2)
- .addReg(oldval).addReg(incr).addImm(Cond).addReg(ARM::CPSR);
+ .addReg(incr).addReg(oldval).addImm(Cond).addReg(ARM::CPSR);
MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr);
if (strOpc == ARM::t2STREX)
@@ -6605,7 +6605,7 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
UnitSize = 2;
} else {
// Check whether we can use NEON instructions.
- if (!MF->getFunction()->hasFnAttr(Attribute::NoImplicitFloat) &&
+ if (!MF->getFunction()->getFnAttributes().hasNoImplicitFloatAttr() &&
Subtarget->hasNEON()) {
if ((Align % 16 == 0) && SizeVal >= 16) {
ldrOpc = ARM::VLD1q32wb_fixed;
@@ -9343,7 +9343,7 @@ EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size,
// See if we can use NEON instructions for this...
if (IsZeroVal &&
- !F->hasFnAttr(Attribute::NoImplicitFloat) &&
+ !F->getFnAttributes().hasNoImplicitFloatAttr() &&
Subtarget->hasNEON()) {
if (memOpAlign(SrcAlign, DstAlign, 16) && Size >= 16) {
return MVT::v4i32;
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index c8966fb97a..67a6820932 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -846,6 +846,23 @@ class AMiscA1I<bits<8> opcod, bits<4> opc7_4, dag oops, dag iops,
let Inst{3-0} = Rm;
}
+// Division instructions.
+class ADivA1I<bits<3> opcod, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, ArithMiscFrm, itin,
+ opc, asm, "", pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{27-23} = 0b01110;
+ let Inst{22-20} = opcod;
+ let Inst{19-16} = Rd;
+ let Inst{15-12} = 0b1111;
+ let Inst{11-8} = Rm;
+ let Inst{7-4} = 0b0001;
+ let Inst{3-0} = Rn;
+}
+
// PKH instructions
def PKHLSLAsmOperand : ImmAsmOperand {
let Name = "PKHLSLImm";
@@ -893,6 +910,10 @@ class ARMV5TPat<dag pattern, dag result> : Pat<pattern, result> {
class ARMV5TEPat<dag pattern, dag result> : Pat<pattern, result> {
list<Predicate> Predicates = [IsARM, HasV5TE];
}
+// ARMV5MOPat - Same as ARMV5TEPat with UseMulOps.
+class ARMV5MOPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsARM, HasV5TE, UseMulOps];
+}
class ARMV6Pat<dag pattern, dag result> : Pat<pattern, result> {
list<Predicate> Predicates = [IsARM, HasV6];
}
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index 31b0c41f08..e62187727c 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -13,13 +13,17 @@
#include "ARMInstrInfo.h"
#include "ARM.h"
+#include "ARMConstantPoolValue.h"
#include "ARMMachineFunctionInfo.h"
+#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
using namespace llvm;
@@ -84,3 +88,61 @@ unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const {
return 0;
}
+
+namespace {
+ /// ARMCGBR - Create Global Base Reg pass. This initializes the PIC
+ /// global base register for ARM ELF.
+ struct ARMCGBR : public MachineFunctionPass {
+ static char ID;
+ ARMCGBR() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ if (AFI->getGlobalBaseReg() == 0)
+ return false;
+
+ const ARMTargetMachine *TM =
+ static_cast<const ARMTargetMachine *>(&MF.getTarget());
+ if (TM->getRelocationModel() != Reloc::PIC_)
+ return false;
+
+ LLVMContext* Context = &MF.getFunction()->getContext();
+ GlobalValue *GV = new GlobalVariable(Type::getInt32Ty(*Context), false,
+ GlobalValue::ExternalLinkage, 0,
+ "_GLOBAL_OFFSET_TABLE_");
+ unsigned Id = AFI->createPICLabelUId();
+ ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id);
+ unsigned Align = TM->getTargetData()->getPrefTypeAlignment(GV->getType());
+ unsigned Idx = MF.getConstantPool()->getConstantPoolIndex(CPV, Align);
+
+ MachineBasicBlock &FirstMBB = MF.front();
+ MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+ DebugLoc DL = FirstMBB.findDebugLoc(MBBI);
+ unsigned GlobalBaseReg = AFI->getGlobalBaseReg();
+ unsigned Opc = TM->getSubtarget<ARMSubtarget>().isThumb2() ?
+ ARM::t2LDRpci : ARM::LDRcp;
+ const TargetInstrInfo &TII = *TM->getInstrInfo();
+ MachineInstrBuilder MIB = BuildMI(FirstMBB, MBBI, DL,
+ TII.get(Opc), GlobalBaseReg)
+ .addConstantPoolIndex(Idx);
+ if (Opc == ARM::LDRcp)
+ MIB.addImm(0);
+ AddDefaultPred(MIB);
+
+ return true;
+ }
+
+ virtual const char *getPassName() const {
+ return "ARM PIC Global Base Reg Initialization";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+char ARMCGBR::ID = 0;
+FunctionPass*
+llvm::createARMGlobalBaseRegPass() { return new ARMCGBR(); }
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 2060bb9374..118c9ea5dd 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -215,6 +215,8 @@ def HasFP16 : Predicate<"Subtarget->hasFP16()">,
AssemblerPredicate<"FeatureFP16","half-float">;
def HasDivide : Predicate<"Subtarget->hasDivide()">,
AssemblerPredicate<"FeatureHWDiv", "divide">;
+def HasDivideInARM : Predicate<"Subtarget->hasDivideInARMMode()">,
+ AssemblerPredicate<"FeatureHWDivARM">;
def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">,
AssemblerPredicate<"FeatureT2XtPk",
"pack/extract">;
@@ -250,6 +252,7 @@ def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">;
def UseMovt : Predicate<"Subtarget->useMovt()">;
def DontUseMovt : Predicate<"!Subtarget->useMovt()">;
def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">;
+def UseMulOps : Predicate<"Subtarget->useMulOps()">;
// Prefer fused MAC for fp mul + add over fp VMLA / VMLS if they are available.
// But only select them if more precision in FP computation is allowed.
@@ -260,6 +263,20 @@ def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion =="
def DontUseFusedMAC : Predicate<"!Subtarget->hasVFP4() || "
"Subtarget->isTargetDarwin()">;
+// VGETLNi32 is microcoded on Swift - prefer VMOV.
+def HasFastVGETLNi32 : Predicate<"!Subtarget->isSwift()">;
+def HasSlowVGETLNi32 : Predicate<"Subtarget->isSwift()">;
+
+// VDUP.32 is microcoded on Swift - prefer VMOV.
+def HasFastVDUP32 : Predicate<"!Subtarget->isSwift()">;
+def HasSlowVDUP32 : Predicate<"Subtarget->isSwift()">;
+
+// Cortex-A9 prefers VMOVSR to VMOVDRR even when using NEON for scalar FP, as
+// this allows more effective execution domain optimization. See
+// setExecutionDomain().
+def UseVMOVSR : Predicate<"Subtarget->isCortexA9() || !Subtarget->useNEONForSinglePrecisionFP()">;
+def DontUseVMOVSR : Predicate<"!Subtarget->isCortexA9() && Subtarget->useNEONForSinglePrecisionFP()">;
+
def IsLE : Predicate<"TLI.isLittleEndian()">;
def IsBE : Predicate<"TLI.isBigEndian()">;
@@ -3593,13 +3610,13 @@ def MULv5: ARMPseudoExpand<(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm,
4, IIC_iMUL32,
[(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))],
(MUL GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, cc_out:$s)>,
- Requires<[IsARM, NoV6]>;
+ Requires<[IsARM, NoV6, UseMulOps]>;
}
def MLA : AsMul1I32<0b0000001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
IIC_iMAC32, "mla", "\t$Rd, $Rn, $Rm, $Ra",
[(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))]>,
- Requires<[IsARM, HasV6]> {
+ Requires<[IsARM, HasV6, UseMulOps]> {
bits<4> Ra;
let Inst{15-12} = Ra;
}
@@ -3615,7 +3632,7 @@ def MLAv5: ARMPseudoExpand<(outs GPR:$Rd),
def MLS : AMul1I<0b0000011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
IIC_iMAC32, "mls", "\t$Rd, $Rn, $Rm, $Ra",
[(set GPR:$Rd, (sub GPR:$Ra, (mul GPR:$Rn, GPR:$Rm)))]>,
- Requires<[IsARM, HasV6T2]> {
+ Requires<[IsARM, HasV6T2, UseMulOps]> {
bits<4> Rd;
bits<4> Rm;
bits<4> Rn;
@@ -3721,7 +3738,7 @@ def SMMLA : AMul2Ia <0b0111010, 0b0001, (outs GPR:$Rd),
(ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
IIC_iMAC32, "smmla", "\t$Rd, $Rn, $Rm, $Ra",
[(set GPR:$Rd, (add (mulhs GPR:$Rn, GPR:$Rm), GPR:$Ra))]>,
- Requires<[IsARM, HasV6]>;
+ Requires<[IsARM, HasV6, UseMulOps]>;
def SMMLAR : AMul2Ia <0b0111010, 0b0011, (outs GPR:$Rd),
(ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
@@ -3731,7 +3748,7 @@ def SMMLAR : AMul2Ia <0b0111010, 0b0011, (outs GPR:$Rd),
def SMMLS : AMul2Ia <0b0111010, 0b1101, (outs GPR:$Rd),
(ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
IIC_iMAC32, "smmls", "\t$Rd, $Rn, $Rm, $Ra", []>,
- Requires<[IsARM, HasV6]>;
+ Requires<[IsARM, HasV6, UseMulOps]>;
def SMMLSR : AMul2Ia <0b0111010, 0b1111, (outs GPR:$Rd),
(ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
@@ -3785,7 +3802,7 @@ multiclass AI_smla<string opc, PatFrag opnode> {
[(set GPRnopc:$Rd, (add GPR:$Ra,
(opnode (sext_inreg GPRnopc:$Rn, i16),
(sext_inreg GPRnopc:$Rm, i16))))]>,
- Requires<[IsARM, HasV5TE]>;
+ Requires<[IsARM, HasV5TE, UseMulOps]>;
def BT : AMulxyIa<0b0001000, 0b10, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
@@ -3793,7 +3810,7 @@ multiclass AI_smla<string opc, PatFrag opnode> {
[(set GPRnopc:$Rd,
(add GPR:$Ra, (opnode (sext_inreg GPRnopc:$Rn, i16),
(sra GPRnopc:$Rm, (i32 16)))))]>,
- Requires<[IsARM, HasV5TE]>;
+ Requires<[IsARM, HasV5TE, UseMulOps]>;
def TB : AMulxyIa<0b0001000, 0b01, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
@@ -3801,7 +3818,7 @@ multiclass AI_smla<string opc, PatFrag opnode> {
[(set GPRnopc:$Rd,
(add GPR:$Ra, (opnode (sra GPRnopc:$Rn, (i32 16)),
(sext_inreg GPRnopc:$Rm, i16))))]>,
- Requires<[IsARM, HasV5TE]>;
+ Requires<[IsARM, HasV5TE, UseMulOps]>;
def TT : AMulxyIa<0b0001000, 0b11, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
@@ -3809,7 +3826,7 @@ multiclass AI_smla<string opc, PatFrag opnode> {
[(set GPRnopc:$Rd,
(add GPR:$Ra, (opnode (sra GPRnopc:$Rn, (i32 16)),
(sra GPRnopc:$Rm, (i32 16)))))]>,
- Requires<[IsARM, HasV5TE]>;
+ Requires<[IsARM, HasV5TE, UseMulOps]>;
def WB : AMulxyIa<0b0001001, 0b00, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
@@ -3817,7 +3834,7 @@ multiclass AI_smla<string opc, PatFrag opnode> {
[(set GPRnopc:$Rd,
(add GPR:$Ra, (sra (opnode GPRnopc:$Rn,
(sext_inreg GPRnopc:$Rm, i16)), (i32 16))))]>,
- Requires<[IsARM, HasV5TE]>;
+ Requires<[IsARM, HasV5TE, UseMulOps]>;
def WT : AMulxyIa<0b0001001, 0b10, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
@@ -3825,7 +3842,7 @@ multiclass AI_smla<string opc, PatFrag opnode> {
[(set GPRnopc:$Rd,
(add GPR:$Ra, (sra (opnode GPRnopc:$Rn,
(sra GPRnopc:$Rm, (i32 16))), (i32 16))))]>,
- Requires<[IsARM, HasV5TE]>;
+ Requires<[IsARM, HasV5TE, UseMulOps]>;
}
}
@@ -3928,6 +3945,19 @@ defm SMUA : AI_sdml<0, "smua">;
defm SMUS : AI_sdml<1, "smus">;
//===----------------------------------------------------------------------===//
+// Division Instructions (ARMv7-A with virtualization extension)
+//
+def SDIV : ADivA1I<0b001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iDIV,
+ "sdiv", "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (sdiv GPR:$Rn, GPR:$Rm))]>,
+ Requires<[IsARM, HasDivideInARM]>;
+
+def UDIV : ADivA1I<0b011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iDIV,
+ "udiv", "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (udiv GPR:$Rn, GPR:$Rm))]>,
+ Requires<[IsARM, HasDivideInARM]>;
+
+//===----------------------------------------------------------------------===//
// Misc. Arithmetic Instructions.
//
@@ -4989,32 +5019,32 @@ def : ARMV5TEPat<(sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))),
def : ARMV5TEPat<(sra (mul GPR:$a, sext_16_node:$b), (i32 16)),
(SMULWB GPR:$a, GPR:$b)>;
-def : ARMV5TEPat<(add GPR:$acc,
+def : ARMV5MOPat<(add GPR:$acc,
(mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
(sra (shl GPR:$b, (i32 16)), (i32 16)))),
(SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
-def : ARMV5TEPat<(add GPR:$acc,
+def : ARMV5MOPat<(add GPR:$acc,
(mul sext_16_node:$a, sext_16_node:$b)),
(SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
-def : ARMV5TEPat<(add GPR:$acc,
+def : ARMV5MOPat<(add GPR:$acc,
(mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
(sra GPR:$b, (i32 16)))),
(SMLABT GPR:$a, GPR:$b, GPR:$acc)>;
-def : ARMV5TEPat<(add GPR:$acc,
+def : ARMV5MOPat<(add GPR:$acc,
(mul sext_16_node:$a, (sra GPR:$b, (i32 16)))),
(SMLABT GPR:$a, GPR:$b, GPR:$acc)>;
-def : ARMV5TEPat<(add GPR:$acc,
+def : ARMV5MOPat<(add GPR:$acc,
(mul (sra GPR:$a, (i32 16)),
(sra (shl GPR:$b, (i32 16)), (i32 16)))),
(SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
-def : ARMV5TEPat<(add GPR:$acc,
+def : ARMV5MOPat<(add GPR:$acc,
(mul (sra GPR:$a, (i32 16)), sext_16_node:$b)),
(SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
-def : ARMV5TEPat<(add GPR:$acc,
+def : ARMV5MOPat<(add GPR:$acc,
(sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))),
(i32 16))),
(SMLAWB GPR:$a, GPR:$b, GPR:$acc)>;
-def : ARMV5TEPat<(add GPR:$acc,
+def : ARMV5MOPat<(add GPR:$acc,
(sra (mul GPR:$a, sext_16_node:$b), (i32 16))),
(SMLAWB GPR:$a, GPR:$b, GPR:$acc)>;
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 1bcb48776e..de655f1a0e 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -5043,7 +5043,8 @@ def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
(outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane),
IIC_VMOVSI, "vmov", "32", "$R, $V$lane",
[(set GPR:$R, (extractelt (v2i32 DPR:$V),
- imm:$lane))]> {
+ imm:$lane))]>,
+ Requires<[HasNEON, HasFastVGETLNi32]> {
let Inst{21} = lane{0};
}
// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
@@ -5066,7 +5067,16 @@ def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane),
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
(VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
(DSubReg_i32_reg imm:$lane))),
- (SubReg_i32_lane imm:$lane))>;
+ (SubReg_i32_lane imm:$lane))>,
+ Requires<[HasNEON, HasFastVGETLNi32]>;
+def : Pat<(extractelt (v2i32 DPR:$src), imm:$lane),
+ (COPY_TO_REGCLASS
+ (i32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
+ Requires<[HasNEON, HasSlowVGETLNi32]>;
+def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
+ (COPY_TO_REGCLASS
+ (i32 (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
+ Requires<[HasNEON, HasSlowVGETLNi32]>;
def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
(EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)),
(SSubReg_f32_reg imm:$src2))>;
@@ -5175,14 +5185,23 @@ class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
def VDUP8d : VDUPD<0b11101100, 0b00, "8", v8i8>;
def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>;
-def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>;
+def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>,
+ Requires<[HasNEON, HasFastVDUP32]>;
def VDUP8q : VDUPQ<0b11101110, 0b00, "8", v16i8>;
def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>;
def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>;
-def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>;
+// NEONvdup patterns for uarchs with fast VDUP.32.
+def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>,
+ Requires<[HasNEON,HasFastVDUP32]>;
def : Pat<(v4f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>;
+// NEONvdup patterns for uarchs with slow VDUP.32 - use VMOVDRR instead.
+def : Pat<(v2i32 (NEONvdup (i32 GPR:$R))), (VMOVDRR GPR:$R, GPR:$R)>,
+ Requires<[HasNEON,HasSlowVDUP32]>;
+def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VMOVDRR GPR:$R, GPR:$R)>,
+ Requires<[HasNEON,HasSlowVDUP32]>;
+
// VDUP : Vector Duplicate Lane (from scalar to all elements)
class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
@@ -5619,6 +5638,11 @@ def : N2VSPat<arm_ftoui, VCVTf2ud>;
def : N2VSPat<arm_sitof, VCVTs2fd>;
def : N2VSPat<arm_uitof, VCVTu2fd>;
+// Prefer VMOVDRR for i32 -> f32 bitcasts, it can write all DPR registers.
+def : Pat<(f32 (bitconvert GPR:$a)),
+ (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>,
+ Requires<[HasNEON, DontUseVMOVSR]>;
+
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 2bb667ef37..37b280f447 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -2396,7 +2396,8 @@ def t2MUL: T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
def t2MLA: T2FourReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
"mla", "\t$Rd, $Rn, $Rm, $Ra",
- [(set rGPR:$Rd, (add (mul rGPR:$Rn, rGPR:$Rm), rGPR:$Ra))]> {
+ [(set rGPR:$Rd, (add (mul rGPR:$Rn, rGPR:$Rm), rGPR:$Ra))]>,
+ Requires<[IsThumb2, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b000;
@@ -2406,7 +2407,8 @@ def t2MLA: T2FourReg<
def t2MLS: T2FourReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
"mls", "\t$Rd, $Rn, $Rm, $Ra",
- [(set rGPR:$Rd, (sub rGPR:$Ra, (mul rGPR:$Rn, rGPR:$Rm)))]> {
+ [(set rGPR:$Rd, (sub rGPR:$Ra, (mul rGPR:$Rn, rGPR:$Rm)))]>,
+ Requires<[IsThumb2, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b000;
@@ -2475,7 +2477,7 @@ def t2SMMLA : T2FourReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
"smmla", "\t$Rd, $Rn, $Rm, $Ra",
[(set rGPR:$Rd, (add (mulhs rGPR:$Rm, rGPR:$Rn), rGPR:$Ra))]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b101;
@@ -2496,7 +2498,7 @@ def t2SMMLS: T2FourReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
"smmls", "\t$Rd, $Rn, $Rm, $Ra",
[(set rGPR:$Rd, (sub rGPR:$Ra, (mulhs rGPR:$Rn, rGPR:$Rm)))]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b110;
@@ -2601,7 +2603,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
[(set rGPR:$Rd, (add rGPR:$Ra,
(opnode (sext_inreg rGPR:$Rn, i16),
(sext_inreg rGPR:$Rm, i16))))]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -2614,7 +2616,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
!strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra",
[(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sext_inreg rGPR:$Rn, i16),
(sra rGPR:$Rm, (i32 16)))))]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -2627,7 +2629,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
!strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, $Ra",
[(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)),
(sext_inreg rGPR:$Rm, i16))))]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -2640,7 +2642,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
!strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra",
[(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)),
(sra rGPR:$Rm, (i32 16)))))]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -2653,7 +2655,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
!strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra",
[(set rGPR:$Rd, (add rGPR:$Ra, (sra (opnode rGPR:$Rn,
(sext_inreg rGPR:$Rm, i16)), (i32 16))))]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b011;
@@ -2666,7 +2668,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
!strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra",
[(set rGPR:$Rd, (add rGPR:$Ra, (sra (opnode rGPR:$Rn,
(sra rGPR:$Rm, (i32 16))), (i32 16))))]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b011;
@@ -2760,7 +2762,7 @@ def t2SMLSLDX : T2FourReg_mac<1, 0b101, 0b1101, (outs rGPR:$Ra,rGPR:$Rd),
// Division Instructions.
// Signed and unsigned division on v7-M
//
-def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi,
+def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV,
"sdiv", "\t$Rd, $Rn, $Rm",
[(set rGPR:$Rd, (sdiv rGPR:$Rn, rGPR:$Rm))]>,
Requires<[HasDivide, IsThumb2]> {
@@ -2771,7 +2773,7 @@ def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi,
let Inst{7-4} = 0b1111;
}
-def t2UDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi,
+def t2UDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV,
"udiv", "\t$Rd, $Rn, $Rm",
[(set rGPR:$Rd, (udiv rGPR:$Rn, rGPR:$Rm))]>,
Requires<[HasDivide, IsThumb2]> {
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 7d6692f307..b5a896c699 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -523,10 +523,12 @@ def VMOVRS : AVConv2I<0b11100001, 0b1010,
let D = VFPNeonDomain;
}
+// Bitcast i32 -> f32. NEON prefers to use VMOVDRR.
def VMOVSR : AVConv4I<0b11100000, 0b1010,
(outs SPR:$Sn), (ins GPR:$Rt),
IIC_fpMOVIS, "vmov", "\t$Sn, $Rt",
- [(set SPR:$Sn, (bitconvert GPR:$Rt))]> {
+ [(set SPR:$Sn, (bitconvert GPR:$Rt))]>,
+ Requires<[HasVFP2, UseVMOVSR]> {
// Instruction operands.
bits<5> Sn;
bits<4> Rt;
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index f1c8fc8481..c0ac04b600 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -108,6 +108,11 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// pass.
DenseMap<unsigned, unsigned> CPEClones;
+ /// GlobalBaseReg - keeps track of the virtual register initialized for
+ /// use as the global base register. This is used for PIC in some PIC
+ /// relocation models.
+ unsigned GlobalBaseReg;
+
public:
ARMFunctionInfo() :
isThumb(false),
@@ -119,7 +124,7 @@ public:
GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0),
NumAlignedDPRCS2Regs(0),
JumpTableUId(0), PICLabelUId(0),
- VarArgsFrameIndex(0), HasITBlocks(false) {}
+ VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {}
explicit ARMFunctionInfo(MachineFunction &MF) :
isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()),
@@ -130,7 +135,7 @@ public:
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
GPRCS1Frames(32), GPRCS2Frames(32), DPRCSFrames(32),
JumpTableUId(0), PICLabelUId(0),
- VarArgsFrameIndex(0), HasITBlocks(false) {}
+ VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {}
bool isThumbFunction() const { return isThumb; }
bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; }
@@ -249,6 +254,9 @@ public:
bool hasITBlocks() const { return HasITBlocks; }
void setHasITBlocks(bool h) { HasITBlocks = h; }
+ unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
+ void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
+
void recordCPEClone(unsigned CPIdx, unsigned CPCloneIdx) {
if (!CPEClones.insert(std::make_pair(CPCloneIdx, CPIdx)).second)
assert(0 && "Duplicate entries!");
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index 6f974fd17d..ed8ac1aff7 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -247,11 +247,16 @@ def CCR : RegisterClass<"ARM", [i32], 32, (add CPSR)> {
}
// Scalar single precision floating point register class..
-def SPR : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 31)>;
+// FIXME: Allocation order changed to s0, s2, s4, ... as a quick hack to
+// avoid partial-write dependencies on D registers (S registers are
+// renamed as portions of D registers).
+def SPR : RegisterClass<"ARM", [f32], 32, (add (decimate
+ (sequence "S%u", 0, 31), 2),
+ (sequence "S%u", 0, 31))>;
// Subset of SPR which can be used as a source of NEON scalars for 16-bit
// operations
-def SPR_8 : RegisterClass<"ARM", [f32], 32, (trunc SPR, 16)>;
+def SPR_8 : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 15)>;
// Scalar double precision floating point / generic 64-bit vector register
// class.
diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td
index 81d2fa37c2..02196d06bf 100644
--- a/lib/Target/ARM/ARMSchedule.td
+++ b/lib/Target/ARM/ARMSchedule.td
@@ -55,6 +55,7 @@ def IIC_iMUL32 : InstrItinClass;
def IIC_iMAC32 : InstrItinClass;
def IIC_iMUL64 : InstrItinClass;
def IIC_iMAC64 : InstrItinClass;
+def IIC_iDIV : InstrItinClass;
def IIC_iLoad_i : InstrItinClass;
def IIC_iLoad_r : InstrItinClass;
def IIC_iLoad_si : InstrItinClass;
@@ -261,3 +262,4 @@ def IIC_VTBX4 : InstrItinClass;
include "ARMScheduleV6.td"
include "ARMScheduleA8.td"
include "ARMScheduleA9.td"
+include "ARMScheduleSwift.td"
diff --git a/lib/Target/ARM/ARMScheduleSwift.td b/lib/Target/ARM/ARMScheduleSwift.td
new file mode 100644
index 0000000000..e9bc3e0f39
--- /dev/null
+++ b/lib/Target/ARM/ARMScheduleSwift.td
@@ -0,0 +1,1085 @@
+//=- ARMScheduleSwift.td - Swift Scheduling Definitions -*- tablegen -*----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the Swift processor..
+//
+//===----------------------------------------------------------------------===//
+
+// ===---------------------------------------------------------------------===//
+// This section contains legacy support for itineraries. This is
+// required until SD and PostRA schedulers are replaced by MachineScheduler.
+
+def SW_DIS0 : FuncUnit;
+def SW_DIS1 : FuncUnit;
+def SW_DIS2 : FuncUnit;
+
+def SW_ALU0 : FuncUnit;
+def SW_ALU1 : FuncUnit;
+def SW_LS : FuncUnit;
+def SW_IDIV : FuncUnit;
+def SW_FDIV : FuncUnit;
+
+// FIXME: Need bypasses.
+// FIXME: Model the multiple stages of IIC_iMOVix2, IIC_iMOVix2addpc, and
+// IIC_iMOVix2ld better.
+// FIXME: Model the special immediate shifts that are not microcoded.
+// FIXME: Do we need to model the fact that uses of r15 in a micro-op force it
+// to issue on pipe 1?
+// FIXME: Model the pipelined behavior of CMP / TST instructions.
+// FIXME: Better model the microcode stages of multiply instructions, especially
+// conditional variants.
+// FIXME: Add preload instruction when it is documented.
+// FIXME: Model non-pipelined nature of FP div / sqrt unit.
+
+def SwiftItineraries : ProcessorItineraries<
+ [SW_DIS0, SW_DIS1, SW_DIS2, SW_ALU0, SW_ALU1, SW_LS, SW_IDIV, SW_FDIV], [], [
+ //
+ // Move instructions, unconditional
+ InstrItinData<IIC_iMOVi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iMOVr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iMOVsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iMOVsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iMOVix2 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2]>,
+ InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [3]>,
+ InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>,
+ InstrStage<1, [SW_LS]>],
+ [5]>,
+ //
+ // MVN instructions
+ InstrItinData<IIC_iMVNi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iMVNr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iMVNsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iMVNsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ //
+ // No operand cycles
+ InstrItinData<IIC_iALUx , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>]>,
+ //
+ // Binary Instructions that produce a result
+ InstrItinData<IIC_iALUi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1]>,
+ InstrItinData<IIC_iALUr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_iALUsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_iALUsir,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_iALUsr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2, 1, 1, 1]>,
+ //
+ // Bitwise Instructions that produce a result
+ InstrItinData<IIC_iBITi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1]>,
+ InstrItinData<IIC_iBITr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_iBITsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_iBITsr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2, 1, 1, 1]>,
+ //
+ // Unary Instructions that produce a result
+
+ // CLZ, RBIT, etc.
+ InstrItinData<IIC_iUNAr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1]>,
+
+ // BFC, BFI, UBFX, SBFX
+ InstrItinData<IIC_iUNAsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2, 1]>,
+
+ //
+ // Zero and sign extension instructions
+ InstrItinData<IIC_iEXTr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1]>,
+ InstrItinData<IIC_iEXTAr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_iEXTAsr,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1, 1, 1]>,
+ //
+ // Compare instructions
+ InstrItinData<IIC_iCMPi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iCMPr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1]>,
+ InstrItinData<IIC_iCMPsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<2, [SW_ALU0, SW_ALU1]>],
+ [1, 1]>,
+ InstrItinData<IIC_iCMPsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<2, [SW_ALU0, SW_ALU1]>],
+ [1, 1, 1]>,
+ //
+ // Test instructions
+ InstrItinData<IIC_iTSTi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iTSTr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1]>,
+ InstrItinData<IIC_iTSTsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<2, [SW_ALU0, SW_ALU1]>],
+ [1, 1]>,
+ InstrItinData<IIC_iTSTsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<2, [SW_ALU0, SW_ALU1]>],
+ [1, 1, 1]>,
+ //
+ // Move instructions, conditional
+ // FIXME: Correctly model the extra input dep on the destination.
+ InstrItinData<IIC_iCMOVi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iCMOVr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1]>,
+ InstrItinData<IIC_iCMOVsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1]>,
+ InstrItinData<IIC_iCMOVsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_iCMOVix2, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2]>,
+
+ // Integer multiply pipeline
+ //
+ InstrItinData<IIC_iMUL16 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_iMAC16 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [3, 1, 1, 1]>,
+ InstrItinData<IIC_iMUL32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+ InstrItinData<IIC_iMAC32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1, 1]>,
+ InstrItinData<IIC_iMUL64 , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0], 1>,
+ InstrStage<1, [SW_ALU0], 3>,
+ InstrStage<1, [SW_ALU0]>],
+ [5, 5, 1, 1]>,
+ InstrItinData<IIC_iMAC64 , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0], 1>,
+ InstrStage<1, [SW_ALU0], 1>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 3>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [5, 6, 1, 1]>,
+ //
+ // Integer divide
+ InstrItinData<IIC_iDIV , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0], 0>,
+ InstrStage<14, [SW_IDIV]>],
+ [14, 1, 1]>,
+
+ // Integer load pipeline
+ // FIXME: The timings are some rough approximations
+ //
+ // Immediate offset
+ InstrItinData<IIC_iLoad_i , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [3, 1]>,
+ InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [3, 1]>,
+ InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_LS], 1>,
+ InstrStage<1, [SW_LS]>],
+ [3, 4, 1]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iLoad_r , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_LS], 1>,
+ InstrStage<1, [SW_LS], 3>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [3, 4, 1, 1]>,
+ //
+ // Scaled register offset
+ InstrItinData<IIC_iLoad_si , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
+ InstrStage<1, [SW_LS]>],
+ [5, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
+ InstrStage<1, [SW_LS]>],
+ [5, 1, 1]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iLoad_iu , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [3, 1, 1]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iLoad_ru , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0], 1>,
+ InstrStage<1, [SW_LS]>],
+ [3, 1, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0], 1>,
+ InstrStage<1, [SW_LS]>],
+ [3, 1, 1, 1]>,
+ InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 0>,
+ InstrStage<1, [SW_LS], 3>,
+ InstrStage<1, [SW_LS], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [3, 4, 1, 1]>,
+ //
+ // Scaled register offset with update
+ InstrItinData<IIC_iLoad_siu , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
+ InstrStage<1, [SW_LS], 3>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [5, 3, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
+ InstrStage<1, [SW_LS], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [5, 3, 1, 1]>,
+ //
+ // Load multiple, def is the 5th operand.
+ // FIXME: This assumes 3 to 4 registers.
+ InstrItinData<IIC_iLoad_m , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1, 1, 3], [], -1>, // dynamic uops
+
+ //
+ // Load multiple + update, defs are the 1st and 5th operands.
+ InstrItinData<IIC_iLoad_mu , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 0>,
+ InstrStage<1, [SW_LS], 3>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2, 1, 1, 1, 3], [], -1>, // dynamic uops
+ //
+ // Load multiple plus branch
+ InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1, 1, 3], [], -1>, // dynamic uops
+ //
+ // Pop, def is the 3rd operand.
+ InstrItinData<IIC_iPop , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 3], [], -1>, // dynamic uops
+ //
+ // Pop + branch, def is the 3rd operand.
+ InstrItinData<IIC_iPop_Br, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 3], [], -1>, // dynamic uops
+
+ //
+ // iLoadi + iALUr for t2LDRpci_pic.
+ InstrItinData<IIC_iLoadiALU, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS], 3>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [4, 1]>,
+
+ // Integer store pipeline
+ ///
+ // Immediate offset
+ InstrItinData<IIC_iStore_i , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1]>,
+ InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1]>,
+ InstrItinData<IIC_iStore_d_i, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_LS], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iStore_r , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_iStore_d_r, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_LS], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1]>,
+ //
+ // Scaled register offset
+ InstrItinData<IIC_iStore_si , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iStore_iu , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iStore_ru , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1, 1]>,
+ InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1, 1]>,
+ //
+ // Scaled register offset with update
+ InstrItinData<IIC_iStore_siu, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
+ InstrStage<1, [SW_LS], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>],
+ [3, 1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
+ InstrStage<1, [SW_LS], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>],
+ [3, 1, 1, 1]>,
+ //
+ // Store multiple
+ InstrItinData<IIC_iStore_m , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS], 1>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS], 1>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [], [], -1>, // dynamic uops
+ //
+ // Store multiple + update
+ InstrItinData<IIC_iStore_mu, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS], 1>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS], 1>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [2], [], -1>, // dynamic uops
+
+ //
+ // Preload
+ InstrItinData<IIC_Preload, [InstrStage<1, [SW_DIS0], 0>], [1, 1]>,
+
+ // Branch
+ //
+ // no delay slots, so the latency of a branch is unimportant
+ InstrItinData<IIC_Br , [InstrStage<1, [SW_DIS0], 0>]>,
+
+ // FP Special Register to Integer Register File Move
+ InstrItinData<IIC_fpSTAT , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ //
+ // Single-precision FP Unary
+ //
+ // Most floating-point moves get issued on ALU0.
+ InstrItinData<IIC_fpUNA32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1]>,
+ //
+ // Double-precision FP Unary
+ InstrItinData<IIC_fpUNA64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1]>,
+
+ //
+ // Single-precision FP Compare
+ InstrItinData<IIC_fpCMP32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [1, 1]>,
+ //
+ // Double-precision FP Compare
+ InstrItinData<IIC_fpCMP64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [1, 1]>,
+ //
+ // Single to Double FP Convert
+ InstrItinData<IIC_fpCVTSD , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1]>,
+ //
+ // Double to Single FP Convert
+ InstrItinData<IIC_fpCVTDS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1]>,
+
+ //
+ // Single to Half FP Convert
+ InstrItinData<IIC_fpCVTSH , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU1], 4>,
+ InstrStage<1, [SW_ALU1]>],
+ [6, 1]>,
+ //
+ // Half to Single FP Convert
+ InstrItinData<IIC_fpCVTHS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1]>,
+
+ //
+ // Single-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTSI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1]>,
+ //
+ // Double-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTDI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1]>,
+ //
+ // Integer to Single-Precision FP Convert
+ InstrItinData<IIC_fpCVTIS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1]>,
+ //
+ // Integer to Double-Precision FP Convert
+ InstrItinData<IIC_fpCVTID , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1]>,
+ //
+ // Single-precision FP ALU
+ InstrItinData<IIC_fpALU32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1, 1]>,
+ //
+ // Double-precision FP ALU
+ InstrItinData<IIC_fpALU64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1, 1]>,
+ //
+ // Single-precision FP Multiply
+ InstrItinData<IIC_fpMUL32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1]>,
+ //
+ // Double-precision FP Multiply
+ InstrItinData<IIC_fpMUL64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [6, 1, 1]>,
+ //
+ // Single-precision FP MAC
+ InstrItinData<IIC_fpMAC32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 1]>,
+ //
+ // Double-precision FP MAC
+ InstrItinData<IIC_fpMAC64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [12, 1, 1]>,
+ //
+ // Single-precision Fused FP MAC
+ InstrItinData<IIC_fpFMAC32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 1]>,
+ //
+ // Double-precision Fused FP MAC
+ InstrItinData<IIC_fpFMAC64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [12, 1, 1]>,
+ //
+ // Single-precision FP DIV
+ InstrItinData<IIC_fpDIV32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 0>,
+ InstrStage<15, [SW_FDIV]>],
+ [17, 1, 1]>,
+ //
+ // Double-precision FP DIV
+ InstrItinData<IIC_fpDIV64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 0>,
+ InstrStage<30, [SW_FDIV]>],
+ [32, 1, 1]>,
+ //
+ // Single-precision FP SQRT
+ InstrItinData<IIC_fpSQRT32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 0>,
+ InstrStage<15, [SW_FDIV]>],
+ [17, 1]>,
+ //
+ // Double-precision FP SQRT
+ InstrItinData<IIC_fpSQRT64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 0>,
+ InstrStage<30, [SW_FDIV]>],
+ [32, 1, 1]>,
+
+ //
+ // Integer to Single-precision Move
+ InstrItinData<IIC_fpMOVIS, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_LS], 4>,
+ InstrStage<1, [SW_ALU0]>],
+ [6, 1]>,
+ //
+ // Integer to Double-precision Move
+ InstrItinData<IIC_fpMOVID, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [4, 1]>,
+ //
+ // Single-precision to Integer Move
+ InstrItinData<IIC_fpMOVSI, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [3, 1]>,
+ //
+ // Double-precision to Integer Move
+ InstrItinData<IIC_fpMOVDI, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_LS], 3>,
+ InstrStage<1, [SW_LS]>],
+ [3, 4, 1]>,
+ //
+ // Single-precision FP Load
+ InstrItinData<IIC_fpLoad32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [4, 1]>,
+ //
+ // Double-precision FP Load
+ InstrItinData<IIC_fpLoad64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [4, 1]>,
+ //
+ // FP Load Multiple
+ // FIXME: Assumes a single Q register.
+ InstrItinData<IIC_fpLoad_m, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1, 4], [], -1>, // dynamic uops
+ //
+ // FP Load Multiple + update
+ // FIXME: Assumes a single Q register.
+ InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_LS], 4>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2, 1, 1, 1, 4], [], -1>, // dynamic uops
+ //
+ // Single-precision FP Store
+ InstrItinData<IIC_fpStore32,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1]>,
+ //
+ // Double-precision FP Store
+ InstrItinData<IIC_fpStore64,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1]>,
+ //
+ // FP Store Multiple
+ // FIXME: Assumes a single Q register.
+ InstrItinData<IIC_fpStore_m,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1], [], -1>, // dynamic uops
+ //
+ // FP Store Multiple + update
+ // FIXME: Assumes a single Q register.
+ InstrItinData<IIC_fpStore_mu,[InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_LS], 4>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2, 1, 1, 1], [], -1>, // dynamic uops
+ // NEON
+ //
+ // Double-register Integer Unary
+ InstrItinData<IIC_VUNAiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1]>,
+ //
+ // Quad-register Integer Unary
+ InstrItinData<IIC_VUNAiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1]>,
+ //
+ // Double-register Integer Q-Unary
+ InstrItinData<IIC_VQUNAiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1]>,
+ //
+ // Quad-register Integer CountQ-Unary
+ InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1]>,
+ //
+ // Double-register Integer Binary
+ InstrItinData<IIC_VBINiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1, 1]>,
+ //
+ // Quad-register Integer Binary
+ InstrItinData<IIC_VBINiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1, 1]>,
+ //
+ // Double-register Integer Subtract
+ InstrItinData<IIC_VSUBiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1, 1]>,
+ //
+ // Quad-register Integer Subtract
+ InstrItinData<IIC_VSUBiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1, 1]>,
+ //
+ // Double-register Integer Shift
+ InstrItinData<IIC_VSHLiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1, 1]>,
+ //
+ // Quad-register Integer Shift
+ InstrItinData<IIC_VSHLiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1, 1]>,
+ //
+ // Double-register Integer Shift (4 cycle)
+ InstrItinData<IIC_VSHLi4D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+ //
+ // Quad-register Integer Shift (4 cycle)
+ InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+ //
+ // Double-register Integer Binary (4 cycle)
+ InstrItinData<IIC_VBINi4D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+ //
+ // Quad-register Integer Binary (4 cycle)
+ InstrItinData<IIC_VBINi4Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+ //
+ // Double-register Integer Subtract (4 cycle)
+ InstrItinData<IIC_VSUBi4D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+ //
+ // Quad-register Integer Subtract (4 cycle)
+ InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+
+ //
+ // Double-register Integer Count
+ InstrItinData<IIC_VCNTiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1, 1]>,
+ //
+ // Quad-register Integer Count
+ InstrItinData<IIC_VCNTiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1, 1]>,
+ //
+ // Double-register Absolute Difference and Accumulate
+ InstrItinData<IIC_VABAD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1, 1]>,
+ //
+ // Quad-register Absolute Difference and Accumulate
+ InstrItinData<IIC_VABAQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1, 1]>,
+ //
+ // Double-register Integer Pair Add Long
+ InstrItinData<IIC_VPALiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+ //
+ // Quad-register Integer Pair Add Long
+ InstrItinData<IIC_VPALiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+
+ //
+ // Double-register Integer Multiply (.8, .16)
+ InstrItinData<IIC_VMULi16D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1]>,
+ //
+ // Quad-register Integer Multiply (.8, .16)
+ InstrItinData<IIC_VMULi16Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1]>,
+
+ //
+ // Double-register Integer Multiply (.32)
+ InstrItinData<IIC_VMULi32D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1]>,
+ //
+ // Quad-register Integer Multiply (.32)
+ InstrItinData<IIC_VMULi32Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1]>,
+ //
+ // Double-register Integer Multiply-Accumulate (.8, .16)
+ InstrItinData<IIC_VMACi16D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1, 1]>,
+ //
+ // Double-register Integer Multiply-Accumulate (.32)
+ InstrItinData<IIC_VMACi32D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1, 1]>,
+ //
+ // Quad-register Integer Multiply-Accumulate (.8, .16)
+ InstrItinData<IIC_VMACi16Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1, 1]>,
+ //
+ // Quad-register Integer Multiply-Accumulate (.32)
+ InstrItinData<IIC_VMACi32Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1, 1]>,
+
+ //
+ // Move
+ InstrItinData<IIC_VMOV, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1]>,
+ //
+ // Move Immediate
+ InstrItinData<IIC_VMOVImm, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2]>,
+ //
+ // Double-register Permute Move
+ InstrItinData<IIC_VMOVD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [2, 1]>,
+ //
+ // Quad-register Permute Move
+ InstrItinData<IIC_VMOVQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [2, 1]>,
+ //
+ // Integer to Single-precision Move
+ InstrItinData<IIC_VMOVIS , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_LS], 4>,
+ InstrStage<1, [SW_ALU0]>],
+ [6, 1]>,
+ //
+ // Integer to Double-precision Move
+ InstrItinData<IIC_VMOVID , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [4, 1, 1]>,
+ //
+ // Single-precision to Integer Move
+ InstrItinData<IIC_VMOVSI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [3, 1]>,
+ //
+ // Double-precision to Integer Move
+ InstrItinData<IIC_VMOVDI , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_LS], 3>,
+ InstrStage<1, [SW_LS]>],
+ [3, 4, 1]>,
+ //
+ // Integer to Lane Move
+ // FIXME: I think this is correct, but it is not clear from the tuning guide.
+ InstrItinData<IIC_VMOVISL , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_LS], 4>,
+ InstrStage<1, [SW_ALU0]>],
+ [6, 1]>,
+
+ //
+ // Vector narrow move
+ InstrItinData<IIC_VMOVN, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [2, 1]>,
+ //
+ // Double-register FP Unary
+ // FIXME: VRECPE / VRSQRTE has a longer latency than VABS, which is used here,
+ // and they issue on a different pipeline.
+ InstrItinData<IIC_VUNAD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1]>,
+ //
+ // Quad-register FP Unary
+ // FIXME: VRECPE / VRSQRTE has a longer latency than VABS, which is used here,
+ // and they issue on a different pipeline.
+ InstrItinData<IIC_VUNAQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1]>,
+ //
+ // Double-register FP Binary
+ // FIXME: We're using this itin for many instructions.
+ InstrItinData<IIC_VBIND, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+
+ //
+ // VPADD, etc.
+ InstrItinData<IIC_VPBIND, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+ //
+ // Double-register FP VMUL
+ InstrItinData<IIC_VFMULD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1]>,
+ //
+ // Quad-register FP Binary
+ InstrItinData<IIC_VBINQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+ //
+ // Quad-register FP VMUL
+ InstrItinData<IIC_VFMULQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1]>,
+ //
+ // Double-register FP Multiple-Accumulate
+ InstrItinData<IIC_VMACD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 1]>,
+ //
+ // Quad-register FP Multiple-Accumulate
+ InstrItinData<IIC_VMACQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 1]>,
+ //
+ // Double-register Fused FP Multiple-Accumulate
+ InstrItinData<IIC_VFMACD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 1]>,
+ //
+ // Quad-register FusedF P Multiple-Accumulate
+ InstrItinData<IIC_VFMACQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 1]>,
+ //
+ // Double-register Reciprical Step
+ InstrItinData<IIC_VRECSD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 1]>,
+ //
+ // Quad-register Reciprical Step
+ InstrItinData<IIC_VRECSQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 1]>,
+ //
+ // Double-register Permute
+ // FIXME: The latencies are unclear from the documentation.
+ InstrItinData<IIC_VPERMD, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [3, 4, 3, 4]>,
+ //
+ // Quad-register Permute
+ // FIXME: The latencies are unclear from the documentation.
+ InstrItinData<IIC_VPERMQ, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [3, 4, 3, 4]>,
+ //
+ // Quad-register Permute (3 cycle issue on A9)
+ InstrItinData<IIC_VPERMQ3, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [3, 4, 3, 4]>,
+
+ //
+ // Double-register VEXT
+ InstrItinData<IIC_VEXTD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [2, 1, 1]>,
+ //
+ // Quad-register VEXT
+ InstrItinData<IIC_VEXTQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [2, 1, 1]>,
+ //
+ // VTB
+ InstrItinData<IIC_VTB1, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_VTB2, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 3, 3]>,
+ InstrItinData<IIC_VTB3, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [6, 1, 3, 5, 5]>,
+ InstrItinData<IIC_VTB4, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 3, 5, 7, 7]>,
+ //
+ // VTBX
+ InstrItinData<IIC_VTBX1, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_VTBX2, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 3, 3]>,
+ InstrItinData<IIC_VTBX3, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [6, 1, 3, 5, 5]>,
+ InstrItinData<IIC_VTBX4, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 3, 5, 7, 7]>
+]>;
+
+// ===---------------------------------------------------------------------===//
+// This following definitions describe the simple machine model which
+// will replace itineraries.
+
+// Swift machine model for scheduling and other instruction cost heuristics.
+def SwiftModel : SchedMachineModel {
+ let IssueWidth = 3; // 3 micro-ops are dispatched per cycle.
+ let MinLatency = 0; // Data dependencies are allowed within dispatch groups.
+ let LoadLatency = 3;
+
+ let Itineraries = SwiftItineraries;
+}
+
+// TODO: Add Swift processor and scheduler resources.
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index c8aa0779bc..6562600202 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -41,6 +41,10 @@ NoInlineJumpTables("no-inline-jumptables",
// @LOCALMOD-END
static cl::opt<bool>
+UseFusedMulOps("arm-use-mulops",
+ cl::init(true), cl::Hidden);
+
+static cl::opt<bool>
StrictAlign("arm-strict-align", cl::Hidden,
cl::desc("Disallow all unaligned memory accesses"));
@@ -59,6 +63,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
, HasVFPv4(false)
, HasNEON(false)
, UseNEONForSinglePrecisionFP(false)
+ , UseMulOps(UseFusedMulOps)
, SlowFPVMLx(false)
, HasVMLxForwarding(false)
, SlowFPBrcc(false)
@@ -74,6 +79,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
, HasFP16(false)
, HasD16(false)
, HasHardwareDivide(false)
+ , HasHardwareDivideInARM(false)
, HasT2ExtractPack(false)
, HasDataBarrier(false)
, Pref32BitThumb(false)
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 0a5744e5c1..64081f5be2 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -38,7 +38,7 @@ class StringRef;
class ARMSubtarget : public ARMGenSubtargetInfo {
protected:
enum ARMProcFamilyEnum {
- Others, CortexA8, CortexA9, CortexA15
+ Others, CortexA8, CortexA9, CortexA15, Swift
};
/// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others.
@@ -65,6 +65,10 @@ protected:
/// determine if NEON should actually be used.
bool UseNEONForSinglePrecisionFP;
+ /// UseMulOps - True if non-microcoded fused integer multiply-add and
+ /// multiply-subtract instructions should be used.
+ bool UseMulOps;
+
/// SlowFPVMLx - If the VFP2 / NEON instructions are available, indicates
/// whether the FP VML[AS] instructions are slow (if so, don't use them).
bool SlowFPVMLx;
@@ -115,6 +119,9 @@ protected:
/// HasHardwareDivide - True if subtarget supports [su]div
bool HasHardwareDivide;
+ /// HasHardwareDivideInARM - True if subtarget supports [su]div in ARM mode
+ bool HasHardwareDivideInARM;
+
/// HasT2ExtractPack - True if subtarget supports thumb2 extract/pack
/// instructions.
bool HasT2ExtractPack;
@@ -214,6 +221,7 @@ protected:
bool isCortexA8() const { return ARMProcFamily == CortexA8; }
bool isCortexA9() const { return ARMProcFamily == CortexA9; }
bool isCortexA15() const { return ARMProcFamily == CortexA15; }
+ bool isSwift() const { return ARMProcFamily == Swift; }
bool isCortexM3() const { return CPUString == "cortex-m3"; }
bool isLikeA9() const { return isCortexA9() || isCortexA15(); }
@@ -227,8 +235,10 @@ protected:
return hasNEON() && UseNEONForSinglePrecisionFP; }
bool hasDivide() const { return HasHardwareDivide; }
+ bool hasDivideInARMMode() const { return HasHardwareDivideInARM; }
bool hasT2ExtractPack() const { return HasT2ExtractPack; }
bool hasDataBarrier() const { return HasDataBarrier; }
+ bool useMulOps() const { return UseMulOps; }
bool useFPVMLx() const { return !SlowFPVMLx; }
bool hasVMLxForwarding() const { return HasVMLxForwarding; }
bool isFPBrccSlow() const { return SlowFPBrcc; }
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 4675c98f0d..ac5f14c09c 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -150,6 +150,11 @@ bool ARMPassConfig::addPreISel() {
bool ARMPassConfig::addInstSelector() {
addPass(createARMISelDag(getARMTargetMachine(), getOptLevel()));
+
+ const ARMSubtarget *Subtarget = &getARMSubtarget();
+ if (Subtarget->isTargetELF() && !Subtarget->isThumb1Only() &&
+ TM->Options.EnableFastISel)
+ addPass(createARMGlobalBaseRegPass());
return false;
}
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index bc711dc35f..aa5ba46ab2 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -257,21 +257,11 @@ public:
SmallVectorImpl<MCParsedAsmOperand*> &Operands);
bool ParseDirective(AsmToken DirectiveID);
- bool mnemonicIsValid(StringRef Mnemonic) {
- return mnemonicIsValidImpl(Mnemonic);
- }
-
unsigned checkTargetMatchPredicate(MCInst &Inst);
bool MatchAndEmitInstruction(SMLoc IDLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
MCStreamer &Out);
-
- unsigned getMCInstOperandNum(unsigned Kind, MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- unsigned OperandNum, unsigned &NumMCOperands) {
- return getMCInstOperandNumImpl(Kind, Inst, Operands, OperandNum, NumMCOperands);
- }
};
} // end anonymous namespace
@@ -5676,6 +5666,20 @@ bool ARMAsmParser::
processInstruction(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
switch (Inst.getOpcode()) {
+ // Alias for alternate form of 'ADR Rd, #imm' instruction.
+ case ARM::ADDri: {
+ if (Inst.getOperand(1).getReg() != ARM::PC ||
+ Inst.getOperand(5).getReg() != 0)
+ return false;
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::ADR);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(3));
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
// Aliases for alternate PC+imm syntax of LDR instructions.
case ARM::t2LDRpcrel:
Inst.setOpcode(ARM::t2LDRpci);
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index 34c79f945f..dfc424cda2 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -714,6 +714,15 @@ MCAsmBackend *llvm::createARMAsmBackend(const Target &T, StringRef TT, StringRef
else if (TheTriple.getArchName() == "armv6" ||
TheTriple.getArchName() == "thumbv6")
return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V6);
+ else if (TheTriple.getArchName() == "armv7f" ||
+ TheTriple.getArchName() == "thumbv7f")
+ return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7F);
+ else if (TheTriple.getArchName() == "armv7k" ||
+ TheTriple.getArchName() == "thumbv7k")
+ return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7K);
+ else if (TheTriple.getArchName() == "armv7s" ||
+ TheTriple.getArchName() == "thumbv7s")
+ return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7S);
return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7);
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index e581cc82fa..406317cee4 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -71,6 +71,14 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) {
else
// Use CPU to figure out the exact features.
ARMArchFeature = "+v7";
+ } else if (Len >= Idx+2 && TT[Idx+1] == 's') {
+ if (NoCPU)
+ // v7s: FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureT2XtPk
+ // Swift
+ ARMArchFeature = "+v7,+swift,+neon,+db,+t2dsp,+t2xtpk";
+ else
+ // Use CPU to figure out the exact features.
+ ARMArchFeature = "+v7";
} else {
// v7 CPUs have lots of different feature sets. If no CPU is specified,
// then assume v7a (e.g. cortex-a8) feature set. Otherwise, return
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index 95640f7df9..2154c93176 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -41,6 +41,12 @@ class ARMMachObjectWriter : public MCMachObjectTargetWriter {
const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue);
+ bool requiresExternRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCFragment &Fragment,
+ unsigned RelocType, const MCSymbolData *SD,
+ uint64_t FixedValue);
+
public:
ARMMachObjectWriter(bool Is64Bit, uint32_t CPUType,
uint32_t CPUSubtype)
@@ -305,6 +311,46 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
Writer->addRelocation(Fragment->getParent(), MRE);
}
+bool ARMMachObjectWriter::requiresExternRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCFragment &Fragment,
+ unsigned RelocType,
+ const MCSymbolData *SD,
+ uint64_t FixedValue) {
+ // Most cases can be identified purely from the symbol.
+ if (Writer->doesSymbolRequireExternRelocation(SD))
+ return true;
+ int64_t Value = (int64_t)FixedValue; // The displacement is signed.
+ int64_t Range;
+ switch (RelocType) {
+ default:
+ return false;
+ case macho::RIT_ARM_Branch24Bit:
+ // PC pre-adjustment of 8 for these instructions.
+ Value -= 8;
+ // ARM BL/BLX has a 25-bit offset.
+ Range = 0x1ffffff;
+ break;
+ case macho::RIT_ARM_ThumbBranch22Bit:
+ // PC pre-adjustment of 4 for these instructions.
+ Value -= 4;
+ // Thumb BL/BLX has a 24-bit offset.
+ Range = 0xffffff;
+ }
+ // BL/BLX also use external relocations when an internal relocation
+ // would result in the target being out of range. This gives the linker
+ // enough information to generate a branch island.
+ const MCSectionData &SymSD = Asm.getSectionData(
+ SD->getSymbol().getSection());
+ Value += Writer->getSectionAddress(&SymSD);
+ Value -= Writer->getSectionAddress(Fragment.getParent());
+ // If the resultant value would be out of range for an internal relocation,
+ // use an external instead.
+ if (Value > Range || Value < -(Range + 1))
+ return true;
+ return false;
+}
+
void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
const MCAssembler &Asm,
const MCAsmLayout &Layout,
@@ -373,7 +419,8 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
}
// Check whether we need an external or internal relocation.
- if (Writer->doesSymbolRequireExternRelocation(SD)) {
+ if (requiresExternRelocation(Writer, Asm, *Fragment, RelocType, SD,
+ FixedValue)) {
IsExtern = 1;
Index = SD->getIndex();
diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp
index 4ebba0e4d3..70643bcda3 100644
--- a/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/lib/Target/ARM/MLxExpansionPass.cpp
@@ -52,6 +52,7 @@ namespace {
MachineRegisterInfo *MRI;
bool isLikeA9;
+ bool isSwift;
unsigned MIIdx;
MachineInstr* LastMIs[4];
SmallPtrSet<MachineInstr*, 4> IgnoreStall;
@@ -60,6 +61,7 @@ namespace {
void pushStack(MachineInstr *MI);
MachineInstr *getAccDefMI(MachineInstr *MI) const;
unsigned getDefReg(MachineInstr *MI) const;
+ bool hasLoopHazard(MachineInstr *MI) const;
bool hasRAWHazard(unsigned Reg, MachineInstr *MI) const;
bool FindMLxHazard(MachineInstr *MI);
void ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
@@ -135,6 +137,50 @@ unsigned MLxExpansion::getDefReg(MachineInstr *MI) const {
return Reg;
}
+/// hasLoopHazard - Check whether an MLx instruction is chained to itself across
+/// a single-MBB loop.
+bool MLxExpansion::hasLoopHazard(MachineInstr *MI) const {
+ unsigned Reg = MI->getOperand(1).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return false;
+
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineInstr *DefMI = MRI->getVRegDef(Reg);
+ while (true) {
+outer_continue:
+ if (DefMI->getParent() != MBB)
+ break;
+
+ if (DefMI->isPHI()) {
+ for (unsigned i = 1, e = DefMI->getNumOperands(); i < e; i += 2) {
+ if (DefMI->getOperand(i + 1).getMBB() == MBB) {
+ unsigned SrcReg = DefMI->getOperand(i).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ DefMI = MRI->getVRegDef(SrcReg);
+ goto outer_continue;
+ }
+ }
+ }
+ } else if (DefMI->isCopyLike()) {
+ Reg = DefMI->getOperand(1).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ DefMI = MRI->getVRegDef(Reg);
+ continue;
+ }
+ } else if (DefMI->isInsertSubreg()) {
+ Reg = DefMI->getOperand(2).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ DefMI = MRI->getVRegDef(Reg);
+ continue;
+ }
+ }
+
+ break;
+ }
+
+ return DefMI == MI;
+}
+
bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const {
// FIXME: Detect integer instructions properly.
const MCInstrDesc &MCID = MI->getDesc();
@@ -149,6 +195,19 @@ bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const {
return false;
}
+static bool isFpMulInstruction(unsigned Opcode) {
+ switch (Opcode) {
+ case ARM::VMULS:
+ case ARM::VMULfd:
+ case ARM::VMULfq:
+ case ARM::VMULD:
+ case ARM::VMULslfd:
+ case ARM::VMULslfq:
+ return true;
+ default:
+ return false;
+ }
+}
bool MLxExpansion::FindMLxHazard(MachineInstr *MI) {
if (NumExpand >= ExpandLimit)
@@ -171,6 +230,12 @@ bool MLxExpansion::FindMLxHazard(MachineInstr *MI) {
return true;
}
+ // On Swift, we mostly care about hazards from multiplication instructions
+ // writing the accumulator and the pipelining of loop iterations by out-of-
+ // order execution.
+ if (isSwift)
+ return isFpMulInstruction(DefMI->getOpcode()) || hasLoopHazard(MI);
+
if (IgnoreStall.count(MI))
return false;
@@ -316,7 +381,8 @@ bool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) {
TRI = Fn.getTarget().getRegisterInfo();
MRI = &Fn.getRegInfo();
const ARMSubtarget *STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
- isLikeA9 = STI->isLikeA9();
+ isLikeA9 = STI->isLikeA9() || STI->isSwift();
+ isSwift = STI->isSwift();
bool Modified = false;
for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index 703a128ee0..1c891f14d8 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -1350,6 +1350,8 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine
} else {
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
}
+ // Increase jump tables cutover to 5, was 4.
+ setMinimumJumpTableEntries(5);
setOperationAction(ISD::BR_CC, MVT::i32, Expand);
diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
index daceb88076..9e22fd06d1 100644
--- a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
+++ b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
@@ -44,10 +44,6 @@ class MBlazeAsmParser : public MCTargetAsmParser {
bool ParseDirectiveWord(unsigned Size, SMLoc L);
- bool mnemonicIsValid(StringRef Mnemonic) {
- return mnemonicIsValidImpl(Mnemonic);
- }
-
bool MatchAndEmitInstruction(SMLoc IDLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
MCStreamer &Out);
@@ -60,13 +56,6 @@ class MBlazeAsmParser : public MCTargetAsmParser {
/// }
- unsigned getMCInstOperandNum(unsigned Kind, MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- unsigned OperandNum, unsigned &NumMCOperands) {
- return getMCInstOperandNumImpl(Kind, Inst, Operands, OperandNum,
- NumMCOperands);
- }
-
public:
MBlazeAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser)
: MCTargetAsmParser(), Parser(_Parser) {}
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 4cbd4c8e12..b1ada100f4 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -41,10 +41,6 @@ class MipsAsmParser : public MCTargetAsmParser {
#define GET_ASSEMBLER_HEADER
#include "MipsGenAsmMatcher.inc"
- bool mnemonicIsValid(StringRef Mnemonic) {
- return mnemonicIsValidImpl(Mnemonic);
- }
-
bool MatchAndEmitInstruction(SMLoc IDLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
MCStreamer &Out);
@@ -62,11 +58,6 @@ class MipsAsmParser : public MCTargetAsmParser {
MipsAsmParser::OperandMatchResultTy
parseMemOperand(SmallVectorImpl<MCParsedAsmOperand*>&);
- unsigned
- getMCInstOperandNum(unsigned Kind, MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- unsigned OperandNum, unsigned &NumMCOperands);
-
bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &,
StringRef Mnemonic);
@@ -265,18 +256,6 @@ public:
};
}
-unsigned MipsAsmParser::
-getMCInstOperandNum(unsigned Kind, MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- unsigned OperandNum, unsigned &NumMCOperands) {
- assert (0 && "getMCInstOperandNum() not supported by the Mips target.");
- // The Mips backend doesn't currently include the matcher implementation, so
- // the getMCInstOperandNumImpl() is undefined. This is a temporary
- // work around.
- NumMCOperands = 0;
- return 0;
-}
-
bool MipsAsmParser::
MatchAndEmitInstruction(SMLoc IDLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt
index 0f84358e26..7dec066fb6 100644
--- a/lib/Target/Mips/CMakeLists.txt
+++ b/lib/Target/Mips/CMakeLists.txt
@@ -11,6 +11,7 @@ tablegen(LLVM MipsGenCallingConv.inc -gen-callingconv)
tablegen(LLVM MipsGenSubtargetInfo.inc -gen-subtarget)
tablegen(LLVM MipsGenEDInfo.inc -gen-enhanced-disassembly-info)
tablegen(LLVM MipsGenAsmMatcher.inc -gen-asm-matcher)
+tablegen(LLVM MipsGenMCPseudoLowering.inc -gen-pseudo-lowering)
add_public_tablegen_target(MipsCommonTableGen)
add_llvm_target(MipsCodeGen
diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index aa5747209b..82dbcc5bcf 100644
--- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -108,6 +108,11 @@ static DecodeStatus DecodeCPURegsRegisterClass(MCInst &Inst,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeDSPRegsRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+
static DecodeStatus DecodeFGR64RegisterClass(MCInst &Inst,
unsigned RegNo,
uint64_t Address,
@@ -138,6 +143,11 @@ static DecodeStatus DecodeHWRegs64RegisterClass(MCInst &Inst,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeACRegsRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+
static DecodeStatus DecodeBranchTarget(MCInst &Inst,
unsigned Offset,
uint64_t Address,
@@ -346,6 +356,13 @@ static DecodeStatus DecodeCPURegsRegisterClass(MCInst &Inst,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeDSPRegsRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return DecodeCPURegsRegisterClass(Inst, RegNo, Address, Decoder);
+}
+
static DecodeStatus DecodeFGR64RegisterClass(MCInst &Inst,
unsigned RegNo,
uint64_t Address,
@@ -463,6 +480,18 @@ static DecodeStatus DecodeHWRegs64RegisterClass(MCInst &Inst,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeACRegsRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo >= 4)
+ return MCDisassembler::Fail;
+
+ unsigned Reg = getReg(Decoder, Mips::ACRegsRegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeBranchTarget(MCInst &Inst,
unsigned Offset,
uint64_t Address,
diff --git a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
index 96033276d2..233214b461 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
@@ -122,14 +122,16 @@ inline static unsigned getMipsRegisterNumbering(unsigned RegEnum)
{
switch (RegEnum) {
case Mips::ZERO: case Mips::ZERO_64: case Mips::F0: case Mips::D0_64:
- case Mips::D0: case Mips::FCC0:
+ case Mips::D0: case Mips::FCC0: case Mips::AC0:
return 0;
case Mips::AT: case Mips::AT_64: case Mips::F1: case Mips::D1_64:
+ case Mips::AC1:
return 1;
case Mips::V0: case Mips::V0_64: case Mips::F2: case Mips::D2_64:
- case Mips::D1:
+ case Mips::D1: case Mips::AC2:
return 2;
case Mips::V1: case Mips::V1_64: case Mips::F3: case Mips::D3_64:
+ case Mips::AC3:
return 3;
case Mips::A0: case Mips::A0_64: case Mips::F4: case Mips::D4_64:
case Mips::D2:
diff --git a/lib/Target/Mips/Makefile b/lib/Target/Mips/Makefile
index 93de517316..bd8c517345 100644
--- a/lib/Target/Mips/Makefile
+++ b/lib/Target/Mips/Makefile
@@ -17,7 +17,7 @@ BUILT_SOURCES = MipsGenRegisterInfo.inc MipsGenInstrInfo.inc \
MipsGenDAGISel.inc MipsGenCallingConv.inc \
MipsGenSubtargetInfo.inc MipsGenMCCodeEmitter.inc \
MipsGenEDInfo.inc MipsGenDisassemblerTables.inc \
- MipsGenAsmMatcher.inc
+ MipsGenMCPseudoLowering.inc MipsGenAsmMatcher.inc
DIRS = InstPrinter Disassembler AsmParser TargetInfo MCTargetDesc
diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp
index 9248032340..127c5b89e8 100644
--- a/lib/Target/Mips/Mips16InstrInfo.cpp
+++ b/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -84,7 +84,15 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned SrcReg, bool isKill, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- assert(false && "Implement this function.");
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+ MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOStore);
+ unsigned Opc = 0;
+ if (Mips::CPU16RegsRegClass.hasSubClassEq(RC))
+ Opc = Mips::SwRxSpImmX16;
+ assert(Opc && "Register class not handled!");
+ BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
}
void Mips16InstrInfo::
@@ -92,7 +100,16 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- assert(false && "Implement this function.");
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+ MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad);
+ unsigned Opc = 0;
+
+ if (Mips::CPU16RegsRegClass.hasSubClassEq(RC))
+ Opc = Mips::LwRxSpImmX16;
+ assert(Opc && "Register class not handled!");
+ BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(0)
+ .addMemOperand(MMO);
}
bool Mips16InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td
index b0ab464a68..b866a5d225 100644
--- a/lib/Target/Mips/Mips16InstrInfo.td
+++ b/lib/Target/Mips/Mips16InstrInfo.td
@@ -29,10 +29,35 @@ class FI8_MOVR3216_ins<string asmstr, InstrItinClass itin>:
//
// I8_MOV32R instruction format (used only by MOV32R instruction)
//
+
class FI8_MOV32R16_ins<string asmstr, InstrItinClass itin>:
FI8_MOV32R16<(outs CPURegs:$r32), (ins CPU16Regs:$rz),
!strconcat(asmstr, "\t$r32, $rz"), [], itin>;
+
+//
+// RR-type instruction format
+//
+
+class FRR16_ins<bits<5> f, string asmstr, InstrItinClass itin> :
+ FRR16<f, (outs CPU16Regs:$rx), (ins CPU16Regs:$ry),
+ !strconcat(asmstr, "\t$rx, $ry"), [], itin> {
+}
+
+class FRxRxRy16_ins<bits<5> f, string asmstr,
+ InstrItinClass itin> :
+ FRR16<f, (outs CPU16Regs:$rz), (ins CPU16Regs:$rx, CPU16Regs:$ry),
+ !strconcat(asmstr, "\t$rz, $ry"),
+ [], itin> {
+ let Constraints = "$rx = $rz";
+}
+
+let rx=0 in
+class FRR16_JALRC_RA_only_ins<bits<1> nd_, bits<1> l_,
+ string asmstr, InstrItinClass itin>:
+ FRR16_JALRC<nd_, l_, 1, (outs), (ins), !strconcat(asmstr, "\t $$ra"),
+ [], itin> ;
+
//
// EXT-RI instruction format
//
@@ -56,30 +81,14 @@ class FEXT_2RI16_ins<bits<5> _op, string asmstr,
!strconcat(asmstr, "\t$rx, $imm"), [], itin> {
let Constraints = "$rx_ = $rx";
}
-
-
-//
-// RR-type instruction format
-//
-
-class FRR16_ins<bits<5> f, string asmstr, InstrItinClass itin> :
- FRR16<f, (outs CPU16Regs:$rx), (ins CPU16Regs:$ry),
- !strconcat(asmstr, "\t$rx, $ry"), [], itin> {
-}
-
-class FRxRxRy16_ins<bits<5> f, string asmstr,
- InstrItinClass itin> :
- FRR16<f, (outs CPU16Regs:$rz), (ins CPU16Regs:$rx, CPU16Regs:$ry),
- !strconcat(asmstr, "\t$rz, $ry"),
- [], itin> {
- let Constraints = "$rx = $rz";
+// this has an explicit sp argument that we ignore to work around a problem
+// in the compiler
+class FEXT_RI16_SP_explicit_ins<bits<5> _op, string asmstr,
+ InstrItinClass itin>:
+ FEXT_RI16<_op, (outs CPU16Regs:$rx), (ins CPUSPReg:$ry, simm16:$imm),
+ !strconcat(asmstr, "\t$rx, $imm ( $ry ); "), [], itin> {
}
-let rx=0 in
-class FRR16_JALRC_RA_only_ins<bits<1> nd_, bits<1> l_,
- string asmstr, InstrItinClass itin>:
- FRR16_JALRC<nd_, l_, 1, (outs), (ins), !strconcat(asmstr, "\t $$ra"),
- [], itin> ;
//
// EXT-RRI instruction format
@@ -122,6 +131,13 @@ class ArithLogic16Defs<bit isCom=0> {
bit neverHasSideEffects = 1;
}
+class MayLoad {
+ bit mayLoad = 1;
+}
+
+class MayStore {
+ bit mayStore = 1;
+}
//
// Format: ADDIU rx, immediate MIPS16e
@@ -169,28 +185,30 @@ def JrRa16: FRR16_JALRC_RA_only_ins<0, 0, "jr", IIAlu>;
// Purpose: Load Byte (Extended)
// To load a byte from memory as a signed value.
//
-def LbRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lb", mem16, IIAlu>;
+def LbRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lb", mem16, IILoad>, MayLoad;
//
// Format: LBU ry, offset(rx) MIPS16e
// Purpose: Load Byte Unsigned (Extended)
// To load a byte from memory as a unsigned value.
//
-def LbuRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lbu", mem16, IIAlu>;
+def LbuRxRyOffMemX16:
+ FEXT_RRI16_mem_ins<0b10100, "lbu", mem16, IILoad>, MayLoad;
//
// Format: LH ry, offset(rx) MIPS16e
// Purpose: Load Halfword signed (Extended)
// To load a halfword from memory as a signed value.
//
-def LhRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lh", mem16, IIAlu>;
+def LhRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lh", mem16, IILoad>, MayLoad;
//
// Format: LHU ry, offset(rx) MIPS16e
// Purpose: Load Halfword unsigned (Extended)
// To load a halfword from memory as an unsigned value.
//
-def LhuRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lhu", mem16, IIAlu>;
+def LhuRxRyOffMemX16:
+ FEXT_RRI16_mem_ins<0b10100, "lhu", mem16, IILoad>, MayLoad;
//
// Format: LI rx, immediate MIPS16e
@@ -204,7 +222,13 @@ def LiRxImmX16: FEXT_RI16_ins<0b01101, "li", IIAlu>;
// Purpose: Load Word (Extended)
// To load a word from memory as a signed value.
//
-def LwRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lw", mem16, IIAlu>;
+def LwRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lw", mem16, IILoad>, MayLoad;
+
+// Format: LW rx, offset(sp) MIPS16e
+// Purpose: Load Word (SP-Relative, Extended)
+// To load an SP-relative word from memory as a signed value.
+//
+def LwRxSpImmX16: FEXT_RI16_SP_explicit_ins<0b10110, "lw", IILoad>, MayLoad;
//
// Format: MOVE r32, rz MIPS16e
@@ -257,7 +281,7 @@ def OrRxRxRy16: FRxRxRy16_ins<0b01101, "or", IIAlu>, ArithLogic16Defs<1>;
let ra=1, s=0,s0=1,s1=1 in
def RestoreRaF16:
FI8_SVRS16<0b1, (outs), (ins uimm16:$frame_size),
- "restore \t$$ra, $$s0, $$s1, $frame_size", [], IILoad > {
+ "restore \t$$ra, $$s0, $$s1, $frame_size", [], IILoad >, MayLoad {
let isCodeGenOnly = 1;
}
@@ -271,7 +295,7 @@ def RestoreRaF16:
let ra=1, s=1,s0=1,s1=1 in
def SaveRaF16:
FI8_SVRS16<0b1, (outs), (ins uimm16:$frame_size),
- "save \t$$ra, $$s0, $$s1, $frame_size", [], IILoad > {
+ "save \t$$ra, $$s0, $$s1, $frame_size", [], IIStore >, MayStore {
let isCodeGenOnly = 1;
}
//
@@ -279,14 +303,16 @@ def SaveRaF16:
// Purpose: Store Byte (Extended)
// To store a byte to memory.
//
-def SbRxRyOffMemX16: FEXT_RRI16_mem2_ins<0b11000, "sb", mem16, IIAlu>;
+def SbRxRyOffMemX16:
+ FEXT_RRI16_mem2_ins<0b11000, "sb", mem16, IIStore>, MayStore;
//
// Format: SH ry, offset(rx) MIPS16e
// Purpose: Store Halfword (Extended)
// To store a halfword to memory.
//
-def ShRxRyOffMemX16: FEXT_RRI16_mem2_ins<0b11001, "sh", mem16, IIAlu>;
+def ShRxRyOffMemX16:
+ FEXT_RRI16_mem2_ins<0b11001, "sh", mem16, IIStore>, MayStore;
//
// Format: SLL rx, ry, sa MIPS16e
@@ -350,9 +376,18 @@ def SubuRxRyRz16: FRRR16_ins<0b11, "subu", IIAlu>, ArithLogic16Defs<0>;
// Purpose: Store Word (Extended)
// To store a word to memory.
//
-def SwRxRyOffMemX16: FEXT_RRI16_mem2_ins<0b11011, "sw", mem16, IIAlu>;
+def SwRxRyOffMemX16:
+ FEXT_RRI16_mem2_ins<0b11011, "sw", mem16, IIStore>, MayStore;
//
+// Format: SW rx, offset(sp) MIPS16e
+// Purpose: Store Word rx (SP-Relative)
+// To store an SP-relative word to memory.
+//
+def SwRxSpImmX16: FEXT_RI16_SP_explicit_ins<0b11010, "sw", IIStore>, MayStore;
+
+//
+//
// Format: XOR rx, ry MIPS16e
// Purpose: Xor
// To do a bitwise logical XOR.
diff --git a/lib/Target/Mips/Mips16RegisterInfo.cpp b/lib/Target/Mips/Mips16RegisterInfo.cpp
index 106e82fd38..bfc6b6cabf 100644
--- a/lib/Target/Mips/Mips16RegisterInfo.cpp
+++ b/lib/Target/Mips/Mips16RegisterInfo.cpp
@@ -57,7 +57,6 @@ void Mips16RegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
MachineInstr &MI = *II;
MachineFunction &MF = *MI.getParent()->getParent();
MachineFrameInfo *MFI = MF.getFrameInfo();
- MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
int MinCSFI = 0;
@@ -77,8 +76,7 @@ void Mips16RegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
// getFrameRegister() returns.
unsigned FrameReg;
- if (MipsFI->isOutArgFI(FrameIndex) ||
- (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI))
+ if (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI)
FrameReg = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP;
else
FrameReg = getFrameRegister(MF);
@@ -94,12 +92,8 @@ void Mips16RegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
// incoming argument, callee-saved register location or local variable.
int64_t Offset;
- if (MipsFI->isOutArgFI(FrameIndex))
- Offset = SPOffset;
- else
- Offset = SPOffset + (int64_t)StackSize;
-
- Offset += MI.getOperand(OpNo + 1).getImm();
+ Offset = SPOffset + (int64_t)StackSize;
+ Offset += MI.getOperand(OpNo + 1).getImm();
DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n");
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index 729b7921b4..1bf4a542d8 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -50,6 +50,13 @@ bool MipsAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
return true;
}
+bool MipsAsmPrinter::lowerOperand(const MachineOperand &MO, MCOperand &MCOp) {
+ MCOp = MCInstLowering.LowerOperand(MO);
+ return MCOp.isValid();
+}
+
+#include "MipsGenMCPseudoLowering.inc"
+
void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
if (MI->isDebugValue()) {
SmallString<128> Str;
@@ -59,6 +66,10 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
+ // Do any auto-generated pseudo lowerings.
+ if (emitPseudoExpansionLowering(OutStreamer, MI))
+ return;
+
MachineBasicBlock::const_instr_iterator I = MI;
MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
diff --git a/lib/Target/Mips/MipsAsmPrinter.h b/lib/Target/Mips/MipsAsmPrinter.h
index a426f55ba7..efed6357a4 100644
--- a/lib/Target/Mips/MipsAsmPrinter.h
+++ b/lib/Target/Mips/MipsAsmPrinter.h
@@ -32,6 +32,14 @@ class LLVM_LIBRARY_VISIBILITY MipsAsmPrinter : public AsmPrinter {
void EmitInstrWithMacroNoAT(const MachineInstr *MI);
+private:
+ // tblgen'erated function.
+ bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
+ const MachineInstr *MI);
+
+ // lowerOperand - Convert a MachineOperand into the equivalent MCOperand.
+ bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp);
+
public:
const MipsSubtarget *Subtarget;
diff --git a/lib/Target/Mips/MipsDSPInstrFormats.td b/lib/Target/Mips/MipsDSPInstrFormats.td
index d9bcccc617..8e01d06596 100644
--- a/lib/Target/Mips/MipsDSPInstrFormats.td
+++ b/lib/Target/Mips/MipsDSPInstrFormats.td
@@ -23,3 +23,287 @@ def REGIMM_OPCODE : Field6<0b000001>;
class DSPInst : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther> {
let Predicates = [HasDSP];
}
+
+class PseudoDSP<dag outs, dag ins, list<dag> pattern>:
+ MipsPseudo<outs, ins, "", pattern> {
+ let Predicates = [HasDSP];
+}
+
+// ADDU.QB sub-class format.
+class ADDU_QB_FMT<bits<5> op> : DSPInst {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = op;
+ let Inst{5-0} = 0b010000;
+}
+
+class RADDU_W_QB_FMT<bits<5> op> : DSPInst {
+ bits<5> rd;
+ bits<5> rs;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-21} = rs;
+ let Inst{20-16} = 0;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = op;
+ let Inst{5-0} = 0b010000;
+}
+
+// CMPU.EQ.QB sub-class format.
+class CMP_EQ_QB_R2_FMT<bits<5> op> : DSPInst {
+ bits<5> rs;
+ bits<5> rt;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = 0;
+ let Inst{10-6} = op;
+ let Inst{5-0} = 0b010001;
+}
+
+class CMP_EQ_QB_R3_FMT<bits<5> op> : DSPInst {
+ bits<5> rs;
+ bits<5> rt;
+ bits<5> rd;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = op;
+ let Inst{5-0} = 0b010001;
+}
+
+class PRECR_SRA_PH_W_FMT<bits<5> op> : DSPInst {
+ bits<5> rs;
+ bits<5> rt;
+ bits<5> sa;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = sa;
+ let Inst{10-6} = op;
+ let Inst{5-0} = 0b010001;
+}
+
+// ABSQ_S.PH sub-class format.
+class ABSQ_S_PH_R2_FMT<bits<5> op> : DSPInst {
+ bits<5> rd;
+ bits<5> rt;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-21} = 0;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = op;
+ let Inst{5-0} = 0b010010;
+}
+
+
+class REPL_FMT<bits<5> op> : DSPInst {
+ bits<5> rd;
+ bits<10> imm;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-16} = imm;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = op;
+ let Inst{5-0} = 0b010010;
+}
+
+// SHLL.QB sub-class format.
+class SHLL_QB_FMT<bits<5> op> : DSPInst {
+ bits<5> rd;
+ bits<5> rt;
+ bits<5> rs_sa;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-21} = rs_sa;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = op;
+ let Inst{5-0} = 0b010011;
+}
+
+// LX sub-class format.
+class LX_FMT<bits<5> op> : DSPInst {
+ bits<5> rd;
+ bits<5> base;
+ bits<5> index;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-21} = base;
+ let Inst{20-16} = index;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = op;
+ let Inst{5-0} = 0b001010;
+}
+
+// ADDUH.QB sub-class format.
+class ADDUH_QB_FMT<bits<5> op> : DSPInst {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = op;
+ let Inst{5-0} = 0b011000;
+}
+
+// APPEND sub-class format.
+class APPEND_FMT<bits<5> op> : DSPInst {
+ bits<5> rt;
+ bits<5> rs;
+ bits<5> sa;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = sa;
+ let Inst{10-6} = op;
+ let Inst{5-0} = 0b110001;
+}
+
+// DPA.W.PH sub-class format.
+class DPA_W_PH_FMT<bits<5> op> : DSPInst {
+ bits<2> ac;
+ bits<5> rs;
+ bits<5> rt;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-13} = 0;
+ let Inst{12-11} = ac;
+ let Inst{10-6} = op;
+ let Inst{5-0} = 0b110000;
+}
+
+// MULT sub-class format.
+class MULT_FMT<bits<6> opcode, bits<6> funct> : DSPInst {
+ bits<2> ac;
+ bits<5> rs;
+ bits<5> rt;
+
+ let Opcode = opcode;
+
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-13} = 0;
+ let Inst{12-11} = ac;
+ let Inst{10-6} = 0;
+ let Inst{5-0} = funct;
+}
+
+// EXTR.W sub-class format (type 1).
+class EXTR_W_TY1_FMT<bits<5> op> : DSPInst {
+ bits<5> rt;
+ bits<2> ac;
+ bits<5> shift_rs;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-21} = shift_rs;
+ let Inst{20-16} = rt;
+ let Inst{15-13} = 0;
+ let Inst{12-11} = ac;
+ let Inst{10-6} = op;
+ let Inst{5-0} = 0b111000;
+}
+
+// SHILO sub-class format.
+class SHILO_R1_FMT<bits<5> op> : DSPInst {
+ bits<2> ac;
+ bits<6> shift;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-20} = shift;
+ let Inst{19-13} = 0;
+ let Inst{12-11} = ac;
+ let Inst{10-6} = op;
+ let Inst{5-0} = 0b111000;
+}
+
+class SHILO_R2_FMT<bits<5> op> : DSPInst {
+ bits<2> ac;
+ bits<5> rs;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-21} = rs;
+ let Inst{20-13} = 0;
+ let Inst{12-11} = ac;
+ let Inst{10-6} = op;
+ let Inst{5-0} = 0b111000;
+}
+
+class RDDSP_FMT<bits<5> op> : DSPInst {
+ bits<5> rd;
+ bits<10> mask;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-16} = mask;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = op;
+ let Inst{5-0} = 0b111000;
+}
+
+class WRDSP_FMT<bits<5> op> : DSPInst {
+ bits<5> rs;
+ bits<10> mask;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-21} = rs;
+ let Inst{20-11} = mask;
+ let Inst{10-6} = op;
+ let Inst{5-0} = 0b111000;
+}
+
+class BPOSGE32_FMT<bits<5> op> : DSPInst {
+ bits<16> offset;
+
+ let Opcode = REGIMM_OPCODE.V;
+
+ let Inst{25-21} = 0;
+ let Inst{20-16} = op;
+ let Inst{15-0} = offset;
+}
+
+// INSV sub-class format.
+class INSV_FMT<bits<6> op> : DSPInst {
+ bits<5> rt;
+ bits<5> rs;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-6} = 0;
+ let Inst{5-0} = op;
+}
diff --git a/lib/Target/Mips/MipsDSPInstrInfo.td b/lib/Target/Mips/MipsDSPInstrInfo.td
index 1a4fd8733a..ef9402865b 100644
--- a/lib/Target/Mips/MipsDSPInstrInfo.td
+++ b/lib/Target/Mips/MipsDSPInstrInfo.td
@@ -18,3 +18,1302 @@ def immZExt4 : ImmLeaf<i32, [{return isUInt<4>(Imm);}]>;
def immZExt8 : ImmLeaf<i32, [{return isUInt<8>(Imm);}]>;
def immZExt10 : ImmLeaf<i32, [{return isUInt<10>(Imm);}]>;
def immSExt6 : ImmLeaf<i32, [{return isInt<6>(Imm);}]>;
+
+// Mips-specific dsp nodes
+def SDT_MipsExtr : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>]>;
+def SDT_MipsShilo : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+def SDT_MipsDPA : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>]>;
+
+class MipsDSPBase<string Opc, SDTypeProfile Prof> :
+ SDNode<!strconcat("MipsISD::", Opc), Prof,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
+
+class MipsDSPSideEffectBase<string Opc, SDTypeProfile Prof> :
+ SDNode<!strconcat("MipsISD::", Opc), Prof,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPSideEffect]>;
+
+def MipsEXTP : MipsDSPSideEffectBase<"EXTP", SDT_MipsExtr>;
+def MipsEXTPDP : MipsDSPSideEffectBase<"EXTPDP", SDT_MipsExtr>;
+def MipsEXTR_S_H : MipsDSPSideEffectBase<"EXTR_S_H", SDT_MipsExtr>;
+def MipsEXTR_W : MipsDSPSideEffectBase<"EXTR_W", SDT_MipsExtr>;
+def MipsEXTR_R_W : MipsDSPSideEffectBase<"EXTR_R_W", SDT_MipsExtr>;
+def MipsEXTR_RS_W : MipsDSPSideEffectBase<"EXTR_RS_W", SDT_MipsExtr>;
+
+def MipsSHILO : MipsDSPBase<"SHILO", SDT_MipsShilo>;
+def MipsMTHLIP : MipsDSPBase<"MTHLIP", SDT_MipsShilo>;
+
+def MipsMULSAQ_S_W_PH : MipsDSPSideEffectBase<"MULSAQ_S_W_PH", SDT_MipsDPA>;
+def MipsMAQ_S_W_PHL : MipsDSPSideEffectBase<"MAQ_S_W_PHL", SDT_MipsDPA>;
+def MipsMAQ_S_W_PHR : MipsDSPSideEffectBase<"MAQ_S_W_PHR", SDT_MipsDPA>;
+def MipsMAQ_SA_W_PHL : MipsDSPSideEffectBase<"MAQ_SA_W_PHL", SDT_MipsDPA>;
+def MipsMAQ_SA_W_PHR : MipsDSPSideEffectBase<"MAQ_SA_W_PHR", SDT_MipsDPA>;
+
+def MipsDPAU_H_QBL : MipsDSPBase<"DPAU_H_QBL", SDT_MipsDPA>;
+def MipsDPAU_H_QBR : MipsDSPBase<"DPAU_H_QBR", SDT_MipsDPA>;
+def MipsDPSU_H_QBL : MipsDSPBase<"DPSU_H_QBL", SDT_MipsDPA>;
+def MipsDPSU_H_QBR : MipsDSPBase<"DPSU_H_QBR", SDT_MipsDPA>;
+def MipsDPAQ_S_W_PH : MipsDSPSideEffectBase<"DPAQ_S_W_PH", SDT_MipsDPA>;
+def MipsDPSQ_S_W_PH : MipsDSPSideEffectBase<"DPSQ_S_W_PH", SDT_MipsDPA>;
+def MipsDPAQ_SA_L_W : MipsDSPSideEffectBase<"DPAQ_SA_L_W", SDT_MipsDPA>;
+def MipsDPSQ_SA_L_W : MipsDSPSideEffectBase<"DPSQ_SA_L_W", SDT_MipsDPA>;
+
+def MipsDPA_W_PH : MipsDSPBase<"DPA_W_PH", SDT_MipsDPA>;
+def MipsDPS_W_PH : MipsDSPBase<"DPS_W_PH", SDT_MipsDPA>;
+def MipsDPAQX_S_W_PH : MipsDSPSideEffectBase<"DPAQX_S_W_PH", SDT_MipsDPA>;
+def MipsDPAQX_SA_W_PH : MipsDSPSideEffectBase<"DPAQX_SA_W_PH", SDT_MipsDPA>;
+def MipsDPAX_W_PH : MipsDSPBase<"DPAX_W_PH", SDT_MipsDPA>;
+def MipsDPSX_W_PH : MipsDSPBase<"DPSX_W_PH", SDT_MipsDPA>;
+def MipsDPSQX_S_W_PH : MipsDSPSideEffectBase<"DPSQX_S_W_PH", SDT_MipsDPA>;
+def MipsDPSQX_SA_W_PH : MipsDSPSideEffectBase<"DPSQX_SA_W_PH", SDT_MipsDPA>;
+def MipsMULSA_W_PH : MipsDSPBase<"MULSA_W_PH", SDT_MipsDPA>;
+
+def MipsMULT : MipsDSPBase<"MULT", SDT_MipsDPA>;
+def MipsMULTU : MipsDSPBase<"MULTU", SDT_MipsDPA>;
+def MipsMADD_DSP : MipsDSPBase<"MADD_DSP", SDT_MipsDPA>;
+def MipsMADDU_DSP : MipsDSPBase<"MADDU_DSP", SDT_MipsDPA>;
+def MipsMSUB_DSP : MipsDSPBase<"MSUB_DSP", SDT_MipsDPA>;
+def MipsMSUBU_DSP : MipsDSPBase<"MSUBU_DSP", SDT_MipsDPA>;
+
+// Flags.
+class IsCommutable {
+ bit isCommutable = 1;
+}
+
+class UseAC {
+ list<Register> Uses = [AC0];
+}
+
+class UseDSPCtrl {
+ list<Register> Uses = [DSPCtrl];
+}
+
+class ClearDefs {
+ list<Register> Defs = [];
+}
+
+// Instruction encoding.
+class ADDU_QB_ENC : ADDU_QB_FMT<0b00000>;
+class ADDU_S_QB_ENC : ADDU_QB_FMT<0b00100>;
+class SUBU_QB_ENC : ADDU_QB_FMT<0b00001>;
+class SUBU_S_QB_ENC : ADDU_QB_FMT<0b00101>;
+class ADDQ_PH_ENC : ADDU_QB_FMT<0b01010>;
+class ADDQ_S_PH_ENC : ADDU_QB_FMT<0b01110>;
+class SUBQ_PH_ENC : ADDU_QB_FMT<0b01011>;
+class SUBQ_S_PH_ENC : ADDU_QB_FMT<0b01111>;
+class ADDQ_S_W_ENC : ADDU_QB_FMT<0b10110>;
+class SUBQ_S_W_ENC : ADDU_QB_FMT<0b10111>;
+class ADDSC_ENC : ADDU_QB_FMT<0b10000>;
+class ADDWC_ENC : ADDU_QB_FMT<0b10001>;
+class MODSUB_ENC : ADDU_QB_FMT<0b10010>;
+class RADDU_W_QB_ENC : RADDU_W_QB_FMT<0b10100>;
+class ABSQ_S_PH_ENC : ABSQ_S_PH_R2_FMT<0b01001>;
+class ABSQ_S_W_ENC : ABSQ_S_PH_R2_FMT<0b10001>;
+class PRECRQ_QB_PH_ENC : CMP_EQ_QB_R3_FMT<0b01100>;
+class PRECRQ_PH_W_ENC : CMP_EQ_QB_R3_FMT<0b10100>;
+class PRECRQ_RS_PH_W_ENC : CMP_EQ_QB_R3_FMT<0b10101>;
+class PRECRQU_S_QB_PH_ENC : CMP_EQ_QB_R3_FMT<0b01111>;
+class PRECEQ_W_PHL_ENC : ABSQ_S_PH_R2_FMT<0b01100>;
+class PRECEQ_W_PHR_ENC : ABSQ_S_PH_R2_FMT<0b01101>;
+class PRECEQU_PH_QBL_ENC : ABSQ_S_PH_R2_FMT<0b00100>;
+class PRECEQU_PH_QBR_ENC : ABSQ_S_PH_R2_FMT<0b00101>;
+class PRECEQU_PH_QBLA_ENC : ABSQ_S_PH_R2_FMT<0b00110>;
+class PRECEQU_PH_QBRA_ENC : ABSQ_S_PH_R2_FMT<0b00111>;
+class PRECEU_PH_QBL_ENC : ABSQ_S_PH_R2_FMT<0b11100>;
+class PRECEU_PH_QBR_ENC : ABSQ_S_PH_R2_FMT<0b11101>;
+class PRECEU_PH_QBLA_ENC : ABSQ_S_PH_R2_FMT<0b11110>;
+class PRECEU_PH_QBRA_ENC : ABSQ_S_PH_R2_FMT<0b11111>;
+class SHLL_QB_ENC : SHLL_QB_FMT<0b00000>;
+class SHLLV_QB_ENC : SHLL_QB_FMT<0b00010>;
+class SHRL_QB_ENC : SHLL_QB_FMT<0b00001>;
+class SHRLV_QB_ENC : SHLL_QB_FMT<0b00011>;
+class SHLL_PH_ENC : SHLL_QB_FMT<0b01000>;
+class SHLLV_PH_ENC : SHLL_QB_FMT<0b01010>;
+class SHLL_S_PH_ENC : SHLL_QB_FMT<0b01100>;
+class SHLLV_S_PH_ENC : SHLL_QB_FMT<0b01110>;
+class SHRA_PH_ENC : SHLL_QB_FMT<0b01001>;
+class SHRAV_PH_ENC : SHLL_QB_FMT<0b01011>;
+class SHRA_R_PH_ENC : SHLL_QB_FMT<0b01101>;
+class SHRAV_R_PH_ENC : SHLL_QB_FMT<0b01111>;
+class SHLL_S_W_ENC : SHLL_QB_FMT<0b10100>;
+class SHLLV_S_W_ENC : SHLL_QB_FMT<0b10110>;
+class SHRA_R_W_ENC : SHLL_QB_FMT<0b10101>;
+class SHRAV_R_W_ENC : SHLL_QB_FMT<0b10111>;
+class MULEU_S_PH_QBL_ENC : ADDU_QB_FMT<0b00110>;
+class MULEU_S_PH_QBR_ENC : ADDU_QB_FMT<0b00111>;
+class MULEQ_S_W_PHL_ENC : ADDU_QB_FMT<0b11100>;
+class MULEQ_S_W_PHR_ENC : ADDU_QB_FMT<0b11101>;
+class MULQ_RS_PH_ENC : ADDU_QB_FMT<0b11111>;
+class MULSAQ_S_W_PH_ENC : DPA_W_PH_FMT<0b00110>;
+class MAQ_S_W_PHL_ENC : DPA_W_PH_FMT<0b10100>;
+class MAQ_S_W_PHR_ENC : DPA_W_PH_FMT<0b10110>;
+class MAQ_SA_W_PHL_ENC : DPA_W_PH_FMT<0b10000>;
+class MAQ_SA_W_PHR_ENC : DPA_W_PH_FMT<0b10010>;
+class DPAU_H_QBL_ENC : DPA_W_PH_FMT<0b00011>;
+class DPAU_H_QBR_ENC : DPA_W_PH_FMT<0b00111>;
+class DPSU_H_QBL_ENC : DPA_W_PH_FMT<0b01011>;
+class DPSU_H_QBR_ENC : DPA_W_PH_FMT<0b01111>;
+class DPAQ_S_W_PH_ENC : DPA_W_PH_FMT<0b00100>;
+class DPSQ_S_W_PH_ENC : DPA_W_PH_FMT<0b00101>;
+class DPAQ_SA_L_W_ENC : DPA_W_PH_FMT<0b01100>;
+class DPSQ_SA_L_W_ENC : DPA_W_PH_FMT<0b01101>;
+class MULT_DSP_ENC : MULT_FMT<0b000000, 0b011000>;
+class MULTU_DSP_ENC : MULT_FMT<0b000000, 0b011001>;
+class MADD_DSP_ENC : MULT_FMT<0b011100, 0b000000>;
+class MADDU_DSP_ENC : MULT_FMT<0b011100, 0b000001>;
+class MSUB_DSP_ENC : MULT_FMT<0b011100, 0b000100>;
+class MSUBU_DSP_ENC : MULT_FMT<0b011100, 0b000101>;
+class CMPU_EQ_QB_ENC : CMP_EQ_QB_R2_FMT<0b00000>;
+class CMPU_LT_QB_ENC : CMP_EQ_QB_R2_FMT<0b00001>;
+class CMPU_LE_QB_ENC : CMP_EQ_QB_R2_FMT<0b00010>;
+class CMPGU_EQ_QB_ENC : CMP_EQ_QB_R3_FMT<0b00100>;
+class CMPGU_LT_QB_ENC : CMP_EQ_QB_R3_FMT<0b00101>;
+class CMPGU_LE_QB_ENC : CMP_EQ_QB_R3_FMT<0b00110>;
+class CMP_EQ_PH_ENC : CMP_EQ_QB_R2_FMT<0b01000>;
+class CMP_LT_PH_ENC : CMP_EQ_QB_R2_FMT<0b01001>;
+class CMP_LE_PH_ENC : CMP_EQ_QB_R2_FMT<0b01010>;
+class BITREV_ENC : ABSQ_S_PH_R2_FMT<0b11011>;
+class PACKRL_PH_ENC : CMP_EQ_QB_R3_FMT<0b01110>;
+class REPL_QB_ENC : REPL_FMT<0b00010>;
+class REPL_PH_ENC : REPL_FMT<0b01010>;
+class REPLV_QB_ENC : ABSQ_S_PH_R2_FMT<0b00011>;
+class REPLV_PH_ENC : ABSQ_S_PH_R2_FMT<0b01011>;
+class PICK_QB_ENC : CMP_EQ_QB_R3_FMT<0b00011>;
+class PICK_PH_ENC : CMP_EQ_QB_R3_FMT<0b01011>;
+class LWX_ENC : LX_FMT<0b00000>;
+class LHX_ENC : LX_FMT<0b00100>;
+class LBUX_ENC : LX_FMT<0b00110>;
+class BPOSGE32_ENC : BPOSGE32_FMT<0b11100>;
+class INSV_ENC : INSV_FMT<0b001100>;
+
+class EXTP_ENC : EXTR_W_TY1_FMT<0b00010>;
+class EXTPV_ENC : EXTR_W_TY1_FMT<0b00011>;
+class EXTPDP_ENC : EXTR_W_TY1_FMT<0b01010>;
+class EXTPDPV_ENC : EXTR_W_TY1_FMT<0b01011>;
+class EXTR_W_ENC : EXTR_W_TY1_FMT<0b00000>;
+class EXTRV_W_ENC : EXTR_W_TY1_FMT<0b00001>;
+class EXTR_R_W_ENC : EXTR_W_TY1_FMT<0b00100>;
+class EXTRV_R_W_ENC : EXTR_W_TY1_FMT<0b00101>;
+class EXTR_RS_W_ENC : EXTR_W_TY1_FMT<0b00110>;
+class EXTRV_RS_W_ENC : EXTR_W_TY1_FMT<0b00111>;
+class EXTR_S_H_ENC : EXTR_W_TY1_FMT<0b01110>;
+class EXTRV_S_H_ENC : EXTR_W_TY1_FMT<0b01111>;
+class SHILO_ENC : SHILO_R1_FMT<0b11010>;
+class SHILOV_ENC : SHILO_R2_FMT<0b11011>;
+class MTHLIP_ENC : SHILO_R2_FMT<0b11111>;
+
+class RDDSP_ENC : RDDSP_FMT<0b10010>;
+class WRDSP_ENC : WRDSP_FMT<0b10011>;
+class ADDU_PH_ENC : ADDU_QB_FMT<0b01000>;
+class ADDU_S_PH_ENC : ADDU_QB_FMT<0b01100>;
+class SUBU_PH_ENC : ADDU_QB_FMT<0b01001>;
+class SUBU_S_PH_ENC : ADDU_QB_FMT<0b01101>;
+class CMPGDU_EQ_QB_ENC : CMP_EQ_QB_R3_FMT<0b11000>;
+class CMPGDU_LT_QB_ENC : CMP_EQ_QB_R3_FMT<0b11001>;
+class CMPGDU_LE_QB_ENC : CMP_EQ_QB_R3_FMT<0b11010>;
+class ABSQ_S_QB_ENC : ABSQ_S_PH_R2_FMT<0b00001>;
+class ADDUH_QB_ENC : ADDUH_QB_FMT<0b00000>;
+class ADDUH_R_QB_ENC : ADDUH_QB_FMT<0b00010>;
+class SUBUH_QB_ENC : ADDUH_QB_FMT<0b00001>;
+class SUBUH_R_QB_ENC : ADDUH_QB_FMT<0b00011>;
+class ADDQH_PH_ENC : ADDUH_QB_FMT<0b01000>;
+class ADDQH_R_PH_ENC : ADDUH_QB_FMT<0b01010>;
+class SUBQH_PH_ENC : ADDUH_QB_FMT<0b01001>;
+class SUBQH_R_PH_ENC : ADDUH_QB_FMT<0b01011>;
+class ADDQH_W_ENC : ADDUH_QB_FMT<0b10000>;
+class ADDQH_R_W_ENC : ADDUH_QB_FMT<0b10010>;
+class SUBQH_W_ENC : ADDUH_QB_FMT<0b10001>;
+class SUBQH_R_W_ENC : ADDUH_QB_FMT<0b10011>;
+class MUL_PH_ENC : ADDUH_QB_FMT<0b01100>;
+class MUL_S_PH_ENC : ADDUH_QB_FMT<0b01110>;
+class MULQ_S_W_ENC : ADDUH_QB_FMT<0b10110>;
+class MULQ_RS_W_ENC : ADDUH_QB_FMT<0b10111>;
+class MULQ_S_PH_ENC : ADDU_QB_FMT<0b11110>;
+class DPA_W_PH_ENC : DPA_W_PH_FMT<0b00000>;
+class DPS_W_PH_ENC : DPA_W_PH_FMT<0b00001>;
+class DPAQX_S_W_PH_ENC : DPA_W_PH_FMT<0b11000>;
+class DPAQX_SA_W_PH_ENC : DPA_W_PH_FMT<0b11010>;
+class DPAX_W_PH_ENC : DPA_W_PH_FMT<0b01000>;
+class DPSX_W_PH_ENC : DPA_W_PH_FMT<0b01001>;
+class DPSQX_S_W_PH_ENC : DPA_W_PH_FMT<0b11001>;
+class DPSQX_SA_W_PH_ENC : DPA_W_PH_FMT<0b11011>;
+class MULSA_W_PH_ENC : DPA_W_PH_FMT<0b00010>;
+class PRECR_QB_PH_ENC : CMP_EQ_QB_R3_FMT<0b01101>;
+class PRECR_SRA_PH_W_ENC : PRECR_SRA_PH_W_FMT<0b11110>;
+class PRECR_SRA_R_PH_W_ENC : PRECR_SRA_PH_W_FMT<0b11111>;
+class SHRA_QB_ENC : SHLL_QB_FMT<0b00100>;
+class SHRAV_QB_ENC : SHLL_QB_FMT<0b00110>;
+class SHRA_R_QB_ENC : SHLL_QB_FMT<0b00101>;
+class SHRAV_R_QB_ENC : SHLL_QB_FMT<0b00111>;
+class SHRL_PH_ENC : SHLL_QB_FMT<0b11001>;
+class SHRLV_PH_ENC : SHLL_QB_FMT<0b11011>;
+class APPEND_ENC : APPEND_FMT<0b00000>;
+class BALIGN_ENC : APPEND_FMT<0b10000>;
+class PREPEND_ENC : APPEND_FMT<0b00001>;
+
+// Instruction desc.
+class ADDU_QB_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin, RegisterClass RCD,
+ RegisterClass RCS, RegisterClass RCT = RCS> {
+ dag OutOperandList = (outs RCD:$rd);
+ dag InOperandList = (ins RCS:$rs, RCT:$rt);
+ string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt");
+ list<dag> Pattern = [(set RCD:$rd, (OpNode RCS:$rs, RCT:$rt))];
+ InstrItinClass Itinerary = itin;
+ list<Register> Defs = [DSPCtrl];
+}
+
+class RADDU_W_QB_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin, RegisterClass RCD,
+ RegisterClass RCS = RCD> {
+ dag OutOperandList = (outs RCD:$rd);
+ dag InOperandList = (ins RCS:$rs);
+ string AsmString = !strconcat(instr_asm, "\t$rd, $rs");
+ list<dag> Pattern = [(set RCD:$rd, (OpNode RCS:$rs))];
+ InstrItinClass Itinerary = itin;
+ list<Register> Defs = [DSPCtrl];
+}
+
+class CMP_EQ_QB_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin, RegisterClass RCS,
+ RegisterClass RCT = RCS> {
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins RCS:$rs, RCT:$rt);
+ string AsmString = !strconcat(instr_asm, "\t$rs, $rt");
+ list<dag> Pattern = [(OpNode RCS:$rs, RCT:$rt)];
+ InstrItinClass Itinerary = itin;
+ list<Register> Defs = [DSPCtrl];
+}
+
+class CMP_EQ_QB_R3_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin, RegisterClass RCD,
+ RegisterClass RCS, RegisterClass RCT = RCS> {
+ dag OutOperandList = (outs RCD:$rd);
+ dag InOperandList = (ins RCS:$rs, RCT:$rt);
+ string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt");
+ list<dag> Pattern = [(set RCD:$rd, (OpNode RCS:$rs, RCT:$rt))];
+ InstrItinClass Itinerary = itin;
+ list<Register> Defs = [DSPCtrl];
+}
+
+class PRECR_SRA_PH_W_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin, RegisterClass RCT,
+ RegisterClass RCS = RCT> {
+ dag OutOperandList = (outs RCT:$rt);
+ dag InOperandList = (ins RCS:$rs, shamt:$sa, RCS:$src);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $sa");
+ list<dag> Pattern = [(set RCT:$rt, (OpNode RCS:$src, RCS:$rs, immZExt5:$sa))];
+ InstrItinClass Itinerary = itin;
+ list<Register> Defs = [DSPCtrl];
+ string Constraints = "$src = $rt";
+}
+
+class ABSQ_S_PH_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin, RegisterClass RCD,
+ RegisterClass RCT = RCD> {
+ dag OutOperandList = (outs RCD:$rd);
+ dag InOperandList = (ins RCT:$rt);
+ string AsmString = !strconcat(instr_asm, "\t$rd, $rt");
+ list<dag> Pattern = [(set RCD:$rd, (OpNode RCT:$rt))];
+ InstrItinClass Itinerary = itin;
+ list<Register> Defs = [DSPCtrl];
+}
+
+class REPL_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ ImmLeaf immPat, InstrItinClass itin, RegisterClass RC> {
+ dag OutOperandList = (outs RC:$rd);
+ dag InOperandList = (ins uimm16:$imm);
+ string AsmString = !strconcat(instr_asm, "\t$rd, $imm");
+ list<dag> Pattern = [(set RC:$rd, (OpNode immPat:$imm))];
+ InstrItinClass Itinerary = itin;
+ list<Register> Defs = [DSPCtrl];
+}
+
+class SHLL_QB_R3_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin, RegisterClass RC> {
+ dag OutOperandList = (outs RC:$rd);
+ dag InOperandList = (ins RC:$rt, CPURegs:$rs_sa);
+ string AsmString = !strconcat(instr_asm, "\t$rd, $rt, $rs_sa");
+ list<dag> Pattern = [(set RC:$rd, (OpNode RC:$rt, CPURegs:$rs_sa))];
+ InstrItinClass Itinerary = itin;
+ list<Register> Defs = [DSPCtrl];
+}
+
+class SHLL_QB_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ SDPatternOperator ImmPat, InstrItinClass itin,
+ RegisterClass RC> {
+ dag OutOperandList = (outs RC:$rd);
+ dag InOperandList = (ins RC:$rt, uimm16:$rs_sa);
+ string AsmString = !strconcat(instr_asm, "\t$rd, $rt, $rs_sa");
+ list<dag> Pattern = [(set RC:$rd, (OpNode RC:$rt, ImmPat:$rs_sa))];
+ InstrItinClass Itinerary = itin;
+ list<Register> Defs = [DSPCtrl];
+}
+
+class LX_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin> {
+ dag OutOperandList = (outs CPURegs:$rd);
+ dag InOperandList = (ins CPURegs:$base, CPURegs:$index);
+ string AsmString = !strconcat(instr_asm, "\t$rd, ${index}(${base})");
+ list<dag> Pattern = [(set CPURegs:$rd,
+ (OpNode CPURegs:$base, CPURegs:$index))];
+ InstrItinClass Itinerary = itin;
+ list<Register> Defs = [DSPCtrl];
+ bit mayLoad = 1;
+}
+
+class ADDUH_QB_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin, RegisterClass RCD,
+ RegisterClass RCS = RCD, RegisterClass RCT = RCD> {
+ dag OutOperandList = (outs RCD:$rd);
+ dag InOperandList = (ins RCS:$rs, RCT:$rt);
+ string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt");
+ list<dag> Pattern = [(set RCD:$rd, (OpNode RCS:$rs, RCT:$rt))];
+ InstrItinClass Itinerary = itin;
+ list<Register> Defs = [DSPCtrl];
+}
+
+class APPEND_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ SDPatternOperator ImmOp, InstrItinClass itin> {
+ dag OutOperandList = (outs CPURegs:$rt);
+ dag InOperandList = (ins CPURegs:$rs, shamt:$sa, CPURegs:$src);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $sa");
+ list<dag> Pattern = [(set CPURegs:$rt,
+ (OpNode CPURegs:$src, CPURegs:$rs, ImmOp:$sa))];
+ InstrItinClass Itinerary = itin;
+ list<Register> Defs = [DSPCtrl];
+ string Constraints = "$src = $rt";
+}
+
+class EXTR_W_TY1_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin> {
+ dag OutOperandList = (outs CPURegs:$rt);
+ dag InOperandList = (ins ACRegs:$ac, CPURegs:$shift_rs);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $shift_rs");
+ InstrItinClass Itinerary = itin;
+ list<Register> Defs = [DSPCtrl];
+}
+
+class EXTR_W_TY1_R1_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin> {
+ dag OutOperandList = (outs CPURegs:$rt);
+ dag InOperandList = (ins ACRegs:$ac, uimm16:$shift_rs);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $shift_rs");
+ InstrItinClass Itinerary = itin;
+ list<Register> Defs = [DSPCtrl];
+}
+
+class SHILO_R1_PSEUDO_BASE<SDPatternOperator OpNode, InstrItinClass itin,
+ Instruction realinst> :
+ PseudoDSP<(outs), (ins simm16:$shift), [(OpNode immSExt6:$shift)]>,
+ PseudoInstExpansion<(realinst AC0, simm16:$shift)> {
+ list<Register> Defs = [DSPCtrl, AC0];
+ list<Register> Uses = [AC0];
+ InstrItinClass Itinerary = itin;
+}
+
+class SHILO_R1_DESC_BASE<string instr_asm> {
+ dag OutOperandList = (outs ACRegs:$ac);
+ dag InOperandList = (ins simm16:$shift);
+ string AsmString = !strconcat(instr_asm, "\t$ac, $shift");
+}
+
+class SHILO_R2_PSEUDO_BASE<SDPatternOperator OpNode, InstrItinClass itin,
+ Instruction realinst> :
+ PseudoDSP<(outs), (ins CPURegs:$rs), [(OpNode CPURegs:$rs)]>,
+ PseudoInstExpansion<(realinst AC0, CPURegs:$rs)> {
+ list<Register> Defs = [DSPCtrl, AC0];
+ list<Register> Uses = [AC0];
+ InstrItinClass Itinerary = itin;
+}
+
+class SHILO_R2_DESC_BASE<string instr_asm> {
+ dag OutOperandList = (outs ACRegs:$ac);
+ dag InOperandList = (ins CPURegs:$rs);
+ string AsmString = !strconcat(instr_asm, "\t$ac, $rs");
+}
+
+class MTHLIP_DESC_BASE<string instr_asm> {
+ dag OutOperandList = (outs ACRegs:$ac);
+ dag InOperandList = (ins CPURegs:$rs);
+ string AsmString = !strconcat(instr_asm, "\t$rs, $ac");
+}
+
+class RDDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin> {
+ dag OutOperandList = (outs CPURegs:$rd);
+ dag InOperandList = (ins uimm16:$mask);
+ string AsmString = !strconcat(instr_asm, "\t$rd, $mask");
+ list<dag> Pattern = [(set CPURegs:$rd, (OpNode immZExt10:$mask))];
+ InstrItinClass Itinerary = itin;
+ list<Register> Uses = [DSPCtrl];
+}
+
+class WRDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin> {
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins CPURegs:$rs, uimm16:$mask);
+ string AsmString = !strconcat(instr_asm, "\t$rs, $mask");
+ list<dag> Pattern = [(OpNode CPURegs:$rs, immZExt10:$mask)];
+ InstrItinClass Itinerary = itin;
+ list<Register> Defs = [DSPCtrl];
+}
+
+class DPA_W_PH_PSEUDO_BASE<SDPatternOperator OpNode, InstrItinClass itin,
+ Instruction realinst> :
+ PseudoDSP<(outs), (ins CPURegs:$rs, CPURegs:$rt),
+ [(OpNode CPURegs:$rs, CPURegs:$rt)]>,
+ PseudoInstExpansion<(realinst AC0, CPURegs:$rs, CPURegs:$rt)> {
+ list<Register> Defs = [DSPCtrl, AC0];
+ list<Register> Uses = [AC0];
+ InstrItinClass Itinerary = itin;
+}
+
+class DPA_W_PH_DESC_BASE<string instr_asm> {
+ dag OutOperandList = (outs ACRegs:$ac);
+ dag InOperandList = (ins CPURegs:$rs, CPURegs:$rt);
+ string AsmString = !strconcat(instr_asm, "\t$ac, $rs, $rt");
+}
+
+class MULT_PSEUDO_BASE<SDPatternOperator OpNode, InstrItinClass itin,
+ Instruction realinst> :
+ PseudoDSP<(outs), (ins CPURegs:$rs, CPURegs:$rt),
+ [(OpNode CPURegs:$rs, CPURegs:$rt)]>,
+ PseudoInstExpansion<(realinst AC0, CPURegs:$rs, CPURegs:$rt)> {
+ list<Register> Defs = [DSPCtrl, AC0];
+ InstrItinClass Itinerary = itin;
+}
+
+class MULT_DESC_BASE<string instr_asm> {
+ dag OutOperandList = (outs ACRegs:$ac);
+ dag InOperandList = (ins CPURegs:$rs, CPURegs:$rt);
+ string AsmString = !strconcat(instr_asm, "\t$ac, $rs, $rt");
+}
+
+class BPOSGE32_PSEUDO_DESC_BASE<SDPatternOperator OpNode, InstrItinClass itin> :
+ MipsPseudo<(outs CPURegs:$dst), (ins), "", [(set CPURegs:$dst, (OpNode))]> {
+ list<Register> Uses = [DSPCtrl];
+ bit usesCustomInserter = 1;
+}
+
+class BPOSGE32_DESC_BASE<string instr_asm, InstrItinClass itin> {
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins brtarget:$offset);
+ string AsmString = !strconcat(instr_asm, "\t$offset");
+ InstrItinClass Itinerary = itin;
+ list<Register> Uses = [DSPCtrl];
+ bit isBranch = 1;
+ bit isTerminator = 1;
+ bit hasDelaySlot = 1;
+}
+
+class INSV_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin> {
+ dag OutOperandList = (outs CPURegs:$rt);
+ dag InOperandList = (ins CPURegs:$src, CPURegs:$rs);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $rs");
+ list<dag> Pattern = [(set CPURegs:$rt, (OpNode CPURegs:$src, CPURegs:$rs))];
+ InstrItinClass Itinerary = itin;
+ list<Register> Uses = [DSPCtrl];
+ string Constraints = "$src = $rt";
+}
+
+//===----------------------------------------------------------------------===//
+// MIPS DSP Rev 1
+//===----------------------------------------------------------------------===//
+
+// Addition/subtraction
+class ADDU_QB_DESC : ADDU_QB_DESC_BASE<"addu.qb", int_mips_addu_qb, NoItinerary,
+ DSPRegs, DSPRegs>, IsCommutable;
+
+class ADDU_S_QB_DESC : ADDU_QB_DESC_BASE<"addu_s.qb", int_mips_addu_s_qb,
+ NoItinerary, DSPRegs, DSPRegs>,
+ IsCommutable;
+
+class SUBU_QB_DESC : ADDU_QB_DESC_BASE<"subu.qb", int_mips_subu_qb, NoItinerary,
+ DSPRegs, DSPRegs>;
+
+class SUBU_S_QB_DESC : ADDU_QB_DESC_BASE<"subu_s.qb", int_mips_subu_s_qb,
+ NoItinerary, DSPRegs, DSPRegs>;
+
+class ADDQ_PH_DESC : ADDU_QB_DESC_BASE<"addq.ph", int_mips_addq_ph, NoItinerary,
+ DSPRegs, DSPRegs>, IsCommutable;
+
+class ADDQ_S_PH_DESC : ADDU_QB_DESC_BASE<"addq_s.ph", int_mips_addq_s_ph,
+ NoItinerary, DSPRegs, DSPRegs>,
+ IsCommutable;
+
+class SUBQ_PH_DESC : ADDU_QB_DESC_BASE<"subq.ph", int_mips_subq_ph, NoItinerary,
+ DSPRegs, DSPRegs>;
+
+class SUBQ_S_PH_DESC : ADDU_QB_DESC_BASE<"subq_s.ph", int_mips_subq_s_ph,
+ NoItinerary, DSPRegs, DSPRegs>;
+
+class ADDQ_S_W_DESC : ADDU_QB_DESC_BASE<"addq_s.w", int_mips_addq_s_w,
+ NoItinerary, CPURegs, CPURegs>,
+ IsCommutable;
+
+class SUBQ_S_W_DESC : ADDU_QB_DESC_BASE<"subq_s.w", int_mips_subq_s_w,
+ NoItinerary, CPURegs, CPURegs>;
+
+class ADDSC_DESC : ADDU_QB_DESC_BASE<"addsc", int_mips_addsc, NoItinerary,
+ CPURegs, CPURegs>, IsCommutable;
+
+class ADDWC_DESC : ADDU_QB_DESC_BASE<"addwc", int_mips_addwc, NoItinerary,
+ CPURegs, CPURegs>,
+ IsCommutable, UseDSPCtrl;
+
+class MODSUB_DESC : ADDU_QB_DESC_BASE<"modsub", int_mips_modsub, NoItinerary,
+ CPURegs, CPURegs>, ClearDefs;
+
+class RADDU_W_QB_DESC : RADDU_W_QB_DESC_BASE<"raddu.w.qb", int_mips_raddu_w_qb,
+ NoItinerary, CPURegs, DSPRegs>,
+ ClearDefs;
+
+// Absolute value
+class ABSQ_S_PH_DESC : ABSQ_S_PH_R2_DESC_BASE<"absq_s.ph", int_mips_absq_s_ph,
+ NoItinerary, DSPRegs>;
+
+class ABSQ_S_W_DESC : ABSQ_S_PH_R2_DESC_BASE<"absq_s.w", int_mips_absq_s_w,
+ NoItinerary, CPURegs>;
+
+// Precision reduce/expand
+class PRECRQ_QB_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrq.qb.ph",
+ int_mips_precrq_qb_ph,
+ NoItinerary, DSPRegs, DSPRegs>,
+ ClearDefs;
+
+class PRECRQ_PH_W_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrq.ph.w",
+ int_mips_precrq_ph_w,
+ NoItinerary, DSPRegs, CPURegs>,
+ ClearDefs;
+
+class PRECRQ_RS_PH_W_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrq_rs.ph.w",
+ int_mips_precrq_rs_ph_w,
+ NoItinerary, DSPRegs,
+ CPURegs>;
+
+class PRECRQU_S_QB_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrqu_s.qb.ph",
+ int_mips_precrqu_s_qb_ph,
+ NoItinerary, DSPRegs,
+ DSPRegs>;
+
+class PRECEQ_W_PHL_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceq.w.phl",
+ int_mips_preceq_w_phl,
+ NoItinerary, CPURegs, DSPRegs>,
+ ClearDefs;
+
+class PRECEQ_W_PHR_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceq.w.phr",
+ int_mips_preceq_w_phr,
+ NoItinerary, CPURegs, DSPRegs>,
+ ClearDefs;
+
+class PRECEQU_PH_QBL_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbl",
+ int_mips_precequ_ph_qbl,
+ NoItinerary, DSPRegs>,
+ ClearDefs;
+
+class PRECEQU_PH_QBR_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbr",
+ int_mips_precequ_ph_qbr,
+ NoItinerary, DSPRegs>,
+ ClearDefs;
+
+class PRECEQU_PH_QBLA_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbla",
+ int_mips_precequ_ph_qbla,
+ NoItinerary, DSPRegs>,
+ ClearDefs;
+
+class PRECEQU_PH_QBRA_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbra",
+ int_mips_precequ_ph_qbra,
+ NoItinerary, DSPRegs>,
+ ClearDefs;
+
+class PRECEU_PH_QBL_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbl",
+ int_mips_preceu_ph_qbl,
+ NoItinerary, DSPRegs>,
+ ClearDefs;
+
+class PRECEU_PH_QBR_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbr",
+ int_mips_preceu_ph_qbr,
+ NoItinerary, DSPRegs>,
+ ClearDefs;
+
+class PRECEU_PH_QBLA_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbla",
+ int_mips_preceu_ph_qbla,
+ NoItinerary, DSPRegs>,
+ ClearDefs;
+
+class PRECEU_PH_QBRA_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbra",
+ int_mips_preceu_ph_qbra,
+ NoItinerary, DSPRegs>,
+ ClearDefs;
+
+// Shift
+class SHLL_QB_DESC : SHLL_QB_R2_DESC_BASE<"shll.qb", int_mips_shll_qb, immZExt3,
+ NoItinerary, DSPRegs>;
+
+class SHLLV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shllv.qb", int_mips_shll_qb,
+ NoItinerary, DSPRegs>;
+
+class SHRL_QB_DESC : SHLL_QB_R2_DESC_BASE<"shrl.qb", int_mips_shrl_qb, immZExt3,
+ NoItinerary, DSPRegs>, ClearDefs;
+
+class SHRLV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrlv.qb", int_mips_shrl_qb,
+ NoItinerary, DSPRegs>, ClearDefs;
+
+class SHLL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shll.ph", int_mips_shll_ph, immZExt4,
+ NoItinerary, DSPRegs>;
+
+class SHLLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shllv.ph", int_mips_shll_ph,
+ NoItinerary, DSPRegs>;
+
+class SHLL_S_PH_DESC : SHLL_QB_R2_DESC_BASE<"shll_s.ph", int_mips_shll_s_ph,
+ immZExt4, NoItinerary, DSPRegs>;
+
+class SHLLV_S_PH_DESC : SHLL_QB_R3_DESC_BASE<"shllv_s.ph", int_mips_shll_s_ph,
+ NoItinerary, DSPRegs>;
+
+class SHRA_PH_DESC : SHLL_QB_R2_DESC_BASE<"shra.ph", int_mips_shra_ph, immZExt4,
+ NoItinerary, DSPRegs>, ClearDefs;
+
+class SHRAV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrav.ph", int_mips_shra_ph,
+ NoItinerary, DSPRegs>, ClearDefs;
+
+class SHRA_R_PH_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.ph", int_mips_shra_r_ph,
+ immZExt4, NoItinerary, DSPRegs>,
+ ClearDefs;
+
+class SHRAV_R_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.ph", int_mips_shra_r_ph,
+ NoItinerary, DSPRegs>, ClearDefs;
+
+class SHLL_S_W_DESC : SHLL_QB_R2_DESC_BASE<"shll_s.w", int_mips_shll_s_w,
+ immZExt5, NoItinerary, CPURegs>;
+
+class SHLLV_S_W_DESC : SHLL_QB_R3_DESC_BASE<"shllv_s.w", int_mips_shll_s_w,
+ NoItinerary, CPURegs>;
+
+class SHRA_R_W_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.w", int_mips_shra_r_w,
+ immZExt5, NoItinerary, CPURegs>,
+ ClearDefs;
+
+class SHRAV_R_W_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.w", int_mips_shra_r_w,
+ NoItinerary, CPURegs>;
+
+// Multiplication
+class MULEU_S_PH_QBL_DESC : ADDU_QB_DESC_BASE<"muleu_s.ph.qbl",
+ int_mips_muleu_s_ph_qbl,
+ NoItinerary, DSPRegs, DSPRegs>;
+
+class MULEU_S_PH_QBR_DESC : ADDU_QB_DESC_BASE<"muleu_s.ph.qbr",
+ int_mips_muleu_s_ph_qbr,
+ NoItinerary, DSPRegs, DSPRegs>;
+
+class MULEQ_S_W_PHL_DESC : ADDU_QB_DESC_BASE<"muleq_s.w.phl",
+ int_mips_muleq_s_w_phl,
+ NoItinerary, CPURegs, DSPRegs>,
+ IsCommutable;
+
+class MULEQ_S_W_PHR_DESC : ADDU_QB_DESC_BASE<"muleq_s.w.phr",
+ int_mips_muleq_s_w_phr,
+ NoItinerary, CPURegs, DSPRegs>,
+ IsCommutable;
+
+class MULQ_RS_PH_DESC : ADDU_QB_DESC_BASE<"mulq_rs.ph", int_mips_mulq_rs_ph,
+ NoItinerary, DSPRegs, DSPRegs>,
+ IsCommutable;
+
+class MULSAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsaq_s.w.ph">;
+
+class MAQ_S_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phl">;
+
+class MAQ_S_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phr">;
+
+class MAQ_SA_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phl">;
+
+class MAQ_SA_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phr">;
+
+// Dot product with accumulate/subtract
+class DPAU_H_QBL_DESC : DPA_W_PH_DESC_BASE<"dpau.h.qbl">;
+
+class DPAU_H_QBR_DESC : DPA_W_PH_DESC_BASE<"dpau.h.qbr">;
+
+class DPSU_H_QBL_DESC : DPA_W_PH_DESC_BASE<"dpsu.h.qbl">;
+
+class DPSU_H_QBR_DESC : DPA_W_PH_DESC_BASE<"dpsu.h.qbr">;
+
+class DPAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaq_s.w.ph">;
+
+class DPSQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsq_s.w.ph">;
+
+class DPAQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpaq_sa.l.w">;
+
+class DPSQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpsq_sa.l.w">;
+
+class MULT_DSP_DESC : MULT_DESC_BASE<"mult">;
+
+class MULTU_DSP_DESC : MULT_DESC_BASE<"multu">;
+
+class MADD_DSP_DESC : MULT_DESC_BASE<"madd">;
+
+class MADDU_DSP_DESC : MULT_DESC_BASE<"maddu">;
+
+class MSUB_DSP_DESC : MULT_DESC_BASE<"msub">;
+
+class MSUBU_DSP_DESC : MULT_DESC_BASE<"msubu">;
+
+// Comparison
+class CMPU_EQ_QB_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmpu.eq.qb",
+ int_mips_cmpu_eq_qb, NoItinerary,
+ DSPRegs>, IsCommutable;
+
+class CMPU_LT_QB_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmpu.lt.qb",
+ int_mips_cmpu_lt_qb, NoItinerary,
+ DSPRegs>, IsCommutable;
+
+class CMPU_LE_QB_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmpu.le.qb",
+ int_mips_cmpu_le_qb, NoItinerary,
+ DSPRegs>, IsCommutable;
+
+class CMPGU_EQ_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgu.eq.qb",
+ int_mips_cmpgu_eq_qb,
+ NoItinerary, CPURegs, DSPRegs>,
+ IsCommutable;
+
+class CMPGU_LT_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgu.lt.qb",
+ int_mips_cmpgu_lt_qb,
+ NoItinerary, CPURegs, DSPRegs>,
+ IsCommutable;
+
+class CMPGU_LE_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgu.le.qb",
+ int_mips_cmpgu_le_qb,
+ NoItinerary, CPURegs, DSPRegs>,
+ IsCommutable;
+
+class CMP_EQ_PH_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmp.eq.ph", int_mips_cmp_eq_ph,
+ NoItinerary, DSPRegs>,
+ IsCommutable;
+
+class CMP_LT_PH_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmp.lt.ph", int_mips_cmp_lt_ph,
+ NoItinerary, DSPRegs>,
+ IsCommutable;
+
+class CMP_LE_PH_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmp.le.ph", int_mips_cmp_le_ph,
+ NoItinerary, DSPRegs>,
+ IsCommutable;
+
+// Misc
+class BITREV_DESC : ABSQ_S_PH_R2_DESC_BASE<"bitrev", int_mips_bitrev,
+ NoItinerary, CPURegs>, ClearDefs;
+
+class PACKRL_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"packrl.ph", int_mips_packrl_ph,
+ NoItinerary, DSPRegs, DSPRegs>,
+ ClearDefs;
+
+class REPL_QB_DESC : REPL_DESC_BASE<"repl.qb", int_mips_repl_qb, immZExt8,
+ NoItinerary, DSPRegs>, ClearDefs;
+
+class REPL_PH_DESC : REPL_DESC_BASE<"repl.ph", int_mips_repl_ph, immZExt10,
+ NoItinerary, DSPRegs>, ClearDefs;
+
+class REPLV_QB_DESC : ABSQ_S_PH_R2_DESC_BASE<"replv.qb", int_mips_repl_qb,
+ NoItinerary, DSPRegs, CPURegs>,
+ ClearDefs;
+
+class REPLV_PH_DESC : ABSQ_S_PH_R2_DESC_BASE<"replv.ph", int_mips_repl_ph,
+ NoItinerary, DSPRegs, CPURegs>,
+ ClearDefs;
+
+class PICK_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"pick.qb", int_mips_pick_qb,
+ NoItinerary, DSPRegs, DSPRegs>,
+ ClearDefs, UseDSPCtrl;
+
+class PICK_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"pick.ph", int_mips_pick_ph,
+ NoItinerary, DSPRegs, DSPRegs>,
+ ClearDefs, UseDSPCtrl;
+
+class LWX_DESC : LX_DESC_BASE<"lwx", int_mips_lwx, NoItinerary>, ClearDefs;
+
+class LHX_DESC : LX_DESC_BASE<"lhx", int_mips_lhx, NoItinerary>, ClearDefs;
+
+class LBUX_DESC : LX_DESC_BASE<"lbux", int_mips_lbux, NoItinerary>, ClearDefs;
+
+class BPOSGE32_DESC : BPOSGE32_DESC_BASE<"bposge32", NoItinerary>;
+
+// Extr
+class EXTP_DESC : EXTR_W_TY1_R1_DESC_BASE<"extp", MipsEXTP, NoItinerary>;
+
+class EXTPV_DESC : EXTR_W_TY1_R2_DESC_BASE<"extpv", MipsEXTP, NoItinerary>;
+
+class EXTPDP_DESC : EXTR_W_TY1_R1_DESC_BASE<"extpdp", MipsEXTPDP, NoItinerary>;
+
+class EXTPDPV_DESC : EXTR_W_TY1_R2_DESC_BASE<"extpdpv", MipsEXTPDP,
+ NoItinerary>;
+
+class EXTR_W_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr.w", MipsEXTR_W, NoItinerary>;
+
+class EXTRV_W_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv.w", MipsEXTR_W,
+ NoItinerary>;
+
+class EXTR_R_W_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr_r.w", MipsEXTR_R_W,
+ NoItinerary>;
+
+class EXTRV_R_W_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv_r.w", MipsEXTR_R_W,
+ NoItinerary>;
+
+class EXTR_RS_W_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr_rs.w", MipsEXTR_RS_W,
+ NoItinerary>;
+
+class EXTRV_RS_W_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv_rs.w", MipsEXTR_RS_W,
+ NoItinerary>;
+
+class EXTR_S_H_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr_s.h", MipsEXTR_S_H,
+ NoItinerary>;
+
+class EXTRV_S_H_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv_s.h", MipsEXTR_S_H,
+ NoItinerary>;
+
+class SHILO_DESC : SHILO_R1_DESC_BASE<"shilo">;
+
+class SHILOV_DESC : SHILO_R2_DESC_BASE<"shilov">;
+
+class MTHLIP_DESC : MTHLIP_DESC_BASE<"mthlip">;
+
+class RDDSP_DESC : RDDSP_DESC_BASE<"rddsp", int_mips_rddsp, NoItinerary>;
+
+class WRDSP_DESC : WRDSP_DESC_BASE<"wrdsp", int_mips_wrdsp, NoItinerary>;
+
+class INSV_DESC : INSV_DESC_BASE<"insv", int_mips_insv, NoItinerary>;
+
+//===----------------------------------------------------------------------===//
+// MIPS DSP Rev 2
+// Addition/subtraction
+class ADDU_PH_DESC : ADDU_QB_DESC_BASE<"addu.ph", int_mips_addu_ph, NoItinerary,
+ DSPRegs, DSPRegs>, IsCommutable;
+
+class ADDU_S_PH_DESC : ADDU_QB_DESC_BASE<"addu_s.ph", int_mips_addu_s_ph,
+ NoItinerary, DSPRegs, DSPRegs>,
+ IsCommutable;
+
+class SUBU_PH_DESC : ADDU_QB_DESC_BASE<"subu.ph", int_mips_subu_ph, NoItinerary,
+ DSPRegs, DSPRegs>;
+
+class SUBU_S_PH_DESC : ADDU_QB_DESC_BASE<"subu_s.ph", int_mips_subu_s_ph,
+ NoItinerary, DSPRegs, DSPRegs>;
+
+class ADDUH_QB_DESC : ADDUH_QB_DESC_BASE<"adduh.qb", int_mips_adduh_qb,
+ NoItinerary, DSPRegs>,
+ ClearDefs, IsCommutable;
+
+class ADDUH_R_QB_DESC : ADDUH_QB_DESC_BASE<"adduh_r.qb", int_mips_adduh_r_qb,
+ NoItinerary, DSPRegs>,
+ ClearDefs, IsCommutable;
+
+class SUBUH_QB_DESC : ADDUH_QB_DESC_BASE<"subuh.qb", int_mips_subuh_qb,
+ NoItinerary, DSPRegs>, ClearDefs;
+
+class SUBUH_R_QB_DESC : ADDUH_QB_DESC_BASE<"subuh_r.qb", int_mips_subuh_r_qb,
+ NoItinerary, DSPRegs>, ClearDefs;
+
+class ADDQH_PH_DESC : ADDUH_QB_DESC_BASE<"addqh.ph", int_mips_addqh_ph,
+ NoItinerary, DSPRegs>,
+ ClearDefs, IsCommutable;
+
+class ADDQH_R_PH_DESC : ADDUH_QB_DESC_BASE<"addqh_r.ph", int_mips_addqh_r_ph,
+ NoItinerary, DSPRegs>,
+ ClearDefs, IsCommutable;
+
+class SUBQH_PH_DESC : ADDUH_QB_DESC_BASE<"subqh.ph", int_mips_subqh_ph,
+ NoItinerary, DSPRegs>, ClearDefs;
+
+class SUBQH_R_PH_DESC : ADDUH_QB_DESC_BASE<"subqh_r.ph", int_mips_subqh_r_ph,
+ NoItinerary, DSPRegs>, ClearDefs;
+
+class ADDQH_W_DESC : ADDUH_QB_DESC_BASE<"addqh.w", int_mips_addqh_w,
+ NoItinerary, CPURegs>,
+ ClearDefs, IsCommutable;
+
+class ADDQH_R_W_DESC : ADDUH_QB_DESC_BASE<"addqh_r.w", int_mips_addqh_r_w,
+ NoItinerary, CPURegs>,
+ ClearDefs, IsCommutable;
+
+class SUBQH_W_DESC : ADDUH_QB_DESC_BASE<"subqh.w", int_mips_subqh_w,
+ NoItinerary, CPURegs>, ClearDefs;
+
+class SUBQH_R_W_DESC : ADDUH_QB_DESC_BASE<"subqh_r.w", int_mips_subqh_r_w,
+ NoItinerary, CPURegs>, ClearDefs;
+
+// Comparison
+class CMPGDU_EQ_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgdu.eq.qb",
+ int_mips_cmpgdu_eq_qb,
+ NoItinerary, CPURegs, DSPRegs>,
+ IsCommutable;
+
+class CMPGDU_LT_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgdu.lt.qb",
+ int_mips_cmpgdu_lt_qb,
+ NoItinerary, CPURegs, DSPRegs>,
+ IsCommutable;
+
+class CMPGDU_LE_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgdu.le.qb",
+ int_mips_cmpgdu_le_qb,
+ NoItinerary, CPURegs, DSPRegs>,
+ IsCommutable;
+
+// Absolute
+class ABSQ_S_QB_DESC : ABSQ_S_PH_R2_DESC_BASE<"absq_s.qb", int_mips_absq_s_qb,
+ NoItinerary, DSPRegs>;
+
+// Multiplication
+class MUL_PH_DESC : ADDUH_QB_DESC_BASE<"mul.ph", int_mips_mul_ph, NoItinerary,
+ DSPRegs>, IsCommutable;
+
+class MUL_S_PH_DESC : ADDUH_QB_DESC_BASE<"mul_s.ph", int_mips_mul_s_ph,
+ NoItinerary, DSPRegs>, IsCommutable;
+
+class MULQ_S_W_DESC : ADDUH_QB_DESC_BASE<"mulq_s.w", int_mips_mulq_s_w,
+ NoItinerary, CPURegs>, IsCommutable;
+
+class MULQ_RS_W_DESC : ADDUH_QB_DESC_BASE<"mulq_rs.w", int_mips_mulq_rs_w,
+ NoItinerary, CPURegs>, IsCommutable;
+
+class MULQ_S_PH_DESC : ADDU_QB_DESC_BASE<"mulq_s.ph", int_mips_mulq_s_ph,
+ NoItinerary, DSPRegs, DSPRegs>,
+ IsCommutable;
+
+// Dot product with accumulate/subtract
+class DPA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpa.w.ph">;
+
+class DPS_W_PH_DESC : DPA_W_PH_DESC_BASE<"dps.w.ph">;
+
+class DPAQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaqx_s.w.ph">;
+
+class DPAQX_SA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaqx_sa.w.ph">;
+
+class DPAX_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpax.w.ph">;
+
+class DPSX_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsx.w.ph">;
+
+class DPSQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsqx_s.w.ph">;
+
+class DPSQX_SA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsqx_sa.w.ph">;
+
+class MULSA_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsa.w.ph">;
+
+// Precision reduce/expand
+class PRECR_QB_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"precr.qb.ph",
+ int_mips_precr_qb_ph,
+ NoItinerary, DSPRegs, DSPRegs>;
+
+class PRECR_SRA_PH_W_DESC : PRECR_SRA_PH_W_DESC_BASE<"precr_sra.ph.w",
+ int_mips_precr_sra_ph_w,
+ NoItinerary, DSPRegs,
+ CPURegs>, ClearDefs;
+
+class PRECR_SRA_R_PH_W_DESC : PRECR_SRA_PH_W_DESC_BASE<"precr_sra_r.ph.w",
+ int_mips_precr_sra_r_ph_w,
+ NoItinerary, DSPRegs,
+ CPURegs>, ClearDefs;
+
+// Shift
+class SHRA_QB_DESC : SHLL_QB_R2_DESC_BASE<"shra.qb", int_mips_shra_qb, immZExt3,
+ NoItinerary, DSPRegs>, ClearDefs;
+
+class SHRAV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrav.qb", int_mips_shra_qb,
+ NoItinerary, DSPRegs>, ClearDefs;
+
+class SHRA_R_QB_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.qb", int_mips_shra_r_qb,
+ immZExt3, NoItinerary, DSPRegs>,
+ ClearDefs;
+
+class SHRAV_R_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.qb", int_mips_shra_r_qb,
+ NoItinerary, DSPRegs>, ClearDefs;
+
+class SHRL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shrl.ph", int_mips_shrl_ph, immZExt4,
+ NoItinerary, DSPRegs>, ClearDefs;
+
+class SHRLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrlv.ph", int_mips_shrl_ph,
+ NoItinerary, DSPRegs>, ClearDefs;
+
+// Misc
+class APPEND_DESC : APPEND_DESC_BASE<"append", int_mips_append, immZExt5,
+ NoItinerary>, ClearDefs;
+
+class BALIGN_DESC : APPEND_DESC_BASE<"balign", int_mips_balign, immZExt2,
+ NoItinerary>, ClearDefs;
+
+class PREPEND_DESC : APPEND_DESC_BASE<"prepend", int_mips_prepend, immZExt5,
+ NoItinerary>, ClearDefs;
+
+// Pseudos.
+def BPOSGE32_PSEUDO : BPOSGE32_PSEUDO_DESC_BASE<int_mips_bposge32, NoItinerary>;
+
+// Instruction defs.
+// MIPS DSP Rev 1
+def ADDU_QB : ADDU_QB_ENC, ADDU_QB_DESC;
+def ADDU_S_QB : ADDU_S_QB_ENC, ADDU_S_QB_DESC;
+def SUBU_QB : SUBU_QB_ENC, SUBU_QB_DESC;
+def SUBU_S_QB : SUBU_S_QB_ENC, SUBU_S_QB_DESC;
+def ADDQ_PH : ADDQ_PH_ENC, ADDQ_PH_DESC;
+def ADDQ_S_PH : ADDQ_S_PH_ENC, ADDQ_S_PH_DESC;
+def SUBQ_PH : SUBQ_PH_ENC, SUBQ_PH_DESC;
+def SUBQ_S_PH : SUBQ_S_PH_ENC, SUBQ_S_PH_DESC;
+def ADDQ_S_W : ADDQ_S_W_ENC, ADDQ_S_W_DESC;
+def SUBQ_S_W : SUBQ_S_W_ENC, SUBQ_S_W_DESC;
+def ADDSC : ADDSC_ENC, ADDSC_DESC;
+def ADDWC : ADDWC_ENC, ADDWC_DESC;
+def MODSUB : MODSUB_ENC, MODSUB_DESC;
+def RADDU_W_QB : RADDU_W_QB_ENC, RADDU_W_QB_DESC;
+def ABSQ_S_PH : ABSQ_S_PH_ENC, ABSQ_S_PH_DESC;
+def ABSQ_S_W : ABSQ_S_W_ENC, ABSQ_S_W_DESC;
+def PRECRQ_QB_PH : PRECRQ_QB_PH_ENC, PRECRQ_QB_PH_DESC;
+def PRECRQ_PH_W : PRECRQ_PH_W_ENC, PRECRQ_PH_W_DESC;
+def PRECRQ_RS_PH_W : PRECRQ_RS_PH_W_ENC, PRECRQ_RS_PH_W_DESC;
+def PRECRQU_S_QB_PH : PRECRQU_S_QB_PH_ENC, PRECRQU_S_QB_PH_DESC;
+def PRECEQ_W_PHL : PRECEQ_W_PHL_ENC, PRECEQ_W_PHL_DESC;
+def PRECEQ_W_PHR : PRECEQ_W_PHR_ENC, PRECEQ_W_PHR_DESC;
+def PRECEQU_PH_QBL : PRECEQU_PH_QBL_ENC, PRECEQU_PH_QBL_DESC;
+def PRECEQU_PH_QBR : PRECEQU_PH_QBR_ENC, PRECEQU_PH_QBR_DESC;
+def PRECEQU_PH_QBLA : PRECEQU_PH_QBLA_ENC, PRECEQU_PH_QBLA_DESC;
+def PRECEQU_PH_QBRA : PRECEQU_PH_QBRA_ENC, PRECEQU_PH_QBRA_DESC;
+def PRECEU_PH_QBL : PRECEU_PH_QBL_ENC, PRECEU_PH_QBL_DESC;
+def PRECEU_PH_QBR : PRECEU_PH_QBR_ENC, PRECEU_PH_QBR_DESC;
+def PRECEU_PH_QBLA : PRECEU_PH_QBLA_ENC, PRECEU_PH_QBLA_DESC;
+def PRECEU_PH_QBRA : PRECEU_PH_QBRA_ENC, PRECEU_PH_QBRA_DESC;
+def SHLL_QB : SHLL_QB_ENC, SHLL_QB_DESC;
+def SHLLV_QB : SHLLV_QB_ENC, SHLLV_QB_DESC;
+def SHRL_QB : SHRL_QB_ENC, SHRL_QB_DESC;
+def SHRLV_QB : SHRLV_QB_ENC, SHRLV_QB_DESC;
+def SHLL_PH : SHLL_PH_ENC, SHLL_PH_DESC;
+def SHLLV_PH : SHLLV_PH_ENC, SHLLV_PH_DESC;
+def SHLL_S_PH : SHLL_S_PH_ENC, SHLL_S_PH_DESC;
+def SHLLV_S_PH : SHLLV_S_PH_ENC, SHLLV_S_PH_DESC;
+def SHRA_PH : SHRA_PH_ENC, SHRA_PH_DESC;
+def SHRAV_PH : SHRAV_PH_ENC, SHRAV_PH_DESC;
+def SHRA_R_PH : SHRA_R_PH_ENC, SHRA_R_PH_DESC;
+def SHRAV_R_PH : SHRAV_R_PH_ENC, SHRAV_R_PH_DESC;
+def SHLL_S_W : SHLL_S_W_ENC, SHLL_S_W_DESC;
+def SHLLV_S_W : SHLLV_S_W_ENC, SHLLV_S_W_DESC;
+def SHRA_R_W : SHRA_R_W_ENC, SHRA_R_W_DESC;
+def SHRAV_R_W : SHRAV_R_W_ENC, SHRAV_R_W_DESC;
+def MULEU_S_PH_QBL : MULEU_S_PH_QBL_ENC, MULEU_S_PH_QBL_DESC;
+def MULEU_S_PH_QBR : MULEU_S_PH_QBR_ENC, MULEU_S_PH_QBR_DESC;
+def MULEQ_S_W_PHL : MULEQ_S_W_PHL_ENC, MULEQ_S_W_PHL_DESC;
+def MULEQ_S_W_PHR : MULEQ_S_W_PHR_ENC, MULEQ_S_W_PHR_DESC;
+def MULQ_RS_PH : MULQ_RS_PH_ENC, MULQ_RS_PH_DESC;
+def MULSAQ_S_W_PH : MULSAQ_S_W_PH_ENC, MULSAQ_S_W_PH_DESC;
+def MAQ_S_W_PHL : MAQ_S_W_PHL_ENC, MAQ_S_W_PHL_DESC;
+def MAQ_S_W_PHR : MAQ_S_W_PHR_ENC, MAQ_S_W_PHR_DESC;
+def MAQ_SA_W_PHL : MAQ_SA_W_PHL_ENC, MAQ_SA_W_PHL_DESC;
+def MAQ_SA_W_PHR : MAQ_SA_W_PHR_ENC, MAQ_SA_W_PHR_DESC;
+def DPAU_H_QBL : DPAU_H_QBL_ENC, DPAU_H_QBL_DESC;
+def DPAU_H_QBR : DPAU_H_QBR_ENC, DPAU_H_QBR_DESC;
+def DPSU_H_QBL : DPSU_H_QBL_ENC, DPSU_H_QBL_DESC;
+def DPSU_H_QBR : DPSU_H_QBR_ENC, DPSU_H_QBR_DESC;
+def DPAQ_S_W_PH : DPAQ_S_W_PH_ENC, DPAQ_S_W_PH_DESC;
+def DPSQ_S_W_PH : DPSQ_S_W_PH_ENC, DPSQ_S_W_PH_DESC;
+def DPAQ_SA_L_W : DPAQ_SA_L_W_ENC, DPAQ_SA_L_W_DESC;
+def DPSQ_SA_L_W : DPSQ_SA_L_W_ENC, DPSQ_SA_L_W_DESC;
+def MULT_DSP : MULT_DSP_ENC, MULT_DSP_DESC;
+def MULTU_DSP : MULTU_DSP_ENC, MULTU_DSP_DESC;
+def MADD_DSP : MADD_DSP_ENC, MADD_DSP_DESC;
+def MADDU_DSP : MADDU_DSP_ENC, MADDU_DSP_DESC;
+def MSUB_DSP : MSUB_DSP_ENC, MSUB_DSP_DESC;
+def MSUBU_DSP : MSUBU_DSP_ENC, MSUBU_DSP_DESC;
+def CMPU_EQ_QB : CMPU_EQ_QB_ENC, CMPU_EQ_QB_DESC;
+def CMPU_LT_QB : CMPU_LT_QB_ENC, CMPU_LT_QB_DESC;
+def CMPU_LE_QB : CMPU_LE_QB_ENC, CMPU_LE_QB_DESC;
+def CMPGU_EQ_QB : CMPGU_EQ_QB_ENC, CMPGU_EQ_QB_DESC;
+def CMPGU_LT_QB : CMPGU_LT_QB_ENC, CMPGU_LT_QB_DESC;
+def CMPGU_LE_QB : CMPGU_LE_QB_ENC, CMPGU_LE_QB_DESC;
+def CMP_EQ_PH : CMP_EQ_PH_ENC, CMP_EQ_PH_DESC;
+def CMP_LT_PH : CMP_LT_PH_ENC, CMP_LT_PH_DESC;
+def CMP_LE_PH : CMP_LE_PH_ENC, CMP_LE_PH_DESC;
+def BITREV : BITREV_ENC, BITREV_DESC;
+def PACKRL_PH : PACKRL_PH_ENC, PACKRL_PH_DESC;
+def REPL_QB : REPL_QB_ENC, REPL_QB_DESC;
+def REPL_PH : REPL_PH_ENC, REPL_PH_DESC;
+def REPLV_QB : REPLV_QB_ENC, REPLV_QB_DESC;
+def REPLV_PH : REPLV_PH_ENC, REPLV_PH_DESC;
+def PICK_QB : PICK_QB_ENC, PICK_QB_DESC;
+def PICK_PH : PICK_PH_ENC, PICK_PH_DESC;
+def LWX : LWX_ENC, LWX_DESC;
+def LHX : LHX_ENC, LHX_DESC;
+def LBUX : LBUX_ENC, LBUX_DESC;
+def BPOSGE32 : BPOSGE32_ENC, BPOSGE32_DESC;
+def INSV : INSV_ENC, INSV_DESC;
+def EXTP : EXTP_ENC, EXTP_DESC;
+def EXTPV : EXTPV_ENC, EXTPV_DESC;
+def EXTPDP : EXTPDP_ENC, EXTPDP_DESC;
+def EXTPDPV : EXTPDPV_ENC, EXTPDPV_DESC;
+def EXTR_W : EXTR_W_ENC, EXTR_W_DESC;
+def EXTRV_W : EXTRV_W_ENC, EXTRV_W_DESC;
+def EXTR_R_W : EXTR_R_W_ENC, EXTR_R_W_DESC;
+def EXTRV_R_W : EXTRV_R_W_ENC, EXTRV_R_W_DESC;
+def EXTR_RS_W : EXTR_RS_W_ENC, EXTR_RS_W_DESC;
+def EXTRV_RS_W : EXTRV_RS_W_ENC, EXTRV_RS_W_DESC;
+def EXTR_S_H : EXTR_S_H_ENC, EXTR_S_H_DESC;
+def EXTRV_S_H : EXTRV_S_H_ENC, EXTRV_S_H_DESC;
+def SHILO : SHILO_ENC, SHILO_DESC;
+def SHILOV : SHILOV_ENC, SHILOV_DESC;
+def MTHLIP : MTHLIP_ENC, MTHLIP_DESC;
+def RDDSP : RDDSP_ENC, RDDSP_DESC;
+def WRDSP : WRDSP_ENC, WRDSP_DESC;
+
+// MIPS DSP Rev 2
+let Predicates = [HasDSPR2] in {
+
+def ADDU_PH : ADDU_PH_ENC, ADDU_PH_DESC;
+def ADDU_S_PH : ADDU_S_PH_ENC, ADDU_S_PH_DESC;
+def SUBU_PH : SUBU_PH_ENC, SUBU_PH_DESC;
+def SUBU_S_PH : SUBU_S_PH_ENC, SUBU_S_PH_DESC;
+def CMPGDU_EQ_QB : CMPGDU_EQ_QB_ENC, CMPGDU_EQ_QB_DESC;
+def CMPGDU_LT_QB : CMPGDU_LT_QB_ENC, CMPGDU_LT_QB_DESC;
+def CMPGDU_LE_QB : CMPGDU_LE_QB_ENC, CMPGDU_LE_QB_DESC;
+def ABSQ_S_QB : ABSQ_S_QB_ENC, ABSQ_S_QB_DESC;
+def ADDUH_QB : ADDUH_QB_ENC, ADDUH_QB_DESC;
+def ADDUH_R_QB : ADDUH_R_QB_ENC, ADDUH_R_QB_DESC;
+def SUBUH_QB : SUBUH_QB_ENC, SUBUH_QB_DESC;
+def SUBUH_R_QB : SUBUH_R_QB_ENC, SUBUH_R_QB_DESC;
+def ADDQH_PH : ADDQH_PH_ENC, ADDQH_PH_DESC;
+def ADDQH_R_PH : ADDQH_R_PH_ENC, ADDQH_R_PH_DESC;
+def SUBQH_PH : SUBQH_PH_ENC, SUBQH_PH_DESC;
+def SUBQH_R_PH : SUBQH_R_PH_ENC, SUBQH_R_PH_DESC;
+def ADDQH_W : ADDQH_W_ENC, ADDQH_W_DESC;
+def ADDQH_R_W : ADDQH_R_W_ENC, ADDQH_R_W_DESC;
+def SUBQH_W : SUBQH_W_ENC, SUBQH_W_DESC;
+def SUBQH_R_W : SUBQH_R_W_ENC, SUBQH_R_W_DESC;
+def MUL_PH : MUL_PH_ENC, MUL_PH_DESC;
+def MUL_S_PH : MUL_S_PH_ENC, MUL_S_PH_DESC;
+def MULQ_S_W : MULQ_S_W_ENC, MULQ_S_W_DESC;
+def MULQ_RS_W : MULQ_RS_W_ENC, MULQ_RS_W_DESC;
+def MULQ_S_PH : MULQ_S_PH_ENC, MULQ_S_PH_DESC;
+def DPA_W_PH : DPA_W_PH_ENC, DPA_W_PH_DESC;
+def DPS_W_PH : DPS_W_PH_ENC, DPS_W_PH_DESC;
+def DPAQX_S_W_PH : DPAQX_S_W_PH_ENC, DPAQX_S_W_PH_DESC;
+def DPAQX_SA_W_PH : DPAQX_SA_W_PH_ENC, DPAQX_SA_W_PH_DESC;
+def DPAX_W_PH : DPAX_W_PH_ENC, DPAX_W_PH_DESC;
+def DPSX_W_PH : DPSX_W_PH_ENC, DPSX_W_PH_DESC;
+def DPSQX_S_W_PH : DPSQX_S_W_PH_ENC, DPSQX_S_W_PH_DESC;
+def DPSQX_SA_W_PH : DPSQX_SA_W_PH_ENC, DPSQX_SA_W_PH_DESC;
+def MULSA_W_PH : MULSA_W_PH_ENC, MULSA_W_PH_DESC;
+def PRECR_QB_PH : PRECR_QB_PH_ENC, PRECR_QB_PH_DESC;
+def PRECR_SRA_PH_W : PRECR_SRA_PH_W_ENC, PRECR_SRA_PH_W_DESC;
+def PRECR_SRA_R_PH_W : PRECR_SRA_R_PH_W_ENC, PRECR_SRA_R_PH_W_DESC;
+def SHRA_QB : SHRA_QB_ENC, SHRA_QB_DESC;
+def SHRAV_QB : SHRAV_QB_ENC, SHRAV_QB_DESC;
+def SHRA_R_QB : SHRA_R_QB_ENC, SHRA_R_QB_DESC;
+def SHRAV_R_QB : SHRAV_R_QB_ENC, SHRAV_R_QB_DESC;
+def SHRL_PH : SHRL_PH_ENC, SHRL_PH_DESC;
+def SHRLV_PH : SHRLV_PH_ENC, SHRLV_PH_DESC;
+def APPEND : APPEND_ENC, APPEND_DESC;
+def BALIGN : BALIGN_ENC, BALIGN_DESC;
+def PREPEND : PREPEND_ENC, PREPEND_DESC;
+
+}
+
+// Pseudos.
+def MULSAQ_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMULSAQ_S_W_PH, NoItinerary,
+ MULSAQ_S_W_PH>;
+def MAQ_S_W_PHL_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMAQ_S_W_PHL, NoItinerary,
+ MAQ_S_W_PHL>;
+def MAQ_S_W_PHR_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMAQ_S_W_PHR, NoItinerary,
+ MAQ_S_W_PHR>;
+def MAQ_SA_W_PHL_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMAQ_SA_W_PHL, NoItinerary,
+ MAQ_SA_W_PHL>;
+def MAQ_SA_W_PHR_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMAQ_SA_W_PHR, NoItinerary,
+ MAQ_SA_W_PHR>;
+def DPAU_H_QBL_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAU_H_QBL, NoItinerary,
+ DPAU_H_QBL>;
+def DPAU_H_QBR_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAU_H_QBR, NoItinerary,
+ DPAU_H_QBR>;
+def DPSU_H_QBL_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSU_H_QBL, NoItinerary,
+ DPSU_H_QBL>;
+def DPSU_H_QBR_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSU_H_QBR, NoItinerary,
+ DPSU_H_QBR>;
+def DPAQ_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAQ_S_W_PH, NoItinerary,
+ DPAQ_S_W_PH>;
+def DPSQ_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSQ_S_W_PH, NoItinerary,
+ DPSQ_S_W_PH>;
+def DPAQ_SA_L_W_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAQ_SA_L_W, NoItinerary,
+ DPAQ_SA_L_W>;
+def DPSQ_SA_L_W_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSQ_SA_L_W, NoItinerary,
+ DPSQ_SA_L_W>;
+
+def MULT_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMULT, NoItinerary, MULT_DSP>,
+ IsCommutable;
+def MULTU_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMULTU, NoItinerary, MULTU_DSP>,
+ IsCommutable;
+def MADD_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMADD_DSP, NoItinerary, MADD_DSP>,
+ IsCommutable, UseAC;
+def MADDU_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMADDU_DSP, NoItinerary, MADDU_DSP>,
+ IsCommutable, UseAC;
+def MSUB_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMSUB_DSP, NoItinerary, MSUB_DSP>,
+ UseAC;
+def MSUBU_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMSUBU_DSP, NoItinerary, MSUBU_DSP>,
+ UseAC;
+
+def SHILO_PSEUDO : SHILO_R1_PSEUDO_BASE<MipsSHILO, NoItinerary, SHILO>;
+def SHILOV_PSEUDO : SHILO_R2_PSEUDO_BASE<MipsSHILO, NoItinerary, SHILOV>;
+def MTHLIP_PSEUDO : SHILO_R2_PSEUDO_BASE<MipsMTHLIP, NoItinerary, MTHLIP>;
+
+let Predicates = [HasDSPR2] in {
+
+def DPA_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPA_W_PH, NoItinerary, DPA_W_PH>;
+def DPS_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPS_W_PH, NoItinerary, DPS_W_PH>;
+def DPAQX_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAQX_S_W_PH, NoItinerary,
+ DPAQX_S_W_PH>;
+def DPAQX_SA_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAQX_SA_W_PH, NoItinerary,
+ DPAQX_SA_W_PH>;
+def DPAX_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAX_W_PH, NoItinerary,
+ DPAX_W_PH>;
+def DPSX_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSX_W_PH, NoItinerary,
+ DPSX_W_PH>;
+def DPSQX_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSQX_S_W_PH, NoItinerary,
+ DPSQX_S_W_PH>;
+def DPSQX_SA_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSQX_SA_W_PH, NoItinerary,
+ DPSQX_SA_W_PH>;
+def MULSA_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMULSA_W_PH, NoItinerary,
+ MULSA_W_PH>;
+
+}
+
+// Patterns.
+class DSPPat<dag pattern, dag result, Predicate pred = HasDSP> :
+ Pat<pattern, result>, Requires<[pred]>;
+
+class BitconvertPat<ValueType DstVT, ValueType SrcVT, RegisterClass DstRC,
+ RegisterClass SrcRC> :
+ DSPPat<(DstVT (bitconvert (SrcVT SrcRC:$src))),
+ (COPY_TO_REGCLASS SrcRC:$src, DstRC)>;
+
+def : BitconvertPat<i32, v2i16, CPURegs, DSPRegs>;
+def : BitconvertPat<i32, v4i8, CPURegs, DSPRegs>;
+def : BitconvertPat<v2i16, i32, DSPRegs, CPURegs>;
+def : BitconvertPat<v4i8, i32, DSPRegs, CPURegs>;
+
+def : DSPPat<(v2i16 (load addr:$a)),
+ (v2i16 (COPY_TO_REGCLASS (LW addr:$a), DSPRegs))>;
+def : DSPPat<(v4i8 (load addr:$a)),
+ (v4i8 (COPY_TO_REGCLASS (LW addr:$a), DSPRegs))>;
+def : DSPPat<(store (v2i16 DSPRegs:$val), addr:$a),
+ (SW (COPY_TO_REGCLASS DSPRegs:$val, CPURegs), addr:$a)>;
+def : DSPPat<(store (v4i8 DSPRegs:$val), addr:$a),
+ (SW (COPY_TO_REGCLASS DSPRegs:$val, CPURegs), addr:$a)>;
+
+// Extr patterns.
+class EXTR_W_TY1_R2_Pat<SDPatternOperator OpNode, Instruction Instr> :
+ DSPPat<(i32 (OpNode CPURegs:$rs)), (Instr AC0, CPURegs:$rs)>;
+
+class EXTR_W_TY1_R1_Pat<SDPatternOperator OpNode, Instruction Instr> :
+ DSPPat<(i32 (OpNode immZExt5:$shift)), (Instr AC0, immZExt5:$shift)>;
+
+def : EXTR_W_TY1_R1_Pat<MipsEXTP, EXTP>;
+def : EXTR_W_TY1_R2_Pat<MipsEXTP, EXTPV>;
+def : EXTR_W_TY1_R1_Pat<MipsEXTPDP, EXTPDP>;
+def : EXTR_W_TY1_R2_Pat<MipsEXTPDP, EXTPDPV>;
+def : EXTR_W_TY1_R1_Pat<MipsEXTR_W, EXTR_W>;
+def : EXTR_W_TY1_R2_Pat<MipsEXTR_W, EXTRV_W>;
+def : EXTR_W_TY1_R1_Pat<MipsEXTR_R_W, EXTR_R_W>;
+def : EXTR_W_TY1_R2_Pat<MipsEXTR_R_W, EXTRV_R_W>;
+def : EXTR_W_TY1_R1_Pat<MipsEXTR_RS_W, EXTR_RS_W>;
+def : EXTR_W_TY1_R2_Pat<MipsEXTR_RS_W, EXTRV_RS_W>;
+def : EXTR_W_TY1_R1_Pat<MipsEXTR_S_H, EXTR_S_H>;
+def : EXTR_W_TY1_R2_Pat<MipsEXTR_S_H, EXTRV_S_H>;
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index b1220d6250..e9f330ffc1 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -875,6 +875,8 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const
case ISD::SRL_PARTS: return LowerShiftRightParts(Op, DAG, false);
case ISD::LOAD: return LowerLOAD(Op, DAG);
case ISD::STORE: return LowerSTORE(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, DAG);
// @LOCALMOD-BEGIN
case ISD::NACL_TP_TLS_OFFSET: return LowerNaClTpTlsOffset(Op, DAG);
@@ -988,6 +990,70 @@ static MachineBasicBlock* ExpandCondMov(MachineInstr *MI, MachineBasicBlock *BB,
return BB;
}
*/
+
+MachineBasicBlock *
+MipsTargetLowering::EmitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const{
+ // $bb:
+ // bposge32_pseudo $vr0
+ // =>
+ // $bb:
+ // bposge32 $tbb
+ // $fbb:
+ // li $vr2, 0
+ // b $sink
+ // $tbb:
+ // li $vr1, 1
+ // $sink:
+ // $vr0 = phi($vr2, $fbb, $vr1, $tbb)
+
+ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetRegisterClass *RC = &Mips::CPURegsRegClass;
+ DebugLoc DL = MI->getDebugLoc();
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = llvm::next(MachineFunction::iterator(BB));
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, FBB);
+ F->insert(It, TBB);
+ F->insert(It, Sink);
+
+ // Transfer the remainder of BB and its successor edges to Sink.
+ Sink->splice(Sink->begin(), BB, llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ Sink->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Add successors.
+ BB->addSuccessor(FBB);
+ BB->addSuccessor(TBB);
+ FBB->addSuccessor(Sink);
+ TBB->addSuccessor(Sink);
+
+ // Insert the real bposge32 instruction to $BB.
+ BuildMI(BB, DL, TII->get(Mips::BPOSGE32)).addMBB(TBB);
+
+ // Fill $FBB.
+ unsigned VR2 = RegInfo.createVirtualRegister(RC);
+ BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2)
+ .addReg(Mips::ZERO).addImm(0);
+ BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink);
+
+ // Fill $TBB.
+ unsigned VR1 = RegInfo.createVirtualRegister(RC);
+ BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1)
+ .addReg(Mips::ZERO).addImm(1);
+
+ // Insert phi function to $Sink.
+ BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI),
+ MI->getOperand(0).getReg())
+ .addReg(VR2).addMBB(FBB).addReg(VR1).addMBB(TBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return Sink;
+}
+
MachineBasicBlock *
MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
@@ -1096,6 +1162,8 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case Mips::ATOMIC_CMP_SWAP_I64:
case Mips::ATOMIC_CMP_SWAP_I64_P8:
return EmitAtomicCmpSwap(MI, BB, 8);
+ case Mips::BPOSGE32_PSEUDO:
+ return EmitBPOSGE32(MI, BB);
}
}
@@ -2340,6 +2408,151 @@ SDValue MipsTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
return CreateStoreLR(MipsISD::SDR, DAG, SD, SDL, IsLittle ? 0 : 7);
}
+// This function expands mips intrinsic nodes which have 64-bit input operands
+// or output values.
+//
+// out64 = intrinsic-node in64
+// =>
+// lo = copy (extract-element (in64, 0))
+// hi = copy (extract-element (in64, 1))
+// mips-specific-node
+// v0 = copy lo
+// v1 = copy hi
+// out64 = merge-values (v0, v1)
+//
+static SDValue LowerDSPIntr(SDValue Op, SelectionDAG &DAG,
+ unsigned Opc, bool HasI64In, bool HasI64Out) {
+ DebugLoc DL = Op.getDebugLoc();
+ bool HasChainIn = Op->getOperand(0).getValueType() == MVT::Other;
+ SDValue Chain = HasChainIn ? Op->getOperand(0) : DAG.getEntryNode();
+ SmallVector<SDValue, 3> Ops;
+
+ if (HasI64In) {
+ SDValue InLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
+ Op->getOperand(1 + HasChainIn),
+ DAG.getConstant(0, MVT::i32));
+ SDValue InHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
+ Op->getOperand(1 + HasChainIn),
+ DAG.getConstant(1, MVT::i32));
+
+ Chain = DAG.getCopyToReg(Chain, DL, Mips::LO, InLo, SDValue());
+ Chain = DAG.getCopyToReg(Chain, DL, Mips::HI, InHi, Chain.getValue(1));
+
+ Ops.push_back(Chain);
+ Ops.append(Op->op_begin() + HasChainIn + 2, Op->op_end());
+ Ops.push_back(Chain.getValue(1));
+ } else {
+ Ops.push_back(Chain);
+ Ops.append(Op->op_begin() + HasChainIn + 1, Op->op_end());
+ }
+
+ if (!HasI64Out)
+ return DAG.getNode(Opc, DL, Op->value_begin(), Op->getNumValues(),
+ Ops.begin(), Ops.size());
+
+ SDValue Intr = DAG.getNode(Opc, DL, DAG.getVTList(MVT::Other, MVT::Glue),
+ Ops.begin(), Ops.size());
+ SDValue OutLo = DAG.getCopyFromReg(Intr.getValue(0), DL, Mips::LO, MVT::i32,
+ Intr.getValue(1));
+ SDValue OutHi = DAG.getCopyFromReg(OutLo.getValue(1), DL, Mips::HI, MVT::i32,
+ OutLo.getValue(2));
+ SDValue Out = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, OutLo, OutHi);
+
+ if (!HasChainIn)
+ return Out;
+
+ SDValue Vals[] = { Out, OutHi.getValue(1) };
+ return DAG.getMergeValues(Vals, 2, DL);
+}
+
+SDValue MipsTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const {
+ switch (cast<ConstantSDNode>(Op->getOperand(0))->getZExtValue()) {
+ default:
+ return SDValue();
+ case Intrinsic::mips_shilo:
+ return LowerDSPIntr(Op, DAG, MipsISD::SHILO, true, true);
+ case Intrinsic::mips_dpau_h_qbl:
+ return LowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBL, true, true);
+ case Intrinsic::mips_dpau_h_qbr:
+ return LowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBR, true, true);
+ case Intrinsic::mips_dpsu_h_qbl:
+ return LowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBL, true, true);
+ case Intrinsic::mips_dpsu_h_qbr:
+ return LowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBR, true, true);
+ case Intrinsic::mips_dpa_w_ph:
+ return LowerDSPIntr(Op, DAG, MipsISD::DPA_W_PH, true, true);
+ case Intrinsic::mips_dps_w_ph:
+ return LowerDSPIntr(Op, DAG, MipsISD::DPS_W_PH, true, true);
+ case Intrinsic::mips_dpax_w_ph:
+ return LowerDSPIntr(Op, DAG, MipsISD::DPAX_W_PH, true, true);
+ case Intrinsic::mips_dpsx_w_ph:
+ return LowerDSPIntr(Op, DAG, MipsISD::DPSX_W_PH, true, true);
+ case Intrinsic::mips_mulsa_w_ph:
+ return LowerDSPIntr(Op, DAG, MipsISD::MULSA_W_PH, true, true);
+ case Intrinsic::mips_mult:
+ return LowerDSPIntr(Op, DAG, MipsISD::MULT, false, true);
+ case Intrinsic::mips_multu:
+ return LowerDSPIntr(Op, DAG, MipsISD::MULTU, false, true);
+ case Intrinsic::mips_madd:
+ return LowerDSPIntr(Op, DAG, MipsISD::MADD_DSP, true, true);
+ case Intrinsic::mips_maddu:
+ return LowerDSPIntr(Op, DAG, MipsISD::MADDU_DSP, true, true);
+ case Intrinsic::mips_msub:
+ return LowerDSPIntr(Op, DAG, MipsISD::MSUB_DSP, true, true);
+ case Intrinsic::mips_msubu:
+ return LowerDSPIntr(Op, DAG, MipsISD::MSUBU_DSP, true, true);
+ }
+}
+
+SDValue MipsTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const {
+ switch (cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue()) {
+ default:
+ return SDValue();
+ case Intrinsic::mips_extp:
+ return LowerDSPIntr(Op, DAG, MipsISD::EXTP, true, false);
+ case Intrinsic::mips_extpdp:
+ return LowerDSPIntr(Op, DAG, MipsISD::EXTPDP, true, false);
+ case Intrinsic::mips_extr_w:
+ return LowerDSPIntr(Op, DAG, MipsISD::EXTR_W, true, false);
+ case Intrinsic::mips_extr_r_w:
+ return LowerDSPIntr(Op, DAG, MipsISD::EXTR_R_W, true, false);
+ case Intrinsic::mips_extr_rs_w:
+ return LowerDSPIntr(Op, DAG, MipsISD::EXTR_RS_W, true, false);
+ case Intrinsic::mips_extr_s_h:
+ return LowerDSPIntr(Op, DAG, MipsISD::EXTR_S_H, true, false);
+ case Intrinsic::mips_mthlip:
+ return LowerDSPIntr(Op, DAG, MipsISD::MTHLIP, true, true);
+ case Intrinsic::mips_mulsaq_s_w_ph:
+ return LowerDSPIntr(Op, DAG, MipsISD::MULSAQ_S_W_PH, true, true);
+ case Intrinsic::mips_maq_s_w_phl:
+ return LowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHL, true, true);
+ case Intrinsic::mips_maq_s_w_phr:
+ return LowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHR, true, true);
+ case Intrinsic::mips_maq_sa_w_phl:
+ return LowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHL, true, true);
+ case Intrinsic::mips_maq_sa_w_phr:
+ return LowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHR, true, true);
+ case Intrinsic::mips_dpaq_s_w_ph:
+ return LowerDSPIntr(Op, DAG, MipsISD::DPAQ_S_W_PH, true, true);
+ case Intrinsic::mips_dpsq_s_w_ph:
+ return LowerDSPIntr(Op, DAG, MipsISD::DPSQ_S_W_PH, true, true);
+ case Intrinsic::mips_dpaq_sa_l_w:
+ return LowerDSPIntr(Op, DAG, MipsISD::DPAQ_SA_L_W, true, true);
+ case Intrinsic::mips_dpsq_sa_l_w:
+ return LowerDSPIntr(Op, DAG, MipsISD::DPSQ_SA_L_W, true, true);
+ case Intrinsic::mips_dpaqx_s_w_ph:
+ return LowerDSPIntr(Op, DAG, MipsISD::DPAQX_S_W_PH, true, true);
+ case Intrinsic::mips_dpaqx_sa_w_ph:
+ return LowerDSPIntr(Op, DAG, MipsISD::DPAQX_SA_W_PH, true, true);
+ case Intrinsic::mips_dpsqx_s_w_ph:
+ return LowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH, true, true);
+ case Intrinsic::mips_dpsqx_sa_w_ph:
+ return LowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH, true, true);
+ }
+}
+
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index 4e9398430b..2dce449765 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -202,6 +202,8 @@ namespace llvm {
bool IsSRA) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
// @LOCALMOD-BEGIN
SDValue LowerNaClTpTlsOffset(SDValue Op, SelectionDAG &DAG) const;
@@ -265,6 +267,8 @@ namespace llvm {
virtual unsigned getJumpTableEncoding() const;
+ MachineBasicBlock *EmitBPOSGE32(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
unsigned Size, unsigned BinOpcode, bool Nand = false) const;
MachineBasicBlock *EmitAtomicBinaryPartword(MachineInstr *MI,
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index 3f98ae857b..6fa94a96e5 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -1266,3 +1266,8 @@ include "MipsCondMov.td"
include "Mips16InstrFormats.td"
include "Mips16InstrInfo.td"
+
+// DSP
+include "MipsDSPInstrFormats.td"
+include "MipsDSPInstrInfo.td"
+
diff --git a/lib/Target/Mips/MipsMCInstLower.h b/lib/Target/Mips/MipsMCInstLower.h
index 3eab5a452e..c4a6016105 100644
--- a/lib/Target/Mips/MipsMCInstLower.h
+++ b/lib/Target/Mips/MipsMCInstLower.h
@@ -33,11 +33,11 @@ public:
MipsMCInstLower(MipsAsmPrinter &asmprinter);
void Initialize(Mangler *mang, MCContext *C);
void Lower(const MachineInstr *MI, MCInst &OutMI) const;
+ MCOperand LowerOperand(const MachineOperand& MO, unsigned offset = 0) const;
private:
MCOperand LowerSymbolOperand(const MachineOperand &MO,
MachineOperandType MOTy, unsigned Offset) const;
- MCOperand LowerOperand(const MachineOperand& MO, unsigned offset = 0) const;
};
}
diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h
index df3c4c0de0..93ce94803a 100644
--- a/lib/Target/Mips/MipsMachineFunction.h
+++ b/lib/Target/Mips/MipsMachineFunction.h
@@ -45,9 +45,7 @@ class MipsFunctionInfo : public MachineFunctionInfo {
// Range of frame object indices.
// InArgFIRange: Range of indices of all frame objects created during call to
// LowerFormalArguments.
- // OutArgFIRange: Range of indices of all frame objects created during call to
- // LowerCall except for the frame object for restoring $gp.
- std::pair<int, int> InArgFIRange, OutArgFIRange;
+ std::pair<int, int> InArgFIRange;
unsigned MaxCallFrameSize;
bool EmitNOAT;
@@ -56,7 +54,7 @@ public:
MipsFunctionInfo(MachineFunction& MF)
: MF(MF), SRetReturnReg(0), GlobalBaseReg(0),
VarArgsFrameIndex(0), InArgFIRange(std::make_pair(-1, 0)),
- OutArgFIRange(std::make_pair(-1, 0)), MaxCallFrameSize(0), EmitNOAT(false)
+ MaxCallFrameSize(0), EmitNOAT(false)
{}
bool isInArgFI(int FI) const {
@@ -64,16 +62,6 @@ public:
}
void setLastInArgFI(int FI) { InArgFIRange.second = FI; }
- bool isOutArgFI(int FI) const {
- return FI <= OutArgFIRange.first && FI >= OutArgFIRange.second;
- }
- void extendOutArgFIRange(int FirstFI, int LastFI) {
- if (!OutArgFIRange.second)
- // this must be the first time this function was called.
- OutArgFIRange.first = FirstFI;
- OutArgFIRange.second = LastFI;
- }
-
unsigned getSRetReturnReg() const { return SRetReturnReg; }
void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index ae4813e128..a72e3b857f 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -14,6 +14,8 @@ let Namespace = "Mips" in {
def sub_fpeven : SubRegIndex;
def sub_fpodd : SubRegIndex;
def sub_32 : SubRegIndex;
+def sub_lo : SubRegIndex;
+def sub_hi : SubRegIndex;
}
// We have banks of 32 registers each.
@@ -247,33 +249,11 @@ let Namespace = "Mips" in {
def HWR29_64 : Register<"29">;
// Accum registers
- def LO0 : Register<"ac0"> {
- let Aliases = [LO];
- }
- def HI0 : Register<"hi0"> {
- let Aliases = [HI];
- }
- def LO1 : Register<"ac1">;
- def HI1 : Register<"hi1">;
- def LO2 : Register<"ac2">;
- def HI2 : Register<"hi2">;
- def LO3 : Register<"ac3">;
- def HI3 : Register<"hi3">;
-
- let SubRegIndices = [sub_32] in {
- def LO0_64 : RegisterWithSubRegs<"ac0", [LO0]> {
- let Aliases = [LO64];
- }
- def HI0_64 : RegisterWithSubRegs<"hi0", [HI0]> {
- let Aliases = [HI64];
- }
- def LO1_64 : RegisterWithSubRegs<"ac1", [LO1]>;
- def HI1_64 : RegisterWithSubRegs<"hi1", [HI1]>;
- def LO2_64 : RegisterWithSubRegs<"ac2", [LO2]>;
- def HI2_64 : RegisterWithSubRegs<"hi2", [HI2]>;
- def LO3_64 : RegisterWithSubRegs<"ac3", [LO3]>;
- def HI3_64 : RegisterWithSubRegs<"hi3", [HI3]>;
- }
+ let SubRegIndices = [sub_lo, sub_hi] in
+ def AC0 : RegisterWithSubRegs<"ac0", [LO, HI]>;
+ def AC1 : Register<"ac1">;
+ def AC2 : Register<"ac2">;
+ def AC3 : Register<"ac3">;
def DSPCtrl : Register<"dspctrl">;
}
@@ -322,6 +302,7 @@ def CPU16Regs : RegisterClass<"Mips", [i32], 32, (add
def CPURAReg : RegisterClass<"Mips", [i32], 32, (add RA)>;
+def CPUSPReg : RegisterClass<"Mips", [i32], 32, (add SP)>;
// 64bit fp:
// * FGR64 - 32 64-bit registers
@@ -357,9 +338,5 @@ def HILO64 : RegisterClass<"Mips", [i64], 64, (add HI64, LO64)>;
def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>;
def HWRegs64 : RegisterClass<"Mips", [i64], 32, (add HWR29_64)>;
-// Accum Registers
-def HIRegs : RegisterClass<"Mips", [i32], 32, (sequence "HI%u", 0, 3)>;
-def LORegs : RegisterClass<"Mips", [i32], 32, (sequence "LO%u", 0, 3)>;
-
-def HI64Regs : RegisterClass<"Mips", [i64], 64, (sequence "HI%u_64", 0, 3)>;
-def LO64Regs : RegisterClass<"Mips", [i64], 64, (sequence "LO%u_64", 0, 3)>;
+// Accumulator Registers
+def ACRegs : RegisterClass<"Mips", [i64], 64, (sequence "AC%u", 0, 3)>;
diff --git a/lib/Target/Mips/MipsSERegisterInfo.cpp b/lib/Target/Mips/MipsSERegisterInfo.cpp
index d868f73758..8e2c2c5174 100644
--- a/lib/Target/Mips/MipsSERegisterInfo.cpp
+++ b/lib/Target/Mips/MipsSERegisterInfo.cpp
@@ -91,8 +91,7 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
// getFrameRegister() returns.
unsigned FrameReg;
- if (MipsFI->isOutArgFI(FrameIndex) ||
- (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI))
+ if (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI)
FrameReg = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP;
else
FrameReg = getFrameRegister(MF);
@@ -106,12 +105,8 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
// incoming argument, callee-saved register location or local variable.
int64_t Offset;
- if (MipsFI->isOutArgFI(FrameIndex))
- Offset = SPOffset;
- else
- Offset = SPOffset + (int64_t)StackSize;
-
- Offset += MI.getOperand(OpNo + 1).getImm();
+ Offset = SPOffset + (int64_t)StackSize;
+ Offset += MI.getOperand(OpNo + 1).getImm();
DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n");
diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp
index 7f5927d8ed..1ff41ca358 100644
--- a/lib/Target/Mips/MipsSubtarget.cpp
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -31,7 +31,8 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
MipsArchVersion(Mips32), MipsABI(UnknownABI), IsLittle(little),
IsSingleFloat(false), IsFP64bit(false), IsGP64bit(false), HasVFPU(false),
IsLinux(true), HasSEInReg(false), HasCondMov(false), HasMulDivAdd(false),
- HasMinMax(false), HasSwap(false), HasBitCount(false), InMips16Mode(false)
+ HasMinMax(false), HasSwap(false), HasBitCount(false), InMips16Mode(false),
+ HasDSP(false), HasDSPR2(false), IsAndroid(false)
// @LOCALMOD-START
, TargetTriple(TT)
// @LOCALMOD-END
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index 97d3600b1b..d8851a04eb 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -193,7 +193,7 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const {
// to adjust the stack pointer (we fit in the Red Zone). For 64-bit
// SVR4, we also require a stack frame if we need to spill the CR,
// since this spill area is addressed relative to the stack pointer.
- bool DisableRedZone = MF.getFunction()->hasFnAttr(Attribute::NoRedZone);
+ bool DisableRedZone = MF.getFunction()->getFnAttributes().hasNoRedZoneAttr();
// FIXME SVR4 The 32-bit SVR4 ABI has no red zone. However, it can
// still generate stackless code if all local vars are reg-allocated.
// Try: (FrameSize <= 224
@@ -255,7 +255,7 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
// Naked functions have no stack frame pushed, so we don't have a frame
// pointer.
- if (MF.getFunction()->hasFnAttr(Attribute::Naked))
+ if (MF.getFunction()->getFnAttributes().hasNakedAttr())
return false;
return MF.getTarget().Options.DisableFramePointerElim(MF) ||
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 2e8fa1842a..27f26cd5fd 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -2048,7 +2048,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin_Or_64SVR4(
SmallVector<SDValue, 8> MemOps;
unsigned nAltivecParamsAtEnd = 0;
- for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
+ Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
+ for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo, ++FuncArg) {
SDValue ArgVal;
bool needsLoad = false;
EVT ObjectVT = Ins[ArgNo].VT;
@@ -2103,7 +2104,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin_Or_64SVR4(
EVT ObjType = (ObjSize == 1 ? MVT::i8 :
(ObjSize == 2 ? MVT::i16 : MVT::i32));
SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo(),
+ MachinePointerInfo(FuncArg,
+ CurArgOffset),
ObjType, false, false, 0);
MemOps.push_back(Store);
++GPR_idx;
@@ -2136,7 +2138,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin_Or_64SVR4(
}
SDValue Store = DAG.getStore(Val.getValue(1), dl, Shifted, FIN,
- MachinePointerInfo(),
+ MachinePointerInfo(FuncArg, ArgOffset),
false, false, 0);
MemOps.push_back(Store);
++GPR_idx;
@@ -6000,7 +6002,7 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
bool is31 = (getTargetMachine().Options.DisableFramePointerElim(MF) ||
MFI->hasVarSizedObjects()) &&
MFI->getStackSize() &&
- !MF.getFunction()->hasFnAttr(Attribute::Naked);
+ !MF.getFunction()->getFnAttributes().hasNakedAttr();
unsigned FrameReg = isPPC64 ? (is31 ? PPC::X31 : PPC::X1) :
(is31 ? PPC::R31 : PPC::R1);
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 285e74a4c2..1665d7313c 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -596,7 +596,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// to Offset to get the correct offset.
// Naked functions have stack size 0, although getStackSize may not reflect that
// because we didn't call all the pieces that compute it for naked functions.
- if (!MF.getFunction()->hasFnAttr(Attribute::Naked))
+ if (!MF.getFunction()->getFnAttributes().hasNakedAttr())
Offset += MFI->getStackSize();
// If we can, encode the offset directly into the instruction. If this is a
diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp
index cc6dc1e259..0040147022 100644
--- a/lib/Target/TargetData.cpp
+++ b/lib/Target/TargetData.cpp
@@ -314,6 +314,8 @@ void
TargetData::setAlignment(AlignTypeEnum align_type, unsigned abi_align,
unsigned pref_align, uint32_t bit_width) {
assert(abi_align <= pref_align && "Preferred alignment worse than ABI!");
+ assert(pref_align < (1 << 16) && "Alignment doesn't fit in bitfield");
+ assert(bit_width < (1 << 24) && "Bit width doesn't fit in bitfield");
for (unsigned i = 0, e = Alignments.size(); i != e; ++i) {
if (Alignments[i].AlignType == align_type &&
Alignments[i].TypeBitWidth == bit_width) {
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 77961e53ae..9263bdde20 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -60,10 +60,6 @@ private:
bool ParseDirectiveWord(unsigned Size, SMLoc L);
bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
- bool mnemonicIsValid(StringRef Mnemonic) {
- return mnemonicIsValidImpl(Mnemonic);
- }
-
bool processInstruction(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
@@ -77,13 +73,6 @@ private:
unsigned &OrigErrorInfo,
bool matchingInlineAsm = false);
- unsigned getMCInstOperandNum(unsigned Kind, MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- unsigned OperandNum, unsigned &NumMCOperands) {
- return getMCInstOperandNumImpl(Kind, Inst, Operands, OperandNum,
- NumMCOperands);
- }
-
/// isSrcOp - Returns true if operand is either (%rsi) or %ds:%(rsi)
/// in 64bit mode or (%esi) or %es:(%esi) in 32bit mode.
bool isSrcOp(X86Operand &Op);
@@ -1636,16 +1625,20 @@ MatchInstruction(SMLoc IDLoc, unsigned &Kind,
unsigned Match1, Match2, Match3, Match4;
unsigned tKind;
- Match1 = MatchInstructionImpl(Operands, tKind, Inst, ErrorInfoIgnore);
+ Match1 = MatchInstructionImpl(Operands, tKind, Inst, ErrorInfoIgnore,
+ isParsingIntelSyntax());
if (Match1 == Match_Success) Kind = tKind;
Tmp[Base.size()] = Suffixes[1];
- Match2 = MatchInstructionImpl(Operands, tKind, Inst, ErrorInfoIgnore);
+ Match2 = MatchInstructionImpl(Operands, tKind, Inst, ErrorInfoIgnore,
+ isParsingIntelSyntax());
if (Match2 == Match_Success) Kind = tKind;
Tmp[Base.size()] = Suffixes[2];
- Match3 = MatchInstructionImpl(Operands, tKind, Inst, ErrorInfoIgnore);
+ Match3 = MatchInstructionImpl(Operands, tKind, Inst, ErrorInfoIgnore,
+ isParsingIntelSyntax());
if (Match3 == Match_Success) Kind = tKind;
Tmp[Base.size()] = Suffixes[3];
- Match4 = MatchInstructionImpl(Operands, tKind, Inst, ErrorInfoIgnore);
+ Match4 = MatchInstructionImpl(Operands, tKind, Inst, ErrorInfoIgnore,
+ isParsingIntelSyntax());
if (Match4 == Match_Success) Kind = tKind;
// Restore the old token.
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
index 46e72f9f60..b123afa001 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
@@ -15,6 +15,7 @@
#define DEBUG_TYPE "asm-printer"
#include "X86ATTInstPrinter.h"
#include "X86InstComments.h"
+#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -38,6 +39,12 @@ void X86ATTInstPrinter::printRegName(raw_ostream &OS,
void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
StringRef Annot) {
+ const MCInstrDesc &Desc = MII.get(MI->getOpcode());
+ uint64_t TSFlags = Desc.TSFlags;
+
+ if (TSFlags & X86II::LOCK)
+ OS << "\tlock\n";
+
// Try to print any aliases first.
if (!printAliasInstr(MI, OS))
printInstruction(MI, OS);
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
index ad14e34707..f9bb3be9d7 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
@@ -15,6 +15,7 @@
#define DEBUG_TYPE "asm-printer"
#include "X86IntelInstPrinter.h"
#include "X86InstComments.h"
+#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCExpr.h"
@@ -32,6 +33,12 @@ void X86IntelInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
StringRef Annot) {
+ const MCInstrDesc &Desc = MII.get(MI->getOpcode());
+ uint64_t TSFlags = Desc.TSFlags;
+
+ if (TSFlags & X86II::LOCK)
+ OS << "\tlock\n";
+
printInstruction(MI, OS);
// Next always print the annotation.
diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
index 3c0e3e6f2d..7706b9308e 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -34,6 +34,10 @@ AsmWriterFlavor("x86-asm-syntax", cl::init(ATT),
clEnumValN(Intel, "intel", "Emit Intel-style assembly"),
clEnumValEnd));
+static cl::opt<bool>
+MarkedJTDataRegions("mark-data-regions", cl::init(false),
+ cl::desc("Mark code section jump table data regions."),
+ cl::Hidden);
void X86MCAsmInfoDarwin::anchor() { }
@@ -59,6 +63,7 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) {
SupportsDebugInformation = true;
DwarfUsesInlineInfoSection = true;
+ UseDataRegionDirectives = MarkedJTDataRegions;
// Exceptions handling
ExceptionsType = ExceptionHandling::DwarfCFI;
diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
index f0f1982d57..7ff058edbc 100644
--- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
@@ -11,11 +11,13 @@
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCValue.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
#include "llvm/Object/MachOFormat.h"
using namespace llvm;
@@ -23,7 +25,7 @@ using namespace llvm::object;
namespace {
class X86MachObjectWriter : public MCMachObjectTargetWriter {
- void RecordScatteredRelocation(MachObjectWriter *Writer,
+ bool RecordScatteredRelocation(MachObjectWriter *Writer,
const MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCFragment *Fragment,
@@ -335,7 +337,7 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
Writer->addRelocation(Fragment->getParent(), MRE);
}
-void X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
+bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
const MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCFragment *Fragment,
@@ -381,6 +383,19 @@ void X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
// Relocations are written out in reverse order, so the PAIR comes first.
if (Type == macho::RIT_Difference ||
Type == macho::RIT_Generic_LocalDifference) {
+ // If the offset is too large to fit in a scattered relocation,
+ // we're hosed. It's an unfortunate limitation of the MachO format.
+ if (FixupOffset > 0xffffff) {
+ char Buffer[32];
+ format("0x%x", FixupOffset).print(Buffer, sizeof(Buffer));
+ Asm.getContext().FatalError(Fixup.getLoc(),
+ Twine("Section too large, can't encode "
+ "r_address (") + Buffer +
+ ") into 24 bits of scattered "
+ "relocation entry.");
+ llvm_unreachable("fatal error returned?!");
+ }
+
macho::RelocationEntry MRE;
MRE.Word0 = ((0 << 0) |
(macho::RIT_Pair << 24) |
@@ -389,6 +404,16 @@ void X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
macho::RF_Scattered);
MRE.Word1 = Value2;
Writer->addRelocation(Fragment->getParent(), MRE);
+ } else {
+ // If the offset is more than 24-bits, it won't fit in a scattered
+ // relocation offset field, so we fall back to using a non-scattered
+ // relocation. This is a bit risky, as if the offset reaches out of
+ // the block and the linker is doing scattered loading on this
+ // symbol, things can go badly.
+ //
+ // Required for 'as' compatibility.
+ if (FixupOffset > 0xffffff)
+ return false;
}
macho::RelocationEntry MRE;
@@ -399,6 +424,7 @@ void X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
macho::RF_Scattered);
MRE.Word1 = Value;
Writer->addRelocation(Fragment->getParent(), MRE);
+ return true;
}
void X86MachObjectWriter::RecordTLVPRelocation(MachObjectWriter *Writer,
@@ -469,9 +495,11 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer,
// If this is a difference or a defined symbol plus an offset, then we need a
// scattered relocation entry. Differences always require scattered
// relocations.
- if (Target.getSymB())
- return RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
- Target, Log2Size, FixedValue);
+ if (Target.getSymB()) {
+ RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
+ Target, Log2Size, FixedValue);
+ return;
+ }
// Get the symbol data, if any.
MCSymbolData *SD = 0;
@@ -483,9 +511,13 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer,
uint32_t Offset = Target.getConstant();
if (IsPCRel)
Offset += 1 << Log2Size;
- if (Offset && SD && !Writer->doesSymbolRequireExternRelocation(SD))
- return RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
- Target, Log2Size, FixedValue);
+ // Try to record the scattered relocation if needed. Fall back to non
+ // scattered if necessary (see comments in RecordScatteredRelocation()
+ // for details).
+ if (Offset && SD && !Writer->doesSymbolRequireExternRelocation(SD) &&
+ RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
+ Target, Log2Size, FixedValue))
+ return;
// See <reloc.h>.
uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index 0d8def0e47..85922f1277 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -676,7 +676,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// function, and use up to 128 bytes of stack space, don't have a frame
// pointer, calls, or dynamic alloca then we do not need to adjust the
// stack pointer (we fit in the Red Zone).
- if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) &&
+ if (Is64Bit && !Fn->getFnAttributes().hasNoRedZoneAttr() &&
!RegInfo->needsStackRealignment(MF) &&
!MFI->hasVarSizedObjects() && // No dynamic alloca.
!MFI->adjustsStack() && // No calls.
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index b409e88148..767e261a82 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -443,7 +443,7 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
void X86DAGToDAGISel::PreprocessISelDAG() {
// OptForSize is used in pattern predicates that isel is matching.
- OptForSize = MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+ OptForSize = MF->getFunction()->getFnAttributes().hasOptimizeForSizeAttr();
for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
E = CurDAG->allnodes_end(); I != E; ) {
@@ -2253,6 +2253,10 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
case X86ISD::ATOMSUB64_DAG:
case X86ISD::ATOMNAND64_DAG:
case X86ISD::ATOMAND64_DAG:
+ case X86ISD::ATOMMAX64_DAG:
+ case X86ISD::ATOMMIN64_DAG:
+ case X86ISD::ATOMUMAX64_DAG:
+ case X86ISD::ATOMUMIN64_DAG:
case X86ISD::ATOMSWAP64_DAG: {
unsigned Opc;
switch (Opcode) {
@@ -2263,6 +2267,10 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
case X86ISD::ATOMSUB64_DAG: Opc = X86::ATOMSUB6432; break;
case X86ISD::ATOMNAND64_DAG: Opc = X86::ATOMNAND6432; break;
case X86ISD::ATOMAND64_DAG: Opc = X86::ATOMAND6432; break;
+ case X86ISD::ATOMMAX64_DAG: Opc = X86::ATOMMAX6432; break;
+ case X86ISD::ATOMMIN64_DAG: Opc = X86::ATOMMIN6432; break;
+ case X86ISD::ATOMUMAX64_DAG: Opc = X86::ATOMUMAX6432; break;
+ case X86ISD::ATOMUMIN64_DAG: Opc = X86::ATOMUMIN6432; break;
case X86ISD::ATOMSWAP64_DAG: Opc = X86::ATOMSWAP6432; break;
}
SDNode *RetVal = SelectAtomic64(Node, Opc);
@@ -2389,13 +2397,16 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
SDValue N1 = Node->getOperand(1);
bool isSigned = Opcode == ISD::SMUL_LOHI;
+ bool hasBMI2 = Subtarget->hasBMI2();
if (!isSigned) {
switch (NVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break;
case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
- case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
- case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
+ case MVT::i32: Opc = hasBMI2 ? X86::MULX32rr : X86::MUL32r;
+ MOpc = hasBMI2 ? X86::MULX32rm : X86::MUL32m; break;
+ case MVT::i64: Opc = hasBMI2 ? X86::MULX64rr : X86::MUL64r;
+ MOpc = hasBMI2 ? X86::MULX64rm : X86::MUL64m; break;
}
} else {
switch (NVT.getSimpleVT().SimpleTy) {
@@ -2407,13 +2418,31 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
}
}
- unsigned LoReg, HiReg;
- switch (NVT.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Unsupported VT!");
- case MVT::i8: LoReg = X86::AL; HiReg = X86::AH; break;
- case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; break;
- case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break;
- case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break;
+ unsigned SrcReg, LoReg, HiReg;
+ switch (Opc) {
+ default: llvm_unreachable("Unknown MUL opcode!");
+ case X86::IMUL8r:
+ case X86::MUL8r:
+ SrcReg = LoReg = X86::AL; HiReg = X86::AH;
+ break;
+ case X86::IMUL16r:
+ case X86::MUL16r:
+ SrcReg = LoReg = X86::AX; HiReg = X86::DX;
+ break;
+ case X86::IMUL32r:
+ case X86::MUL32r:
+ SrcReg = LoReg = X86::EAX; HiReg = X86::EDX;
+ break;
+ case X86::IMUL64r:
+ case X86::MUL64r:
+ SrcReg = LoReg = X86::RAX; HiReg = X86::RDX;
+ break;
+ case X86::MULX32rr:
+ SrcReg = X86::EDX; LoReg = HiReg = 0;
+ break;
+ case X86::MULX64rr:
+ SrcReg = X86::RDX; LoReg = HiReg = 0;
+ break;
}
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
@@ -2425,22 +2454,47 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
std::swap(N0, N1);
}
- SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
+ SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SrcReg,
N0, SDValue()).getValue(1);
+ SDValue ResHi, ResLo;
if (foldedLoad) {
+ SDValue Chain;
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
InFlag };
- SDNode *CNode =
- CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops,
- array_lengthof(Ops));
- InFlag = SDValue(CNode, 1);
+ if (MOpc == X86::MULX32rm || MOpc == X86::MULX64rm) {
+ SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other, MVT::Glue);
+ SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops,
+ array_lengthof(Ops));
+ ResHi = SDValue(CNode, 0);
+ ResLo = SDValue(CNode, 1);
+ Chain = SDValue(CNode, 2);
+ InFlag = SDValue(CNode, 3);
+ } else {
+ SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
+ SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops,
+ array_lengthof(Ops));
+ Chain = SDValue(CNode, 0);
+ InFlag = SDValue(CNode, 1);
+ }
// Update the chain.
- ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
+ ReplaceUses(N1.getValue(1), Chain);
} else {
- SDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag);
- InFlag = SDValue(CNode, 0);
+ SDValue Ops[] = { N1, InFlag };
+ if (Opc == X86::MULX32rr || Opc == X86::MULX64rr) {
+ SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Glue);
+ SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops,
+ array_lengthof(Ops));
+ ResHi = SDValue(CNode, 0);
+ ResLo = SDValue(CNode, 1);
+ InFlag = SDValue(CNode, 2);
+ } else {
+ SDVTList VTs = CurDAG->getVTList(MVT::Glue);
+ SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops,
+ array_lengthof(Ops));
+ InFlag = SDValue(CNode, 0);
+ }
}
// Prevent use of AH in a REX instruction by referencing AX instead.
@@ -2465,19 +2519,25 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
}
// Copy the low half of the result, if it is needed.
if (!SDValue(Node, 0).use_empty()) {
- SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- LoReg, NVT, InFlag);
- InFlag = Result.getValue(2);
- ReplaceUses(SDValue(Node, 0), Result);
- DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
+ if (ResLo.getNode() == 0) {
+ assert(LoReg && "Register for low half is not defined!");
+ ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, NVT,
+ InFlag);
+ InFlag = ResLo.getValue(2);
+ }
+ ReplaceUses(SDValue(Node, 0), ResLo);
+ DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG); dbgs() << '\n');
}
// Copy the high half of the result, if it is needed.
if (!SDValue(Node, 1).use_empty()) {
- SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- HiReg, NVT, InFlag);
- InFlag = Result.getValue(2);
- ReplaceUses(SDValue(Node, 1), Result);
- DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
+ if (ResHi.getNode() == 0) {
+ assert(HiReg && "Register for high half is not defined!");
+ ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg, NVT,
+ InFlag);
+ InFlag = ResHi.getValue(2);
+ }
+ ReplaceUses(SDValue(Node, 1), ResHi);
+ DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG); dbgs() << '\n');
}
return NULL;
@@ -2678,7 +2738,13 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
MVT::i8, Reg);
// Emit a testb.
- return CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32, Subreg, Imm);
+ SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32,
+ Subreg, Imm);
+ // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
+ // one, do not call ReplaceAllUsesWith.
+ ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
+ SDValue(NewNode, 0));
+ return NULL;
}
// For example, "testl %eax, $2048" to "testb %ah, $8".
@@ -2709,8 +2775,13 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
// Emit a testb. The EXTRACT_SUBREG becomes a COPY that can only
// target GR8_NOREX registers, so make sure the register class is
// forced.
- return CurDAG->getMachineNode(X86::TEST8ri_NOREX, dl, MVT::i32,
- Subreg, ShiftedImm);
+ SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri_NOREX, dl,
+ MVT::i32, Subreg, ShiftedImm);
+ // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
+ // one, do not call ReplaceAllUsesWith.
+ ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
+ SDValue(NewNode, 0));
+ return NULL;
}
// For example, "testl %eax, $32776" to "testw %ax, $32776".
@@ -2726,7 +2797,13 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
MVT::i16, Reg);
// Emit a testw.
- return CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32, Subreg, Imm);
+ SDNode *NewNode = CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32,
+ Subreg, Imm);
+ // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
+ // one, do not call ReplaceAllUsesWith.
+ ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
+ SDValue(NewNode, 0));
+ return NULL;
}
// For example, "testq %rax, $268468232" to "testl %eax, $268468232".
@@ -2742,7 +2819,13 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
MVT::i32, Reg);
// Emit a testl.
- return CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32, Subreg, Imm);
+ SDNode *NewNode = CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32,
+ Subreg, Imm);
+ // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
+ // one, do not call ReplaceAllUsesWith.
+ ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
+ SDValue(NewNode, 0));
+ return NULL;
}
}
break;
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index bdfe245027..ffaf04cea7 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -522,6 +522,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i64, Custom);
}
if (Subtarget->hasCmpxchg16b()) {
@@ -1357,7 +1361,7 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
// cases like PR2962. This should be removed when PR2962 is fixed.
const Function *F = MF.getFunction();
if (IsZeroVal &&
- !F->hasFnAttr(Attribute::NoImplicitFloat)) {
+ !F->getFnAttributes().hasNoImplicitFloatAttr()) {
if (Size >= 16 &&
(Subtarget->isUnalignedMemAccessFast() ||
((DstAlign == 0 || DstAlign >= 16) &&
@@ -2048,7 +2052,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs,
TotalNumIntRegs);
- bool NoImplicitFloatOps = Fn->hasFnAttr(Attribute::NoImplicitFloat);
+ bool NoImplicitFloatOps = Fn->getFnAttributes().hasNoImplicitFloatAttr();
assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
"SSE register cannot be used when SSE is disabled!");
assert(!(NumXMMRegs && MF.getTarget().Options.UseSoftFloat &&
@@ -2240,7 +2244,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Check if it's really possible to do a tail call.
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
isVarArg, SR != NotStructReturn,
- MF.getFunction()->hasStructRetAttr(),
+ MF.getFunction()->hasStructRetAttr(), CLI.RetTy,
Outs, OutVals, Ins, DAG);
// Sibcalls are automatically detected tailcalls which do not require
@@ -2524,7 +2528,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
OpFlags = X86II::MO_DARWIN_STUB;
} else if (Subtarget->isPICStyleRIPRel() &&
isa<Function>(GV) &&
- cast<Function>(GV)->hasFnAttr(Attribute::NonLazyBind)) {
+ cast<Function>(GV)->getFnAttributes().hasNonLazyBindAttr()) {
// If the function is marked as non-lazy, generate an indirect call
// which loads from the GOT directly. This avoids runtime overhead
// at the cost of eager binding (and one extra byte of encoding).
@@ -2761,6 +2765,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
bool isVarArg,
bool isCalleeStructRet,
bool isCallerStructRet,
+ Type *RetTy,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -2772,6 +2777,13 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// If -tailcallopt is specified, make fastcc functions tail-callable.
const MachineFunction &MF = DAG.getMachineFunction();
const Function *CallerF = DAG.getMachineFunction().getFunction();
+
+ // If the function return type is x86_fp80 and the callee return type is not,
+ // then the FP_EXTEND of the call result is not a nop. It's not safe to
+ // perform a tailcall optimization here.
+ if (CallerF->getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
+ return false;
+
CallingConv::ID CallerCC = CallerF->getCallingConv();
bool CCMatch = CallerCC == CalleeCC;
@@ -6661,7 +6673,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
bool HasAVX = Subtarget->hasAVX();
bool HasAVX2 = Subtarget->hasAVX2();
MachineFunction &MF = DAG.getMachineFunction();
- bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+ bool OptForSize = MF.getFunction()->getFnAttributes().hasOptimizeForSizeAttr();
assert(VT.getSizeInBits() != 64 && "Can't lower MMX shuffles");
@@ -9783,7 +9795,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
// Sanity Check: Make sure using fp_offset makes sense.
assert(!getTargetMachine().Options.UseSoftFloat &&
!(DAG.getMachineFunction()
- .getFunction()->hasFnAttr(Attribute::NoImplicitFloat)) &&
+ .getFunction()->getFnAttributes().hasNoImplicitFloatAttr()) &&
Subtarget->hasSSE1());
}
@@ -11769,6 +11781,10 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::ATOMIC_LOAD_OR:
case ISD::ATOMIC_LOAD_SUB:
case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_LOAD_UMIN:
case ISD::ATOMIC_SWAP: {
unsigned Opc;
switch (N->getOpcode()) {
@@ -11791,6 +11807,18 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::ATOMIC_LOAD_XOR:
Opc = X86ISD::ATOMXOR64_DAG;
break;
+ case ISD::ATOMIC_LOAD_MAX:
+ Opc = X86ISD::ATOMMAX64_DAG;
+ break;
+ case ISD::ATOMIC_LOAD_MIN:
+ Opc = X86ISD::ATOMMIN64_DAG;
+ break;
+ case ISD::ATOMIC_LOAD_UMAX:
+ Opc = X86ISD::ATOMUMAX64_DAG;
+ break;
+ case ISD::ATOMIC_LOAD_UMIN:
+ Opc = X86ISD::ATOMUMIN64_DAG;
+ break;
case ISD::ATOMIC_SWAP:
Opc = X86ISD::ATOMSWAP64_DAG;
break;
@@ -12182,6 +12210,10 @@ static unsigned getNonAtomic6432Opcode(unsigned Opc, unsigned &HiOpc) {
case X86::ATOMADD6432: HiOpc = X86::ADC32rr; return X86::ADD32rr;
case X86::ATOMSUB6432: HiOpc = X86::SBB32rr; return X86::SUB32rr;
case X86::ATOMSWAP6432: HiOpc = X86::MOV32rr; return X86::MOV32rr;
+ case X86::ATOMMAX6432: HiOpc = X86::SETLr; return X86::SETLr;
+ case X86::ATOMMIN6432: HiOpc = X86::SETGr; return X86::SETGr;
+ case X86::ATOMUMAX6432: HiOpc = X86::SETBr; return X86::SETBr;
+ case X86::ATOMUMIN6432: HiOpc = X86::SETAr; return X86::SETAr;
}
llvm_unreachable("Unhandled atomic-load-op opcode!");
}
@@ -12499,6 +12531,7 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI,
SrcHiReg = MI->getOperand(CurOp++).getReg();
const TargetRegisterClass *RC = &X86::GR32RegClass;
+ const TargetRegisterClass *RC8 = &X86::GR8RegClass;
unsigned LCMPXCHGOpc = X86::LCMPXCHG8B;
unsigned LOADOpc = X86::MOV32rm;
@@ -12586,6 +12619,55 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI,
BuildMI(mainMBB, DL, TII->get(NOTOpc), t1H).addReg(t2H);
break;
}
+ case X86::ATOMMAX6432:
+ case X86::ATOMMIN6432:
+ case X86::ATOMUMAX6432:
+ case X86::ATOMUMIN6432: {
+ unsigned HiOpc;
+ unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc);
+ unsigned cL = MRI.createVirtualRegister(RC8);
+ unsigned cH = MRI.createVirtualRegister(RC8);
+ unsigned cL32 = MRI.createVirtualRegister(RC);
+ unsigned cH32 = MRI.createVirtualRegister(RC);
+ unsigned cc = MRI.createVirtualRegister(RC);
+ // cl := cmp src_lo, lo
+ BuildMI(mainMBB, DL, TII->get(X86::CMP32rr))
+ .addReg(SrcLoReg).addReg(LoReg);
+ BuildMI(mainMBB, DL, TII->get(LoOpc), cL);
+ BuildMI(mainMBB, DL, TII->get(X86::MOVZX32rr8), cL32).addReg(cL);
+ // ch := cmp src_hi, hi
+ BuildMI(mainMBB, DL, TII->get(X86::CMP32rr))
+ .addReg(SrcHiReg).addReg(HiReg);
+ BuildMI(mainMBB, DL, TII->get(HiOpc), cH);
+ BuildMI(mainMBB, DL, TII->get(X86::MOVZX32rr8), cH32).addReg(cH);
+ // cc := if (src_hi == hi) ? cl : ch;
+ if (Subtarget->hasCMov()) {
+ BuildMI(mainMBB, DL, TII->get(X86::CMOVE32rr), cc)
+ .addReg(cH32).addReg(cL32);
+ } else {
+ MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), cc)
+ .addReg(cH32).addReg(cL32)
+ .addImm(X86::COND_E);
+ mainMBB = EmitLoweredSelect(MIB, mainMBB);
+ }
+ BuildMI(mainMBB, DL, TII->get(X86::TEST32rr)).addReg(cc).addReg(cc);
+ if (Subtarget->hasCMov()) {
+ BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t1L)
+ .addReg(SrcLoReg).addReg(LoReg);
+ BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t1H)
+ .addReg(SrcHiReg).addReg(HiReg);
+ } else {
+ MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t1L)
+ .addReg(SrcLoReg).addReg(LoReg)
+ .addImm(X86::COND_NE);
+ mainMBB = EmitLoweredSelect(MIB, mainMBB);
+ MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t1H)
+ .addReg(SrcHiReg).addReg(HiReg)
+ .addImm(X86::COND_NE);
+ mainMBB = EmitLoweredSelect(MIB, mainMBB);
+ }
+ break;
+ }
case X86::ATOMSWAP6432: {
unsigned HiOpc;
unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc);
@@ -13576,6 +13658,10 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::ATOMNAND6432:
case X86::ATOMADD6432:
case X86::ATOMSUB6432:
+ case X86::ATOMMAX6432:
+ case X86::ATOMMIN6432:
+ case X86::ATOMUMAX6432:
+ case X86::ATOMUMIN6432:
case X86::ATOMSWAP6432:
return EmitAtomicLoadArith6432(MI, BB);
@@ -15562,7 +15648,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
const Function *F = DAG.getMachineFunction().getFunction();
- bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat);
+ bool NoImplicitFloatOps = F->getFnAttributes().hasNoImplicitFloatAttr();
bool F64IsLegal = !DAG.getTarget().Options.UseSoftFloat && !NoImplicitFloatOps
&& Subtarget->hasSSE2();
if ((VT.isVector() ||
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index d3545b0e9f..a53909b7a0 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -355,6 +355,10 @@ namespace llvm {
ATOMXOR64_DAG,
ATOMAND64_DAG,
ATOMNAND64_DAG,
+ ATOMMAX64_DAG,
+ ATOMMIN64_DAG,
+ ATOMUMAX64_DAG,
+ ATOMUMIN64_DAG,
ATOMSWAP64_DAG,
// LCMPXCHG_DAG, LCMPXCHG8_DAG, LCMPXCHG16_DAG - Compare and swap.
@@ -752,6 +756,7 @@ namespace llvm {
bool isVarArg,
bool isCalleeStructRet,
bool isCallerStructRet,
+ Type *RetTy,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index 1296bcbe89..3a1ac11f9c 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -561,7 +561,6 @@ defm ATOMSWAP : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMSWAP">;
// TODO: Get this to fold the constant into the instruction.
let isCodeGenOnly = 1, Defs = [EFLAGS] in
def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero),
- "lock\n\t"
"or{l}\t{$zero, $dst|$dst, $zero}",
[], IIC_ALU_MEM>, Requires<[In32BitMode]>, LOCK;
@@ -581,72 +580,72 @@ let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in {
def #NAME#8mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
RegOpc{3}, RegOpc{2}, RegOpc{1}, 0 },
MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
- !strconcat("lock\n\t", mnemonic, "{b}\t",
+ !strconcat(mnemonic, "{b}\t",
"{$src2, $dst|$dst, $src2}"),
[], IIC_ALU_NONMEM>, LOCK;
def #NAME#16mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
- !strconcat("lock\n\t", mnemonic, "{w}\t",
+ !strconcat(mnemonic, "{w}\t",
"{$src2, $dst|$dst, $src2}"),
[], IIC_ALU_NONMEM>, OpSize, LOCK;
def #NAME#32mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
- !strconcat("lock\n\t", mnemonic, "{l}\t",
+ !strconcat(mnemonic, "{l}\t",
"{$src2, $dst|$dst, $src2}"),
[], IIC_ALU_NONMEM>, LOCK;
def #NAME#64mr : RI<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
- !strconcat("lock\n\t", mnemonic, "{q}\t",
+ !strconcat(mnemonic, "{q}\t",
"{$src2, $dst|$dst, $src2}"),
[], IIC_ALU_NONMEM>, LOCK;
def #NAME#8mi : Ii8<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 0 },
ImmMod, (outs), (ins i8mem :$dst, i8imm :$src2),
- !strconcat("lock\n\t", mnemonic, "{b}\t",
+ !strconcat(mnemonic, "{b}\t",
"{$src2, $dst|$dst, $src2}"),
[], IIC_ALU_MEM>, LOCK;
def #NAME#16mi : Ii16<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
ImmMod, (outs), (ins i16mem :$dst, i16imm :$src2),
- !strconcat("lock\n\t", mnemonic, "{w}\t",
+ !strconcat(mnemonic, "{w}\t",
"{$src2, $dst|$dst, $src2}"),
[], IIC_ALU_MEM>, OpSize, LOCK;
def #NAME#32mi : Ii32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
ImmMod, (outs), (ins i32mem :$dst, i32imm :$src2),
- !strconcat("lock\n\t", mnemonic, "{l}\t",
+ !strconcat(mnemonic, "{l}\t",
"{$src2, $dst|$dst, $src2}"),
[], IIC_ALU_MEM>, LOCK;
def #NAME#64mi32 : RIi32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
ImmMod, (outs), (ins i64mem :$dst, i64i32imm :$src2),
- !strconcat("lock\n\t", mnemonic, "{q}\t",
+ !strconcat(mnemonic, "{q}\t",
"{$src2, $dst|$dst, $src2}"),
[], IIC_ALU_MEM>, LOCK;
def #NAME#16mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
ImmMod, (outs), (ins i16mem :$dst, i16i8imm :$src2),
- !strconcat("lock\n\t", mnemonic, "{w}\t",
+ !strconcat(mnemonic, "{w}\t",
"{$src2, $dst|$dst, $src2}"),
[], IIC_ALU_MEM>, OpSize, LOCK;
def #NAME#32mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
ImmMod, (outs), (ins i32mem :$dst, i32i8imm :$src2),
- !strconcat("lock\n\t", mnemonic, "{l}\t",
+ !strconcat(mnemonic, "{l}\t",
"{$src2, $dst|$dst, $src2}"),
[], IIC_ALU_MEM>, LOCK;
def #NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
ImmMod, (outs), (ins i64mem :$dst, i64i8imm :$src2),
- !strconcat("lock\n\t", mnemonic, "{q}\t",
+ !strconcat(mnemonic, "{q}\t",
"{$src2, $dst|$dst, $src2}"),
[], IIC_ALU_MEM>, LOCK;
@@ -666,16 +665,16 @@ multiclass LOCK_ArithUnOp<bits<8> Opc8, bits<8> Opc, Format Form,
let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in {
def #NAME#8m : I<Opc8, Form, (outs), (ins i8mem :$dst),
- !strconcat("lock\n\t", mnemonic, "{b}\t$dst"),
+ !strconcat(mnemonic, "{b}\t$dst"),
[], IIC_UNARY_MEM>, LOCK;
def #NAME#16m : I<Opc, Form, (outs), (ins i16mem:$dst),
- !strconcat("lock\n\t", mnemonic, "{w}\t$dst"),
+ !strconcat(mnemonic, "{w}\t$dst"),
[], IIC_UNARY_MEM>, OpSize, LOCK;
def #NAME#32m : I<Opc, Form, (outs), (ins i32mem:$dst),
- !strconcat("lock\n\t", mnemonic, "{l}\t$dst"),
+ !strconcat(mnemonic, "{l}\t$dst"),
[], IIC_UNARY_MEM>, LOCK;
def #NAME#64m : RI<Opc, Form, (outs), (ins i64mem:$dst),
- !strconcat("lock\n\t", mnemonic, "{q}\t$dst"),
+ !strconcat(mnemonic, "{q}\t$dst"),
[], IIC_UNARY_MEM>, LOCK;
}
}
@@ -689,7 +688,7 @@ multiclass LCMPXCHG_UnOp<bits<8> Opc, Format Form, string mnemonic,
InstrItinClass itin> {
let isCodeGenOnly = 1 in {
def #NAME# : I<Opc, Form, (outs), (ins x86memop:$ptr),
- !strconcat("lock\n\t", mnemonic, "\t$ptr"),
+ !strconcat(mnemonic, "\t$ptr"),
[(frag addr:$ptr)], itin>, TB, LOCK;
}
}
@@ -700,23 +699,19 @@ multiclass LCMPXCHG_BinOp<bits<8> Opc8, bits<8> Opc, Format Form,
let isCodeGenOnly = 1 in {
let Defs = [AL, EFLAGS], Uses = [AL] in
def #NAME#8 : I<Opc8, Form, (outs), (ins i8mem:$ptr, GR8:$swap),
- !strconcat("lock\n\t", mnemonic,
- "{b}\t{$swap, $ptr|$ptr, $swap}"),
+ !strconcat(mnemonic, "{b}\t{$swap, $ptr|$ptr, $swap}"),
[(frag addr:$ptr, GR8:$swap, 1)], itin8>, TB, LOCK;
let Defs = [AX, EFLAGS], Uses = [AX] in
def #NAME#16 : I<Opc, Form, (outs), (ins i16mem:$ptr, GR16:$swap),
- !strconcat("lock\n\t", mnemonic,
- "{w}\t{$swap, $ptr|$ptr, $swap}"),
+ !strconcat(mnemonic, "{w}\t{$swap, $ptr|$ptr, $swap}"),
[(frag addr:$ptr, GR16:$swap, 2)], itin>, TB, OpSize, LOCK;
let Defs = [EAX, EFLAGS], Uses = [EAX] in
def #NAME#32 : I<Opc, Form, (outs), (ins i32mem:$ptr, GR32:$swap),
- !strconcat("lock\n\t", mnemonic,
- "{l}\t{$swap, $ptr|$ptr, $swap}"),
+ !strconcat(mnemonic, "{l}\t{$swap, $ptr|$ptr, $swap}"),
[(frag addr:$ptr, GR32:$swap, 4)], itin>, TB, LOCK;
let Defs = [RAX, EFLAGS], Uses = [RAX] in
def #NAME#64 : RI<Opc, Form, (outs), (ins i64mem:$ptr, GR64:$swap),
- !strconcat("lock\n\t", mnemonic,
- "{q}\t{$swap, $ptr|$ptr, $swap}"),
+ !strconcat(mnemonic, "{q}\t{$swap, $ptr|$ptr, $swap}"),
[(frag addr:$ptr, GR64:$swap, 8)], itin>, TB, LOCK;
}
}
@@ -744,31 +739,27 @@ multiclass ATOMIC_LOAD_BINOP<bits<8> opc8, bits<8> opc, string mnemonic,
let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1 in {
def #NAME#8 : I<opc8, MRMSrcMem, (outs GR8:$dst),
(ins GR8:$val, i8mem:$ptr),
- !strconcat("lock\n\t", mnemonic,
- "{b}\t{$val, $ptr|$ptr, $val}"),
+ !strconcat(mnemonic, "{b}\t{$val, $ptr|$ptr, $val}"),
[(set GR8:$dst,
(!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val))],
itin8>;
def #NAME#16 : I<opc, MRMSrcMem, (outs GR16:$dst),
(ins GR16:$val, i16mem:$ptr),
- !strconcat("lock\n\t", mnemonic,
- "{w}\t{$val, $ptr|$ptr, $val}"),
+ !strconcat(mnemonic, "{w}\t{$val, $ptr|$ptr, $val}"),
[(set
GR16:$dst,
(!cast<PatFrag>(frag # "_16") addr:$ptr, GR16:$val))],
itin>, OpSize;
def #NAME#32 : I<opc, MRMSrcMem, (outs GR32:$dst),
(ins GR32:$val, i32mem:$ptr),
- !strconcat("lock\n\t", mnemonic,
- "{l}\t{$val, $ptr|$ptr, $val}"),
+ !strconcat(mnemonic, "{l}\t{$val, $ptr|$ptr, $val}"),
[(set
GR32:$dst,
(!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val))],
itin>;
def #NAME#64 : RI<opc, MRMSrcMem, (outs GR64:$dst),
(ins GR64:$val, i64mem:$ptr),
- !strconcat("lock\n\t", mnemonic,
- "{q}\t{$val, $ptr|$ptr, $val}"),
+ !strconcat(mnemonic, "{q}\t{$val, $ptr|$ptr, $val}"),
[(set
GR64:$dst,
(!cast<PatFrag>(frag # "_64") addr:$ptr, GR64:$val))],
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index e595876dcf..af570adb79 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -566,6 +566,16 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VSQRTPSYr_Int, X86::VSQRTPSYm_Int, TB_ALIGN_32 },
{ X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrm, TB_NO_REVERSE },
{ X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrm, TB_NO_REVERSE },
+
+ // BMI/BMI2 foldable instructions
+ { X86::RORX32ri, X86::RORX32mi, 0 },
+ { X86::RORX64ri, X86::RORX64mi, 0 },
+ { X86::SARX32rr, X86::SARX32rm, 0 },
+ { X86::SARX64rr, X86::SARX64rm, 0 },
+ { X86::SHRX32rr, X86::SHRX32rm, 0 },
+ { X86::SHRX64rr, X86::SHRX64rm, 0 },
+ { X86::SHLX32rr, X86::SHLX32rm, 0 },
+ { X86::SHLX64rr, X86::SHLX64rm, 0 },
};
for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
@@ -1145,6 +1155,10 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4mr, TB_ALIGN_16 },
{ X86::VFMSUBADDPS4rrY, X86::VFMSUBADDPS4mrY, TB_ALIGN_32 },
{ X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4mrY, TB_ALIGN_32 },
+
+ // BMI/BMI2 foldable instructions
+ { X86::MULX32rr, X86::MULX32rm, 0 },
+ { X86::MULX64rr, X86::MULX64rm, 0 },
};
for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
@@ -3812,7 +3826,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
// Unless optimizing for size, don't fold to avoid partial
// register update stalls
- if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize) &&
+ if (!MF.getFunction()->getFnAttributes().hasOptimizeForSizeAttr() &&
hasPartialRegUpdate(MI->getOpcode()))
return 0;
@@ -3853,7 +3867,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
// Unless optimizing for size, don't fold to avoid partial
// register update stalls
- if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize) &&
+ if (!MF.getFunction()->getFnAttributes().hasOptimizeForSizeAttr() &&
hasPartialRegUpdate(MI->getOpcode()))
return 0;
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 4fce5acc23..5074724fb8 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -568,17 +568,17 @@ def HasMMX : Predicate<"Subtarget->hasMMX()">;
def Has3DNow : Predicate<"Subtarget->has3DNow()">;
def Has3DNowA : Predicate<"Subtarget->has3DNowA()">;
def HasSSE1 : Predicate<"Subtarget->hasSSE1()">;
-def UseSSE1 : Predicate<"Subtarget->hasSSE1() && Subtarget->hasNoAVX()">;
+def UseSSE1 : Predicate<"Subtarget->hasSSE1() && !Subtarget->hasAVX()">;
def HasSSE2 : Predicate<"Subtarget->hasSSE2()">;
-def UseSSE2 : Predicate<"Subtarget->hasSSE2() && Subtarget->hasNoAVX()">;
+def UseSSE2 : Predicate<"Subtarget->hasSSE2() && !Subtarget->hasAVX()">;
def HasSSE3 : Predicate<"Subtarget->hasSSE3()">;
-def UseSSE3 : Predicate<"Subtarget->hasSSE3() && Subtarget->hasNoAVX()">;
+def UseSSE3 : Predicate<"Subtarget->hasSSE3() && !Subtarget->hasAVX()">;
def HasSSSE3 : Predicate<"Subtarget->hasSSSE3()">;
-def UseSSSE3 : Predicate<"Subtarget->hasSSSE3() && Subtarget->hasNoAVX()">;
+def UseSSSE3 : Predicate<"Subtarget->hasSSSE3() && !Subtarget->hasAVX()">;
def HasSSE41 : Predicate<"Subtarget->hasSSE41()">;
-def UseSSE41 : Predicate<"Subtarget->hasSSE41() && Subtarget->hasNoAVX()">;
+def UseSSE41 : Predicate<"Subtarget->hasSSE41() && !Subtarget->hasAVX()">;
def HasSSE42 : Predicate<"Subtarget->hasSSE42()">;
-def UseSSE42 : Predicate<"Subtarget->hasSSE42() && Subtarget->hasNoAVX()">;
+def UseSSE42 : Predicate<"Subtarget->hasSSE42() && !Subtarget->hasAVX()">;
def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">;
def HasAVX : Predicate<"Subtarget->hasAVX()">;
def HasAVX2 : Predicate<"Subtarget->hasAVX2()">;
diff --git a/lib/Target/X86/X86InstrShiftRotate.td b/lib/Target/X86/X86InstrShiftRotate.td
index bdeb63ffbd..893488c159 100644
--- a/lib/Target/X86/X86InstrShiftRotate.td
+++ b/lib/Target/X86/X86InstrShiftRotate.td
@@ -839,6 +839,16 @@ def SHRD64mri8 : RIi8<0xAC, MRMDestMem,
} // Defs = [EFLAGS]
+def ROT32L2R_imm8 : SDNodeXForm<imm, [{
+ // Convert a ROTL shamt to a ROTR shamt on 32-bit integer.
+ return getI8Imm(32 - N->getZExtValue());
+}]>;
+
+def ROT64L2R_imm8 : SDNodeXForm<imm, [{
+ // Convert a ROTL shamt to a ROTR shamt on 64-bit integer.
+ return getI8Imm(64 - N->getZExtValue());
+}]>;
+
multiclass bmi_rotate<string asm, RegisterClass RC, X86MemOperand x86memop> {
let neverHasSideEffects = 1 in {
def ri : Ii8<0xF0, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, i8imm:$src2),
@@ -873,4 +883,72 @@ let Predicates = [HasBMI2] in {
defm SHRX64 : bmi_shift<"shrx{q}", GR64, i64mem>, T8XD, VEX_W;
defm SHLX32 : bmi_shift<"shlx{l}", GR32, i32mem>, T8, OpSize;
defm SHLX64 : bmi_shift<"shlx{q}", GR64, i64mem>, T8, OpSize, VEX_W;
+
+ // Prefer RORX which is non-destructive and doesn't update EFLAGS.
+ let AddedComplexity = 10 in {
+ def : Pat<(rotl GR32:$src, (i8 imm:$shamt)),
+ (RORX32ri GR32:$src, (ROT32L2R_imm8 imm:$shamt))>;
+ def : Pat<(rotl GR64:$src, (i8 imm:$shamt)),
+ (RORX64ri GR64:$src, (ROT64L2R_imm8 imm:$shamt))>;
+ }
+
+ def : Pat<(rotl (loadi32 addr:$src), (i8 imm:$shamt)),
+ (RORX32mi addr:$src, (ROT32L2R_imm8 imm:$shamt))>;
+ def : Pat<(rotl (loadi64 addr:$src), (i8 imm:$shamt)),
+ (RORX64mi addr:$src, (ROT64L2R_imm8 imm:$shamt))>;
+
+ // Prefer SARX/SHRX/SHLX over SAR/SHR/SHL with variable shift BUT not
+ // immedidate shift, i.e. the following code is considered better
+ //
+ // mov %edi, %esi
+ // shl $imm, %esi
+ // ... %edi, ...
+ //
+ // than
+ //
+ // movb $imm, %sil
+ // shlx %sil, %edi, %esi
+ // ... %edi, ...
+ //
+ let AddedComplexity = 1 in {
+ def : Pat<(sra GR32:$src1, GR8:$src2),
+ (SARX32rr GR32:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(sra GR64:$src1, GR8:$src2),
+ (SARX64rr GR64:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+ def : Pat<(srl GR32:$src1, GR8:$src2),
+ (SHRX32rr GR32:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(srl GR64:$src1, GR8:$src2),
+ (SHRX64rr GR64:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+ def : Pat<(shl GR32:$src1, GR8:$src2),
+ (SHLX32rr GR32:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(shl GR64:$src1, GR8:$src2),
+ (SHLX64rr GR64:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ }
+
+ // Patterns on SARXrm/SHRXrm/SHLXrm are explicitly omitted to favor
+ //
+ // mov (%ecx), %esi
+ // shl $imm, $esi
+ //
+ // over
+ //
+ // movb $imm %al
+ // shlx %al, (%ecx), %esi
+ //
+ // As SARXrr/SHRXrr/SHLXrr is favored on variable shift, the peephole
+ // optimization will fold them into SARXrm/SHRXrm/SHLXrm if possible.
}
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 582f5e99ff..262d32e4e6 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -423,7 +423,7 @@ bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
const Function *F = MF.getFunction();
unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
- F->hasFnAttr(Attribute::StackAlignment));
+ F->getFnAttributes().hasStackAlignmentAttr());
// If we've requested that we force align the stack do so now.
if (ForceStackAlign)
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 4c7b8fc4de..921ded8f2d 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -205,7 +205,6 @@ public:
bool hasSSE42() const { return X86SSELevel >= SSE42; }
bool hasAVX() const { return X86SSELevel >= AVX; }
bool hasAVX2() const { return X86SSELevel >= AVX2; }
- bool hasNoAVX() const { return X86SSELevel < AVX; }
bool hasSSE4A() const { return HasSSE4A; }
bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index b94dd69deb..10f5b6e658 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -592,14 +592,6 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
Type *RetTy = FTy->getReturnType();
- // Work around LLVM bug PR56: the CWriter cannot emit varargs functions which
- // have zero fixed arguments.
- bool ExtraArgHack = false;
- if (Params.empty() && FTy->isVarArg()) {
- ExtraArgHack = true;
- Params.push_back(Type::getInt32Ty(F->getContext()));
- }
-
// Construct the new function type using the new arguments.
FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg());
@@ -711,9 +703,6 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
}
}
- if (ExtraArgHack)
- Args.push_back(Constant::getNullValue(Type::getInt32Ty(F->getContext())));
-
// Push any varargs arguments on the list.
for (; AI != CS.arg_end(); ++AI, ++ArgIndex) {
Args.push_back(*AI);
@@ -870,16 +859,9 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
}
// Increment I2 past all of the arguments added for this promoted pointer.
- for (unsigned i = 0, e = ArgIndices.size(); i != e; ++i)
- ++I2;
+ std::advance(I2, ArgIndices.size());
}
- // Notify the alias analysis implementation that we inserted a new argument.
- if (ExtraArgHack)
- AA.copyValue(Constant::getNullValue(Type::getInt32Ty(F->getContext())),
- NF->arg_begin());
-
-
// Tell the alias analysis that the old function is about to disappear.
AA.replaceWithNewValue(F, NF);
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index fd23a935b9..c7429c5954 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -717,9 +717,9 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
// here. Currently, this should not be possible, but special handling might be
// required when new return value attributes are added.
if (NRetTy->isVoidTy())
- RAttrs &= ~Attribute::typeIncompatible(NRetTy);
+ RAttrs &= ~Attributes::typeIncompatible(NRetTy);
else
- assert((RAttrs & Attribute::typeIncompatible(NRetTy)) == 0
+ assert((RAttrs & Attributes::typeIncompatible(NRetTy)) == 0
&& "Return attributes no longer compatible?");
if (RAttrs)
@@ -786,7 +786,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
Attributes RAttrs = CallPAL.getRetAttributes();
Attributes FnAttrs = CallPAL.getFnAttributes();
// Adjust in case the function was changed to return void.
- RAttrs &= ~Attribute::typeIncompatible(NF->getReturnType());
+ RAttrs &= ~Attributes::typeIncompatible(NF->getReturnType());
if (RAttrs)
AttributesVec.push_back(AttributeWithIndex::get(0, RAttrs));
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index b888e95982..b1ba6be5ff 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -962,7 +962,9 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
// If we get here we could have other crazy uses that are transitively
// loaded.
assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) ||
- isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser)) &&
+ isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser) ||
+ isa<BitCastInst>(GlobalUser) ||
+ isa<GetElementPtrInst>(GlobalUser)) &&
"Only expect load and stores!");
}
}
diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp
index 664ddf6f7a..42f0991360 100644
--- a/lib/Transforms/IPO/InlineAlways.cpp
+++ b/lib/Transforms/IPO/InlineAlways.cpp
@@ -65,7 +65,7 @@ Pass *llvm::createAlwaysInlinerPass(bool InsertLifetime) {
/// \brief Minimal filter to detect invalid constructs for inlining.
static bool isInlineViable(Function &F) {
- bool ReturnsTwice = F.hasFnAttr(Attribute::ReturnsTwice);
+ bool ReturnsTwice = F.getFnAttributes().hasReturnsTwiceAttr();
for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
// Disallow inlining of functions which contain an indirect branch.
if (isa<IndirectBrInst>(BI->getTerminator()))
@@ -114,7 +114,7 @@ InlineCost AlwaysInliner::getInlineCost(CallSite CS) {
if (Callee->isDeclaration()) return InlineCost::getNever();
// Return never for anything not marked as always inline.
- if (!Callee->hasFnAttr(Attribute::AlwaysInline))
+ if (!Callee->getFnAttributes().hasAlwaysInlineAttr())
return InlineCost::getNever();
// Do some minimal analysis to preclude non-viable functions.
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index a9263baa44..7932b40bdc 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -93,10 +93,10 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
// If the inlined function had a higher stack protection level than the
// calling function, then bump up the caller's stack protection level.
- if (Callee->hasFnAttr(Attribute::StackProtectReq))
+ if (Callee->getFnAttributes().hasStackProtectReqAttr())
Caller->addFnAttr(Attribute::StackProtectReq);
- else if (Callee->hasFnAttr(Attribute::StackProtect) &&
- !Caller->hasFnAttr(Attribute::StackProtectReq))
+ else if (Callee->getFnAttributes().hasStackProtectAttr() &&
+ !Caller->getFnAttributes().hasStackProtectReqAttr())
Caller->addFnAttr(Attribute::StackProtect);
// Look at all of the allocas that we inlined through this call site. If we
@@ -209,7 +209,7 @@ unsigned Inliner::getInlineThreshold(CallSite CS) const {
// would decrease the threshold.
Function *Caller = CS.getCaller();
bool OptSize = Caller && !Caller->isDeclaration() &&
- Caller->hasFnAttr(Attribute::OptimizeForSize);
+ Caller->getFnAttributes().hasOptimizeForSizeAttr();
if (!(InlineLimit.getNumOccurrences() > 0) && OptSize &&
OptSizeThreshold < thres)
thres = OptSizeThreshold;
@@ -217,7 +217,7 @@ unsigned Inliner::getInlineThreshold(CallSite CS) const {
// Listen to the inlinehint attribute when it would increase the threshold.
Function *Callee = CS.getCalledFunction();
bool InlineHint = Callee && !Callee->isDeclaration() &&
- Callee->hasFnAttr(Attribute::InlineHint);
+ Callee->getFnAttributes().hasInlineHintAttr();
if (InlineHint && HintThreshold > thres)
thres = HintThreshold;
@@ -533,7 +533,7 @@ bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) {
// Handle the case when this function is called and we only want to care
// about always-inline functions. This is a bit of a hack to share code
// between here and the InlineAlways pass.
- if (AlwaysInlineOnly && !F->hasFnAttr(Attribute::AlwaysInline))
+ if (AlwaysInlineOnly && !F->getFnAttributes().hasAlwaysInlineAttr())
continue;
// If the only remaining users of the function are dead constants, remove
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index c81b333813..9e328b9ac9 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -211,13 +211,12 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
// FIXME: We shouldn't bother with this anymore.
MPM.add(createStripDeadPrototypesPass()); // Get rid of dead prototypes
- // GlobalOpt already deletes dead functions and globals, at -O3 try a
+ // GlobalOpt already deletes dead functions and globals, at -O2 try a
// late pass of GlobalDCE. It is capable of deleting dead cycles.
- if (OptLevel > 2)
+ if (OptLevel > 1) {
MPM.add(createGlobalDCEPass()); // Remove dead fns and globals.
-
- if (OptLevel > 1)
MPM.add(createConstantMergePass()); // Merge dup global constants
+ }
}
addExtensionsToPM(EP_OptimizerLast, MPM);
}
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 23c08699ff..ac30dcdcbf 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1037,7 +1037,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
Attributes RAttrs = CallerPAL.getRetAttributes();
- if (RAttrs & Attribute::typeIncompatible(NewRetTy))
+ if (RAttrs & Attributes::typeIncompatible(NewRetTy))
return false; // Attribute not compatible with transformed value.
}
@@ -1067,7 +1067,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
return false; // Cannot transform this parameter value.
Attributes Attrs = CallerPAL.getParamAttributes(i + 1);
- if (Attrs & Attribute::typeIncompatible(ParamTy))
+ if (Attrs & Attributes::typeIncompatible(ParamTy))
return false; // Attribute not compatible with transformed value.
// If the parameter is passed as a byval argument, then we have to have a
@@ -1141,7 +1141,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
// If the return value is not being used, the type may not be compatible
// with the existing attributes. Wipe out any problematic attributes.
- RAttrs &= ~Attribute::typeIncompatible(NewRetTy);
+ RAttrs &= ~Attributes::typeIncompatible(NewRetTy);
// Add the new return attributes.
if (RAttrs)
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 5b6cf4a4a8..a446e427e5 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -264,26 +264,28 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
}
}
- // Check to see if this allocation is only modified by a memcpy/memmove from
- // a constant global whose alignment is equal to or exceeds that of the
- // allocation. If this is the case, we can change all users to use
- // the constant global instead. This is commonly produced by the CFE by
- // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
- // is only subsequently read.
- SmallVector<Instruction *, 4> ToDelete;
- if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
- if (AI.getAlignment() <= getPointeeAlignment(Copy->getSource(), *TD)) {
- DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
- DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
- for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
- EraseInstFromFunction(*ToDelete[i]);
- Constant *TheSrc = cast<Constant>(Copy->getSource());
- Instruction *NewI
- = ReplaceInstUsesWith(AI, ConstantExpr::getBitCast(TheSrc,
- AI.getType()));
- EraseInstFromFunction(*Copy);
- ++NumGlobalCopies;
- return NewI;
+ if (TD) {
+ // Check to see if this allocation is only modified by a memcpy/memmove from
+ // a constant global whose alignment is equal to or exceeds that of the
+ // allocation. If this is the case, we can change all users to use
+ // the constant global instead. This is commonly produced by the CFE by
+ // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
+ // is only subsequently read.
+ SmallVector<Instruction *, 4> ToDelete;
+ if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
+ if (AI.getAlignment() <= getPointeeAlignment(Copy->getSource(), *TD)) {
+ DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
+ DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
+ for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
+ EraseInstFromFunction(*ToDelete[i]);
+ Constant *TheSrc = cast<Constant>(Copy->getSource());
+ Instruction *NewI
+ = ReplaceInstUsesWith(AI, ConstantExpr::getBitCast(TheSrc,
+ AI.getType()));
+ EraseInstFromFunction(*Copy);
+ ++NumGlobalCopies;
+ return NewI;
+ }
}
}
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 291e80019e..0ba7340e64 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -903,7 +903,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
return &SI;
}
- if (VectorType* VecTy = dyn_cast<VectorType>(SI.getType())) {
+ if (VectorType *VecTy = dyn_cast<VectorType>(SI.getType())) {
unsigned VWidth = VecTy->getNumElements();
APInt UndefElts(VWidth, 0);
APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
@@ -912,6 +912,28 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
return ReplaceInstUsesWith(SI, V);
return &SI;
}
+
+ if (ConstantVector *CV = dyn_cast<ConstantVector>(CondVal)) {
+ // Form a shufflevector instruction.
+ SmallVector<Constant *, 8> Mask(VWidth);
+ Type *Int32Ty = Type::getInt32Ty(CV->getContext());
+ for (unsigned i = 0; i != VWidth; ++i) {
+ Constant *Elem = cast<Constant>(CV->getOperand(i));
+ if (ConstantInt *E = dyn_cast<ConstantInt>(Elem))
+ Mask[i] = ConstantInt::get(Int32Ty, i + (E->isZero() ? VWidth : 0));
+ else if (isa<UndefValue>(Elem))
+ Mask[i] = UndefValue::get(Int32Ty);
+ else
+ return 0;
+ }
+ Constant *MaskVal = ConstantVector::get(Mask);
+ Value *V = Builder->CreateShuffleVector(TrueVal, FalseVal, MaskVal);
+ return ReplaceInstUsesWith(SI, V);
+ }
+
+ if (isa<ConstantAggregateZero>(CondVal)) {
+ return ReplaceInstUsesWith(SI, FalseVal);
+ }
}
return 0;
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index afa6a4b5e6..1b102bd243 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -854,7 +854,7 @@ bool AddressSanitizer::handleFunction(Module &M, Function &F) {
// If needed, insert __asan_init before checking for AddressSafety attr.
maybeInsertAsanInitAtFunctionEntry(F);
- if (!F.hasFnAttr(Attribute::AddressSafety)) return false;
+ if (!F.getFnAttributes().hasAddressSafetyAttr()) return false;
if (!ClDebugFunc.empty() && ClDebugFunc != F.getName())
return false;
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 495cdc6321..305d70f27b 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -149,7 +149,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
TLInfo = &getAnalysis<TargetLibraryInfo>();
DT = getAnalysisIfAvailable<DominatorTree>();
PFI = getAnalysisIfAvailable<ProfileInfo>();
- OptSize = F.hasFnAttr(Attribute::OptimizeForSize);
+ OptSize = F.getFnAttributes().hasOptimizeForSizeAttr();
/// This optimization identifies DIV instructions that can be
/// profitably bypassed and carried out with a shorter, faster divide.
@@ -226,7 +226,8 @@ bool CodeGenPrepare::EliminateFallThrough(Function &F) {
// edge, just collapse it.
BasicBlock *SinglePred = BB->getSinglePredecessor();
- if (!SinglePred || SinglePred == BB) continue;
+ // Don't merge if BB's address is taken.
+ if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue;
BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
if (Term && !Term->isConditional()) {
@@ -788,7 +789,7 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) {
}
// If we eliminated all predecessors of the block, delete the block now.
- if (Changed && pred_begin(BB) == pred_end(BB))
+ if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB))
BB->eraseFromParent();
return Changed;
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 9b0aadb0b5..3ec6f3dcc3 100644
--- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -235,6 +235,11 @@ bool CorrelatedValuePropagation::processSwitch(SwitchInst *SI) {
// This case never fires - remove it.
CI.getCaseSuccessor()->removePredecessor(BB);
SI->removeCase(CI); // Does not invalidate the iterator.
+
+ // The condition can be modified by removePredecessor's PHI simplification
+ // logic.
+ Cond = SI->getCondition();
+
++NumDeadCases;
Changed = true;
} else if (State == LazyValueInfo::True) {
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 1ff4329c84..301ee2f663 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -30,6 +30,7 @@
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/Debug.h"
#include "llvm/ADT/SetVector.h"
@@ -45,6 +46,7 @@ namespace {
AliasAnalysis *AA;
MemoryDependenceAnalysis *MD;
DominatorTree *DT;
+ const TargetLibraryInfo *TLI;
static char ID; // Pass identification, replacement for typeid
DSE() : FunctionPass(ID), AA(0), MD(0), DT(0) {
@@ -55,6 +57,7 @@ namespace {
AA = &getAnalysis<AliasAnalysis>();
MD = &getAnalysis<MemoryDependenceAnalysis>();
DT = &getAnalysis<DominatorTree>();
+ TLI = AA->getTargetLibraryInfo();
bool Changed = false;
for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
@@ -144,7 +147,7 @@ static void DeleteDeadInstruction(Instruction *I,
/// hasMemoryWrite - Does this instruction write some memory? This only returns
/// true for things that we can analyze with other helpers below.
-static bool hasMemoryWrite(Instruction *I) {
+static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo *TLI) {
if (isa<StoreInst>(I))
return true;
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
@@ -159,6 +162,26 @@ static bool hasMemoryWrite(Instruction *I) {
return true;
}
}
+ if (CallSite CS = I) {
+ if (Function *F = CS.getCalledFunction()) {
+ if (TLI && TLI->has(LibFunc::strcpy) &&
+ F->getName() == TLI->getName(LibFunc::strcpy)) {
+ return true;
+ }
+ if (TLI && TLI->has(LibFunc::strncpy) &&
+ F->getName() == TLI->getName(LibFunc::strncpy)) {
+ return true;
+ }
+ if (TLI && TLI->has(LibFunc::strcat) &&
+ F->getName() == TLI->getName(LibFunc::strcat)) {
+ return true;
+ }
+ if (TLI && TLI->has(LibFunc::strncat) &&
+ F->getName() == TLI->getName(LibFunc::strncat)) {
+ return true;
+ }
+ }
+ }
return false;
}
@@ -206,7 +229,8 @@ getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
/// instruction if any.
static AliasAnalysis::Location
getLocForRead(Instruction *Inst, AliasAnalysis &AA) {
- assert(hasMemoryWrite(Inst) && "Unknown instruction case");
+ assert(hasMemoryWrite(Inst, AA.getTargetLibraryInfo()) &&
+ "Unknown instruction case");
// The only instructions that both read and write are the mem transfer
// instructions (memcpy/memmove).
@@ -223,23 +247,29 @@ static bool isRemovable(Instruction *I) {
if (StoreInst *SI = dyn_cast<StoreInst>(I))
return SI->isUnordered();
- IntrinsicInst *II = cast<IntrinsicInst>(I);
- switch (II->getIntrinsicID()) {
- default: llvm_unreachable("doesn't pass 'hasMemoryWrite' predicate");
- case Intrinsic::lifetime_end:
- // Never remove dead lifetime_end's, e.g. because it is followed by a
- // free.
- return false;
- case Intrinsic::init_trampoline:
- // Always safe to remove init_trampoline.
- return true;
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ default: llvm_unreachable("doesn't pass 'hasMemoryWrite' predicate");
+ case Intrinsic::lifetime_end:
+ // Never remove dead lifetime_end's, e.g. because it is followed by a
+ // free.
+ return false;
+ case Intrinsic::init_trampoline:
+ // Always safe to remove init_trampoline.
+ return true;
- case Intrinsic::memset:
- case Intrinsic::memmove:
- case Intrinsic::memcpy:
- // Don't remove volatile memory intrinsics.
- return !cast<MemIntrinsic>(II)->isVolatile();
+ case Intrinsic::memset:
+ case Intrinsic::memmove:
+ case Intrinsic::memcpy:
+ // Don't remove volatile memory intrinsics.
+ return !cast<MemIntrinsic>(II)->isVolatile();
+ }
}
+
+ if (CallSite CS = I)
+ return CS.getInstruction()->use_empty();
+
+ return false;
}
@@ -250,14 +280,19 @@ static bool isShortenable(Instruction *I) {
if (isa<StoreInst>(I))
return false;
- IntrinsicInst *II = cast<IntrinsicInst>(I);
- switch (II->getIntrinsicID()) {
- default: return false;
- case Intrinsic::memset:
- case Intrinsic::memcpy:
- // Do shorten memory intrinsics.
- return true;
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ default: return false;
+ case Intrinsic::memset:
+ case Intrinsic::memcpy:
+ // Do shorten memory intrinsics.
+ return true;
+ }
}
+
+ // Don't shorten libcalls calls for now.
+
+ return false;
}
/// getStoredPointerOperand - Return the pointer that is being written to.
@@ -267,12 +302,18 @@ static Value *getStoredPointerOperand(Instruction *I) {
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
return MI->getDest();
- IntrinsicInst *II = cast<IntrinsicInst>(I);
- switch (II->getIntrinsicID()) {
- default: llvm_unreachable("Unexpected intrinsic!");
- case Intrinsic::init_trampoline:
- return II->getArgOperand(0);
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ default: llvm_unreachable("Unexpected intrinsic!");
+ case Intrinsic::init_trampoline:
+ return II->getArgOperand(0);
+ }
}
+
+ CallSite CS = I;
+ // All the supported functions so far happen to have dest as their first
+ // argument.
+ return CS.getArgument(0);
}
static uint64_t getPointerSize(const Value *V, AliasAnalysis &AA) {
@@ -455,13 +496,13 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
Instruction *Inst = BBI++;
// Handle 'free' calls specially.
- if (CallInst *F = isFreeCall(Inst, AA->getTargetLibraryInfo())) {
+ if (CallInst *F = isFreeCall(Inst, TLI)) {
MadeChange |= HandleFree(F);
continue;
}
// If we find something that writes memory, get its memory dependence.
- if (!hasMemoryWrite(Inst))
+ if (!hasMemoryWrite(Inst, TLI))
continue;
MemDepResult InstDep = MD->getDependency(Inst);
@@ -484,7 +525,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
// in case we need it.
WeakVH NextInst(BBI);
- DeleteDeadInstruction(SI, *MD, AA->getTargetLibraryInfo());
+ DeleteDeadInstruction(SI, *MD, TLI);
if (NextInst == 0) // Next instruction deleted.
BBI = BB.begin();
@@ -531,7 +572,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
<< *DepWrite << "\n KILLER: " << *Inst << '\n');
// Delete the store and now-dead instructions that feed it.
- DeleteDeadInstruction(DepWrite, *MD, AA->getTargetLibraryInfo());
+ DeleteDeadInstruction(DepWrite, *MD, TLI);
++NumFastStores;
MadeChange = true;
@@ -628,7 +669,7 @@ bool DSE::HandleFree(CallInst *F) {
MemDepResult Dep = MD->getPointerDependencyFrom(Loc, false, InstPt, BB);
while (Dep.isDef() || Dep.isClobber()) {
Instruction *Dependency = Dep.getInst();
- if (!hasMemoryWrite(Dependency) || !isRemovable(Dependency))
+ if (!hasMemoryWrite(Dependency, TLI) || !isRemovable(Dependency))
break;
Value *DepPointer =
@@ -641,7 +682,7 @@ bool DSE::HandleFree(CallInst *F) {
Instruction *Next = llvm::next(BasicBlock::iterator(Dependency));
// DCE instructions only used to calculate that store
- DeleteDeadInstruction(Dependency, *MD, AA->getTargetLibraryInfo());
+ DeleteDeadInstruction(Dependency, *MD, TLI);
++NumFastStores;
MadeChange = true;
@@ -681,8 +722,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
// Okay, so these are dead heap objects, but if the pointer never escapes
// then it's leaked by this function anyways.
- else if (isAllocLikeFn(I, AA->getTargetLibraryInfo()) &&
- !PointerMayBeCaptured(I, true, true))
+ else if (isAllocLikeFn(I, TLI) && !PointerMayBeCaptured(I, true, true))
DeadStackObjects.insert(I);
}
@@ -698,7 +738,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
--BBI;
// If we find a store, check to see if it points into a dead stack value.
- if (hasMemoryWrite(BBI) && isRemovable(BBI)) {
+ if (hasMemoryWrite(BBI, TLI) && isRemovable(BBI)) {
// See through pointer-to-pointer bitcasts
SmallVector<Value *, 4> Pointers;
GetUnderlyingObjects(getStoredPointerOperand(BBI), Pointers);
@@ -726,8 +766,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
dbgs() << '\n');
// DCE instructions only used to calculate that store.
- DeleteDeadInstruction(Dead, *MD, AA->getTargetLibraryInfo(),
- &DeadStackObjects);
+ DeleteDeadInstruction(Dead, *MD, TLI, &DeadStackObjects);
++NumFastStores;
MadeChange = true;
continue;
@@ -735,10 +774,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
}
// Remove any dead non-memory-mutating instructions.
- if (isInstructionTriviallyDead(BBI, AA->getTargetLibraryInfo())) {
+ if (isInstructionTriviallyDead(BBI, TLI)) {
Instruction *Inst = BBI++;
- DeleteDeadInstruction(Inst, *MD, AA->getTargetLibraryInfo(),
- &DeadStackObjects);
+ DeleteDeadInstruction(Inst, *MD, TLI, &DeadStackObjects);
++NumFastOther;
MadeChange = true;
continue;
@@ -754,7 +792,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
if (CallSite CS = cast<Value>(BBI)) {
// Remove allocation function calls from the list of dead stack objects;
// there can't be any references before the definition.
- if (isAllocLikeFn(BBI, AA->getTargetLibraryInfo()))
+ if (isAllocLikeFn(BBI, TLI))
DeadStackObjects.remove(BBI);
// If this call does not access memory, it can't be loading any of our
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 09a186f7f9..f8709a537f 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -145,7 +145,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
// not user specified.
unsigned Threshold = CurrentThreshold;
if (!UserThreshold &&
- Header->getParent()->hasFnAttr(Attribute::OptimizeForSize))
+ Header->getParent()->getFnAttributes().hasOptimizeForSizeAttr())
Threshold = OptSizeUnrollThreshold;
// Find trip count and trip multiple if count is not available
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 58f7739888..74c8f43ec2 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -638,7 +638,7 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) {
// Check to see if it would be profitable to unswitch current loop.
// Do not do non-trivial unswitch while optimizing for size.
- if (OptimizeForSize || F->hasFnAttr(Attribute::OptimizeForSize))
+ if (OptimizeForSize || F->getFnAttributes().hasOptimizeForSizeAttr())
return false;
UnswitchNontrivialCondition(LoopCond, Val, currentLoop);
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index e3182d319c..a8dc0533bf 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -202,11 +202,11 @@ public:
use_iterator use_begin(const_iterator I) { return Uses[I - begin()].begin(); }
use_iterator use_end(unsigned Idx) { return Uses[Idx].end(); }
use_iterator use_end(const_iterator I) { return Uses[I - begin()].end(); }
- void use_insert(unsigned Idx, use_iterator UI, const PartitionUse &U) {
- Uses[Idx].insert(UI, U);
+ void use_push_back(unsigned Idx, const PartitionUse &U) {
+ Uses[Idx].push_back(U);
}
- void use_insert(const_iterator I, use_iterator UI, const PartitionUse &U) {
- Uses[I - begin()].insert(UI, U);
+ void use_push_back(const_iterator I, const PartitionUse &U) {
+ Uses[I - begin()].push_back(U);
}
void use_erase(unsigned Idx, use_iterator UI) { Uses[Idx].erase(UI); }
void use_erase(const_iterator I, use_iterator UI) {
@@ -522,8 +522,10 @@ private:
void insertUse(Instruction &I, int64_t Offset, uint64_t Size,
bool IsSplittable = false) {
- // Completely skip uses which don't overlap the allocation.
- if ((Offset >= 0 && (uint64_t)Offset >= AllocSize) ||
+ // Completely skip uses which have a zero size or don't overlap the
+ // allocation.
+ if (Size == 0 ||
+ (Offset >= 0 && (uint64_t)Offset >= AllocSize) ||
(Offset < 0 && (uint64_t)-Offset >= Size)) {
DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte use @" << Offset
<< " which starts past the end of the " << AllocSize
@@ -660,11 +662,14 @@ private:
bool Inserted = false;
llvm::tie(PMI, Inserted)
= MemTransferPartitionMap.insert(std::make_pair(&II, NewIdx));
- if (!Inserted && Offsets.IsSplittable) {
+ if (Offsets.IsSplittable &&
+ (!Inserted || II.getRawSource() == II.getRawDest())) {
// We've found a memory transfer intrinsic which refers to the alloca as
- // both a source and dest. We refuse to split these to simplify splitting
- // logic. If possible, SROA will still split them into separate allocas
- // and then re-analyze.
+ // both a source and dest. This is detected either by direct equality of
+ // the operand values, or when we visit the intrinsic twice due to two
+ // different chains of values leading to it. We refuse to split these to
+ // simplify splitting logic. If possible, SROA will still split them into
+ // separate allocas and then re-analyze.
Offsets.IsSplittable = false;
P.Partitions[PMI->second].IsSplittable = false;
P.Partitions[NewIdx].IsSplittable = false;
@@ -697,6 +702,9 @@ private:
SmallVector<std::pair<Instruction *, Instruction *>, 4> Uses;
Visited.insert(Root);
Uses.push_back(std::make_pair(cast<Instruction>(*U), Root));
+ // If there are no loads or stores, the access is dead. We mark that as
+ // a size zero access.
+ Size = 0;
do {
Instruction *I, *UsedI;
llvm::tie(UsedI, I) = Uses.pop_back_val();
@@ -824,9 +832,9 @@ private:
}
void insertUse(Instruction &User, int64_t Offset, uint64_t Size) {
- // If the use extends outside of the allocation, record it as a dead use
- // for elimination later.
- if ((uint64_t)Offset >= AllocSize ||
+ // If the use has a zero size or extends outside of the allocation, record
+ // it as a dead use for elimination later.
+ if (Size == 0 || (uint64_t)Offset >= AllocSize ||
(Offset < 0 && (uint64_t)-Offset >= Size))
return markAsDead(User);
@@ -853,7 +861,7 @@ private:
PartitionUse NewUse(std::max(I->BeginOffset, BeginOffset),
std::min(I->EndOffset, EndOffset),
&User, cast<Instruction>(*U));
- P.Uses[I - P.begin()].push_back(NewUse);
+ P.use_push_back(I, NewUse);
if (isa<PHINode>(U->getUser()) || isa<SelectInst>(U->getUser()))
P.PHIOrSelectOpMap[std::make_pair(&User, U->get())]
= std::make_pair(I - P.begin(), P.Uses[I - P.begin()].size() - 1);
@@ -1102,8 +1110,6 @@ AllocaPartitioning::AllocaPartitioning(const TargetData &TD, AllocaInst &AI)
Uses.resize(Partitions.size());
UseBuilder UB(TD, AI, *this);
UB();
- for (iterator I = Partitions.begin(), E = Partitions.end(); I != E; ++I)
- std::stable_sort(use_begin(I), use_end(I));
}
Type *AllocaPartitioning::getCommonType(iterator I) const {
@@ -1890,7 +1896,8 @@ private:
Value *extractInteger(IRBuilder<> &IRB, IntegerType *TargetTy,
uint64_t Offset) {
assert(IntPromotionTy && "Alloca is not an integer we can extract from");
- Value *V = IRB.CreateLoad(&NewAI, getName(".load"));
+ Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ getName(".load"));
assert(Offset >= NewAllocaBeginOffset && "Out of bounds offset");
uint64_t RelOffset = Offset - NewAllocaBeginOffset;
if (RelOffset)
@@ -1906,7 +1913,7 @@ private:
StoreInst *insertInteger(IRBuilder<> &IRB, Value *V, uint64_t Offset) {
IntegerType *Ty = cast<IntegerType>(V->getType());
if (Ty == IntPromotionTy)
- return IRB.CreateStore(V, &NewAI);
+ return IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
assert(Ty->getBitWidth() < IntPromotionTy->getBitWidth() &&
"Cannot insert a larger integer!");
@@ -1918,10 +1925,12 @@ private:
APInt Mask = ~Ty->getMask().zext(IntPromotionTy->getBitWidth())
.shl(RelOffset*8);
- Value *Old = IRB.CreateAnd(IRB.CreateLoad(&NewAI, getName(".oldload")),
+ Value *Old = IRB.CreateAnd(IRB.CreateAlignedLoad(&NewAI,
+ NewAI.getAlignment(),
+ getName(".oldload")),
Mask, getName(".mask"));
- return IRB.CreateStore(IRB.CreateOr(Old, V, getName(".insert")),
- &NewAI);
+ return IRB.CreateAlignedStore(IRB.CreateOr(Old, V, getName(".insert")),
+ &NewAI, NewAI.getAlignment());
}
void deleteIfTriviallyDead(Value *V) {
@@ -1943,12 +1952,12 @@ private:
Value *Result;
if (LI.getType() == VecTy->getElementType() ||
BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) {
- Result
- = IRB.CreateExtractElement(IRB.CreateLoad(&NewAI, getName(".load")),
- getIndex(IRB, BeginOffset),
- getName(".extract"));
+ Result = IRB.CreateExtractElement(
+ IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")),
+ getIndex(IRB, BeginOffset), getName(".extract"));
} else {
- Result = IRB.CreateLoad(&NewAI, getName(".load"));
+ Result = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ getName(".load"));
}
if (Result->getType() != LI.getType())
Result = getValueCast(IRB, Result, LI.getType());
@@ -1983,6 +1992,9 @@ private:
Value *NewPtr = getAdjustedAllocaPtr(IRB,
LI.getPointerOperand()->getType());
LI.setOperand(0, NewPtr);
+ if (LI.getAlignment())
+ LI.setAlignment(MinAlign(NewAI.getAlignment(),
+ BeginOffset - NewAllocaBeginOffset));
DEBUG(dbgs() << " to: " << LI << "\n");
deleteIfTriviallyDead(OldOp);
@@ -1996,13 +2008,14 @@ private:
BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) {
if (V->getType() != ElementTy)
V = getValueCast(IRB, V, ElementTy);
- V = IRB.CreateInsertElement(IRB.CreateLoad(&NewAI, getName(".load")), V,
- getIndex(IRB, BeginOffset),
+ LoadInst *LI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ getName(".load"));
+ V = IRB.CreateInsertElement(LI, V, getIndex(IRB, BeginOffset),
getName(".insert"));
} else if (V->getType() != VecTy) {
V = getValueCast(IRB, V, VecTy);
}
- StoreInst *Store = IRB.CreateStore(V, &NewAI);
+ StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
Pass.DeadInsts.push_back(&SI);
(void)Store;
@@ -2033,6 +2046,9 @@ private:
Value *NewPtr = getAdjustedAllocaPtr(IRB,
SI.getPointerOperand()->getType());
SI.setOperand(1, NewPtr);
+ if (SI.getAlignment())
+ SI.setAlignment(MinAlign(NewAI.getAlignment(),
+ BeginOffset - NewAllocaBeginOffset));
DEBUG(dbgs() << " to: " << SI << "\n");
deleteIfTriviallyDead(OldOp);
@@ -2048,6 +2064,15 @@ private:
// pointer to the new alloca.
if (!isa<Constant>(II.getLength())) {
II.setDest(getAdjustedAllocaPtr(IRB, II.getRawDest()->getType()));
+
+ Type *CstTy = II.getAlignmentCst()->getType();
+ if (!NewAI.getAlignment())
+ II.setAlignment(ConstantInt::get(CstTy, 0));
+ else
+ II.setAlignment(
+ ConstantInt::get(CstTy, MinAlign(NewAI.getAlignment(),
+ BeginOffset - NewAllocaBeginOffset)));
+
deleteIfTriviallyDead(OldPtr);
return false;
}
@@ -2067,11 +2092,15 @@ private:
!TD.isLegalInteger(TD.getTypeSizeInBits(ScalarTy)))) {
Type *SizeTy = II.getLength()->getType();
Constant *Size = ConstantInt::get(SizeTy, EndOffset - BeginOffset);
+ unsigned Align = 1;
+ if (NewAI.getAlignment())
+ Align = MinAlign(NewAI.getAlignment(),
+ BeginOffset - NewAllocaBeginOffset);
CallInst *New
= IRB.CreateMemSet(getAdjustedAllocaPtr(IRB,
II.getRawDest()->getType()),
- II.getValue(), Size, II.getAlignment(),
+ II.getValue(), Size, Align,
II.isVolatile());
(void)New;
DEBUG(dbgs() << " to: " << *New << "\n");
@@ -2109,11 +2138,13 @@ private:
// If this is an element-wide memset of a vectorizable alloca, insert it.
if (VecTy && (BeginOffset > NewAllocaBeginOffset ||
EndOffset < NewAllocaEndOffset)) {
- StoreInst *Store = IRB.CreateStore(
- IRB.CreateInsertElement(IRB.CreateLoad(&NewAI, getName(".load")), V,
- getIndex(IRB, BeginOffset),
+ StoreInst *Store = IRB.CreateAlignedStore(
+ IRB.CreateInsertElement(IRB.CreateAlignedLoad(&NewAI,
+ NewAI.getAlignment(),
+ getName(".load")),
+ V, getIndex(IRB, BeginOffset),
getName(".insert")),
- &NewAI);
+ &NewAI, NewAI.getAlignment());
(void)Store;
DEBUG(dbgs() << " to: " << *Store << "\n");
return true;
@@ -2131,7 +2162,8 @@ private:
assert(V->getType() == VecTy);
}
- Value *New = IRB.CreateStore(V, &NewAI, II.isVolatile());
+ Value *New = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
+ II.isVolatile());
(void)New;
DEBUG(dbgs() << " to: " << *New << "\n");
return !II.isVolatile();
@@ -2164,6 +2196,13 @@ private:
else
II.setSource(getAdjustedAllocaPtr(IRB, II.getRawSource()->getType()));
+ Type *CstTy = II.getAlignmentCst()->getType();
+ if (II.getAlignment() > 1)
+ II.setAlignment(ConstantInt::get(
+ CstTy, MinAlign(II.getAlignment(),
+ MinAlign(NewAI.getAlignment(),
+ BeginOffset - NewAllocaBeginOffset))));
+
DEBUG(dbgs() << " to: " << II << "\n");
deleteIfTriviallyDead(OldOp);
return false;
@@ -2221,6 +2260,11 @@ private:
OtherPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy,
getName("." + OtherPtr->getName()));
+ unsigned Align = II.getAlignment();
+ if (Align > 1)
+ Align = MinAlign(RelOffset.zextOrTrunc(64).getZExtValue(),
+ MinAlign(II.getAlignment(), NewAI.getAlignment()));
+
// Strip all inbounds GEPs and pointer casts to try to dig out any root
// alloca that should be re-examined after rewriting this instruction.
if (AllocaInst *AI
@@ -2236,8 +2280,7 @@ private:
CallInst *New = IRB.CreateMemCpy(IsDest ? OurPtr : OtherPtr,
IsDest ? OtherPtr : OurPtr,
- Size, II.getAlignment(),
- II.isVolatile());
+ Size, Align, II.isVolatile());
(void)New;
DEBUG(dbgs() << " to: " << *New << "\n");
return false;
@@ -2251,22 +2294,25 @@ private:
Value *Src;
if (IsVectorElement && !IsDest) {
// We have to extract rather than load.
- Src = IRB.CreateExtractElement(IRB.CreateLoad(SrcPtr,
- getName(".copyload")),
- getIndex(IRB, BeginOffset),
- getName(".copyextract"));
+ Src = IRB.CreateExtractElement(
+ IRB.CreateAlignedLoad(SrcPtr, Align, getName(".copyload")),
+ getIndex(IRB, BeginOffset),
+ getName(".copyextract"));
} else {
- Src = IRB.CreateLoad(SrcPtr, II.isVolatile(), getName(".copyload"));
+ Src = IRB.CreateAlignedLoad(SrcPtr, Align, II.isVolatile(),
+ getName(".copyload"));
}
if (IsVectorElement && IsDest) {
// We have to insert into a loaded copy before storing.
- Src = IRB.CreateInsertElement(IRB.CreateLoad(&NewAI, getName(".load")),
- Src, getIndex(IRB, BeginOffset),
- getName(".insert"));
+ Src = IRB.CreateInsertElement(
+ IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")),
+ Src, getIndex(IRB, BeginOffset),
+ getName(".insert"));
}
- Value *Store = IRB.CreateStore(Src, DstPtr, II.isVolatile());
+ StoreInst *Store = cast<StoreInst>(
+ IRB.CreateAlignedStore(Src, DstPtr, Align, II.isVolatile()));
(void)Store;
DEBUG(dbgs() << " to: " << *Store << "\n");
return !II.isVolatile();
@@ -2460,8 +2506,7 @@ private:
else {
AllocaPartitioning::PartitionUse OtherUse = *UI;
OtherUse.User = Load;
- P.use_insert(PI, std::upper_bound(UI, P.use_end(PI), OtherUse),
- OtherUse);
+ P.use_push_back(PI, OtherUse);
}
}
}
@@ -2559,7 +2604,7 @@ private:
LoadInst *OtherLoad = IsTrueVal ? FL : TL;
assert(OtherUse.Ptr == OtherLoad->getOperand(0));
OtherUse.User = OtherLoad;
- P.use_insert(PI, P.use_end(PI), OtherUse);
+ P.use_push_back(PI, OtherUse);
}
// Transfer alignment and TBAA info if present.
@@ -2576,8 +2621,6 @@ private:
LI->replaceAllUsesWith(V);
Pass.DeadInsts.push_back(LI);
}
- if (PI != P.end())
- std::stable_sort(P.use_begin(PI), P.use_end(PI));
deleteIfTriviallyDead(OldPtr);
return NewPtr == &NewAI;
@@ -2959,9 +3002,19 @@ bool SROA::rewriteAllocaPartition(AllocaInst &AI,
assert(PI == P.begin() && "Begin offset is zero on later partition");
NewAI = &AI;
} else {
- // FIXME: The alignment here is overly conservative -- we could in many
- // cases get away with much weaker alignment constraints.
- NewAI = new AllocaInst(AllocaTy, 0, AI.getAlignment(),
+ unsigned Alignment = AI.getAlignment();
+ if (!Alignment) {
+ // The minimum alignment which users can rely on when the explicit
+ // alignment is omitted or zero is that required by the ABI for this
+ // type.
+ Alignment = TD->getABITypeAlignment(AI.getAllocatedType());
+ }
+ Alignment = MinAlign(Alignment, PI->BeginOffset);
+ // If we will get at least this much alignment from the type alone, leave
+ // the alloca's alignment unconstrained.
+ if (Alignment <= TD->getABITypeAlignment(AllocaTy))
+ Alignment = 0;
+ NewAI = new AllocaInst(AllocaTy, 0, Alignment,
AI.getName() + ".sroa." + Twine(PI - P.begin()),
&AI);
++NumNewAllocas;
diff --git a/lib/Transforms/Utils/IntegerDivision.cpp b/lib/Transforms/Utils/IntegerDivision.cpp
index 9d630349ab..55227e2714 100644
--- a/lib/Transforms/Utils/IntegerDivision.cpp
+++ b/lib/Transforms/Utils/IntegerDivision.cpp
@@ -23,11 +23,69 @@
using namespace llvm;
+/// Generate code to compute the remainder of two signed integers. Returns the
+/// remainder, which will have the sign of the dividend. Builder's insert point
+/// should be pointing where the caller wants code generated, e.g. at the srem
+/// instruction. This will generate a urem in the process, and Builder's insert
+/// point will be pointing at the uren (if present, i.e. not folded), ready to
+/// be expanded if the user wishes
+static Value *generateSignedRemainderCode(Value *Dividend, Value *Divisor,
+ IRBuilder<> &Builder) {
+ ConstantInt *ThirtyOne = Builder.getInt32(31);
+
+ // ; %dividend_sgn = ashr i32 %dividend, 31
+ // ; %divisor_sgn = ashr i32 %divisor, 31
+ // ; %dvd_xor = xor i32 %dividend, %dividend_sgn
+ // ; %dvs_xor = xor i32 %divisor, %divisor_sgn
+ // ; %u_dividend = sub i32 %dvd_xor, %dividend_sgn
+ // ; %u_divisor = sub i32 %dvs_xor, %divisor_sgn
+ // ; %urem = urem i32 %dividend, %divisor
+ // ; %xored = xor i32 %urem, %dividend_sgn
+ // ; %srem = sub i32 %xored, %dividend_sgn
+ Value *DividendSign = Builder.CreateAShr(Dividend, ThirtyOne);
+ Value *DivisorSign = Builder.CreateAShr(Divisor, ThirtyOne);
+ Value *DvdXor = Builder.CreateXor(Dividend, DividendSign);
+ Value *DvsXor = Builder.CreateXor(Divisor, DivisorSign);
+ Value *UDividend = Builder.CreateSub(DvdXor, DividendSign);
+ Value *UDivisor = Builder.CreateSub(DvsXor, DivisorSign);
+ Value *URem = Builder.CreateURem(UDividend, UDivisor);
+ Value *Xored = Builder.CreateXor(URem, DividendSign);
+ Value *SRem = Builder.CreateSub(Xored, DividendSign);
+
+ if (Instruction *URemInst = dyn_cast<Instruction>(URem))
+ Builder.SetInsertPoint(URemInst);
+
+ return SRem;
+}
+
+
+/// Generate code to compute the remainder of two unsigned integers. Returns the
+/// remainder. Builder's insert point should be pointing where the caller wants
+/// code generated, e.g. at the urem instruction. This will generate a udiv in
+/// the process, and Builder's insert point will be pointing at the udiv (if
+/// present, i.e. not folded), ready to be expanded if the user wishes
+static Value *generatedUnsignedRemainderCode(Value *Dividend, Value *Divisor,
+ IRBuilder<> &Builder) {
+ // Remainder = Dividend - Quotient*Divisor
+
+ // ; %quotient = udiv i32 %dividend, %divisor
+ // ; %product = mul i32 %divisor, %quotient
+ // ; %remainder = sub i32 %dividend, %product
+ Value *Quotient = Builder.CreateUDiv(Dividend, Divisor);
+ Value *Product = Builder.CreateMul(Divisor, Quotient);
+ Value *Remainder = Builder.CreateSub(Dividend, Product);
+
+ if (Instruction *UDiv = dyn_cast<Instruction>(Quotient))
+ Builder.SetInsertPoint(UDiv);
+
+ return Remainder;
+}
+
/// Generate code to divide two signed integers. Returns the quotient, rounded
-/// towards 0. Builder's insert point should be pointing at the sdiv
-/// instruction. This will generate a udiv in the process, and Builder's insert
-/// point will be pointing at the udiv (if present, i.e. not folded), ready to
-/// be expanded if the user wishes.
+/// towards 0. Builder's insert point should be pointing where the caller wants
+/// code generated, e.g. at the sdiv instruction. This will generate a udiv in
+/// the process, and Builder's insert point will be pointing at the udiv (if
+/// present, i.e. not folded), ready to be expanded if the user wishes.
static Value *generateSignedDivisionCode(Value *Dividend, Value *Divisor,
IRBuilder<> &Builder) {
// Implementation taken from compiler-rt's __divsi3
@@ -62,8 +120,8 @@ static Value *generateSignedDivisionCode(Value *Dividend, Value *Divisor,
}
/// Generates code to divide two unsigned scalar 32-bit integers. Returns the
-/// quotient, rounded towards 0. Builder's insert point should be pointing at
-/// the udiv instruction.
+/// quotient, rounded towards 0. Builder's insert point should be pointing where
+/// the caller wants code generated, e.g. at the udiv instruction.
static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor,
IRBuilder<> &Builder) {
// The basic algorithm can be found in the compiler-rt project's
@@ -265,6 +323,56 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor,
return Q_5;
}
+/// Generate code to calculate the remainder of two integers, replacing Rem with
+/// the generated code. This currently generates code using the udiv expansion,
+/// but future work includes generating more specialized code, e.g. when more
+/// information about the operands are known. Currently only implements 32bit
+/// scalar division (due to udiv's limitation), but future work is removing this
+/// limitation.
+///
+/// @brief Replace Rem with generated code.
+bool llvm::expandRemainder(BinaryOperator *Rem) {
+ assert((Rem->getOpcode() == Instruction::SRem ||
+ Rem->getOpcode() == Instruction::URem) &&
+ "Trying to expand remainder from a non-remainder function");
+
+ IRBuilder<> Builder(Rem);
+
+ // First prepare the sign if it's a signed remainder
+ if (Rem->getOpcode() == Instruction::SRem) {
+ Value *Remainder = generateSignedRemainderCode(Rem->getOperand(0),
+ Rem->getOperand(1), Builder);
+
+ Rem->replaceAllUsesWith(Remainder);
+ Rem->dropAllReferences();
+ Rem->eraseFromParent();
+
+ // If we didn't actually generate a udiv instruction, we're done
+ BinaryOperator *BO = dyn_cast<BinaryOperator>(Builder.GetInsertPoint());
+ if (!BO || BO->getOpcode() != Instruction::URem)
+ return true;
+
+ Rem = BO;
+ }
+
+ Value *Remainder = generatedUnsignedRemainderCode(Rem->getOperand(0),
+ Rem->getOperand(1),
+ Builder);
+
+ Rem->replaceAllUsesWith(Remainder);
+ Rem->dropAllReferences();
+ Rem->eraseFromParent();
+
+ // Expand the udiv
+ if (BinaryOperator *UDiv = dyn_cast<BinaryOperator>(Builder.GetInsertPoint())) {
+ assert(UDiv->getOpcode() == Instruction::UDiv && "Non-udiv in expansion?");
+ expandDivision(UDiv);
+ }
+
+ return true;
+}
+
+
/// Generate code to divide two integers, replacing Div with the generated
/// code. This currently generates code similarly to compiler-rt's
/// implementations, but future work includes generating more specialized code
@@ -287,7 +395,7 @@ bool llvm::expandDivision(BinaryOperator *Div) {
if (Div->getOpcode() == Instruction::SDiv) {
// Lower the code to unsigned division, and reset Div to point to the udiv.
Value *Quotient = generateSignedDivisionCode(Div->getOperand(0),
- Div->getOperand(1), Builder);
+ Div->getOperand(1), Builder);
Div->replaceAllUsesWith(Quotient);
Div->dropAllReferences();
Div->eraseFromParent();
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 876ff2c337..065325b7c2 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -58,9 +58,10 @@ static cl::opt<bool>
SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
cl::desc("Sink common instructions down to the end block"));
-STATISTIC(NumSpeculations, "Number of speculative executed instructions");
+STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables");
STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block");
+STATISTIC(NumSpeculations, "Number of speculative executed instructions");
namespace {
/// ValueEqualityComparisonCase - Represents a case of a switch.
@@ -3240,83 +3241,227 @@ static bool GetCaseResults(SwitchInst *SI,
return true;
}
-/// BuildLookupTable - Build a lookup table with the contents of Results, using
-/// DefaultResult to fill the holes in the table. If the table ends up
-/// containing the same result in each element, set *SingleResult to that value
-/// and return NULL.
-static GlobalVariable *BuildLookupTable(Module &M,
- uint64_t TableSize,
- ConstantInt *Offset,
- const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Results,
- Constant *DefaultResult,
- Constant **SingleResult) {
- assert(Results.size() && "Need values to build lookup table");
- assert(TableSize >= Results.size() && "Table needs to hold all values");
+namespace {
+ /// SwitchLookupTable - This class represents a lookup table that can be used
+ /// to replace a switch.
+ class SwitchLookupTable {
+ public:
+ /// SwitchLookupTable - Create a lookup table to use as a switch replacement
+ /// with the contents of Values, using DefaultValue to fill any holes in the
+ /// table.
+ SwitchLookupTable(Module &M,
+ uint64_t TableSize,
+ ConstantInt *Offset,
+ const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Values,
+ Constant *DefaultValue,
+ const TargetData *TD);
+
+ /// BuildLookup - Build instructions with Builder to retrieve the value at
+ /// the position given by Index in the lookup table.
+ Value *BuildLookup(Value *Index, IRBuilder<> &Builder);
+
+ /// WouldFitInRegister - Return true if a table with TableSize elements of
+ /// type ElementType would fit in a target-legal register.
+ static bool WouldFitInRegister(const TargetData *TD,
+ uint64_t TableSize,
+ const Type *ElementType);
+
+ private:
+ // Depending on the contents of the table, it can be represented in
+ // different ways.
+ enum {
+ // For tables where each element contains the same value, we just have to
+ // store that single value and return it for each lookup.
+ SingleValueKind,
+
+ // For small tables with integer elements, we can pack them into a bitmap
+ // that fits into a target-legal register. Values are retrieved by
+ // shift and mask operations.
+ BitMapKind,
+
+ // The table is stored as an array of values. Values are retrieved by load
+ // instructions from the table.
+ ArrayKind
+ } Kind;
+
+ // For SingleValueKind, this is the single value.
+ Constant *SingleValue;
+
+ // For BitMapKind, this is the bitmap.
+ ConstantInt *BitMap;
+ IntegerType *BitMapElementTy;
+
+ // For ArrayKind, this is the array.
+ GlobalVariable *Array;
+ };
+}
+
+SwitchLookupTable::SwitchLookupTable(Module &M,
+ uint64_t TableSize,
+ ConstantInt *Offset,
+ const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Values,
+ Constant *DefaultValue,
+ const TargetData *TD) {
+ assert(Values.size() && "Can't build lookup table without values!");
+ assert(TableSize >= Values.size() && "Can't fit values in table!");
// If all values in the table are equal, this is that value.
- Constant *SameResult = Results.begin()->second;
+ SingleValue = Values.begin()->second;
// Build up the table contents.
- std::vector<Constant*> TableContents(TableSize);
- for (size_t I = 0, E = Results.size(); I != E; ++I) {
- ConstantInt *CaseVal = Results[I].first;
- Constant *CaseRes = Results[I].second;
-
- uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
+ SmallVector<Constant*, 64> TableContents(TableSize);
+ for (size_t I = 0, E = Values.size(); I != E; ++I) {
+ ConstantInt *CaseVal = Values[I].first;
+ Constant *CaseRes = Values[I].second;
+ assert(CaseRes->getType() == DefaultValue->getType());
+
+ uint64_t Idx = (CaseVal->getValue() - Offset->getValue())
+ .getLimitedValue();
TableContents[Idx] = CaseRes;
- if (CaseRes != SameResult)
- SameResult = NULL;
+ if (CaseRes != SingleValue)
+ SingleValue = NULL;
}
// Fill in any holes in the table with the default result.
- if (Results.size() < TableSize) {
- for (unsigned i = 0; i < TableSize; ++i) {
- if (!TableContents[i])
- TableContents[i] = DefaultResult;
+ if (Values.size() < TableSize) {
+ for (uint64_t I = 0; I < TableSize; ++I) {
+ if (!TableContents[I])
+ TableContents[I] = DefaultValue;
}
- if (DefaultResult != SameResult)
- SameResult = NULL;
+ if (DefaultValue != SingleValue)
+ SingleValue = NULL;
+ }
+
+ // If each element in the table contains the same value, we only need to store
+ // that single value.
+ if (SingleValue) {
+ Kind = SingleValueKind;
+ return;
}
- // Same result was used in the entire table; just return that.
- if (SameResult) {
- *SingleResult = SameResult;
- return NULL;
+ // If the type is integer and the table fits in a register, build a bitmap.
+ if (WouldFitInRegister(TD, TableSize, DefaultValue->getType())) {
+ IntegerType *IT = cast<IntegerType>(DefaultValue->getType());
+ APInt TableInt(TableSize * IT->getBitWidth(), 0);
+ for (uint64_t I = TableSize; I > 0; --I) {
+ TableInt <<= IT->getBitWidth();
+ ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]);
+ TableInt |= Val->getValue().zext(TableInt.getBitWidth());
+ }
+ BitMap = ConstantInt::get(M.getContext(), TableInt);
+ BitMapElementTy = IT;
+ Kind = BitMapKind;
+ ++NumBitMaps;
+ return;
}
- ArrayType *ArrayTy = ArrayType::get(DefaultResult->getType(), TableSize);
+ // Store the table in an array.
+ ArrayType *ArrayTy = ArrayType::get(DefaultValue->getType(), TableSize);
Constant *Initializer = ConstantArray::get(ArrayTy, TableContents);
- GlobalVariable *GV = new GlobalVariable(M, ArrayTy, /*constant=*/ true,
- GlobalVariable::PrivateLinkage,
- Initializer,
- "switch.table");
- GV->setUnnamedAddr(true);
- return GV;
+ Array = new GlobalVariable(M, ArrayTy, /*constant=*/ true,
+ GlobalVariable::PrivateLinkage,
+ Initializer,
+ "switch.table");
+ Array->setUnnamedAddr(true);
+ Kind = ArrayKind;
+}
+
+Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) {
+ switch (Kind) {
+ case SingleValueKind:
+ return SingleValue;
+ case BitMapKind: {
+ // Type of the bitmap (e.g. i59).
+ IntegerType *MapTy = BitMap->getType();
+
+ // Cast Index to the same type as the bitmap.
+ // Note: The Index is <= the number of elements in the table, so
+ // truncating it to the width of the bitmask is safe.
+ Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast");
+
+ // Multiply the shift amount by the element width.
+ ShiftAmt = Builder.CreateMul(ShiftAmt,
+ ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()),
+ "switch.shiftamt");
+
+ // Shift down.
+ Value *DownShifted = Builder.CreateLShr(BitMap, ShiftAmt,
+ "switch.downshift");
+ // Mask off.
+ return Builder.CreateTrunc(DownShifted, BitMapElementTy,
+ "switch.masked");
+ }
+ case ArrayKind: {
+ Value *GEPIndices[] = { Builder.getInt32(0), Index };
+ Value *GEP = Builder.CreateInBoundsGEP(Array, GEPIndices,
+ "switch.gep");
+ return Builder.CreateLoad(GEP, "switch.load");
+ }
+ }
+ llvm_unreachable("Unknown lookup table kind!");
+}
+
+bool SwitchLookupTable::WouldFitInRegister(const TargetData *TD,
+ uint64_t TableSize,
+ const Type *ElementType) {
+ if (!TD)
+ return false;
+ const IntegerType *IT = dyn_cast<IntegerType>(ElementType);
+ if (!IT)
+ return false;
+ // FIXME: If the type is wider than it needs to be, e.g. i8 but all values
+ // are <= 15, we could try to narrow the type.
+
+ // Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
+ if (TableSize >= UINT_MAX/IT->getBitWidth())
+ return false;
+ return TD->fitsInLegalInteger(TableSize * IT->getBitWidth());
+}
+
+/// ShouldBuildLookupTable - Determine whether a lookup table should be built
+/// for this switch, based on the number of caes, size of the table and the
+/// types of the results.
+static bool ShouldBuildLookupTable(SwitchInst *SI,
+ uint64_t TableSize,
+ const TargetData *TD,
+ const SmallDenseMap<PHINode*, Type*>& ResultTypes) {
+ // The table density should be at least 40%. This is the same criterion as for
+ // jump tables, see SelectionDAGBuilder::handleJTSwitchCase.
+ // FIXME: Find the best cut-off.
+ if (SI->getNumCases() > TableSize || TableSize >= UINT64_MAX / 10)
+ return false; // TableSize overflowed, or mul below might overflow.
+ if (SI->getNumCases() * 10 >= TableSize * 4)
+ return true;
+
+ // If each table would fit in a register, we should build it anyway.
+ for (SmallDenseMap<PHINode*, Type*>::const_iterator I = ResultTypes.begin(),
+ E = ResultTypes.end(); I != E; ++I) {
+ if (!SwitchLookupTable::WouldFitInRegister(TD, TableSize, I->second))
+ return false;
+ }
+ return true;
}
/// SwitchToLookupTable - If the switch is only used to initialize one or more
/// phi nodes in a common successor block with different constant values,
/// replace the switch with lookup tables.
static bool SwitchToLookupTable(SwitchInst *SI,
- IRBuilder<> &Builder) {
+ IRBuilder<> &Builder,
+ const TargetData* TD) {
assert(SI->getNumCases() > 1 && "Degenerate switch?");
// FIXME: Handle unreachable cases.
// FIXME: If the switch is too sparse for a lookup table, perhaps we could
// split off a dense part and build a lookup table for that.
- // FIXME: If the results are all integers and the lookup table would fit in a
- // target-legal register, we should store them as a bitmap and use shift/mask
- // to look up the result.
-
// FIXME: This creates arrays of GEPs to constant strings, which means each
// GEP needs a runtime relocation in PIC code. We should just build one big
// string and lookup indices into that.
- // Ignore the switch if the number of cases are too small.
+ // Ignore the switch if the number of cases is too small.
// This is similar to the check when building jump tables in
// SelectionDAGBuilder::handleJTSwitchCase.
// FIXME: Determine the best cut-off.
@@ -3370,33 +3515,12 @@ static bool SwitchToLookupTable(SwitchInst *SI,
}
APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue();
- // The table density should be at lest 40%. This is the same criterion as for
- // jump tables, see SelectionDAGBuilder::handleJTSwitchCase.
- // FIXME: Find the best cut-off.
- // Be careful to avoid overlow in the density computation.
- if (RangeSpread.zextOrSelf(64).ugt(UINT64_MAX / 4 - 1))
- return false;
uint64_t TableSize = RangeSpread.getLimitedValue() + 1;
- if (SI->getNumCases() * 10 < TableSize * 4)
+ if (!ShouldBuildLookupTable(SI, TableSize, TD, ResultTypes))
return false;
- // Build the lookup tables.
- SmallDenseMap<PHINode*, GlobalVariable*> LookupTables;
- SmallDenseMap<PHINode*, Constant*> SingleResults;
-
- Module &Mod = *CommonDest->getParent()->getParent();
- for (SmallVector<PHINode*, 4>::iterator I = PHIs.begin(), E = PHIs.end();
- I != E; ++I) {
- PHINode *PHI = *I;
-
- Constant *SingleResult = NULL;
- LookupTables[PHI] = BuildLookupTable(Mod, TableSize, MinCaseVal,
- ResultLists[PHI], DefaultResults[PHI],
- &SingleResult);
- SingleResults[PHI] = SingleResult;
- }
-
// Create the BB that does the lookups.
+ Module &Mod = *CommonDest->getParent()->getParent();
BasicBlock *LookupBB = BasicBlock::Create(Mod.getContext(),
"switch.lookup",
CommonDest->getParent(),
@@ -3414,19 +3538,13 @@ static bool SwitchToLookupTable(SwitchInst *SI,
// Populate the BB that does the lookups.
Builder.SetInsertPoint(LookupBB);
bool ReturnedEarly = false;
- for (SmallVector<PHINode*, 4>::iterator I = PHIs.begin(), E = PHIs.end();
- I != E; ++I) {
- PHINode *PHI = *I;
- // There was a single result for this phi; just use that.
- if (Constant *SingleResult = SingleResults[PHI]) {
- PHI->addIncoming(SingleResult, LookupBB);
- continue;
- }
+ for (size_t I = 0, E = PHIs.size(); I != E; ++I) {
+ PHINode *PHI = PHIs[I];
+
+ SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultLists[PHI],
+ DefaultResults[PHI], TD);
- Value *GEPIndices[] = { Builder.getInt32(0), TableIndex };
- Value *GEP = Builder.CreateInBoundsGEP(LookupTables[PHI], GEPIndices,
- "switch.gep");
- Value *Result = Builder.CreateLoad(GEP, "switch.load");
+ Value *Result = Table.BuildLookup(TableIndex, Builder);
// If the result is used to return immediately from the function, we want to
// do that right here.
@@ -3494,7 +3612,7 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
if (ForwardSwitchConditionToPHI(SI))
return SimplifyCFG(BB) | true;
- if (SwitchToLookupTable(SI, Builder))
+ if (SwitchToLookupTable(SI, Builder, TD))
return SimplifyCFG(BB) | true;
return false;
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index fc2538db64..a30b09321b 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -21,7 +21,7 @@
using namespace llvm;
// Out of line method to get vtable etc for class.
-void ValueMapTypeRemapper::Anchor() {}
+void ValueMapTypeRemapper::anchor() {}
Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
ValueMapTypeRemapper *TypeMapper) {
diff --git a/lib/VMCore/Attributes.cpp b/lib/VMCore/Attributes.cpp
index af8163fd40..7d3197cb0d 100644
--- a/lib/VMCore/Attributes.cpp
+++ b/lib/VMCore/Attributes.cpp
@@ -12,6 +12,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Attributes.h"
+#include "AttributesImpl.h"
+#include "LLVMContextImpl.h"
#include "llvm/Type.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/FoldingSet.h"
@@ -94,21 +96,52 @@ std::string Attributes::getAsString() const {
return Result;
}
-Attributes Attribute::typeIncompatible(Type *Ty) {
- Attributes Incompatible = None;
+Attributes Attributes::typeIncompatible(Type *Ty) {
+ Attributes Incompatible = Attribute::None;
if (!Ty->isIntegerTy())
// Attributes that only apply to integers.
- Incompatible |= SExt | ZExt;
+ Incompatible |= Attribute::SExt | Attribute::ZExt;
if (!Ty->isPointerTy())
// Attributes that only apply to pointers.
- Incompatible |= ByVal | Nest | NoAlias | StructRet | NoCapture;
+ Incompatible |= Attribute::ByVal | Attribute::Nest | Attribute::NoAlias |
+ Attribute::StructRet | Attribute::NoCapture;
return Incompatible;
}
//===----------------------------------------------------------------------===//
+// AttributeImpl Definition
+//===----------------------------------------------------------------------===//
+
+Attributes::Attributes(AttributesImpl *A) : Bits(0) {}
+
+Attributes Attributes::get(LLVMContext &Context, Attributes::Builder &B) {
+ // If there are no attributes, return an empty Attributes class.
+ if (B.Bits == 0)
+ return Attributes();
+
+ // Otherwise, build a key to look up the existing attributes.
+ LLVMContextImpl *pImpl = Context.pImpl;
+ FoldingSetNodeID ID;
+ ID.AddInteger(B.Bits);
+
+ void *InsertPoint;
+ AttributesImpl *PA = pImpl->AttrsSet.FindNodeOrInsertPos(ID, InsertPoint);
+
+ if (!PA) {
+ // If we didn't find any existing attributes of the same shape then create a
+ // new one and insert it.
+ PA = new AttributesImpl(B.Bits);
+ pImpl->AttrsSet.InsertNode(PA, InsertPoint);
+ }
+
+ // Return the AttributesList that we found or created.
+ return Attributes(PA);
+}
+
+//===----------------------------------------------------------------------===//
// AttributeListImpl Definition
//===----------------------------------------------------------------------===//
diff --git a/lib/VMCore/AttributesImpl.h b/lib/VMCore/AttributesImpl.h
new file mode 100644
index 0000000000..90890a14c3
--- /dev/null
+++ b/lib/VMCore/AttributesImpl.h
@@ -0,0 +1,40 @@
+//===-- AttributesImpl.h - Attributes Internals -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines various helper methods and classes used by LLVMContextImpl
+// for creating and managing attributes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ATTRIBUTESIMPL_H
+#define LLVM_ATTRIBUTESIMPL_H
+
+#include "llvm/ADT/FoldingSet.h"
+
+namespace llvm {
+
+class AttributesImpl : public FoldingSetNode {
+ uint64_t Bits; // FIXME: We will be expanding this.
+
+ void operator=(const AttributesImpl &) LLVM_DELETED_FUNCTION;
+ AttributesImpl(const AttributesImpl &) LLVM_DELETED_FUNCTION;
+public:
+ AttributesImpl(uint64_t bits) : Bits(bits) {}
+
+ void Profile(FoldingSetNodeID &ID) const {
+ Profile(ID, Bits);
+ }
+ static void Profile(FoldingSetNodeID &ID, uint64_t Bits) {
+ ID.AddInteger(Bits);
+ }
+};
+
+} // end llvm namespace
+
+#endif
diff --git a/lib/VMCore/Function.cpp b/lib/VMCore/Function.cpp
index 2e0b3168c9..012d27603a 100644
--- a/lib/VMCore/Function.cpp
+++ b/lib/VMCore/Function.cpp
@@ -78,7 +78,7 @@ unsigned Argument::getArgNo() const {
/// in its containing function.
bool Argument::hasByValAttr() const {
if (!getType()->isPointerTy()) return false;
- return getParent()->paramHasAttr(getArgNo()+1, Attribute::ByVal);
+ return getParent()->getParamAttributes(getArgNo()+1).hasByValAttr();
}
unsigned Argument::getParamAlignment() const {
@@ -91,21 +91,21 @@ unsigned Argument::getParamAlignment() const {
/// it in its containing function.
bool Argument::hasNestAttr() const {
if (!getType()->isPointerTy()) return false;
- return getParent()->paramHasAttr(getArgNo()+1, Attribute::Nest);
+ return getParent()->getParamAttributes(getArgNo()+1).hasNestAttr();
}
/// hasNoAliasAttr - Return true if this argument has the noalias attribute on
/// it in its containing function.
bool Argument::hasNoAliasAttr() const {
if (!getType()->isPointerTy()) return false;
- return getParent()->paramHasAttr(getArgNo()+1, Attribute::NoAlias);
+ return getParent()->getParamAttributes(getArgNo()+1).hasNoAliasAttr();
}
/// hasNoCaptureAttr - Return true if this argument has the nocapture attribute
/// on it in its containing function.
bool Argument::hasNoCaptureAttr() const {
if (!getType()->isPointerTy()) return false;
- return getParent()->paramHasAttr(getArgNo()+1, Attribute::NoCapture);
+ return getParent()->getParamAttributes(getArgNo()+1).hasNoCaptureAttr();
}
/// hasSRetAttr - Return true if this argument has the sret attribute on
@@ -114,7 +114,7 @@ bool Argument::hasStructRetAttr() const {
if (!getType()->isPointerTy()) return false;
if (this != getParent()->arg_begin())
return false; // StructRet param must be first param
- return getParent()->paramHasAttr(1, Attribute::StructRet);
+ return getParent()->getParamAttributes(1).hasStructRetAttr();
}
/// addAttr - Add a Attribute to an argument
diff --git a/lib/VMCore/IRBuilder.cpp b/lib/VMCore/IRBuilder.cpp
index 5c4e6d9642..04f08fe28e 100644
--- a/lib/VMCore/IRBuilder.cpp
+++ b/lib/VMCore/IRBuilder.cpp
@@ -80,7 +80,7 @@ CreateMemSet(Value *Ptr, Value *Val, Value *Size, unsigned Align,
CallInst *IRBuilderBase::
CreateMemCpy(Value *Dst, Value *Src, Value *Size, unsigned Align,
- bool isVolatile, MDNode *TBAATag) {
+ bool isVolatile, MDNode *TBAATag, MDNode *TBAAStructTag) {
Dst = getCastedInt8PtrValue(Dst);
Src = getCastedInt8PtrValue(Src);
@@ -94,6 +94,10 @@ CreateMemCpy(Value *Dst, Value *Src, Value *Size, unsigned Align,
// Set the TBAA info if present.
if (TBAATag)
CI->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+
+ // Set the TBAA Struct info if present.
+ if (TBAAStructTag)
+ CI->setMetadata(LLVMContext::MD_tbaa_struct, TBAAStructTag);
return CI;
}
diff --git a/lib/VMCore/LLVMContextImpl.cpp b/lib/VMCore/LLVMContextImpl.cpp
index 6279bb823d..a86363b632 100644
--- a/lib/VMCore/LLVMContextImpl.cpp
+++ b/lib/VMCore/LLVMContextImpl.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "LLVMContextImpl.h"
+#include "llvm/Attributes.h"
#include "llvm/Module.h"
#include "llvm/ADT/STLExtras.h"
#include <algorithm>
@@ -93,6 +94,11 @@ LLVMContextImpl::~LLVMContextImpl() {
E = CDSConstants.end(); I != E; ++I)
delete I->second;
CDSConstants.clear();
+
+ // Destroy attributes.
+ for (FoldingSetIterator<AttributesImpl> I = AttrsSet.begin(),
+ E = AttrsSet.end(); I != E; ++I)
+ delete &*I;
// Destroy MDNodes. ~MDNode can move and remove nodes between the MDNodeSet
// and the NonUniquedMDNodes sets, so copy the values out first.
@@ -107,6 +113,7 @@ LLVMContextImpl::~LLVMContextImpl() {
(*I)->destroy();
assert(MDNodeSet.empty() && NonUniquedMDNodes.empty() &&
"Destroying all MDNodes didn't empty the Context's sets.");
+
// Destroy MDStrings.
DeleteContainerSeconds(MDStringCache);
}
diff --git a/lib/VMCore/LLVMContextImpl.h b/lib/VMCore/LLVMContextImpl.h
index 2252028b15..ee31814c05 100644
--- a/lib/VMCore/LLVMContextImpl.h
+++ b/lib/VMCore/LLVMContextImpl.h
@@ -16,6 +16,7 @@
#define LLVM_LLVMCONTEXT_IMPL_H
#include "llvm/LLVMContext.h"
+#include "AttributesImpl.h"
#include "ConstantsContext.h"
#include "LeaksContext.h"
#include "llvm/Constants.h"
@@ -253,10 +254,13 @@ public:
typedef DenseMap<DenseMapAPFloatKeyInfo::KeyTy, ConstantFP*,
DenseMapAPFloatKeyInfo> FPMapTy;
FPMapTy FPConstants;
+
+ FoldingSet<AttributesImpl> AttrsSet;
StringMap<Value*> MDStringCache;
-
+
FoldingSet<MDNode> MDNodeSet;
+
// MDNodes may be uniqued or not uniqued. When they're not uniqued, they
// aren't in the MDNodeSet, but they're still shared between objects, so no
// one object can destroy them. This set allows us to at least destroy them
diff --git a/lib/VMCore/ValueTypes.cpp b/lib/VMCore/ValueTypes.cpp
index e9370f62e6..2ee9f0f4c9 100644
--- a/lib/VMCore/ValueTypes.cpp
+++ b/lib/VMCore/ValueTypes.cpp
@@ -56,31 +56,31 @@ bool EVT::isExtendedVector() const {
}
bool EVT::isExtended16BitVector() const {
- return isExtendedVector() && getSizeInBits() == 16;
+ return isExtendedVector() && getExtendedSizeInBits() == 16;
}
bool EVT::isExtended32BitVector() const {
- return isExtendedVector() && getSizeInBits() == 32;
+ return isExtendedVector() && getExtendedSizeInBits() == 32;
}
bool EVT::isExtended64BitVector() const {
- return isExtendedVector() && getSizeInBits() == 64;
+ return isExtendedVector() && getExtendedSizeInBits() == 64;
}
bool EVT::isExtended128BitVector() const {
- return isExtendedVector() && getSizeInBits() == 128;
+ return isExtendedVector() && getExtendedSizeInBits() == 128;
}
bool EVT::isExtended256BitVector() const {
- return isExtendedVector() && getSizeInBits() == 256;
+ return isExtendedVector() && getExtendedSizeInBits() == 256;
}
bool EVT::isExtended512BitVector() const {
- return isExtendedVector() && getSizeInBits() == 512;
+ return isExtendedVector() && getExtendedSizeInBits() == 512;
}
bool EVT::isExtended1024BitVector() const {
- return isExtendedVector() && getSizeInBits() == 1024;
+ return isExtendedVector() && getExtendedSizeInBits() == 1024;
}
EVT EVT::getExtendedVectorElementType() const {
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index 647a52fbdd..292456ab63 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -546,7 +546,7 @@ void Verifier::VerifyParameterAttrs(Attributes Attrs, Type *Ty,
MutI.getAsString() + " are incompatible!", V);
}
- Attributes TypeI = Attrs & Attribute::typeIncompatible(Ty);
+ Attributes TypeI = Attrs & Attributes::typeIncompatible(Ty);
Assert1(!TypeI, "Wrong type for attribute " +
TypeI.getAsString(), V);
diff --git a/test/Analysis/CallGraph/do-nothing-intrinsic.ll b/test/Analysis/CallGraph/do-nothing-intrinsic.ll
new file mode 100644
index 0000000000..f28ad10f57
--- /dev/null
+++ b/test/Analysis/CallGraph/do-nothing-intrinsic.ll
@@ -0,0 +1,13 @@
+; RUN: opt < %s -basiccg
+; PR13903
+
+define void @main() {
+ invoke void @llvm.donothing()
+ to label %ret unwind label %unw
+unw:
+ %tmp = landingpad i8 personality i8 0 cleanup
+ br label %ret
+ret:
+ ret void
+}
+declare void @llvm.donothing() nounwind readnone
diff --git a/test/CodeGen/ARM/2010-12-07-PEIBug.ll b/test/CodeGen/ARM/2010-12-07-PEIBug.ll
index 770ad4466a..4879f4e10b 100644
--- a/test/CodeGen/ARM/2010-12-07-PEIBug.ll
+++ b/test/CodeGen/ARM/2010-12-07-PEIBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a9 | FileCheck %s
; rdar://8728956
define hidden void @foo() nounwind ssp {
diff --git a/test/CodeGen/ARM/2012-05-04-vmov.ll b/test/CodeGen/ARM/2012-05-04-vmov.ll
new file mode 100644
index 0000000000..d52ef2cc5a
--- /dev/null
+++ b/test/CodeGen/ARM/2012-05-04-vmov.ll
@@ -0,0 +1,11 @@
+; RUN: llc -O1 -march=arm -mcpu=cortex-a9 < %s | FileCheck -check-prefix=A9-CHECK %s
+; RUN: llc -O1 -march=arm -mcpu=swift < %s | FileCheck -check-prefix=SWIFT-CHECK %s
+; Check that swift doesn't use vmov.32. <rdar://problem/10453003>.
+
+define <2 x i32> @testuvec(<2 x i32> %A, <2 x i32> %B) nounwind {
+entry:
+ %div = udiv <2 x i32> %A, %B
+ ret <2 x i32> %div
+; A9-CHECK: vmov.32
+; SWIFT-CHECK-NOT: vmov.32
+}
diff --git a/test/CodeGen/ARM/2012-05-10-PreferVMOVtoVDUP32.ll b/test/CodeGen/ARM/2012-05-10-PreferVMOVtoVDUP32.ll
new file mode 100644
index 0000000000..dd678436c0
--- /dev/null
+++ b/test/CodeGen/ARM/2012-05-10-PreferVMOVtoVDUP32.ll
@@ -0,0 +1,14 @@
+; RUN: llc -march=arm -mcpu=swift < %s | FileCheck %s
+; <rdar://problem/10451892>
+
+define void @f(i32 %x, i32* %p) nounwind ssp {
+entry:
+; CHECK-NOT: vdup.32
+ %vecinit.i = insertelement <2 x i32> undef, i32 %x, i32 0
+ %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %x, i32 1
+ %0 = bitcast i32* %p to i8*
+ tail call void @llvm.arm.neon.vst1.v2i32(i8* %0, <2 x i32> %vecinit1.i, i32 4)
+ ret void
+}
+
+declare void @llvm.arm.neon.vst1.v2i32(i8*, <2 x i32>, i32) nounwind
diff --git a/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv.ll b/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv.ll
new file mode 100644
index 0000000000..75766099a2
--- /dev/null
+++ b/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 2>&1 | FileCheck %s
+
+; Check for error message:
+; CHECK: non-trivial scalar-to-vector conversion, possible invalid constraint for vector type
+
+define void @f() nounwind ssp {
+ %1 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } asm "vldm $4, { ${0:q}, ${1:q}, ${2:q}, ${3:q} }", "=r,=r,=r,=r,r"(i64* undef) nounwind, !srcloc !0
+ ret void
+}
+
+!0 = metadata !{i32 318437}
diff --git a/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv2.ll b/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv2.ll
new file mode 100644
index 0000000000..6fa1391474
--- /dev/null
+++ b/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv2.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 2>&1 | FileCheck %s
+
+; Check for error message:
+; CHECK: scalar-to-vector conversion failed, possible invalid constraint for vector type
+
+define hidden void @f(i32* %corr, i32 %order) nounwind ssp {
+ tail call void asm sideeffect "vst1.s32 { ${1:q}, ${2:q} }, [$0]", "r,{q0},{q1}"(i32* %corr, <2 x i64>* undef, <2 x i64>* undef) nounwind, !srcloc !0
+ ret void
+}
+
+!0 = metadata !{i32 257}
diff --git a/test/CodeGen/ARM/atomicrmw_minmax.ll b/test/CodeGen/ARM/atomicrmw_minmax.ll
new file mode 100644
index 0000000000..69f1384e12
--- /dev/null
+++ b/test/CodeGen/ARM/atomicrmw_minmax.ll
@@ -0,0 +1,21 @@
+; RUN: llc -march=arm -mcpu=cortex-a9 < %s | FileCheck %s
+
+; CHECK: max:
+define i32 @max(i8 %ctx, i32* %ptr, i32 %val)
+{
+; CHECK: ldrex
+; CHECK: cmp [[old:r[0-9]*]], [[val:r[0-9]*]]
+; CHECK: movhi {{r[0-9]*}}, [[old]]
+ %old = atomicrmw umax i32* %ptr, i32 %val monotonic
+ ret i32 %old
+}
+
+; CHECK: min:
+define i32 @min(i8 %ctx, i32* %ptr, i32 %val)
+{
+; CHECK: ldrex
+; CHECK: cmp [[old:r[0-9]*]], [[val:r[0-9]*]]
+; CHECK: movlo {{r[0-9]*}}, [[old]]
+ %old = atomicrmw umin i32* %ptr, i32 %val monotonic
+ ret i32 %old
+}
diff --git a/test/CodeGen/ARM/avoid-cpsr-rmw.ll b/test/CodeGen/ARM/avoid-cpsr-rmw.ll
index 1b385ab79c..96e83dd88e 100644
--- a/test/CodeGen/ARM/avoid-cpsr-rmw.ll
+++ b/test/CodeGen/ARM/avoid-cpsr-rmw.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a9 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=swift | FileCheck %s
; Avoid some 's' 16-bit instruction which partially update CPSR (and add false
; dependency) when it isn't dependent on last CPSR defining instruction.
; rdar://8928208
diff --git a/test/CodeGen/ARM/call-noret.ll b/test/CodeGen/ARM/call-noret.ll
new file mode 100644
index 0000000000..d294f2cf1a
--- /dev/null
+++ b/test/CodeGen/ARM/call-noret.ll
@@ -0,0 +1,39 @@
+; RUN: llc < %s -mtriple=armv7-apple-ios -mcpu=cortex-a8 | FileCheck %s -check-prefix=ARM
+; RUN: llc < %s -mtriple=armv7-apple-ios -mcpu=swift | FileCheck %s -check-prefix=SWIFT
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 | FileCheck %s -check-prefix=T2
+; rdar://8979299
+
+define void @t1() noreturn nounwind ssp {
+entry:
+; ARM: t1:
+; ARM: mov lr, pc
+; ARM: b _bar
+
+; SWIFT: t1:
+; SWIFT: mov lr, pc
+; SWIFT: b _bar
+
+; T2: t1:
+; T2: blx _bar
+ tail call void @bar() noreturn nounwind
+ unreachable
+}
+
+define void @t2() noreturn nounwind ssp {
+entry:
+; ARM: t2:
+; ARM: mov lr, pc
+; ARM: b _t1
+
+; SWIFT: t2:
+; SWIFT: mov lr, pc
+; SWIFT: b _t1
+
+; T2: t2:
+; T2: mov lr, pc
+; T2: b.w _t1
+ tail call void @t1() noreturn nounwind
+ unreachable
+}
+
+declare void @bar() noreturn
diff --git a/test/CodeGen/ARM/div.ll b/test/CodeGen/ARM/div.ll
index 3d29e05a0c..82cfca182b 100644
--- a/test/CodeGen/ARM/div.ll
+++ b/test/CodeGen/ARM/div.ll
@@ -1,9 +1,13 @@
-; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s -check-prefix=CHECK-ARM
+; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-a8 | FileCheck %s -check-prefix=CHECK-ARM
+; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=swift | FileCheck %s -check-prefix=CHECK-SWIFT
define i32 @f1(i32 %a, i32 %b) {
entry:
; CHECK-ARM: f1
; CHECK-ARM: __divsi3
+
+; CHECK-SWIFT: f1
+; CHECK-SWIFT: sdiv
%tmp1 = sdiv i32 %a, %b ; <i32> [#uses=1]
ret i32 %tmp1
}
@@ -12,6 +16,9 @@ define i32 @f2(i32 %a, i32 %b) {
entry:
; CHECK-ARM: f2
; CHECK-ARM: __udivsi3
+
+; CHECK-SWIFT: f2
+; CHECK-SWIFT: udiv
%tmp1 = udiv i32 %a, %b ; <i32> [#uses=1]
ret i32 %tmp1
}
@@ -20,6 +27,10 @@ define i32 @f3(i32 %a, i32 %b) {
entry:
; CHECK-ARM: f3
; CHECK-ARM: __modsi3
+
+; CHECK-SWIFT: f3
+; CHECK-SWIFT: sdiv
+; CHECK-SWIFT: mls
%tmp1 = srem i32 %a, %b ; <i32> [#uses=1]
ret i32 %tmp1
}
@@ -28,6 +39,10 @@ define i32 @f4(i32 %a, i32 %b) {
entry:
; CHECK-ARM: f4
; CHECK-ARM: __umodsi3
+
+; CHECK-SWIFT: f4
+; CHECK-SWIFT: udiv
+; CHECK-SWIFT: mls
%tmp1 = urem i32 %a, %b ; <i32> [#uses=1]
ret i32 %tmp1
}
diff --git a/test/CodeGen/ARM/domain-conv-vmovs.ll b/test/CodeGen/ARM/domain-conv-vmovs.ll
index 18e169357b..a5c4114458 100644
--- a/test/CodeGen/ARM/domain-conv-vmovs.ll
+++ b/test/CodeGen/ARM/domain-conv-vmovs.ll
@@ -79,8 +79,8 @@ define float @test_ineligible(float, float %in) {
; internal fault).
call void @bar()
; CHECL: bl bar
-; CHECK: vext.32
-; CHECK: vext.32
+; CHECK: vext.32
+; CHECK: vext.32
ret float %val
}
diff --git a/test/CodeGen/ARM/fabss.ll b/test/CodeGen/ARM/fabss.ll
index bcb4ee7452..46c2f1c65f 100644
--- a/test/CodeGen/ARM/fabss.ll
+++ b/test/CodeGen/ARM/fabss.ll
@@ -14,12 +14,12 @@ entry:
declare float @fabsf(float)
; VFP2: test:
-; VFP2: vabs.f32 s1, s1
+; VFP2: vabs.f32 s2, s2
; NFP1: test:
; NFP1: vabs.f32 d1, d1
; NFP0: test:
-; NFP0: vabs.f32 s1, s1
+; NFP0: vabs.f32 s2, s2
; CORTEXA8: test:
; CORTEXA8: vadd.f32 [[D1:d[0-9]+]]
diff --git a/test/CodeGen/ARM/fadds.ll b/test/CodeGen/ARM/fadds.ll
index e35103c045..48ef5ed88f 100644
--- a/test/CodeGen/ARM/fadds.ll
+++ b/test/CodeGen/ARM/fadds.ll
@@ -10,14 +10,14 @@ entry:
}
; VFP2: test:
-; VFP2: vadd.f32 s0, s1, s0
+; VFP2: vadd.f32 s
; NFP1: test:
-; NFP1: vadd.f32 d0, d1, d0
+; NFP1: vadd.f32 d
; NFP0: test:
-; NFP0: vadd.f32 s0, s1, s0
+; NFP0: vadd.f32 s
; CORTEXA8: test:
-; CORTEXA8: vadd.f32 d0, d1, d0
+; CORTEXA8: vadd.f32 d
; CORTEXA9: test:
; CORTEXA9: vadd.f32 s{{.}}, s{{.}}, s{{.}}
diff --git a/test/CodeGen/ARM/fast-isel-pic.ll b/test/CodeGen/ARM/fast-isel-pic.ll
index 392a845d2c..867d53f973 100644
--- a/test/CodeGen/ARM/fast-isel-pic.ll
+++ b/test/CodeGen/ARM/fast-isel-pic.ll
@@ -1,6 +1,8 @@
; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=pic -mtriple=arm-apple-ios | FileCheck %s --check-prefix=ARM
; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARMv7
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=pic -mtriple=thumbv7-none-linux-gnueabi | FileCheck %s --check-prefix=THUMB-ELF
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=pic -mtriple=armv7-none-linux-gnueabi | FileCheck %s --check-prefix=ARMv7-ELF
@g = global i32 0, align 4
@@ -10,6 +12,10 @@ entry:
; THUMB: movw [[reg0:r[0-9]+]],
; THUMB: movt [[reg0]],
; THUMB: add [[reg0]], pc
+; THUMB-ELF: LoadGV
+; THUMB-ELF: ldr.n r[[reg0:[0-9]+]],
+; THUMB-ELF: ldr.n r[[reg1:[0-9]+]],
+; THUMB-ELF: ldr r[[reg0]], [r[[reg1]], r[[reg0]]]
; ARM: LoadGV
; ARM: ldr [[reg1:r[0-9]+]],
; ARM: add [[reg1]], pc, [[reg1]]
@@ -17,6 +23,10 @@ entry:
; ARMv7: movw [[reg2:r[0-9]+]],
; ARMv7: movt [[reg2]],
; ARMv7: add [[reg2]], pc, [[reg2]]
+; ARMv7-ELF: LoadGV
+; ARMv7-ELF: ldr r[[reg2:[0-9]+]],
+; ARMv7-ELF: ldr r[[reg3:[0-9]+]],
+; ARMv7-ELF: ldr r[[reg2]], [r[[reg3]], r[[reg2]]]
%tmp = load i32* @g
ret i32 %tmp
}
@@ -30,6 +40,10 @@ entry:
; THUMB: movt r[[reg3]],
; THUMB: add r[[reg3]], pc
; THUMB: ldr r[[reg3]], [r[[reg3]]]
+; THUMB-ELF: LoadIndirectSymbol
+; THUMB-ELF: ldr.n r[[reg3:[0-9]+]],
+; THUMB-ELF: ldr.n r[[reg4:[0-9]+]],
+; THUMB-ELF: ldr r[[reg3]], [r[[reg4]], r[[reg3]]]
; ARM: LoadIndirectSymbol
; ARM: ldr [[reg4:r[0-9]+]],
; ARM: ldr [[reg4]], [pc, [[reg4]]]
@@ -38,6 +52,10 @@ entry:
; ARMv7: movt r[[reg5]],
; ARMv7: add r[[reg5]], pc, r[[reg5]]
; ARMv7: ldr r[[reg5]], [r[[reg5]]]
+; ARMv7-ELF: LoadIndirectSymbol
+; ARMv7-ELF: ldr r[[reg5:[0-9]+]],
+; ARMv7-ELF: ldr r[[reg6:[0-9]+]],
+; ARMv7-ELF: ldr r[[reg5]], [r[[reg6]], r[[reg5]]]
%tmp = load i32* @i
ret i32 %tmp
}
diff --git a/test/CodeGen/ARM/fdivs.ll b/test/CodeGen/ARM/fdivs.ll
index 31c1ca9405..8fab002135 100644
--- a/test/CodeGen/ARM/fdivs.ll
+++ b/test/CodeGen/ARM/fdivs.ll
@@ -10,14 +10,14 @@ entry:
}
; VFP2: test:
-; VFP2: vdiv.f32 s0, s1, s0
+; VFP2: vdiv.f32 s0, s2, s0
; NFP1: test:
-; NFP1: vdiv.f32 s0, s1, s0
+; NFP1: vdiv.f32 s0, s2, s0
; NFP0: test:
-; NFP0: vdiv.f32 s0, s1, s0
+; NFP0: vdiv.f32 s0, s2, s0
; CORTEXA8: test:
-; CORTEXA8: vdiv.f32 s0, s1, s0
+; CORTEXA8: vdiv.f32 s0, s2, s0
; CORTEXA9: test:
; CORTEXA9: vdiv.f32 s{{.}}, s{{.}}, s{{.}}
diff --git a/test/CodeGen/ARM/fmuls.ll b/test/CodeGen/ARM/fmuls.ll
index 3c3182bc63..1566a9272d 100644
--- a/test/CodeGen/ARM/fmuls.ll
+++ b/test/CodeGen/ARM/fmuls.ll
@@ -10,15 +10,15 @@ entry:
}
; VFP2: test:
-; VFP2: vmul.f32 s0, s1, s0
+; VFP2: vmul.f32 s
; NFP1: test:
-; NFP1: vmul.f32 d0, d1, d0
+; NFP1: vmul.f32 d
; NFP0: test:
-; NFP0: vmul.f32 s0, s1, s0
+; NFP0: vmul.f32 s
; CORTEXA8: test:
-; CORTEXA8: vmul.f32 d0, d1, d0
+; CORTEXA8: vmul.f32 d
; CORTEXA9: test:
; CORTEXA9: vmul.f32 s{{.}}, s{{.}}, s{{.}}
diff --git a/test/CodeGen/ARM/fp_convert.ll b/test/CodeGen/ARM/fp_convert.ll
index 7002cecf36..44298b9c5d 100644
--- a/test/CodeGen/ARM/fp_convert.ll
+++ b/test/CodeGen/ARM/fp_convert.ll
@@ -31,7 +31,7 @@ define float @test3(i32 %a, i32 %b) {
; VFP2: test3:
; VFP2: vcvt.f32.u32 s{{.}}, s{{.}}
; NEON: test3:
-; NEON: vcvt.f32.u32 d0, d0
+; NEON: vcvt.f32.u32 d
entry:
%0 = add i32 %a, %b
%1 = uitofp i32 %0 to float
@@ -42,7 +42,7 @@ define float @test4(i32 %a, i32 %b) {
; VFP2: test4:
; VFP2: vcvt.f32.s32 s{{.}}, s{{.}}
; NEON: test4:
-; NEON: vcvt.f32.s32 d0, d0
+; NEON: vcvt.f32.s32 d
entry:
%0 = add i32 %a, %b
%1 = sitofp i32 %0 to float
diff --git a/test/CodeGen/ARM/fsubs.ll b/test/CodeGen/ARM/fsubs.ll
index bea8d5f4f3..f039e74c8e 100644
--- a/test/CodeGen/ARM/fsubs.ll
+++ b/test/CodeGen/ARM/fsubs.ll
@@ -8,6 +8,6 @@ entry:
ret float %0
}
-; VFP2: vsub.f32 s0, s1, s0
-; NFP1: vsub.f32 d0, d1, d0
-; NFP0: vsub.f32 s0, s1, s0
+; VFP2: vsub.f32 s
+; NFP1: vsub.f32 d
+; NFP0: vsub.f32 s
diff --git a/test/CodeGen/ARM/ifcvt1.ll b/test/CodeGen/ARM/ifcvt1.ll
index cd870bb5d4..fd831442c1 100644
--- a/test/CodeGen/ARM/ifcvt1.ll
+++ b/test/CodeGen/ARM/ifcvt1.ll
@@ -1,17 +1,21 @@
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
+; RUN: llc < %s -march=arm -mcpu=swift | FileCheck %s -check-prefix=SWIFT
define i32 @t1(i32 %a, i32 %b) {
-; CHECK: t1:
+; A8: t1:
+; SWIFT: t1:
%tmp2 = icmp eq i32 %a, 0
br i1 %tmp2, label %cond_false, label %cond_true
cond_true:
-; CHECK: subeq r0, r1, #1
+; A8: subeq r0, r1, #1
+; SWIFT: sub r0, r1, #1
%tmp5 = add i32 %b, 1
ret i32 %tmp5
cond_false:
-; CHECK: addne r0, r1, #1
+; A8: addne r0, r1, #1
+; SWIFT: addne r0, r1, #1
%tmp7 = add i32 %b, -1
ret i32 %tmp7
}
diff --git a/test/CodeGen/ARM/ifcvt12.ll b/test/CodeGen/ARM/ifcvt12.ll
new file mode 100644
index 0000000000..77bdca57e5
--- /dev/null
+++ b/test/CodeGen/ARM/ifcvt12.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin -mcpu=cortex-a8 | FileCheck %s
+define i32 @f1(i32 %a, i32 %b, i32 %c) {
+; CHECK: f1:
+; CHECK: mlsne r0, r0, r1, r2
+ %tmp1 = icmp eq i32 %a, 0
+ br i1 %tmp1, label %cond_false, label %cond_true
+
+cond_true:
+ %tmp2 = mul i32 %a, %b
+ %tmp3 = sub i32 %c, %tmp2
+ ret i32 %tmp3
+
+cond_false:
+ ret i32 %a
+}
diff --git a/test/CodeGen/ARM/ifcvt5.ll b/test/CodeGen/ARM/ifcvt5.ll
index 95f5c97f2a..5081791bc2 100644
--- a/test/CodeGen/ARM/ifcvt5.ll
+++ b/test/CodeGen/ARM/ifcvt5.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-apple-ios -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
+; RUN: llc < %s -mtriple=armv7-apple-ios -mcpu=swift | FileCheck %s -check-prefix=SWIFT
+; rdar://8402126
@x = external global i32* ; <i32**> [#uses=1]
@@ -10,8 +12,12 @@ entry:
}
define i32 @t1(i32 %a, i32 %b) {
-; CHECK: t1:
-; CHECK: poplt {r7, pc}
+; A8: t1:
+; A8: poplt {r7, pc}
+
+; SWIFT: t1:
+; SWIFT: pop {r7, pc}
+; SWIFT: pop {r7, pc}
entry:
%tmp1 = icmp sgt i32 %a, 10 ; <i1> [#uses=1]
br i1 %tmp1, label %cond_true, label %UnifiedReturnBlock
diff --git a/test/CodeGen/ARM/ldr_post.ll b/test/CodeGen/ARM/ldr_post.ll
index 8ddf025dbf..a6ca434483 100644
--- a/test/CodeGen/ARM/ldr_post.ll
+++ b/test/CodeGen/ARM/ldr_post.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -march=arm | FileCheck %s
+; RUN: llc < %s -march=arm -mcpu=swift | FileCheck %s
; CHECK: test1:
; CHECK: ldr {{.*, \[.*]}}, -r2
diff --git a/test/CodeGen/ARM/ldr_pre.ll b/test/CodeGen/ARM/ldr_pre.ll
index e904e5fd2c..6c40ad7326 100644
--- a/test/CodeGen/ARM/ldr_pre.ll
+++ b/test/CodeGen/ARM/ldr_pre.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -march=arm | FileCheck %s
+; RUN: llc < %s -march=arm -mcpu=swift | FileCheck %s
; CHECK: test1:
; CHECK: ldr {{.*!}}
diff --git a/test/CodeGen/ARM/mls.ll b/test/CodeGen/ARM/mls.ll
index a6cdba4454..066bf98de6 100644
--- a/test/CodeGen/ARM/mls.ll
+++ b/test/CodeGen/ARM/mls.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -march=arm -mattr=+v6t2 | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+v6t2 -arm-use-mulops=false | FileCheck %s -check-prefix=NO_MULOPS
define i32 @f1(i32 %a, i32 %b, i32 %c) {
%tmp1 = mul i32 %a, %b
@@ -13,4 +14,15 @@ define i32 @f2(i32 %a, i32 %b, i32 %c) {
ret i32 %tmp2
}
+; CHECK: f1:
; CHECK: mls r0, r0, r1, r2
+; NO_MULOPS: f1:
+; NO_MULOPS: mul r0, r0, r1
+; NO_MULOPS-NEXT: sub r0, r2, r0
+
+; CHECK: f2:
+; CHECK: mul r0, r0, r1
+; CHECK-NEXT: sub r0, r0, r2
+; NO_MULOPS: f2:
+; NO_MULOPS: mul r0, r0, r1
+; NO_MULOPS-NEXT: sub r0, r0, r2
diff --git a/test/CodeGen/ARM/neon-fma.ll b/test/CodeGen/ARM/neon-fma.ll
new file mode 100644
index 0000000000..d2cca5009d
--- /dev/null
+++ b/test/CodeGen/ARM/neon-fma.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -mcpu=swift | FileCheck %s
+
+; CHECK: test_v2f32
+; CHECK: vfma.f32 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+
+define <2 x float> @test_v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind readnone ssp {
+entry:
+ %call = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind readnone
+ ret <2 x float> %call
+}
+
+; CHECK: test_v4f32
+; CHECK: vfma.f32 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}}
+
+define <4 x float> @test_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) nounwind readnone ssp {
+entry:
+ %call = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) nounwind readnone
+ ret <4 x float> %call
+}
+
+declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone
+declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/neon_ld2.ll b/test/CodeGen/ARM/neon_ld2.ll
index 630db93035..497619ed74 100644
--- a/test/CodeGen/ARM/neon_ld2.ll
+++ b/test/CodeGen/ARM/neon_ld2.ll
@@ -1,10 +1,16 @@
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc < %s -march=arm -mcpu=swift | FileCheck %s --check-prefix=SWIFT
; CHECK: t1
; CHECK: vld1.64
; CHECK: vld1.64
; CHECK: vadd.i64 q
; CHECK: vst1.64
+; SWIFT: t1
+; SWIFT: vld1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+, :128\]}}
+; SWIFT: vld1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+, :128\]}}
+; SWIFT: vadd.i64 q
+; SWIFT: vst1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+, :128\]}}
define void @t1(<4 x i32>* %r, <2 x i64>* %a, <2 x i64>* %b) nounwind {
entry:
%0 = load <2 x i64>* %a, align 16 ; <<2 x i64>> [#uses=1]
@@ -21,6 +27,12 @@ entry:
; CHECK: vsub.i64 q
; CHECK: vmov r0, r1, d
; CHECK: vmov r2, r3, d
+; SWIFT: t2
+; SWIFT: vld1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+, :128\]}}
+; SWIFT: vld1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+, :128\]}}
+; SWIFT: vsub.i64 q
+; SWIFT: vmov r0, r1, d
+; SWIFT: vmov r2, r3, d
define <4 x i32> @t2(<2 x i64>* %a, <2 x i64>* %b) nounwind readonly {
entry:
%0 = load <2 x i64>* %a, align 16 ; <<2 x i64>> [#uses=1]
@@ -30,3 +42,18 @@ entry:
ret <4 x i32> %3
}
+; Limited alignment.
+; SWIFT: t3
+; SWIFT: vld1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+}}
+; SWIFT: vld1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+}}
+; SWIFT: vadd.i64 q
+; SWIFT: vst1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+}}
+define void @t3(<4 x i32>* %r, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+entry:
+ %0 = load <2 x i64>* %a, align 8
+ %1 = load <2 x i64>* %b, align 8
+ %2 = add <2 x i64> %0, %1
+ %3 = bitcast <2 x i64> %2 to <4 x i32>
+ store <4 x i32> %3, <4 x i32>* %r, align 8
+ ret void
+}
diff --git a/test/CodeGen/ARM/opt-shuff-tstore.ll b/test/CodeGen/ARM/opt-shuff-tstore.ll
index df98e231cc..74c9a21355 100644
--- a/test/CodeGen/ARM/opt-shuff-tstore.ll
+++ b/test/CodeGen/ARM/opt-shuff-tstore.ll
@@ -2,7 +2,7 @@
; CHECK: func_4_8
; CHECK: vst1.32
-; CHECK-NEXT: bx lr
+; CHECK: bx lr
define void @func_4_8(<4 x i8> %param, <4 x i8>* %p) {
%r = add <4 x i8> %param, <i8 1, i8 2, i8 3, i8 4>
store <4 x i8> %r, <4 x i8>* %p
@@ -11,7 +11,7 @@ define void @func_4_8(<4 x i8> %param, <4 x i8>* %p) {
; CHECK: func_2_16
; CHECK: vst1.32
-; CHECK-NEXT: bx lr
+; CHECK: bx lr
define void @func_2_16(<2 x i16> %param, <2 x i16>* %p) {
%r = add <2 x i16> %param, <i16 1, i16 2>
store <2 x i16> %r, <2 x i16>* %p
diff --git a/test/CodeGen/ARM/reg_sequence.ll b/test/CodeGen/ARM/reg_sequence.ll
index 206b96cd07..6d6586e4f2 100644
--- a/test/CodeGen/ARM/reg_sequence.ll
+++ b/test/CodeGen/ARM/reg_sequence.ll
@@ -124,7 +124,6 @@ return1:
return2:
; CHECK: %return2
; CHECK: vadd.i32
-; CHECK: vorr {{q[0-9]+}}, {{q[0-9]+}}
; CHECK-NOT: vmov
; CHECK: vst2.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}
%tmp100 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0 ; <<4 x i32>> [#uses=1]
diff --git a/test/CodeGen/ARM/subreg-remat.ll b/test/CodeGen/ARM/subreg-remat.ll
index 03ae12c6de..455bfce0f2 100644
--- a/test/CodeGen/ARM/subreg-remat.ll
+++ b/test/CodeGen/ARM/subreg-remat.ll
@@ -4,14 +4,14 @@ target triple = "thumbv7-apple-ios"
;
; The vector %v2 is built like this:
;
-; %vreg6:ssub_1<def> = VMOVSR %vreg0<kill>, pred:14, pred:%noreg, %vreg6<imp-def>; DPR_VFP2:%vreg6 GPR:%vreg0
+; %vreg6:ssub_1<def> = ...
; %vreg6:ssub_0<def> = VLDRS <cp#0>, 0, pred:14, pred:%noreg; mem:LD4[ConstantPool] DPR_VFP2:%vreg6
;
; When %vreg6 spills, the VLDRS constant pool load cannot be rematerialized
; since it implicitly reads the ssub_1 sub-register.
;
; CHECK: f1
-; CHECK: vmov s1, r0
+; CHECK: vmov d0, r0, r0
; CHECK: vldr s0, LCPI
; The vector must be spilled:
; CHECK: vstr d0,
diff --git a/test/CodeGen/Mips/dsp-r1.ll b/test/CodeGen/Mips/dsp-r1.ll
new file mode 100644
index 0000000000..c9dc8cfd0b
--- /dev/null
+++ b/test/CodeGen/Mips/dsp-r1.ll
@@ -0,0 +1,1241 @@
+; RUN: llc -march=mipsel -mattr=+dsp < %s | FileCheck %s
+
+define i32 @test__builtin_mips_extr_w1(i32 %i0, i32, i64 %a0) nounwind {
+entry:
+; CHECK: extr.w
+
+ %1 = tail call i32 @llvm.mips.extr.w(i64 %a0, i32 15)
+ ret i32 %1
+}
+
+declare i32 @llvm.mips.extr.w(i64, i32) nounwind
+
+define i32 @test__builtin_mips_extr_w2(i32 %i0, i32, i64 %a0, i32 %a1) nounwind {
+entry:
+; CHECK: extrv.w
+
+ %1 = tail call i32 @llvm.mips.extr.w(i64 %a0, i32 %a1)
+ ret i32 %1
+}
+
+define i32 @test__builtin_mips_extr_r_w1(i32 %i0, i32, i64 %a0) nounwind {
+entry:
+; CHECK: extr_r.w
+
+ %1 = tail call i32 @llvm.mips.extr.r.w(i64 %a0, i32 15)
+ ret i32 %1
+}
+
+declare i32 @llvm.mips.extr.r.w(i64, i32) nounwind
+
+define i32 @test__builtin_mips_extr_s_h1(i32 %i0, i32, i64 %a0, i32 %a1) nounwind {
+entry:
+; CHECK: extrv_s.h
+
+ %1 = tail call i32 @llvm.mips.extr.s.h(i64 %a0, i32 %a1)
+ ret i32 %1
+}
+
+declare i32 @llvm.mips.extr.s.h(i64, i32) nounwind
+
+define i32 @test__builtin_mips_extr_rs_w1(i32 %i0, i32, i64 %a0) nounwind {
+entry:
+; CHECK: extr_rs.w
+
+ %1 = tail call i32 @llvm.mips.extr.rs.w(i64 %a0, i32 15)
+ ret i32 %1
+}
+
+declare i32 @llvm.mips.extr.rs.w(i64, i32) nounwind
+
+define i32 @test__builtin_mips_extr_rs_w2(i32 %i0, i32, i64 %a0, i32 %a1) nounwind {
+entry:
+; CHECK: extrv_rs.w
+
+ %1 = tail call i32 @llvm.mips.extr.rs.w(i64 %a0, i32 %a1)
+ ret i32 %1
+}
+
+define i32 @test__builtin_mips_extr_s_h2(i32 %i0, i32, i64 %a0) nounwind {
+entry:
+; CHECK: extr_s.h
+
+ %1 = tail call i32 @llvm.mips.extr.s.h(i64 %a0, i32 15)
+ ret i32 %1
+}
+
+define i32 @test__builtin_mips_extr_r_w2(i32 %i0, i32, i64 %a0, i32 %a1) nounwind {
+entry:
+; CHECK: extrv_r.w
+
+ %1 = tail call i32 @llvm.mips.extr.r.w(i64 %a0, i32 %a1)
+ ret i32 %1
+}
+
+define i32 @test__builtin_mips_extp1(i32 %i0, i32, i64 %a0) nounwind {
+entry:
+; CHECK: extp ${{[0-9]+}}
+
+ %1 = tail call i32 @llvm.mips.extp(i64 %a0, i32 15)
+ ret i32 %1
+}
+
+declare i32 @llvm.mips.extp(i64, i32) nounwind
+
+define i32 @test__builtin_mips_extp2(i32 %i0, i32, i64 %a0, i32 %a1) nounwind {
+entry:
+; CHECK: extpv
+
+ %1 = tail call i32 @llvm.mips.extp(i64 %a0, i32 %a1)
+ ret i32 %1
+}
+
+define i32 @test__builtin_mips_extpdp1(i32 %i0, i32, i64 %a0) nounwind {
+entry:
+; CHECK: extpdp ${{[0-9]+}}
+
+ %1 = tail call i32 @llvm.mips.extpdp(i64 %a0, i32 15)
+ ret i32 %1
+}
+
+declare i32 @llvm.mips.extpdp(i64, i32) nounwind
+
+define i32 @test__builtin_mips_extpdp2(i32 %i0, i32, i64 %a0, i32 %a1) nounwind {
+entry:
+; CHECK: extpdpv
+
+ %1 = tail call i32 @llvm.mips.extpdp(i64 %a0, i32 %a1)
+ ret i32 %1
+}
+
+define i64 @test__builtin_mips_dpau_h_qbl1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind readnone {
+entry:
+; CHECK: dpau.h.qbl
+
+ %1 = bitcast i32 %a1.coerce to <4 x i8>
+ %2 = bitcast i32 %a2.coerce to <4 x i8>
+ %3 = tail call i64 @llvm.mips.dpau.h.qbl(i64 %a0, <4 x i8> %1, <4 x i8> %2)
+ ret i64 %3
+}
+
+declare i64 @llvm.mips.dpau.h.qbl(i64, <4 x i8>, <4 x i8>) nounwind readnone
+
+define i64 @test__builtin_mips_dpau_h_qbr1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind readnone {
+entry:
+; CHECK: dpau.h.qbr
+
+ %1 = bitcast i32 %a1.coerce to <4 x i8>
+ %2 = bitcast i32 %a2.coerce to <4 x i8>
+ %3 = tail call i64 @llvm.mips.dpau.h.qbr(i64 %a0, <4 x i8> %1, <4 x i8> %2)
+ ret i64 %3
+}
+
+declare i64 @llvm.mips.dpau.h.qbr(i64, <4 x i8>, <4 x i8>) nounwind readnone
+
+define i64 @test__builtin_mips_dpsu_h_qbl1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind readnone {
+entry:
+; CHECK: dpsu.h.qbl
+
+ %1 = bitcast i32 %a1.coerce to <4 x i8>
+ %2 = bitcast i32 %a2.coerce to <4 x i8>
+ %3 = tail call i64 @llvm.mips.dpsu.h.qbl(i64 %a0, <4 x i8> %1, <4 x i8> %2)
+ ret i64 %3
+}
+
+declare i64 @llvm.mips.dpsu.h.qbl(i64, <4 x i8>, <4 x i8>) nounwind readnone
+
+define i64 @test__builtin_mips_dpsu_h_qbr1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind readnone {
+entry:
+; CHECK: dpsu.h.qbr
+
+ %1 = bitcast i32 %a1.coerce to <4 x i8>
+ %2 = bitcast i32 %a2.coerce to <4 x i8>
+ %3 = tail call i64 @llvm.mips.dpsu.h.qbr(i64 %a0, <4 x i8> %1, <4 x i8> %2)
+ ret i64 %3
+}
+
+declare i64 @llvm.mips.dpsu.h.qbr(i64, <4 x i8>, <4 x i8>) nounwind readnone
+
+define i64 @test__builtin_mips_dpaq_s_w_ph1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind {
+entry:
+; CHECK: dpaq_s.w.ph
+
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = bitcast i32 %a2.coerce to <2 x i16>
+ %3 = tail call i64 @llvm.mips.dpaq.s.w.ph(i64 %a0, <2 x i16> %1, <2 x i16> %2)
+ ret i64 %3
+}
+
+declare i64 @llvm.mips.dpaq.s.w.ph(i64, <2 x i16>, <2 x i16>) nounwind
+
+define i64 @test__builtin_mips_dpaq_sa_l_w1(i32 %i0, i32, i64 %a0, i32 %a1, i32 %a2) nounwind {
+entry:
+; CHECK: dpaq_sa.l.w
+
+ %1 = tail call i64 @llvm.mips.dpaq.sa.l.w(i64 %a0, i32 %a1, i32 %a2)
+ ret i64 %1
+}
+
+declare i64 @llvm.mips.dpaq.sa.l.w(i64, i32, i32) nounwind
+
+define i64 @test__builtin_mips_dpsq_s_w_ph1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind {
+entry:
+; CHECK: dpsq_s.w.ph
+
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = bitcast i32 %a2.coerce to <2 x i16>
+ %3 = tail call i64 @llvm.mips.dpsq.s.w.ph(i64 %a0, <2 x i16> %1, <2 x i16> %2)
+ ret i64 %3
+}
+
+declare i64 @llvm.mips.dpsq.s.w.ph(i64, <2 x i16>, <2 x i16>) nounwind
+
+define i64 @test__builtin_mips_dpsq_sa_l_w1(i32 %i0, i32, i64 %a0, i32 %a1, i32 %a2) nounwind {
+entry:
+; CHECK: dpsq_sa.l.w
+
+ %1 = tail call i64 @llvm.mips.dpsq.sa.l.w(i64 %a0, i32 %a1, i32 %a2)
+ ret i64 %1
+}
+
+declare i64 @llvm.mips.dpsq.sa.l.w(i64, i32, i32) nounwind
+
+define i64 @test__builtin_mips_mulsaq_s_w_ph1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind {
+entry:
+; CHECK: mulsaq_s.w.ph
+
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = bitcast i32 %a2.coerce to <2 x i16>
+ %3 = tail call i64 @llvm.mips.mulsaq.s.w.ph(i64 %a0, <2 x i16> %1, <2 x i16> %2)
+ ret i64 %3
+}
+
+declare i64 @llvm.mips.mulsaq.s.w.ph(i64, <2 x i16>, <2 x i16>) nounwind
+
+define i64 @test__builtin_mips_maq_s_w_phl1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind {
+entry:
+; CHECK: maq_s.w.phl
+
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = bitcast i32 %a2.coerce to <2 x i16>
+ %3 = tail call i64 @llvm.mips.maq.s.w.phl(i64 %a0, <2 x i16> %1, <2 x i16> %2)
+ ret i64 %3
+}
+
+declare i64 @llvm.mips.maq.s.w.phl(i64, <2 x i16>, <2 x i16>) nounwind
+
+define i64 @test__builtin_mips_maq_s_w_phr1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind {
+entry:
+; CHECK: maq_s.w.phr
+
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = bitcast i32 %a2.coerce to <2 x i16>
+ %3 = tail call i64 @llvm.mips.maq.s.w.phr(i64 %a0, <2 x i16> %1, <2 x i16> %2)
+ ret i64 %3
+}
+
+declare i64 @llvm.mips.maq.s.w.phr(i64, <2 x i16>, <2 x i16>) nounwind
+
+define i64 @test__builtin_mips_maq_sa_w_phl1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind {
+entry:
+; CHECK: maq_sa.w.phl
+
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = bitcast i32 %a2.coerce to <2 x i16>
+ %3 = tail call i64 @llvm.mips.maq.sa.w.phl(i64 %a0, <2 x i16> %1, <2 x i16> %2)
+ ret i64 %3
+}
+
+declare i64 @llvm.mips.maq.sa.w.phl(i64, <2 x i16>, <2 x i16>) nounwind
+
+define i64 @test__builtin_mips_maq_sa_w_phr1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind {
+entry:
+; CHECK: maq_sa.w.phr
+
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = bitcast i32 %a2.coerce to <2 x i16>
+ %3 = tail call i64 @llvm.mips.maq.sa.w.phr(i64 %a0, <2 x i16> %1, <2 x i16> %2)
+ ret i64 %3
+}
+
+declare i64 @llvm.mips.maq.sa.w.phr(i64, <2 x i16>, <2 x i16>) nounwind
+
+define i64 @test__builtin_mips_shilo1(i32 %i0, i32, i64 %a0) nounwind readnone {
+entry:
+; CHECK: shilo $ac{{[0-9]}}
+
+ %1 = tail call i64 @llvm.mips.shilo(i64 %a0, i32 0)
+ ret i64 %1
+}
+
+declare i64 @llvm.mips.shilo(i64, i32) nounwind readnone
+
+define i64 @test__builtin_mips_shilo2(i32 %i0, i32, i64 %a0, i32 %a1) nounwind readnone {
+entry:
+; CHECK: shilov
+
+ %1 = tail call i64 @llvm.mips.shilo(i64 %a0, i32 %a1)
+ ret i64 %1
+}
+
+define i64 @test__builtin_mips_mthlip1(i32 %i0, i32, i64 %a0, i32 %a1) nounwind {
+entry:
+; CHECK: mthlip ${{[0-9]+}}
+
+ %1 = tail call i64 @llvm.mips.mthlip(i64 %a0, i32 %a1)
+ ret i64 %1
+}
+
+declare i64 @llvm.mips.mthlip(i64, i32) nounwind
+
+define i32 @test__builtin_mips_bposge321(i32 %i0) nounwind readonly {
+entry:
+; CHECK: bposge32 $BB{{[0-9]+}}
+
+ %0 = tail call i32 @llvm.mips.bposge32()
+ ret i32 %0
+}
+
+declare i32 @llvm.mips.bposge32() nounwind readonly
+
+define i64 @test__builtin_mips_madd1(i32 %i0, i32, i64 %a0, i32 %a1, i32 %a2) nounwind readnone {
+entry:
+; CHECK: madd $ac{{[0-9]}}
+
+ %1 = tail call i64 @llvm.mips.madd(i64 %a0, i32 %a1, i32 %a2)
+ ret i64 %1
+}
+
+declare i64 @llvm.mips.madd(i64, i32, i32) nounwind readnone
+
+define i64 @test__builtin_mips_maddu1(i32 %i0, i32, i64 %a0, i32 %a1, i32 %a2) nounwind readnone {
+entry:
+; CHECK: maddu $ac{{[0-9]}}
+
+ %1 = tail call i64 @llvm.mips.maddu(i64 %a0, i32 %a1, i32 %a2)
+ ret i64 %1
+}
+
+declare i64 @llvm.mips.maddu(i64, i32, i32) nounwind readnone
+
+define i64 @test__builtin_mips_msub1(i32 %i0, i32, i64 %a0, i32 %a1, i32 %a2) nounwind readnone {
+entry:
+; CHECK: msub $ac{{[0-9]}}
+
+ %1 = tail call i64 @llvm.mips.msub(i64 %a0, i32 %a1, i32 %a2)
+ ret i64 %1
+}
+
+declare i64 @llvm.mips.msub(i64, i32, i32) nounwind readnone
+
+define i64 @test__builtin_mips_msubu1(i32 %i0, i32, i64 %a0, i32 %a1, i32 %a2) nounwind readnone {
+entry:
+; CHECK: msubu $ac{{[0-9]}}
+
+ %1 = tail call i64 @llvm.mips.msubu(i64 %a0, i32 %a1, i32 %a2)
+ ret i64 %1
+}
+
+declare i64 @llvm.mips.msubu(i64, i32, i32) nounwind readnone
+
+define i64 @test__builtin_mips_mult1(i32 %i0, i32 %a0, i32 %a1) nounwind readnone {
+entry:
+; CHECK: mult $ac{{[0-9]}}
+
+ %0 = tail call i64 @llvm.mips.mult(i32 %a0, i32 %a1)
+ ret i64 %0
+}
+
+declare i64 @llvm.mips.mult(i32, i32) nounwind readnone
+
+define i64 @test__builtin_mips_multu1(i32 %i0, i32 %a0, i32 %a1) nounwind readnone {
+entry:
+; CHECK: multu $ac{{[0-9]}}
+
+ %0 = tail call i64 @llvm.mips.multu(i32 %a0, i32 %a1)
+ ret i64 %0
+}
+
+declare i64 @llvm.mips.multu(i32, i32) nounwind readnone
+
+define { i32 } @test__builtin_mips_addq_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: addq.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = tail call <2 x i16> @llvm.mips.addq.ph(<2 x i16> %0, <2 x i16> %1)
+ %3 = bitcast <2 x i16> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.addq.ph(<2 x i16>, <2 x i16>) nounwind
+
+define { i32 } @test__builtin_mips_addq_s_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: addq_s.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = tail call <2 x i16> @llvm.mips.addq.s.ph(<2 x i16> %0, <2 x i16> %1)
+ %3 = bitcast <2 x i16> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.addq.s.ph(<2 x i16>, <2 x i16>) nounwind
+
+define i32 @test__builtin_mips_addq_s_w1(i32 %i0, i32 %a0, i32 %a1) nounwind {
+entry:
+; CHECK: addq_s.w
+
+ %0 = tail call i32 @llvm.mips.addq.s.w(i32 %a0, i32 %a1)
+ ret i32 %0
+}
+
+declare i32 @llvm.mips.addq.s.w(i32, i32) nounwind
+
+define { i32 } @test__builtin_mips_addu_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: addu.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = bitcast i32 %a1.coerce to <4 x i8>
+ %2 = tail call <4 x i8> @llvm.mips.addu.qb(<4 x i8> %0, <4 x i8> %1)
+ %3 = bitcast <4 x i8> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <4 x i8> @llvm.mips.addu.qb(<4 x i8>, <4 x i8>) nounwind
+
+define { i32 } @test__builtin_mips_addu_s_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: addu_s.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = bitcast i32 %a1.coerce to <4 x i8>
+ %2 = tail call <4 x i8> @llvm.mips.addu.s.qb(<4 x i8> %0, <4 x i8> %1)
+ %3 = bitcast <4 x i8> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <4 x i8> @llvm.mips.addu.s.qb(<4 x i8>, <4 x i8>) nounwind
+
+define { i32 } @test__builtin_mips_subq_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: subq.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = tail call <2 x i16> @llvm.mips.subq.ph(<2 x i16> %0, <2 x i16> %1)
+ %3 = bitcast <2 x i16> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.subq.ph(<2 x i16>, <2 x i16>) nounwind
+
+define { i32 } @test__builtin_mips_subq_s_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: subq_s.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = tail call <2 x i16> @llvm.mips.subq.s.ph(<2 x i16> %0, <2 x i16> %1)
+ %3 = bitcast <2 x i16> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.subq.s.ph(<2 x i16>, <2 x i16>) nounwind
+
+define i32 @test__builtin_mips_subq_s_w1(i32 %i0, i32 %a0, i32 %a1) nounwind {
+entry:
+; CHECK: subq_s.w
+
+ %0 = tail call i32 @llvm.mips.subq.s.w(i32 %a0, i32 %a1)
+ ret i32 %0
+}
+
+declare i32 @llvm.mips.subq.s.w(i32, i32) nounwind
+
+define { i32 } @test__builtin_mips_subu_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: subu.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = bitcast i32 %a1.coerce to <4 x i8>
+ %2 = tail call <4 x i8> @llvm.mips.subu.qb(<4 x i8> %0, <4 x i8> %1)
+ %3 = bitcast <4 x i8> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <4 x i8> @llvm.mips.subu.qb(<4 x i8>, <4 x i8>) nounwind
+
+define { i32 } @test__builtin_mips_subu_s_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: subu_s.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = bitcast i32 %a1.coerce to <4 x i8>
+ %2 = tail call <4 x i8> @llvm.mips.subu.s.qb(<4 x i8> %0, <4 x i8> %1)
+ %3 = bitcast <4 x i8> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <4 x i8> @llvm.mips.subu.s.qb(<4 x i8>, <4 x i8>) nounwind
+
+define i32 @test__builtin_mips_addsc1(i32 %i0, i32 %a0, i32 %a1) nounwind {
+entry:
+; CHECK: addsc ${{[0-9]+}}
+
+ %0 = tail call i32 @llvm.mips.addsc(i32 %a0, i32 %a1)
+ ret i32 %0
+}
+
+declare i32 @llvm.mips.addsc(i32, i32) nounwind
+
+define i32 @test__builtin_mips_addwc1(i32 %i0, i32 %a0, i32 %a1) nounwind {
+entry:
+; CHECK: addwc ${{[0-9]+}}
+
+ %0 = tail call i32 @llvm.mips.addwc(i32 %a0, i32 %a1)
+ ret i32 %0
+}
+
+declare i32 @llvm.mips.addwc(i32, i32) nounwind
+
+define i32 @test__builtin_mips_modsub1(i32 %i0, i32 %a0, i32 %a1) nounwind readnone {
+entry:
+; CHECK: modsub ${{[0-9]+}}
+
+ %0 = tail call i32 @llvm.mips.modsub(i32 %a0, i32 %a1)
+ ret i32 %0
+}
+
+declare i32 @llvm.mips.modsub(i32, i32) nounwind readnone
+
+define i32 @test__builtin_mips_raddu_w_qb1(i32 %i0, i32 %a0.coerce) nounwind readnone {
+entry:
+; CHECK: raddu.w.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = tail call i32 @llvm.mips.raddu.w.qb(<4 x i8> %0)
+ ret i32 %1
+}
+
+declare i32 @llvm.mips.raddu.w.qb(<4 x i8>) nounwind readnone
+
+define { i32 } @test__builtin_mips_muleu_s_ph_qbl1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: muleu_s.ph.qbl
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = tail call <2 x i16> @llvm.mips.muleu.s.ph.qbl(<4 x i8> %0, <2 x i16> %1)
+ %3 = bitcast <2 x i16> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.muleu.s.ph.qbl(<4 x i8>, <2 x i16>) nounwind
+
+define { i32 } @test__builtin_mips_muleu_s_ph_qbr1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: muleu_s.ph.qbr
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = tail call <2 x i16> @llvm.mips.muleu.s.ph.qbr(<4 x i8> %0, <2 x i16> %1)
+ %3 = bitcast <2 x i16> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.muleu.s.ph.qbr(<4 x i8>, <2 x i16>) nounwind
+
+define { i32 } @test__builtin_mips_mulq_rs_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: mulq_rs.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = tail call <2 x i16> @llvm.mips.mulq.rs.ph(<2 x i16> %0, <2 x i16> %1)
+ %3 = bitcast <2 x i16> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.mulq.rs.ph(<2 x i16>, <2 x i16>) nounwind
+
+define i32 @test__builtin_mips_muleq_s_w_phl1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: muleq_s.w.phl
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = tail call i32 @llvm.mips.muleq.s.w.phl(<2 x i16> %0, <2 x i16> %1)
+ ret i32 %2
+}
+
+declare i32 @llvm.mips.muleq.s.w.phl(<2 x i16>, <2 x i16>) nounwind
+
+define i32 @test__builtin_mips_muleq_s_w_phr1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: muleq_s.w.phr
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = tail call i32 @llvm.mips.muleq.s.w.phr(<2 x i16> %0, <2 x i16> %1)
+ ret i32 %2
+}
+
+declare i32 @llvm.mips.muleq.s.w.phr(<2 x i16>, <2 x i16>) nounwind
+
+define { i32 } @test__builtin_mips_precrq_qb_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind readnone {
+entry:
+; CHECK: precrq.qb.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = tail call <4 x i8> @llvm.mips.precrq.qb.ph(<2 x i16> %0, <2 x i16> %1)
+ %3 = bitcast <4 x i8> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <4 x i8> @llvm.mips.precrq.qb.ph(<2 x i16>, <2 x i16>) nounwind readnone
+
+define { i32 } @test__builtin_mips_precrq_ph_w1(i32 %i0, i32 %a0, i32 %a1) nounwind readnone {
+entry:
+; CHECK: precrq.ph.w
+
+ %0 = tail call <2 x i16> @llvm.mips.precrq.ph.w(i32 %a0, i32 %a1)
+ %1 = bitcast <2 x i16> %0 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.precrq.ph.w(i32, i32) nounwind readnone
+
+define { i32 } @test__builtin_mips_precrq_rs_ph_w1(i32 %i0, i32 %a0, i32 %a1) nounwind {
+entry:
+; CHECK: precrq_rs.ph.w
+
+ %0 = tail call <2 x i16> @llvm.mips.precrq.rs.ph.w(i32 %a0, i32 %a1)
+ %1 = bitcast <2 x i16> %0 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.precrq.rs.ph.w(i32, i32) nounwind
+
+define { i32 } @test__builtin_mips_precrqu_s_qb_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: precrqu_s.qb.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = tail call <4 x i8> @llvm.mips.precrqu.s.qb.ph(<2 x i16> %0, <2 x i16> %1)
+ %3 = bitcast <4 x i8> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <4 x i8> @llvm.mips.precrqu.s.qb.ph(<2 x i16>, <2 x i16>) nounwind
+
+
+define i32 @test__builtin_mips_cmpu_eq_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: cmpu.eq.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = bitcast i32 %a1.coerce to <4 x i8>
+ tail call void @llvm.mips.cmpu.eq.qb(<4 x i8> %0, <4 x i8> %1)
+ %2 = tail call i32 @llvm.mips.rddsp(i32 31)
+ ret i32 %2
+}
+
+declare void @llvm.mips.cmpu.eq.qb(<4 x i8>, <4 x i8>) nounwind
+
+declare i32 @llvm.mips.rddsp(i32) nounwind readonly
+
+define i32 @test__builtin_mips_cmpu_lt_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: cmpu.lt.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = bitcast i32 %a1.coerce to <4 x i8>
+ tail call void @llvm.mips.cmpu.lt.qb(<4 x i8> %0, <4 x i8> %1)
+ %2 = tail call i32 @llvm.mips.rddsp(i32 31)
+ ret i32 %2
+}
+
+declare void @llvm.mips.cmpu.lt.qb(<4 x i8>, <4 x i8>) nounwind
+
+define i32 @test__builtin_mips_cmpu_le_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: cmpu.le.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = bitcast i32 %a1.coerce to <4 x i8>
+ tail call void @llvm.mips.cmpu.le.qb(<4 x i8> %0, <4 x i8> %1)
+ %2 = tail call i32 @llvm.mips.rddsp(i32 31)
+ ret i32 %2
+}
+
+declare void @llvm.mips.cmpu.le.qb(<4 x i8>, <4 x i8>) nounwind
+
+define i32 @test__builtin_mips_cmpgu_eq_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: cmpgu.eq.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = bitcast i32 %a1.coerce to <4 x i8>
+ %2 = tail call i32 @llvm.mips.cmpgu.eq.qb(<4 x i8> %0, <4 x i8> %1)
+ ret i32 %2
+}
+
+declare i32 @llvm.mips.cmpgu.eq.qb(<4 x i8>, <4 x i8>) nounwind
+
+define i32 @test__builtin_mips_cmpgu_lt_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: cmpgu.lt.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = bitcast i32 %a1.coerce to <4 x i8>
+ %2 = tail call i32 @llvm.mips.cmpgu.lt.qb(<4 x i8> %0, <4 x i8> %1)
+ ret i32 %2
+}
+
+declare i32 @llvm.mips.cmpgu.lt.qb(<4 x i8>, <4 x i8>) nounwind
+
+define i32 @test__builtin_mips_cmpgu_le_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: cmpgu.le.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = bitcast i32 %a1.coerce to <4 x i8>
+ %2 = tail call i32 @llvm.mips.cmpgu.le.qb(<4 x i8> %0, <4 x i8> %1)
+ ret i32 %2
+}
+
+declare i32 @llvm.mips.cmpgu.le.qb(<4 x i8>, <4 x i8>) nounwind
+
+define i32 @test__builtin_mips_cmp_eq_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: cmp.eq.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ tail call void @llvm.mips.cmp.eq.ph(<2 x i16> %0, <2 x i16> %1)
+ %2 = tail call i32 @llvm.mips.rddsp(i32 31)
+ ret i32 %2
+}
+
+declare void @llvm.mips.cmp.eq.ph(<2 x i16>, <2 x i16>) nounwind
+
+define i32 @test__builtin_mips_cmp_lt_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: cmp.lt.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ tail call void @llvm.mips.cmp.lt.ph(<2 x i16> %0, <2 x i16> %1)
+ %2 = tail call i32 @llvm.mips.rddsp(i32 31)
+ ret i32 %2
+}
+
+declare void @llvm.mips.cmp.lt.ph(<2 x i16>, <2 x i16>) nounwind
+
+define i32 @test__builtin_mips_cmp_le_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: cmp.le.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ tail call void @llvm.mips.cmp.le.ph(<2 x i16> %0, <2 x i16> %1)
+ %2 = tail call i32 @llvm.mips.rddsp(i32 31)
+ ret i32 %2
+}
+
+declare void @llvm.mips.cmp.le.ph(<2 x i16>, <2 x i16>) nounwind
+
+define { i32 } @test__builtin_mips_pick_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind readonly {
+entry:
+; CHECK: pick.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = bitcast i32 %a1.coerce to <4 x i8>
+ %2 = tail call <4 x i8> @llvm.mips.pick.qb(<4 x i8> %0, <4 x i8> %1)
+ %3 = bitcast <4 x i8> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <4 x i8> @llvm.mips.pick.qb(<4 x i8>, <4 x i8>) nounwind readonly
+
+define { i32 } @test__builtin_mips_pick_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind readonly {
+entry:
+; CHECK: pick.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = tail call <2 x i16> @llvm.mips.pick.ph(<2 x i16> %0, <2 x i16> %1)
+ %3 = bitcast <2 x i16> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.pick.ph(<2 x i16>, <2 x i16>) nounwind readonly
+
+define { i32 } @test__builtin_mips_packrl_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind readnone {
+entry:
+; CHECK: packrl.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = tail call <2 x i16> @llvm.mips.packrl.ph(<2 x i16> %0, <2 x i16> %1)
+ %3 = bitcast <2 x i16> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.packrl.ph(<2 x i16>, <2 x i16>) nounwind readnone
+
+define i32 @test__builtin_mips_rddsp1(i32 %i0) nounwind readonly {
+entry:
+; CHECK: rddsp ${{[0-9]+}}
+
+ %0 = tail call i32 @llvm.mips.rddsp(i32 31)
+ ret i32 %0
+}
+
+define { i32 } @test__builtin_mips_shll_qb1(i32 %i0, i32 %a0.coerce) nounwind {
+entry:
+; CHECK: shll.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = tail call <4 x i8> @llvm.mips.shll.qb(<4 x i8> %0, i32 3)
+ %2 = bitcast <4 x i8> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <4 x i8> @llvm.mips.shll.qb(<4 x i8>, i32) nounwind
+
+define { i32 } @test__builtin_mips_shll_qb2(i32 %i0, i32 %a0.coerce, i32 %a1) nounwind {
+entry:
+; CHECK: shllv.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = tail call <4 x i8> @llvm.mips.shll.qb(<4 x i8> %0, i32 %a1)
+ %2 = bitcast <4 x i8> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+define { i32 } @test__builtin_mips_shll_ph1(i32 %i0, i32 %a0.coerce) nounwind {
+entry:
+; CHECK: shll.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = tail call <2 x i16> @llvm.mips.shll.ph(<2 x i16> %0, i32 7)
+ %2 = bitcast <2 x i16> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.shll.ph(<2 x i16>, i32) nounwind
+
+define { i32 } @test__builtin_mips_shll_ph2(i32 %i0, i32 %a0.coerce, i32 %a1) nounwind {
+entry:
+; CHECK: shllv.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = tail call <2 x i16> @llvm.mips.shll.ph(<2 x i16> %0, i32 %a1)
+ %2 = bitcast <2 x i16> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+define { i32 } @test__builtin_mips_shll_s_ph1(i32 %i0, i32 %a0.coerce) nounwind {
+entry:
+; CHECK: shll_s.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = tail call <2 x i16> @llvm.mips.shll.s.ph(<2 x i16> %0, i32 7)
+ %2 = bitcast <2 x i16> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.shll.s.ph(<2 x i16>, i32) nounwind
+
+define { i32 } @test__builtin_mips_shll_s_ph2(i32 %i0, i32 %a0.coerce, i32 %a1) nounwind {
+entry:
+; CHECK: shllv_s.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = tail call <2 x i16> @llvm.mips.shll.s.ph(<2 x i16> %0, i32 %a1)
+ %2 = bitcast <2 x i16> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+define i32 @test__builtin_mips_shll_s_w1(i32 %i0, i32 %a0) nounwind {
+entry:
+; CHECK: shll_s.w
+
+ %0 = tail call i32 @llvm.mips.shll.s.w(i32 %a0, i32 15)
+ ret i32 %0
+}
+
+declare i32 @llvm.mips.shll.s.w(i32, i32) nounwind
+
+define i32 @test__builtin_mips_shll_s_w2(i32 %i0, i32 %a0, i32 %a1) nounwind {
+entry:
+; CHECK: shllv_s.w
+
+ %0 = tail call i32 @llvm.mips.shll.s.w(i32 %a0, i32 %a1)
+ ret i32 %0
+}
+
+define { i32 } @test__builtin_mips_shrl_qb1(i32 %i0, i32 %a0.coerce) nounwind readnone {
+entry:
+; CHECK: shrl.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = tail call <4 x i8> @llvm.mips.shrl.qb(<4 x i8> %0, i32 3)
+ %2 = bitcast <4 x i8> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <4 x i8> @llvm.mips.shrl.qb(<4 x i8>, i32) nounwind readnone
+
+define { i32 } @test__builtin_mips_shrl_qb2(i32 %i0, i32 %a0.coerce, i32 %a1) nounwind readnone {
+entry:
+; CHECK: shrlv.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = tail call <4 x i8> @llvm.mips.shrl.qb(<4 x i8> %0, i32 %a1)
+ %2 = bitcast <4 x i8> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+define { i32 } @test__builtin_mips_shra_ph1(i32 %i0, i32 %a0.coerce) nounwind readnone {
+entry:
+; CHECK: shra.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = tail call <2 x i16> @llvm.mips.shra.ph(<2 x i16> %0, i32 7)
+ %2 = bitcast <2 x i16> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.shra.ph(<2 x i16>, i32) nounwind readnone
+
+define { i32 } @test__builtin_mips_shra_ph2(i32 %i0, i32 %a0.coerce, i32 %a1) nounwind readnone {
+entry:
+; CHECK: shrav.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = tail call <2 x i16> @llvm.mips.shra.ph(<2 x i16> %0, i32 %a1)
+ %2 = bitcast <2 x i16> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+define { i32 } @test__builtin_mips_shra_r_ph1(i32 %i0, i32 %a0.coerce) nounwind readnone {
+entry:
+; CHECK: shra_r.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = tail call <2 x i16> @llvm.mips.shra.r.ph(<2 x i16> %0, i32 7)
+ %2 = bitcast <2 x i16> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.shra.r.ph(<2 x i16>, i32) nounwind readnone
+
+define { i32 } @test__builtin_mips_shra_r_ph2(i32 %i0, i32 %a0.coerce, i32 %a1) nounwind readnone {
+entry:
+; CHECK: shrav_r.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = tail call <2 x i16> @llvm.mips.shra.r.ph(<2 x i16> %0, i32 %a1)
+ %2 = bitcast <2 x i16> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+define i32 @test__builtin_mips_shra_r_w1(i32 %i0, i32 %a0) nounwind readnone {
+entry:
+; CHECK: shra_r.w
+
+ %0 = tail call i32 @llvm.mips.shra.r.w(i32 %a0, i32 15)
+ ret i32 %0
+}
+
+declare i32 @llvm.mips.shra.r.w(i32, i32) nounwind readnone
+
+define i32 @test__builtin_mips_shra_r_w2(i32 %i0, i32 %a0, i32 %a1) nounwind readnone {
+entry:
+; CHECK: shrav_r.w
+
+ %0 = tail call i32 @llvm.mips.shra.r.w(i32 %a0, i32 %a1)
+ ret i32 %0
+}
+
+define { i32 } @test__builtin_mips_absq_s_ph1(i32 %i0, i32 %a0.coerce) nounwind {
+entry:
+; CHECK: absq_s.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = tail call <2 x i16> @llvm.mips.absq.s.ph(<2 x i16> %0)
+ %2 = bitcast <2 x i16> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.absq.s.ph(<2 x i16>) nounwind
+
+define i32 @test__builtin_mips_absq_s_w1(i32 %i0, i32 %a0) nounwind {
+entry:
+; CHECK: absq_s.w
+
+ %0 = tail call i32 @llvm.mips.absq.s.w(i32 %a0)
+ ret i32 %0
+}
+
+declare i32 @llvm.mips.absq.s.w(i32) nounwind
+
+define i32 @test__builtin_mips_preceq_w_phl1(i32 %i0, i32 %a0.coerce) nounwind readnone {
+entry:
+; CHECK: preceq.w.phl
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = tail call i32 @llvm.mips.preceq.w.phl(<2 x i16> %0)
+ ret i32 %1
+}
+
+declare i32 @llvm.mips.preceq.w.phl(<2 x i16>) nounwind readnone
+
+define i32 @test__builtin_mips_preceq_w_phr1(i32 %i0, i32 %a0.coerce) nounwind readnone {
+entry:
+; CHECK: preceq.w.phr
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = tail call i32 @llvm.mips.preceq.w.phr(<2 x i16> %0)
+ ret i32 %1
+}
+
+declare i32 @llvm.mips.preceq.w.phr(<2 x i16>) nounwind readnone
+
+define { i32 } @test__builtin_mips_precequ_ph_qbl1(i32 %i0, i32 %a0.coerce) nounwind readnone {
+entry:
+; CHECK: precequ.ph.qbl
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = tail call <2 x i16> @llvm.mips.precequ.ph.qbl(<4 x i8> %0)
+ %2 = bitcast <2 x i16> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.precequ.ph.qbl(<4 x i8>) nounwind readnone
+
+define { i32 } @test__builtin_mips_precequ_ph_qbr1(i32 %i0, i32 %a0.coerce) nounwind readnone {
+entry:
+; CHECK: precequ.ph.qbr
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = tail call <2 x i16> @llvm.mips.precequ.ph.qbr(<4 x i8> %0)
+ %2 = bitcast <2 x i16> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.precequ.ph.qbr(<4 x i8>) nounwind readnone
+
+define { i32 } @test__builtin_mips_precequ_ph_qbla1(i32 %i0, i32 %a0.coerce) nounwind readnone {
+entry:
+; CHECK: precequ.ph.qbla
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = tail call <2 x i16> @llvm.mips.precequ.ph.qbla(<4 x i8> %0)
+ %2 = bitcast <2 x i16> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.precequ.ph.qbla(<4 x i8>) nounwind readnone
+
+define { i32 } @test__builtin_mips_precequ_ph_qbra1(i32 %i0, i32 %a0.coerce) nounwind readnone {
+entry:
+; CHECK: precequ.ph.qbra
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = tail call <2 x i16> @llvm.mips.precequ.ph.qbra(<4 x i8> %0)
+ %2 = bitcast <2 x i16> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.precequ.ph.qbra(<4 x i8>) nounwind readnone
+
+define { i32 } @test__builtin_mips_preceu_ph_qbl1(i32 %i0, i32 %a0.coerce) nounwind readnone {
+entry:
+; CHECK: preceu.ph.qbl
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = tail call <2 x i16> @llvm.mips.preceu.ph.qbl(<4 x i8> %0)
+ %2 = bitcast <2 x i16> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.preceu.ph.qbl(<4 x i8>) nounwind readnone
+
+define { i32 } @test__builtin_mips_preceu_ph_qbr1(i32 %i0, i32 %a0.coerce) nounwind readnone {
+entry:
+; CHECK: preceu.ph.qbr
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = tail call <2 x i16> @llvm.mips.preceu.ph.qbr(<4 x i8> %0)
+ %2 = bitcast <2 x i16> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.preceu.ph.qbr(<4 x i8>) nounwind readnone
+
+define { i32 } @test__builtin_mips_preceu_ph_qbla1(i32 %i0, i32 %a0.coerce) nounwind readnone {
+entry:
+; CHECK: preceu.ph.qbla
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = tail call <2 x i16> @llvm.mips.preceu.ph.qbla(<4 x i8> %0)
+ %2 = bitcast <2 x i16> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.preceu.ph.qbla(<4 x i8>) nounwind readnone
+
+define { i32 } @test__builtin_mips_preceu_ph_qbra1(i32 %i0, i32 %a0.coerce) nounwind readnone {
+entry:
+; CHECK: preceu.ph.qbra
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = tail call <2 x i16> @llvm.mips.preceu.ph.qbra(<4 x i8> %0)
+ %2 = bitcast <2 x i16> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.preceu.ph.qbra(<4 x i8>) nounwind readnone
+
+define { i32 } @test__builtin_mips_repl_qb1(i32 %i0) nounwind readnone {
+entry:
+; CHECK: repl.qb
+
+ %0 = tail call <4 x i8> @llvm.mips.repl.qb(i32 127)
+ %1 = bitcast <4 x i8> %0 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <4 x i8> @llvm.mips.repl.qb(i32) nounwind readnone
+
+define { i32 } @test__builtin_mips_repl_qb2(i32 %i0, i32 %a0) nounwind readnone {
+entry:
+; CHECK: replv.qb
+
+ %0 = tail call <4 x i8> @llvm.mips.repl.qb(i32 %a0)
+ %1 = bitcast <4 x i8> %0 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0
+ ret { i32 } %.fca.0.insert
+}
+
+define { i32 } @test__builtin_mips_repl_ph1(i32 %i0) nounwind readnone {
+entry:
+; CHECK: repl.ph
+
+ %0 = tail call <2 x i16> @llvm.mips.repl.ph(i32 0)
+ %1 = bitcast <2 x i16> %0 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.repl.ph(i32) nounwind readnone
+
+define { i32 } @test__builtin_mips_repl_ph2(i32 %i0, i32 %a0) nounwind readnone {
+entry:
+; CHECK: replv.ph
+
+ %0 = tail call <2 x i16> @llvm.mips.repl.ph(i32 %a0)
+ %1 = bitcast <2 x i16> %0 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0
+ ret { i32 } %.fca.0.insert
+}
+
+define i32 @test__builtin_mips_bitrev1(i32 %i0, i32 %a0) nounwind readnone {
+entry:
+; CHECK: bitrev ${{[0-9]+}}
+
+ %0 = tail call i32 @llvm.mips.bitrev(i32 %a0)
+ ret i32 %0
+}
+
+declare i32 @llvm.mips.bitrev(i32) nounwind readnone
+
+define i32 @test__builtin_mips_lbux1(i32 %i0, i8* %a0, i32 %a1) nounwind readonly {
+entry:
+; CHECK: lbux ${{[0-9]+}}
+
+ %0 = tail call i32 @llvm.mips.lbux(i8* %a0, i32 %a1)
+ ret i32 %0
+}
+
+declare i32 @llvm.mips.lbux(i8*, i32) nounwind readonly
+
+define i32 @test__builtin_mips_lhx1(i32 %i0, i8* %a0, i32 %a1) nounwind readonly {
+entry:
+; CHECK: lhx ${{[0-9]+}}
+
+ %0 = tail call i32 @llvm.mips.lhx(i8* %a0, i32 %a1)
+ ret i32 %0
+}
+
+declare i32 @llvm.mips.lhx(i8*, i32) nounwind readonly
+
+define i32 @test__builtin_mips_lwx1(i32 %i0, i8* %a0, i32 %a1) nounwind readonly {
+entry:
+; CHECK: lwx ${{[0-9]+}}
+
+ %0 = tail call i32 @llvm.mips.lwx(i8* %a0, i32 %a1)
+ ret i32 %0
+}
+
+declare i32 @llvm.mips.lwx(i8*, i32) nounwind readonly
+
+define i32 @test__builtin_mips_wrdsp1(i32 %i0, i32 %a0) nounwind {
+entry:
+; CHECK: wrdsp ${{[0-9]+}}
+
+ tail call void @llvm.mips.wrdsp(i32 %a0, i32 31)
+ %0 = tail call i32 @llvm.mips.rddsp(i32 31)
+ ret i32 %0
+}
+
+declare void @llvm.mips.wrdsp(i32, i32) nounwind
diff --git a/test/CodeGen/Mips/dsp-r2.ll b/test/CodeGen/Mips/dsp-r2.ll
new file mode 100644
index 0000000000..631f9e43c2
--- /dev/null
+++ b/test/CodeGen/Mips/dsp-r2.ll
@@ -0,0 +1,568 @@
+; RUN: llc -march=mipsel -mattr=+dspr2 < %s | FileCheck %s
+
+define i64 @test__builtin_mips_dpa_w_ph1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind readnone {
+entry:
+; CHECK: dpa.w.ph
+
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = bitcast i32 %a2.coerce to <2 x i16>
+ %3 = tail call i64 @llvm.mips.dpa.w.ph(i64 %a0, <2 x i16> %1, <2 x i16> %2)
+ ret i64 %3
+}
+
+declare i64 @llvm.mips.dpa.w.ph(i64, <2 x i16>, <2 x i16>) nounwind readnone
+
+define i64 @test__builtin_mips_dps_w_ph1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind readnone {
+entry:
+; CHECK: dps.w.ph
+
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = bitcast i32 %a2.coerce to <2 x i16>
+ %3 = tail call i64 @llvm.mips.dps.w.ph(i64 %a0, <2 x i16> %1, <2 x i16> %2)
+ ret i64 %3
+}
+
+declare i64 @llvm.mips.dps.w.ph(i64, <2 x i16>, <2 x i16>) nounwind readnone
+
+define i64 @test__builtin_mips_mulsa_w_ph1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind readnone {
+entry:
+; CHECK: mulsa.w.ph
+
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = bitcast i32 %a2.coerce to <2 x i16>
+ %3 = tail call i64 @llvm.mips.mulsa.w.ph(i64 %a0, <2 x i16> %1, <2 x i16> %2)
+ ret i64 %3
+}
+
+declare i64 @llvm.mips.mulsa.w.ph(i64, <2 x i16>, <2 x i16>) nounwind readnone
+
+define i64 @test__builtin_mips_dpax_w_ph1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind readnone {
+entry:
+; CHECK: dpax.w.ph
+
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = bitcast i32 %a2.coerce to <2 x i16>
+ %3 = tail call i64 @llvm.mips.dpax.w.ph(i64 %a0, <2 x i16> %1, <2 x i16> %2)
+ ret i64 %3
+}
+
+declare i64 @llvm.mips.dpax.w.ph(i64, <2 x i16>, <2 x i16>) nounwind readnone
+
+define i64 @test__builtin_mips_dpsx_w_ph1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind readnone {
+entry:
+; CHECK: dpsx.w.ph
+
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = bitcast i32 %a2.coerce to <2 x i16>
+ %3 = tail call i64 @llvm.mips.dpsx.w.ph(i64 %a0, <2 x i16> %1, <2 x i16> %2)
+ ret i64 %3
+}
+
+declare i64 @llvm.mips.dpsx.w.ph(i64, <2 x i16>, <2 x i16>) nounwind readnone
+
+define i64 @test__builtin_mips_dpaqx_s_w_ph1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind {
+entry:
+; CHECK: dpaqx_s.w.ph
+
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = bitcast i32 %a2.coerce to <2 x i16>
+ %3 = tail call i64 @llvm.mips.dpaqx.s.w.ph(i64 %a0, <2 x i16> %1, <2 x i16> %2)
+ ret i64 %3
+}
+
+declare i64 @llvm.mips.dpaqx.s.w.ph(i64, <2 x i16>, <2 x i16>) nounwind
+
+define i64 @test__builtin_mips_dpaqx_sa_w_ph1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind {
+entry:
+; CHECK: dpaqx_sa.w.ph
+
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = bitcast i32 %a2.coerce to <2 x i16>
+ %3 = tail call i64 @llvm.mips.dpaqx.sa.w.ph(i64 %a0, <2 x i16> %1, <2 x i16> %2)
+ ret i64 %3
+}
+
+declare i64 @llvm.mips.dpaqx.sa.w.ph(i64, <2 x i16>, <2 x i16>) nounwind
+
+define i64 @test__builtin_mips_dpsqx_s_w_ph1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind {
+entry:
+; CHECK: dpsqx_s.w.ph
+
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = bitcast i32 %a2.coerce to <2 x i16>
+ %3 = tail call i64 @llvm.mips.dpsqx.s.w.ph(i64 %a0, <2 x i16> %1, <2 x i16> %2)
+ ret i64 %3
+}
+
+declare i64 @llvm.mips.dpsqx.s.w.ph(i64, <2 x i16>, <2 x i16>) nounwind
+
+define i64 @test__builtin_mips_dpsqx_sa_w_ph1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind {
+entry:
+; CHECK: dpsqx_sa.w.ph
+
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = bitcast i32 %a2.coerce to <2 x i16>
+ %3 = tail call i64 @llvm.mips.dpsqx.sa.w.ph(i64 %a0, <2 x i16> %1, <2 x i16> %2)
+ ret i64 %3
+}
+
+declare i64 @llvm.mips.dpsqx.sa.w.ph(i64, <2 x i16>, <2 x i16>) nounwind
+
+define { i32 } @test__builtin_mips_addu_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: addu.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = tail call <2 x i16> @llvm.mips.addu.ph(<2 x i16> %0, <2 x i16> %1)
+ %3 = bitcast <2 x i16> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.addu.ph(<2 x i16>, <2 x i16>) nounwind
+
+define { i32 } @test__builtin_mips_addu_s_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: addu_s.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = tail call <2 x i16> @llvm.mips.addu.s.ph(<2 x i16> %0, <2 x i16> %1)
+ %3 = bitcast <2 x i16> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.addu.s.ph(<2 x i16>, <2 x i16>) nounwind
+
+define { i32 } @test__builtin_mips_mulq_s_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: mulq_s.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = tail call <2 x i16> @llvm.mips.mulq.s.ph(<2 x i16> %0, <2 x i16> %1)
+ %3 = bitcast <2 x i16> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.mulq.s.ph(<2 x i16>, <2 x i16>) nounwind
+
+define { i32 } @test__builtin_mips_subu_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: subu.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = tail call <2 x i16> @llvm.mips.subu.ph(<2 x i16> %0, <2 x i16> %1)
+ %3 = bitcast <2 x i16> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.subu.ph(<2 x i16>, <2 x i16>) nounwind
+
+define { i32 } @test__builtin_mips_subu_s_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: subu_s.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = tail call <2 x i16> @llvm.mips.subu.s.ph(<2 x i16> %0, <2 x i16> %1)
+ %3 = bitcast <2 x i16> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.subu.s.ph(<2 x i16>, <2 x i16>) nounwind
+
+define i32 @test__builtin_mips_cmpgdu_eq_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: cmpgdu.eq.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = bitcast i32 %a1.coerce to <4 x i8>
+ %2 = tail call i32 @llvm.mips.cmpgdu.eq.qb(<4 x i8> %0, <4 x i8> %1)
+ ret i32 %2
+}
+
+declare i32 @llvm.mips.cmpgdu.eq.qb(<4 x i8>, <4 x i8>) nounwind
+
+define i32 @test__builtin_mips_cmpgdu_lt_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: cmpgdu.lt.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = bitcast i32 %a1.coerce to <4 x i8>
+ %2 = tail call i32 @llvm.mips.cmpgdu.lt.qb(<4 x i8> %0, <4 x i8> %1)
+ ret i32 %2
+}
+
+declare i32 @llvm.mips.cmpgdu.lt.qb(<4 x i8>, <4 x i8>) nounwind
+
+define i32 @test__builtin_mips_cmpgdu_le_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: cmpgdu.le.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = bitcast i32 %a1.coerce to <4 x i8>
+ %2 = tail call i32 @llvm.mips.cmpgdu.le.qb(<4 x i8> %0, <4 x i8> %1)
+ ret i32 %2
+}
+
+declare i32 @llvm.mips.cmpgdu.le.qb(<4 x i8>, <4 x i8>) nounwind
+
+define { i32 } @test__builtin_mips_precr_qb_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: precr.qb.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = tail call <4 x i8> @llvm.mips.precr.qb.ph(<2 x i16> %0, <2 x i16> %1)
+ %3 = bitcast <4 x i8> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <4 x i8> @llvm.mips.precr.qb.ph(<2 x i16>, <2 x i16>) nounwind
+
+define { i32 } @test__builtin_mips_precr_sra_ph_w1(i32 %i0, i32 %a0, i32 %a1) nounwind readnone {
+entry:
+; CHECK: precr_sra.ph.w
+
+ %0 = tail call <2 x i16> @llvm.mips.precr.sra.ph.w(i32 %a0, i32 %a1, i32 15)
+ %1 = bitcast <2 x i16> %0 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.precr.sra.ph.w(i32, i32, i32) nounwind readnone
+
+define { i32 } @test__builtin_mips_precr_sra_r_ph_w1(i32 %i0, i32 %a0, i32 %a1) nounwind readnone {
+entry:
+; CHECK: precr_sra_r.ph.w
+
+ %0 = tail call <2 x i16> @llvm.mips.precr.sra.r.ph.w(i32 %a0, i32 %a1, i32 15)
+ %1 = bitcast <2 x i16> %0 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.precr.sra.r.ph.w(i32, i32, i32) nounwind readnone
+
+define { i32 } @test__builtin_mips_shra_qb1(i32 %i0, i32 %a0.coerce) nounwind readnone {
+entry:
+; CHECK: shra.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = tail call <4 x i8> @llvm.mips.shra.qb(<4 x i8> %0, i32 3)
+ %2 = bitcast <4 x i8> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <4 x i8> @llvm.mips.shra.qb(<4 x i8>, i32) nounwind readnone
+
+define { i32 } @test__builtin_mips_shra_r_qb1(i32 %i0, i32 %a0.coerce) nounwind readnone {
+entry:
+; CHECK: shra_r.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = tail call <4 x i8> @llvm.mips.shra.r.qb(<4 x i8> %0, i32 3)
+ %2 = bitcast <4 x i8> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <4 x i8> @llvm.mips.shra.r.qb(<4 x i8>, i32) nounwind readnone
+
+define { i32 } @test__builtin_mips_shra_qb2(i32 %i0, i32 %a0.coerce, i32 %a1) nounwind readnone {
+entry:
+; CHECK: shrav.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = tail call <4 x i8> @llvm.mips.shra.qb(<4 x i8> %0, i32 %a1)
+ %2 = bitcast <4 x i8> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+define { i32 } @test__builtin_mips_shra_r_qb2(i32 %i0, i32 %a0.coerce, i32 %a1) nounwind readnone {
+entry:
+; CHECK: shrav_r.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = tail call <4 x i8> @llvm.mips.shra.r.qb(<4 x i8> %0, i32 %a1)
+ %2 = bitcast <4 x i8> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+define { i32 } @test__builtin_mips_shrl_ph1(i32 %i0, i32 %a0.coerce) nounwind readnone {
+entry:
+; CHECK: shrl.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = tail call <2 x i16> @llvm.mips.shrl.ph(<2 x i16> %0, i32 7)
+ %2 = bitcast <2 x i16> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.shrl.ph(<2 x i16>, i32) nounwind readnone
+
+define { i32 } @test__builtin_mips_shrl_ph2(i32 %i0, i32 %a0.coerce, i32 %a1) nounwind readnone {
+entry:
+; CHECK: shrlv.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = tail call <2 x i16> @llvm.mips.shrl.ph(<2 x i16> %0, i32 %a1)
+ %2 = bitcast <2 x i16> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+define { i32 } @test__builtin_mips_absq_s_qb1(i32 %i0, i32 %a0.coerce) nounwind {
+entry:
+; CHECK: absq_s.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = tail call <4 x i8> @llvm.mips.absq.s.qb(<4 x i8> %0)
+ %2 = bitcast <4 x i8> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <4 x i8> @llvm.mips.absq.s.qb(<4 x i8>) nounwind
+
+define { i32 } @test__builtin_mips_mul_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: mul.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = tail call <2 x i16> @llvm.mips.mul.ph(<2 x i16> %0, <2 x i16> %1)
+ %3 = bitcast <2 x i16> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.mul.ph(<2 x i16>, <2 x i16>) nounwind
+
+define { i32 } @test__builtin_mips_mul_s_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: mul_s.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = tail call <2 x i16> @llvm.mips.mul.s.ph(<2 x i16> %0, <2 x i16> %1)
+ %3 = bitcast <2 x i16> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.mul.s.ph(<2 x i16>, <2 x i16>) nounwind
+
+define i32 @test__builtin_mips_mulq_rs_w1(i32 %i0, i32 %a0, i32 %a1) nounwind {
+entry:
+; CHECK: mulq_rs.w
+
+ %0 = tail call i32 @llvm.mips.mulq.rs.w(i32 %a0, i32 %a1)
+ ret i32 %0
+}
+
+declare i32 @llvm.mips.mulq.rs.w(i32, i32) nounwind
+
+define i32 @test__builtin_mips_mulq_s_w1(i32 %i0, i32 %a0, i32 %a1) nounwind {
+entry:
+; CHECK: mulq_s.w
+
+ %0 = tail call i32 @llvm.mips.mulq.s.w(i32 %a0, i32 %a1)
+ ret i32 %0
+}
+
+declare i32 @llvm.mips.mulq.s.w(i32, i32) nounwind
+
+define { i32 } @test__builtin_mips_adduh_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind readnone {
+entry:
+; CHECK: adduh.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = bitcast i32 %a1.coerce to <4 x i8>
+ %2 = tail call <4 x i8> @llvm.mips.adduh.qb(<4 x i8> %0, <4 x i8> %1)
+ %3 = bitcast <4 x i8> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <4 x i8> @llvm.mips.adduh.qb(<4 x i8>, <4 x i8>) nounwind readnone
+
+define { i32 } @test__builtin_mips_adduh_r_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind readnone {
+entry:
+; CHECK: adduh_r.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = bitcast i32 %a1.coerce to <4 x i8>
+ %2 = tail call <4 x i8> @llvm.mips.adduh.r.qb(<4 x i8> %0, <4 x i8> %1)
+ %3 = bitcast <4 x i8> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <4 x i8> @llvm.mips.adduh.r.qb(<4 x i8>, <4 x i8>) nounwind readnone
+
+define { i32 } @test__builtin_mips_subuh_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind readnone {
+entry:
+; CHECK: subuh.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = bitcast i32 %a1.coerce to <4 x i8>
+ %2 = tail call <4 x i8> @llvm.mips.subuh.qb(<4 x i8> %0, <4 x i8> %1)
+ %3 = bitcast <4 x i8> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <4 x i8> @llvm.mips.subuh.qb(<4 x i8>, <4 x i8>) nounwind readnone
+
+define { i32 } @test__builtin_mips_subuh_r_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind readnone {
+entry:
+; CHECK: subuh_r.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = bitcast i32 %a1.coerce to <4 x i8>
+ %2 = tail call <4 x i8> @llvm.mips.subuh.r.qb(<4 x i8> %0, <4 x i8> %1)
+ %3 = bitcast <4 x i8> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <4 x i8> @llvm.mips.subuh.r.qb(<4 x i8>, <4 x i8>) nounwind readnone
+
+define { i32 } @test__builtin_mips_addqh_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind readnone {
+entry:
+; CHECK: addqh.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = tail call <2 x i16> @llvm.mips.addqh.ph(<2 x i16> %0, <2 x i16> %1)
+ %3 = bitcast <2 x i16> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.addqh.ph(<2 x i16>, <2 x i16>) nounwind readnone
+
+define { i32 } @test__builtin_mips_addqh_r_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind readnone {
+entry:
+; CHECK: addqh_r.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = tail call <2 x i16> @llvm.mips.addqh.r.ph(<2 x i16> %0, <2 x i16> %1)
+ %3 = bitcast <2 x i16> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.addqh.r.ph(<2 x i16>, <2 x i16>) nounwind readnone
+
+define i32 @test__builtin_mips_addqh_w1(i32 %i0, i32 %a0, i32 %a1) nounwind readnone {
+entry:
+; CHECK: addqh.w
+
+ %0 = tail call i32 @llvm.mips.addqh.w(i32 %a0, i32 %a1)
+ ret i32 %0
+}
+
+declare i32 @llvm.mips.addqh.w(i32, i32) nounwind readnone
+
+define i32 @test__builtin_mips_addqh_r_w1(i32 %i0, i32 %a0, i32 %a1) nounwind readnone {
+entry:
+; CHECK: addqh_r.w
+
+ %0 = tail call i32 @llvm.mips.addqh.r.w(i32 %a0, i32 %a1)
+ ret i32 %0
+}
+
+declare i32 @llvm.mips.addqh.r.w(i32, i32) nounwind readnone
+
+define { i32 } @test__builtin_mips_subqh_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind readnone {
+entry:
+; CHECK: subqh.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = tail call <2 x i16> @llvm.mips.subqh.ph(<2 x i16> %0, <2 x i16> %1)
+ %3 = bitcast <2 x i16> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.subqh.ph(<2 x i16>, <2 x i16>) nounwind readnone
+
+define { i32 } @test__builtin_mips_subqh_r_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind readnone {
+entry:
+; CHECK: subqh_r.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = tail call <2 x i16> @llvm.mips.subqh.r.ph(<2 x i16> %0, <2 x i16> %1)
+ %3 = bitcast <2 x i16> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.subqh.r.ph(<2 x i16>, <2 x i16>) nounwind readnone
+
+define i32 @test__builtin_mips_subqh_w1(i32 %i0, i32 %a0, i32 %a1) nounwind readnone {
+entry:
+; CHECK: subqh.w
+
+ %0 = tail call i32 @llvm.mips.subqh.w(i32 %a0, i32 %a1)
+ ret i32 %0
+}
+
+declare i32 @llvm.mips.subqh.w(i32, i32) nounwind readnone
+
+define i32 @test__builtin_mips_subqh_r_w1(i32 %i0, i32 %a0, i32 %a1) nounwind readnone {
+entry:
+; CHECK: subqh_r.w
+
+ %0 = tail call i32 @llvm.mips.subqh.r.w(i32 %a0, i32 %a1)
+ ret i32 %0
+}
+
+declare i32 @llvm.mips.subqh.r.w(i32, i32) nounwind readnone
+
+define i32 @test__builtin_mips_append1(i32 %i0, i32 %a0, i32 %a1) nounwind readnone {
+entry:
+; CHECK: append ${{[0-9]+}}
+
+ %0 = tail call i32 @llvm.mips.append(i32 %a0, i32 %a1, i32 15)
+ ret i32 %0
+}
+
+declare i32 @llvm.mips.append(i32, i32, i32) nounwind readnone
+
+define i32 @test__builtin_mips_balign1(i32 %i0, i32 %a0, i32 %a1) nounwind readnone {
+entry:
+; CHECK: balign ${{[0-9]+}}
+
+ %0 = tail call i32 @llvm.mips.balign(i32 %a0, i32 %a1, i32 1)
+ ret i32 %0
+}
+
+declare i32 @llvm.mips.balign(i32, i32, i32) nounwind readnone
+
+define i32 @test__builtin_mips_prepend1(i32 %i0, i32 %a0, i32 %a1) nounwind readnone {
+entry:
+; CHECK: prepend ${{[0-9]+}}
+
+ %0 = tail call i32 @llvm.mips.prepend(i32 %a0, i32 %a1, i32 15)
+ ret i32 %0
+}
+
+declare i32 @llvm.mips.prepend(i32, i32, i32) nounwind readnone
diff --git a/test/CodeGen/Mips/vector-load-store.ll b/test/CodeGen/Mips/vector-load-store.ll
new file mode 100644
index 0000000000..d889963099
--- /dev/null
+++ b/test/CodeGen/Mips/vector-load-store.ll
@@ -0,0 +1,27 @@
+; RUN: llc -march=mipsel -mattr=+dsp < %s | FileCheck %s
+
+@g1 = common global <2 x i16> zeroinitializer, align 4
+@g0 = common global <2 x i16> zeroinitializer, align 4
+@g3 = common global <4 x i8> zeroinitializer, align 4
+@g2 = common global <4 x i8> zeroinitializer, align 4
+
+define void @func_v2i16() nounwind {
+entry:
+; CHECK: lw
+; CHECK: sw
+
+ %0 = load <2 x i16>* @g1, align 4
+ store <2 x i16> %0, <2 x i16>* @g0, align 4
+ ret void
+}
+
+define void @func_v4i8() nounwind {
+entry:
+; CHECK: lw
+; CHECK: sw
+
+ %0 = load <4 x i8>* @g3, align 4
+ store <4 x i8> %0, <4 x i8>* @g2, align 4
+ ret void
+}
+
diff --git a/test/CodeGen/PowerPC/pr13891.ll b/test/CodeGen/PowerPC/pr13891.ll
new file mode 100644
index 0000000000..3ae73850a3
--- /dev/null
+++ b/test/CodeGen/PowerPC/pr13891.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%struct.foo = type { i8, i8 }
+
+define void @_Z5check3foos(%struct.foo* nocapture byval %f, i16 signext %i) noinline {
+; CHECK: _Z5check3foos:
+; CHECK: sth 3, {{[0-9]+}}(1)
+; CHECK: lha {{[0-9]+}}, {{[0-9]+}}(1)
+entry:
+ %0 = bitcast %struct.foo* %f to i16*
+ %1 = load i16* %0, align 2
+ %bf.val.sext = ashr i16 %1, 8
+ %cmp = icmp eq i16 %bf.val.sext, %i
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ %conv = sext i16 %bf.val.sext to i32
+ tail call void @exit(i32 %conv)
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
+declare void @exit(i32)
diff --git a/test/CodeGen/Thumb2/cortex-fp.ll b/test/CodeGen/Thumb2/cortex-fp.ll
index d06f8a7bee..b7df2fbf54 100644
--- a/test/CodeGen/Thumb2/cortex-fp.ll
+++ b/test/CodeGen/Thumb2/cortex-fp.ll
@@ -7,8 +7,8 @@ define float @foo(float %a, float %b) {
entry:
; CHECK: foo
; CORTEXM3: blx ___mulsf3
-; CORTEXM4: vmul.f32 s0, s1, s0
-; CORTEXA8: vmul.f32 d0, d1, d0
+; CORTEXM4: vmul.f32 s0, s2, s0
+; CORTEXA8: vmul.f32 d
%0 = fmul float %a, %b
ret float %0
}
@@ -19,6 +19,6 @@ entry:
%0 = fmul double %a, %b
; CORTEXM3: blx ___muldf3
; CORTEXM4: blx ___muldf3
-; CORTEXA8: vmul.f64 d16, d17, d16
+; CORTEXA8: vmul.f64 d
ret double %0
}
diff --git a/test/CodeGen/Thumb2/div.ll b/test/CodeGen/Thumb2/div.ll
index 2c00c70c0d..f89746a303 100644
--- a/test/CodeGen/Thumb2/div.ll
+++ b/test/CodeGen/Thumb2/div.ll
@@ -2,6 +2,8 @@
; RUN: | FileCheck %s -check-prefix=CHECK-THUMB
; RUN: llc < %s -march=thumb -mcpu=cortex-m3 -mattr=+thumb2 \
; RUN: | FileCheck %s -check-prefix=CHECK-THUMBV7M
+; RUN: llc < %s -march=thumb -mcpu=swift \
+; RUN: | FileCheck %s -check-prefix=CHECK-SWIFT-T2
define i32 @f1(i32 %a, i32 %b) {
entry:
@@ -9,6 +11,8 @@ entry:
; CHECK-THUMB: __divsi3
; CHECK-THUMBV7M: f1
; CHECK-THUMBV7M: sdiv
+; CHECK-SWIFT-T2: f1
+; CHECK-SWIFT-T2: sdiv
%tmp1 = sdiv i32 %a, %b ; <i32> [#uses=1]
ret i32 %tmp1
}
@@ -19,6 +23,8 @@ entry:
; CHECK-THUMB: __udivsi3
; CHECK-THUMBV7M: f2
; CHECK-THUMBV7M: udiv
+; CHECK-SWIFT-T2: f2
+; CHECK-SWIFT-T2: udiv
%tmp1 = udiv i32 %a, %b ; <i32> [#uses=1]
ret i32 %tmp1
}
@@ -29,6 +35,8 @@ entry:
; CHECK-THUMB: __modsi3
; CHECK-THUMBV7M: f3
; CHECK-THUMBV7M: sdiv
+; CHECK-SWIFT-T2: f3
+; CHECK-SWIFT-T2: sdiv
%tmp1 = srem i32 %a, %b ; <i32> [#uses=1]
ret i32 %tmp1
}
@@ -39,6 +47,8 @@ entry:
; CHECK-THUMB: __umodsi3
; CHECK-THUMBV7M: f4
; CHECK-THUMBV7M: udiv
+; CHECK-SWIFT-T2: f4
+; CHECK-SWIFT-T2: udiv
%tmp1 = urem i32 %a, %b ; <i32> [#uses=1]
ret i32 %tmp1
}
diff --git a/test/CodeGen/Thumb2/thumb2-mla.ll b/test/CodeGen/Thumb2/thumb2-mla.ll
index c4cc749ea5..594d9742b0 100644
--- a/test/CodeGen/Thumb2/thumb2-mla.ll
+++ b/test/CodeGen/Thumb2/thumb2-mla.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mattr=+thumb2 -arm-use-mulops=false | FileCheck %s -check-prefix=NO_MULOPS
define i32 @f1(i32 %a, i32 %b, i32 %c) {
%tmp1 = mul i32 %a, %b
@@ -7,6 +8,9 @@ define i32 @f1(i32 %a, i32 %b, i32 %c) {
}
; CHECK: f1:
; CHECK: mla r0, r0, r1, r2
+; NO_MULOPS: f1:
+; NO_MULOPS: muls r0, r1, r0
+; NO_MULOPS-NEXT: add r0, r2
define i32 @f2(i32 %a, i32 %b, i32 %c) {
%tmp1 = mul i32 %a, %b
@@ -15,3 +19,6 @@ define i32 @f2(i32 %a, i32 %b, i32 %c) {
}
; CHECK: f2:
; CHECK: mla r0, r0, r1, r2
+; NO_MULOPS: f2:
+; NO_MULOPS: muls r0, r1, r0
+; NO_MULOPS-NEXT: add r0, r2
diff --git a/test/CodeGen/Thumb2/thumb2-smla.ll b/test/CodeGen/Thumb2/thumb2-smla.ll
index c128eccd66..aaaedfa42e 100644
--- a/test/CodeGen/Thumb2/thumb2-smla.ll
+++ b/test/CodeGen/Thumb2/thumb2-smla.ll
@@ -1,8 +1,12 @@
; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk,+t2dsp | FileCheck %s
+; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk,+t2dsp -arm-use-mulops=false | FileCheck %s -check-prefix=NO_MULOPS
define i32 @f3(i32 %a, i16 %x, i32 %y) {
; CHECK: f3
; CHECK: smlabt r0, r1, r2, r0
+; NO_MULOPS: f3
+; NO_MULOPS: smultb r1, r2, r1
+; NO_MULOPS-NEXT: add r0, r1
%tmp = sext i16 %x to i32 ; <i32> [#uses=1]
%tmp2 = ashr i32 %y, 16 ; <i32> [#uses=1]
%tmp3 = mul i32 %tmp2, %tmp ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2012-09-28-CGPBug.ll b/test/CodeGen/X86/2012-09-28-CGPBug.ll
new file mode 100644
index 0000000000..32d7d012dd
--- /dev/null
+++ b/test/CodeGen/X86/2012-09-28-CGPBug.ll
@@ -0,0 +1,53 @@
+; RUN: llc -mtriple=i386-apple-macosx < %s | FileCheck %s
+; rdar://12396696
+
+@JT = global [4 x i32] [i32 sub (i32 ptrtoint (i8* blockaddress(@h, %18) to i32), i32 ptrtoint (i8* blockaddress(@h, %11) to i32)), i32 sub (i32 ptrtoint (i8* blockaddress(@h, %17) to i32), i32 ptrtoint (i8* blockaddress(@h, %11) to i32)), i32 sub (i32 ptrtoint (i8* blockaddress(@h, %22) to i32), i32 ptrtoint (i8* blockaddress(@h, %18) to i32)), i32 sub (i32 ptrtoint (i8* blockaddress(@h, %22) to i32), i32 ptrtoint (i8* blockaddress(@h, %17) to i32))]
+@gGlobalLock = external global i8*
+@.str40 = external global [35 x i8]
+
+; CHECK: _JT:
+; CHECK-NOT: .long Ltmp{{[0-9]+}}-1
+; CHECK-NOT: .long 1-Ltmp{{[0-9]+}}
+; CHECK: .long Ltmp{{[0-9]+}}-Ltmp{{[0-9]+}}
+; CHECK: .long Ltmp{{[0-9]+}}-Ltmp{{[0-9]+}}
+; CHECK: .long Ltmp{{[0-9]+}}-Ltmp{{[0-9]+}}
+; CHECK: .long Ltmp{{[0-9]+}}-Ltmp{{[0-9]+}}
+
+define void @h(i8*) nounwind ssp {
+ %2 = alloca i8*
+ store i8* %0, i8** %2
+ %3 = load i8** %2
+ %4 = bitcast i8* %3 to { i32, i32 }*
+ %5 = getelementptr { i32, i32 }* %4, i32 0, i32 0
+ %6 = load i32* %5
+ %7 = srem i32 %6, 2
+ %8 = icmp slt i32 %6, 2
+ %9 = select i1 %8, i32 %6, i32 %7
+ %10 = icmp eq i32 %9, 0
+ br label %11
+
+; <label>:11 ; preds = %1
+ %12 = zext i1 %10 to i32
+ %13 = getelementptr [4 x i32]* @JT, i32 0, i32 %12
+ %14 = load i32* %13
+ %15 = add i32 %14, ptrtoint (i8* blockaddress(@h, %11) to i32)
+ %16 = inttoptr i32 %15 to i8*
+ indirectbr i8* %16, [label %17, label %18]
+
+; <label>:17 ; preds = %11
+ tail call void (i8*, ...)* @g(i8* getelementptr inbounds ([35 x i8]* @.str40, i32 0, i32 0))
+ br label %22
+
+; <label>:18 ; preds = %11
+ %19 = call i32 @f(i32 -1037694186) nounwind
+ %20 = inttoptr i32 %19 to i32 (i8**)*
+ %21 = tail call i32 %20(i8** @gGlobalLock)
+ br label %22
+
+; <label>:22 ; preds = %18, %17
+ ret void
+}
+
+declare i32 @f(i32)
+
+declare void @g(i8*, ...)
diff --git a/test/CodeGen/X86/atomic-minmax-i6432.ll b/test/CodeGen/X86/atomic-minmax-i6432.ll
new file mode 100644
index 0000000000..01a926489b
--- /dev/null
+++ b/test/CodeGen/X86/atomic-minmax-i6432.ll
@@ -0,0 +1,51 @@
+; RUN: llc -march=x86 -mattr=+cmov -mtriple=i386-pc-linux < %s | FileCheck %s
+@sc64 = external global i64
+
+define void @atomic_maxmin_i6432() {
+; CHECK: atomic_maxmin_i6432
+ %1 = atomicrmw max i64* @sc64, i64 5 acquire
+; CHECK: [[LABEL:.LBB[0-9]+_[0-9]+]]
+; CHECK: cmpl
+; CHECK: setl
+; CHECK: cmpl
+; CHECK: setl
+; CHECK: cmovne
+; CHECK: cmovne
+; CHECK: lock
+; CHECK-NEXT: cmpxchg8b
+; CHECK: jne [[LABEL]]
+ %2 = atomicrmw min i64* @sc64, i64 6 acquire
+; CHECK: [[LABEL:.LBB[0-9]+_[0-9]+]]
+; CHECK: cmpl
+; CHECK: setg
+; CHECK: cmpl
+; CHECK: setg
+; CHECK: cmovne
+; CHECK: cmovne
+; CHECK: lock
+; CHECK-NEXT: cmpxchg8b
+; CHECK: jne [[LABEL]]
+ %3 = atomicrmw umax i64* @sc64, i64 7 acquire
+; CHECK: [[LABEL:.LBB[0-9]+_[0-9]+]]
+; CHECK: cmpl
+; CHECK: setb
+; CHECK: cmpl
+; CHECK: setb
+; CHECK: cmovne
+; CHECK: cmovne
+; CHECK: lock
+; CHECK-NEXT: cmpxchg8b
+; CHECK: jne [[LABEL]]
+ %4 = atomicrmw umin i64* @sc64, i64 8 acquire
+; CHECK: [[LABEL:.LBB[0-9]+_[0-9]+]]
+; CHECK: cmpl
+; CHECK: seta
+; CHECK: cmpl
+; CHECK: seta
+; CHECK: cmovne
+; CHECK: cmovne
+; CHECK: lock
+; CHECK-NEXT: cmpxchg8b
+; CHECK: jne [[LABEL]]
+ ret void
+}
diff --git a/test/CodeGen/X86/atomic6432.ll b/test/CodeGen/X86/atomic6432.ll
index 556c36ebfd..f9b21c5bc7 100644
--- a/test/CodeGen/X86/atomic6432.ll
+++ b/test/CodeGen/X86/atomic6432.ll
@@ -1,5 +1,4 @@
; RUN: llc < %s -O0 -march=x86 -mcpu=corei7 | FileCheck %s --check-prefix X32
-; XFAIL: *
@sc64 = external global i64
diff --git a/test/CodeGen/X86/crash.ll b/test/CodeGen/X86/crash.ll
index 9badfc82e9..ae5804195c 100644
--- a/test/CodeGen/X86/crash.ll
+++ b/test/CodeGen/X86/crash.ll
@@ -442,3 +442,38 @@ entry:
ret void
}
declare void @_Z6PrintFz(...)
+
+@a = external global i32, align 4
+@fn1.g = private unnamed_addr constant [9 x i32*] [i32* null, i32* @a, i32* null, i32* null, i32* null, i32* null, i32* null, i32* null, i32* null], align 16
+@e = external global i32, align 4
+
+define void @pr13943() nounwind uwtable ssp {
+entry:
+ %srcval = load i576* bitcast ([9 x i32*]* @fn1.g to i576*), align 16
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %g.0 = phi i576 [ %srcval, %entry ], [ %ins, %for.inc ]
+ %0 = load i32* @e, align 4
+ %1 = lshr i576 %g.0, 64
+ %2 = trunc i576 %1 to i64
+ %3 = inttoptr i64 %2 to i32*
+ %cmp = icmp eq i32* undef, %3
+ %conv2 = zext i1 %cmp to i32
+ %and = and i32 %conv2, %0
+ tail call void (...)* @fn3(i32 %and) nounwind
+ %tobool = icmp eq i32 undef, 0
+ br i1 %tobool, label %for.inc, label %if.then
+
+if.then: ; preds = %for.cond
+ ret void
+
+for.inc: ; preds = %for.cond
+ %4 = shl i576 %1, 384
+ %mask = and i576 %g.0, -726838724295606890509921801691610055141362320587174446476410459910173841445449629921945328942266354949348255351381262292727973638307841
+ %5 = and i576 %4, 726838724295606890509921801691610055141362320587174446476410459910173841445449629921945328942266354949348255351381262292727973638307840
+ %ins = or i576 %5, %mask
+ br label %for.cond
+}
+
+declare void @fn3(...)
diff --git a/test/CodeGen/X86/jump_sign.ll b/test/CodeGen/X86/jump_sign.ll
index 0cf397d4af..78d9e06f59 100644
--- a/test/CodeGen/X86/jump_sign.ll
+++ b/test/CodeGen/X86/jump_sign.ll
@@ -278,3 +278,31 @@ entry:
%cond = select i1 %cmp, i32 %add, i32 0
ret i32 %cond
}
+
+; PR13966
+@b = common global i32 0, align 4
+@a = common global i32 0, align 4
+define i32 @test1(i32 %p1) nounwind uwtable {
+entry:
+; CHECK: test1:
+; CHECK: testb
+; CHECK: j
+; CHECK: ret
+ %0 = load i32* @b, align 4
+ %cmp = icmp ult i32 %0, %p1
+ %conv = zext i1 %cmp to i32
+ %1 = load i32* @a, align 4
+ %and = and i32 %conv, %1
+ %conv1 = trunc i32 %and to i8
+ %2 = urem i8 %conv1, 3
+ %tobool = icmp eq i8 %2, 0
+ br i1 %tobool, label %if.end, label %if.then
+
+if.then:
+ %dec = add nsw i32 %1, -1
+ store i32 %dec, i32* @a, align 4
+ br label %if.end
+
+if.end:
+ ret i32 undef
+}
diff --git a/test/CodeGen/X86/mulx32.ll b/test/CodeGen/X86/mulx32.ll
new file mode 100644
index 0000000000..b75ac009e7
--- /dev/null
+++ b/test/CodeGen/X86/mulx32.ll
@@ -0,0 +1,22 @@
+; RUN: llc -mcpu=core-avx2 -march=x86 < %s | FileCheck %s
+
+define i64 @f1(i32 %a, i32 %b) {
+ %x = zext i32 %a to i64
+ %y = zext i32 %b to i64
+ %r = mul i64 %x, %y
+; CHECK: f1
+; CHECK: mulxl
+; CHECK: ret
+ ret i64 %r
+}
+
+define i64 @f2(i32 %a, i32* %p) {
+ %b = load i32* %p
+ %x = zext i32 %a to i64
+ %y = zext i32 %b to i64
+ %r = mul i64 %x, %y
+; CHECK: f2
+; CHECK: mulxl ({{.+}}), %{{.+}}, %{{.+}}
+; CHECK: ret
+ ret i64 %r
+}
diff --git a/test/CodeGen/X86/mulx64.ll b/test/CodeGen/X86/mulx64.ll
new file mode 100644
index 0000000000..d5730282a1
--- /dev/null
+++ b/test/CodeGen/X86/mulx64.ll
@@ -0,0 +1,22 @@
+; RUN: llc -mcpu=core-avx2 -march=x86-64 < %s | FileCheck %s
+
+define i128 @f1(i64 %a, i64 %b) {
+ %x = zext i64 %a to i128
+ %y = zext i64 %b to i128
+ %r = mul i128 %x, %y
+; CHECK: f1
+; CHECK: mulxq
+; CHECK: ret
+ ret i128 %r
+}
+
+define i128 @f2(i64 %a, i64* %p) {
+ %b = load i64* %p
+ %x = zext i64 %a to i128
+ %y = zext i64 %b to i128
+ %r = mul i128 %x, %y
+; CHECK: f2
+; CHECK: mulxq ({{.+}}), %{{.+}}, %{{.+}}
+; CHECK: ret
+ ret i128 %r
+}
diff --git a/test/CodeGen/X86/phys_subreg_coalesce-3.ll b/test/CodeGen/X86/phys_subreg_coalesce-3.ll
index 51320dd6d0..2a20e7ad6f 100644
--- a/test/CodeGen/X86/phys_subreg_coalesce-3.ll
+++ b/test/CodeGen/X86/phys_subreg_coalesce-3.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=corei7 | FileCheck %s
; rdar://5571034
; This requires physreg joining, %vreg13 is live everywhere:
diff --git a/test/CodeGen/X86/pic_jumptable.ll b/test/CodeGen/X86/pic_jumptable.ll
index 8c16dc68b2..bdd8859358 100644
--- a/test/CodeGen/X86/pic_jumptable.ll
+++ b/test/CodeGen/X86/pic_jumptable.ll
@@ -1,5 +1,7 @@
; RUN: llc < %s -relocation-model=pic -mtriple=i386-linux-gnu -asm-verbose=false \
; RUN: | FileCheck %s --check-prefix=CHECK-LINUX
+; RUN: llc < %s -relocation-model=pic -mark-data-regions -mtriple=i686-apple-darwin -asm-verbose=false \
+; RUN: | FileCheck %s --check-prefix=CHECK-DATA
; RUN: llc < %s -relocation-model=pic -mtriple=i686-apple-darwin -asm-verbose=false \
; RUN: | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin | not grep 'lJTI'
@@ -16,6 +18,16 @@ entry:
; CHECK: Ltmp0 = LJTI0_0-L0$pb
; CHECK-NEXT: addl Ltmp0(%eax,%ecx,4)
; CHECK-NEXT: jmpl *%eax
+
+;; When data-in-code markers are enabled, we should see them around the jump
+;; table.
+; CHECK-DATA: .data_region jt32
+; CHECK-DATA: LJTI0_0
+; CHECK-DATA: .end_data_region
+
+;; When they're not enabled, make sure we don't see them at all.
+; CHECK-NOT: .data_region
+; CHECK-LINUX-NOT: .data_region
%Y_addr = alloca i32 ; <i32*> [#uses=2]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
store i32 %Y, i32* %Y_addr
diff --git a/test/CodeGen/X86/ptr-rotate.ll b/test/CodeGen/X86/ptr-rotate.ll
index 6debd16ba5..fbd13b5036 100644
--- a/test/CodeGen/X86/ptr-rotate.ll
+++ b/test/CodeGen/X86/ptr-rotate.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=i386-apple-darwin -o - < %s | FileCheck %s
+; RUN: llc -mtriple=i386-apple-darwin -mcpu=corei7 -o - < %s | FileCheck %s
define i32 @func(i8* %A) nounwind readnone {
entry:
diff --git a/test/CodeGen/X86/rot32.ll b/test/CodeGen/X86/rot32.ll
index 99602fd64f..e95a734e04 100644
--- a/test/CodeGen/X86/rot32.ll
+++ b/test/CodeGen/X86/rot32.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=x86 | FileCheck %s
+; RUN: llc < %s -march=x86 -mcpu=corei7 | FileCheck %s
+; RUN: llc < %s -march=x86 -mcpu=core-avx2 | FileCheck %s --check-prefix=BMI2
define i32 @foo(i32 %x, i32 %y, i32 %z) nounwind readnone {
entry:
@@ -48,12 +49,25 @@ define i32 @xfoo(i32 %x, i32 %y, i32 %z) nounwind readnone {
entry:
; CHECK: xfoo:
; CHECK: roll $7
+; BMI2: xfoo:
+; BMI2: rorxl $25
%0 = lshr i32 %x, 25
%1 = shl i32 %x, 7
%2 = or i32 %0, %1
ret i32 %2
}
+define i32 @xfoop(i32* %p) nounwind readnone {
+entry:
+; BMI2: xfoop:
+; BMI2: rorxl $25, ({{.+}}), %{{.+}}
+ %x = load i32* %p
+ %a = lshr i32 %x, 25
+ %b = shl i32 %x, 7
+ %c = or i32 %a, %b
+ ret i32 %c
+}
+
define i32 @xbar(i32 %x, i32 %y, i32 %z) nounwind readnone {
entry:
; CHECK: xbar:
@@ -68,12 +82,25 @@ define i32 @xun(i32 %x, i32 %y, i32 %z) nounwind readnone {
entry:
; CHECK: xun:
; CHECK: roll $25
+; BMI2: xun:
+; BMI2: rorxl $7
%0 = lshr i32 %x, 7
%1 = shl i32 %x, 25
%2 = or i32 %0, %1
ret i32 %2
}
+define i32 @xunp(i32* %p) nounwind readnone {
+entry:
+; BMI2: xunp:
+; BMI2: rorxl $7, ({{.+}}), %{{.+}}
+ %x = load i32* %p
+ %a = lshr i32 %x, 7
+ %b = shl i32 %x, 25
+ %c = or i32 %a, %b
+ ret i32 %c
+}
+
define i32 @xbu(i32 %x, i32 %y, i32 %z) nounwind readnone {
entry:
; CHECK: xbu:
diff --git a/test/CodeGen/X86/rot64.ll b/test/CodeGen/X86/rot64.ll
index 4e082bb860..7fa982d83b 100644
--- a/test/CodeGen/X86/rot64.ll
+++ b/test/CodeGen/X86/rot64.ll
@@ -1,8 +1,9 @@
-; RUN: llc < %s -march=x86-64 > %t
-; RUN: grep rol %t | count 3
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 > %t
+; RUN: grep rol %t | count 5
; RUN: grep ror %t | count 1
; RUN: grep shld %t | count 2
; RUN: grep shrd %t | count 2
+; RUN: llc < %s -march=x86-64 -mcpu=core-avx2 | FileCheck %s --check-prefix=BMI2
define i64 @foo(i64 %x, i64 %y, i64 %z) nounwind readnone {
entry:
@@ -42,12 +43,25 @@ entry:
define i64 @xfoo(i64 %x, i64 %y, i64 %z) nounwind readnone {
entry:
+; BMI2: xfoo:
+; BMI2: rorxq $57
%0 = lshr i64 %x, 57
%1 = shl i64 %x, 7
%2 = or i64 %0, %1
ret i64 %2
}
+define i64 @xfoop(i64* %p) nounwind readnone {
+entry:
+; BMI2: xfoop:
+; BMI2: rorxq $57, ({{.+}}), %{{.+}}
+ %x = load i64* %p
+ %a = lshr i64 %x, 57
+ %b = shl i64 %x, 7
+ %c = or i64 %a, %b
+ ret i64 %c
+}
+
define i64 @xbar(i64 %x, i64 %y, i64 %z) nounwind readnone {
entry:
%0 = shl i64 %y, 7
@@ -58,12 +72,25 @@ entry:
define i64 @xun(i64 %x, i64 %y, i64 %z) nounwind readnone {
entry:
+; BMI2: xun:
+; BMI2: rorxq $7
%0 = lshr i64 %x, 7
%1 = shl i64 %x, 57
%2 = or i64 %0, %1
ret i64 %2
}
+define i64 @xunp(i64* %p) nounwind readnone {
+entry:
+; BMI2: xunp:
+; BMI2: rorxq $7, ({{.+}}), %{{.+}}
+ %x = load i64* %p
+ %a = lshr i64 %x, 7
+ %b = shl i64 %x, 57
+ %c = or i64 %a, %b
+ ret i64 %c
+}
+
define i64 @xbu(i64 %x, i64 %y, i64 %z) nounwind readnone {
entry:
%0 = lshr i64 %y, 7
diff --git a/test/CodeGen/X86/rotate2.ll b/test/CodeGen/X86/rotate2.ll
index 2eea3999e7..2316c70850 100644
--- a/test/CodeGen/X86/rotate2.ll
+++ b/test/CodeGen/X86/rotate2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 | grep rol | count 2
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 | grep rol | count 2
define i64 @test1(i64 %x) nounwind {
entry:
diff --git a/test/CodeGen/X86/shift-bmi2.ll b/test/CodeGen/X86/shift-bmi2.ll
new file mode 100644
index 0000000000..d1f321f177
--- /dev/null
+++ b/test/CodeGen/X86/shift-bmi2.ll
@@ -0,0 +1,178 @@
+; RUN: llc -mtriple=i386-unknown-unknown -mcpu=core-avx2 < %s | FileCheck --check-prefix=BMI2 %s
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=core-avx2 < %s | FileCheck --check-prefix=BMI264 %s
+
+define i32 @shl32(i32 %x, i32 %shamt) nounwind uwtable readnone {
+entry:
+ %shl = shl i32 %x, %shamt
+; BMI2: shl32
+; BMI2: shlxl
+; BMI2: ret
+; BMI264: shl32
+; BMI264: shlxl
+; BMI264: ret
+ ret i32 %shl
+}
+
+define i32 @shl32i(i32 %x) nounwind uwtable readnone {
+entry:
+ %shl = shl i32 %x, 5
+; BMI2: shl32i
+; BMI2-NOT: shlxl
+; BMI2: ret
+; BMI264: shl32i
+; BMI264-NOT: shlxl
+; BMI264: ret
+ ret i32 %shl
+}
+
+define i32 @shl32p(i32* %p, i32 %shamt) nounwind uwtable readnone {
+entry:
+ %x = load i32* %p
+ %shl = shl i32 %x, %shamt
+; BMI2: shl32p
+; BMI2: shlxl %{{.+}}, ({{.+}}), %{{.+}}
+; BMI2: ret
+; BMI264: shl32p
+; BMI264: shlxl %{{.+}}, ({{.+}}), %{{.+}}
+; BMI264: ret
+ ret i32 %shl
+}
+
+define i32 @shl32pi(i32* %p) nounwind uwtable readnone {
+entry:
+ %x = load i32* %p
+ %shl = shl i32 %x, 5
+; BMI2: shl32pi
+; BMI2-NOT: shlxl
+; BMI2: ret
+; BMI264: shl32pi
+; BMI264-NOT: shlxl
+; BMI264: ret
+ ret i32 %shl
+}
+
+define i64 @shl64(i64 %x, i64 %shamt) nounwind uwtable readnone {
+entry:
+ %shl = shl i64 %x, %shamt
+; BMI264: shl64
+; BMI264: shlxq
+; BMI264: ret
+ ret i64 %shl
+}
+
+define i64 @shl64i(i64 %x) nounwind uwtable readnone {
+entry:
+ %shl = shl i64 %x, 7
+; BMI264: shl64i
+; BMI264-NOT: shlxq
+; BMI264: ret
+ ret i64 %shl
+}
+
+define i64 @shl64p(i64* %p, i64 %shamt) nounwind uwtable readnone {
+entry:
+ %x = load i64* %p
+ %shl = shl i64 %x, %shamt
+; BMI264: shl64p
+; BMI264: shlxq %{{.+}}, ({{.+}}), %{{.+}}
+; BMI264: ret
+ ret i64 %shl
+}
+
+define i64 @shl64pi(i64* %p) nounwind uwtable readnone {
+entry:
+ %x = load i64* %p
+ %shl = shl i64 %x, 7
+; BMI264: shl64p
+; BMI264-NOT: shlxq
+; BMI264: ret
+ ret i64 %shl
+}
+
+define i32 @lshr32(i32 %x, i32 %shamt) nounwind uwtable readnone {
+entry:
+ %shl = lshr i32 %x, %shamt
+; BMI2: lshr32
+; BMI2: shrxl
+; BMI2: ret
+; BMI264: lshr32
+; BMI264: shrxl
+; BMI264: ret
+ ret i32 %shl
+}
+
+define i32 @lshr32p(i32* %p, i32 %shamt) nounwind uwtable readnone {
+entry:
+ %x = load i32* %p
+ %shl = lshr i32 %x, %shamt
+; BMI2: lshr32p
+; BMI2: shrxl %{{.+}}, ({{.+}}), %{{.+}}
+; BMI2: ret
+; BMI264: lshr32
+; BMI264: shrxl %{{.+}}, ({{.+}}), %{{.+}}
+; BMI264: ret
+ ret i32 %shl
+}
+
+define i64 @lshr64(i64 %x, i64 %shamt) nounwind uwtable readnone {
+entry:
+ %shl = lshr i64 %x, %shamt
+; BMI264: lshr64
+; BMI264: shrxq
+; BMI264: ret
+ ret i64 %shl
+}
+
+define i64 @lshr64p(i64* %p, i64 %shamt) nounwind uwtable readnone {
+entry:
+ %x = load i64* %p
+ %shl = lshr i64 %x, %shamt
+; BMI264: lshr64p
+; BMI264: shrxq %{{.+}}, ({{.+}}), %{{.+}}
+; BMI264: ret
+ ret i64 %shl
+}
+
+define i32 @ashr32(i32 %x, i32 %shamt) nounwind uwtable readnone {
+entry:
+ %shl = ashr i32 %x, %shamt
+; BMI2: ashr32
+; BMI2: sarxl
+; BMI2: ret
+; BMI264: ashr32
+; BMI264: sarxl
+; BMI264: ret
+ ret i32 %shl
+}
+
+define i32 @ashr32p(i32* %p, i32 %shamt) nounwind uwtable readnone {
+entry:
+ %x = load i32* %p
+ %shl = ashr i32 %x, %shamt
+; BMI2: ashr32p
+; BMI2: sarxl %{{.+}}, ({{.+}}), %{{.+}}
+; BMI2: ret
+; BMI264: ashr32
+; BMI264: sarxl %{{.+}}, ({{.+}}), %{{.+}}
+; BMI264: ret
+ ret i32 %shl
+}
+
+define i64 @ashr64(i64 %x, i64 %shamt) nounwind uwtable readnone {
+entry:
+ %shl = ashr i64 %x, %shamt
+; BMI264: ashr64
+; BMI264: sarxq
+; BMI264: ret
+ ret i64 %shl
+}
+
+define i64 @ashr64p(i64* %p, i64 %shamt) nounwind uwtable readnone {
+entry:
+ %x = load i64* %p
+ %shl = ashr i64 %x, %shamt
+; BMI264: ashr64p
+; BMI264: sarxq %{{.+}}, ({{.+}}), %{{.+}}
+; BMI264: ret
+ ret i64 %shl
+}
diff --git a/test/CodeGen/X86/tailcall-64.ll b/test/CodeGen/X86/tailcall-64.ll
index adc8620060..ecc253ba58 100644
--- a/test/CodeGen/X86/tailcall-64.ll
+++ b/test/CodeGen/X86/tailcall-64.ll
@@ -1,6 +1,4 @@
-; RUN: llc < %s | FileCheck %s
-target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-apple-darwin11.4.0"
+; RUN: llc -mtriple=x86_64-apple-macosx -mcpu=core2 < %s | FileCheck %s
declare i64 @testi()
@@ -132,3 +130,28 @@ entry:
%call = tail call i32 (i8*, ...)* %0(i8* null, i32 0, i32 0, i32 0, i32 0, i32 0) nounwind
ret i32 %call
}
+
+define x86_fp80 @fp80_call(x86_fp80 %x) nounwind {
+entry:
+; CHECK: fp80_call:
+; CHECK: jmp _fp80_callee
+ %call = tail call x86_fp80 @fp80_callee(x86_fp80 %x) nounwind
+ ret x86_fp80 %call
+}
+
+declare x86_fp80 @fp80_callee(x86_fp80)
+
+; rdar://12229511
+define x86_fp80 @trunc_fp80(x86_fp80 %x) nounwind {
+entry:
+; CHECK: trunc_fp80
+; CHECK: callq _trunc
+; CHECK-NOT: jmp _trunc
+; CHECK: ret
+ %conv = fptrunc x86_fp80 %x to double
+ %call = tail call double @trunc(double %conv) nounwind readnone
+ %conv1 = fpext double %call to x86_fp80
+ ret x86_fp80 %conv1
+}
+
+declare double @trunc(double) nounwind readnone
diff --git a/test/CodeGen/X86/targetLoweringGeneric.ll b/test/CodeGen/X86/targetLoweringGeneric.ll
index ba5f8f8361..a773e9daef 100644
--- a/test/CodeGen/X86/targetLoweringGeneric.ll
+++ b/test/CodeGen/X86/targetLoweringGeneric.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=i386-apple-darwin9 -fast-isel=false -O0 < %s | FileCheck %s
+; RUN: llc -mtriple=i386-apple-darwin9 -mcpu=corei7 -fast-isel=false -O0 < %s | FileCheck %s
; Gather non-machine specific tests for the transformations in
; CodeGen/SelectionDAG/TargetLowering. Currently, these
diff --git a/test/DebugInfo/bug_null_debuginfo.ll b/test/DebugInfo/bug_null_debuginfo.ll
index a7fdf70d71..b17affed89 100644
--- a/test/DebugInfo/bug_null_debuginfo.ll
+++ b/test/DebugInfo/bug_null_debuginfo.ll
@@ -1,5 +1,4 @@
-; RUN: llc
-
+; RUN: llc < %s
!llvm.dbg.cu = !{!0}
diff --git a/test/MC/ARM/arm-arithmetic-aliases.s b/test/MC/ARM/arm-arithmetic-aliases.s
index 9895cfc02b..3ed4448581 100644
--- a/test/MC/ARM/arm-arithmetic-aliases.s
+++ b/test/MC/ARM/arm-arithmetic-aliases.s
@@ -124,3 +124,7 @@ bicseq r2, r3
@ CHECK: bicseq r2, r2, #6 @ encoding: [0x06,0x20,0xd2,0x03]
@ CHECK: bicseq r2, r2, r3 @ encoding: [0x03,0x20,0xd2,0x01]
@ CHECK: bicseq r2, r2, r3 @ encoding: [0x03,0x20,0xd2,0x01]
+
+add r0, pc, #123
+
+@ CHECK: adr r0, #123 @ encoding: [0x7b,0x00,0x8f,0xe2]
diff --git a/test/MC/MachO/ARM/long-call-branch-island-relocation.s b/test/MC/MachO/ARM/long-call-branch-island-relocation.s
new file mode 100644
index 0000000000..8ee7da54b5
--- /dev/null
+++ b/test/MC/MachO/ARM/long-call-branch-island-relocation.s
@@ -0,0 +1,43 @@
+@ RUN: llvm-mc -n -triple armv7-apple-darwin10 %s -filetype=obj -o %t.o
+@ RUN: macho-dump --dump-section-data < %t.o | FileCheck %s
+
+@ rdar://12359919
+
+ .syntax unified
+ .text
+
+ .globl _bar
+ .align 2
+ .code 16
+ .thumb_func _bar
+_bar:
+ push {r7, lr}
+ mov r7, sp
+ bl _foo
+ pop {r7, pc}
+
+
+_junk:
+@ Make the _foo symbol sufficiently far away to force the 'bl' relocation
+@ above to be out of range. On Darwin, the assembler deals with this by
+@ generating an external relocation so the linker can create a branch
+@ island.
+
+ .space 20000000
+
+ .section __TEXT,initcode,regular,pure_instructions
+
+ .globl _foo
+ .align 2
+ .code 16
+_foo:
+ push {r7, lr}
+ mov r7, sp
+ pop {r7, pc}
+
+
+@ CHECK: ('_relocations', [
+@ CHECK: # Relocation 0
+@ CHECK: (('word-0', 0x4),
+@ CHECK: ('word-1', 0x6d000002)),
+@ CHECK: ])
diff --git a/test/MC/MachO/i386-large-relocations.s b/test/MC/MachO/i386-large-relocations.s
new file mode 100644
index 0000000000..e5a1cfb2c5
--- /dev/null
+++ b/test/MC/MachO/i386-large-relocations.s
@@ -0,0 +1,36 @@
+// RUN: llvm-mc -triple i386-apple-darwin10 %s -filetype=obj -o - | macho-dump | FileCheck %s
+
+.space 0x1ed280
+ .section __DATA,__const
+ .align 4
+.space 0x5181020
+_foo:
+ .long _bar
+ .long 0
+ .long _bar+8
+ .long _bar+24
+ .long 0
+ .long _bar+16
+
+.zerofill __DATA,__bss,__dummy,0x5d780
+.zerofill __DATA,__bss,_bar,48,4
+
+// Normally scattered relocations are used for sym+offset expressions. When
+// the value exceeds 24-bits, however, it's outside what MachO can encode,
+// so the assembler falls back to non-scattered relocations.
+// rdar://12358909
+
+// CHECK: ('_relocations', [
+// CHECK: # Relocation 0
+// CHECK: (('word-0', 0x5181034),
+// CHECK: ('word-1', 0x4000003)),
+// CHECK: # Relocation 1
+// CHECK: (('word-0', 0x518102c),
+// CHECK: ('word-1', 0x4000003)),
+// CHECK: # Relocation 2
+// CHECK: (('word-0', 0x5181028),
+// CHECK: ('word-1', 0x4000003)),
+// CHECK: # Relocation 3
+// CHECK: (('word-0', 0x5181020),
+// CHECK: ('word-1', 0x4000003)),
+// CHECK: ])
diff --git a/test/Other/lint.ll b/test/Other/lint.ll
index c84f56f8f6..78bbbe9e6f 100644
--- a/test/Other/lint.ll
+++ b/test/Other/lint.ll
@@ -9,8 +9,11 @@ declare void @has_noaliases(i32* noalias %p, i32* %q)
declare void @one_arg(i32)
@CG = constant i32 7
+@E = external global i8
define i32 @foo() noreturn {
+ %buf = alloca i8
+ %buf2 = alloca {i8, i8}, align 2
; CHECK: Caller and callee calling convention differ
call void @bar()
; CHECK: Null pointer dereference
@@ -26,8 +29,10 @@ define i32 @foo() noreturn {
; CHECK: Address one pointer dereference
store i32 0, i32* inttoptr (i64 1 to i32*)
; CHECK: Memory reference address is misaligned
- %x = inttoptr i32 1 to i32*
- load i32* %x, align 4
+ store i8 0, i8* %buf, align 2
+; CHECK: Memory reference address is misaligned
+ %gep = getelementptr {i8, i8}* %buf2, i32 0, i32 1
+ store i8 0, i8* %gep, align 2
; CHECK: Division by zero
%sd = sdiv i32 2, 0
; CHECK: Division by zero
@@ -75,6 +80,18 @@ define i32 @foo() noreturn {
; CHECK: Write to read-only memory
call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (i32* @CG to i8*), i8* bitcast (i32* @CG to i8*), i64 1, i32 1, i1 0)
+; CHECK: Undefined behavior: Buffer overflow
+ %wider = bitcast i8* %buf to i16*
+ store i16 0, i16* %wider
+; CHECK: Undefined behavior: Buffer overflow
+ %inner = getelementptr {i8, i8}* %buf2, i32 0, i32 1
+ %wider2 = bitcast i8* %inner to i16*
+ store i16 0, i16* %wider2
+; CHECK: Undefined behavior: Buffer overflow
+ %before = getelementptr i8* %buf, i32 -1
+ %wider3 = bitcast i8* %before to i16*
+ store i16 0, i16* %wider3
+
br label %next
next:
@@ -84,6 +101,10 @@ next:
ret i32 0
foo:
+; CHECK-NOT: Undefined behavior: Buffer overflow
+; CHECK-NOT: Memory reference address is misaligned
+ %e = bitcast i8* @E to i64*
+ store i64 0, i64* %e
%z = add i32 0, 0
; CHECK: unreachable immediately preceded by instruction without side effects
unreachable
diff --git a/test/Transforms/CorrelatedValuePropagation/crash.ll b/test/Transforms/CorrelatedValuePropagation/crash.ll
index 80c43d0f1d..9723d18252 100644
--- a/test/Transforms/CorrelatedValuePropagation/crash.ll
+++ b/test/Transforms/CorrelatedValuePropagation/crash.ll
@@ -35,3 +35,28 @@ srf.exit.i:
func_29.exit:
ret void
}
+
+; PR13972
+define void @test3() nounwind {
+for.body:
+ br label %return
+
+for.cond.i: ; preds = %if.else.i, %for.body.i
+ %e.2.i = phi i32 [ %e.2.i, %if.else.i ], [ -8, %for.body.i ]
+ br i1 undef, label %return, label %for.body.i
+
+for.body.i: ; preds = %for.cond.i
+ switch i32 %e.2.i, label %for.cond3.i [
+ i32 -3, label %if.else.i
+ i32 0, label %for.cond.i
+ ]
+
+for.cond3.i: ; preds = %for.cond3.i, %for.body.i
+ br label %for.cond3.i
+
+if.else.i: ; preds = %for.body.i
+ br label %for.cond.i
+
+return: ; preds = %for.cond.i, %for.body
+ ret void
+}
diff --git a/test/Transforms/DeadStoreElimination/libcalls.ll b/test/Transforms/DeadStoreElimination/libcalls.ll
new file mode 100644
index 0000000000..4639c0bc96
--- /dev/null
+++ b/test/Transforms/DeadStoreElimination/libcalls.ll
@@ -0,0 +1,70 @@
+; RUN: opt -S -basicaa -dse < %s | FileCheck %s
+
+declare i8* @strcpy(i8* %dest, i8* %src) nounwind
+define void @test1(i8* %src) {
+; CHECK: @test1
+ %B = alloca [16 x i8]
+ %dest = getelementptr inbounds [16 x i8]* %B, i64 0, i64 0
+; CHECK-NOT: @strcpy
+ %call = call i8* @strcpy(i8* %dest, i8* %src)
+; CHECK: ret void
+ ret void
+}
+
+declare i8* @strncpy(i8* %dest, i8* %src, i32 %n) nounwind
+define void @test2(i8* %src) {
+; CHECK: @test2
+ %B = alloca [16 x i8]
+ %dest = getelementptr inbounds [16 x i8]* %B, i64 0, i64 0
+; CHECK-NOT: @strncpy
+ %call = call i8* @strncpy(i8* %dest, i8* %src, i32 12)
+; CHECK: ret void
+ ret void
+}
+
+declare i8* @strcat(i8* %dest, i8* %src) nounwind
+define void @test3(i8* %src) {
+; CHECK: @test3
+ %B = alloca [16 x i8]
+ %dest = getelementptr inbounds [16 x i8]* %B, i64 0, i64 0
+; CHECK-NOT: @strcat
+ %call = call i8* @strcat(i8* %dest, i8* %src)
+; CHECK: ret void
+ ret void
+}
+
+declare i8* @strncat(i8* %dest, i8* %src, i32 %n) nounwind
+define void @test4(i8* %src) {
+; CHECK: @test4
+ %B = alloca [16 x i8]
+ %dest = getelementptr inbounds [16 x i8]* %B, i64 0, i64 0
+; CHECK-NOT: @strncat
+ %call = call i8* @strncat(i8* %dest, i8* %src, i32 12)
+; CHECK: ret void
+ ret void
+}
+
+define void @test5(i8* nocapture %src) {
+; CHECK: @test5
+ %dest = alloca [100 x i8], align 16
+ %arraydecay = getelementptr inbounds [100 x i8]* %dest, i64 0, i64 0
+ %call = call i8* @strcpy(i8* %arraydecay, i8* %src)
+; CHECK: %call = call i8* @strcpy
+ %arrayidx = getelementptr inbounds i8* %call, i64 10
+ store i8 97, i8* %arrayidx, align 1
+ ret void
+}
+
+declare void @user(i8* %p)
+define void @test6(i8* %src) {
+; CHECK: @test6
+ %B = alloca [16 x i8]
+ %dest = getelementptr inbounds [16 x i8]* %B, i64 0, i64 0
+; CHECK: @strcpy
+ %call = call i8* @strcpy(i8* %dest, i8* %src)
+; CHECK: @user
+ call void @user(i8* %dest)
+; CHECK: ret void
+ ret void
+}
+
diff --git a/test/Transforms/GlobalOpt/load-store-global.ll b/test/Transforms/GlobalOpt/load-store-global.ll
index f824b2c11c..25a53370fa 100644
--- a/test/Transforms/GlobalOpt/load-store-global.ll
+++ b/test/Transforms/GlobalOpt/load-store-global.ll
@@ -1,15 +1,38 @@
-; RUN: opt < %s -globalopt -S | not grep G
+; RUN: opt < %s -globalopt -S | FileCheck %s
@G = internal global i32 17 ; <i32*> [#uses=3]
+; CHECK-NOT: @G
define void @foo() {
%V = load i32* @G ; <i32> [#uses=1]
store i32 %V, i32* @G
ret void
+; CHECK: @foo
+; CHECK-NEXT: ret void
}
define i32 @bar() {
%X = load i32* @G ; <i32> [#uses=1]
ret i32 %X
+; CHECK: @bar
+; CHECK-NEXT: ret i32 17
+}
+
+@a = internal global i64* null, align 8
+; CHECK-NOT: @a
+
+; PR13968
+define void @qux() nounwind {
+ %b = bitcast i64** @a to i8*
+ %g = getelementptr i64** @a, i32 1
+ %cmp = icmp ne i8* null, %b
+ %cmp2 = icmp eq i8* null, %b
+ %cmp3 = icmp eq i64** null, %g
+ store i64* inttoptr (i64 1 to i64*), i64** @a, align 8
+ %l = load i64** @a, align 8
+ ret void
+; CHECK: @qux
+; CHECK-NOT: store
+; CHECK-NOT: load
}
diff --git a/test/Transforms/InstCombine/2012-09-24-MemcpyFromGlobalCrash.ll b/test/Transforms/InstCombine/2012-09-24-MemcpyFromGlobalCrash.ll
new file mode 100644
index 0000000000..4cd60b42fb
--- /dev/null
+++ b/test/Transforms/InstCombine/2012-09-24-MemcpyFromGlobalCrash.ll
@@ -0,0 +1,19 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Check we don't crash due to lack of target data.
+
+@G = constant [100 x i8] zeroinitializer
+
+declare void @bar(i8*)
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
+define void @test() {
+; CHECK: @test
+; CHECK: llvm.memcpy
+; CHECK: ret void
+ %A = alloca [100 x i8]
+ %a = getelementptr inbounds [100 x i8]* %A, i64 0, i64 0
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* getelementptr inbounds ([100 x i8]* @G, i64 0, i32 0), i64 100, i32 4, i1 false)
+ call void @bar(i8* %a) readonly
+ ret void
+}
diff --git a/test/Transforms/InstCombine/memcpy_chk-1.ll b/test/Transforms/InstCombine/memcpy_chk-1.ll
new file mode 100644
index 0000000000..7c7d91808a
--- /dev/null
+++ b/test/Transforms/InstCombine/memcpy_chk-1.ll
@@ -0,0 +1,60 @@
+; Test lib call simplification of __memcpy_chk calls with various values
+; for dstlen and len.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+%struct.T1 = type { [100 x i32], [100 x i32], [1024 x i8] }
+%struct.T2 = type { [100 x i32], [100 x i32], [1024 x i8] }
+%struct.T3 = type { [100 x i32], [100 x i32], [2048 x i8] }
+
+@t1 = common global %struct.T1 zeroinitializer
+@t2 = common global %struct.T2 zeroinitializer
+@t3 = common global %struct.T3 zeroinitializer
+
+; Check cases where dstlen >= len.
+
+define void @test_simplify1() {
+; CHECK: @test_simplify1
+ %dst = bitcast %struct.T1* @t1 to i8*
+ %src = bitcast %struct.T2* @t2 to i8*
+
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64
+ call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 1824, i64 1824)
+ ret void
+}
+
+define void @test_simplify2() {
+; CHECK: @test_simplify2
+ %dst = bitcast %struct.T1* @t1 to i8*
+ %src = bitcast %struct.T3* @t3 to i8*
+
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64
+ call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 1824, i64 2848)
+ ret void
+}
+
+; Check cases where dstlen < len.
+
+define void @test_no_simplify1() {
+; CHECK: @test_no_simplify1
+ %dst = bitcast %struct.T3* @t3 to i8*
+ %src = bitcast %struct.T1* @t1 to i8*
+
+; CHECK-NEXT: call i8* @__memcpy_chk
+ call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 2848, i64 1824)
+ ret void
+}
+
+define void @test_no_simplify2() {
+; CHECK: @test_no_simplify2
+ %dst = bitcast %struct.T1* @t1 to i8*
+ %src = bitcast %struct.T2* @t2 to i8*
+
+; CHECK-NEXT: call i8* @__memcpy_chk
+ call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 1024, i64 0)
+ ret void
+}
+
+declare i8* @__memcpy_chk(i8*, i8*, i64, i64)
diff --git a/test/Transforms/InstCombine/memcpy_chk-2.ll b/test/Transforms/InstCombine/memcpy_chk-2.ll
new file mode 100644
index 0000000000..aa43029d47
--- /dev/null
+++ b/test/Transforms/InstCombine/memcpy_chk-2.ll
@@ -0,0 +1,24 @@
+; Test that lib call simplification doesn't simplify __memcpy_chk calls
+; with the wrong prototype.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+%struct.T1 = type { [100 x i32], [100 x i32], [1024 x i8] }
+%struct.T2 = type { [100 x i32], [100 x i32], [1024 x i8] }
+
+@t1 = common global %struct.T1 zeroinitializer
+@t2 = common global %struct.T2 zeroinitializer
+
+define void @test_no_simplify() {
+; CHECK: @test_no_simplify
+ %dst = bitcast %struct.T1* @t1 to i8*
+ %src = bitcast %struct.T2* @t2 to i8*
+
+; CHECK-NEXT: call i8* @__memcpy_chk
+ call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 1824)
+ ret void
+}
+
+declare i8* @__memcpy_chk(i8*, i8*, i64)
diff --git a/test/Transforms/InstCombine/memmove_chk-1.ll b/test/Transforms/InstCombine/memmove_chk-1.ll
new file mode 100644
index 0000000000..f9ff9a103a
--- /dev/null
+++ b/test/Transforms/InstCombine/memmove_chk-1.ll
@@ -0,0 +1,60 @@
+; Test lib call simplification of __memmove_chk calls with various values
+; for dstlen and len.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+%struct.T1 = type { [100 x i32], [100 x i32], [1024 x i8] }
+%struct.T2 = type { [100 x i32], [100 x i32], [1024 x i8] }
+%struct.T3 = type { [100 x i32], [100 x i32], [2048 x i8] }
+
+@t1 = common global %struct.T1 zeroinitializer
+@t2 = common global %struct.T2 zeroinitializer
+@t3 = common global %struct.T3 zeroinitializer
+
+; Check cases where dstlen >= len.
+
+define void @test_simplify1() {
+; CHECK: @test_simplify1
+ %dst = bitcast %struct.T1* @t1 to i8*
+ %src = bitcast %struct.T2* @t2 to i8*
+
+; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64
+ call i8* @__memmove_chk(i8* %dst, i8* %src, i64 1824, i64 1824)
+ ret void
+}
+
+define void @test_simplify2() {
+; CHECK: @test_simplify2
+ %dst = bitcast %struct.T1* @t1 to i8*
+ %src = bitcast %struct.T3* @t3 to i8*
+
+; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64
+ call i8* @__memmove_chk(i8* %dst, i8* %src, i64 1824, i64 2848)
+ ret void
+}
+
+; Check cases where dstlen < len.
+
+define void @test_no_simplify1() {
+; CHECK: @test_no_simplify1
+ %dst = bitcast %struct.T3* @t3 to i8*
+ %src = bitcast %struct.T1* @t1 to i8*
+
+; CHECK-NEXT: call i8* @__memmove_chk
+ call i8* @__memmove_chk(i8* %dst, i8* %src, i64 2848, i64 1824)
+ ret void
+}
+
+define void @test_no_simplify2() {
+; CHECK: @test_no_simplify2
+ %dst = bitcast %struct.T1* @t1 to i8*
+ %src = bitcast %struct.T2* @t2 to i8*
+
+; CHECK-NEXT: call i8* @__memmove_chk
+ call i8* @__memmove_chk(i8* %dst, i8* %src, i64 1024, i64 0)
+ ret void
+}
+
+declare i8* @__memmove_chk(i8*, i8*, i64, i64)
diff --git a/test/Transforms/InstCombine/memmove_chk-2.ll b/test/Transforms/InstCombine/memmove_chk-2.ll
new file mode 100644
index 0000000000..f0a915fde2
--- /dev/null
+++ b/test/Transforms/InstCombine/memmove_chk-2.ll
@@ -0,0 +1,24 @@
+; Test that lib call simplification doesn't simplify __memmove_chk calls
+; with the wrong prototype.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+%struct.T1 = type { [100 x i32], [100 x i32], [1024 x i8] }
+%struct.T2 = type { [100 x i32], [100 x i32], [1024 x i8] }
+
+@t1 = common global %struct.T1 zeroinitializer
+@t2 = common global %struct.T2 zeroinitializer
+
+define void @test_no_simplify() {
+; CHECK: @test_no_simplify
+ %dst = bitcast %struct.T1* @t1 to i8*
+ %src = bitcast %struct.T2* @t2 to i8*
+
+; CHECK-NEXT: call i8* @__memmove_chk
+ call i8* @__memmove_chk(i8* %dst, i8* %src, i64 1824)
+ ret void
+}
+
+declare i8* @__memmove_chk(i8*, i8*, i64)
diff --git a/test/Transforms/InstCombine/memset_chk-1.ll b/test/Transforms/InstCombine/memset_chk-1.ll
new file mode 100644
index 0000000000..be4c1cfccd
--- /dev/null
+++ b/test/Transforms/InstCombine/memset_chk-1.ll
@@ -0,0 +1,61 @@
+; Test lib call simplification of __memset_chk calls with various values
+; for dstlen and len.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; rdar://7719085
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+%struct.T = type { [100 x i32], [100 x i32], [1024 x i8] }
+@t = common global %struct.T zeroinitializer
+
+; Check cases where dstlen >= len.
+
+define void @test_simplify1() {
+; CHECK: @test_simplify1
+ %dst = bitcast %struct.T* @t to i8*
+
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64
+ call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 1824)
+ ret void
+}
+
+define void @test_simplify2() {
+; CHECK: @test_simplify2
+ %dst = bitcast %struct.T* @t to i8*
+
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64
+ call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 3648)
+ ret void
+}
+
+define void @test_simplify3() {
+; CHECK: @test_simplify3
+ %dst = bitcast %struct.T* @t to i8*
+
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64
+ call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 -1)
+ ret void
+}
+
+; Check cases where dstlen < len.
+
+define void @test_no_simplify1() {
+; CHECK: @test_no_simplify1
+ %dst = bitcast %struct.T* @t to i8*
+
+; CHECK-NEXT: call i8* @__memset_chk
+ call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 400)
+ ret void
+}
+
+define void @test_no_simplify2() {
+; CHECK: @test_no_simplify2
+ %dst = bitcast %struct.T* @t to i8*
+
+; CHECK-NEXT: call i8* @__memset_chk
+ call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 0)
+ ret void
+}
+
+declare i8* @__memset_chk(i8*, i32, i64, i64)
diff --git a/test/Transforms/InstCombine/memset_chk-2.ll b/test/Transforms/InstCombine/memset_chk-2.ll
new file mode 100644
index 0000000000..60fbf163c2
--- /dev/null
+++ b/test/Transforms/InstCombine/memset_chk-2.ll
@@ -0,0 +1,20 @@
+; Test that lib call simplification doesn't simplify __memset_chk calls
+; with the wrong prototype.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+%struct.T = type { [100 x i32], [100 x i32], [1024 x i8] }
+@t = common global %struct.T zeroinitializer
+
+define void @test_no_simplify() {
+; CHECK: @test_no_simplify
+ %dst = bitcast %struct.T* @t to i8*
+
+; CHECK-NEXT: call i8* @__memset_chk
+ call i8* @__memset_chk(i8* %dst, i32 0, i64 1824)
+ ret void
+}
+
+declare i8* @__memset_chk(i8*, i32, i64)
diff --git a/test/Transforms/InstCombine/memset_chk.ll b/test/Transforms/InstCombine/memset_chk.ll
deleted file mode 100644
index 58ecda582f..0000000000
--- a/test/Transforms/InstCombine/memset_chk.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: opt < %s -instcombine -S | FileCheck %s
-; rdar://7719085
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-
-%struct.data = type { [100 x i32], [100 x i32], [1024 x i8] }
-
-define i32 @t() nounwind ssp {
-; CHECK: @t
-; CHECK: @llvm.memset.p0i8.i64
-entry:
- %0 = alloca %struct.data, align 8 ; <%struct.data*> [#uses=1]
- %1 = bitcast %struct.data* %0 to i8* ; <i8*> [#uses=1]
- %2 = call i8* @__memset_chk(i8* %1, i32 0, i64 1824, i64 1824) nounwind ; <i8*> [#uses=0]
- ret i32 0
-}
-
-declare i8* @__memset_chk(i8*, i32, i64, i64) nounwind
diff --git a/test/Transforms/InstCombine/strcpy_chk-1.ll b/test/Transforms/InstCombine/strcpy_chk-1.ll
new file mode 100644
index 0000000000..c03e8a348b
--- /dev/null
+++ b/test/Transforms/InstCombine/strcpy_chk-1.ll
@@ -0,0 +1,88 @@
+; Test lib call simplification of __strcpy_chk calls with various values
+; for src, dst, and slen.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@a = common global [60 x i8] zeroinitializer, align 1
+@b = common global [60 x i8] zeroinitializer, align 1
+@.str = private constant [8 x i8] c"abcdefg\00"
+
+; Check cases where slen >= strlen (src).
+
+define void @test_simplify1() {
+; CHECK: @test_simplify1
+ %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0
+
+; CHECK-NEXT: call i8* @strcpy
+ call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 60)
+ ret void
+}
+
+define void @test_simplify2() {
+; CHECK: @test_simplify2
+ %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0
+
+; CHECK-NEXT: call i8* @strcpy
+ call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 8)
+ ret void
+}
+
+define void @test_simplify3() {
+; CHECK: @test_simplify3
+ %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0
+
+; CHECK-NEXT: call i8* @strcpy
+ call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 -1)
+ ret void
+}
+
+; Check cases where there are no string constants.
+
+define void @test_simplify4() {
+; CHECK: @test_simplify4
+ %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [60 x i8]* @b, i32 0, i32 0
+
+; CHECK-NEXT: call i8* @strcpy
+ call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 -1)
+ ret void
+}
+
+define void @test_no_simplify1() {
+; CHECK: @test_no_simplify1
+ %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [60 x i8]* @b, i32 0, i32 0
+
+; CHECK-NEXT: call i8* @__strcpy_chk
+ call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 8)
+ ret void
+}
+
+; Check case were slen < strlen (src).
+
+define void @test_no_simplify2() {
+; CHECK: @test_no_simplify2
+ %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0
+
+; CHECK-NEXT: call i8* @__strcpy_chk
+ call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 3)
+ ret void
+}
+
+define void @test_no_simplify3() {
+; CHECK: @test_no_simplify3
+ %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0
+
+; CHECK-NEXT: call i8* @__strcpy_chk
+ call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 0)
+ ret void
+}
+
+declare i8* @__strcpy_chk(i8*, i8*, i32) nounwind
diff --git a/test/Transforms/InstCombine/strcpy_chk-2.ll b/test/Transforms/InstCombine/strcpy_chk-2.ll
new file mode 100644
index 0000000000..d76ea5d068
--- /dev/null
+++ b/test/Transforms/InstCombine/strcpy_chk-2.ll
@@ -0,0 +1,21 @@
+; Test that lib call simplification doesn't simplify __strcpy_chk calls
+; with the wrong prototype.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@a = common global [60 x i16] zeroinitializer, align 1
+@.str = private constant [8 x i8] c"abcdefg\00"
+
+define void @test_no_simplify() {
+; CHECK: @test_no_simplify
+ %dst = getelementptr inbounds [60 x i16]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0
+
+; CHECK-NEXT: call i16* @__strcpy_chk
+ call i16* @__strcpy_chk(i16* %dst, i8* %src, i32 8)
+ ret void
+}
+
+declare i16* @__strcpy_chk(i16*, i8*, i32)
diff --git a/test/Transforms/InstCombine/strcpy_chk.ll b/test/Transforms/InstCombine/strcpy_chk.ll
deleted file mode 100644
index 8835a0ba46..0000000000
--- a/test/Transforms/InstCombine/strcpy_chk.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: opt < %s -instcombine -S | FileCheck %s
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-@a = common global [60 x i8] zeroinitializer, align 1 ; <[60 x i8]*> [#uses=1]
-@.str = private constant [8 x i8] c"abcdefg\00" ; <[8 x i8]*> [#uses=1]
-
-define i8* @foo() nounwind {
-; CHECK: @foo
-; CHECK-NEXT: call i8* @strcpy
- %call = call i8* @__strcpy_chk(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([8 x i8]* @.str, i32 0, i32 0), i32 60) ; <i8*> [#uses=1]
- ret i8* %call
-}
-
-declare i8* @__strcpy_chk(i8*, i8*, i32) nounwind
diff --git a/test/Transforms/InstCombine/strncpy_chk-1.ll b/test/Transforms/InstCombine/strncpy_chk-1.ll
new file mode 100644
index 0000000000..ae7e2fb5f1
--- /dev/null
+++ b/test/Transforms/InstCombine/strncpy_chk-1.ll
@@ -0,0 +1,66 @@
+; Test lib call simplification of __strncpy_chk calls with various values
+; for len and dstlen.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@a = common global [60 x i8] zeroinitializer, align 1
+@b = common global [60 x i8] zeroinitializer, align 1
+@.str = private constant [8 x i8] c"abcdefg\00"
+
+; Check cases where dstlen >= len
+
+define void @test_simplify1() {
+; CHECK: @test_simplify1
+ %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0
+
+; CHECK-NEXT: call i8* @strncpy
+ call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 8, i32 60)
+ ret void
+}
+
+define void @test_simplify2() {
+; CHECK: @test_simplify2
+ %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0
+
+; CHECK-NEXT: call i8* @strncpy
+ call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 8, i32 8)
+ ret void
+}
+
+define void @test_simplify3() {
+; CHECK: @test_simplify3
+ %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [60 x i8]* @b, i32 0, i32 0
+
+; CHECK-NEXT: call i8* @strncpy
+ call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 8, i32 60)
+ ret void
+}
+
+; Check cases where dstlen < len
+
+define void @test_no_simplify1() {
+; CHECK: @test_no_simplify1
+ %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0
+
+; CHECK-NEXT: call i8* @__strncpy_chk
+ call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 8, i32 4)
+ ret void
+}
+
+define void @test_no_simplify2() {
+; CHECK: @test_no_simplify2
+ %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [60 x i8]* @b, i32 0, i32 0
+
+; CHECK-NEXT: call i8* @__strncpy_chk
+ call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 8, i32 0)
+ ret void
+}
+
+declare i8* @__strncpy_chk(i8*, i8*, i32, i32)
diff --git a/test/Transforms/InstCombine/strncpy_chk-2.ll b/test/Transforms/InstCombine/strncpy_chk-2.ll
new file mode 100644
index 0000000000..a0f132ebf6
--- /dev/null
+++ b/test/Transforms/InstCombine/strncpy_chk-2.ll
@@ -0,0 +1,21 @@
+; Test that lib call simplification doesn't simplify __strncpy_chk calls
+; with the wrong prototype.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@a = common global [60 x i16] zeroinitializer, align 1
+@b = common global [60 x i16] zeroinitializer, align 1
+
+define void @test_no_simplify() {
+; CHECK: @test_no_simplify
+ %dst = getelementptr inbounds [60 x i16]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [60 x i16]* @b, i32 0, i32 0
+
+; CHECK-NEXT: call i16* @__strncpy_chk
+ call i16* @__strncpy_chk(i16* %dst, i16* %src, i32 60, i32 60)
+ ret void
+}
+
+declare i16* @__strncpy_chk(i16*, i16*, i32, i32)
diff --git a/test/Transforms/InstCombine/vec_demanded_elts.ll b/test/Transforms/InstCombine/vec_demanded_elts.ll
index 0019a57627..2d90750a2f 100644
--- a/test/Transforms/InstCombine/vec_demanded_elts.ll
+++ b/test/Transforms/InstCombine/vec_demanded_elts.ll
@@ -196,7 +196,7 @@ define <4 x float> @test_select(float %f, float %g) {
; CHECK-NOT: insertelement
; CHECK: %a3 = insertelement <4 x float> %a0, float 3.000000e+00, i32 3
; CHECK-NOT: insertelement
-; CHECK: %ret = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x float> %a3, <4 x float> <float undef, float 4.000000e+00, float 5.000000e+00, float undef>
+; CHECK: shufflevector <4 x float> %a3, <4 x float> <float undef, float 4.000000e+00, float 5.000000e+00, float undef>, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
%a0 = insertelement <4 x float> undef, float %f, i32 0
%a1 = insertelement <4 x float> %a0, float 1.000000e+00, i32 1
%a2 = insertelement <4 x float> %a1, float 2.000000e+00, i32 2
diff --git a/test/Transforms/InstCombine/vec_shuffle.ll b/test/Transforms/InstCombine/vec_shuffle.ll
index 8f78c2e6bd..14f532195d 100644
--- a/test/Transforms/InstCombine/vec_shuffle.ll
+++ b/test/Transforms/InstCombine/vec_shuffle.ll
@@ -153,3 +153,46 @@ define <8 x i8> @test12a(<8 x i8> %tmp6, <8 x i8> %tmp2) nounwind {
ret <8 x i8> %tmp3
}
+; We should form a shuffle out of a select with constant condition.
+define <4 x i16> @test13a(<4 x i16> %lhs, <4 x i16> %rhs) {
+; CHECK: @test13a
+; CHECK-NEXT: shufflevector <4 x i16> %lhs, <4 x i16> %rhs, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEXT: ret
+ %A = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>,
+ <4 x i16> %lhs, <4 x i16> %rhs
+ ret <4 x i16> %A
+}
+
+define <4 x i16> @test13b(<4 x i16> %lhs, <4 x i16> %rhs) {
+; CHECK: @test13b
+; CHECK-NEXT: ret <4 x i16> %lhs
+ %A = select <4 x i1> <i1 true, i1 undef, i1 true, i1 true>,
+ <4 x i16> %lhs, <4 x i16> %rhs
+ ret <4 x i16> %A
+}
+
+define <4 x i16> @test13c(<4 x i16> %lhs, <4 x i16> %rhs) {
+; CHECK: @test13c
+; CHECK-NEXT: shufflevector <4 x i16> %lhs, <4 x i16> %rhs, <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
+; CHECK-NEXT: ret
+ %A = select <4 x i1> <i1 true, i1 undef, i1 true, i1 false>,
+ <4 x i16> %lhs, <4 x i16> %rhs
+ ret <4 x i16> %A
+}
+
+define <4 x i16> @test13d(<4 x i16> %lhs, <4 x i16> %rhs) {
+; CHECK: @test13d
+; CHECK: select
+; CHECK-NEXT: ret
+ %A = select <4 x i1> <i1 true, i1 icmp ugt (<4 x i16>(<4 x i16>, <4 x i16>)* @test13a, <4 x i16>(<4 x i16>, <4 x i16>)* @test13b), i1 true, i1 false>,
+ <4 x i16> %lhs, <4 x i16> %rhs
+ ret <4 x i16> %A
+}
+
+define <4 x i16> @test13e(<4 x i16> %lhs, <4 x i16> %rhs) {
+; CHECK: @test13e
+; CHECK-NEXT: ret <4 x i16> %rhs
+ %A = select <4 x i1> <i1 false, i1 false, i1 false, i1 false>,
+ <4 x i16> %lhs, <4 x i16> %rhs
+ ret <4 x i16> %A
+}
diff --git a/test/Transforms/LoopUnroll/pr11361.ll b/test/Transforms/LoopUnroll/pr11361.ll
index 7ce7f5fe46..62de2f728d 100644
--- a/test/Transforms/LoopUnroll/pr11361.ll
+++ b/test/Transforms/LoopUnroll/pr11361.ll
@@ -1,4 +1,4 @@
-; RUN: opt -loop-unroll -disable-output
+; RUN: opt -loop-unroll -disable-output < %s
; PR11361
; This tests for an iterator invalidation issue.
diff --git a/test/Transforms/LoopUnswitch/2011-06-02-CritSwitch.ll b/test/Transforms/LoopUnswitch/2011-06-02-CritSwitch.ll
index 61c54ddb15..609520064a 100644
--- a/test/Transforms/LoopUnswitch/2011-06-02-CritSwitch.ll
+++ b/test/Transforms/LoopUnswitch/2011-06-02-CritSwitch.ll
@@ -1,4 +1,4 @@
-; RUN: opt -loop-unswitch -disable-output
+; RUN: opt -loop-unswitch -disable-output < %s
; PR10031
define i32 @test(i32 %command) {
diff --git a/test/Transforms/PhaseOrdering/gdce.ll b/test/Transforms/PhaseOrdering/gdce.ll
new file mode 100644
index 0000000000..273e47e97c
--- /dev/null
+++ b/test/Transforms/PhaseOrdering/gdce.ll
@@ -0,0 +1,106 @@
+; RUN: opt -O2 -S %s | FileCheck %s
+
+; Run global DCE to eliminate unused ctor and dtor.
+; rdar://9142819
+
+; CHECK: main
+; CHECK-NOT: _ZN4BaseC1Ev
+; CHECK-NOT: _ZN4BaseD1Ev
+; CHECK-NOT: _ZN4BaseD2Ev
+; CHECK-NOT: _ZN4BaseC2Ev
+; CHECK-NOT: _ZN4BaseD0Ev
+
+%class.Base = type { i32 (...)** }
+
+@_ZTV4Base = linkonce_odr unnamed_addr constant [4 x i8*] [i8* null, i8* bitcast ({ i8*, i8* }* @_ZTI4Base to i8*), i8* bitcast (void (%class.Base*)* @_ZN4BaseD1Ev to i8*), i8* bitcast (void (%class.Base*)* @_ZN4BaseD0Ev to i8*)]
+@_ZTVN10__cxxabiv117__class_type_infoE = external global i8*
+@_ZTS4Base = linkonce_odr constant [6 x i8] c"4Base\00"
+@_ZTI4Base = linkonce_odr unnamed_addr constant { i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8** @_ZTVN10__cxxabiv117__class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([6 x i8]* @_ZTS4Base, i32 0, i32 0) }
+
+define i32 @main() uwtable ssp {
+entry:
+ %retval = alloca i32, align 4
+ %b = alloca %class.Base, align 8
+ %cleanup.dest.slot = alloca i32
+ store i32 0, i32* %retval
+ call void @_ZN4BaseC1Ev(%class.Base* %b)
+ store i32 0, i32* %retval
+ store i32 1, i32* %cleanup.dest.slot
+ call void @_ZN4BaseD1Ev(%class.Base* %b)
+ %0 = load i32* %retval
+ ret i32 %0
+}
+
+define linkonce_odr void @_ZN4BaseC1Ev(%class.Base* %this) unnamed_addr uwtable ssp align 2 {
+entry:
+ %this.addr = alloca %class.Base*, align 8
+ store %class.Base* %this, %class.Base** %this.addr, align 8
+ %this1 = load %class.Base** %this.addr
+ call void @_ZN4BaseC2Ev(%class.Base* %this1)
+ ret void
+}
+
+define linkonce_odr void @_ZN4BaseD1Ev(%class.Base* %this) unnamed_addr uwtable ssp align 2 {
+entry:
+ %this.addr = alloca %class.Base*, align 8
+ store %class.Base* %this, %class.Base** %this.addr, align 8
+ %this1 = load %class.Base** %this.addr
+ call void @_ZN4BaseD2Ev(%class.Base* %this1)
+ ret void
+}
+
+define linkonce_odr void @_ZN4BaseD2Ev(%class.Base* %this) unnamed_addr nounwind uwtable ssp align 2 {
+entry:
+ %this.addr = alloca %class.Base*, align 8
+ store %class.Base* %this, %class.Base** %this.addr, align 8
+ %this1 = load %class.Base** %this.addr
+ ret void
+}
+
+define linkonce_odr void @_ZN4BaseC2Ev(%class.Base* %this) unnamed_addr nounwind uwtable ssp align 2 {
+entry:
+ %this.addr = alloca %class.Base*, align 8
+ store %class.Base* %this, %class.Base** %this.addr, align 8
+ %this1 = load %class.Base** %this.addr
+ %0 = bitcast %class.Base* %this1 to i8***
+ store i8** getelementptr inbounds ([4 x i8*]* @_ZTV4Base, i64 0, i64 2), i8*** %0
+ ret void
+}
+
+define linkonce_odr void @_ZN4BaseD0Ev(%class.Base* %this) unnamed_addr uwtable ssp align 2 {
+entry:
+ %this.addr = alloca %class.Base*, align 8
+ %exn.slot = alloca i8*
+ %ehselector.slot = alloca i32
+ store %class.Base* %this, %class.Base** %this.addr, align 8
+ %this1 = load %class.Base** %this.addr
+ invoke void @_ZN4BaseD1Ev(%class.Base* %this1)
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont: ; preds = %entry
+ %0 = bitcast %class.Base* %this1 to i8*
+ call void @_ZdlPv(i8* %0) nounwind
+ ret void
+
+lpad: ; preds = %entry
+ %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
+ %2 = extractvalue { i8*, i32 } %1, 0
+ store i8* %2, i8** %exn.slot
+ %3 = extractvalue { i8*, i32 } %1, 1
+ store i32 %3, i32* %ehselector.slot
+ %4 = bitcast %class.Base* %this1 to i8*
+ call void @_ZdlPv(i8* %4) nounwind
+ br label %eh.resume
+
+eh.resume: ; preds = %lpad
+ %exn = load i8** %exn.slot
+ %sel = load i32* %ehselector.slot
+ %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0
+ %lpad.val2 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1
+ resume { i8*, i32 } %lpad.val2
+}
+
+declare i32 @__gxx_personality_v0(...)
+
+declare void @_ZdlPv(i8*) nounwind
diff --git a/test/Transforms/SROA/alignment.ll b/test/Transforms/SROA/alignment.ll
new file mode 100644
index 0000000000..02a67551a3
--- /dev/null
+++ b/test/Transforms/SROA/alignment.ll
@@ -0,0 +1,85 @@
+; RUN: opt < %s -sroa -S | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1)
+
+define void @test1({ i8, i8 }* %a, { i8, i8 }* %b) {
+; CHECK: @test1
+; CHECK: %[[gep_a0:.*]] = getelementptr inbounds { i8, i8 }* %a, i64 0, i32 0
+; CHECK: %[[a0:.*]] = load i8* %[[gep_a0]], align 16
+; CHECK: %[[gep_a1:.*]] = getelementptr inbounds { i8, i8 }* %a, i64 0, i32 1
+; CHECK: %[[a1:.*]] = load i8* %[[gep_a1]], align 1
+; CHECK: %[[gep_b0:.*]] = getelementptr inbounds { i8, i8 }* %b, i64 0, i32 0
+; CHECK: store i8 %[[a0]], i8* %[[gep_b0]], align 16
+; CHECK: %[[gep_b1:.*]] = getelementptr inbounds { i8, i8 }* %b, i64 0, i32 1
+; CHECK: store i8 %[[a1]], i8* %[[gep_b1]], align 1
+; CHECK: ret void
+
+entry:
+ %alloca = alloca { i8, i8 }, align 16
+ %gep_a = getelementptr { i8, i8 }* %a, i32 0, i32 0
+ %gep_alloca = getelementptr { i8, i8 }* %alloca, i32 0, i32 0
+ %gep_b = getelementptr { i8, i8 }* %b, i32 0, i32 0
+
+ store i8 420, i8* %gep_alloca, align 16
+
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %gep_alloca, i8* %gep_a, i32 2, i32 16, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %gep_b, i8* %gep_alloca, i32 2, i32 16, i1 false)
+ ret void
+}
+
+define void @test2() {
+; CHECK: @test2
+; CHECK: alloca i16
+; CHECK: load i8* %{{.*}}, align 1
+; CHECK: store i8 42, i8* %{{.*}}, align 1
+; CHECK: ret void
+
+entry:
+ %a = alloca { i8, i8, i8, i8 }, align 2
+ %gep1 = getelementptr { i8, i8, i8, i8 }* %a, i32 0, i32 1
+ %cast1 = bitcast i8* %gep1 to i16*
+ store volatile i16 0, i16* %cast1
+ %gep2 = getelementptr { i8, i8, i8, i8 }* %a, i32 0, i32 2
+ %result = load i8* %gep2, align 2
+ store i8 42, i8* %gep2, align 2
+ ret void
+}
+
+define void @PR13920(<2 x i64>* %a, i16* %b) {
+; Test that alignments on memcpy intrinsics get propagated to loads and stores.
+; CHECK: @PR13920
+; CHECK: load <2 x i64>* %a, align 2
+; CHECK: store <2 x i64> {{.*}}, <2 x i64>* {{.*}}, align 2
+; CHECK: ret void
+
+entry:
+ %aa = alloca <2 x i64>, align 16
+ %aptr = bitcast <2 x i64>* %a to i8*
+ %aaptr = bitcast <2 x i64>* %aa to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %aaptr, i8* %aptr, i32 16, i32 2, i1 false)
+ %bptr = bitcast i16* %b to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %bptr, i8* %aaptr, i32 16, i32 2, i1 false)
+ ret void
+}
+
+define void @test3(i8* %x) {
+; Test that when we promote an alloca to a type with lower ABI alignment, we
+; provide the needed explicit alignment that code using the alloca may be
+; expecting. However, also check that any offset within an alloca can in turn
+; reduce the alignment.
+; CHECK: @test3
+; CHECK: alloca [22 x i8], align 8
+; CHECK: alloca [18 x i8], align 2
+; CHECK: ret void
+
+entry:
+ %a = alloca { i8*, i8*, i8* }
+ %b = alloca { i8*, i8*, i8* }
+ %a_raw = bitcast { i8*, i8*, i8* }* %a to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a_raw, i8* %x, i32 22, i32 8, i1 false)
+ %b_raw = bitcast { i8*, i8*, i8* }* %b to i8*
+ %b_gep = getelementptr i8* %b_raw, i32 6
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b_gep, i8* %x, i32 18, i32 2, i1 false)
+ ret void
+}
diff --git a/test/Transforms/SROA/basictest.ll b/test/Transforms/SROA/basictest.ll
index a61de05f45..e58cef63ba 100644
--- a/test/Transforms/SROA/basictest.ll
+++ b/test/Transforms/SROA/basictest.ll
@@ -855,3 +855,45 @@ entry:
%result = or i8 %load, %load2
ret i8 %result
}
+
+%PR13916.struct = type { i8 }
+
+define void @PR13916.1() {
+; Ensure that we handle overlapping memcpy intrinsics correctly, especially in
+; the case where there is a directly identical value for both source and dest.
+; CHECK: @PR13916.1
+; FIXME: We shouldn't leave this alloca around.
+; CHECK: alloca
+; CHECK: ret void
+
+entry:
+ %a = alloca i8
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %a, i32 1, i32 1, i1 false)
+ %tmp2 = load i8* %a
+ ret void
+}
+
+define void @PR13916.2() {
+; Check whether we continue to handle them correctly when they start off with
+; different pointer value chains, but during rewriting we coalesce them into the
+; same value.
+; CHECK: @PR13916.2
+; FIXME: We shouldn't leave this alloca around.
+; CHECK: alloca
+; CHECK: ret void
+
+entry:
+ %a = alloca %PR13916.struct, align 1
+ br i1 undef, label %if.then, label %if.end
+
+if.then:
+ %tmp0 = bitcast %PR13916.struct* %a to i8*
+ %tmp1 = bitcast %PR13916.struct* %a to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp0, i8* %tmp1, i32 1, i32 1, i1 false)
+ br label %if.end
+
+if.end:
+ %gep = getelementptr %PR13916.struct* %a, i32 0, i32 0
+ %tmp2 = load i8* %gep
+ ret void
+}
diff --git a/test/Transforms/SROA/phi-and-select.ll b/test/Transforms/SROA/phi-and-select.ll
index ad0c55748d..b55d917f72 100644
--- a/test/Transforms/SROA/phi-and-select.ll
+++ b/test/Transforms/SROA/phi-and-select.ll
@@ -327,3 +327,48 @@ exit:
%load = load i32* %a
ret i32 %load
}
+
+define i32 @PR13905() {
+; Check a pattern where we have a chain of dead phi nodes to ensure they are
+; deleted and promotion can proceed.
+; CHECK: @PR13905
+; CHECK-NOT: alloca i32
+; CHECK: ret i32 undef
+
+entry:
+ %h = alloca i32
+ store i32 0, i32* %h
+ br i1 undef, label %loop1, label %exit
+
+loop1:
+ %phi1 = phi i32* [ null, %entry ], [ %h, %loop1 ], [ %h, %loop2 ]
+ br i1 undef, label %loop1, label %loop2
+
+loop2:
+ br i1 undef, label %loop1, label %exit
+
+exit:
+ %phi2 = phi i32* [ %phi1, %loop2 ], [ null, %entry ]
+ ret i32 undef
+}
+
+define i32 @PR13906() {
+; Another pattern which can lead to crashes due to failing to clear out dead
+; PHI nodes or select nodes. This triggers subtly differently from the above
+; cases because the PHI node is (recursively) alive, but the select is dead.
+; CHECK: @PR13906
+; CHECK-NOT: alloca
+
+entry:
+ %c = alloca i32
+ store i32 0, i32* %c
+ br label %for.cond
+
+for.cond:
+ %d.0 = phi i32* [ undef, %entry ], [ %c, %if.then ], [ %d.0, %for.cond ]
+ br i1 undef, label %if.then, label %for.cond
+
+if.then:
+ %tmpcast.d.0 = select i1 undef, i32* %c, i32* %d.0
+ br label %for.cond
+}
diff --git a/test/Transforms/SimplifyCFG/switch_to_lookup_table.ll b/test/Transforms/SimplifyCFG/switch_to_lookup_table.ll
index f0bd688050..134ac4eeb1 100644
--- a/test/Transforms/SimplifyCFG/switch_to_lookup_table.ll
+++ b/test/Transforms/SimplifyCFG/switch_to_lookup_table.ll
@@ -6,17 +6,14 @@ target triple = "x86_64-unknown-linux-gnu"
; The table for @f
; CHECK: @switch.table = private unnamed_addr constant [7 x i32] [i32 55, i32 123, i32 0, i32 -1, i32 27, i32 62, i32 1]
-; The int table for @h
-; CHECK: @switch.table1 = private unnamed_addr constant [4 x i8] c"*\09X\05"
-
; The float table for @h
-; CHECK: @switch.table2 = private unnamed_addr constant [4 x float] [float 0x40091EB860000000, float 0x3FF3BE76C0000000, float 0x4012449BA0000000, float 0x4001AE1480000000]
+; CHECK: @switch.table1 = private unnamed_addr constant [4 x float] [float 0x40091EB860000000, float 0x3FF3BE76C0000000, float 0x4012449BA0000000, float 0x4001AE1480000000]
; The table for @foostring
-; CHECK: @switch.table3 = private unnamed_addr constant [4 x i8*] [i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str1, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str2, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str3, i64 0, i64 0)]
+; CHECK: @switch.table2 = private unnamed_addr constant [4 x i8*] [i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str1, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str2, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str3, i64 0, i64 0)]
; The table for @earlyreturncrash
-; CHECK: @switch.table4 = private unnamed_addr constant [4 x i32] [i32 42, i32 9, i32 88, i32 5]
+; CHECK: @switch.table3 = private unnamed_addr constant [4 x i32] [i32 42, i32 9, i32 88, i32 5]
; A simple int-to-int selection switch.
; It is dense enough to be replaced by table lookup.
@@ -88,14 +85,15 @@ sw.epilog:
; CHECK-NEXT: %0 = icmp ult i32 %switch.tableidx, 4
; CHECK-NEXT: br i1 %0, label %switch.lookup, label %sw.epilog
; CHECK: switch.lookup:
-; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i8]* @switch.table1, i32 0, i32 %switch.tableidx
-; CHECK-NEXT: %switch.load = load i8* %switch.gep
-; CHECK-NEXT: %switch.gep1 = getelementptr inbounds [4 x float]* @switch.table2, i32 0, i32 %switch.tableidx
-; CHECK-NEXT: %switch.load2 = load float* %switch.gep1
+; CHECK-NEXT: %switch.shiftamt = mul i32 %switch.tableidx, 8
+; CHECK-NEXT: %switch.downshift = lshr i32 89655594, %switch.shiftamt
+; CHECK-NEXT: %switch.masked = trunc i32 %switch.downshift to i8
+; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x float]* @switch.table1, i32 0, i32 %switch.tableidx
+; CHECK-NEXT: %switch.load = load float* %switch.gep
; CHECK-NEXT: br label %sw.epilog
; CHECK: sw.epilog:
-; CHECK-NEXT: %a.0 = phi i8 [ %switch.load, %switch.lookup ], [ 7, %entry ]
-; CHECK-NEXT: %b.0 = phi float [ %switch.load2, %switch.lookup ], [ 0x4023FAE140000000, %entry ]
+; CHECK-NEXT: %a.0 = phi i8 [ %switch.masked, %switch.lookup ], [ 7, %entry ]
+; CHECK-NEXT: %b.0 = phi float [ %switch.load, %switch.lookup ], [ 0x4023FAE140000000, %entry ]
; CHECK-NEXT: call void @dummy(i8 signext %a.0, float %b.0)
; CHECK-NEXT: ret void
}
@@ -137,7 +135,7 @@ return:
; CHECK-NEXT: %0 = icmp ult i32 %switch.tableidx, 4
; CHECK-NEXT: br i1 %0, label %switch.lookup, label %return
; CHECK: switch.lookup:
-; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i8*]* @switch.table3, i32 0, i32 %switch.tableidx
+; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i8*]* @switch.table2, i32 0, i32 %switch.tableidx
; CHECK-NEXT: %switch.load = load i8** %switch.gep
; CHECK-NEXT: ret i8* %switch.load
}
@@ -166,9 +164,108 @@ sw.epilog:
; CHECK: @earlyreturncrash
; CHECK: switch.lookup:
-; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i32]* @switch.table4, i32 0, i32 %switch.tableidx
+; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i32]* @switch.table3, i32 0, i32 %switch.tableidx
; CHECK-NEXT: %switch.load = load i32* %switch.gep
; CHECK-NEXT: ret i32 %switch.load
; CHECK: sw.epilog:
; CHECK-NEXT: ret i32 7
}
+
+
+; Example 7 from http://blog.regehr.org/archives/320
+; It is not dense enough for a regular table, but the results
+; can be packed into a bitmap.
+
+define i32 @crud(i8 zeroext %c) {
+entry:
+ %cmp = icmp ult i8 %c, 33
+ br i1 %cmp, label %lor.end, label %switch.early.test
+
+switch.early.test:
+ switch i8 %c, label %lor.rhs [
+ i8 92, label %lor.end
+ i8 62, label %lor.end
+ i8 60, label %lor.end
+ i8 59, label %lor.end
+ i8 58, label %lor.end
+ i8 46, label %lor.end
+ i8 44, label %lor.end
+ i8 34, label %lor.end
+ i8 39, label %switch.edge
+ ]
+
+switch.edge: br label %lor.end
+lor.rhs: br label %lor.end
+
+lor.end:
+ %0 = phi i1 [ true, %switch.early.test ],
+ [ false, %lor.rhs ],
+ [ true, %entry ],
+ [ true, %switch.early.test ],
+ [ true, %switch.early.test ],
+ [ true, %switch.early.test ],
+ [ true, %switch.early.test ],
+ [ true, %switch.early.test ],
+ [ true, %switch.early.test ],
+ [ true, %switch.early.test ],
+ [ true, %switch.edge ]
+ %lor.ext = zext i1 %0 to i32
+ ret i32 %lor.ext
+
+; CHECK: @crud
+; CHECK: entry:
+; CHECK-NEXT: %cmp = icmp ult i8 %c, 33
+; CHECK-NEXT: br i1 %cmp, label %lor.end, label %switch.early.test
+; CHECK: switch.early.test:
+; CHECK-NEXT: %switch.tableidx = sub i8 %c, 34
+; CHECK-NEXT: %0 = icmp ult i8 %switch.tableidx, 59
+; CHECK-NEXT: br i1 %0, label %switch.lookup, label %lor.end
+; CHECK: switch.lookup:
+; CHECK-NEXT: %switch.cast = zext i8 %switch.tableidx to i59
+; CHECK-NEXT: %switch.shiftamt = mul i59 %switch.cast, 1
+; CHECK-NEXT: %switch.downshift = lshr i59 -288230375765830623, %switch.shiftamt
+; CHECK-NEXT: %switch.masked = trunc i59 %switch.downshift to i1
+; CHECK-NEXT: br label %lor.end
+; CHECK: lor.end:
+; CHECK-NEXT: %1 = phi i1 [ true, %entry ], [ %switch.masked, %switch.lookup ], [ false, %switch.early.test ]
+; CHECK-NEXT: %lor.ext = zext i1 %1 to i32
+; CHECK-NEXT: ret i32 %lor.ext
+}
+
+; PR13946
+define i32 @overflow(i32 %type) nounwind {
+entry:
+ switch i32 %type, label %sw.default [
+ i32 -2147483648, label %sw.bb
+ i32 0, label %sw.bb
+ i32 1, label %sw.bb1
+ i32 2, label %sw.bb2
+ i32 -2147483645, label %sw.bb3
+ i32 3, label %sw.bb3
+ ]
+
+sw.bb:
+ br label %if.end
+
+sw.bb1:
+ br label %if.end
+
+sw.bb2:
+ br label %if.end
+
+sw.bb3:
+ br label %if.end
+
+sw.default:
+ br label %if.end
+
+if.else:
+ br label %if.end
+
+if.end:
+ %dirent_type.0 = phi i32 [ 3, %sw.default ], [ 6, %sw.bb3 ], [ 5, %sw.bb2 ], [ 0, %sw.bb1 ], [ 3, %sw.bb ], [ 0, %if.else ]
+ ret i32 %dirent_type.0
+; CHECK: define i32 @overflow
+; CHECK: switch
+; CHECK: phi
+}
diff --git a/tools/lli/CMakeLists.txt b/tools/lli/CMakeLists.txt
index 68cb921028..a9c7adf978 100644
--- a/tools/lli/CMakeLists.txt
+++ b/tools/lli/CMakeLists.txt
@@ -1,6 +1,4 @@
-link_directories( ${LLVM_INTEL_JITEVENTS_LIBDIR} )
-
set(LLVM_LINK_COMPONENTS mcjit jit interpreter nativecodegen bitreader asmparser selectiondag)
if( LLVM_USE_OPROFILE )
diff --git a/unittests/ADT/APFloatTest.cpp b/unittests/ADT/APFloatTest.cpp
index 00b62feaeb..c8d7177d86 100644
--- a/unittests/ADT/APFloatTest.cpp
+++ b/unittests/ADT/APFloatTest.cpp
@@ -689,6 +689,23 @@ TEST(APFloatTest, roundToIntegral) {
P = R;
P.roundToIntegral(APFloat::rmNearestTiesToEven);
EXPECT_EQ(R.convertToDouble(), P.convertToDouble());
+
+ P = APFloat::getZero(APFloat::IEEEdouble);
+ P.roundToIntegral(APFloat::rmTowardZero);
+ EXPECT_EQ(0.0, P.convertToDouble());
+ P = APFloat::getZero(APFloat::IEEEdouble, true);
+ P.roundToIntegral(APFloat::rmTowardZero);
+ EXPECT_EQ(-0.0, P.convertToDouble());
+ P = APFloat::getNaN(APFloat::IEEEdouble);
+ P.roundToIntegral(APFloat::rmTowardZero);
+ EXPECT_TRUE(IsNAN(P.convertToDouble()));
+ P = APFloat::getInf(APFloat::IEEEdouble);
+ P.roundToIntegral(APFloat::rmTowardZero);
+ EXPECT_TRUE(IsInf(P.convertToDouble()) && P.convertToDouble() > 0.0);
+ P = APFloat::getInf(APFloat::IEEEdouble, true);
+ P.roundToIntegral(APFloat::rmTowardZero);
+ EXPECT_TRUE(IsInf(P.convertToDouble()) && P.convertToDouble() < 0.0);
+
}
TEST(APFloatTest, getLargest) {
diff --git a/unittests/ExecutionEngine/JIT/CMakeLists.txt b/unittests/ExecutionEngine/JIT/CMakeLists.txt
index d43d72de40..11cf784e1e 100644
--- a/unittests/ExecutionEngine/JIT/CMakeLists.txt
+++ b/unittests/ExecutionEngine/JIT/CMakeLists.txt
@@ -14,8 +14,6 @@ set(LLVM_OPTIONAL_SOURCES
)
if( LLVM_USE_INTEL_JITEVENTS )
- include_directories( ${LLVM_INTEL_JITEVENTS_INCDIR} )
- link_directories( ${LLVM_INTEL_JITEVENTS_LIBDIR} )
set(ProfileTestSources
IntelJITEventListenerTest.cpp
)
diff --git a/unittests/ExecutionEngine/JIT/IntelJITEventListenerTest.cpp b/unittests/ExecutionEngine/JIT/IntelJITEventListenerTest.cpp
index 8ed7a15be3..d3f66a27e9 100644
--- a/unittests/ExecutionEngine/JIT/IntelJITEventListenerTest.cpp
+++ b/unittests/ExecutionEngine/JIT/IntelJITEventListenerTest.cpp
@@ -11,7 +11,10 @@
using namespace llvm;
-#include "llvm/ExecutionEngine/IntelJITEventsWrapper.h"
+// Because we want to keep the implementation details of the Intel API used to
+// communicate with Amplifier out of the public header files, the header below
+// is included from the source tree instead.
+#include "../../../lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h"
#include <map>
#include <list>
@@ -80,7 +83,7 @@ public:
EXPECT_TRUE(0 != MockWrapper);
Listener.reset(JITEventListener::createIntelJITEventListener(
- MockWrapper.get()));
+ MockWrapper.take()));
EXPECT_TRUE(0 != Listener);
EE->RegisterJITEventListener(Listener.get());
}
diff --git a/unittests/Transforms/Utils/CMakeLists.txt b/unittests/Transforms/Utils/CMakeLists.txt
index 365bfbb0bf..730d83b838 100644
--- a/unittests/Transforms/Utils/CMakeLists.txt
+++ b/unittests/Transforms/Utils/CMakeLists.txt
@@ -4,5 +4,6 @@ set(LLVM_LINK_COMPONENTS
add_llvm_unittest(UtilsTests
Cloning.cpp
+ IntegerDivision.cpp
Local.cpp
)
diff --git a/unittests/Transforms/Utils/IntegerDivision.cpp b/unittests/Transforms/Utils/IntegerDivision.cpp
new file mode 100644
index 0000000000..a3211391d6
--- /dev/null
+++ b/unittests/Transforms/Utils/IntegerDivision.cpp
@@ -0,0 +1,142 @@
+//===- IntegerDivision.cpp - Unit tests for the integer division code -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "gtest/gtest.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Function.h"
+#include "llvm/IRBuilder.h"
+#include "llvm/Module.h"
+#include "llvm/Transforms/Utils/IntegerDivision.h"
+
+using namespace llvm;
+
+namespace {
+
+TEST(IntegerDivision, SDiv) {
+ LLVMContext &C(getGlobalContext());
+ Module M("test division", C);
+ IRBuilder<> Builder(C);
+
+ SmallVector<Type*, 2> ArgTys(2, Builder.getInt32Ty());
+ Function *F = Function::Create(FunctionType::get(Builder.getInt32Ty(),
+ ArgTys, false),
+ GlobalValue::ExternalLinkage, "F", &M);
+ assert(F->getArgumentList().size() == 2);
+
+ BasicBlock *BB = BasicBlock::Create(C, "", F);
+ Builder.SetInsertPoint(BB);
+
+ Function::arg_iterator AI = F->arg_begin();
+ Value *A = AI++;
+ Value *B = AI++;
+
+ Value *Div = Builder.CreateSDiv(A, B);
+ EXPECT_TRUE(BB->front().getOpcode() == Instruction::SDiv);
+
+ Value *Ret = Builder.CreateRet(Div);
+
+ expandDivision(cast<BinaryOperator>(Div));
+ EXPECT_TRUE(BB->front().getOpcode() == Instruction::AShr);
+
+ Instruction* Quotient = dyn_cast<Instruction>(cast<User>(Ret)->getOperand(0));
+ EXPECT_TRUE(Quotient && Quotient->getOpcode() == Instruction::Sub);
+}
+
+TEST(IntegerDivision, UDiv) {
+ LLVMContext &C(getGlobalContext());
+ Module M("test division", C);
+ IRBuilder<> Builder(C);
+
+ SmallVector<Type*, 2> ArgTys(2, Builder.getInt32Ty());
+ Function *F = Function::Create(FunctionType::get(Builder.getInt32Ty(),
+ ArgTys, false),
+ GlobalValue::ExternalLinkage, "F", &M);
+ assert(F->getArgumentList().size() == 2);
+
+ BasicBlock *BB = BasicBlock::Create(C, "", F);
+ Builder.SetInsertPoint(BB);
+
+ Function::arg_iterator AI = F->arg_begin();
+ Value *A = AI++;
+ Value *B = AI++;
+
+ Value *Div = Builder.CreateUDiv(A, B);
+ EXPECT_TRUE(BB->front().getOpcode() == Instruction::UDiv);
+
+ Value *Ret = Builder.CreateRet(Div);
+
+ expandDivision(cast<BinaryOperator>(Div));
+ EXPECT_TRUE(BB->front().getOpcode() == Instruction::ICmp);
+
+ Instruction* Quotient = dyn_cast<Instruction>(cast<User>(Ret)->getOperand(0));
+ EXPECT_TRUE(Quotient && Quotient->getOpcode() == Instruction::PHI);
+}
+
+TEST(IntegerDivision, SRem) {
+ LLVMContext &C(getGlobalContext());
+ Module M("test remainder", C);
+ IRBuilder<> Builder(C);
+
+ SmallVector<Type*, 2> ArgTys(2, Builder.getInt32Ty());
+ Function *F = Function::Create(FunctionType::get(Builder.getInt32Ty(),
+ ArgTys, false),
+ GlobalValue::ExternalLinkage, "F", &M);
+ assert(F->getArgumentList().size() == 2);
+
+ BasicBlock *BB = BasicBlock::Create(C, "", F);
+ Builder.SetInsertPoint(BB);
+
+ Function::arg_iterator AI = F->arg_begin();
+ Value *A = AI++;
+ Value *B = AI++;
+
+ Value *Rem = Builder.CreateSRem(A, B);
+ EXPECT_TRUE(BB->front().getOpcode() == Instruction::SRem);
+
+ Value *Ret = Builder.CreateRet(Rem);
+
+ expandRemainder(cast<BinaryOperator>(Rem));
+ EXPECT_TRUE(BB->front().getOpcode() == Instruction::AShr);
+
+ Instruction* Remainder = dyn_cast<Instruction>(cast<User>(Ret)->getOperand(0));
+ EXPECT_TRUE(Remainder && Remainder->getOpcode() == Instruction::Sub);
+}
+
+TEST(IntegerDivision, URem) {
+ LLVMContext &C(getGlobalContext());
+ Module M("test remainder", C);
+ IRBuilder<> Builder(C);
+
+ SmallVector<Type*, 2> ArgTys(2, Builder.getInt32Ty());
+ Function *F = Function::Create(FunctionType::get(Builder.getInt32Ty(),
+ ArgTys, false),
+ GlobalValue::ExternalLinkage, "F", &M);
+ assert(F->getArgumentList().size() == 2);
+
+ BasicBlock *BB = BasicBlock::Create(C, "", F);
+ Builder.SetInsertPoint(BB);
+
+ Function::arg_iterator AI = F->arg_begin();
+ Value *A = AI++;
+ Value *B = AI++;
+
+ Value *Rem = Builder.CreateURem(A, B);
+ EXPECT_TRUE(BB->front().getOpcode() == Instruction::URem);
+
+ Value *Ret = Builder.CreateRet(Rem);
+
+ expandRemainder(cast<BinaryOperator>(Rem));
+ EXPECT_TRUE(BB->front().getOpcode() == Instruction::ICmp);
+
+ Instruction* Remainder = dyn_cast<Instruction>(cast<User>(Ret)->getOperand(0));
+ EXPECT_TRUE(Remainder && Remainder->getOpcode() == Instruction::Sub);
+}
+
+}
diff --git a/utils/TableGen/AsmMatcherEmitter.cpp b/utils/TableGen/AsmMatcherEmitter.cpp
index 593de698a9..7b49723d21 100644
--- a/utils/TableGen/AsmMatcherEmitter.cpp
+++ b/utils/TableGen/AsmMatcherEmitter.cpp
@@ -1714,9 +1714,9 @@ static void emitConvertToMCInst(CodeGenTarget &Target, StringRef ClassName,
raw_string_ostream OpOS(OperandFnBody);
// Start the operand number lookup function.
OpOS << "unsigned " << Target.getName() << ClassName << "::\n"
- << "getMCInstOperandNumImpl(unsigned Kind, MCInst &Inst,\n"
- << " const SmallVectorImpl<MCParsedAsmOperand*> "
- << "&Operands,\n unsigned OperandNum, unsigned "
+ << "getMCInstOperandNum(unsigned Kind,\n"
+ << " const SmallVectorImpl<MCParsedAsmOperand*> "
+ << "&Operands,\n unsigned OperandNum, unsigned "
<< "&NumMCOperands) {\n"
<< " assert(Kind < CVT_NUM_SIGNATURES && \"Invalid signature!\");\n"
<< " NumMCOperands = 0;\n"
@@ -2617,11 +2617,11 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
<< "unsigned Opcode,\n"
<< " const SmallVectorImpl<MCParsedAsmOperand*> "
<< "&Operands);\n";
- OS << " unsigned getMCInstOperandNumImpl(unsigned Kind, MCInst &Inst,\n"
- << " const "
+ OS << " unsigned getMCInstOperandNum(unsigned Kind,\n"
+ << " const "
<< "SmallVectorImpl<MCParsedAsmOperand*> &Operands,\n "
- << " unsigned OperandNum, unsigned &NumMCOperands);\n";
- OS << " bool mnemonicIsValidImpl(StringRef Mnemonic);\n";
+ << " unsigned OperandNum, unsigned &NumMCOperands);\n";
+ OS << " bool mnemonicIsValid(StringRef Mnemonic);\n";
OS << " unsigned MatchInstructionImpl(\n"
<< " const SmallVectorImpl<MCParsedAsmOperand*> &Operands,\n"
<< " unsigned &Kind, MCInst &Inst, "
@@ -2800,7 +2800,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
// A method to determine if a mnemonic is in the list.
OS << "bool " << Target.getName() << ClassName << "::\n"
- << "mnemonicIsValidImpl(StringRef Mnemonic) {\n";
+ << "mnemonicIsValid(StringRef Mnemonic) {\n";
OS << " // Search the table.\n";
OS << " std::pair<const MatchEntry*, const MatchEntry*> MnemonicRange =\n";
OS << " std::equal_range(MatchTable, MatchTable+"