253 files changed, 11290 insertions, 1813 deletions
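Among the build-system changes in this diff are a new LLVM_ENABLE_BACKTRACES CMake option (ON by default) and a simplified --with-intel-jitevents configure flag that now accepts only yes/no instead of a VTune install path. A rough usage sketch of those two options, assuming an out-of-tree build directory next to the llvm source checkout (the directory layout is an assumption, not part of the commit):

    # CMake: backtraces on crash are enabled by default; disable explicitly if unwanted.
    cmake -DLLVM_ENABLE_BACKTRACES=OFF ../llvm

    # Autoconf: the flag no longer takes a vtune_amplifier_xe path; it is yes/no only.
    ../llvm/configure --with-intel-jitevents=yes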
diff --git a/CMakeLists.txt b/CMakeLists.txt index df781f52cd..d3edc02198 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -115,6 +115,11 @@ if(LLVM_ENABLE_TIMESTAMPS) set(ENABLE_TIMESTAMPS 1) endif() +option(LLVM_ENABLE_BACKTRACES "Enable embedding backtraces on crash." ON) +if(LLVM_ENABLE_BACKTRACES) + set(ENABLE_BACKTRACES 1) +endif() + option(LLVM_ENABLE_FFI "Use libffi to call external functions from the interpreter" OFF) set(FFI_LIBRARY_DIR "" CACHE PATH "Additional directory, where CMake should search for libffi.so") set(FFI_INCLUDE_DIR "" CACHE PATH "Additional directory, where CMake should search for ffi.h or ffi/ffi.h") @@ -172,23 +177,7 @@ option(LLVM_USE_INTEL_JITEVENTS if( LLVM_USE_INTEL_JITEVENTS ) # Verify we are on a supported platform - if( CMAKE_SYSTEM_NAME MATCHES "Windows" OR CMAKE_SYSTEM_NAME MATCHES "Linux" ) - # Directory where Intel Parallel Amplifier XE 2011 is installed. - if ( WIN32 ) - set(LLVM_INTEL_JITEVENTS_DIR $ENV{VTUNE_AMPLIFIER_XE_2011_DIR}) - else ( WIN32 ) - set(LLVM_INTEL_JITEVENTS_DIR "/opt/intel/vtune_amplifier_xe_2011") - endif ( WIN32 ) - - # Set include and library search paths for Intel JIT Events API - set(LLVM_INTEL_JITEVENTS_INCDIR "${LLVM_INTEL_JITEVENTS_DIR}/include") - - if ( CMAKE_SIZEOF_VOID_P EQUAL 8 ) - set(LLVM_INTEL_JITEVENTS_LIBDIR "${LLVM_INTEL_JITEVENTS_DIR}/lib64") - else ( CMAKE_SIZEOF_VOID_P EQUAL 8 ) - set(LLVM_INTEL_JITEVENTS_LIBDIR "${LLVM_INTEL_JITEVENTS_DIR}/lib32") - endif ( CMAKE_SIZEOF_VOID_P EQUAL 8 ) - else() + if( NOT CMAKE_SYSTEM_NAME MATCHES "Windows" AND NOT CMAKE_SYSTEM_NAME MATCHES "Linux" ) message(FATAL_ERROR "Intel JIT API support is available on Linux and Windows only.") endif() @@ -117,16 +117,18 @@ cross-compile-build-tools: unset CFLAGS ; \ unset CXXFLAGS ; \ unset SDKROOT ; \ + unset UNIVERSAL_SDK_PATH ; \ $(PROJ_SRC_DIR)/configure --build=$(BUILD_TRIPLE) \ --host=$(BUILD_TRIPLE) --target=$(BUILD_TRIPLE) \ --disable-polly ; \ cd .. ; \ fi; \ - (unset SDKROOT; \ - $(MAKE) -C BuildTools \ + ($(MAKE) -C BuildTools \ BUILD_DIRS_ONLY=1 \ NACL_SANDBOX=0 \ UNIVERSAL= \ + UNIVERSAL_SDK_PATH= \ + SDKROOT= \ TARGET_NATIVE_ARCH="$(TARGET_NATIVE_ARCH)" \ TARGETS_TO_BUILD="$(TARGETS_TO_BUILD)" \ ENABLE_OPTIMIZED=$(ENABLE_OPTIMIZED) \ diff --git a/autoconf/configure.ac b/autoconf/configure.ac index 354d3d7322..12dd5ea783 100644 --- a/autoconf/configure.ac +++ b/autoconf/configure.ac @@ -1295,46 +1295,23 @@ AC_DEFINE_UNQUOTED([LLVM_USE_OPROFILE],$USE_OPROFILE, dnl Enable support for Intel JIT Events API. AC_ARG_WITH(intel-jitevents, - AS_HELP_STRING([--with-intel-jitevents=<vtune-amplifier-dir>], - [Specify location of run-time support library for Intel JIT API (default=/opt/intel/vtune_amplifier_xe_2011)]), + AS_HELP_STRING([--with-intel-jitevents Notify Intel JIT profiling API of generated code]), [ + case "$withval" in + yes) AC_SUBST(USE_INTEL_JITEVENTS,[1]);; + no) AC_SUBST(USE_INTEL_JITEVENTS,[0]);; + *) AC_MSG_ERROR([Invalid setting for --with-intel-jitevents. 
Use "yes" or "no"]);; + esac + case $llvm_cv_os_type in Linux|Win32|Cygwin|MingW) ;; - *) - AC_MSG_ERROR([ - Intel JIT API support is available on Linux and Windows only."]) ;; + *) AC_MSG_ERROR([Intel JIT API support is available on Linux and Windows only.]);; esac - AC_SUBST(USE_INTEL_JITEVENTS, [1]) case "$llvm_cv_target_arch" in - x86) llvm_intel_jitevents_archdir="lib32";; - x86_64) llvm_intel_jitevents_archdir="lib64";; - *) echo "Target architecture $llvm_cv_target_arch does not support Intel JIT Events API" - exit -1;; - esac - INTEL_JITEVENTS_INCDIR="/opt/intel/vtune_amplifier_xe_2011/include" - INTEL_JITEVENTS_LIBDIR="/opt/intel/vtune_amplifier_xe_2011/$llvm_intel_jitevents_archdir" - case "$withval" in - /* | [[A-Za-z]]:[[\\/]]*) INTEL_JITEVENTS_INCDIR=$withval/include - INTEL_JITEVENTS_LIBDIR=$withval/$llvm_intel_jitevents_archdir ;; - *) ;; + x86|x86_64) ;; + *) AC_MSG_ERROR([Target architecture $llvm_cv_target_arch does not support Intel JIT Events API.]);; esac - - AC_SUBST(INTEL_JITEVENTS_INCDIR) - AC_SUBST(INTEL_JITEVENTS_LIBDIR) - - LIBS="$LIBS -L${INTEL_JITEVENTS_LIBDIR}" - CPPFLAGS="$CPPFLAGS -I$INTEL_JITEVENTS_INCDIR" - - AC_SEARCH_LIBS(iJIT_IsProfilingActive, jitprofiling, [], [ - echo "Error! Cannot find libjitprofiling.a. Please check path specified in flag --with-intel-jitevents" - exit -1 - ]) - AC_CHECK_HEADER([jitprofiling.h], [], [ - echo "Error! Cannot find jitprofiling.h. Please check path specified in flag --with-intel-jitevents" - exit -1 - ]) - ], [ AC_SUBST(USE_INTEL_JITEVENTS, [0]) @@ -766,8 +766,6 @@ COVERED_SWITCH_DEFAULT USE_UDIS86 USE_OPROFILE USE_INTEL_JITEVENTS -INTEL_JITEVENTS_INCDIR -INTEL_JITEVENTS_LIBDIR XML2CONFIG LIBXML2_LIBS LIBXML2_INC @@ -1462,10 +1460,8 @@ Optional Packages: --with-udis86=<path> Use udis86 external x86 disassembler library --with-oprofile=<prefix> Tell OProfile >= 0.9.4 how to symbolize JIT output - --with-intel-jitevents=<vtune-amplifier-dir> - Specify location of run-time support library for - Intel JIT API - (default=/opt/intel/vtune_amplifier_xe_2011) + --with-intel-jitevents Notify Intel JIT profiling API of generated code + Some influential environment variables: CC C compiler command @@ -10321,7 +10317,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<EOF -#line 10319 "configure" +#line 10315 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -13581,308 +13577,30 @@ _ACEOF # Check whether --with-intel-jitevents was given. if test "${with_intel_jitevents+set}" = set; then withval=$with_intel_jitevents; + case "$withval" in + yes) USE_INTEL_JITEVENTS=1 +;; + no) USE_INTEL_JITEVENTS=0 +;; + *) { { echo "$as_me:$LINENO: error: Invalid setting for --with-intel-jitevents. Use \"yes\" or \"no\"" >&5 +echo "$as_me: error: Invalid setting for --with-intel-jitevents. Use \"yes\" or \"no\"" >&2;} + { (exit 1); exit 1; }; };; + esac + case $llvm_cv_os_type in Linux|Win32|Cygwin|MingW) ;; - *) - { { echo "$as_me:$LINENO: error: - Intel JIT API support is available on Linux and Windows only.\"" >&5 -echo "$as_me: error: - Intel JIT API support is available on Linux and Windows only.\"" >&2;} - { (exit 1); exit 1; }; } ;; + *) { { echo "$as_me:$LINENO: error: Intel JIT API support is available on Linux and Windows only." >&5 +echo "$as_me: error: Intel JIT API support is available on Linux and Windows only." 
>&2;} + { (exit 1); exit 1; }; };; esac - USE_INTEL_JITEVENTS=1 - case "$llvm_cv_target_arch" in - x86) llvm_intel_jitevents_archdir="lib32";; - x86_64) llvm_intel_jitevents_archdir="lib64";; - *) echo "Target architecture $llvm_cv_target_arch does not support Intel JIT Events API" - exit -1;; - esac - INTEL_JITEVENTS_INCDIR="/opt/intel/vtune_amplifier_xe_2011/include" - INTEL_JITEVENTS_LIBDIR="/opt/intel/vtune_amplifier_xe_2011/$llvm_intel_jitevents_archdir" - case "$withval" in - /* | [A-Za-z]:[\\/]*) INTEL_JITEVENTS_INCDIR=$withval/include - INTEL_JITEVENTS_LIBDIR=$withval/$llvm_intel_jitevents_archdir ;; - *) ;; + x86|x86_64) ;; + *) { { echo "$as_me:$LINENO: error: Target architecture $llvm_cv_target_arch does not support Intel JIT Events API." >&5 +echo "$as_me: error: Target architecture $llvm_cv_target_arch does not support Intel JIT Events API." >&2;} + { (exit 1); exit 1; }; };; esac - - - - LIBS="$LIBS -L${INTEL_JITEVENTS_LIBDIR}" - CPPFLAGS="$CPPFLAGS -I$INTEL_JITEVENTS_INCDIR" - - { echo "$as_me:$LINENO: checking for library containing iJIT_IsProfilingActive" >&5 -echo $ECHO_N "checking for library containing iJIT_IsProfilingActive... $ECHO_C" >&6; } -if test "${ac_cv_search_iJIT_IsProfilingActive+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - ac_func_search_save_LIBS=$LIBS -cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ - -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. */ -#ifdef __cplusplus -extern "C" -#endif -char iJIT_IsProfilingActive (); -int -main () -{ -return iJIT_IsProfilingActive (); - ; - return 0; -} -_ACEOF -for ac_lib in '' jitprofiling; do - if test -z "$ac_lib"; then - ac_res="none required" - else - ac_res=-l$ac_lib - LIBS="-l$ac_lib $ac_func_search_save_LIBS" - fi - rm -f conftest.$ac_objext conftest$ac_exeext -if { (ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_link") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && - { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' - { (case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_try") 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); }; } && - { ac_try='test -s conftest$ac_exeext' - { (case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_try") 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 - (exit $ac_status); }; }; then - ac_cv_search_iJIT_IsProfilingActive=$ac_res -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - -fi - -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext - if test "${ac_cv_search_iJIT_IsProfilingActive+set}" = set; then - break -fi -done -if test "${ac_cv_search_iJIT_IsProfilingActive+set}" = set; then - : -else - ac_cv_search_iJIT_IsProfilingActive=no -fi -rm conftest.$ac_ext -LIBS=$ac_func_search_save_LIBS -fi -{ echo "$as_me:$LINENO: result: $ac_cv_search_iJIT_IsProfilingActive" >&5 -echo "${ECHO_T}$ac_cv_search_iJIT_IsProfilingActive" >&6; } -ac_res=$ac_cv_search_iJIT_IsProfilingActive -if test "$ac_res" != no; then - test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" - -else - - echo "Error! Cannot find libjitprofiling.a. Please check path specified in flag --with-intel-jitevents" - exit -1 - -fi - - if test "${ac_cv_header_jitprofiling_h+set}" = set; then - { echo "$as_me:$LINENO: checking for jitprofiling.h" >&5 -echo $ECHO_N "checking for jitprofiling.h... $ECHO_C" >&6; } -if test "${ac_cv_header_jitprofiling_h+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -fi -{ echo "$as_me:$LINENO: result: $ac_cv_header_jitprofiling_h" >&5 -echo "${ECHO_T}$ac_cv_header_jitprofiling_h" >&6; } -else - # Is the header compilable? -{ echo "$as_me:$LINENO: checking jitprofiling.h usability" >&5 -echo $ECHO_N "checking jitprofiling.h usability... $ECHO_C" >&6; } -cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ -$ac_includes_default -#include <jitprofiling.h> -_ACEOF -rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && - { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' - { (case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_try") 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); }; } && - { ac_try='test -s conftest.$ac_objext' - { (case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_try") 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); }; }; then - ac_header_compiler=yes -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ac_header_compiler=no -fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 -echo "${ECHO_T}$ac_header_compiler" >&6; } - -# Is the header present? -{ echo "$as_me:$LINENO: checking jitprofiling.h presence" >&5 -echo $ECHO_N "checking jitprofiling.h presence... $ECHO_C" >&6; } -cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. 
*/ -#include <jitprofiling.h> -_ACEOF -if { (ac_try="$ac_cpp conftest.$ac_ext" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } >/dev/null; then - if test -s conftest.err; then - ac_cpp_err=$ac_c_preproc_warn_flag - ac_cpp_err=$ac_cpp_err$ac_c_werror_flag - else - ac_cpp_err= - fi -else - ac_cpp_err=yes -fi -if test -z "$ac_cpp_err"; then - ac_header_preproc=yes -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ac_header_preproc=no -fi - -rm -f conftest.err conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 -echo "${ECHO_T}$ac_header_preproc" >&6; } - -# So? What about this header? -case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in - yes:no: ) - { echo "$as_me:$LINENO: WARNING: jitprofiling.h: accepted by the compiler, rejected by the preprocessor!" >&5 -echo "$as_me: WARNING: jitprofiling.h: accepted by the compiler, rejected by the preprocessor!" >&2;} - { echo "$as_me:$LINENO: WARNING: jitprofiling.h: proceeding with the compiler's result" >&5 -echo "$as_me: WARNING: jitprofiling.h: proceeding with the compiler's result" >&2;} - ac_header_preproc=yes - ;; - no:yes:* ) - { echo "$as_me:$LINENO: WARNING: jitprofiling.h: present but cannot be compiled" >&5 -echo "$as_me: WARNING: jitprofiling.h: present but cannot be compiled" >&2;} - { echo "$as_me:$LINENO: WARNING: jitprofiling.h: check for missing prerequisite headers?" >&5 -echo "$as_me: WARNING: jitprofiling.h: check for missing prerequisite headers?" >&2;} - { echo "$as_me:$LINENO: WARNING: jitprofiling.h: see the Autoconf documentation" >&5 -echo "$as_me: WARNING: jitprofiling.h: see the Autoconf documentation" >&2;} - { echo "$as_me:$LINENO: WARNING: jitprofiling.h: section \"Present But Cannot Be Compiled\"" >&5 -echo "$as_me: WARNING: jitprofiling.h: section \"Present But Cannot Be Compiled\"" >&2;} - { echo "$as_me:$LINENO: WARNING: jitprofiling.h: proceeding with the preprocessor's result" >&5 -echo "$as_me: WARNING: jitprofiling.h: proceeding with the preprocessor's result" >&2;} - { echo "$as_me:$LINENO: WARNING: jitprofiling.h: in the future, the compiler will take precedence" >&5 -echo "$as_me: WARNING: jitprofiling.h: in the future, the compiler will take precedence" >&2;} - ( cat <<\_ASBOX -## ------------------------------------ ## -## Report this to http://llvm.org/bugs/ ## -## ------------------------------------ ## -_ASBOX - ) | sed "s/^/$as_me: WARNING: /" >&2 - ;; -esac -{ echo "$as_me:$LINENO: checking for jitprofiling.h" >&5 -echo $ECHO_N "checking for jitprofiling.h... $ECHO_C" >&6; } -if test "${ac_cv_header_jitprofiling_h+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - ac_cv_header_jitprofiling_h=$ac_header_preproc -fi -{ echo "$as_me:$LINENO: result: $ac_cv_header_jitprofiling_h" >&5 -echo "${ECHO_T}$ac_cv_header_jitprofiling_h" >&6; } - -fi -if test $ac_cv_header_jitprofiling_h = yes; then - : -else - - echo "Error! Cannot find jitprofiling.h. 
Please check path specified in flag --with-intel-jitevents" - exit -1 - -fi - - - - else USE_INTEL_JITEVENTS=0 @@ -22313,8 +22031,6 @@ COVERED_SWITCH_DEFAULT!$COVERED_SWITCH_DEFAULT$ac_delim USE_UDIS86!$USE_UDIS86$ac_delim USE_OPROFILE!$USE_OPROFILE$ac_delim USE_INTEL_JITEVENTS!$USE_INTEL_JITEVENTS$ac_delim -INTEL_JITEVENTS_INCDIR!$INTEL_JITEVENTS_INCDIR$ac_delim -INTEL_JITEVENTS_LIBDIR!$INTEL_JITEVENTS_LIBDIR$ac_delim XML2CONFIG!$XML2CONFIG$ac_delim LIBXML2_LIBS!$LIBXML2_LIBS$ac_delim LIBXML2_INC!$LIBXML2_INC$ac_delim @@ -22343,7 +22059,7 @@ LIBOBJS!$LIBOBJS$ac_delim LTLIBOBJS!$LTLIBOBJS$ac_delim _ACEOF - if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 95; then + if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 93; then break elif $ac_last_try; then { { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5 diff --git a/docs/CMake.rst b/docs/CMake.rst index e1761c5b1d..7f0420c446 100644 --- a/docs/CMake.rst +++ b/docs/CMake.rst @@ -273,11 +273,6 @@ LLVM-specific variables **LLVM_USE_INTEL_JITEVENTS**:BOOL Enable building support for Intel JIT Events API. Defaults to OFF -**LLVM_INTEL_JITEVENTS_DIR**:PATH - Path to installation of Intel(R) VTune(TM) Amplifier XE 2011, used to locate - the ``jitprofiling`` library. Default = ``%VTUNE_AMPLIFIER_XE_2011_DIR%`` - (Windows) | ``/opt/intel/vtune_amplifier_xe_2011`` (Linux) - Executing the test suite ======================== diff --git a/docs/HowToBuildOnARM.rst b/docs/HowToBuildOnARM.rst new file mode 100644 index 0000000000..6f9ac4adc0 --- /dev/null +++ b/docs/HowToBuildOnARM.rst @@ -0,0 +1,34 @@ +.. _how_to_build_on_arm: + +=================================================================== +How To Build On ARM +=================================================================== + +.. sectionauthor:: Wei-Ren Chen (陳韋任) <chenwj@iis.sinica.edu.tw> + +Introduction +============ + +This document contains information about building/testing LLVM and +Clang on ARM. + +Notes On Building LLVM/Clang on ARM +===================================== +Here are some notes on building/testing LLVM/Clang on ARM. Note that +ARM encompasses a wide variety of CPUs; this advice is primarily based +on the ARMv6 and ARMv7 architectures and may be inapplicable to older chips. + +#. If you are building LLVM/Clang on an ARM board with 1G of memory or less, + please use ``gold`` rather then GNU ``ld``. + Building LLVM/Clang with ``--enable-optimized`` + is prefered since it consumes less memory. Otherwise, the building + process will very likely fail due to insufficient memory. In any + case it is probably a good idea to set up a swap partition. + +#. If you want to run ``make + check-all`` after building LLVM/Clang, to avoid false alarms (eg, ARCMT + failure) please use the following configuration: + + .. 
code-block:: bash + + $ ../$LLVM_SRC_DIR/configure --with-abi=aapcs diff --git a/docs/HowToSubmitABug.html b/docs/HowToSubmitABug.html deleted file mode 100644 index 39f8385129..0000000000 --- a/docs/HowToSubmitABug.html +++ /dev/null @@ -1,345 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" - "http://www.w3.org/TR/html4/strict.dtd"> -<html> -<head> - <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> - <title>How to submit an LLVM bug report</title> - <link rel="stylesheet" href="_static/llvm.css" type="text/css"> -</head> -<body> - -<h1> - How to submit an LLVM bug report -</h1> - -<table class="layout" style="width: 90%" > -<tr class="layout"> - <td class="left"> -<ol> - <li><a href="#introduction">Introduction - Got bugs?</a></li> - <li><a href="#crashers">Crashing Bugs</a> - <ul> - <li><a href="#front-end">Front-end bugs</a> - <li><a href="#ct_optimizer">Compile-time optimization bugs</a> - <li><a href="#ct_codegen">Code generator bugs</a> - </ul></li> - <li><a href="#miscompilations">Miscompilations</a></li> - <li><a href="#codegen">Incorrect code generation (JIT and LLC)</a></li> -</ol> -<div class="doc_author"> - <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a> and - <a href="http://misha.brukman.net">Misha Brukman</a></p> -</div> -</td> -</tr> -</table> - -<!-- *********************************************************************** --> -<h2> - <a name="introduction">Introduction - Got bugs?</a> -</h2> -<!-- *********************************************************************** --> - -<div> - -<p>If you're working with LLVM and run into a bug, we definitely want to know -about it. This document describes what you can do to increase the odds of -getting it fixed quickly.</p> - -<p>Basically you have to do two things at a minimum. First, decide whether the -bug <a href="#crashers">crashes the compiler</a> (or an LLVM pass), or if the -compiler is <a href="#miscompilations">miscompiling</a> the program (i.e., the -compiler successfully produces an executable, but it doesn't run right). Based -on -what type of bug it is, follow the instructions in the linked section to narrow -down the bug so that the person who fixes it will be able to find the problem -more easily.</p> - -<p>Once you have a reduced test-case, go to <a -href="http://llvm.org/bugs/enter_bug.cgi">the LLVM Bug Tracking -System</a> and fill out the form with the necessary details (note that you don't -need to pick a category, just use the "new-bugs" category if you're not sure). -The bug description should contain the following -information:</p> - -<ul> - <li>All information necessary to reproduce the problem.</li> - <li>The reduced test-case that triggers the bug.</li> - <li>The location where you obtained LLVM (if not from our Subversion - repository).</li> -</ul> - -<p>Thanks for helping us make LLVM better!</p> - -</div> - -<!-- *********************************************************************** --> -<h2> - <a name="crashers">Crashing Bugs</a> -</h2> -<!-- *********************************************************************** --> - -<div> - -<p>More often than not, bugs in the compiler cause it to crash—often due -to an assertion failure of some sort. The most important -piece of the puzzle is to figure out if it is crashing in the GCC front-end -or if it is one of the LLVM libraries (e.g. 
the optimizer or code generator) -that has problems.</p> - -<p>To figure out which component is crashing (the front-end, -optimizer or code generator), run the -<tt><b>llvm-gcc</b></tt> command line as you were when the crash occurred, but -with the following extra command line options:</p> - -<ul> - <li><tt><b>-O0 -emit-llvm</b></tt>: If <tt>llvm-gcc</tt> still crashes when - passed these options (which disable the optimizer and code generator), then - the crash is in the front-end. Jump ahead to the section on <a - href="#front-end">front-end bugs</a>.</li> - - <li><tt><b>-emit-llvm</b></tt>: If <tt>llvm-gcc</tt> crashes with this option - (which disables the code generator), you found an optimizer bug. Jump ahead - to <a href="#ct_optimizer"> compile-time optimization bugs</a>.</li> - - <li>Otherwise, you have a code generator crash. Jump ahead to <a - href="#ct_codegen">code generator bugs</a>.</li> - -</ul> - -<!-- ======================================================================= --> -<h3> - <a name="front-end">Front-end bugs</a> -</h3> - -<div> - -<p>If the problem is in the front-end, you should re-run the same -<tt>llvm-gcc</tt> command that resulted in the crash, but add the -<tt>-save-temps</tt> option. The compiler will crash again, but it will leave -behind a <tt><i>foo</i>.i</tt> file (containing preprocessed C source code) and -possibly <tt><i>foo</i>.s</tt> for each -compiled <tt><i>foo</i>.c</tt> file. Send us the <tt><i>foo</i>.i</tt> file, -along with the options you passed to llvm-gcc, and a brief description of the -error it caused.</p> - -<p>The <a href="http://delta.tigris.org/">delta</a> tool helps to reduce the -preprocessed file down to the smallest amount of code that still replicates the -problem. You're encouraged to use delta to reduce the code to make the -developers' lives easier. <a -href="http://gcc.gnu.org/wiki/A_guide_to_testcase_reduction">This website</a> -has instructions on the best way to use delta.</p> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="ct_optimizer">Compile-time optimization bugs</a> -</h3> - -<div> - -<p>If you find that a bug crashes in the optimizer, compile your test-case to a -<tt>.bc</tt> file by passing "<tt><b>-emit-llvm -O0 -c -o foo.bc</b></tt>". -Then run:</p> - -<div class="doc_code"> -<p><tt><b>opt</b> -std-compile-opts -debug-pass=Arguments foo.bc - -disable-output</tt></p> -</div> - -<p>This command should do two things: it should print out a list of passes, and -then it should crash in the same way as llvm-gcc. If it doesn't crash, please -follow the instructions for a <a href="#front-end">front-end bug</a>.</p> - -<p>If this does crash, then you should be able to debug this with the following -bugpoint command:</p> - -<div class="doc_code"> -<p><tt><b>bugpoint</b> foo.bc <list of passes printed by -<b>opt</b>></tt></p> -</div> - -<p>Please run this, then file a bug with the instructions and reduced .bc files -that bugpoint emits. If something goes wrong with bugpoint, please submit the -"foo.bc" file and the list of passes printed by <b>opt</b>.</p> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="ct_codegen">Code generator bugs</a> -</h3> - -<div> - -<p>If you find a bug that crashes llvm-gcc in the code generator, compile your -source file to a .bc file by passing "<tt><b>-emit-llvm -c -o foo.bc</b></tt>" -to llvm-gcc (in addition to the options you already pass). 
Once your have -foo.bc, one of the following commands should fail:</p> - -<ol> -<li><tt><b>llc</b> foo.bc</tt></li> -<li><tt><b>llc</b> foo.bc -relocation-model=pic</tt></li> -<li><tt><b>llc</b> foo.bc -relocation-model=static</tt></li> -</ol> - -<p>If none of these crash, please follow the instructions for a -<a href="#front-end">front-end bug</a>. If one of these do crash, you should -be able to reduce this with one of the following bugpoint command lines (use -the one corresponding to the command above that failed):</p> - -<ol> -<li><tt><b>bugpoint</b> -run-llc foo.bc</tt></li> -<li><tt><b>bugpoint</b> -run-llc foo.bc --tool-args - -relocation-model=pic</tt></li> -<li><tt><b>bugpoint</b> -run-llc foo.bc --tool-args - -relocation-model=static</tt></li> -</ol> - -<p>Please run this, then file a bug with the instructions and reduced .bc file -that bugpoint emits. If something goes wrong with bugpoint, please submit the -"foo.bc" file and the option that llc crashes with.</p> - -</div> - -</div> - -<!-- *********************************************************************** --> -<h2> - <a name="miscompilations">Miscompilations</a> -</h2> -<!-- *********************************************************************** --> - -<div> - -<p>If llvm-gcc successfully produces an executable, but that executable doesn't -run right, this is either a bug in the code or a bug in the -compiler. The first thing to check is to make sure it is not using undefined -behavior (e.g. reading a variable before it is defined). In particular, check -to see if the program <a href="http://valgrind.org/">valgrind</a>s clean, -passes purify, or some other memory checker tool. Many of the "LLVM bugs" that -we have chased down ended up being bugs in the program being compiled, not - LLVM.</p> - -<p>Once you determine that the program itself is not buggy, you should choose -which code generator you wish to compile the program with (e.g. LLC or the JIT) -and optionally a series of LLVM passes to run. For example:</p> - -<div class="doc_code"> -<p><tt> -<b>bugpoint</b> -run-llc [... optzn passes ...] file-to-test.bc --args -- [program arguments]</tt></p> -</div> - -<p><tt>bugpoint</tt> will try to narrow down your list of passes to the one pass -that causes an error, and simplify the bitcode file as much as it can to assist -you. It will print a message letting you know how to reproduce the resulting -error.</p> - -</div> - -<!-- *********************************************************************** --> -<h2> - <a name="codegen">Incorrect code generation</a> -</h2> -<!-- *********************************************************************** --> - -<div> - -<p>Similarly to debugging incorrect compilation by mis-behaving passes, you can -debug incorrect code generation by either LLC or the JIT, using -<tt>bugpoint</tt>. 
The process <tt>bugpoint</tt> follows in this case is to try -to narrow the code down to a function that is miscompiled by one or the other -method, but since for correctness, the entire program must be run, -<tt>bugpoint</tt> will compile the code it deems to not be affected with the C -Backend, and then link in the shared object it generates.</p> - -<p>To debug the JIT:</p> - -<div class="doc_code"> -<pre> -bugpoint -run-jit -output=[correct output file] [bitcode file] \ - --tool-args -- [arguments to pass to lli] \ - --args -- [program arguments] -</pre> -</div> - -<p>Similarly, to debug the LLC, one would run:</p> - -<div class="doc_code"> -<pre> -bugpoint -run-llc -output=[correct output file] [bitcode file] \ - --tool-args -- [arguments to pass to llc] \ - --args -- [program arguments] -</pre> -</div> - -<p><b>Special note:</b> if you are debugging MultiSource or SPEC tests that -already exist in the <tt>llvm/test</tt> hierarchy, there is an easier way to -debug the JIT, LLC, and CBE, using the pre-written Makefile targets, which -will pass the program options specified in the Makefiles:</p> - -<div class="doc_code"> -<p><tt> -cd llvm/test/../../program<br> -make bugpoint-jit -</tt></p> -</div> - -<p>At the end of a successful <tt>bugpoint</tt> run, you will be presented -with two bitcode files: a <em>safe</em> file which can be compiled with the C -backend and the <em>test</em> file which either LLC or the JIT -mis-codegenerates, and thus causes the error.</p> - -<p>To reproduce the error that <tt>bugpoint</tt> found, it is sufficient to do -the following:</p> - -<ol> - -<li><p>Regenerate the shared object from the safe bitcode file:</p> - -<div class="doc_code"> -<p><tt> -<b>llc</b> -march=c safe.bc -o safe.c<br> -<b>gcc</b> -shared safe.c -o safe.so -</tt></p> -</div></li> - -<li><p>If debugging LLC, compile test bitcode native and link with the shared - object:</p> - -<div class="doc_code"> -<p><tt> -<b>llc</b> test.bc -o test.s<br> -<b>gcc</b> test.s safe.so -o test.llc<br> -./test.llc [program options] -</tt></p> -</div></li> - -<li><p>If debugging the JIT, load the shared object and supply the test - bitcode:</p> - -<div class="doc_code"> -<p><tt><b>lli</b> -load=safe.so test.bc [program options]</tt></p> -</div></li> - -</ol> - -</div> - -<!-- *********************************************************************** --> -<hr> -<address> - <a href="http://jigsaw.w3.org/css-validator/check/referer"><img - src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a> - <a href="http://validator.w3.org/check/referer"><img - src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a> - - <a href="mailto:sabre@nondot.org">Chris Lattner</a><br> - <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a> - <br> - Last modified: $Date$ -</address> - -</body> -</html> diff --git a/docs/HowToSubmitABug.rst b/docs/HowToSubmitABug.rst new file mode 100644 index 0000000000..ff2d649ce3 --- /dev/null +++ b/docs/HowToSubmitABug.rst @@ -0,0 +1,233 @@ +.. _how-to-submit-a-bug-report: + +================================ +How to submit an LLVM bug report +================================ + +.. sectionauthor:: Chris Lattner <sabre@nondot.org> and Misha Brukman <http://misha.brukman.net> + +Introduction - Got bugs? +======================== + + +If you're working with LLVM and run into a bug, we definitely want to know +about it. This document describes what you can do to increase the odds of +getting it fixed quickly. 
+ +Basically you have to do two things at a minimum. First, decide whether +the bug `crashes the compiler`_ (or an LLVM pass), or if the +compiler is `miscompiling`_ the program (i.e., the +compiler successfully produces an executable, but it doesn't run right). +Based on what type of bug it is, follow the instructions in the linked +section to narrow down the bug so that the person who fixes it will be able +to find the problem more easily. + +Once you have a reduced test-case, go to `the LLVM Bug Tracking System +<http://llvm.org/bugs/enter_bug.cgi>`_ and fill out the form with the +necessary details (note that you don't need to pick a category, just use +the "new-bugs" category if you're not sure). The bug description should +contain the following information: + +* All information necessary to reproduce the problem. +* The reduced test-case that triggers the bug. +* The location where you obtained LLVM (if not from our Subversion + repository). + +Thanks for helping us make LLVM better! + +.. _crashes the compiler: + +Crashing Bugs +============= + +More often than not, bugs in the compiler cause it to crash---often due to +an assertion failure of some sort. The most important piece of the puzzle +is to figure out if it is crashing in the GCC front-end or if it is one of +the LLVM libraries (e.g. the optimizer or code generator) that has +problems. + +To figure out which component is crashing (the front-end, optimizer or code +generator), run the ``llvm-gcc`` command line as you were when the crash +occurred, but with the following extra command line options: + +* ``-O0 -emit-llvm``: If ``llvm-gcc`` still crashes when passed these + options (which disable the optimizer and code generator), then the crash + is in the front-end. Jump ahead to the section on :ref:`front-end bugs + <front-end>`. + +* ``-emit-llvm``: If ``llvm-gcc`` crashes with this option (which disables + the code generator), you found an optimizer bug. Jump ahead to + `compile-time optimization bugs`_. + +* Otherwise, you have a code generator crash. Jump ahead to `code + generator bugs`_. + +.. _front-end bug: +.. _front-end: + +Front-end bugs +-------------- + +If the problem is in the front-end, you should re-run the same ``llvm-gcc`` +command that resulted in the crash, but add the ``-save-temps`` option. +The compiler will crash again, but it will leave behind a ``foo.i`` file +(containing preprocessed C source code) and possibly ``foo.s`` for each +compiled ``foo.c`` file. Send us the ``foo.i`` file, along with the options +you passed to ``llvm-gcc``, and a brief description of the error it caused. + +The `delta <http://delta.tigris.org/>`_ tool helps to reduce the +preprocessed file down to the smallest amount of code that still replicates +the problem. You're encouraged to use delta to reduce the code to make the +developers' lives easier. `This website +<http://gcc.gnu.org/wiki/A_guide_to_testcase_reduction>`_ has instructions +on the best way to use delta. + +.. _compile-time optimization bugs: + +Compile-time optimization bugs +------------------------------ + +If you find that a bug crashes in the optimizer, compile your test-case to a +``.bc`` file by passing "``-emit-llvm -O0 -c -o foo.bc``". +Then run: + +.. code-block:: bash + + opt -std-compile-opts -debug-pass=Arguments foo.bc -disable-output + +This command should do two things: it should print out a list of passes, and +then it should crash in the same way as llvm-gcc. If it doesn't crash, please +follow the instructions for a `front-end bug`_. 
+ +If this does crash, then you should be able to debug this with the following +bugpoint command: + +.. code-block:: bash + + bugpoint foo.bc <list of passes printed by opt> + +Please run this, then file a bug with the instructions and reduced .bc +files that bugpoint emits. If something goes wrong with bugpoint, please +submit the "foo.bc" file and the list of passes printed by ``opt``. + +.. _code generator bugs: + +Code generator bugs +------------------- + +If you find a bug that crashes llvm-gcc in the code generator, compile your +source file to a .bc file by passing "``-emit-llvm -c -o foo.bc``" to +llvm-gcc (in addition to the options you already pass). Once your have +foo.bc, one of the following commands should fail: + +#. ``llc foo.bc`` +#. ``llc foo.bc -relocation-model=pic`` +#. ``llc foo.bc -relocation-model=static`` + +If none of these crash, please follow the instructions for a `front-end +bug`_. If one of these do crash, you should be able to reduce this with +one of the following bugpoint command lines (use the one corresponding to +the command above that failed): + +#. ``bugpoint -run-llc foo.bc`` +#. ``bugpoint -run-llc foo.bc --tool-args -relocation-model=pic`` +#. ``bugpoint -run-llc foo.bc --tool-args -relocation-model=static`` + +Please run this, then file a bug with the instructions and reduced .bc file +that bugpoint emits. If something goes wrong with bugpoint, please submit +the "foo.bc" file and the option that llc crashes with. + +.. _miscompiling: + +Miscompilations +=============== + +If llvm-gcc successfully produces an executable, but that executable +doesn't run right, this is either a bug in the code or a bug in the +compiler. The first thing to check is to make sure it is not using +undefined behavior (e.g. reading a variable before it is defined). In +particular, check to see if the program `valgrind +<http://valgrind.org/>`_'s clean, passes purify, or some other memory +checker tool. Many of the "LLVM bugs" that we have chased down ended up +being bugs in the program being compiled, not LLVM. + +Once you determine that the program itself is not buggy, you should choose +which code generator you wish to compile the program with (e.g. LLC or the JIT) +and optionally a series of LLVM passes to run. For example: + +.. code-block:: bash + + bugpoint -run-llc [... optzn passes ...] file-to-test.bc --args -- [program arguments] + +bugpoint will try to narrow down your list of passes to the one pass that +causes an error, and simplify the bitcode file as much as it can to assist +you. It will print a message letting you know how to reproduce the +resulting error. + +Incorrect code generation +========================= + +Similarly to debugging incorrect compilation by mis-behaving passes, you +can debug incorrect code generation by either LLC or the JIT, using +``bugpoint``. The process ``bugpoint`` follows in this case is to try to +narrow the code down to a function that is miscompiled by one or the other +method, but since for correctness, the entire program must be run, +``bugpoint`` will compile the code it deems to not be affected with the C +Backend, and then link in the shared object it generates. + +To debug the JIT: + +.. code-block:: bash + + bugpoint -run-jit -output=[correct output file] [bitcode file] \ + --tool-args -- [arguments to pass to lli] \ + --args -- [program arguments] + +Similarly, to debug the LLC, one would run: + +.. 
code-block:: bash + + bugpoint -run-llc -output=[correct output file] [bitcode file] \ + --tool-args -- [arguments to pass to llc] \ + --args -- [program arguments] + +**Special note:** if you are debugging MultiSource or SPEC tests that +already exist in the ``llvm/test`` hierarchy, there is an easier way to +debug the JIT, LLC, and CBE, using the pre-written Makefile targets, which +will pass the program options specified in the Makefiles: + +.. code-block:: bash + + cd llvm/test/../../program + make bugpoint-jit + +At the end of a successful ``bugpoint`` run, you will be presented +with two bitcode files: a *safe* file which can be compiled with the C +backend and the *test* file which either LLC or the JIT +mis-codegenerates, and thus causes the error. + +To reproduce the error that ``bugpoint`` found, it is sufficient to do +the following: + +#. Regenerate the shared object from the safe bitcode file: + + .. code-block:: bash + + llc -march=c safe.bc -o safe.c + gcc -shared safe.c -o safe.so + +#. If debugging LLC, compile test bitcode native and link with the shared + object: + + .. code-block:: bash + + llc test.bc -o test.s + gcc test.s safe.so -o test.llc + ./test.llc [program options] + +#. If debugging the JIT, load the shared object and supply the test + bitcode: + + .. code-block:: bash + + lli -load=safe.so test.bc [program options] diff --git a/docs/index.rst b/docs/index.rst index 53d3e7c01b..50f76a3e3f 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -15,43 +15,43 @@ research projects. Similarly, documentation is broken down into several high-level groupings targeted at different audiences: - * **Design & Overview** +* **Design & Overview** - Several introductory papers and presentations are available at - :ref:`design_and_overview`. + Several introductory papers and presentations are available at + :ref:`design_and_overview`. - * **Publications** +* **Publications** - The list of `publications <http://llvm.org/pubs>`_ based on LLVM. + The list of `publications <http://llvm.org/pubs>`_ based on LLVM. - * **User Guides** +* **User Guides** - Those new to the LLVM system should first vist the :ref:`userguides`. + Those new to the LLVM system should first vist the :ref:`userguides`. - NOTE: If you are a user who is only interested in using LLVM-based - compilers, you should look into `Clang <http://clang.llvm.org>`_ or - `DragonEgg <http://dragonegg.llvm.org>`_ instead. The documentation here is - intended for users who have a need to work with the intermediate LLVM - representation. + NOTE: If you are a user who is only interested in using LLVM-based + compilers, you should look into `Clang <http://clang.llvm.org>`_ or + `DragonEgg <http://dragonegg.llvm.org>`_ instead. The documentation here is + intended for users who have a need to work with the intermediate LLVM + representation. - * **API Clients** +* **API Clients** - Developers of applications which use LLVM as a library should visit the - :ref:`programming`. + Developers of applications which use LLVM as a library should visit the + :ref:`programming`. - * **Subsystems** +* **Subsystems** - API clients and LLVM developers may be interested in the - :ref:`subsystems` documentation. + API clients and LLVM developers may be interested in the + :ref:`subsystems` documentation. - * **Development Process** +* **Development Process** - Additional documentation on the LLVM project can be found at - :ref:`development_process`. + Additional documentation on the LLVM project can be found at + :ref:`development_process`. 
- * **Mailing Lists** +* **Mailing Lists** - For more information, consider consulting the LLVM :ref:`mailing_lists`. + For more information, consider consulting the LLVM :ref:`mailing_lists`. .. toctree:: :maxdepth: 2 diff --git a/docs/userguides.rst b/docs/userguides.rst index c7197ef628..c5dd979224 100644 --- a/docs/userguides.rst +++ b/docs/userguides.rst @@ -7,6 +7,7 @@ User Guides :hidden: CMake + HowToBuildOnARM CommandGuide/index DeveloperPolicy GettingStartedVS @@ -15,6 +16,7 @@ User Guides Packaging HowToAddABuilder yaml2obj + HowToSubmitABug * `The LLVM Getting Started Guide <GettingStarted.html>`_ @@ -26,7 +28,11 @@ User Guides An addendum to the main Getting Started guide for those using the `CMake build system <http://www.cmake.org>`_. - + +* :ref:`how_to_build_on_arm` + + Notes on building and testing LLVM/Clang on ARM. + * `Getting Started with the LLVM System using Microsoft Visual Studio <GettingStartedVS.html>`_ @@ -59,7 +65,7 @@ User Guides This describes new features, known bugs, and other limitations. -* `How to Submit A Bug Report <HowToSubmitABug.html>`_ +* :ref:`how-to-submit-a-bug-report` Instructions for properly submitting information about any bugs you run into in the LLVM system. diff --git a/include/llvm/ADT/PackedVector.h b/include/llvm/ADT/PackedVector.h index 2eaddc2b4e..1ae2a77e7e 100644 --- a/include/llvm/ADT/PackedVector.h +++ b/include/llvm/ADT/PackedVector.h @@ -19,32 +19,32 @@ namespace llvm { -template <typename T, unsigned BitNum, bool isSigned> +template <typename T, unsigned BitNum, typename BitVectorTy, bool isSigned> class PackedVectorBase; // This won't be necessary if we can specialize members without specializing // the parent template. -template <typename T, unsigned BitNum> -class PackedVectorBase<T, BitNum, false> { +template <typename T, unsigned BitNum, typename BitVectorTy> +class PackedVectorBase<T, BitNum, BitVectorTy, false> { protected: - static T getValue(const llvm::BitVector &Bits, unsigned Idx) { + static T getValue(const BitVectorTy &Bits, unsigned Idx) { T val = T(); for (unsigned i = 0; i != BitNum; ++i) val = T(val | ((Bits[(Idx << (BitNum-1)) + i] ? 1UL : 0UL) << i)); return val; } - static void setValue(llvm::BitVector &Bits, unsigned Idx, T val) { + static void setValue(BitVectorTy &Bits, unsigned Idx, T val) { assert((val >> BitNum) == 0 && "value is too big"); for (unsigned i = 0; i != BitNum; ++i) Bits[(Idx << (BitNum-1)) + i] = val & (T(1) << i); } }; -template <typename T, unsigned BitNum> -class PackedVectorBase<T, BitNum, true> { +template <typename T, unsigned BitNum, typename BitVectorTy> +class PackedVectorBase<T, BitNum, BitVectorTy, true> { protected: - static T getValue(const llvm::BitVector &Bits, unsigned Idx) { + static T getValue(const BitVectorTy &Bits, unsigned Idx) { T val = T(); for (unsigned i = 0; i != BitNum-1; ++i) val = T(val | ((Bits[(Idx << (BitNum-1)) + i] ? 1UL : 0UL) << i)); @@ -53,7 +53,7 @@ protected: return val; } - static void setValue(llvm::BitVector &Bits, unsigned Idx, T val) { + static void setValue(BitVectorTy &Bits, unsigned Idx, T val) { if (val < 0) { val = ~val; Bits.set((Idx << (BitNum-1)) + BitNum-1); @@ -71,11 +71,12 @@ protected: /// @endcode /// will create a vector accepting values -2, -1, 0, 1. Any other value will hit /// an assertion. 
-template <typename T, unsigned BitNum> -class PackedVector : public PackedVectorBase<T, BitNum, +template <typename T, unsigned BitNum, typename BitVectorTy = BitVector> +class PackedVector : public PackedVectorBase<T, BitNum, BitVectorTy, std::numeric_limits<T>::is_signed> { - llvm::BitVector Bits; - typedef PackedVectorBase<T, BitNum, std::numeric_limits<T>::is_signed> base; + BitVectorTy Bits; + typedef PackedVectorBase<T, BitNum, BitVectorTy, + std::numeric_limits<T>::is_signed> base; public: class reference { diff --git a/include/llvm/ADT/SparseBitVector.h b/include/llvm/ADT/SparseBitVector.h index 791f1082c2..306e92832f 100644 --- a/include/llvm/ADT/SparseBitVector.h +++ b/include/llvm/ADT/SparseBitVector.h @@ -262,6 +262,22 @@ public: } }; +template <unsigned ElementSize> +struct ilist_traits<SparseBitVectorElement<ElementSize> > + : public ilist_default_traits<SparseBitVectorElement<ElementSize> > { + typedef SparseBitVectorElement<ElementSize> Element; + + Element *createSentinel() const { return static_cast<Element *>(&Sentinel); } + static void destroySentinel(Element *) {} + + Element *provideInitialHead() const { return createSentinel(); } + Element *ensureHead(Element *) const { return createSentinel(); } + static void noteHead(Element *, Element *) {} + +private: + mutable ilist_half_node<Element> Sentinel; +}; + template <unsigned ElementSize = 128> class SparseBitVector { typedef ilist<SparseBitVectorElement<ElementSize> > ElementList; diff --git a/include/llvm/Attributes.h b/include/llvm/Attributes.h index 9dc2c1aa57..c9589603f9 100644 --- a/include/llvm/Attributes.h +++ b/include/llvm/Attributes.h @@ -22,6 +22,7 @@ namespace llvm { +class LLVMContext; class Type; namespace Attribute { @@ -96,16 +97,160 @@ DECLARE_LLVM_ATTRIBUTE(AddressSafety,1ULL<<32) ///< Address safety checking is o #undef DECLARE_LLVM_ATTRIBUTE +/// Note that uwtable is about the ABI or the user mandating an entry in the +/// unwind table. The nounwind attribute is about an exception passing by the +/// function. +/// In a theoretical system that uses tables for profiling and sjlj for +/// exceptions, they would be fully independent. In a normal system that +/// uses tables for both, the semantics are: +/// nil = Needs an entry because an exception might pass by. +/// nounwind = No need for an entry +/// uwtable = Needs an entry because the ABI says so and because +/// an exception might pass by. +/// uwtable + nounwind = Needs an entry because the ABI says so. + +/// @brief Attributes that only apply to function parameters. +const AttrConst ParameterOnly = {ByVal_i | Nest_i | + StructRet_i | NoCapture_i}; + +/// @brief Attributes that may be applied to the function itself. These cannot +/// be used on return values or function parameters. +const AttrConst FunctionOnly = {NoReturn_i | NoUnwind_i | ReadNone_i | + ReadOnly_i | NoInline_i | AlwaysInline_i | OptimizeForSize_i | + StackProtect_i | StackProtectReq_i | NoRedZone_i | NoImplicitFloat_i | + Naked_i | InlineHint_i | StackAlignment_i | + UWTable_i | NonLazyBind_i | ReturnsTwice_i | AddressSafety_i}; + +/// @brief Parameter attributes that do not apply to vararg call arguments. +const AttrConst VarArgsIncompatible = {StructRet_i}; + +/// @brief Attributes that are mutually incompatible. 
+const AttrConst MutuallyIncompatible[5] = { + {ByVal_i | Nest_i | StructRet_i}, + {ByVal_i | Nest_i | InReg_i }, + {ZExt_i | SExt_i}, + {ReadNone_i | ReadOnly_i}, + {NoInline_i | AlwaysInline_i} +}; + } // namespace Attribute +/// AttributeImpl - The internal representation of the Attributes class. This is +/// uniquified. +class AttributesImpl; + /// Attributes - A bitset of attributes. class Attributes { // Currently, we need less than 64 bits. uint64_t Bits; + + explicit Attributes(AttributesImpl *A); public: - Attributes() : Bits(0) { } - explicit Attributes(uint64_t Val) : Bits(Val) { } - /*implicit*/ Attributes(Attribute::AttrConst Val) : Bits(Val.v) { } + Attributes() : Bits(0) {} + explicit Attributes(uint64_t Val) : Bits(Val) {} + /*implicit*/ Attributes(Attribute::AttrConst Val) : Bits(Val.v) {} + + class Builder { + friend class Attributes; + uint64_t Bits; + public: + Builder() : Bits(0) {} + Builder(const Attributes &A) : Bits(A.Bits) {} + + void addZExtAttr() { + Bits |= Attribute::ZExt_i; + } + void addSExtAttr() { + Bits |= Attribute::SExt_i; + } + void addNoReturnAttr() { + Bits |= Attribute::NoReturn_i; + } + void addInRegAttr() { + Bits |= Attribute::InReg_i; + } + void addStructRetAttr() { + Bits |= Attribute::StructRet_i; + } + void addNoUnwindAttr() { + Bits |= Attribute::NoUnwind_i; + } + void addNoAliasAttr() { + Bits |= Attribute::NoAlias_i; + } + void addByValAttr() { + Bits |= Attribute::ByVal_i; + } + void addNestAttr() { + Bits |= Attribute::Nest_i; + } + void addReadNoneAttr() { + Bits |= Attribute::ReadNone_i; + } + void addReadOnlyAttr() { + Bits |= Attribute::ReadOnly_i; + } + void addNoInlineAttr() { + Bits |= Attribute::NoInline_i; + } + void addAlwaysInlineAttr() { + Bits |= Attribute::AlwaysInline_i; + } + void addOptimizeForSizeAttr() { + Bits |= Attribute::OptimizeForSize_i; + } + void addStackProtectAttr() { + Bits |= Attribute::StackProtect_i; + } + void addStackProtectReqAttr() { + Bits |= Attribute::StackProtectReq_i; + } + void addNoCaptureAttr() { + Bits |= Attribute::NoCapture_i; + } + void addNoRedZoneAttr() { + Bits |= Attribute::NoRedZone_i; + } + void addNoImplicitFloatAttr() { + Bits |= Attribute::NoImplicitFloat_i; + } + void addNakedAttr() { + Bits |= Attribute::Naked_i; + } + void addInlineHintAttr() { + Bits |= Attribute::InlineHint_i; + } + void addReturnsTwiceAttr() { + Bits |= Attribute::ReturnsTwice_i; + } + void addUWTableAttr() { + Bits |= Attribute::UWTable_i; + } + void addNonLazyBindAttr() { + Bits |= Attribute::NonLazyBind_i; + } + void addAddressSafetyAttr() { + Bits |= Attribute::AddressSafety_i; + } + void addAlignmentAttr(unsigned Align) { + if (Align == 0) return; + assert(isPowerOf2_32(Align) && "Alignment must be a power of two."); + assert(Align <= 0x40000000 && "Alignment too large."); + Bits |= (Log2_32(Align) + 1) << 16; + } + void addStackAlignmentAttr(unsigned Align) { + // Default alignment, allow the target to define how to align it. + if (Align == 0) return; + + assert(isPowerOf2_32(Align) && "Alignment must be a power of two."); + assert(Align <= 0x100 && "Alignment too large."); + Bits |= (Log2_32(Align) + 1) << 26; + } + }; + + /// get - Return a uniquified Attributes object. This takes the uniquified + /// value from the Builder and wraps it in the Attributes class. + static Attributes get(LLVMContext &Context, Builder &B); // Attribute query methods. // FIXME: StackAlignment & Alignment attributes have no predicate methods. 
@@ -198,20 +343,12 @@ public: return Bits & Attribute::AddressSafety_i; } - uint64_t getRawAlignment() const { - return Bits & Attribute::Alignment_i; - } - uint64_t getRawStackAlignment() const { - return Bits & Attribute::StackAlignment_i; - } - /// This returns the alignment field of an attribute as a byte alignment /// value. unsigned getAlignment() const { if (!hasAlignmentAttr()) return 0; - - return 1U << ((getRawAlignment() >> 16) - 1); + return 1U << (((Bits & Attribute::Alignment_i) >> 16) - 1); } /// This returns the stack alignment field of an attribute as a byte alignment @@ -219,32 +356,7 @@ public: unsigned getStackAlignment() const { if (!hasStackAlignmentAttr()) return 0; - - return 1U << ((getRawStackAlignment() >> 26) - 1); - } - - /// This turns an int alignment (a power of 2, normally) into the form used - /// internally in Attributes. - static Attributes constructAlignmentFromInt(unsigned i) { - // Default alignment, allow the target to define how to align it. - if (i == 0) - return Attribute::None; - - assert(isPowerOf2_32(i) && "Alignment must be a power of two."); - assert(i <= 0x40000000 && "Alignment too large."); - return Attributes((Log2_32(i)+1) << 16); - } - - /// This turns an int stack alignment (which must be a power of 2) into the - /// form used internally in Attributes. - static Attributes constructStackAlignmentFromInt(unsigned i) { - // Default alignment, allow the target to define how to align it. - if (i == 0) - return Attribute::None; - - assert(isPowerOf2_32(i) && "Alignment must be a power of two."); - assert(i <= 0x100 && "Alignment too large."); - return Attributes((Log2_32(i)+1) << 26); + return 1U << (((Bits & Attribute::StackAlignment_i) >> 26) - 1); } // This is a "safe bool() operator". @@ -276,107 +388,86 @@ public: Attributes operator ~ () const { return Attributes(~Bits); } uint64_t Raw() const { return Bits; } - /// The set of Attributes set in Attributes is converted to a string of - /// equivalent mnemonics. This is, presumably, for writing out the mnemonics - /// for the assembly writer. - /// @brief Convert attribute bits to text - std::string getAsString() const; -}; - -namespace Attribute { + /// This turns an int alignment (a power of 2, normally) into the form used + /// internally in Attributes. + static Attributes constructAlignmentFromInt(unsigned i) { + // Default alignment, allow the target to define how to align it. + if (i == 0) + return Attribute::None; -/// Note that uwtable is about the ABI or the user mandating an entry in the -/// unwind table. The nounwind attribute is about an exception passing by the -/// function. -/// In a theoretical system that uses tables for profiling and sjlj for -/// exceptions, they would be fully independent. In a normal system that -/// uses tables for both, the semantics are: -/// nil = Needs an entry because an exception might pass by. -/// nounwind = No need for an entry -/// uwtable = Needs an entry because the ABI says so and because -/// an exception might pass by. -/// uwtable + nounwind = Needs an entry because the ABI says so. + assert(isPowerOf2_32(i) && "Alignment must be a power of two."); + assert(i <= 0x40000000 && "Alignment too large."); + return Attributes((Log2_32(i)+1) << 16); + } -/// @brief Attributes that only apply to function parameters. -const AttrConst ParameterOnly = {ByVal_i | Nest_i | - StructRet_i | NoCapture_i}; + /// This turns an int stack alignment (which must be a power of 2) into the + /// form used internally in Attributes. 
+ static Attributes constructStackAlignmentFromInt(unsigned i) { + // Default alignment, allow the target to define how to align it. + if (i == 0) + return Attribute::None; -/// @brief Attributes that may be applied to the function itself. These cannot -/// be used on return values or function parameters. -const AttrConst FunctionOnly = {NoReturn_i | NoUnwind_i | ReadNone_i | - ReadOnly_i | NoInline_i | AlwaysInline_i | OptimizeForSize_i | - StackProtect_i | StackProtectReq_i | NoRedZone_i | NoImplicitFloat_i | - Naked_i | InlineHint_i | StackAlignment_i | - UWTable_i | NonLazyBind_i | ReturnsTwice_i | AddressSafety_i}; + assert(isPowerOf2_32(i) && "Alignment must be a power of two."); + assert(i <= 0x100 && "Alignment too large."); + return Attributes((Log2_32(i)+1) << 26); + } -/// @brief Parameter attributes that do not apply to vararg call arguments. -const AttrConst VarArgsIncompatible = {StructRet_i}; + /// @brief Which attributes cannot be applied to a type. + static Attributes typeIncompatible(Type *Ty); + + /// This returns an integer containing an encoding of all the LLVM attributes + /// found in the given attribute bitset. Any change to this encoding is a + /// breaking change to bitcode compatibility. + static uint64_t encodeLLVMAttributesForBitcode(Attributes Attrs) { + // FIXME: It doesn't make sense to store the alignment information as an + // expanded out value, we should store it as a log2 value. However, we + // can't just change that here without breaking bitcode compatibility. If + // this ever becomes a problem in practice, we should introduce new tag + // numbers in the bitcode file and have those tags use a more efficiently + // encoded alignment field. + + // Store the alignment in the bitcode as a 16-bit raw value instead of a + // 5-bit log2 encoded value. Shift the bits above the alignment up by 11 + // bits. + uint64_t EncodedAttrs = Attrs.Raw() & 0xffff; + if (Attrs.hasAlignmentAttr()) + EncodedAttrs |= (1ULL << 16) << + (((Attrs.Bits & Attribute::Alignment_i) - 1) >> 16); + EncodedAttrs |= (Attrs.Raw() & (0xfffULL << 21)) << 11; + return EncodedAttrs; + } + + /// This returns an attribute bitset containing the LLVM attributes that have + /// been decoded from the given integer. This function must stay in sync with + /// 'encodeLLVMAttributesForBitcode'. + static Attributes decodeLLVMAttributesForBitcode(uint64_t EncodedAttrs) { + // The alignment is stored as a 16-bit raw value from bits 31--16. We shift + // the bits above 31 down by 11 bits. + unsigned Alignment = (EncodedAttrs & (0xffffULL << 16)) >> 16; + assert((!Alignment || isPowerOf2_32(Alignment)) && + "Alignment must be a power of two."); + + Attributes Attrs(EncodedAttrs & 0xffff); + if (Alignment) + Attrs |= Attributes::constructAlignmentFromInt(Alignment); + Attrs |= Attributes((EncodedAttrs & (0xfffULL << 32)) >> 11); + return Attrs; + } -/// @brief Attributes that are mutually incompatible. -const AttrConst MutuallyIncompatible[5] = { - {ByVal_i | Nest_i | StructRet_i}, - {ByVal_i | Nest_i | InReg_i }, - {ZExt_i | SExt_i}, - {ReadNone_i | ReadOnly_i}, - {NoInline_i | AlwaysInline_i} + /// The set of Attributes set in Attributes is converted to a string of + /// equivalent mnemonics. This is, presumably, for writing out the mnemonics + /// for the assembly writer. + /// @brief Convert attribute bits to text + std::string getAsString() const; }; -/// @brief Which attributes cannot be applied to a type. 
-Attributes typeIncompatible(Type *Ty); - -/// This returns an integer containing an encoding of all the -/// LLVM attributes found in the given attribute bitset. Any -/// change to this encoding is a breaking change to bitcode -/// compatibility. -inline uint64_t encodeLLVMAttributesForBitcode(Attributes Attrs) { - // FIXME: It doesn't make sense to store the alignment information as an - // expanded out value, we should store it as a log2 value. However, we can't - // just change that here without breaking bitcode compatibility. If this ever - // becomes a problem in practice, we should introduce new tag numbers in the - // bitcode file and have those tags use a more efficiently encoded alignment - // field. - - // Store the alignment in the bitcode as a 16-bit raw value instead of a - // 5-bit log2 encoded value. Shift the bits above the alignment up by - // 11 bits. - - uint64_t EncodedAttrs = Attrs.Raw() & 0xffff; - if (Attrs.hasAlignmentAttr()) - EncodedAttrs |= (1ull << 16) << - ((Attrs.getRawAlignment() - 1) >> 16); - EncodedAttrs |= (Attrs.Raw() & (0xfffull << 21)) << 11; - - return EncodedAttrs; -} - -/// This returns an attribute bitset containing the LLVM attributes -/// that have been decoded from the given integer. This function -/// must stay in sync with 'encodeLLVMAttributesForBitcode'. -inline Attributes decodeLLVMAttributesForBitcode(uint64_t EncodedAttrs) { - // The alignment is stored as a 16-bit raw value from bits 31--16. - // We shift the bits above 31 down by 11 bits. - - unsigned Alignment = (EncodedAttrs & (0xffffull << 16)) >> 16; - assert((!Alignment || isPowerOf2_32(Alignment)) && - "Alignment must be a power of two."); - - Attributes Attrs(EncodedAttrs & 0xffff); - if (Alignment) - Attrs |= Attributes::constructAlignmentFromInt(Alignment); - Attrs |= Attributes((EncodedAttrs & (0xfffull << 32)) >> 11); - - return Attrs; -} - -} // end namespace Attribute - /// This is just a pair of values to associate a set of attributes /// with an index. struct AttributeWithIndex { - Attributes Attrs; ///< The attributes that are set, or'd together. - unsigned Index; ///< Index of the parameter for which the attributes apply. - ///< Index 0 is used for return value attributes. - ///< Index ~0U is used for function attributes. + Attributes Attrs; ///< The attributes that are set, or'd together. + unsigned Index; ///< Index of the parameter for which the attributes apply. + ///< Index 0 is used for return value attributes. + ///< Index ~0U is used for function attributes. static AttributeWithIndex get(unsigned Idx, Attributes Attrs) { AttributeWithIndex P; diff --git a/include/llvm/CodeGen/MachineModuleInfoImpls.h b/include/llvm/CodeGen/MachineModuleInfoImpls.h index 9401ffd199..7afc7eb6b3 100644 --- a/include/llvm/CodeGen/MachineModuleInfoImpls.h +++ b/include/llvm/CodeGen/MachineModuleInfoImpls.h @@ -38,7 +38,7 @@ namespace llvm { /// this GV is external. DenseMap<MCSymbol*, StubValueTy> HiddenGVStubs; - virtual void Anchor(); // Out of line virtual method. + virtual void anchor(); // Out of line virtual method. public: MachineModuleInfoMachO(const MachineModuleInfo &) {} @@ -76,7 +76,7 @@ namespace llvm { /// mode. DenseMap<MCSymbol*, StubValueTy> GVStubs; - virtual void Anchor(); // Out of line virtual method. + virtual void anchor(); // Out of line virtual method. 
public: MachineModuleInfoELF(const MachineModuleInfo &) {} diff --git a/include/llvm/CodeGen/ValueTypes.h b/include/llvm/CodeGen/ValueTypes.h index 2d92d025b4..240199291a 100644 --- a/include/llvm/CodeGen/ValueTypes.h +++ b/include/llvm/CodeGen/ValueTypes.h @@ -181,6 +181,18 @@ namespace llvm { SimpleTy <= MVT::LAST_VECTOR_VALUETYPE); } + /// is16BitVector - Return true if this is a 16-bit vector type. + bool is16BitVector() const { + return (SimpleTy == MVT::v2i8 || SimpleTy == MVT::v1i16 || + SimpleTy == MVT::v16i1); + } + + /// is32BitVector - Return true if this is a 32-bit vector type. + bool is32BitVector() const { + return (SimpleTy == MVT::v4i8 || SimpleTy == MVT::v2i16 || + SimpleTy == MVT::v1i32); + } + /// is64BitVector - Return true if this is a 64-bit vector type. bool is64BitVector() const { return (SimpleTy == MVT::v8i8 || SimpleTy == MVT::v4i16 || @@ -563,19 +575,12 @@ namespace llvm { /// is16BitVector - Return true if this is a 16-bit vector type. bool is16BitVector() const { - if (!isSimple()) - return isExtended16BitVector(); - - return (V == MVT::v2i8 || V==MVT::v1i16 || V == MVT::v16i1); + return isSimple() ? V.is16BitVector() : isExtended16BitVector(); } /// is32BitVector - Return true if this is a 32-bit vector type. bool is32BitVector() const { - if (!isSimple()) - return isExtended32BitVector(); - - return (V == MVT::v4i8 || V==MVT::v2i16 - || V==MVT::v1i32); + return isSimple() ? V.is32BitVector() : isExtended32BitVector(); } /// is64BitVector - Return true if this is a 64-bit vector type. diff --git a/include/llvm/Config/config.h.cmake b/include/llvm/Config/config.h.cmake index eb20b6470b..ac760f911a 100644 --- a/include/llvm/Config/config.h.cmake +++ b/include/llvm/Config/config.h.cmake @@ -17,6 +17,9 @@ /* Default <path> to all compiler invocations for --sysroot=<path>. */ #undef DEFAULT_SYSROOT +/* Define if you want backtraces on crash */ +#cmakedefine ENABLE_BACKTRACES + /* Define if position independent code is enabled */ #cmakedefine ENABLE_PIC diff --git a/include/llvm/Function.h b/include/llvm/Function.h index fbd2594a45..fa6d0d3f5b 100644 --- a/include/llvm/Function.h +++ b/include/llvm/Function.h @@ -168,10 +168,10 @@ public: /// void setAttributes(const AttrListPtr &attrs) { AttributeList = attrs; } - /// hasFnAttr - Return true if this function has the given attribute. - bool hasFnAttr(Attributes N) const { - // Function Attributes are stored at ~0 index - return AttributeList.paramHasAttr(~0U, N); + /// getFnAttributes - Return the function attributes for querying. + /// + Attributes getFnAttributes() const { + return AttributeList.getFnAttributes(); } /// addFnAttr - Add function attributes to this function. @@ -195,6 +195,11 @@ public: void setGC(const char *Str); void clearGC(); + /// getParamAttributes - Return the parameter attributes for querying. + Attributes getParamAttributes(unsigned Idx) const { + return AttributeList.getParamAttributes(Idx); + } + /// @brief Determine whether the function has the given attribute. bool paramHasAttr(unsigned i, Attributes attr) const { return AttributeList.paramHasAttr(i, attr); @@ -213,7 +218,7 @@ public: /// @brief Determine if the function does not access memory. 
bool doesNotAccessMemory() const { - return hasFnAttr(Attribute::ReadNone); + return getFnAttributes().hasReadNoneAttr(); } void setDoesNotAccessMemory(bool DoesNotAccessMemory = true) { if (DoesNotAccessMemory) addFnAttr(Attribute::ReadNone); @@ -222,7 +227,7 @@ public: /// @brief Determine if the function does not access or only reads memory. bool onlyReadsMemory() const { - return doesNotAccessMemory() || hasFnAttr(Attribute::ReadOnly); + return doesNotAccessMemory() || getFnAttributes().hasReadOnlyAttr(); } void setOnlyReadsMemory(bool OnlyReadsMemory = true) { if (OnlyReadsMemory) addFnAttr(Attribute::ReadOnly); @@ -231,7 +236,7 @@ public: /// @brief Determine if the function cannot return. bool doesNotReturn() const { - return hasFnAttr(Attribute::NoReturn); + return getFnAttributes().hasNoReturnAttr(); } void setDoesNotReturn(bool DoesNotReturn = true) { if (DoesNotReturn) addFnAttr(Attribute::NoReturn); @@ -240,7 +245,7 @@ public: /// @brief Determine if the function cannot unwind. bool doesNotThrow() const { - return hasFnAttr(Attribute::NoUnwind); + return getFnAttributes().hasNoUnwindAttr(); } void setDoesNotThrow(bool DoesNotThrow = true) { if (DoesNotThrow) addFnAttr(Attribute::NoUnwind); @@ -250,7 +255,7 @@ public: /// @brief True if the ABI mandates (or the user requested) that this /// function be in a unwind table. bool hasUWTable() const { - return hasFnAttr(Attribute::UWTable); + return getFnAttributes().hasUWTableAttr(); } void setHasUWTable(bool HasUWTable = true) { if (HasUWTable) @@ -267,13 +272,14 @@ public: /// @brief Determine if the function returns a structure through first /// pointer argument. bool hasStructRetAttr() const { - return paramHasAttr(1, Attribute::StructRet); + return getParamAttributes(1).hasStructRetAttr(); } /// @brief Determine if the parameter does not alias other parameters. /// @param n The parameter to check. 1 is the first parameter, 0 is the return bool doesNotAlias(unsigned n) const { - return paramHasAttr(n, Attribute::NoAlias); + return n != 0 ? getParamAttributes(n).hasNoAliasAttr() : + AttributeList.getRetAttributes().hasNoAliasAttr(); } void setDoesNotAlias(unsigned n, bool DoesNotAlias = true) { if (DoesNotAlias) addAttribute(n, Attribute::NoAlias); @@ -283,7 +289,7 @@ public: /// @brief Determine if the parameter can be captured. /// @param n The parameter to check. 1 is the first parameter, 0 is the return bool doesNotCapture(unsigned n) const { - return paramHasAttr(n, Attribute::NoCapture); + return getParamAttributes(n).hasNoCaptureAttr(); } void setDoesNotCapture(unsigned n, bool DoesNotCapture = true) { if (DoesNotCapture) addAttribute(n, Attribute::NoCapture); diff --git a/include/llvm/IRBuilder.h b/include/llvm/IRBuilder.h index ae82c25e3d..46720983e4 100644 --- a/include/llvm/IRBuilder.h +++ b/include/llvm/IRBuilder.h @@ -285,12 +285,15 @@ public: /// If the pointers aren't i8*, they will be converted. If a TBAA tag is /// specified, it will be added to the instruction. 
CallInst *CreateMemCpy(Value *Dst, Value *Src, uint64_t Size, unsigned Align, - bool isVolatile = false, MDNode *TBAATag = 0) { - return CreateMemCpy(Dst, Src, getInt64(Size), Align, isVolatile, TBAATag); + bool isVolatile = false, MDNode *TBAATag = 0, + MDNode *TBAAStructTag = 0) { + return CreateMemCpy(Dst, Src, getInt64(Size), Align, isVolatile, TBAATag, + TBAAStructTag); } CallInst *CreateMemCpy(Value *Dst, Value *Src, Value *Size, unsigned Align, - bool isVolatile = false, MDNode *TBAATag = 0); + bool isVolatile = false, MDNode *TBAATag = 0, + MDNode *TBAAStructTag = 0); /// CreateMemMove - Create and insert a memmove between the specified /// pointers. If the pointers aren't i8*, they will be converted. If a TBAA @@ -810,6 +813,31 @@ public: StoreInst *CreateStore(Value *Val, Value *Ptr, bool isVolatile = false) { return Insert(new StoreInst(Val, Ptr, isVolatile)); } + // Provided to resolve 'CreateAlignedLoad(Ptr, Align, "...")' correctly, + // instead of converting the string to 'bool' for the isVolatile parameter. + LoadInst *CreateAlignedLoad(Value *Ptr, unsigned Align, const char *Name) { + LoadInst *LI = CreateLoad(Ptr, Name); + LI->setAlignment(Align); + return LI; + } + LoadInst *CreateAlignedLoad(Value *Ptr, unsigned Align, + const Twine &Name = "") { + LoadInst *LI = CreateLoad(Ptr, Name); + LI->setAlignment(Align); + return LI; + } + LoadInst *CreateAlignedLoad(Value *Ptr, unsigned Align, bool isVolatile, + const Twine &Name = "") { + LoadInst *LI = CreateLoad(Ptr, isVolatile, Name); + LI->setAlignment(Align); + return LI; + } + StoreInst *CreateAlignedStore(Value *Val, Value *Ptr, unsigned Align, + bool isVolatile = false) { + StoreInst *SI = CreateStore(Val, Ptr, isVolatile); + SI->setAlignment(Align); + return SI; + } FenceInst *CreateFence(AtomicOrdering Ordering, SynchronizationScope SynchScope = CrossThread) { return Insert(new FenceInst(Context, Ordering, SynchScope)); @@ -970,6 +998,30 @@ public: Value *CreateSExt(Value *V, Type *DestTy, const Twine &Name = "") { return CreateCast(Instruction::SExt, V, DestTy, Name); } + /// CreateZExtOrTrunc - Create a ZExt or Trunc from the integer value V to + /// DestTy. Return the value untouched if the type of V is already DestTy. + Value *CreateZExtOrTrunc(Value *V, IntegerType *DestTy, + const Twine &Name = "") { + assert(isa<IntegerType>(V->getType()) && "Can only zero extend integers!"); + IntegerType *IntTy = cast<IntegerType>(V->getType()); + if (IntTy->getBitWidth() < DestTy->getBitWidth()) + return CreateZExt(V, DestTy, Name); + if (IntTy->getBitWidth() > DestTy->getBitWidth()) + return CreateTrunc(V, DestTy, Name); + return V; + } + /// CreateSExtOrTrunc - Create a SExt or Trunc from the integer value V to + /// DestTy. Return the value untouched if the type of V is already DestTy. 
+ Value *CreateSExtOrTrunc(Value *V, IntegerType *DestTy, + const Twine &Name = "") { + assert(isa<IntegerType>(V->getType()) && "Can only sign extend integers!"); + IntegerType *IntTy = cast<IntegerType>(V->getType()); + if (IntTy->getBitWidth() < DestTy->getBitWidth()) + return CreateSExt(V, DestTy, Name); + if (IntTy->getBitWidth() > DestTy->getBitWidth()) + return CreateTrunc(V, DestTy, Name); + return V; + } Value *CreateFPToUI(Value *V, Type *DestTy, const Twine &Name = ""){ return CreateCast(Instruction::FPToUI, V, DestTy, Name); } diff --git a/include/llvm/MC/MCExpr.h b/include/llvm/MC/MCExpr.h index f36db3c05a..5032887248 100644 --- a/include/llvm/MC/MCExpr.h +++ b/include/llvm/MC/MCExpr.h @@ -446,7 +446,7 @@ public: /// NOTE: All subclasses are required to have trivial destructors because /// MCExprs are bump pointer allocated and not destructed. class MCTargetExpr : public MCExpr { - virtual void Anchor(); + virtual void anchor(); protected: MCTargetExpr() : MCExpr(Target) {} virtual ~MCTargetExpr() {} diff --git a/include/llvm/MC/MCInstrDesc.h b/include/llvm/MC/MCInstrDesc.h index dbf16d8700..02383f8bc6 100644 --- a/include/llvm/MC/MCInstrDesc.h +++ b/include/llvm/MC/MCInstrDesc.h @@ -1,4 +1,4 @@ -//===-- llvm/Mc/McInstrDesc.h - Instruction Descriptors -*- C++ -*-===// +//===-- llvm/MC/MCInstrDesc.h - Instruction Descriptors -*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/include/llvm/MC/MCTargetAsmParser.h b/include/llvm/MC/MCTargetAsmParser.h index a771ed7a9d..2b5a672d6d 100644 --- a/include/llvm/MC/MCTargetAsmParser.h +++ b/include/llvm/MC/MCTargetAsmParser.h @@ -115,7 +115,7 @@ public: return Match_Success; } - virtual unsigned getMCInstOperandNum(unsigned Kind, MCInst &Inst, + virtual unsigned getMCInstOperandNum(unsigned Kind, const SmallVectorImpl<MCParsedAsmOperand*> &Operands, unsigned OperandNum, unsigned &NumMCOperands) = 0; diff --git a/include/llvm/Object/MachOFormat.h b/include/llvm/Object/MachOFormat.h index e4bfcc67fe..c0f700d3c8 100644 --- a/include/llvm/Object/MachOFormat.h +++ b/include/llvm/Object/MachOFormat.h @@ -61,7 +61,10 @@ namespace mach { CSARM_V6 = 6, CSARM_V5TEJ = 7, CSARM_XSCALE = 8, - CSARM_V7 = 9 + CSARM_V7 = 9, + CSARM_V7F = 10, + CSARM_V7S = 11, + CSARM_V7K = 12 }; /// \brief PowerPC Machine Subtypes. diff --git a/include/llvm/Operator.h b/include/llvm/Operator.h index cf6d8e2c37..459df2b289 100644 --- a/include/llvm/Operator.h +++ b/include/llvm/Operator.h @@ -35,7 +35,9 @@ private: void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION; void *operator new(size_t s) LLVM_DELETED_FUNCTION; Operator() LLVM_DELETED_FUNCTION; - ~Operator() LLVM_DELETED_FUNCTION; + // NOTE: cannot use LLVM_DELETED_FUNCTION because gcc errors when deleting + // an override of a non-deleted function. + ~Operator(); public: /// getOpcode - Return the opcode for this Instruction or ConstantExpr. @@ -77,7 +79,7 @@ public: }; private: - ~OverflowingBinaryOperator() LLVM_DELETED_FUNCTION; + ~OverflowingBinaryOperator(); // DO NOT IMPLEMENT friend class BinaryOperator; friend class ConstantExpr; @@ -131,7 +133,7 @@ public: }; private: - ~PossiblyExactOperator() LLVM_DELETED_FUNCTION; + ~PossiblyExactOperator(); // DO NOT IMPLEMENT friend class BinaryOperator; friend class ConstantExpr; @@ -168,7 +170,7 @@ public: /// information about relaxed accuracy requirements attached to them. 
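// Hypothetical usage sketch of the IRBuilder helpers added above
// (CreateZExtOrTrunc and CreateAlignedLoad); the function and value names are
// illustrative, and I32Ptr is assumed to point at an i32.
#include "llvm/IRBuilder.h"
using namespace llvm;

static Value *addThroughI32(IRBuilder<> &B, Value *I32Ptr, Value *SmallInt) {
  // Widen or narrow SmallInt to i32, whichever the bit widths require.
  Value *Wide = B.CreateZExtOrTrunc(SmallInt, B.getInt32Ty(), "wide");
  // Load with an explicit 4-byte alignment in a single call.
  LoadInst *Loaded = B.CreateAlignedLoad(I32Ptr, 4, "loaded");
  return B.CreateAdd(Wide, Loaded, "sum");
}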
class FPMathOperator : public Operator { private: - ~FPMathOperator() LLVM_DELETED_FUNCTION; + ~FPMathOperator(); // DO NOT IMPLEMENT public: diff --git a/include/llvm/Support/TargetFolder.h b/include/llvm/Support/TargetFolder.h index c65faa6621..a02db2fe66 100644 --- a/include/llvm/Support/TargetFolder.h +++ b/include/llvm/Support/TargetFolder.h @@ -177,7 +177,14 @@ public: return Fold(ConstantExpr::getIntegerCast(C, DestTy, isSigned)); } Constant *CreatePointerCast(Constant *C, Type *DestTy) const { - return ConstantExpr::getPointerCast(C, DestTy); + if (C->getType() == DestTy) + return C; // avoid calling Fold + return Fold(ConstantExpr::getPointerCast(C, DestTy)); + } + Constant *CreateFPCast(Constant *C, Type *DestTy) const { + if (C->getType() == DestTy) + return C; // avoid calling Fold + return Fold(ConstantExpr::getFPCast(C, DestTy)); } Constant *CreateBitCast(Constant *C, Type *DestTy) const { return CreateCast(Instruction::BitCast, C, DestTy); diff --git a/include/llvm/Target/TargetData.h b/include/llvm/Target/TargetData.h index 4f94ab751c..c97af7db68 100644 --- a/include/llvm/Target/TargetData.h +++ b/include/llvm/Target/TargetData.h @@ -53,10 +53,10 @@ enum AlignTypeEnum { /// @note The unusual order of elements in the structure attempts to reduce /// padding and make the structure slightly more cache friendly. struct TargetAlignElem { - AlignTypeEnum AlignType : 8; ///< Alignment type (AlignTypeEnum) - unsigned ABIAlign; ///< ABI alignment for this type/bitw - unsigned PrefAlign; ///< Pref. alignment for this type/bitw - uint32_t TypeBitWidth; ///< Type bit width + uint32_t AlignType : 8; ///< Alignment type (AlignTypeEnum) + uint32_t TypeBitWidth : 24; ///< Type bit width + uint32_t ABIAlign : 16; ///< ABI alignment for this type/bitw + uint32_t PrefAlign : 16; ///< Pref. alignment for this type/bitw /// Initializer static TargetAlignElem get(AlignTypeEnum align_type, unsigned abi_align, diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index c1043aad37..a78ef58b88 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -724,6 +724,12 @@ public: return SupportJumpTables; } + /// getMinimumJumpTableEntries - return integer threshold on number of + /// blocks to use jump tables rather than if sequence. + int getMinimumJumpTableEntries() const { + return MinimumJumpTableEntries; + } + /// getStackPointerRegisterToSaveRestore - If a physical register, this /// specifies the register that llvm.savestack/llvm.restorestack should save /// and restore. @@ -1044,6 +1050,12 @@ protected: SupportJumpTables = Val; } + /// setMinimumJumpTableEntries - Indicate the number of blocks to generate + /// jump tables rather than if sequence. + void setMinimumJumpTableEntries(int Val) { + MinimumJumpTableEntries = Val; + } + /// setStackPointerRegisterToSaveRestore - If set to a physical register, this /// specifies the register that llvm.savestack/llvm.restorestack should save /// and restore. @@ -1838,6 +1850,9 @@ private: /// If it's not true, then each jumptable must be lowered into if-then-else's. bool SupportJumpTables; + /// MinimumJumpTableEntries - Number of blocks threshold to use jump tables. + int MinimumJumpTableEntries; + /// BooleanContents - Information about the contents of the high-bits in /// boolean values held in a type wider than i1. See getBooleanContents. 
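// A standalone sketch of the TargetAlignElem repacking above: 8 + 24 bits
// share one 32-bit unit and 16 + 16 bits share the next, so on common ABIs
// the struct shrinks to 8 bytes. Exact bit-field packing is
// implementation-defined, so treat the size check as an assumption.
#include <cstdint>

struct PackedAlignElem {
  uint32_t AlignType    : 8;   // AlignTypeEnum value
  uint32_t TypeBitWidth : 24;  // type bit width
  uint32_t ABIAlign     : 16;  // ABI alignment for this type/width
  uint32_t PrefAlign    : 16;  // preferred alignment for this type/width
};

int main() {
  // Returns 0 if packed into two 32-bit words as expected, 1 otherwise.
  return sizeof(PackedAlignElem) == 8 ? 0 : 1;
}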
BooleanContent BooleanContents; diff --git a/include/llvm/Transforms/Utils/IntegerDivision.h b/include/llvm/Transforms/Utils/IntegerDivision.h index 8d3f53e6f9..cecc8075de 100644 --- a/include/llvm/Transforms/Utils/IntegerDivision.h +++ b/include/llvm/Transforms/Utils/IntegerDivision.h @@ -23,6 +23,16 @@ namespace llvm { namespace llvm { + /// Generate code to calculate the remainder of two integers, replacing Rem + /// with the generated code. This currently generates code using the udiv + /// expansion, but future work includes generating more specialized code, + /// e.g. when more information about the operands are known. Currently only + /// implements 32bit scalar division (due to udiv's limitation), but future + /// work is removing this limitation. + /// + /// @brief Replace Rem with generated code. + bool expandRemainder(BinaryOperator *Rem); + /// Generate code to divide two integers, replacing Div with the generated /// code. This currently generates code similarly to compiler-rt's /// implementations, but future work includes generating more specialized code diff --git a/include/llvm/Transforms/Utils/ValueMapper.h b/include/llvm/Transforms/Utils/ValueMapper.h index 8594707a84..5390c5e8ed 100644 --- a/include/llvm/Transforms/Utils/ValueMapper.h +++ b/include/llvm/Transforms/Utils/ValueMapper.h @@ -25,7 +25,7 @@ namespace llvm { /// ValueMapTypeRemapper - This is a class that can be implemented by clients /// to remap types when cloning constants and instructions. class ValueMapTypeRemapper { - virtual void Anchor(); // Out of line method. + virtual void anchor(); // Out of line method. public: virtual ~ValueMapTypeRemapper() {} diff --git a/lib/Analysis/CodeMetrics.cpp b/lib/Analysis/CodeMetrics.cpp index acda34ba14..9a1ca63c1c 100644 --- a/lib/Analysis/CodeMetrics.cpp +++ b/lib/Analysis/CodeMetrics.cpp @@ -196,7 +196,7 @@ void CodeMetrics::analyzeFunction(Function *F, const TargetData *TD) { // as volatile if they are live across a setjmp call, and they probably // won't do this in callers. exposesReturnsTwice = F->callsFunctionThatReturnsTwice() && - !F->hasFnAttr(Attribute::ReturnsTwice); + !F->getFnAttributes().hasReturnsTwiceAttr(); // Look at the size of the callee. for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB) diff --git a/lib/Analysis/IPA/CallGraph.cpp b/lib/Analysis/IPA/CallGraph.cpp index 17631ddb30..dec0eced27 100644 --- a/lib/Analysis/IPA/CallGraph.cpp +++ b/lib/Analysis/IPA/CallGraph.cpp @@ -141,12 +141,13 @@ private: for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE; ++II) { CallSite CS(cast<Value>(II)); - if (CS && !isa<IntrinsicInst>(II)) { + if (CS) { const Function *Callee = CS.getCalledFunction(); - if (Callee) - Node->addCalledFunction(CS, getOrInsertFunction(Callee)); - else + if (!Callee) + // Indirect calls of intrinsics are not allowed so no need to check. 
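// Hypothetical caller of the expandRemainder helper declared above: given a
// 32-bit urem/srem instruction, replace it in place with the expanded code.
// The wrapper name is illustrative; only expandRemainder comes from the patch.
#include "llvm/Instructions.h"
#include "llvm/Transforms/Utils/IntegerDivision.h"
using namespace llvm;

static bool lowerRemainderInst(BinaryOperator *Rem) {
  if (Rem->getOpcode() != Instruction::URem &&
      Rem->getOpcode() != Instruction::SRem)
    return false;              // only remainder instructions are handled
  return expandRemainder(Rem); // replaces Rem with the generated expansion
}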
Node->addCalledFunction(CS, CallsExternalNode); + else if (!Callee->isIntrinsic()) + Node->addCalledFunction(CS, getOrInsertFunction(Callee)); } } } diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index 1a94665096..7ecc06bbb2 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -128,7 +128,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { public: CallAnalyzer(const TargetData *TD, Function &Callee, int Threshold) : TD(TD), F(Callee), Threshold(Threshold), Cost(0), - AlwaysInline(F.hasFnAttr(Attribute::AlwaysInline)), + AlwaysInline(F.getFnAttributes().hasAlwaysInlineAttr()), IsCallerRecursive(false), IsRecursiveCall(false), ExposesReturnsTwice(false), HasDynamicAlloca(false), AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0), @@ -613,7 +613,7 @@ bool CallAnalyzer::visitStore(StoreInst &I) { bool CallAnalyzer::visitCallSite(CallSite CS) { if (CS.isCall() && cast<CallInst>(CS.getInstruction())->canReturnTwice() && - !F.hasFnAttr(Attribute::ReturnsTwice)) { + !F.getFnAttributes().hasReturnsTwiceAttr()) { // This aborts the entire analysis. ExposesReturnsTwice = true; return false; @@ -1043,7 +1043,7 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, Function *Callee, // something else. Don't inline functions marked noinline or call sites // marked noinline. if (!Callee || Callee->mayBeOverridden() || - Callee->hasFnAttr(Attribute::NoInline) || CS.isNoInline()) + Callee->getFnAttributes().hasNoInlineAttr() || CS.isNoInline()) return llvm::InlineCost::getNever(); DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp index 83bdf5286a..7bd945733b 100644 --- a/lib/Analysis/Lint.cpp +++ b/lib/Analysis/Lint.cpp @@ -411,14 +411,50 @@ void Lint::visitMemoryReference(Instruction &I, "Undefined behavior: Branch to non-blockaddress", &I); } + // Check for buffer overflows and misalignment. if (TD) { - if (Align == 0 && Ty) Align = TD->getABITypeAlignment(Ty); + // Only handles memory references that read/write something simple like an + // alloca instruction or a global variable. + int64_t Offset = 0; + if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, *TD)) { + // OK, so the access is to a constant offset from Ptr. Check that Ptr is + // something we can handle and if so extract the size of this base object + // along with its alignment. + uint64_t BaseSize = AliasAnalysis::UnknownSize; + unsigned BaseAlign = 0; + + if (AllocaInst *AI = dyn_cast<AllocaInst>(Base)) { + Type *ATy = AI->getAllocatedType(); + if (!AI->isArrayAllocation() && ATy->isSized()) + BaseSize = TD->getTypeAllocSize(ATy); + BaseAlign = AI->getAlignment(); + if (BaseAlign == 0 && ATy->isSized()) + BaseAlign = TD->getABITypeAlignment(ATy); + } else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) { + // If the global may be defined differently in another compilation unit + // then don't warn about funky memory accesses. 
+ if (GV->hasDefinitiveInitializer()) { + Type *GTy = GV->getType()->getElementType(); + if (GTy->isSized()) + BaseSize = TD->getTypeAllocSize(GTy); + BaseAlign = GV->getAlignment(); + if (BaseAlign == 0 && GTy->isSized()) + BaseAlign = TD->getABITypeAlignment(GTy); + } + } - if (Align != 0) { - unsigned BitWidth = TD->getTypeSizeInBits(Ptr->getType()); - APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - ComputeMaskedBits(Ptr, KnownZero, KnownOne, TD); - Assert1(!(KnownOne & APInt::getLowBitsSet(BitWidth, Log2_32(Align))), + // Accesses from before the start or after the end of the object are not + // defined. + Assert1(Size == AliasAnalysis::UnknownSize || + BaseSize == AliasAnalysis::UnknownSize || + (Offset >= 0 && Offset + Size <= BaseSize), + "Undefined behavior: Buffer overflow", &I); + + // Accesses that say that the memory is more aligned than it is are not + // defined. + if (Align == 0 && Ty && Ty->isSized()) + Align = TD->getABITypeAlignment(Ty); + Assert1(!BaseAlign || Align <= MinAlign(BaseAlign, Offset), "Undefined behavior: Memory reference address is misaligned", &I); } } diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index 5736c3569d..9ce9f8c801 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -327,7 +327,7 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs, return 0; if (LIOffs+NewLoadByteSize > MemLocEnd && - LI->getParent()->getParent()->hasFnAttr(Attribute::AddressSafety)) { + LI->getParent()->getParent()->getFnAttributes().hasAddressSafetyAttr()){ // We will be reading past the location accessed by the original program. // While this is safe in a regular build, Address Safety analysis tools // may start reporting false warnings. So, don't do widening. diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index eedec8383a..66a8e17e11 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -919,23 +919,13 @@ bool LLParser::ParseOptionalAddrSpace(unsigned &AddrSpace) { bool LLParser::ParseOptionalAttrs(Attributes &Attrs, unsigned AttrKind) { Attrs = Attribute::None; LocTy AttrLoc = Lex.getLoc(); + bool HaveError = false; while (1) { - switch (Lex.getKind()) { + lltok::Kind Token = Lex.getKind(); + switch (Token) { default: // End of attributes. - if (AttrKind != 2 && (Attrs & Attribute::FunctionOnly)) - return Error(AttrLoc, "invalid use of function-only attribute"); - - // As a hack, we allow "align 2" on functions as a synonym for - // "alignstack 2". - if (AttrKind == 2 && - (Attrs & ~(Attribute::FunctionOnly | Attribute::Alignment))) - return Error(AttrLoc, "invalid use of attribute on a function"); - - if (AttrKind != 0 && (Attrs & Attribute::ParameterOnly)) - return Error(AttrLoc, "invalid use of parameter-only attribute"); - - return false; + return HaveError; case lltok::kw_zeroext: Attrs |= Attribute::ZExt; break; case lltok::kw_signext: Attrs |= Attribute::SExt; break; case lltok::kw_inreg: Attrs |= Attribute::InReg; break; @@ -980,6 +970,51 @@ bool LLParser::ParseOptionalAttrs(Attributes &Attrs, unsigned AttrKind) { } } + + // Perform some error checking. + switch (Token) { + default: + if (AttrKind == 2) + HaveError |= Error(AttrLoc, "invalid use of attribute on a function"); + break; + case lltok::kw_align: + // As a hack, we allow "align 2" on functions as a synonym for + // "alignstack 2". 
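// The buffer-overflow assertion added to Lint above reduces to interval
// arithmetic once the base object and the constant offset are known. A
// standalone sketch of that check (names illustrative):
#include <cassert>
#include <cstdint>

static bool accessInBounds(int64_t Offset, uint64_t AccessSize,
                           uint64_t BaseSize) {
  // Accesses before the start or past the end of the object are undefined.
  return Offset >= 0 && uint64_t(Offset) + AccessSize <= BaseSize;
}

int main() {
  assert(accessInBounds(0, 4, 16));    // i32 load at the start of 16 bytes
  assert(accessInBounds(12, 4, 16));   // last in-bounds i32 slot
  assert(!accessInBounds(14, 4, 16));  // straddles the end: flagged
  return 0;
}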
+ break; + + // Parameter Only: + case lltok::kw_sret: + case lltok::kw_nocapture: + case lltok::kw_byval: + case lltok::kw_nest: + if (AttrKind != 0) + HaveError |= Error(AttrLoc, "invalid use of parameter-only attribute"); + break; + + // Function Only: + case lltok::kw_noreturn: + case lltok::kw_nounwind: + case lltok::kw_readnone: + case lltok::kw_readonly: + case lltok::kw_noinline: + case lltok::kw_alwaysinline: + case lltok::kw_optsize: + case lltok::kw_ssp: + case lltok::kw_sspreq: + case lltok::kw_noredzone: + case lltok::kw_noimplicitfloat: + case lltok::kw_naked: + case lltok::kw_inlinehint: + case lltok::kw_alignstack: + case lltok::kw_uwtable: + case lltok::kw_nonlazybind: + case lltok::kw_returns_twice: + case lltok::kw_address_safety: + if (AttrKind != 2) + HaveError |= Error(AttrLoc, "invalid use of function-only attribute"); + break; + } + Lex.Lex(); } } diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 4a11223711..c3bffc5d63 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -477,7 +477,7 @@ bool BitcodeReader::ParseAttributeBlock() { for (unsigned i = 0, e = Record.size(); i != e; i += 2) { Attributes ReconstitutedAttr = - Attribute::decodeLLVMAttributesForBitcode(Record[i+1]); + Attributes::decodeLLVMAttributesForBitcode(Record[i+1]); Record[i+1] = ReconstitutedAttr.Raw(); } diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 94ebe190d4..b3f1bb13a9 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -177,7 +177,7 @@ static void WriteAttributeTable(const ValueEnumerator &VE, for (unsigned i = 0, e = A.getNumSlots(); i != e; ++i) { const AttributeWithIndex &PAWI = A.getSlot(i); Record.push_back(PAWI.Index); - Record.push_back(Attribute::encodeLLVMAttributesForBitcode(PAWI.Attrs)); + Record.push_back(Attributes::encodeLLVMAttributesForBitcode(PAWI.Attrs)); } Stream.EmitRecord(bitc::PARAMATTR_CODE_ENTRY, Record); diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 55aa4ee665..d506d7e507 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1140,6 +1140,11 @@ void AsmPrinter::EmitJumpTableInfo() { EmitAlignment(Log2_32(MJTI->getEntryAlignment(*TM.getTargetData()))); + // Jump tables in code sections are marked with a data_region directive + // where that's supported. + if (!JTInDiffSection) + OutStreamer.EmitDataRegion(MCDR_DataRegionJT32); + for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) { const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs; @@ -1180,6 +1185,8 @@ void AsmPrinter::EmitJumpTableInfo() { for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) EmitJumpTableEntry(MJTI, JTBBs[ii], JTI); } + if (!JTInDiffSection) + OutStreamer.EmitDataRegion(MCDR_DataRegionEnd); } /// EmitJumpTableEntry - Emit a jump table entry for the specified MBB to the diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index efe022b074..5494c0f784 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -590,7 +590,7 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1, // instructions that would be deleted in the merge. 
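// A minimal sketch of the bracketing AsmPrinter::EmitJumpTableInfo now does
// for jump tables emitted into a code section; the helper name is
// illustrative and the entry-emission loop is elided.
#include "llvm/MC/MCStreamer.h"
using namespace llvm;

static void emitBracketedJumpTable(MCStreamer &Out) {
  Out.EmitDataRegion(MCDR_DataRegionJT32); // start of 32-bit jump-table data
  // ... one jump-table entry per destination basic block goes here ...
  Out.EmitDataRegion(MCDR_DataRegionEnd);  // subsequent bytes are code again
}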
MachineFunction *MF = MBB1->getParent(); if (EffectiveTailLen >= 2 && - MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize) && + MF->getFunction()->getFnAttributes().hasOptimizeForSizeAttr() && (I1 == MBB1->begin() || I2 == MBB2->begin())) return true; diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp index 99233dfc2e..1009a1e29c 100644 --- a/lib/CodeGen/CodePlacementOpt.cpp +++ b/lib/CodeGen/CodePlacementOpt.cpp @@ -373,7 +373,7 @@ bool CodePlacementOpt::OptimizeIntraLoopEdges(MachineFunction &MF) { /// bool CodePlacementOpt::AlignLoops(MachineFunction &MF) { const Function *F = MF.getFunction(); - if (F->hasFnAttr(Attribute::OptimizeForSize)) + if (F->getFnAttributes().hasOptimizeForSizeAttr()) return false; unsigned Align = TLI->getPrefLoopAlignment(); diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index f4ebcd6fa4..c3bf2d234c 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -427,7 +427,7 @@ void LiveInterval::join(LiveInterval &Other, // If we have to apply a mapping to our base interval assignment, rewrite it // now. - if (MustMapCurValNos) { + if (MustMapCurValNos && !empty()) { // Map the first live range. iterator OutIt = begin(); diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp index b4ce9aa8c1..82710414b3 100644 --- a/lib/CodeGen/LiveRangeEdit.cpp +++ b/lib/CodeGen/LiveRangeEdit.cpp @@ -87,7 +87,7 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI, // We can't remat physreg uses, unless it is a constant. if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { - if (MRI.isConstantPhysReg(MO.getReg(), VRM->getMachineFunction())) + if (MRI.isConstantPhysReg(MO.getReg(), *OrigMI->getParent()->getParent())) continue; return false; } diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 9a8cc48172..1f1ce671f5 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -1013,7 +1013,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // exclusively on the loop info here so that we can align backedges in // unnatural CFGs and backedges that were introduced purely because of the // loop rotations done during this layout pass. - if (F.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) + if (F.getFunction()->getFnAttributes().hasOptimizeForSizeAttr()) return; unsigned Align = TLI->getPrefLoopAlignment(); if (!Align) diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 304e39e159..34fbfe20f4 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -59,13 +59,13 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, RegInfo = 0; MFInfo = 0; FrameInfo = new (Allocator) MachineFrameInfo(*TM.getFrameLowering()); - if (Fn->hasFnAttr(Attribute::StackAlignment)) + if (Fn->getFnAttributes().hasStackAlignmentAttr()) FrameInfo->ensureMaxAlignment(Fn->getAttributes(). getFnAttributes().getStackAlignment()); ConstantPool = new (Allocator) MachineConstantPool(TM.getTargetData()); Alignment = TM.getTargetLowering()->getMinFunctionAlignment(); // FIXME: Shouldn't use pref alignment if explicit alignment is set on Fn. 
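// The same conversion recurs throughout this patch: instead of
// hasFnAttr(Attribute::Foo), callers fetch the function's attribute set once
// and use the typed has*Attr() queries. A small sketch of the new spelling
// (the wrapper name is illustrative):
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFunction.h"
using namespace llvm;

static bool optimizingForSize(const MachineFunction &MF) {
  return MF.getFunction()->getFnAttributes().hasOptimizeForSizeAttr();
}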
- if (!Fn->hasFnAttr(Attribute::OptimizeForSize)) + if (!Fn->getFnAttributes().hasOptimizeForSizeAttr()) Alignment = std::max(Alignment, TM.getTargetLowering()->getPrefFunctionAlignment()); FunctionNumber = FunctionNum; diff --git a/lib/CodeGen/MachineModuleInfoImpls.cpp b/lib/CodeGen/MachineModuleInfoImpls.cpp index 5ab56c09f5..a1c7e9f5fb 100644 --- a/lib/CodeGen/MachineModuleInfoImpls.cpp +++ b/lib/CodeGen/MachineModuleInfoImpls.cpp @@ -21,8 +21,8 @@ using namespace llvm; //===----------------------------------------------------------------------===// // Out of line virtual method. -void MachineModuleInfoMachO::Anchor() {} -void MachineModuleInfoELF::Anchor() {} +void MachineModuleInfoMachO::anchor() {} +void MachineModuleInfoELF::anchor() {} static int SortSymbolPair(const void *LHS, const void *RHS) { typedef std::pair<MCSymbol*, MachineModuleInfoImpl::StubValueTy> PairTy; diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index c791ffb28c..3a4125475e 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -96,7 +96,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { placeCSRSpillsAndRestores(Fn); // Add the code to save and restore the callee saved registers - if (!F->hasFnAttr(Attribute::Naked)) + if (!F->getFnAttributes().hasNakedAttr()) insertCSRSpillsAndRestores(Fn); // Allow the target machine to make final modifications to the function @@ -111,7 +111,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { // called functions. Because of this, calculateCalleeSavedRegisters() // must be called before this function in order to set the AdjustsStack // and MaxCallFrameSize variables. - if (!F->hasFnAttr(Attribute::Naked)) + if (!F->getFnAttributes().hasNakedAttr()) insertPrologEpilogCode(Fn); // Replace all MO_FrameIndex operands with physical register references @@ -221,7 +221,7 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { return; // In Naked functions we aren't going to save any registers. - if (Fn.getFunction()->hasFnAttr(Attribute::Naked)) + if (Fn.getFunction()->getFnAttributes().hasNakedAttr()) return; std::vector<CalleeSavedInfo> CSI; diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index dd0f548867..f45072f1ac 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -70,7 +70,7 @@ VerifyCoalescing("verify-coalescing", // Temporary option for testing new coalescer algo. static cl::opt<bool> -NewCoalescer("new-coalescer", cl::Hidden, +NewCoalescer("new-coalescer", cl::Hidden, cl::init(true), cl::desc("Use new coalescer algorithm")); namespace { @@ -1732,6 +1732,12 @@ void JoinVals::pruneValues(JoinVals &Other, case CR_Replace: // This value takes precedence over the value in Other.LI. LIS->pruneValue(&Other.LI, Def, &EndPoints); + // Remove <def,read-undef> flags. This def is now a partial redef. 
+ if (!Def.isBlock()) + for (MIOperands MO(Indexes->getInstructionFromIndex(Def)); + MO.isValid(); ++MO) + if (MO->isReg() && MO->isDef() && MO->getReg() == LI.reg) + MO->setIsUndef(false); DEBUG(dbgs() << "\t\tpruned " << PrintReg(Other.LI.reg) << " at " << Def << ": " << Other.LI << '\n'); break; diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 0107ded953..d115991858 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2999,7 +2999,7 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT)); if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT)) return DAG.getNode(ISD::ROTL, N->getDebugLoc(), VT, BSwap, ShAmt); - else if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT)) + if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT)) return DAG.getNode(ISD::ROTR, N->getDebugLoc(), VT, BSwap, ShAmt); return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, BSwap, ShAmt), @@ -3217,11 +3217,8 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) { if ((LShVal + RShVal) != OpSizeInBits) return 0; - SDValue Rot; - if (HasROTL) - Rot = DAG.getNode(ISD::ROTL, DL, VT, LHSShiftArg, LHSShiftAmt); - else - Rot = DAG.getNode(ISD::ROTR, DL, VT, LHSShiftArg, RHSShiftAmt); + SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, + LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt); // If there is an AND of either shifted operand, apply it to the result. if (LHSMask.getNode() || RHSMask.getNode()) { @@ -3254,12 +3251,8 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) { if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(RHSShiftAmt.getOperand(0))) { if (SUBC->getAPIntValue() == OpSizeInBits) { - if (HasROTL) - return DAG.getNode(ISD::ROTL, DL, VT, - LHSShiftArg, LHSShiftAmt).getNode(); - else - return DAG.getNode(ISD::ROTR, DL, VT, - LHSShiftArg, RHSShiftAmt).getNode(); + return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg, + HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode(); } } } @@ -3271,25 +3264,21 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) { if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(LHSShiftAmt.getOperand(0))) { if (SUBC->getAPIntValue() == OpSizeInBits) { - if (HasROTR) - return DAG.getNode(ISD::ROTR, DL, VT, - LHSShiftArg, RHSShiftAmt).getNode(); - else - return DAG.getNode(ISD::ROTL, DL, VT, - LHSShiftArg, LHSShiftAmt).getNode(); + return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, LHSShiftArg, + HasROTR ? 
RHSShiftAmt : LHSShiftAmt).getNode(); } } } // Look for sign/zext/any-extended or truncate cases: - if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND - || LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND - || LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND - || LHSShiftAmt.getOpcode() == ISD::TRUNCATE) && - (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND - || RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND - || RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND - || RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) { + if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND || + LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND || + LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND || + LHSShiftAmt.getOpcode() == ISD::TRUNCATE) && + (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND || + RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND || + RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND || + RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) { SDValue LExtOp0 = LHSShiftAmt.getOperand(0); SDValue RExtOp0 = RHSShiftAmt.getOperand(0); if (RExtOp0.getOpcode() == ISD::SUB && @@ -4428,20 +4417,18 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // If the desired elements are smaller or larger than the source // elements we can use a matching integer vector type and then // truncate/sign extend - else { - EVT MatchingElementType = - EVT::getIntegerVT(*DAG.getContext(), - N0VT.getScalarType().getSizeInBits()); - EVT MatchingVectorType = - EVT::getVectorVT(*DAG.getContext(), MatchingElementType, - N0VT.getVectorNumElements()); + EVT MatchingElementType = + EVT::getIntegerVT(*DAG.getContext(), + N0VT.getScalarType().getSizeInBits()); + EVT MatchingVectorType = + EVT::getVectorVT(*DAG.getContext(), MatchingElementType, + N0VT.getVectorNumElements()); - if (SVT == MatchingVectorType) { - SDValue VsetCC = DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, - N0.getOperand(0), N0.getOperand(1), - cast<CondCodeSDNode>(N0.getOperand(2))->get()); - return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); - } + if (SVT == MatchingVectorType) { + SDValue VsetCC = DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, + N0.getOperand(0), N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()); + return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); } } @@ -5251,13 +5238,12 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // if the source is smaller than the dest, we still need an extend return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, N0.getOperand(0)); - else if (N0.getOperand(0).getValueType().bitsGT(VT)) + if (N0.getOperand(0).getValueType().bitsGT(VT)) // if the source is larger than the dest, than we just need the truncate return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0)); - else - // if the source and dest are the same type, we can drop both the extend - // and the truncate. - return N0.getOperand(0); + // if the source and dest are the same type, we can drop both the extend + // and the truncate. + return N0.getOperand(0); } // Fold extract-and-trunc into a narrow extract. 
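// MatchRotate above folds an OR of complementary shifts into a single
// ISD::ROTL/ISD::ROTR node. A standalone scalar sketch of the identity being
// matched, checked against a 64-bit "doubled word" rotation:
#include <cassert>
#include <cstdint>

static uint32_t rotl32(uint32_t X, unsigned C) {
  return (X << C) | (X >> (32 - C)); // valid for 0 < C < 32
}

int main() {
  uint32_t X = 0xDEADBEEF;
  for (unsigned C = 1; C < 32; ++C) {
    uint64_t Doubled = (uint64_t(X) << 32) | X;          // X:X
    uint32_t Expected = uint32_t((Doubled << C) >> 32);  // rotate via wide shift
    assert(rotl32(X, C) == Expected);
  }
  return 0;
}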
For example: diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index bd33479b94..a48a6256e5 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3521,7 +3521,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize); + bool OptSize = MF.getFunction()->getFnAttributes().hasOptimizeForSizeAttr(); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -3614,7 +3614,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize); + bool OptSize = MF.getFunction()->getFnAttributes().hasOptimizeForSizeAttr(); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -3692,7 +3692,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize); + bool OptSize = MF.getFunction()->getFnAttributes().hasOptimizeForSizeAttr(); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 4f6ff08407..65becbe44f 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -89,7 +89,7 @@ static const unsigned MaxParallelChains = 64; static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, const SDValue *Parts, unsigned NumParts, - EVT PartVT, EVT ValueVT); + EVT PartVT, EVT ValueVT, const Value *V); /// getCopyFromParts - Create a value that contains the specified legal parts /// combined into the value they represent. 
If the parts combine to a type @@ -99,9 +99,11 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, const SDValue *Parts, unsigned NumParts, EVT PartVT, EVT ValueVT, + const Value *V, ISD::NodeType AssertOp = ISD::DELETED_NODE) { if (ValueVT.isVector()) - return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT); + return getCopyFromPartsVector(DAG, DL, Parts, NumParts, + PartVT, ValueVT, V); assert(NumParts > 0 && "No parts to assemble!"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -125,9 +127,9 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, if (RoundParts > 2) { Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2, - PartVT, HalfVT); + PartVT, HalfVT, V); Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2, - RoundParts / 2, PartVT, HalfVT); + RoundParts / 2, PartVT, HalfVT, V); } else { Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]); Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]); @@ -143,7 +145,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, unsigned OddParts = NumParts - RoundParts; EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits); Hi = getCopyFromParts(DAG, DL, - Parts + RoundParts, OddParts, PartVT, OddVT); + Parts + RoundParts, OddParts, PartVT, OddVT, V); // Combine the round and odd parts. Lo = Val; @@ -172,7 +174,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, assert(ValueVT.isFloatingPoint() && PartVT.isInteger() && !PartVT.isVector() && "Unexpected split"); EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); - Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT); + Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V); } } @@ -210,14 +212,14 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, llvm_unreachable("Unknown mismatch!"); } -/// getCopyFromParts - Create a value that contains the specified legal parts -/// combined into the value they represent. If the parts combine to a type -/// larger then ValueVT then AssertOp can be used to specify whether the extra -/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT -/// (ISD::AssertSext). +/// getCopyFromPartsVector - Create a value that contains the specified legal +/// parts combined into the value they represent. If the parts combine to a +/// type larger then ValueVT then AssertOp can be used to specify whether the +/// extra bits are known to be zero (ISD::AssertZext) or sign extended from +/// ValueVT (ISD::AssertSext). static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, const SDValue *Parts, unsigned NumParts, - EVT PartVT, EVT ValueVT) { + EVT PartVT, EVT ValueVT, const Value *V) { assert(ValueVT.isVector() && "Not a vector value"); assert(NumParts > 0 && "No parts to assemble!"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -243,7 +245,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, // as appropriate. for (unsigned i = 0; i != NumParts; ++i) Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1, - PartVT, IntermediateVT); + PartVT, IntermediateVT, V); } else if (NumParts > 0) { // If the intermediate type was expanded, build the intermediate // operands from the parts. 
@@ -252,7 +254,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, unsigned Factor = NumParts / NumIntermediates; for (unsigned i = 0; i != NumIntermediates; ++i) Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor, - PartVT, IntermediateVT); + PartVT, IntermediateVT, V); } // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the @@ -300,8 +302,19 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); // Handle cases such as i8 -> <1 x i1> - assert(ValueVT.getVectorNumElements() == 1 && - "Only trivial scalar-to-vector conversions should get here!"); + if (ValueVT.getVectorNumElements() != 1) { + LLVMContext &Ctx = *DAG.getContext(); + Twine ErrMsg("non-trivial scalar-to-vector conversion"); + if (const Instruction *I = dyn_cast_or_null<Instruction>(V)) { + if (const CallInst *CI = dyn_cast<CallInst>(I)) + if (isa<InlineAsm>(CI->getCalledValue())) + ErrMsg = ErrMsg + ", possible invalid constraint for vector type"; + Ctx.emitError(I, ErrMsg); + } else { + Ctx.emitError(ErrMsg); + } + report_fatal_error("Cannot handle scalar-to-vector conversion!"); + } if (ValueVT.getVectorNumElements() == 1 && ValueVT.getVectorElementType() != PartVT) { @@ -313,25 +326,22 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val); } - - - static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc dl, SDValue Val, SDValue *Parts, unsigned NumParts, - EVT PartVT); + EVT PartVT, const Value *V); /// getCopyToParts - Create a series of nodes that contain the specified value /// split into legal parts. If the parts contain more bits than Val, then, for /// integers, ExtendKind can be used to specify how to generate the extra bits. static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, SDValue Val, SDValue *Parts, unsigned NumParts, - EVT PartVT, + EVT PartVT, const Value *V, ISD::NodeType ExtendKind = ISD::ANY_EXTEND) { EVT ValueVT = Val.getValueType(); // Handle the vector case separately. if (ValueVT.isVector()) - return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT); + return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned PartBits = PartVT.getSizeInBits(); @@ -383,7 +393,19 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, "Failed to tile the value with PartVT!"); if (NumParts == 1) { - assert(PartVT == ValueVT && "Type conversion failed!"); + if (PartVT != ValueVT) { + LLVMContext &Ctx = *DAG.getContext(); + Twine ErrMsg("scalar-to-vector conversion failed"); + if (const Instruction *I = dyn_cast_or_null<Instruction>(V)) { + if (const CallInst *CI = dyn_cast<CallInst>(I)) + if (isa<InlineAsm>(CI->getCalledValue())) + ErrMsg = ErrMsg + ", possible invalid constraint for vector type"; + Ctx.emitError(I, ErrMsg); + } else { + Ctx.emitError(ErrMsg); + } + } + Parts[0] = Val; return; } @@ -398,7 +420,7 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, unsigned OddParts = NumParts - RoundParts; SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val, DAG.getIntPtrConstant(RoundBits)); - getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT); + getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V); if (TLI.isBigEndian()) // The odd parts were reversed by getCopyToParts - unreverse them. 
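// The `const Value *V` parameter threaded through getCopyFromParts and
// getCopyToParts above exists for diagnostics: when a conversion cannot be
// handled, the error can point at the originating IR value. A sketch of that
// reporting pattern (the helper name is illustrative):
#include "llvm/Instruction.h"
#include "llvm/LLVMContext.h"
#include "llvm/Support/Casting.h"
#include "llvm/ADT/Twine.h"
using namespace llvm;

static void reportLoweringError(LLVMContext &Ctx, const Value *V,
                                const Twine &Msg) {
  if (const Instruction *I = dyn_cast_or_null<Instruction>(V))
    Ctx.emitError(I, Msg); // attach the error to the offending instruction
  else
    Ctx.emitError(Msg);    // no instruction context available
}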
@@ -444,7 +466,7 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, /// value split into legal parts. static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, SDValue Val, SDValue *Parts, unsigned NumParts, - EVT PartVT) { + EVT PartVT, const Value *V) { EVT ValueVT = Val.getValueType(); assert(ValueVT.isVector() && "Not a vector"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -530,7 +552,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, // If the register was not expanded, promote or copy the value, // as appropriate. for (unsigned i = 0; i != NumParts; ++i) - getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT); + getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT, V); } else if (NumParts > 0) { // If the intermediate type was expanded, split each the value into // legal parts. @@ -538,13 +560,10 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, "Must expand into a divisible number of parts!"); unsigned Factor = NumParts / NumIntermediates; for (unsigned i = 0; i != NumIntermediates; ++i) - getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT); + getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT, V); } } - - - namespace { /// RegsForValue - This struct represents the registers (physical or virtual) /// that a particular set of values is assigned, and the type information @@ -622,14 +641,15 @@ namespace { /// If the Flag pointer is NULL, no flag is used. SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo, DebugLoc dl, - SDValue &Chain, SDValue *Flag) const; + SDValue &Chain, SDValue *Flag, + const Value *V = 0) const; /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the /// specified value into the registers specified by this object. This uses /// Chain/Flag as the input and updates them for the output Chain/Flag. /// If the Flag pointer is NULL, no flag is used. void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, - SDValue &Chain, SDValue *Flag) const; + SDValue &Chain, SDValue *Flag, const Value *V) const; /// AddInlineAsmOperands - Add this value to the specified inlineasm node /// operand list. This adds the code marker, matching input operand index @@ -648,7 +668,8 @@ namespace { SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo, DebugLoc dl, - SDValue &Chain, SDValue *Flag) const { + SDValue &Chain, SDValue *Flag, + const Value *V) const { // A Value with type {} or [0 x %t] needs no registers. if (ValueVTs.empty()) return SDValue(); @@ -722,7 +743,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, } Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), - NumRegs, RegisterVT, ValueVT); + NumRegs, RegisterVT, ValueVT, V); Part += NumRegs; Parts.clear(); } @@ -737,7 +758,8 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, /// Chain/Flag as the input and updates them for the output Chain/Flag. /// If the Flag pointer is NULL, no flag is used. void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, - SDValue &Chain, SDValue *Flag) const { + SDValue &Chain, SDValue *Flag, + const Value *V) const { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // Get the list of the values's legal parts. 
@@ -749,7 +771,7 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, EVT RegisterVT = RegVTs[Value]; getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), - &Parts[Part], NumParts, RegisterVT); + &Parts[Part], NumParts, RegisterVT, V); Part += NumParts; } @@ -994,7 +1016,7 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { unsigned InReg = It->second; RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType()); SDValue Chain = DAG.getEntryNode(); - N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL); + N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL, V); resolveDanglingDebugInfo(V, N); return N; } @@ -1149,7 +1171,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { unsigned InReg = FuncInfo.InitializeRegForValue(Inst); RegsForValue RFV(*DAG.getContext(), TLI, InReg, Inst->getType()); SDValue Chain = DAG.getEntryNode(); - return RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL); + return RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL, V); } llvm_unreachable("Can't get register for value!"); @@ -1218,7 +1240,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { SmallVector<SDValue, 4> Parts(NumParts); getCopyToParts(DAG, getCurDebugLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + j), - &Parts[0], NumParts, PartVT, ExtendKind); + &Parts[0], NumParts, PartVT, &I, ExtendKind); // 'inreg' on function refers to return value ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); @@ -2093,7 +2115,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) TSize += I->size(); - if (!areJTsAllowed(TLI) || TSize.ult(4)) + if (!areJTsAllowed(TLI) || TSize.ult(TLI.getMinimumJumpTableEntries())) return false; APInt Range = ComputeRange(First, Last); @@ -2565,9 +2587,10 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { if (handleSmallSwitchRange(CR, WorkList, SV, Default, SwitchMBB)) continue; - // If the switch has more than 5 blocks, and at least 40% dense, and the + // If the switch has more than N blocks, and is at least 40% dense, and the // target supports indirect branches, then emit a jump table rather than // lowering the switch to a binary tree of conditional branches. + // N defaults to 4 and is controlled via TLS.getMinimumJumpTableEntries(). if (handleJTSwitchCase(CR, WorkList, SV, Default, SwitchMBB)) continue; @@ -4377,7 +4400,7 @@ static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS, return DAG.getConstantFP(1.0, LHS.getValueType()); const Function *F = DAG.getMachineFunction().getFunction(); - if (!F->hasFnAttr(Attribute::OptimizeForSize) || + if (!F->getFnAttributes().hasOptimizeForSizeAttr() || // If optimizing for size, don't insert too many multiplies. This // inserts up to 5 multiplies. 
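// handleJTSwitchCase above now compares the case count against the new
// TargetLowering hook instead of a hard-coded 4. A simplified sketch of that
// decision (density and legality checks omitted; the helper is illustrative):
#include "llvm/Target/TargetLowering.h"
using namespace llvm;

static bool enoughCasesForJumpTable(const TargetLowering &TLI,
                                    uint64_t NumCases) {
  return NumCases >= uint64_t(TLI.getMinimumJumpTableEntries());
}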
CountPopulation_32(Val)+Log2_32(Val) < 7) { @@ -6244,7 +6267,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Use the produced MatchedRegs object to MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(), - Chain, &Flag); + Chain, &Flag, CS.getInstruction()); MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, true, OpInfo.getMatchedOperand(), DAG, AsmNodeOperands); @@ -6326,7 +6349,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(), - Chain, &Flag); + Chain, &Flag, CS.getInstruction()); OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0, DAG, AsmNodeOperands); @@ -6357,7 +6380,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // and set it as the value of the call. if (!RetValRegs.Regs.empty()) { SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), - Chain, &Flag); + Chain, &Flag, CS.getInstruction()); // FIXME: Why don't we do this for inline asms with MRVs? if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) { @@ -6397,7 +6420,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { RegsForValue &OutRegs = IndirectStoresToEmit[i].first; const Value *Ptr = IndirectStoresToEmit[i].second; SDValue OutVal = OutRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), - Chain, &Flag); + Chain, &Flag, IA); StoresToEmit.push_back(std::make_pair(OutVal, Ptr)); } @@ -6515,7 +6538,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { ExtendKind = ISD::ZERO_EXTEND; getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, - PartVT, ExtendKind); + PartVT, CLI.CS ? CLI.CS->getInstruction() : 0, ExtendKind); for (unsigned j = 0; j != NumParts; ++j) { // if it isn't first piece, alignment must be 1 @@ -6596,7 +6619,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg], - NumRegs, RegisterVT, VT, + NumRegs, RegisterVT, VT, NULL, AssertOp)); CurReg += NumRegs; } @@ -6635,7 +6658,7 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { RegsForValue RFV(V->getContext(), TLI, Reg, V->getType()); SDValue Chain = DAG.getEntryNode(); - RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0); + RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0, V); PendingExports.push_back(Chain); } @@ -6777,7 +6800,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { EVT RegVT = TLI.getRegisterType(*CurDAG->getContext(), VT); ISD::NodeType AssertOp = ISD::DELETED_NODE; SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, - RegVT, VT, AssertOp); + RegVT, VT, NULL, AssertOp); MachineFunction& MF = SDB->DAG.getMachineFunction(); MachineRegisterInfo& RegInfo = MF.getRegInfo(); @@ -6818,7 +6841,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts, PartVT, VT, - AssertOp)); + NULL, AssertOp)); } i += NumParts; diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 56f3a45c9a..be3ecf34f7 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -613,6 +613,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm, ShouldFoldAtomicFences = false; InsertFencesForAtomic = false; SupportJumpTables 
= true; + MinimumJumpTableEntries = 4; InitLibcallNames(LibcallRoutineNames); InitCmpLibcallCCs(CmpLibcallCCs); diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index a04ac3fbc1..a58c144659 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -137,10 +137,10 @@ bool StackProtector::ContainsProtectableArray(Type *Ty, bool InStruct) const { /// add a guard variable to functions that call alloca, and functions with /// buffers larger than SSPBufferSize bytes. bool StackProtector::RequiresStackProtector() const { - if (F->hasFnAttr(Attribute::StackProtectReq)) + if (F->getFnAttributes().hasStackProtectReqAttr()) return true; - if (!F->hasFnAttr(Attribute::StackProtect)) + if (!F->getFnAttributes().hasStackProtectAttr()) return false; for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp index a813fa65ac..230ea038e2 100644 --- a/lib/CodeGen/TailDuplication.cpp +++ b/lib/CodeGen/TailDuplication.cpp @@ -552,7 +552,7 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF, // compensate for the duplication. unsigned MaxDuplicateCount; if (TailDuplicateSize.getNumOccurrences() == 0 && - MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) + MF.getFunction()->getFnAttributes().hasOptimizeForSizeAttr()) MaxDuplicateCount = 1; else MaxDuplicateCount = TailDuplicateSize; diff --git a/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt b/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt index 7d67d0d8be..348308897d 100644 --- a/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt +++ b/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt @@ -1,11 +1,6 @@ - -include_directories( ${LLVM_INTEL_JITEVENTS_INCDIR} ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -set(system_libs - ${system_libs} - jitprofiling - ) +include_directories( ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) add_llvm_library(LLVMIntelJITEvents IntelJITEventListener.cpp + jitprofiling.c ) diff --git a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp index c11c17eac7..23f8607322 100644 --- a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp +++ b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp @@ -22,12 +22,12 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/ExecutionEngine/IntelJITEventsWrapper.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/Errno.h" #include "llvm/Support/ValueHandle.h" #include "EventListenerCommon.h" +#include "IntelJITEventsWrapper.h" using namespace llvm; using namespace llvm::jitprofiling; @@ -37,13 +37,13 @@ namespace { class IntelJITEventListener : public JITEventListener { typedef DenseMap<void*, unsigned int> MethodIDMap; - IntelJITEventsWrapper& Wrapper; + OwningPtr<IntelJITEventsWrapper> Wrapper; MethodIDMap MethodIDs; FilenameCache Filenames; public: - IntelJITEventListener(IntelJITEventsWrapper& libraryWrapper) - : Wrapper(libraryWrapper) { + IntelJITEventListener(IntelJITEventsWrapper* libraryWrapper) { + Wrapper.reset(libraryWrapper); } ~IntelJITEventListener() { @@ -94,7 +94,7 @@ static iJIT_Method_Load FunctionDescToIntelJITFormat( void IntelJITEventListener::NotifyFunctionEmitted( const Function &F, void *FnStart, size_t FnSize, const EmittedFunctionDetails &Details) { - iJIT_Method_Load FunctionMessage = FunctionDescToIntelJITFormat(Wrapper, + iJIT_Method_Load FunctionMessage = FunctionDescToIntelJITFormat(*Wrapper, F.getName().data(), reinterpret_cast<uint64_t>(FnStart), FnSize); @@ -151,15 +151,15 @@ void IntelJITEventListener::NotifyFunctionEmitted( FunctionMessage.line_number_table = 0; } - Wrapper.iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, - &FunctionMessage); + Wrapper->iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, + &FunctionMessage); MethodIDs[FnStart] = FunctionMessage.method_id; } void IntelJITEventListener::NotifyFreeingMachineCode(void *FnStart) { MethodIDMap::iterator I = MethodIDs.find(FnStart); if (I != MethodIDs.end()) { - Wrapper.iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_UNLOAD_START, &I->second); + Wrapper->iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_UNLOAD_START, &I->second); MethodIDs.erase(I); } } @@ -168,15 +168,13 @@ void IntelJITEventListener::NotifyFreeingMachineCode(void *FnStart) { namespace llvm { JITEventListener *JITEventListener::createIntelJITEventListener() { - static OwningPtr<IntelJITEventsWrapper> JITProfilingWrapper( - new IntelJITEventsWrapper); - return new IntelJITEventListener(*JITProfilingWrapper); + return new IntelJITEventListener(new IntelJITEventsWrapper); } // for testing JITEventListener *JITEventListener::createIntelJITEventListener( IntelJITEventsWrapper* TestImpl) { - return new IntelJITEventListener(*TestImpl); + return new IntelJITEventListener(TestImpl); } } // namespace llvm diff --git a/include/llvm/ExecutionEngine/IntelJITEventsWrapper.h b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h index ca87342029..7ab08e15a8 100644 --- a/include/llvm/ExecutionEngine/IntelJITEventsWrapper.h +++ b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h @@ -18,7 +18,7 @@ #ifndef INTEL_JIT_EVENTS_WRAPPER_H #define INTEL_JIT_EVENTS_WRAPPER_H -#include <jitprofiling.h> +#include "jitprofiling.h" namespace llvm { diff --git a/lib/ExecutionEngine/IntelJITEvents/Makefile 
b/lib/ExecutionEngine/IntelJITEvents/Makefile index ba75ac6f64..dcf3126cc5 100644 --- a/lib/ExecutionEngine/IntelJITEvents/Makefile +++ b/lib/ExecutionEngine/IntelJITEvents/Makefile @@ -11,7 +11,8 @@ LIBRARYNAME = LLVMIntelJITEvents include $(LEVEL)/Makefile.config -SOURCES := IntelJITEventListener.cpp -CPPFLAGS += -I$(INTEL_JITEVENTS_INCDIR) -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. +SOURCES := IntelJITEventListener.cpp \ + jitprofiling.c +CPPFLAGS += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. include $(LLVM_SRC_ROOT)/Makefile.rules diff --git a/lib/ExecutionEngine/IntelJITEvents/ittnotify_config.h b/lib/ExecutionEngine/IntelJITEvents/ittnotify_config.h new file mode 100644 index 0000000000..238065fe0a --- /dev/null +++ b/lib/ExecutionEngine/IntelJITEvents/ittnotify_config.h @@ -0,0 +1,449 @@ +/*===-- ittnotify_config.h - JIT Profiling API internal config-----*- C -*-===* + * + * The LLVM Compiler Infrastructure + * + * This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===* + * + * This file provides Intel(R) Performance Analyzer JIT (Just-In-Time) + * Profiling API internal config. + * + *===----------------------------------------------------------------------===*/ +#ifndef _ITTNOTIFY_CONFIG_H_ +#define _ITTNOTIFY_CONFIG_H_ + +/** @cond exclude_from_documentation */ +#ifndef ITT_OS_WIN +# define ITT_OS_WIN 1 +#endif /* ITT_OS_WIN */ + +#ifndef ITT_OS_LINUX +# define ITT_OS_LINUX 2 +#endif /* ITT_OS_LINUX */ + +#ifndef ITT_OS_MAC +# define ITT_OS_MAC 3 +#endif /* ITT_OS_MAC */ + +#ifndef ITT_OS +# if defined WIN32 || defined _WIN32 +# define ITT_OS ITT_OS_WIN +# elif defined( __APPLE__ ) && defined( __MACH__ ) +# define ITT_OS ITT_OS_MAC +# else +# define ITT_OS ITT_OS_LINUX +# endif +#endif /* ITT_OS */ + +#ifndef ITT_PLATFORM_WIN +# define ITT_PLATFORM_WIN 1 +#endif /* ITT_PLATFORM_WIN */ + +#ifndef ITT_PLATFORM_POSIX +# define ITT_PLATFORM_POSIX 2 +#endif /* ITT_PLATFORM_POSIX */ + +#ifndef ITT_PLATFORM +# if ITT_OS==ITT_OS_WIN +# define ITT_PLATFORM ITT_PLATFORM_WIN +# else +# define ITT_PLATFORM ITT_PLATFORM_POSIX +# endif /* _WIN32 */ +#endif /* ITT_PLATFORM */ + +#if defined(_UNICODE) && !defined(UNICODE) +#define UNICODE +#endif + +#include <stddef.h> +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#include <tchar.h> +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#include <stdint.h> +#if defined(UNICODE) || defined(_UNICODE) +#include <wchar.h> +#endif /* UNICODE || _UNICODE */ +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +#ifndef CDECL +# if ITT_PLATFORM==ITT_PLATFORM_WIN +# define CDECL __cdecl +# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +# if defined _M_X64 || defined _M_AMD64 || defined __x86_64__ +# define CDECL /* not actual on x86_64 platform */ +# else /* _M_X64 || _M_AMD64 || __x86_64__ */ +# define CDECL __attribute__ ((cdecl)) +# endif /* _M_X64 || _M_AMD64 || __x86_64__ */ +# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* CDECL */ + +#ifndef STDCALL +# if ITT_PLATFORM==ITT_PLATFORM_WIN +# define STDCALL __stdcall +# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +# if defined _M_X64 || defined _M_AMD64 || defined __x86_64__ +# define STDCALL /* not supported on x86_64 platform */ +# else /* _M_X64 || _M_AMD64 || __x86_64__ */ +# define STDCALL __attribute__ ((stdcall)) +# endif /* _M_X64 || _M_AMD64 || __x86_64__ */ +# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* STDCALL */ + +#define ITTAPI CDECL +#define LIBITTAPI CDECL + +/* 
TODO: Temporary for compatibility! */ +#define ITTAPI_CALL CDECL +#define LIBITTAPI_CALL CDECL + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +/* use __forceinline (VC++ specific) */ +#define ITT_INLINE __forceinline +#define ITT_INLINE_ATTRIBUTE /* nothing */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +/* + * Generally, functions are not inlined unless optimization is specified. + * For functions declared inline, this attribute inlines the function even + * if no optimization level was specified. + */ +#ifdef __STRICT_ANSI__ +#define ITT_INLINE static +#else /* __STRICT_ANSI__ */ +#define ITT_INLINE static inline +#endif /* __STRICT_ANSI__ */ +#define ITT_INLINE_ATTRIBUTE __attribute__ ((always_inline)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +/** @endcond */ + +#ifndef ITT_ARCH_IA32 +# define ITT_ARCH_IA32 1 +#endif /* ITT_ARCH_IA32 */ + +#ifndef ITT_ARCH_IA32E +# define ITT_ARCH_IA32E 2 +#endif /* ITT_ARCH_IA32E */ + +#ifndef ITT_ARCH_IA64 +# define ITT_ARCH_IA64 3 +#endif /* ITT_ARCH_IA64 */ + +#ifndef ITT_ARCH +# if defined _M_X64 || defined _M_AMD64 || defined __x86_64__ +# define ITT_ARCH ITT_ARCH_IA32E +# elif defined _M_IA64 || defined __ia64 +# define ITT_ARCH ITT_ARCH_IA64 +# else +# define ITT_ARCH ITT_ARCH_IA32 +# endif +#endif + +#ifdef __cplusplus +# define ITT_EXTERN_C extern "C" +#else +# define ITT_EXTERN_C /* nothing */ +#endif /* __cplusplus */ + +#define ITT_TO_STR_AUX(x) #x +#define ITT_TO_STR(x) ITT_TO_STR_AUX(x) + +#define __ITT_BUILD_ASSERT(expr, suffix) do { \ + static char __itt_build_check_##suffix[(expr) ? 1 : -1]; \ + __itt_build_check_##suffix[0] = 0; \ +} while(0) +#define _ITT_BUILD_ASSERT(expr, suffix) __ITT_BUILD_ASSERT((expr), suffix) +#define ITT_BUILD_ASSERT(expr) _ITT_BUILD_ASSERT((expr), __LINE__) + +#define ITT_MAGIC { 0xED, 0xAB, 0xAB, 0xEC, 0x0D, 0xEE, 0xDA, 0x30 } + +/* Replace with snapshot date YYYYMMDD for promotion build. 
*/ +#define API_VERSION_BUILD 20111111 + +#ifndef API_VERSION_NUM +#define API_VERSION_NUM 0.0.0 +#endif /* API_VERSION_NUM */ + +#define API_VERSION "ITT-API-Version " ITT_TO_STR(API_VERSION_NUM) \ + " (" ITT_TO_STR(API_VERSION_BUILD) ")" + +/* OS communication functions */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#include <windows.h> +typedef HMODULE lib_t; +typedef DWORD TIDT; +typedef CRITICAL_SECTION mutex_t; +#define MUTEX_INITIALIZER { 0 } +#define strong_alias(name, aliasname) /* empty for Windows */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#include <dlfcn.h> +#if defined(UNICODE) || defined(_UNICODE) +#include <wchar.h> +#endif /* UNICODE */ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE 1 /* need for PTHREAD_MUTEX_RECURSIVE */ +#endif /* _GNU_SOURCE */ +#include <pthread.h> +typedef void* lib_t; +typedef pthread_t TIDT; +typedef pthread_mutex_t mutex_t; +#define MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER +#define _strong_alias(name, aliasname) \ + extern __typeof (name) aliasname __attribute__ ((alias (#name))); +#define strong_alias(name, aliasname) _strong_alias(name, aliasname) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_get_proc(lib, name) GetProcAddress(lib, name) +#define __itt_mutex_init(mutex) InitializeCriticalSection(mutex) +#define __itt_mutex_lock(mutex) EnterCriticalSection(mutex) +#define __itt_mutex_unlock(mutex) LeaveCriticalSection(mutex) +#define __itt_load_lib(name) LoadLibraryA(name) +#define __itt_unload_lib(handle) FreeLibrary(handle) +#define __itt_system_error() (int)GetLastError() +#define __itt_fstrcmp(s1, s2) lstrcmpA(s1, s2) +#define __itt_fstrlen(s) lstrlenA(s) +#define __itt_fstrcpyn(s1, s2, l) lstrcpynA(s1, s2, l) +#define __itt_fstrdup(s) _strdup(s) +#define __itt_thread_id() GetCurrentThreadId() +#define __itt_thread_yield() SwitchToThread() +#ifndef ITT_SIMPLE_INIT +ITT_INLINE long +__itt_interlocked_increment(volatile long* ptr) ITT_INLINE_ATTRIBUTE; +ITT_INLINE long __itt_interlocked_increment(volatile long* ptr) +{ + return InterlockedIncrement(ptr); +} +#endif /* ITT_SIMPLE_INIT */ +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +#define __itt_get_proc(lib, name) dlsym(lib, name) +#define __itt_mutex_init(mutex) {\ + pthread_mutexattr_t mutex_attr; \ + int error_code = pthread_mutexattr_init(&mutex_attr); \ + if (error_code) \ + __itt_report_error(__itt_error_system, "pthread_mutexattr_init", \ + error_code); \ + error_code = pthread_mutexattr_settype(&mutex_attr, \ + PTHREAD_MUTEX_RECURSIVE); \ + if (error_code) \ + __itt_report_error(__itt_error_system, "pthread_mutexattr_settype", \ + error_code); \ + error_code = pthread_mutex_init(mutex, &mutex_attr); \ + if (error_code) \ + __itt_report_error(__itt_error_system, "pthread_mutex_init", \ + error_code); \ + error_code = pthread_mutexattr_destroy(&mutex_attr); \ + if (error_code) \ + __itt_report_error(__itt_error_system, "pthread_mutexattr_destroy", \ + error_code); \ +} +#define __itt_mutex_lock(mutex) pthread_mutex_lock(mutex) +#define __itt_mutex_unlock(mutex) pthread_mutex_unlock(mutex) +#define __itt_load_lib(name) dlopen(name, RTLD_LAZY) +#define __itt_unload_lib(handle) dlclose(handle) +#define __itt_system_error() errno +#define __itt_fstrcmp(s1, s2) strcmp(s1, s2) +#define __itt_fstrlen(s) strlen(s) +#define __itt_fstrcpyn(s1, s2, l) strncpy(s1, s2, l) +#define __itt_fstrdup(s) strdup(s) +#define __itt_thread_id() pthread_self() +#define __itt_thread_yield() sched_yield() +#if ITT_ARCH==ITT_ARCH_IA64 +#ifdef __INTEL_COMPILER +#define 
__TBB_machine_fetchadd4(addr, val) __fetchadd4_acq((void *)addr, val) +#else /* __INTEL_COMPILER */ +/* TODO: Add Support for not Intel compilers for IA64 */ +#endif /* __INTEL_COMPILER */ +#else /* ITT_ARCH!=ITT_ARCH_IA64 */ +ITT_INLINE long +__TBB_machine_fetchadd4(volatile void* ptr, long addend) ITT_INLINE_ATTRIBUTE; +ITT_INLINE long __TBB_machine_fetchadd4(volatile void* ptr, long addend) +{ + long result; + __asm__ __volatile__("lock\nxadd %0,%1" + : "=r"(result),"=m"(*(long*)ptr) + : "0"(addend), "m"(*(long*)ptr) + : "memory"); + return result; +} +#endif /* ITT_ARCH==ITT_ARCH_IA64 */ +#ifndef ITT_SIMPLE_INIT +ITT_INLINE long +__itt_interlocked_increment(volatile long* ptr) ITT_INLINE_ATTRIBUTE; +ITT_INLINE long __itt_interlocked_increment(volatile long* ptr) +{ + return __TBB_machine_fetchadd4(ptr, 1) + 1L; +} +#endif /* ITT_SIMPLE_INIT */ +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +typedef enum { + __itt_collection_normal = 0, + __itt_collection_paused = 1 +} __itt_collection_state; + +typedef enum { + __itt_thread_normal = 0, + __itt_thread_ignored = 1 +} __itt_thread_state; + +#pragma pack(push, 8) + +typedef struct ___itt_thread_info +{ + const char* nameA; /*!< Copy of original name in ASCII. */ +#if defined(UNICODE) || defined(_UNICODE) + const wchar_t* nameW; /*!< Copy of original name in UNICODE. */ +#else /* UNICODE || _UNICODE */ + void* nameW; +#endif /* UNICODE || _UNICODE */ + TIDT tid; + __itt_thread_state state; /*!< Thread state (paused or normal) */ + int extra1; /*!< Reserved to the runtime */ + void* extra2; /*!< Reserved to the runtime */ + struct ___itt_thread_info* next; +} __itt_thread_info; + +#include "ittnotify_types.h" /* For __itt_group_id definition */ + +typedef struct ___itt_api_info_20101001 +{ + const char* name; + void** func_ptr; + void* init_func; + __itt_group_id group; +} __itt_api_info_20101001; + +typedef struct ___itt_api_info +{ + const char* name; + void** func_ptr; + void* init_func; + void* null_func; + __itt_group_id group; +} __itt_api_info; + +struct ___itt_domain; +struct ___itt_string_handle; + +typedef struct ___itt_global +{ + unsigned char magic[8]; + unsigned long version_major; + unsigned long version_minor; + unsigned long version_build; + volatile long api_initialized; + volatile long mutex_initialized; + volatile long atomic_counter; + mutex_t mutex; + lib_t lib; + void* error_handler; + const char** dll_path_ptr; + __itt_api_info* api_list_ptr; + struct ___itt_global* next; + /* Joinable structures below */ + __itt_thread_info* thread_list; + struct ___itt_domain* domain_list; + struct ___itt_string_handle* string_list; + __itt_collection_state state; +} __itt_global; + +#pragma pack(pop) + +#define NEW_THREAD_INFO_W(gptr,h,h_tail,t,s,n) { \ + h = (__itt_thread_info*)malloc(sizeof(__itt_thread_info)); \ + if (h != NULL) { \ + h->tid = t; \ + h->nameA = NULL; \ + h->nameW = n ? _wcsdup(n) : NULL; \ + h->state = s; \ + h->extra1 = 0; /* reserved */ \ + h->extra2 = NULL; /* reserved */ \ + h->next = NULL; \ + if (h_tail == NULL) \ + (gptr)->thread_list = h; \ + else \ + h_tail->next = h; \ + } \ +} + +#define NEW_THREAD_INFO_A(gptr,h,h_tail,t,s,n) { \ + h = (__itt_thread_info*)malloc(sizeof(__itt_thread_info)); \ + if (h != NULL) { \ + h->tid = t; \ + h->nameA = n ? 
__itt_fstrdup(n) : NULL; \ + h->nameW = NULL; \ + h->state = s; \ + h->extra1 = 0; /* reserved */ \ + h->extra2 = NULL; /* reserved */ \ + h->next = NULL; \ + if (h_tail == NULL) \ + (gptr)->thread_list = h; \ + else \ + h_tail->next = h; \ + } \ +} + +#define NEW_DOMAIN_W(gptr,h,h_tail,name) { \ + h = (__itt_domain*)malloc(sizeof(__itt_domain)); \ + if (h != NULL) { \ + h->flags = 0; /* domain is disabled by default */ \ + h->nameA = NULL; \ + h->nameW = name ? _wcsdup(name) : NULL; \ + h->extra1 = 0; /* reserved */ \ + h->extra2 = NULL; /* reserved */ \ + h->next = NULL; \ + if (h_tail == NULL) \ + (gptr)->domain_list = h; \ + else \ + h_tail->next = h; \ + } \ +} + +#define NEW_DOMAIN_A(gptr,h,h_tail,name) { \ + h = (__itt_domain*)malloc(sizeof(__itt_domain)); \ + if (h != NULL) { \ + h->flags = 0; /* domain is disabled by default */ \ + h->nameA = name ? __itt_fstrdup(name) : NULL; \ + h->nameW = NULL; \ + h->extra1 = 0; /* reserved */ \ + h->extra2 = NULL; /* reserved */ \ + h->next = NULL; \ + if (h_tail == NULL) \ + (gptr)->domain_list = h; \ + else \ + h_tail->next = h; \ + } \ +} + +#define NEW_STRING_HANDLE_W(gptr,h,h_tail,name) { \ + h = (__itt_string_handle*)malloc(sizeof(__itt_string_handle)); \ + if (h != NULL) { \ + h->strA = NULL; \ + h->strW = name ? _wcsdup(name) : NULL; \ + h->extra1 = 0; /* reserved */ \ + h->extra2 = NULL; /* reserved */ \ + h->next = NULL; \ + if (h_tail == NULL) \ + (gptr)->string_list = h; \ + else \ + h_tail->next = h; \ + } \ +} + +#define NEW_STRING_HANDLE_A(gptr,h,h_tail,name) { \ + h = (__itt_string_handle*)malloc(sizeof(__itt_string_handle)); \ + if (h != NULL) { \ + h->strA = name ? __itt_fstrdup(name) : NULL; \ + h->strW = NULL; \ + h->extra1 = 0; /* reserved */ \ + h->extra2 = NULL; /* reserved */ \ + h->next = NULL; \ + if (h_tail == NULL) \ + (gptr)->string_list = h; \ + else \ + h_tail->next = h; \ + } \ +} + +#endif /* _ITTNOTIFY_CONFIG_H_ */ diff --git a/lib/ExecutionEngine/IntelJITEvents/ittnotify_types.h b/lib/ExecutionEngine/IntelJITEvents/ittnotify_types.h new file mode 100644 index 0000000000..5d502ba8e8 --- /dev/null +++ b/lib/ExecutionEngine/IntelJITEvents/ittnotify_types.h @@ -0,0 +1,63 @@ +//===-- ittnotify_types.h - Intel(R) Performance Analyzer JIT (Just-In-Time) Profiling API internal types. ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +#ifndef _ITTNOTIFY_TYPES_H_ +#define _ITTNOTIFY_TYPES_H_ + +typedef enum ___itt_group_id +{ + __itt_group_none = 0, + __itt_group_legacy = 1<<0, + __itt_group_control = 1<<1, + __itt_group_thread = 1<<2, + __itt_group_mark = 1<<3, + __itt_group_sync = 1<<4, + __itt_group_fsync = 1<<5, + __itt_group_jit = 1<<6, + __itt_group_model = 1<<7, + __itt_group_splitter_min = 1<<7, + __itt_group_counter = 1<<8, + __itt_group_frame = 1<<9, + __itt_group_stitch = 1<<10, + __itt_group_heap = 1<<11, + __itt_group_splitter_max = 1<<12, + __itt_group_structure = 1<<12, + __itt_group_suppress = 1<<13, + __itt_group_all = -1 +} __itt_group_id; + +#pragma pack(push, 8) + +typedef struct ___itt_group_list +{ + __itt_group_id id; + const char* name; +} __itt_group_list; + +#pragma pack(pop) + +#define ITT_GROUP_LIST(varname) \ + static __itt_group_list varname[] = { \ + { __itt_group_all, "all" }, \ + { __itt_group_control, "control" }, \ + { __itt_group_thread, "thread" }, \ + { __itt_group_mark, "mark" }, \ + { __itt_group_sync, "sync" }, \ + { __itt_group_fsync, "fsync" }, \ + { __itt_group_jit, "jit" }, \ + { __itt_group_model, "model" }, \ + { __itt_group_counter, "counter" }, \ + { __itt_group_frame, "frame" }, \ + { __itt_group_stitch, "stitch" }, \ + { __itt_group_heap, "heap" }, \ + { __itt_group_structure, "structure" }, \ + { __itt_group_suppress, "suppress" }, \ + { __itt_group_none, NULL } \ + } + +#endif /* _ITTNOTIFY_TYPES_H_ */ diff --git a/lib/ExecutionEngine/IntelJITEvents/jitprofiling.c b/lib/ExecutionEngine/IntelJITEvents/jitprofiling.c new file mode 100644 index 0000000000..9b0dafbdca --- /dev/null +++ b/lib/ExecutionEngine/IntelJITEvents/jitprofiling.c @@ -0,0 +1,476 @@ +/*===-- jitprofiling.c - JIT (Just-In-Time) Profiling API----------*- C -*-===* + * + * The LLVM Compiler Infrastructure + * + * This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===* + * + * This file provides Intel(R) Performance Analyzer JIT (Just-In-Time) + * Profiling API implementation. 
+ * + *===----------------------------------------------------------------------===*/ +#include "ittnotify_config.h" + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#include <windows.h> +#pragma optimize("", off) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#include <pthread.h> +#include <dlfcn.h> +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#include <malloc.h> +#include <stdlib.h> + +#include "jitprofiling.h" + +static const char rcsid[] = "\n@(#) $Revision: 243501 $\n"; + +#define DLL_ENVIRONMENT_VAR "VS_PROFILER" + +#ifndef NEW_DLL_ENVIRONMENT_VAR +#if ITT_ARCH==ITT_ARCH_IA32 +#define NEW_DLL_ENVIRONMENT_VAR "INTEL_JIT_PROFILER32" +#else +#define NEW_DLL_ENVIRONMENT_VAR "INTEL_JIT_PROFILER64" +#endif +#endif /* NEW_DLL_ENVIRONMENT_VAR */ + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define DEFAULT_DLLNAME "JitPI.dll" +HINSTANCE m_libHandle = NULL; +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define DEFAULT_DLLNAME "libJitPI.so" +void* m_libHandle = NULL; +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/* default location of JIT profiling agent on Android */ +#define ANDROID_JIT_AGENT_PATH "/data/intel/libittnotify.so" + +/* the function pointers */ +typedef unsigned int(*TPInitialize)(void); +static TPInitialize FUNC_Initialize=NULL; + +typedef unsigned int(*TPNotify)(unsigned int, void*); +static TPNotify FUNC_NotifyEvent=NULL; + +static iJIT_IsProfilingActiveFlags executionMode = iJIT_NOTHING_RUNNING; + +/* end collector dll part. */ + +/* loadiJIT_Funcs() : this function is called just in the beginning + * and is responsible to load the functions from BistroJavaCollector.dll + * result: + * on success: the functions loads, iJIT_DLL_is_missing=0, return value = 1 + * on failure: the functions are NULL, iJIT_DLL_is_missing=1, return value = 0 + */ +static int loadiJIT_Funcs(void); + +/* global representing whether the BistroJavaCollector can't be loaded */ +static int iJIT_DLL_is_missing = 0; + +/* Virtual stack - the struct is used as a virtual stack for each thread. + * Every thread initializes with a stack of size INIT_TOP_STACK. + * Every method entry decreases from the current stack point, + * and when a thread stack reaches its top of stack (return from the global + * function), the top of stack and the current stack increase. Notice that + * when returning from a function the stack pointer is the address of + * the function return. +*/ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +static DWORD threadLocalStorageHandle = 0; +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +static pthread_key_t threadLocalStorageHandle = (pthread_key_t)0; +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +#define INIT_TOP_Stack 10000 + +typedef struct +{ + unsigned int TopStack; + unsigned int CurrentStack; +} ThreadStack, *pThreadStack; + +/* end of virtual stack. */ + +/* + * The function for reporting virtual-machine related events to VTune. + * Note: when reporting iJVM_EVENT_TYPE_ENTER_NIDS, there is no need to fill + * in the stack_id field in the iJIT_Method_NIDS structure, as VTune fills it. + * The return value in iJVM_EVENT_TYPE_ENTER_NIDS && + * iJVM_EVENT_TYPE_LEAVE_NIDS events will be 0 in case of failure. + * in iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED event + * it will be -1 if EventSpecificData == 0 otherwise it will be 0. +*/ + +ITT_EXTERN_C int JITAPI +iJIT_NotifyEvent(iJIT_JVM_EVENT event_type, void *EventSpecificData) +{ + int ReturnValue; + + /* + * This section is for debugging outside of VTune. + * It creates the environment variables that indicates call graph mode. 
+ * If running outside of VTune remove the remark. + * + * + * static int firstTime = 1; + * char DoCallGraph[12] = "DoCallGraph"; + * if (firstTime) + * { + * firstTime = 0; + * SetEnvironmentVariable( "BISTRO_COLLECTORS_DO_CALLGRAPH", DoCallGraph); + * } + * + * end of section. + */ + + /* initialization part - the functions have not been loaded yet. This part + * will load the functions, and check if we are in Call Graph mode. + * (for special treatment). + */ + if (!FUNC_NotifyEvent) + { + if (iJIT_DLL_is_missing) + return 0; + + /* load the Function from the DLL */ + if (!loadiJIT_Funcs()) + return 0; + + /* Call Graph initialization. */ + } + + /* If the event is method entry/exit, check that in the current mode + * VTune is allowed to receive it + */ + if ((event_type == iJVM_EVENT_TYPE_ENTER_NIDS || + event_type == iJVM_EVENT_TYPE_LEAVE_NIDS) && + (executionMode != iJIT_CALLGRAPH_ON)) + { + return 0; + } + /* This section is performed when method enter event occurs. + * It updates the virtual stack, or creates it if this is the first + * method entry in the thread. The stack pointer is decreased. + */ + if (event_type == iJVM_EVENT_TYPE_ENTER_NIDS) + { +#if ITT_PLATFORM==ITT_PLATFORM_WIN + pThreadStack threadStack = + (pThreadStack)TlsGetValue (threadLocalStorageHandle); +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + pThreadStack threadStack = + (pThreadStack)pthread_getspecific(threadLocalStorageHandle); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + + /* check for use of reserved method IDs */ + if ( ((piJIT_Method_NIDS) EventSpecificData)->method_id <= 999 ) + return 0; + + if (!threadStack) + { + /* initialize the stack. */ + threadStack = (pThreadStack) calloc (sizeof(ThreadStack), 1); + threadStack->TopStack = INIT_TOP_Stack; + threadStack->CurrentStack = INIT_TOP_Stack; +#if ITT_PLATFORM==ITT_PLATFORM_WIN + TlsSetValue(threadLocalStorageHandle,(void*)threadStack); +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + pthread_setspecific(threadLocalStorageHandle,(void*)threadStack); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + } + + /* decrease the stack. */ + ((piJIT_Method_NIDS) EventSpecificData)->stack_id = + (threadStack->CurrentStack)--; + } + + /* This section is performed when method leave event occurs + * It updates the virtual stack. + * Increases the stack pointer. + * If the stack pointer reached the top (left the global function) + * increase the pointer and the top pointer. 
+ */ + if (event_type == iJVM_EVENT_TYPE_LEAVE_NIDS) + { +#if ITT_PLATFORM==ITT_PLATFORM_WIN + pThreadStack threadStack = + (pThreadStack)TlsGetValue (threadLocalStorageHandle); +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + pThreadStack threadStack = + (pThreadStack)pthread_getspecific(threadLocalStorageHandle); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + + /* check for use of reserved method IDs */ + if ( ((piJIT_Method_NIDS) EventSpecificData)->method_id <= 999 ) + return 0; + + if (!threadStack) + { + /* Error: first report in this thread is method exit */ + exit (1); + } + + ((piJIT_Method_NIDS) EventSpecificData)->stack_id = + ++(threadStack->CurrentStack) + 1; + + if (((piJIT_Method_NIDS) EventSpecificData)->stack_id + > threadStack->TopStack) + ((piJIT_Method_NIDS) EventSpecificData)->stack_id = + (unsigned int)-1; + } + + if (event_type == iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED) + { + /* check for use of reserved method IDs */ + if ( ((piJIT_Method_Load) EventSpecificData)->method_id <= 999 ) + return 0; + } + + ReturnValue = (int)FUNC_NotifyEvent(event_type, EventSpecificData); + + return ReturnValue; +} + +/* The new mode call back routine */ +ITT_EXTERN_C void JITAPI +iJIT_RegisterCallbackEx(void *userdata, iJIT_ModeChangedEx + NewModeCallBackFuncEx) +{ + /* is it already missing... or the load of functions from the DLL failed */ + if (iJIT_DLL_is_missing || !loadiJIT_Funcs()) + { + /* then do not bother with notifications */ + NewModeCallBackFuncEx(userdata, iJIT_NO_NOTIFICATIONS); + /* Error: could not load JIT functions. */ + return; + } + /* nothing to do with the callback */ +} + +/* + * This function allows the user to query in which mode, if at all, + *VTune is running + */ +ITT_EXTERN_C iJIT_IsProfilingActiveFlags JITAPI iJIT_IsProfilingActive() +{ + if (!iJIT_DLL_is_missing) + { + loadiJIT_Funcs(); + } + + return executionMode; +} + +/* this function loads the collector dll (BistroJavaCollector) + * and the relevant functions. + * on success: all functions load, iJIT_DLL_is_missing = 0, return value = 1 + * on failure: all functions are NULL, iJIT_DLL_is_missing = 1, return value = 0 + */ +static int loadiJIT_Funcs() +{ + static int bDllWasLoaded = 0; + char *dllName = (char*)rcsid; /* !! Just to avoid unused code elimination */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN + DWORD dNameLength = 0; +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + + if(bDllWasLoaded) + { + /* dll was already loaded, no need to do it for the second time */ + return 1; + } + + /* Assumes that the DLL will not be found */ + iJIT_DLL_is_missing = 1; + FUNC_NotifyEvent = NULL; + + if (m_libHandle) + { +#if ITT_PLATFORM==ITT_PLATFORM_WIN + FreeLibrary(m_libHandle); +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + dlclose(m_libHandle); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + m_libHandle = NULL; + } + + /* Try to get the dll name from the environment */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN + dNameLength = GetEnvironmentVariableA(NEW_DLL_ENVIRONMENT_VAR, NULL, 0); + if (dNameLength) + { + DWORD envret = 0; + dllName = (char*)malloc(sizeof(char) * (dNameLength + 1)); + envret = GetEnvironmentVariableA(NEW_DLL_ENVIRONMENT_VAR, + dllName, dNameLength); + if (envret) + { + /* Try to load the dll from the PATH... 
*/ + m_libHandle = LoadLibraryExA(dllName, + NULL, LOAD_WITH_ALTERED_SEARCH_PATH); + } + free(dllName); + } else { + /* Try to use old VS_PROFILER variable */ + dNameLength = GetEnvironmentVariableA(DLL_ENVIRONMENT_VAR, NULL, 0); + if (dNameLength) + { + DWORD envret = 0; + dllName = (char*)malloc(sizeof(char) * (dNameLength + 1)); + envret = GetEnvironmentVariableA(DLL_ENVIRONMENT_VAR, + dllName, dNameLength); + if (envret) + { + /* Try to load the dll from the PATH... */ + m_libHandle = LoadLibraryA(dllName); + } + free(dllName); + } + } +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + dllName = getenv(NEW_DLL_ENVIRONMENT_VAR); + if (!dllName) + dllName = getenv(DLL_ENVIRONMENT_VAR); +#ifdef ANDROID + if (!dllName) + dllName = ANDROID_JIT_AGENT_PATH; +#endif + if (dllName) + { + /* Try to load the dll from the PATH... */ + m_libHandle = dlopen(dllName, RTLD_LAZY); + } +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + + if (!m_libHandle) + { +#if ITT_PLATFORM==ITT_PLATFORM_WIN + m_libHandle = LoadLibraryA(DEFAULT_DLLNAME); +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + m_libHandle = dlopen(DEFAULT_DLLNAME, RTLD_LAZY); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + } + + /* if the dll wasn't loaded - exit. */ + if (!m_libHandle) + { + iJIT_DLL_is_missing = 1; /* don't try to initialize + * JIT agent the second time + */ + return 0; + } + +#if ITT_PLATFORM==ITT_PLATFORM_WIN + FUNC_NotifyEvent = (TPNotify)GetProcAddress(m_libHandle, "NotifyEvent"); +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + FUNC_NotifyEvent = (TPNotify)dlsym(m_libHandle, "NotifyEvent"); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + if (!FUNC_NotifyEvent) + { + FUNC_Initialize = NULL; + return 0; + } + +#if ITT_PLATFORM==ITT_PLATFORM_WIN + FUNC_Initialize = (TPInitialize)GetProcAddress(m_libHandle, "Initialize"); +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + FUNC_Initialize = (TPInitialize)dlsym(m_libHandle, "Initialize"); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + if (!FUNC_Initialize) + { + FUNC_NotifyEvent = NULL; + return 0; + } + + executionMode = (iJIT_IsProfilingActiveFlags)FUNC_Initialize(); + + bDllWasLoaded = 1; + iJIT_DLL_is_missing = 0; /* DLL is ok. */ + + /* + * Call Graph mode: init the thread local storage + * (need to store the virtual stack there). 
+ */ + if ( executionMode == iJIT_CALLGRAPH_ON ) + { + /* Allocate a thread local storage slot for the thread "stack" */ + if (!threadLocalStorageHandle) +#if ITT_PLATFORM==ITT_PLATFORM_WIN + threadLocalStorageHandle = TlsAlloc(); +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + pthread_key_create(&threadLocalStorageHandle, NULL); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + } + + return 1; +} + +/* + * This function should be called by the user whenever a thread ends, + * to free the thread "virtual stack" storage + */ +ITT_EXTERN_C void JITAPI FinalizeThread() +{ + if (threadLocalStorageHandle) + { +#if ITT_PLATFORM==ITT_PLATFORM_WIN + pThreadStack threadStack = + (pThreadStack)TlsGetValue (threadLocalStorageHandle); +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + pThreadStack threadStack = + (pThreadStack)pthread_getspecific(threadLocalStorageHandle); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + if (threadStack) + { + free (threadStack); + threadStack = NULL; +#if ITT_PLATFORM==ITT_PLATFORM_WIN + TlsSetValue (threadLocalStorageHandle, threadStack); +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + pthread_setspecific(threadLocalStorageHandle, threadStack); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + } + } +} + +/* + * This function should be called by the user when the process ends, + * to free the local storage index +*/ +ITT_EXTERN_C void JITAPI FinalizeProcess() +{ + if (m_libHandle) + { +#if ITT_PLATFORM==ITT_PLATFORM_WIN + FreeLibrary(m_libHandle); +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + dlclose(m_libHandle); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + m_libHandle = NULL; + } + + if (threadLocalStorageHandle) +#if ITT_PLATFORM==ITT_PLATFORM_WIN + TlsFree (threadLocalStorageHandle); +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + pthread_key_delete(threadLocalStorageHandle); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +} + +/* + * This function should be called by the user for any method once. + * The function will return a unique method ID, the user should maintain + * the ID for each method + */ +ITT_EXTERN_C unsigned int JITAPI iJIT_GetNewMethodID() +{ + static unsigned int methodID = 0x100000; + + if (methodID == 0) + return 0; /* ERROR : this is not a valid value */ + + return methodID++; +} diff --git a/lib/ExecutionEngine/IntelJITEvents/jitprofiling.h b/lib/ExecutionEngine/IntelJITEvents/jitprofiling.h new file mode 100644 index 0000000000..f33fb83ba9 --- /dev/null +++ b/lib/ExecutionEngine/IntelJITEvents/jitprofiling.h @@ -0,0 +1,254 @@ +/*===-- jitprofiling.h - JIT Profiling API-------------------------*- C -*-===* + * + * The LLVM Compiler Infrastructure + * + * This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===* + * + * This file provides Intel(R) Performance Analyzer JIT (Just-In-Time) + * Profiling API declaration. + * + *===----------------------------------------------------------------------===*/ +#ifndef __JITPROFILING_H__ +#define __JITPROFILING_H__ + +/* + * Various constants used by functions + */ + +/* event notification */ +typedef enum iJIT_jvm_event +{ + + /* shutdown */ + + /* + * Program exiting EventSpecificData NA + */ + iJVM_EVENT_TYPE_SHUTDOWN = 2, + + /* JIT profiling */ + + /* + * issued after method code jitted into memory but before code is executed + * EventSpecificData is an iJIT_Method_Load + */ + iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED=13, + + /* issued before unload. 
Method code will no longer be executed, but code + * and info are still in memory. The VTune profiler may capture method + * code only at this point EventSpecificData is iJIT_Method_Id + */ + iJVM_EVENT_TYPE_METHOD_UNLOAD_START, + + /* Method Profiling */ + + /* method name, Id and stack is supplied + * issued when a method is about to be entered EventSpecificData is + * iJIT_Method_NIDS + */ + iJVM_EVENT_TYPE_ENTER_NIDS = 19, + + /* method name, Id and stack is supplied + * issued when a method is about to be left EventSpecificData is + * iJIT_Method_NIDS + */ + iJVM_EVENT_TYPE_LEAVE_NIDS +} iJIT_JVM_EVENT; + +typedef enum _iJIT_ModeFlags +{ + /* No need to Notify VTune, since VTune is not running */ + iJIT_NO_NOTIFICATIONS = 0x0000, + + /* when turned on the jit must call + * iJIT_NotifyEvent + * ( + * iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, + * ) + * for all the method already jitted + */ + iJIT_BE_NOTIFY_ON_LOAD = 0x0001, + + /* when turned on the jit must call + * iJIT_NotifyEvent + * ( + * iJVM_EVENT_TYPE_METHOD_UNLOAD_FINISHED, + * ) for all the method that are unloaded + */ + iJIT_BE_NOTIFY_ON_UNLOAD = 0x0002, + + /* when turned on the jit must instrument all + * the currently jited code with calls on + * method entries + */ + iJIT_BE_NOTIFY_ON_METHOD_ENTRY = 0x0004, + + /* when turned on the jit must instrument all + * the currently jited code with calls + * on method exit + */ + iJIT_BE_NOTIFY_ON_METHOD_EXIT = 0x0008 + +} iJIT_ModeFlags; + + + /* Flags used by iJIT_IsProfilingActive() */ +typedef enum _iJIT_IsProfilingActiveFlags +{ + /* No profiler is running. Currently not used */ + iJIT_NOTHING_RUNNING = 0x0000, + + /* Sampling is running. This is the default value + * returned by iJIT_IsProfilingActive() + */ + iJIT_SAMPLING_ON = 0x0001, + + /* Call Graph is running */ + iJIT_CALLGRAPH_ON = 0x0002 + +} iJIT_IsProfilingActiveFlags; + +/* Enumerator for the environment of methods*/ +typedef enum _iJDEnvironmentType +{ + iJDE_JittingAPI = 2 +} iJDEnvironmentType; + +/********************************** + * Data structures for the events * + **********************************/ + +/* structure for the events: + * iJVM_EVENT_TYPE_METHOD_UNLOAD_START + */ + +typedef struct _iJIT_Method_Id +{ + /* Id of the method (same as the one passed in + * the iJIT_Method_Load struct + */ + unsigned int method_id; + +} *piJIT_Method_Id, iJIT_Method_Id; + + +/* structure for the events: + * iJVM_EVENT_TYPE_ENTER_NIDS, + * iJVM_EVENT_TYPE_LEAVE_NIDS, + * iJVM_EVENT_TYPE_EXCEPTION_OCCURRED_NIDS + */ + +typedef struct _iJIT_Method_NIDS +{ + /* unique method ID */ + unsigned int method_id; + + /* NOTE: no need to fill this field, it's filled by VTune */ + unsigned int stack_id; + + /* method name (just the method, without the class) */ + char* method_name; +} *piJIT_Method_NIDS, iJIT_Method_NIDS; + +/* structures for the events: + * iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED + */ + +typedef struct _LineNumberInfo +{ + /* x86 Offset from the begining of the method*/ + unsigned int Offset; + + /* source line number from the begining of the source file */ + unsigned int LineNumber; + +} *pLineNumberInfo, LineNumberInfo; + +typedef struct _iJIT_Method_Load +{ + /* unique method ID - can be any unique value, (except 0 - 999) */ + unsigned int method_id; + + /* method name (can be with or without the class and signature, in any case + * the class name will be added to it) + */ + char* method_name; + + /* virtual address of that method - This determines the method range for the + * 
iJVM_EVENT_TYPE_ENTER/LEAVE_METHOD_ADDR events + */ + void* method_load_address; + + /* Size in memory - Must be exact */ + unsigned int method_size; + + /* Line Table size in number of entries - Zero if none */ + unsigned int line_number_size; + + /* Pointer to the begining of the line numbers info array */ + pLineNumberInfo line_number_table; + + /* unique class ID */ + unsigned int class_id; + + /* class file name */ + char* class_file_name; + + /* source file name */ + char* source_file_name; + + /* bits supplied by the user for saving in the JIT file */ + void* user_data; + + /* the size of the user data buffer */ + unsigned int user_data_size; + + /* NOTE: no need to fill this field, it's filled by VTune */ + iJDEnvironmentType env; + +} *piJIT_Method_Load, iJIT_Method_Load; + +/* API Functions */ +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef CDECL +# if defined WIN32 || defined _WIN32 +# define CDECL __cdecl +# else /* defined WIN32 || defined _WIN32 */ +# if defined _M_X64 || defined _M_AMD64 || defined __x86_64__ +# define CDECL /* not actual on x86_64 platform */ +# else /* _M_X64 || _M_AMD64 || __x86_64__ */ +# define CDECL __attribute__ ((cdecl)) +# endif /* _M_X64 || _M_AMD64 || __x86_64__ */ +# endif /* defined WIN32 || defined _WIN32 */ +#endif /* CDECL */ + +#define JITAPI CDECL + +/* called when the settings are changed with new settings */ +typedef void (*iJIT_ModeChangedEx)(void *UserData, iJIT_ModeFlags Flags); + +int JITAPI iJIT_NotifyEvent(iJIT_JVM_EVENT event_type, void *EventSpecificData); + +/* The new mode call back routine */ +void JITAPI iJIT_RegisterCallbackEx(void *userdata, + iJIT_ModeChangedEx NewModeCallBackFuncEx); + +iJIT_IsProfilingActiveFlags JITAPI iJIT_IsProfilingActive(void); + +void JITAPI FinalizeThread(void); + +void JITAPI FinalizeProcess(void); + +unsigned int JITAPI iJIT_GetNewMethodID(void); + +#ifdef __cplusplus +} +#endif + +#endif /* __JITPROFILING_H__ */ diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp index 8fed48cef2..ffa79761f2 100644 --- a/lib/MC/MCExpr.cpp +++ b/lib/MC/MCExpr.cpp @@ -267,7 +267,7 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) { /* *** */ -void MCTargetExpr::Anchor() {} +void MCTargetExpr::anchor() {} /* *** */ diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index f143e6d0ad..d07a3c9e7f 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -1775,7 +1775,7 @@ APFloat::opStatus APFloat::roundToIntegral(roundingMode rounding_mode) { // If the exponent is large enough, we know that this value is already // integral, and the arithmetic below would potentially cause it to saturate // to +/-Inf. Bail out early instead. - if (exponent+1 >= (int)semanticsPrecision(*semantics)) + if (category == fcNormal && exponent+1 >= (int)semanticsPrecision(*semantics)) return opOK; // The algorithm here is quite simple: we add 2^(p-1), where p is the diff --git a/lib/Support/Errno.cpp b/lib/Support/Errno.cpp index dd218f6099..00be43b750 100644 --- a/lib/Support/Errno.cpp +++ b/lib/Support/Errno.cpp @@ -13,6 +13,7 @@ #include "llvm/Support/Errno.h" #include "llvm/Config/config.h" // Get autoconf configuration settings +#include "llvm/Support/raw_ostream.h" #if HAVE_STRING_H #include <string.h> @@ -39,7 +40,7 @@ std::string StrError(int errnum) { const int MaxErrStrLen = 2000; char buffer[MaxErrStrLen]; buffer[0] = '\0'; - char* str = buffer; + std::string str; #ifdef HAVE_STRERROR_R // strerror_r is thread-safe. 
if (errnum) @@ -49,6 +50,7 @@ std::string StrError(int errnum) { str = strerror_r(errnum,buffer,MaxErrStrLen-1); # else strerror_r(errnum,buffer,MaxErrStrLen-1); + str = buffer; # endif #elif HAVE_DECL_STRERROR_S // "Windows Secure API" if (errnum) @@ -58,12 +60,13 @@ std::string StrError(int errnum) { // the buffer as fast as possible to minimize impact // of collision of strerror in multiple threads. if (errnum) - strncpy(buffer,strerror(errnum),MaxErrStrLen-1); - buffer[MaxErrStrLen-1] = '\0'; + str = strerror(errnum); #else // Strange that this system doesn't even have strerror // but, oh well, just use a generic message - sprintf(buffer, "Error #%d", errnum); + raw_string_ostream stream(str); + stream << "Error #" << errnum; + stream.flush(); #endif return str; } diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp index a13b9e2f87..9ee3f2db92 100644 --- a/lib/Support/Host.cpp +++ b/lib/Support/Host.cpp @@ -234,6 +234,8 @@ std::string sys::getHostCPUName() { case 37: // Intel Core i7, laptop version. case 44: // Intel Core i7 processor and Intel Xeon processor. All // processors are manufactured using the 32 nm process. + case 46: // Nehalem EX + case 47: // Westmere EX return "corei7"; // SandyBridge: diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc index f70e60d3f5..b82371a7b6 100644 --- a/lib/Support/Unix/Path.inc +++ b/lib/Support/Unix/Path.inc @@ -267,7 +267,8 @@ Path::GetCurrentDirectory() { } #if defined(__FreeBSD__) || defined (__NetBSD__) || defined(__Bitrig__) || \ - defined(__OpenBSD__) || defined(__minix) || defined(__FreeBSD_kernel__) + defined(__OpenBSD__) || defined(__minix) || defined(__FreeBSD_kernel__) || \ + defined(__linux__) || defined(__CYGWIN__) static int test_dir(char buf[PATH_MAX], char ret[PATH_MAX], const char *dir, const char *bin) @@ -345,9 +346,17 @@ Path Path::GetMainExecutable(const char *argv0, void *MainAddr) { return Path(exe_path); #elif defined(__linux__) || defined(__CYGWIN__) char exe_path[MAXPATHLEN]; - ssize_t len = readlink("/proc/self/exe", exe_path, sizeof(exe_path)); - if (len >= 0) - return Path(StringRef(exe_path, len)); + StringRef aPath("/proc/self/exe"); + if (sys::fs::exists(aPath)) { + // /proc is not always mounted under Linux (chroot for example). + ssize_t len = readlink(aPath.str().c_str(), exe_path, sizeof(exe_path)); + if (len >= 0) + return Path(StringRef(exe_path, len)); + } else { + // Fall back to the classical detection. + if (getprogpath(exe_path, argv0) != NULL) + return Path(exe_path); + } #elif defined(HAVE_DLFCN_H) // Use dladdr to get executable path if available. Dl_info DLInfo; diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc index e05e81acaf..6d874ea0d0 100644 --- a/lib/Support/Unix/Signals.inc +++ b/lib/Support/Unix/Signals.inc @@ -249,7 +249,7 @@ void llvm::sys::AddSignalHandler(void (*FnPtr)(void *), void *Cookie) { // On glibc systems we have the 'backtrace' function, which works nicely, but // doesn't demangle symbols. static void PrintStackTrace(void *) { -#if defined(HAVE_BACKTRACE) && defined(ENABLE_BACKTRACE) +#if defined(HAVE_BACKTRACE) && defined(ENABLE_BACKTRACES) static void* StackTrace[256]; // Use backtrace() to output a backtrace on Linux systems with glibc. 
int depth = backtrace(StackTrace, diff --git a/lib/Support/Windows/PathV2.inc b/lib/Support/Windows/PathV2.inc index 696768ba9d..3dfac66b77 100644 --- a/lib/Support/Windows/PathV2.inc +++ b/lib/Support/Windows/PathV2.inc @@ -794,7 +794,7 @@ mapped_file_region::mapped_file_region(const Twine &path, SmallVector<wchar_t, 128> path_utf16; // Convert path to UTF-16. - if (ec = UTF8ToUTF16(path.toStringRef(path_storage), path_utf16)) + if ((ec = UTF8ToUTF16(path.toStringRef(path_storage), path_utf16))) return; // Get file handle for creating a file mapping. diff --git a/lib/Support/YAMLParser.cpp b/lib/Support/YAMLParser.cpp index 7c353c89bb..34df636a72 100644 --- a/lib/Support/YAMLParser.cpp +++ b/lib/Support/YAMLParser.cpp @@ -903,6 +903,7 @@ bool Scanner::consume(uint32_t Expected) { void Scanner::skip(uint32_t Distance) { Current += Distance; Column += Distance; + assert(Current <= End && "Skipped past the end"); } bool Scanner::isBlankOrBreak(StringRef::iterator Position) { @@ -1239,6 +1240,12 @@ bool Scanner::scanFlowScalar(bool IsDoubleQuoted) { } } } + + if (Current == End) { + setError("Expected quote at end of scalar", Current); + return false; + } + skip(1); // Skip ending quote. Token T; T.Kind = Token::TK_Scalar; diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index 9a8cab8ecc..0ac92f1ee8 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -40,6 +40,7 @@ FunctionPass *createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM, FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false); FunctionPass *createARMExpandPseudoPass(); +FunctionPass *createARMGlobalBaseRegPass(); FunctionPass *createARMGlobalMergePass(const TargetLowering* tli); FunctionPass *createARMConstantIslandPass(); FunctionPass *createMLxExpansionPass(); diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 38509a3400..00bf1b85ec 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -32,9 +32,6 @@ def FeatureVFP2 : SubtargetFeature<"vfp2", "HasVFPv2", "true", def FeatureVFP3 : SubtargetFeature<"vfp3", "HasVFPv3", "true", "Enable VFP3 instructions", [FeatureVFP2]>; -def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true", - "Enable VFP4 instructions", - [FeatureVFP3]>; def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true", "Enable NEON instructions", [FeatureVFP3]>; @@ -44,10 +41,16 @@ def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true", "Does not support ARM mode execution">; def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true", "Enable half-precision floating point">; +def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true", + "Enable VFP4 instructions", + [FeatureVFP3, FeatureFP16]>; def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true", "Restrict VFP3 to 16 double registers">; def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true", "Enable divide instructions">; +def FeatureHWDivARM : SubtargetFeature<"hwdiv-arm", + "HasHardwareDivideInARM", "true", + "Enable divide instructions in ARM mode">; def FeatureT2XtPk : SubtargetFeature<"t2xtpk", "HasT2ExtractPack", "true", "Enable Thumb2 extract and pack instructions">; def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true", @@ -139,6 +142,13 @@ def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9", [FeatureVMLxForwarding, FeatureT2XtPk, FeatureFP16, FeatureAvoidPartialCPSR]>; +def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift", + "Swift ARM processors", + [FeatureNEONForFP, FeatureT2XtPk, + FeatureVFP4, FeatureMP, 
FeatureHWDiv, + FeatureHWDivARM, FeatureAvoidPartialCPSR, + FeatureHasSlowFPVMLx]>; + // FIXME: It has not been determined if A15 has these features. def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15", "Cortex-A15 ARM processors", @@ -240,6 +250,12 @@ def : ProcNoItin<"cortex-m4", [HasV7Ops, FeatureT2XtPk, FeatureVFP4, FeatureVFPOnlySP, FeatureMClass]>; +// Swift uArch Processors. +def : ProcessorModel<"swift", SwiftModel, + [ProcSwift, HasV7Ops, FeatureNEON, + FeatureDB, FeatureDSPThumb2, + FeatureHasRAS]>; + //===----------------------------------------------------------------------===// // Register File Description //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index c08294918e..42b6bc3cdc 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -49,6 +49,11 @@ static cl::opt<bool> WidenVMOVS("widen-vmovs", cl::Hidden, cl::init(true), cl::desc("Widen ARM vmovs to vmovd when possible")); +static cl::opt<unsigned> +SwiftPartialUpdateClearance("swift-partial-update-clearance", + cl::Hidden, cl::init(12), + cl::desc("Clearance before partial register updates")); + /// ARM_MLxEntry - Record information about MLA / MLS instructions. struct ARM_MLxEntry { uint16_t MLxOpc; // MLA / MLS opcode @@ -1389,7 +1394,6 @@ bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, case ARM::VLDRD: case ARM::VLDRS: case ARM::t2LDRi8: - case ARM::t2LDRDi8: case ARM::t2LDRSHi8: case ARM::t2LDRi12: case ARM::t2LDRSHi12: @@ -1528,6 +1532,14 @@ isProfitableToIfCvt(MachineBasicBlock &TMBB, return (TCycles + FCycles + TExtra + FExtra) <= UnpredCost; } +bool +ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB, + MachineBasicBlock &FMBB) const { + // Reduce false anti-dependencies to let Swift's out-of-order execution + // engine do its thing. + return Subtarget.isSwift(); +} + /// getInstrPredicate - If instruction is predicated, returns its predicate /// condition, otherwise returns AL. It also returns the condition code /// register by reference. @@ -2344,6 +2356,229 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI, return true; } +static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, + const MachineInstr *MI) { + switch (MI->getOpcode()) { + default: { + const MCInstrDesc &Desc = MI->getDesc(); + int UOps = ItinData->getNumMicroOps(Desc.getSchedClass()); + assert(UOps >= 0 && "bad # UOps"); + return UOps; + } + + case ARM::LDRrs: + case ARM::LDRBrs: + case ARM::STRrs: + case ARM::STRBrs: { + unsigned ShOpVal = MI->getOperand(3).getImm(); + bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; + unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); + if (!isSub && + (ShImm == 0 || + ((ShImm == 1 || ShImm == 2 || ShImm == 3) && + ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) + return 1; + return 2; + } + + case ARM::LDRH: + case ARM::STRH: { + if (!MI->getOperand(2).getReg()) + return 1; + + unsigned ShOpVal = MI->getOperand(3).getImm(); + bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; + unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); + if (!isSub && + (ShImm == 0 || + ((ShImm == 1 || ShImm == 2 || ShImm == 3) && + ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) + return 1; + return 2; + } + + case ARM::LDRSB: + case ARM::LDRSH: + return (ARM_AM::getAM3Op(MI->getOperand(3).getImm()) == ARM_AM::sub) ? 
3:2; + + case ARM::LDRSB_POST: + case ARM::LDRSH_POST: { + unsigned Rt = MI->getOperand(0).getReg(); + unsigned Rm = MI->getOperand(3).getReg(); + return (Rt == Rm) ? 4 : 3; + } + + case ARM::LDR_PRE_REG: + case ARM::LDRB_PRE_REG: { + unsigned Rt = MI->getOperand(0).getReg(); + unsigned Rm = MI->getOperand(3).getReg(); + if (Rt == Rm) + return 3; + unsigned ShOpVal = MI->getOperand(4).getImm(); + bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; + unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); + if (!isSub && + (ShImm == 0 || + ((ShImm == 1 || ShImm == 2 || ShImm == 3) && + ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) + return 2; + return 3; + } + + case ARM::STR_PRE_REG: + case ARM::STRB_PRE_REG: { + unsigned ShOpVal = MI->getOperand(4).getImm(); + bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; + unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); + if (!isSub && + (ShImm == 0 || + ((ShImm == 1 || ShImm == 2 || ShImm == 3) && + ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) + return 2; + return 3; + } + + case ARM::LDRH_PRE: + case ARM::STRH_PRE: { + unsigned Rt = MI->getOperand(0).getReg(); + unsigned Rm = MI->getOperand(3).getReg(); + if (!Rm) + return 2; + if (Rt == Rm) + return 3; + return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) + ? 3 : 2; + } + + case ARM::LDR_POST_REG: + case ARM::LDRB_POST_REG: + case ARM::LDRH_POST: { + unsigned Rt = MI->getOperand(0).getReg(); + unsigned Rm = MI->getOperand(3).getReg(); + return (Rt == Rm) ? 3 : 2; + } + + case ARM::LDR_PRE_IMM: + case ARM::LDRB_PRE_IMM: + case ARM::LDR_POST_IMM: + case ARM::LDRB_POST_IMM: + case ARM::STRB_POST_IMM: + case ARM::STRB_POST_REG: + case ARM::STRB_PRE_IMM: + case ARM::STRH_POST: + case ARM::STR_POST_IMM: + case ARM::STR_POST_REG: + case ARM::STR_PRE_IMM: + return 2; + + case ARM::LDRSB_PRE: + case ARM::LDRSH_PRE: { + unsigned Rm = MI->getOperand(3).getReg(); + if (Rm == 0) + return 3; + unsigned Rt = MI->getOperand(0).getReg(); + if (Rt == Rm) + return 4; + unsigned ShOpVal = MI->getOperand(4).getImm(); + bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; + unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); + if (!isSub && + (ShImm == 0 || + ((ShImm == 1 || ShImm == 2 || ShImm == 3) && + ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) + return 3; + return 4; + } + + case ARM::LDRD: { + unsigned Rt = MI->getOperand(0).getReg(); + unsigned Rn = MI->getOperand(2).getReg(); + unsigned Rm = MI->getOperand(3).getReg(); + if (Rm) + return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) ?4:3; + return (Rt == Rn) ? 3 : 2; + } + + case ARM::STRD: { + unsigned Rm = MI->getOperand(3).getReg(); + if (Rm) + return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) ?4:3; + return 2; + } + + case ARM::LDRD_POST: + case ARM::t2LDRD_POST: + return 3; + + case ARM::STRD_POST: + case ARM::t2STRD_POST: + return 4; + + case ARM::LDRD_PRE: { + unsigned Rt = MI->getOperand(0).getReg(); + unsigned Rn = MI->getOperand(3).getReg(); + unsigned Rm = MI->getOperand(4).getReg(); + if (Rm) + return (ARM_AM::getAM3Op(MI->getOperand(5).getImm()) == ARM_AM::sub) ?5:4; + return (Rt == Rn) ? 4 : 3; + } + + case ARM::t2LDRD_PRE: { + unsigned Rt = MI->getOperand(0).getReg(); + unsigned Rn = MI->getOperand(3).getReg(); + return (Rt == Rn) ? 
4 : 3; + } + + case ARM::STRD_PRE: { + unsigned Rm = MI->getOperand(4).getReg(); + if (Rm) + return (ARM_AM::getAM3Op(MI->getOperand(5).getImm()) == ARM_AM::sub) ?5:4; + return 3; + } + + case ARM::t2STRD_PRE: + return 3; + + case ARM::t2LDR_POST: + case ARM::t2LDRB_POST: + case ARM::t2LDRB_PRE: + case ARM::t2LDRSBi12: + case ARM::t2LDRSBi8: + case ARM::t2LDRSBpci: + case ARM::t2LDRSBs: + case ARM::t2LDRH_POST: + case ARM::t2LDRH_PRE: + case ARM::t2LDRSBT: + case ARM::t2LDRSB_POST: + case ARM::t2LDRSB_PRE: + case ARM::t2LDRSH_POST: + case ARM::t2LDRSH_PRE: + case ARM::t2LDRSHi12: + case ARM::t2LDRSHi8: + case ARM::t2LDRSHpci: + case ARM::t2LDRSHs: + return 2; + + case ARM::t2LDRDi8: { + unsigned Rt = MI->getOperand(0).getReg(); + unsigned Rn = MI->getOperand(2).getReg(); + return (Rt == Rn) ? 3 : 2; + } + + case ARM::t2STRB_POST: + case ARM::t2STRB_PRE: + case ARM::t2STRBs: + case ARM::t2STRDi8: + case ARM::t2STRH_POST: + case ARM::t2STRH_PRE: + case ARM::t2STRHs: + case ARM::t2STR_POST: + case ARM::t2STR_PRE: + case ARM::t2STRs: + return 2; + } +} + // Return the number of 32-bit words loaded by LDM or stored by STM. If this // can't be easily determined return 0 (missing MachineMemOperand). // @@ -2384,8 +2619,12 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, const MCInstrDesc &Desc = MI->getDesc(); unsigned Class = Desc.getSchedClass(); int ItinUOps = ItinData->getNumMicroOps(Class); - if (ItinUOps >= 0) + if (ItinUOps >= 0) { + if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore())) + return getNumMicroOpsSwiftLdSt(ItinData, MI); + return ItinUOps; + } unsigned Opc = MI->getOpcode(); switch (Opc) { @@ -2454,7 +2693,43 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, case ARM::t2STMIA_UPD: case ARM::t2STMDB_UPD: { unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1; - if (Subtarget.isCortexA8()) { + if (Subtarget.isSwift()) { + // rdar://8402126 + int UOps = 1 + NumRegs; // One for address computation, one for each ld / st. + switch (Opc) { + default: break; + case ARM::VLDMDIA_UPD: + case ARM::VLDMDDB_UPD: + case ARM::VLDMSIA_UPD: + case ARM::VLDMSDB_UPD: + case ARM::VSTMDIA_UPD: + case ARM::VSTMDDB_UPD: + case ARM::VSTMSIA_UPD: + case ARM::VSTMSDB_UPD: + case ARM::LDMIA_UPD: + case ARM::LDMDA_UPD: + case ARM::LDMDB_UPD: + case ARM::LDMIB_UPD: + case ARM::STMIA_UPD: + case ARM::STMDA_UPD: + case ARM::STMDB_UPD: + case ARM::STMIB_UPD: + case ARM::tLDMIA_UPD: + case ARM::tSTMIA_UPD: + case ARM::t2LDMIA_UPD: + case ARM::t2LDMDB_UPD: + case ARM::t2STMIA_UPD: + case ARM::t2STMDB_UPD: + ++UOps; // One for base register writeback. + break; + case ARM::LDMIA_RET: + case ARM::tPOP_RET: + case ARM::t2LDMIA_RET: + UOps += 2; // One for base reg wb, one for write to pc. + break; + } + return UOps; + } else if (Subtarget.isCortexA8()) { if (NumRegs < 4) return 2; // 4 registers would be issued: 2, 2. @@ -2463,7 +2738,7 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, if (NumRegs % 2) ++A8UOps; return A8UOps; - } else if (Subtarget.isLikeA9()) { + } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { int A9UOps = (NumRegs / 2); // If there are odd number of registers or if it's not 64-bit aligned, // then it takes an extra AGU (Address Generation Unit) cycle. 
@@ -2496,7 +2771,7 @@ ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData, DefCycle = RegNo / 2 + 1; if (RegNo % 2) ++DefCycle; - } else if (Subtarget.isLikeA9()) { + } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { DefCycle = RegNo; bool isSLoad = false; @@ -2540,7 +2815,7 @@ ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData, DefCycle = 1; // Result latency is issue cycle + 2: E2. DefCycle += 2; - } else if (Subtarget.isLikeA9()) { + } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { DefCycle = (RegNo / 2); // If there are odd number of registers or if it's not 64-bit aligned, // then it takes an extra AGU (Address Generation Unit) cycle. @@ -2571,7 +2846,7 @@ ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData, UseCycle = RegNo / 2 + 1; if (RegNo % 2) ++UseCycle; - } else if (Subtarget.isLikeA9()) { + } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { UseCycle = RegNo; bool isSStore = false; @@ -2612,7 +2887,7 @@ ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData, UseCycle = 2; // Read in E3. UseCycle += 2; - } else if (Subtarget.isLikeA9()) { + } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { UseCycle = (RegNo / 2); // If there are odd number of registers or if it's not 64-bit aligned, // then it takes an extra AGU (Address Generation Unit) cycle. @@ -2822,6 +3097,37 @@ static int adjustDefLatency(const ARMSubtarget &Subtarget, break; } } + } else if (Subtarget.isSwift()) { + // FIXME: Properly handle all of the latency adjustments for address + // writeback. + switch (DefMCID->getOpcode()) { + default: break; + case ARM::LDRrs: + case ARM::LDRBrs: { + unsigned ShOpVal = DefMI->getOperand(3).getImm(); + bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; + unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); + if (!isSub && + (ShImm == 0 || + ((ShImm == 1 || ShImm == 2 || ShImm == 3) && + ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) + Adjust -= 2; + else if (!isSub && + ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr) + --Adjust; + break; + } + case ARM::t2LDRs: + case ARM::t2LDRBs: + case ARM::t2LDRHs: + case ARM::t2LDRSHs: { + // Thumb2 mode: lsl only. + unsigned ShAmt = DefMI->getOperand(3).getImm(); + if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3) + Adjust -= 2; + break; + } + } } if (DefAlign < 8 && Subtarget.isLikeA9()) { @@ -2998,7 +3304,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, // instructions). if (Latency > 0 && Subtarget.isThumb2()) { const MachineFunction *MF = DefMI->getParent()->getParent(); - if (MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize)) + if (MF->getFunction()->getFnAttributes().hasOptimizeForSizeAttr()) --Latency; } return Latency; @@ -3048,7 +3354,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, if (!UseNode->isMachineOpcode()) { int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx); - if (Subtarget.isLikeA9()) + if (Subtarget.isLikeA9() || Subtarget.isSwift()) return Latency <= 2 ? 1 : Latency - 1; else return Latency <= 3 ? 1 : Latency - 2; @@ -3092,6 +3398,33 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, break; } } + } else if (DefIdx == 0 && Latency > 2 && Subtarget.isSwift()) { + // FIXME: Properly handle all of the latency adjustments for address + // writeback. 
+ switch (DefMCID.getOpcode()) { + default: break; + case ARM::LDRrs: + case ARM::LDRBrs: { + unsigned ShOpVal = + cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue(); + unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); + if (ShImm == 0 || + ((ShImm == 1 || ShImm == 2 || ShImm == 3) && + ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)) + Latency -= 2; + else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr) + --Latency; + break; + } + case ARM::t2LDRs: + case ARM::t2LDRBs: + case ARM::t2LDRHs: + case ARM::t2LDRSHs: { + // Thumb2 mode: lsl 0-3 only. + Latency -= 2; + break; + } + } } if (DefAlign < 8 && Subtarget.isLikeA9()) @@ -3660,6 +3993,122 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { } +//===----------------------------------------------------------------------===// +// Partial register updates +//===----------------------------------------------------------------------===// +// +// Swift renames NEON registers with 64-bit granularity. That means any +// instruction writing an S-reg implicitly reads the containing D-reg. The +// problem is mostly avoided by translating f32 operations to v2f32 operations +// on D-registers, but f32 loads are still a problem. +// +// These instructions can load an f32 into a NEON register: +// +// VLDRS - Only writes S, partial D update. +// VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops. +// VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops. +// +// FCONSTD can be used as a dependency-breaking instruction. + + +unsigned ARMBaseInstrInfo:: +getPartialRegUpdateClearance(const MachineInstr *MI, + unsigned OpNum, + const TargetRegisterInfo *TRI) const { + // Only Swift has partial register update problems. + if (!SwiftPartialUpdateClearance || !Subtarget.isSwift()) + return 0; + + assert(TRI && "Need TRI instance"); + + const MachineOperand &MO = MI->getOperand(OpNum); + if (MO.readsReg()) + return 0; + unsigned Reg = MO.getReg(); + int UseOp = -1; + + switch(MI->getOpcode()) { + // Normal instructions writing only an S-register. + case ARM::VLDRS: + case ARM::FCONSTS: + case ARM::VMOVSR: + // rdar://problem/8791586 + case ARM::VMOVv8i8: + case ARM::VMOVv4i16: + case ARM::VMOVv2i32: + case ARM::VMOVv2f32: + case ARM::VMOVv1i64: + UseOp = MI->findRegisterUseOperandIdx(Reg, false, TRI); + break; + + // Explicitly reads the dependency. + case ARM::VLD1LNd32: + UseOp = 1; + break; + default: + return 0; + } + + // If this instruction actually reads a value from Reg, there is no unwanted + // dependency. + if (UseOp != -1 && MI->getOperand(UseOp).readsReg()) + return 0; + + // We must be able to clobber the whole D-reg. + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + // Virtual register must be a foo:ssub_0<def,undef> operand. + if (!MO.getSubReg() || MI->readsVirtualRegister(Reg)) + return 0; + } else if (ARM::SPRRegClass.contains(Reg)) { + // Physical register: MI must define the full D-reg. + unsigned DReg = TRI->getMatchingSuperReg(Reg, ARM::ssub_0, + &ARM::DPRRegClass); + if (!DReg || !MI->definesRegister(DReg, TRI)) + return 0; + } + + // MI has an unwanted D-register dependency. + // Avoid defs in the previous N instructions. + return SwiftPartialUpdateClearance; +} + +// Break a partial register dependency after getPartialRegUpdateClearance +// returned non-zero.
+void ARMBaseInstrInfo:: +breakPartialRegDependency(MachineBasicBlock::iterator MI, + unsigned OpNum, + const TargetRegisterInfo *TRI) const { + assert(MI && OpNum < MI->getDesc().getNumDefs() && "OpNum is not a def"); + assert(TRI && "Need TRI instance"); + + const MachineOperand &MO = MI->getOperand(OpNum); + unsigned Reg = MO.getReg(); + assert(TargetRegisterInfo::isPhysicalRegister(Reg) && + "Can't break virtual register dependencies."); + unsigned DReg = Reg; + + // If MI defines an S-reg, find the corresponding D super-register. + if (ARM::SPRRegClass.contains(Reg)) { + DReg = ARM::D0 + (Reg - ARM::S0) / 2; + assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken"); + } + + assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps"); + assert(MI->definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg"); + + // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines + // the full D-register by loading the same value to both lanes. The + // instruction is micro-coded with 2 uops, so don't do this until we can + // properly schedule micro-coded instructions. The dispatcher stalls cause + // too big regressions. + + // Insert the dependency-breaking FCONSTD before MI. + // 96 is the encoding of 0.5, but the actual value doesn't matter here. + AddDefaultPred(BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + get(ARM::FCONSTD), DReg).addImm(96)); + MI->addRegisterKilled(DReg, TRI, true); +} + bool ARMBaseInstrInfo::hasNOP() const { return (Subtarget.getFeatureBits() & ARM::HasV6T2Ops) != 0; } diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 304ccc087c..8f4f47b34f 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -182,10 +182,13 @@ public: virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, const BranchProbability - &Probability) const { + &Probability) const { return NumCycles == 1; } + virtual bool isProfitableToUnpredicate(MachineBasicBlock &TMBB, + MachineBasicBlock &FMBB) const; + /// analyzeCompare - For a comparison instruction, return the source registers /// in SrcReg and SrcReg2 if having two register operands, and the value it /// compares against in CmpValue. Return true if the comparison instruction @@ -235,6 +238,10 @@ public: getExecutionDomain(const MachineInstr *MI) const; void setExecutionDomain(MachineInstr *MI, unsigned Domain) const; + unsigned getPartialRegUpdateClearance(const MachineInstr*, unsigned, + const TargetRegisterInfo*) const; + void breakPartialRegDependency(MachineBasicBlock::iterator, unsigned, + const TargetRegisterInfo *TRI) const; /// Get the number of addresses by LDM or VLDM or zero for unknown.
unsigned getNumLDMAddresses(const MachineInstr *MI) const; diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 277dd57ef2..1cba45c3a5 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -566,7 +566,7 @@ needsStackRealignment(const MachineFunction &MF) const { const Function *F = MF.getFunction(); unsigned StackAlign = MF.getTarget().getFrameLowering()->getStackAlignment(); bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) || - F->hasFnAttr(Attribute::StackAlignment)); + F->getFnAttributes().hasStackAlignmentAttr()); return requiresRealignment && canRealignStack(MF); } diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index d6ef3f333b..6b49e37e87 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -194,6 +194,7 @@ class ARMFastISel : public FastISel { unsigned ARMMoveToFPReg(EVT VT, unsigned SrcReg); unsigned ARMMoveToIntReg(EVT VT, unsigned SrcReg); unsigned ARMSelectCallOp(bool UseReg); + unsigned ARMLowerPICELF(const GlobalValue *GV, unsigned Align, EVT VT); // Call handling routines. private: @@ -648,6 +649,9 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) { Align = TD.getTypeAllocSize(GV->getType()); } + if (Subtarget->isTargetELF() && RelocM == Reloc::PIC_) + return ARMLowerPICELF(GV, Align, VT); + // Grab index. unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb() ? 4 : 8); @@ -2801,6 +2805,47 @@ bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo, return true; } +unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV, + unsigned Align, EVT VT) { + bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility(); + ARMConstantPoolConstant *CPV = + ARMConstantPoolConstant::Create(GV, UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT); + unsigned Idx = MCP.getConstantPoolIndex(CPV, Align); + + unsigned Opc; + unsigned DestReg1 = createResultReg(TLI.getRegClassFor(VT)); + // Load value. + if (isThumb2) { + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(ARM::t2LDRpci), DestReg1) + .addConstantPoolIndex(Idx)); + Opc = UseGOTOFF ? ARM::t2ADDrr : ARM::t2LDRs; + } else { + // The extra immediate is for addrmode2. + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, + DL, TII.get(ARM::LDRcp), DestReg1) + .addConstantPoolIndex(Idx).addImm(0)); + Opc = UseGOTOFF ? ARM::ADDrr : ARM::LDRrs; + } + + unsigned GlobalBaseReg = AFI->getGlobalBaseReg(); + if (GlobalBaseReg == 0) { + GlobalBaseReg = MRI.createVirtualRegister(TLI.getRegClassFor(VT)); + AFI->setGlobalBaseReg(GlobalBaseReg); + } + + unsigned DestReg2 = createResultReg(TLI.getRegClassFor(VT)); + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, + DL, TII.get(Opc), DestReg2) + .addReg(DestReg1) + .addReg(GlobalBaseReg); + if (!UseGOTOFF) + MIB.addImm(0); + AddOptionalDefs(MIB); + + return DestReg2; +} + namespace llvm { FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) { diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 2cedf3172c..52374ec4c1 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -1233,7 +1233,7 @@ static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) { return; // Naked functions don't spill callee-saved registers. 
- if (MF.getFunction()->hasFnAttr(Attribute::Naked)) + if (MF.getFunction()->getFnAttributes().hasNakedAttr()) return; // We are planning to use NEON instructions vst1 / vld1. diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index a44e2a220a..90ae94b3b2 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -347,7 +347,9 @@ bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const { if (!CheckVMLxHazard) return true; - if (!Subtarget->isCortexA8() && !Subtarget->isLikeA9()) + + if (!Subtarget->isCortexA8() && !Subtarget->isLikeA9() && + !Subtarget->isSwift()) return true; if (!N->hasOneUse()) @@ -385,12 +387,13 @@ bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const { bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift, ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt) { - if (!Subtarget->isLikeA9()) + if (!Subtarget->isLikeA9() && !Subtarget->isSwift()) return true; if (Shift.hasOneUse()) return true; // R << 2 is free. - return ShOpcVal == ARM_AM::lsl && ShAmt == 2; + return ShOpcVal == ARM_AM::lsl && + (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1)); } bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N, @@ -518,7 +521,7 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, return false; // @LOCALMOD-END if (N.getOpcode() == ISD::MUL && - (!Subtarget->isLikeA9() || N.hasOneUse())) { + ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) { if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { // X * [3,5,9] -> X + X * [2,4,8] etc. int RHSC = (int)RHS->getZExtValue(); @@ -582,7 +585,8 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, // Try matching (R shl C) + (R). if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift && - !(Subtarget->isLikeA9() || N.getOperand(0).hasOneUse())) { + !(Subtarget->isLikeA9() || Subtarget->isSwift() || + N.getOperand(0).hasOneUse())) { ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode()); if (ShOpcVal != ARM_AM::no_shift) { // Check to see if the RHS of the shift is a constant, if not, we can't @@ -630,7 +634,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDNode *Op, // @LOCALMOD-END if (N.getOpcode() == ISD::MUL && - (!Subtarget->isLikeA9() || N.hasOneUse())) { + (!(Subtarget->isLikeA9() || Subtarget->isSwift()) || N.hasOneUse())) { if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { // X * [3,5,9] -> X + X * [2,4,8] etc. int RHSC = (int)RHS->getZExtValue(); @@ -697,7 +701,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDNode *Op, } } - if (Subtarget->isLikeA9() && !N.hasOneUse()) { + if ((Subtarget->isLikeA9() || Subtarget->isSwift()) && !N.hasOneUse()) { // Compute R +/- (R << N) and reuse it. Base = N; Offset = CurDAG->getRegister(0, MVT::i32); @@ -753,7 +757,8 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDNode *Op, // Try matching (R shl C) + (R). 
if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift && - !(Subtarget->isLikeA9() || N.getOperand(0).hasOneUse())) { + !(Subtarget->isLikeA9() || Subtarget->isSwift() || + N.getOperand(0).hasOneUse())) { ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode()); if (ShOpcVal != ARM_AM::no_shift) { // Check to see if the RHS of the shift is a constant, if not, we can't diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 2e7588b29f..556dacffcc 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -645,9 +645,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) if (!Subtarget->hasV6Ops()) setOperationAction(ISD::BSWAP, MVT::i32, Expand); - // These are expanded into libcalls. - if (!Subtarget->hasDivide() || !Subtarget->isThumb2()) { - // v7M has a hardware divider + if (!(Subtarget->hasDivide() && Subtarget->isThumb2()) && + !(Subtarget->hasDivideInARMMode() && !Subtarget->isThumb())) { + // These are expanded into libcalls if the cpu doesn't have HW divider. setOperationAction(ISD::SDIV, MVT::i32, Expand); setOperationAction(ISD::UDIV, MVT::i32, Expand); } @@ -5873,7 +5873,7 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, // ldrex dest, ptr // (sign extend dest, if required) // cmp dest, incr - // cmov.cond scratch2, dest, incr + // cmov.cond scratch2, incr, dest // strex scratch, scratch2, ptr // cmp scratch, #0 // bne- loopMBB @@ -5896,7 +5896,7 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) .addReg(oldval).addReg(incr)); BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr), scratch2) - .addReg(oldval).addReg(incr).addImm(Cond).addReg(ARM::CPSR); + .addReg(incr).addReg(oldval).addImm(Cond).addReg(ARM::CPSR); MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr); if (strOpc == ARM::t2STREX) @@ -6605,7 +6605,7 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { UnitSize = 2; } else { // Check whether we can use NEON instructions. - if (!MF->getFunction()->hasFnAttr(Attribute::NoImplicitFloat) && + if (!MF->getFunction()->getFnAttributes().hasNoImplicitFloatAttr() && Subtarget->hasNEON()) { if ((Align % 16 == 0) && SizeVal >= 16) { ldrOpc = ARM::VLD1q32wb_fixed; @@ -9343,7 +9343,7 @@ EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size, // See if we can use NEON instructions for this... if (IsZeroVal && - !F->hasFnAttr(Attribute::NoImplicitFloat) && + !F->getFnAttributes().hasNoImplicitFloatAttr() && Subtarget->hasNEON()) { if (memOpAlign(SrcAlign, DstAlign, 16) && Size >= 16) { return MVT::v4i32; diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index c8966fb97a..67a6820932 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -846,6 +846,23 @@ class AMiscA1I<bits<8> opcod, bits<4> opc7_4, dag oops, dag iops, let Inst{3-0} = Rm; } +// Division instructions. 
+class ADivA1I<bits<3> opcod, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, list<dag> pattern> + : I<oops, iops, AddrModeNone, 4, IndexModeNone, ArithMiscFrm, itin, + opc, asm, "", pattern> { + bits<4> Rd; + bits<4> Rn; + bits<4> Rm; + let Inst{27-23} = 0b01110; + let Inst{22-20} = opcod; + let Inst{19-16} = Rd; + let Inst{15-12} = 0b1111; + let Inst{11-8} = Rm; + let Inst{7-4} = 0b0001; + let Inst{3-0} = Rn; +} + // PKH instructions def PKHLSLAsmOperand : ImmAsmOperand { let Name = "PKHLSLImm"; @@ -893,6 +910,10 @@ class ARMV5TPat<dag pattern, dag result> : Pat<pattern, result> { class ARMV5TEPat<dag pattern, dag result> : Pat<pattern, result> { list<Predicate> Predicates = [IsARM, HasV5TE]; } +// ARMV5MOPat - Same as ARMV5TEPat with UseMulOps. +class ARMV5MOPat<dag pattern, dag result> : Pat<pattern, result> { + list<Predicate> Predicates = [IsARM, HasV5TE, UseMulOps]; +} class ARMV6Pat<dag pattern, dag result> : Pat<pattern, result> { list<Predicate> Predicates = [IsARM, HasV6]; } diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp index 31b0c41f08..e62187727c 100644 --- a/lib/Target/ARM/ARMInstrInfo.cpp +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -13,13 +13,17 @@ #include "ARMInstrInfo.h" #include "ARM.h" +#include "ARMConstantPoolValue.h" #include "ARMMachineFunctionInfo.h" +#include "ARMTargetMachine.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInst.h" using namespace llvm; @@ -84,3 +88,61 @@ unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const { return 0; } + +namespace { + /// ARMCGBR - Create Global Base Reg pass. This initializes the PIC + /// global base register for ARM ELF. + struct ARMCGBR : public MachineFunctionPass { + static char ID; + ARMCGBR() : MachineFunctionPass(ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF) { + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + if (AFI->getGlobalBaseReg() == 0) + return false; + + const ARMTargetMachine *TM = + static_cast<const ARMTargetMachine *>(&MF.getTarget()); + if (TM->getRelocationModel() != Reloc::PIC_) + return false; + + LLVMContext* Context = &MF.getFunction()->getContext(); + GlobalValue *GV = new GlobalVariable(Type::getInt32Ty(*Context), false, + GlobalValue::ExternalLinkage, 0, + "_GLOBAL_OFFSET_TABLE_"); + unsigned Id = AFI->createPICLabelUId(); + ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id); + unsigned Align = TM->getTargetData()->getPrefTypeAlignment(GV->getType()); + unsigned Idx = MF.getConstantPool()->getConstantPoolIndex(CPV, Align); + + MachineBasicBlock &FirstMBB = MF.front(); + MachineBasicBlock::iterator MBBI = FirstMBB.begin(); + DebugLoc DL = FirstMBB.findDebugLoc(MBBI); + unsigned GlobalBaseReg = AFI->getGlobalBaseReg(); + unsigned Opc = TM->getSubtarget<ARMSubtarget>().isThumb2() ? 
+ ARM::t2LDRpci : ARM::LDRcp; + const TargetInstrInfo &TII = *TM->getInstrInfo(); + MachineInstrBuilder MIB = BuildMI(FirstMBB, MBBI, DL, + TII.get(Opc), GlobalBaseReg) + .addConstantPoolIndex(Idx); + if (Opc == ARM::LDRcp) + MIB.addImm(0); + AddDefaultPred(MIB); + + return true; + } + + virtual const char *getPassName() const { + return "ARM PIC Global Base Reg Initialization"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + }; +} + +char ARMCGBR::ID = 0; +FunctionPass* +llvm::createARMGlobalBaseRegPass() { return new ARMCGBR(); } diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 2060bb9374..118c9ea5dd 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -215,6 +215,8 @@ def HasFP16 : Predicate<"Subtarget->hasFP16()">, AssemblerPredicate<"FeatureFP16","half-float">; def HasDivide : Predicate<"Subtarget->hasDivide()">, AssemblerPredicate<"FeatureHWDiv", "divide">; +def HasDivideInARM : Predicate<"Subtarget->hasDivideInARMMode()">, + AssemblerPredicate<"FeatureHWDivARM">; def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">, AssemblerPredicate<"FeatureT2XtPk", "pack/extract">; @@ -250,6 +252,7 @@ def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">; def UseMovt : Predicate<"Subtarget->useMovt()">; def DontUseMovt : Predicate<"!Subtarget->useMovt()">; def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">; +def UseMulOps : Predicate<"Subtarget->useMulOps()">; // Prefer fused MAC for fp mul + add over fp VMLA / VMLS if they are available. // But only select them if more precision in FP computation is allowed. @@ -260,6 +263,20 @@ def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion ==" def DontUseFusedMAC : Predicate<"!Subtarget->hasVFP4() || " "Subtarget->isTargetDarwin()">; +// VGETLNi32 is microcoded on Swift - prefer VMOV. +def HasFastVGETLNi32 : Predicate<"!Subtarget->isSwift()">; +def HasSlowVGETLNi32 : Predicate<"Subtarget->isSwift()">; + +// VDUP.32 is microcoded on Swift - prefer VMOV. +def HasFastVDUP32 : Predicate<"!Subtarget->isSwift()">; +def HasSlowVDUP32 : Predicate<"Subtarget->isSwift()">; + +// Cortex-A9 prefers VMOVSR to VMOVDRR even when using NEON for scalar FP, as +// this allows more effective execution domain optimization. See +// setExecutionDomain(). 
+def UseVMOVSR : Predicate<"Subtarget->isCortexA9() || !Subtarget->useNEONForSinglePrecisionFP()">; +def DontUseVMOVSR : Predicate<"!Subtarget->isCortexA9() && Subtarget->useNEONForSinglePrecisionFP()">; + def IsLE : Predicate<"TLI.isLittleEndian()">; def IsBE : Predicate<"TLI.isBigEndian()">; @@ -3593,13 +3610,13 @@ def MULv5: ARMPseudoExpand<(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, 4, IIC_iMUL32, [(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))], (MUL GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, cc_out:$s)>, - Requires<[IsARM, NoV6]>; + Requires<[IsARM, NoV6, UseMulOps]>; } def MLA : AsMul1I32<0b0000001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), IIC_iMAC32, "mla", "\t$Rd, $Rn, $Rm, $Ra", [(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))]>, - Requires<[IsARM, HasV6]> { + Requires<[IsARM, HasV6, UseMulOps]> { bits<4> Ra; let Inst{15-12} = Ra; } @@ -3615,7 +3632,7 @@ def MLAv5: ARMPseudoExpand<(outs GPR:$Rd), def MLS : AMul1I<0b0000011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), IIC_iMAC32, "mls", "\t$Rd, $Rn, $Rm, $Ra", [(set GPR:$Rd, (sub GPR:$Ra, (mul GPR:$Rn, GPR:$Rm)))]>, - Requires<[IsARM, HasV6T2]> { + Requires<[IsARM, HasV6T2, UseMulOps]> { bits<4> Rd; bits<4> Rm; bits<4> Rn; @@ -3721,7 +3738,7 @@ def SMMLA : AMul2Ia <0b0111010, 0b0001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), IIC_iMAC32, "smmla", "\t$Rd, $Rn, $Rm, $Ra", [(set GPR:$Rd, (add (mulhs GPR:$Rn, GPR:$Rm), GPR:$Ra))]>, - Requires<[IsARM, HasV6]>; + Requires<[IsARM, HasV6, UseMulOps]>; def SMMLAR : AMul2Ia <0b0111010, 0b0011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), @@ -3731,7 +3748,7 @@ def SMMLAR : AMul2Ia <0b0111010, 0b0011, (outs GPR:$Rd), def SMMLS : AMul2Ia <0b0111010, 0b1101, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), IIC_iMAC32, "smmls", "\t$Rd, $Rn, $Rm, $Ra", []>, - Requires<[IsARM, HasV6]>; + Requires<[IsARM, HasV6, UseMulOps]>; def SMMLSR : AMul2Ia <0b0111010, 0b1111, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), @@ -3785,7 +3802,7 @@ multiclass AI_smla<string opc, PatFrag opnode> { [(set GPRnopc:$Rd, (add GPR:$Ra, (opnode (sext_inreg GPRnopc:$Rn, i16), (sext_inreg GPRnopc:$Rm, i16))))]>, - Requires<[IsARM, HasV5TE]>; + Requires<[IsARM, HasV5TE, UseMulOps]>; def BT : AMulxyIa<0b0001000, 0b10, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), @@ -3793,7 +3810,7 @@ multiclass AI_smla<string opc, PatFrag opnode> { [(set GPRnopc:$Rd, (add GPR:$Ra, (opnode (sext_inreg GPRnopc:$Rn, i16), (sra GPRnopc:$Rm, (i32 16)))))]>, - Requires<[IsARM, HasV5TE]>; + Requires<[IsARM, HasV5TE, UseMulOps]>; def TB : AMulxyIa<0b0001000, 0b01, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), @@ -3801,7 +3818,7 @@ multiclass AI_smla<string opc, PatFrag opnode> { [(set GPRnopc:$Rd, (add GPR:$Ra, (opnode (sra GPRnopc:$Rn, (i32 16)), (sext_inreg GPRnopc:$Rm, i16))))]>, - Requires<[IsARM, HasV5TE]>; + Requires<[IsARM, HasV5TE, UseMulOps]>; def TT : AMulxyIa<0b0001000, 0b11, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), @@ -3809,7 +3826,7 @@ multiclass AI_smla<string opc, PatFrag opnode> { [(set GPRnopc:$Rd, (add GPR:$Ra, (opnode (sra GPRnopc:$Rn, (i32 16)), (sra GPRnopc:$Rm, (i32 16)))))]>, - Requires<[IsARM, HasV5TE]>; + Requires<[IsARM, HasV5TE, UseMulOps]>; def WB : AMulxyIa<0b0001001, 0b00, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), @@ -3817,7 +3834,7 @@ multiclass AI_smla<string opc, PatFrag opnode> { [(set GPRnopc:$Rd, (add GPR:$Ra, (sra (opnode GPRnopc:$Rn, (sext_inreg GPRnopc:$Rm, i16)), (i32 16))))]>, - Requires<[IsARM, 
HasV5TE]>; + Requires<[IsARM, HasV5TE, UseMulOps]>; def WT : AMulxyIa<0b0001001, 0b10, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), @@ -3825,7 +3842,7 @@ multiclass AI_smla<string opc, PatFrag opnode> { [(set GPRnopc:$Rd, (add GPR:$Ra, (sra (opnode GPRnopc:$Rn, (sra GPRnopc:$Rm, (i32 16))), (i32 16))))]>, - Requires<[IsARM, HasV5TE]>; + Requires<[IsARM, HasV5TE, UseMulOps]>; } } @@ -3928,6 +3945,19 @@ defm SMUA : AI_sdml<0, "smua">; defm SMUS : AI_sdml<1, "smus">; //===----------------------------------------------------------------------===// +// Division Instructions (ARMv7-A with virtualization extension) +// +def SDIV : ADivA1I<0b001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iDIV, + "sdiv", "\t$Rd, $Rn, $Rm", + [(set GPR:$Rd, (sdiv GPR:$Rn, GPR:$Rm))]>, + Requires<[IsARM, HasDivideInARM]>; + +def UDIV : ADivA1I<0b011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iDIV, + "udiv", "\t$Rd, $Rn, $Rm", + [(set GPR:$Rd, (udiv GPR:$Rn, GPR:$Rm))]>, + Requires<[IsARM, HasDivideInARM]>; + +//===----------------------------------------------------------------------===// // Misc. Arithmetic Instructions. // @@ -4989,32 +5019,32 @@ def : ARMV5TEPat<(sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))), def : ARMV5TEPat<(sra (mul GPR:$a, sext_16_node:$b), (i32 16)), (SMULWB GPR:$a, GPR:$b)>; -def : ARMV5TEPat<(add GPR:$acc, +def : ARMV5MOPat<(add GPR:$acc, (mul (sra (shl GPR:$a, (i32 16)), (i32 16)), (sra (shl GPR:$b, (i32 16)), (i32 16)))), (SMLABB GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5TEPat<(add GPR:$acc, +def : ARMV5MOPat<(add GPR:$acc, (mul sext_16_node:$a, sext_16_node:$b)), (SMLABB GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5TEPat<(add GPR:$acc, +def : ARMV5MOPat<(add GPR:$acc, (mul (sra (shl GPR:$a, (i32 16)), (i32 16)), (sra GPR:$b, (i32 16)))), (SMLABT GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5TEPat<(add GPR:$acc, +def : ARMV5MOPat<(add GPR:$acc, (mul sext_16_node:$a, (sra GPR:$b, (i32 16)))), (SMLABT GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5TEPat<(add GPR:$acc, +def : ARMV5MOPat<(add GPR:$acc, (mul (sra GPR:$a, (i32 16)), (sra (shl GPR:$b, (i32 16)), (i32 16)))), (SMLATB GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5TEPat<(add GPR:$acc, +def : ARMV5MOPat<(add GPR:$acc, (mul (sra GPR:$a, (i32 16)), sext_16_node:$b)), (SMLATB GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5TEPat<(add GPR:$acc, +def : ARMV5MOPat<(add GPR:$acc, (sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))), (i32 16))), (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5TEPat<(add GPR:$acc, +def : ARMV5MOPat<(add GPR:$acc, (sra (mul GPR:$a, sext_16_node:$b), (i32 16))), (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>; diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 1bcb48776e..de655f1a0e 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -5043,7 +5043,8 @@ def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00, (outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane), IIC_VMOVSI, "vmov", "32", "$R, $V$lane", [(set GPR:$R, (extractelt (v2i32 DPR:$V), - imm:$lane))]> { + imm:$lane))]>, + Requires<[HasNEON, HasFastVGETLNi32]> { let Inst{21} = lane{0}; } // def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td @@ -5066,7 +5067,16 @@ def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane), def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane), (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src, (DSubReg_i32_reg imm:$lane))), - (SubReg_i32_lane imm:$lane))>; + (SubReg_i32_lane imm:$lane))>, + Requires<[HasNEON, HasFastVGETLNi32]>; +def : Pat<(extractelt (v2i32 DPR:$src), imm:$lane), + 
(COPY_TO_REGCLASS + (i32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>, + Requires<[HasNEON, HasSlowVGETLNi32]>; +def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane), + (COPY_TO_REGCLASS + (i32 (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>, + Requires<[HasNEON, HasSlowVGETLNi32]>; def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2), (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)), (SSubReg_f32_reg imm:$src2))>; @@ -5175,14 +5185,23 @@ class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty> def VDUP8d : VDUPD<0b11101100, 0b00, "8", v8i8>; def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>; -def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>; +def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>, + Requires<[HasNEON, HasFastVDUP32]>; def VDUP8q : VDUPQ<0b11101110, 0b00, "8", v16i8>; def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>; def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>; -def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>; +// NEONvdup patterns for uarchs with fast VDUP.32. +def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>, + Requires<[HasNEON,HasFastVDUP32]>; def : Pat<(v4f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>; +// NEONvdup patterns for uarchs with slow VDUP.32 - use VMOVDRR instead. +def : Pat<(v2i32 (NEONvdup (i32 GPR:$R))), (VMOVDRR GPR:$R, GPR:$R)>, + Requires<[HasNEON,HasSlowVDUP32]>; +def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VMOVDRR GPR:$R, GPR:$R)>, + Requires<[HasNEON,HasSlowVDUP32]>; + // VDUP : Vector Duplicate Lane (from scalar to all elements) class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt, @@ -5619,6 +5638,11 @@ def : N2VSPat<arm_ftoui, VCVTf2ud>; def : N2VSPat<arm_sitof, VCVTs2fd>; def : N2VSPat<arm_uitof, VCVTu2fd>; +// Prefer VMOVDRR for i32 -> f32 bitcasts, it can write all DPR registers. 
+def : Pat<(f32 (bitconvert GPR:$a)), + (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>, + Requires<[HasNEON, DontUseVMOVSR]>; + //===----------------------------------------------------------------------===// // Non-Instruction Patterns //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 2bb667ef37..37b280f447 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -2396,7 +2396,8 @@ def t2MUL: T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32, def t2MLA: T2FourReg< (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "mla", "\t$Rd, $Rn, $Rm, $Ra", - [(set rGPR:$Rd, (add (mul rGPR:$Rn, rGPR:$Rm), rGPR:$Ra))]> { + [(set rGPR:$Rd, (add (mul rGPR:$Rn, rGPR:$Rm), rGPR:$Ra))]>, + Requires<[IsThumb2, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b000; @@ -2406,7 +2407,8 @@ def t2MLA: T2FourReg< def t2MLS: T2FourReg< (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "mls", "\t$Rd, $Rn, $Rm, $Ra", - [(set rGPR:$Rd, (sub rGPR:$Ra, (mul rGPR:$Rn, rGPR:$Rm)))]> { + [(set rGPR:$Rd, (sub rGPR:$Ra, (mul rGPR:$Rn, rGPR:$Rm)))]>, + Requires<[IsThumb2, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b000; @@ -2475,7 +2477,7 @@ def t2SMMLA : T2FourReg< (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smmla", "\t$Rd, $Rn, $Rm, $Ra", [(set rGPR:$Rd, (add (mulhs rGPR:$Rm, rGPR:$Rn), rGPR:$Ra))]>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b101; @@ -2496,7 +2498,7 @@ def t2SMMLS: T2FourReg< (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smmls", "\t$Rd, $Rn, $Rm, $Ra", [(set rGPR:$Rd, (sub rGPR:$Ra, (mulhs rGPR:$Rn, rGPR:$Rm)))]>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b110; @@ -2601,7 +2603,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> { [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sext_inreg rGPR:$Rn, i16), (sext_inreg rGPR:$Rm, i16))))]>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -2614,7 +2616,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> { !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra", [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sext_inreg rGPR:$Rn, i16), (sra rGPR:$Rm, (i32 16)))))]>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -2627,7 +2629,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> { !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, $Ra", [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)), (sext_inreg rGPR:$Rm, i16))))]>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -2640,7 +2642,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> { !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra", [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)), (sra rGPR:$Rm, (i32 16)))))]>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { let Inst{31-27} 
= 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -2653,7 +2655,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> { !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra", [(set rGPR:$Rd, (add rGPR:$Ra, (sra (opnode rGPR:$Rn, (sext_inreg rGPR:$Rm, i16)), (i32 16))))]>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b011; @@ -2666,7 +2668,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> { !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra", [(set rGPR:$Rd, (add rGPR:$Ra, (sra (opnode rGPR:$Rn, (sra rGPR:$Rm, (i32 16))), (i32 16))))]>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b011; @@ -2760,7 +2762,7 @@ def t2SMLSLDX : T2FourReg_mac<1, 0b101, 0b1101, (outs rGPR:$Ra,rGPR:$Rd), // Division Instructions. // Signed and unsigned division on v7-M // -def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi, +def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV, "sdiv", "\t$Rd, $Rn, $Rm", [(set rGPR:$Rd, (sdiv rGPR:$Rn, rGPR:$Rm))]>, Requires<[HasDivide, IsThumb2]> { @@ -2771,7 +2773,7 @@ def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi, let Inst{7-4} = 0b1111; } -def t2UDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi, +def t2UDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV, "udiv", "\t$Rd, $Rn, $Rm", [(set rGPR:$Rd, (udiv rGPR:$Rn, rGPR:$Rm))]>, Requires<[HasDivide, IsThumb2]> { diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 7d6692f307..b5a896c699 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -523,10 +523,12 @@ def VMOVRS : AVConv2I<0b11100001, 0b1010, let D = VFPNeonDomain; } +// Bitcast i32 -> f32. NEON prefers to use VMOVDRR. def VMOVSR : AVConv4I<0b11100000, 0b1010, (outs SPR:$Sn), (ins GPR:$Rt), IIC_fpMOVIS, "vmov", "\t$Sn, $Rt", - [(set SPR:$Sn, (bitconvert GPR:$Rt))]> { + [(set SPR:$Sn, (bitconvert GPR:$Rt))]>, + Requires<[HasVFP2, UseVMOVSR]> { // Instruction operands. bits<5> Sn; bits<4> Rt; diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h index f1c8fc8481..c0ac04b600 100644 --- a/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -108,6 +108,11 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// pass. DenseMap<unsigned, unsigned> CPEClones; + /// GlobalBaseReg - keeps track of the virtual register initialized for + /// use as the global base register. This is used for PIC in some PIC + /// relocation models. 
+ unsigned GlobalBaseReg; + public: ARMFunctionInfo() : isThumb(false), @@ -119,7 +124,7 @@ public: GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0), NumAlignedDPRCS2Regs(0), JumpTableUId(0), PICLabelUId(0), - VarArgsFrameIndex(0), HasITBlocks(false) {} + VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {} explicit ARMFunctionInfo(MachineFunction &MF) : isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()), @@ -130,7 +135,7 @@ public: GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), GPRCS1Frames(32), GPRCS2Frames(32), DPRCSFrames(32), JumpTableUId(0), PICLabelUId(0), - VarArgsFrameIndex(0), HasITBlocks(false) {} + VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {} bool isThumbFunction() const { return isThumb; } bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; } @@ -249,6 +254,9 @@ public: bool hasITBlocks() const { return HasITBlocks; } void setHasITBlocks(bool h) { HasITBlocks = h; } + unsigned getGlobalBaseReg() const { return GlobalBaseReg; } + void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; } + void recordCPEClone(unsigned CPIdx, unsigned CPCloneIdx) { if (!CPEClones.insert(std::make_pair(CPCloneIdx, CPIdx)).second) assert(0 && "Duplicate entries!"); diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index 6f974fd17d..ed8ac1aff7 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -247,11 +247,16 @@ def CCR : RegisterClass<"ARM", [i32], 32, (add CPSR)> { } // Scalar single precision floating point register class.. -def SPR : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 31)>; +// FIXME: Allocation order changed to s0, s2, s4, ... as a quick hack to +// avoid partial-write dependencies on D registers (S registers are +// renamed as portions of D registers). +def SPR : RegisterClass<"ARM", [f32], 32, (add (decimate + (sequence "S%u", 0, 31), 2), + (sequence "S%u", 0, 31))>; // Subset of SPR which can be used as a source of NEON scalars for 16-bit // operations -def SPR_8 : RegisterClass<"ARM", [f32], 32, (trunc SPR, 16)>; +def SPR_8 : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 15)>; // Scalar double precision floating point / generic 64-bit vector register // class. diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td index 81d2fa37c2..02196d06bf 100644 --- a/lib/Target/ARM/ARMSchedule.td +++ b/lib/Target/ARM/ARMSchedule.td @@ -55,6 +55,7 @@ def IIC_iMUL32 : InstrItinClass; def IIC_iMAC32 : InstrItinClass; def IIC_iMUL64 : InstrItinClass; def IIC_iMAC64 : InstrItinClass; +def IIC_iDIV : InstrItinClass; def IIC_iLoad_i : InstrItinClass; def IIC_iLoad_r : InstrItinClass; def IIC_iLoad_si : InstrItinClass; @@ -261,3 +262,4 @@ def IIC_VTBX4 : InstrItinClass; include "ARMScheduleV6.td" include "ARMScheduleA8.td" include "ARMScheduleA9.td" +include "ARMScheduleSwift.td" diff --git a/lib/Target/ARM/ARMScheduleSwift.td b/lib/Target/ARM/ARMScheduleSwift.td new file mode 100644 index 0000000000..e9bc3e0f39 --- /dev/null +++ b/lib/Target/ARM/ARMScheduleSwift.td @@ -0,0 +1,1085 @@ +//=- ARMScheduleSwift.td - Swift Scheduling Definitions -*- tablegen -*----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the itinerary class data for the Swift processor.. 
+// +//===----------------------------------------------------------------------===// + +// ===---------------------------------------------------------------------===// +// This section contains legacy support for itineraries. This is +// required until SD and PostRA schedulers are replaced by MachineScheduler. + +def SW_DIS0 : FuncUnit; +def SW_DIS1 : FuncUnit; +def SW_DIS2 : FuncUnit; + +def SW_ALU0 : FuncUnit; +def SW_ALU1 : FuncUnit; +def SW_LS : FuncUnit; +def SW_IDIV : FuncUnit; +def SW_FDIV : FuncUnit; + +// FIXME: Need bypasses. +// FIXME: Model the multiple stages of IIC_iMOVix2, IIC_iMOVix2addpc, and +// IIC_iMOVix2ld better. +// FIXME: Model the special immediate shifts that are not microcoded. +// FIXME: Do we need to model the fact that uses of r15 in a micro-op force it +// to issue on pipe 1? +// FIXME: Model the pipelined behavior of CMP / TST instructions. +// FIXME: Better model the microcode stages of multiply instructions, especially +// conditional variants. +// FIXME: Add preload instruction when it is documented. +// FIXME: Model non-pipelined nature of FP div / sqrt unit. + +def SwiftItineraries : ProcessorItineraries< + [SW_DIS0, SW_DIS1, SW_DIS2, SW_ALU0, SW_ALU1, SW_LS, SW_IDIV, SW_FDIV], [], [ + // + // Move instructions, unconditional + InstrItinData<IIC_iMOVi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + InstrItinData<IIC_iMOVr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + InstrItinData<IIC_iMOVsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + InstrItinData<IIC_iMOVsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + InstrItinData<IIC_iMOVix2 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2]>, + InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>, + InstrStage<1, [SW_ALU0, SW_ALU1]>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [3]>, + InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>, + InstrStage<1, [SW_ALU0, SW_ALU1]>, + InstrStage<1, [SW_LS]>], + [5]>, + // + // MVN instructions + InstrItinData<IIC_iMVNi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + InstrItinData<IIC_iMVNr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + InstrItinData<IIC_iMVNsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + InstrItinData<IIC_iMVNsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + // + // No operand cycles + InstrItinData<IIC_iALUx , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>]>, + // + // Binary Instructions that produce a result + InstrItinData<IIC_iALUi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1]>, + InstrItinData<IIC_iALUr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1, 1]>, + InstrItinData<IIC_iALUsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2, 1, 1]>, + InstrItinData<IIC_iALUsir,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + 
InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2, 1, 1]>, + InstrItinData<IIC_iALUsr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2, 1, 1, 1]>, + // + // Bitwise Instructions that produce a result + InstrItinData<IIC_iBITi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1]>, + InstrItinData<IIC_iBITr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1, 1]>, + InstrItinData<IIC_iBITsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2, 1, 1]>, + InstrItinData<IIC_iBITsr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2, 1, 1, 1]>, + // + // Unary Instructions that produce a result + + // CLZ, RBIT, etc. + InstrItinData<IIC_iUNAr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1]>, + + // BFC, BFI, UBFX, SBFX + InstrItinData<IIC_iUNAsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2, 1]>, + + // + // Zero and sign extension instructions + InstrItinData<IIC_iEXTr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1]>, + InstrItinData<IIC_iEXTAr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1, 1]>, + InstrItinData<IIC_iEXTAsr,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1, 1, 1]>, + // + // Compare instructions + InstrItinData<IIC_iCMPi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + InstrItinData<IIC_iCMPr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1]>, + InstrItinData<IIC_iCMPsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<2, [SW_ALU0, SW_ALU1]>], + [1, 1]>, + InstrItinData<IIC_iCMPsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<2, [SW_ALU0, SW_ALU1]>], + [1, 1, 1]>, + // + // Test instructions + InstrItinData<IIC_iTSTi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + InstrItinData<IIC_iTSTr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1]>, + InstrItinData<IIC_iTSTsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<2, [SW_ALU0, SW_ALU1]>], + [1, 1]>, + InstrItinData<IIC_iTSTsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<2, [SW_ALU0, SW_ALU1]>], + [1, 1, 1]>, + // + // Move instructions, conditional + // FIXME: Correctly model the extra input dep on the destination. 
+ InstrItinData<IIC_iCMOVi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + InstrItinData<IIC_iCMOVr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1]>, + InstrItinData<IIC_iCMOVsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1]>, + InstrItinData<IIC_iCMOVsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2, 1, 1]>, + InstrItinData<IIC_iCMOVix2, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2]>, + + // Integer multiply pipeline + // + InstrItinData<IIC_iMUL16 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [3, 1, 1]>, + InstrItinData<IIC_iMAC16 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [3, 1, 1, 1]>, + InstrItinData<IIC_iMUL32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + InstrItinData<IIC_iMAC32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1, 1]>, + InstrItinData<IIC_iMUL64 , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU0], 1>, + InstrStage<1, [SW_ALU0], 3>, + InstrStage<1, [SW_ALU0]>], + [5, 5, 1, 1]>, + InstrItinData<IIC_iMAC64 , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU0], 1>, + InstrStage<1, [SW_ALU0], 1>, + InstrStage<1, [SW_ALU0, SW_ALU1], 3>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [5, 6, 1, 1]>, + // + // Integer divide + InstrItinData<IIC_iDIV , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0], 0>, + InstrStage<14, [SW_IDIV]>], + [14, 1, 1]>, + + // Integer load pipeline + // FIXME: The timings are some rough approximations + // + // Immediate offset + InstrItinData<IIC_iLoad_i , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [3, 1]>, + InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [3, 1]>, + InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_LS], 1>, + InstrStage<1, [SW_LS]>], + [3, 4, 1]>, + // + // Register offset + InstrItinData<IIC_iLoad_r , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [3, 1, 1]>, + InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [3, 1, 1]>, + InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_LS], 1>, + InstrStage<1, [SW_LS], 3>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [3, 4, 1, 1]>, + // + // Scaled register offset + InstrItinData<IIC_iLoad_si , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 2>, + InstrStage<1, [SW_LS]>], + [5, 1, 1]>, + InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 2>, + InstrStage<1, [SW_LS]>], + [5, 1, 1]>, + // + // Immediate offset with update + InstrItinData<IIC_iLoad_iu , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [3, 1, 1]>, + 
InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [3, 1, 1]>, + // + // Register offset with update + InstrItinData<IIC_iLoad_ru , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0], 1>, + InstrStage<1, [SW_LS]>], + [3, 1, 1, 1]>, + InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0], 1>, + InstrStage<1, [SW_LS]>], + [3, 1, 1, 1]>, + InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 0>, + InstrStage<1, [SW_LS], 3>, + InstrStage<1, [SW_LS], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [3, 4, 1, 1]>, + // + // Scaled register offset with update + InstrItinData<IIC_iLoad_siu , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 2>, + InstrStage<1, [SW_LS], 3>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [5, 3, 1, 1]>, + InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 2>, + InstrStage<1, [SW_LS], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [5, 3, 1, 1]>, + // + // Load multiple, def is the 5th operand. + // FIXME: This assumes 3 to 4 registers. + InstrItinData<IIC_iLoad_m , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1, 1, 1, 3], [], -1>, // dynamic uops + + // + // Load multiple + update, defs are the 1st and 5th operands. + InstrItinData<IIC_iLoad_mu , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 0>, + InstrStage<1, [SW_LS], 3>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2, 1, 1, 1, 3], [], -1>, // dynamic uops + // + // Load multiple plus branch + InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1, 1, 1, 3], [], -1>, // dynamic uops + // + // Pop, def is the 3rd operand. + InstrItinData<IIC_iPop , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1, 3], [], -1>, // dynamic uops + // + // Pop + branch, def is the 3rd operand. + InstrItinData<IIC_iPop_Br, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1, 3], [], -1>, // dynamic uops + + // + // iLoadi + iALUr for t2LDRpci_pic. 
+ InstrItinData<IIC_iLoadiALU, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS], 3>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [4, 1]>, + + // Integer store pipeline + /// + // Immediate offset + InstrItinData<IIC_iStore_i , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [1, 1]>, + InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [1, 1]>, + InstrItinData<IIC_iStore_d_i, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_LS], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1]>, + // + // Register offset + InstrItinData<IIC_iStore_r , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [1, 1, 1]>, + InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [1, 1, 1]>, + InstrItinData<IIC_iStore_d_r, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_LS], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1, 1]>, + // + // Scaled register offset + InstrItinData<IIC_iStore_si , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 2>, + InstrStage<1, [SW_LS]>], + [1, 1, 1]>, + InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 2>, + InstrStage<1, [SW_LS]>], + [1, 1, 1]>, + // + // Immediate offset with update + InstrItinData<IIC_iStore_iu , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1, 1]>, + InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1, 1]>, + // + // Register offset with update + InstrItinData<IIC_iStore_ru , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1, 1, 1]>, + InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1, 1, 1]>, + InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1, 1, 1]>, + // + // Scaled register offset with update + InstrItinData<IIC_iStore_siu, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 2>, + InstrStage<1, [SW_LS], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>], + [3, 1, 1, 1]>, + InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 2>, + InstrStage<1, [SW_LS], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>], + [3, 1, 1, 1]>, + // + // Store multiple + InstrItinData<IIC_iStore_m , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS], 1>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS], 1>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [], [], -1>, // dynamic uops + // + // Store multiple + update + InstrItinData<IIC_iStore_mu, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + 
InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS], 1>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS], 1>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [2], [], -1>, // dynamic uops + + // + // Preload + InstrItinData<IIC_Preload, [InstrStage<1, [SW_DIS0], 0>], [1, 1]>, + + // Branch + // + // no delay slots, so the latency of a branch is unimportant + InstrItinData<IIC_Br , [InstrStage<1, [SW_DIS0], 0>]>, + + // FP Special Register to Integer Register File Move + InstrItinData<IIC_fpSTAT , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + // + // Single-precision FP Unary + // + // Most floating-point moves get issued on ALU0. + InstrItinData<IIC_fpUNA32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1]>, + // + // Double-precision FP Unary + InstrItinData<IIC_fpUNA64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1]>, + + // + // Single-precision FP Compare + InstrItinData<IIC_fpCMP32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [1, 1]>, + // + // Double-precision FP Compare + InstrItinData<IIC_fpCMP64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [1, 1]>, + // + // Single to Double FP Convert + InstrItinData<IIC_fpCVTSD , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1]>, + // + // Double to Single FP Convert + InstrItinData<IIC_fpCVTDS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1]>, + + // + // Single to Half FP Convert + InstrItinData<IIC_fpCVTSH , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU1], 4>, + InstrStage<1, [SW_ALU1]>], + [6, 1]>, + // + // Half to Single FP Convert + InstrItinData<IIC_fpCVTHS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1]>, + + // + // Single-Precision FP to Integer Convert + InstrItinData<IIC_fpCVTSI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1]>, + // + // Double-Precision FP to Integer Convert + InstrItinData<IIC_fpCVTDI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1]>, + // + // Integer to Single-Precision FP Convert + InstrItinData<IIC_fpCVTIS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1]>, + // + // Integer to Double-Precision FP Convert + InstrItinData<IIC_fpCVTID , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1]>, + // + // Single-precision FP ALU + InstrItinData<IIC_fpALU32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1, 1]>, + // + // Double-precision FP ALU + InstrItinData<IIC_fpALU64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1, 1]>, + // + // Single-precision FP Multiply + InstrItinData<IIC_fpMUL32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1, 1]>, + // + // Double-precision FP Multiply + InstrItinData<IIC_fpMUL64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [6, 1, 1]>, + // + // Single-precision FP MAC + InstrItinData<IIC_fpMAC32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [8, 1, 1]>, + // + // Double-precision FP MAC + InstrItinData<IIC_fpMAC64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 
+ InstrStage<1, [SW_ALU1]>], + [12, 1, 1]>, + // + // Single-precision Fused FP MAC + InstrItinData<IIC_fpFMAC32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [8, 1, 1]>, + // + // Double-precision Fused FP MAC + InstrItinData<IIC_fpFMAC64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [12, 1, 1]>, + // + // Single-precision FP DIV + InstrItinData<IIC_fpDIV32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1], 0>, + InstrStage<15, [SW_FDIV]>], + [17, 1, 1]>, + // + // Double-precision FP DIV + InstrItinData<IIC_fpDIV64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1], 0>, + InstrStage<30, [SW_FDIV]>], + [32, 1, 1]>, + // + // Single-precision FP SQRT + InstrItinData<IIC_fpSQRT32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1], 0>, + InstrStage<15, [SW_FDIV]>], + [17, 1]>, + // + // Double-precision FP SQRT + InstrItinData<IIC_fpSQRT64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1], 0>, + InstrStage<30, [SW_FDIV]>], + [32, 1, 1]>, + + // + // Integer to Single-precision Move + InstrItinData<IIC_fpMOVIS, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_LS], 4>, + InstrStage<1, [SW_ALU0]>], + [6, 1]>, + // + // Integer to Double-precision Move + InstrItinData<IIC_fpMOVID, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [4, 1]>, + // + // Single-precision to Integer Move + InstrItinData<IIC_fpMOVSI, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [3, 1]>, + // + // Double-precision to Integer Move + InstrItinData<IIC_fpMOVDI, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_LS], 3>, + InstrStage<1, [SW_LS]>], + [3, 4, 1]>, + // + // Single-precision FP Load + InstrItinData<IIC_fpLoad32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [4, 1]>, + // + // Double-precision FP Load + InstrItinData<IIC_fpLoad64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [4, 1]>, + // + // FP Load Multiple + // FIXME: Assumes a single Q register. + InstrItinData<IIC_fpLoad_m, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [1, 1, 1, 4], [], -1>, // dynamic uops + // + // FP Load Multiple + update + // FIXME: Assumes a single Q register. + InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_LS], 4>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2, 1, 1, 1, 4], [], -1>, // dynamic uops + // + // Single-precision FP Store + InstrItinData<IIC_fpStore32,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [1, 1]>, + // + // Double-precision FP Store + InstrItinData<IIC_fpStore64,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [1, 1]>, + // + // FP Store Multiple + // FIXME: Assumes a single Q register. + InstrItinData<IIC_fpStore_m,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [1, 1, 1], [], -1>, // dynamic uops + // + // FP Store Multiple + update + // FIXME: Assumes a single Q register. 
+ InstrItinData<IIC_fpStore_mu,[InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_LS], 4>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2, 1, 1, 1], [], -1>, // dynamic uops + // NEON + // + // Double-register Integer Unary + InstrItinData<IIC_VUNAiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1]>, + // + // Quad-register Integer Unary + InstrItinData<IIC_VUNAiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1]>, + // + // Double-register Integer Q-Unary + InstrItinData<IIC_VQUNAiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1]>, + // + // Quad-register Integer CountQ-Unary + InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1]>, + // + // Double-register Integer Binary + InstrItinData<IIC_VBINiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1, 1]>, + // + // Quad-register Integer Binary + InstrItinData<IIC_VBINiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1, 1]>, + // + // Double-register Integer Subtract + InstrItinData<IIC_VSUBiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1, 1]>, + // + // Quad-register Integer Subtract + InstrItinData<IIC_VSUBiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1, 1]>, + // + // Double-register Integer Shift + InstrItinData<IIC_VSHLiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1, 1]>, + // + // Quad-register Integer Shift + InstrItinData<IIC_VSHLiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1, 1]>, + // + // Double-register Integer Shift (4 cycle) + InstrItinData<IIC_VSHLi4D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + // + // Quad-register Integer Shift (4 cycle) + InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + // + // Double-register Integer Binary (4 cycle) + InstrItinData<IIC_VBINi4D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + // + // Quad-register Integer Binary (4 cycle) + InstrItinData<IIC_VBINi4Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + // + // Double-register Integer Subtract (4 cycle) + InstrItinData<IIC_VSUBi4D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + // + // Quad-register Integer Subtract (4 cycle) + InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + + // + // Double-register Integer Count + InstrItinData<IIC_VCNTiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1, 1]>, + // + // Quad-register Integer Count + InstrItinData<IIC_VCNTiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1, 1]>, + // + // Double-register Absolute Difference and Accumulate + InstrItinData<IIC_VABAD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1, 1]>, + // + // Quad-register Absolute Difference and Accumulate + InstrItinData<IIC_VABAQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1, 1]>, + // + // Double-register Integer Pair Add Long + 
InstrItinData<IIC_VPALiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + // + // Quad-register Integer Pair Add Long + InstrItinData<IIC_VPALiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + + // + // Double-register Integer Multiply (.8, .16) + InstrItinData<IIC_VMULi16D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1, 1]>, + // + // Quad-register Integer Multiply (.8, .16) + InstrItinData<IIC_VMULi16Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1, 1]>, + + // + // Double-register Integer Multiply (.32) + InstrItinData<IIC_VMULi32D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1, 1]>, + // + // Quad-register Integer Multiply (.32) + InstrItinData<IIC_VMULi32Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1, 1]>, + // + // Double-register Integer Multiply-Accumulate (.8, .16) + InstrItinData<IIC_VMACi16D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1, 1, 1]>, + // + // Double-register Integer Multiply-Accumulate (.32) + InstrItinData<IIC_VMACi32D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1, 1, 1]>, + // + // Quad-register Integer Multiply-Accumulate (.8, .16) + InstrItinData<IIC_VMACi16Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1, 1, 1]>, + // + // Quad-register Integer Multiply-Accumulate (.32) + InstrItinData<IIC_VMACi32Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1, 1, 1]>, + + // + // Move + InstrItinData<IIC_VMOV, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1]>, + // + // Move Immediate + InstrItinData<IIC_VMOVImm, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2]>, + // + // Double-register Permute Move + InstrItinData<IIC_VMOVD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [2, 1]>, + // + // Quad-register Permute Move + InstrItinData<IIC_VMOVQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [2, 1]>, + // + // Integer to Single-precision Move + InstrItinData<IIC_VMOVIS , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_LS], 4>, + InstrStage<1, [SW_ALU0]>], + [6, 1]>, + // + // Integer to Double-precision Move + InstrItinData<IIC_VMOVID , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [4, 1, 1]>, + // + // Single-precision to Integer Move + InstrItinData<IIC_VMOVSI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [3, 1]>, + // + // Double-precision to Integer Move + InstrItinData<IIC_VMOVDI , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_LS], 3>, + InstrStage<1, [SW_LS]>], + [3, 4, 1]>, + // + // Integer to Lane Move + // FIXME: I think this is correct, but it is not clear from the tuning guide. 
+ InstrItinData<IIC_VMOVISL , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_LS], 4>, + InstrStage<1, [SW_ALU0]>], + [6, 1]>, + + // + // Vector narrow move + InstrItinData<IIC_VMOVN, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [2, 1]>, + // + // Double-register FP Unary + // FIXME: VRECPE / VRSQRTE has a longer latency than VABS, which is used here, + // and they issue on a different pipeline. + InstrItinData<IIC_VUNAD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1]>, + // + // Quad-register FP Unary + // FIXME: VRECPE / VRSQRTE has a longer latency than VABS, which is used here, + // and they issue on a different pipeline. + InstrItinData<IIC_VUNAQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1]>, + // + // Double-register FP Binary + // FIXME: We're using this itin for many instructions. + InstrItinData<IIC_VBIND, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + + // + // VPADD, etc. + InstrItinData<IIC_VPBIND, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + // + // Double-register FP VMUL + InstrItinData<IIC_VFMULD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1, 1]>, + // + // Quad-register FP Binary + InstrItinData<IIC_VBINQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + // + // Quad-register FP VMUL + InstrItinData<IIC_VFMULQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1, 1]>, + // + // Double-register FP Multiple-Accumulate + InstrItinData<IIC_VMACD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [8, 1, 1]>, + // + // Quad-register FP Multiple-Accumulate + InstrItinData<IIC_VMACQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [8, 1, 1]>, + // + // Double-register Fused FP Multiple-Accumulate + InstrItinData<IIC_VFMACD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [8, 1, 1]>, + // + // Quad-register FusedF P Multiple-Accumulate + InstrItinData<IIC_VFMACQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [8, 1, 1]>, + // + // Double-register Reciprical Step + InstrItinData<IIC_VRECSD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [8, 1, 1]>, + // + // Quad-register Reciprical Step + InstrItinData<IIC_VRECSQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [8, 1, 1]>, + // + // Double-register Permute + // FIXME: The latencies are unclear from the documentation. + InstrItinData<IIC_VPERMD, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1]>], + [3, 4, 3, 4]>, + // + // Quad-register Permute + // FIXME: The latencies are unclear from the documentation. 
+ InstrItinData<IIC_VPERMQ, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1]>], + [3, 4, 3, 4]>, + // + // Quad-register Permute (3 cycle issue on A9) + InstrItinData<IIC_VPERMQ3, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1]>], + [3, 4, 3, 4]>, + + // + // Double-register VEXT + InstrItinData<IIC_VEXTD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [2, 1, 1]>, + // + // Quad-register VEXT + InstrItinData<IIC_VEXTQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [2, 1, 1]>, + // + // VTB + InstrItinData<IIC_VTB1, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [2, 1, 1]>, + InstrItinData<IIC_VTB2, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1]>], + [4, 1, 3, 3]>, + InstrItinData<IIC_VTB3, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1]>], + [6, 1, 3, 5, 5]>, + InstrItinData<IIC_VTB4, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1]>], + [8, 1, 3, 5, 7, 7]>, + // + // VTBX + InstrItinData<IIC_VTBX1, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [2, 1, 1]>, + InstrItinData<IIC_VTBX2, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1]>], + [4, 1, 3, 3]>, + InstrItinData<IIC_VTBX3, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1]>], + [6, 1, 3, 5, 5]>, + InstrItinData<IIC_VTBX4, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1]>], + [8, 1, 3, 5, 7, 7]> +]>; + +// ===---------------------------------------------------------------------===// +// This following definitions describe the simple machine model which +// will replace itineraries. + +// Swift machine model for scheduling and other instruction cost heuristics. +def SwiftModel : SchedMachineModel { + let IssueWidth = 3; // 3 micro-ops are dispatched per cycle. + let MinLatency = 0; // Data dependencies are allowed within dispatch groups. + let LoadLatency = 3; + + let Itineraries = SwiftItineraries; +} + +// TODO: Add Swift processor and scheduler resources. 
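Reading the itinerary entries above: the bracketed list after each InstrItinData gives per-operand cycle counts, where the first number is the cycle in which the defined register becomes available (the result latency) and the remaining numbers are the cycles in which the source operands are read, while IssueWidth = 3 in SwiftModel caps dispatch at three micro-ops per cycle and LoadLatency = 3 matches the [3, 1] entries on the simple loads. The toy C++ program below is only an illustrative sketch of how those two pieces of information combine into an issue-cycle estimate; the instruction names, register names, and latency values are assumptions lifted from the entries above (IIC_iLoad_i [3, 1], IIC_iMAC16 [3, 1, 1, 1], IIC_iStore_i [1, 1]), and none of it uses real LLVM scheduler APIs.

#include <algorithm>
#include <cstdio>
#include <map>
#include <string>
#include <vector>

struct Inst {
  std::string Name;
  int ResultLatency;              // cycles until the defined register is ready
  std::vector<std::string> Uses;  // registers read
  std::string Def;                // register written ("" if none)
};

int main() {
  const int IssueWidth = 3;  // mirrors "let IssueWidth = 3" in SwiftModel above
  // A small dependent chain: load feeds a multiply-accumulate which feeds a store.
  std::vector<Inst> Prog = {
      {"LDR r0, [r1]",       3, {"r1"},             "r0"},  // IIC_iLoad_i  [3, 1]
      {"MLA r2, r0, r3, r4", 3, {"r0", "r3", "r4"}, "r2"},  // IIC_iMAC16   [3, 1, 1, 1]
      {"STR r2, [r5]",       1, {"r2", "r5"},       ""},    // IIC_iStore_i [1, 1]
  };

  std::map<std::string, int> ReadyAt;  // cycle in which each register becomes available
  int Cycle = 0, IssuedThisCycle = 0;
  for (const Inst &I : Prog) {
    // An instruction can issue once all of its source registers are ready.
    int Earliest = Cycle;
    for (const std::string &R : I.Uses)
      if (ReadyAt.count(R))
        Earliest = std::max(Earliest, ReadyAt[R]);
    // Enforce the 3-wide dispatch group: at most IssueWidth micro-ops per cycle.
    if (Earliest == Cycle && IssuedThisCycle == IssueWidth)
      ++Earliest;
    if (Earliest != Cycle) {
      Cycle = Earliest;
      IssuedThisCycle = 0;
    }
    ++IssuedThisCycle;
    if (!I.Def.empty())
      ReadyAt[I.Def] = Cycle + I.ResultLatency;
    std::printf("cycle %2d: %s\n", Cycle, I.Name.c_str());
  }
  return 0;
}

With the latencies assumed above, the load issues in cycle 0, the MLA waits for r0 and issues in cycle 3, and the store waits for r2 and issues in cycle 6; that dependence height is what a scheduler tries to hide by interleaving independent work. Note that MinLatency = 0 in SwiftModel states that dependent micro-ops may land in the same dispatch group, a refinement this sketch deliberately leaves out.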
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index c8aa0779bc..6562600202 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -41,6 +41,10 @@ NoInlineJumpTables("no-inline-jumptables", // @LOCALMOD-END static cl::opt<bool> +UseFusedMulOps("arm-use-mulops", + cl::init(true), cl::Hidden); + +static cl::opt<bool> StrictAlign("arm-strict-align", cl::Hidden, cl::desc("Disallow all unaligned memory accesses")); @@ -59,6 +63,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, , HasVFPv4(false) , HasNEON(false) , UseNEONForSinglePrecisionFP(false) + , UseMulOps(UseFusedMulOps) , SlowFPVMLx(false) , HasVMLxForwarding(false) , SlowFPBrcc(false) @@ -74,6 +79,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, , HasFP16(false) , HasD16(false) , HasHardwareDivide(false) + , HasHardwareDivideInARM(false) , HasT2ExtractPack(false) , HasDataBarrier(false) , Pref32BitThumb(false) diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 0a5744e5c1..64081f5be2 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -38,7 +38,7 @@ class StringRef; class ARMSubtarget : public ARMGenSubtargetInfo { protected: enum ARMProcFamilyEnum { - Others, CortexA8, CortexA9, CortexA15 + Others, CortexA8, CortexA9, CortexA15, Swift }; /// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others. @@ -65,6 +65,10 @@ protected: /// determine if NEON should actually be used. bool UseNEONForSinglePrecisionFP; + /// UseMulOps - True if non-microcoded fused integer multiply-add and + /// multiply-subtract instructions should be used. + bool UseMulOps; + /// SlowFPVMLx - If the VFP2 / NEON instructions are available, indicates /// whether the FP VML[AS] instructions are slow (if so, don't use them). bool SlowFPVMLx; @@ -115,6 +119,9 @@ protected: /// HasHardwareDivide - True if subtarget supports [su]div bool HasHardwareDivide; + /// HasHardwareDivideInARM - True if subtarget supports [su]div in ARM mode + bool HasHardwareDivideInARM; + /// HasT2ExtractPack - True if subtarget supports thumb2 extract/pack /// instructions. 
bool HasT2ExtractPack; @@ -214,6 +221,7 @@ protected: bool isCortexA8() const { return ARMProcFamily == CortexA8; } bool isCortexA9() const { return ARMProcFamily == CortexA9; } bool isCortexA15() const { return ARMProcFamily == CortexA15; } + bool isSwift() const { return ARMProcFamily == Swift; } bool isCortexM3() const { return CPUString == "cortex-m3"; } bool isLikeA9() const { return isCortexA9() || isCortexA15(); } @@ -227,8 +235,10 @@ protected: return hasNEON() && UseNEONForSinglePrecisionFP; } bool hasDivide() const { return HasHardwareDivide; } + bool hasDivideInARMMode() const { return HasHardwareDivideInARM; } bool hasT2ExtractPack() const { return HasT2ExtractPack; } bool hasDataBarrier() const { return HasDataBarrier; } + bool useMulOps() const { return UseMulOps; } bool useFPVMLx() const { return !SlowFPVMLx; } bool hasVMLxForwarding() const { return HasVMLxForwarding; } bool isFPBrccSlow() const { return SlowFPBrcc; } diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 4675c98f0d..ac5f14c09c 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -150,6 +150,11 @@ bool ARMPassConfig::addPreISel() { bool ARMPassConfig::addInstSelector() { addPass(createARMISelDag(getARMTargetMachine(), getOptLevel())); + + const ARMSubtarget *Subtarget = &getARMSubtarget(); + if (Subtarget->isTargetELF() && !Subtarget->isThumb1Only() && + TM->Options.EnableFastISel) + addPass(createARMGlobalBaseRegPass()); return false; } diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index bc711dc35f..aa5ba46ab2 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -257,21 +257,11 @@ public: SmallVectorImpl<MCParsedAsmOperand*> &Operands); bool ParseDirective(AsmToken DirectiveID); - bool mnemonicIsValid(StringRef Mnemonic) { - return mnemonicIsValidImpl(Mnemonic); - } - unsigned checkTargetMatchPredicate(MCInst &Inst); bool MatchAndEmitInstruction(SMLoc IDLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCStreamer &Out); - - unsigned getMCInstOperandNum(unsigned Kind, MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands, - unsigned OperandNum, unsigned &NumMCOperands) { - return getMCInstOperandNumImpl(Kind, Inst, Operands, OperandNum, NumMCOperands); - } }; } // end anonymous namespace @@ -5676,6 +5666,20 @@ bool ARMAsmParser:: processInstruction(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { switch (Inst.getOpcode()) { + // Alias for alternate form of 'ADR Rd, #imm' instruction. + case ARM::ADDri: { + if (Inst.getOperand(1).getReg() != ARM::PC || + Inst.getOperand(5).getReg() != 0) + return false; + MCInst TmpInst; + TmpInst.setOpcode(ARM::ADR); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(2)); + TmpInst.addOperand(Inst.getOperand(3)); + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } // Aliases for alternate PC+imm syntax of LDR instructions. 
case ARM::t2LDRpcrel: Inst.setOpcode(ARM::t2LDRpci); diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index 34c79f945f..dfc424cda2 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -714,6 +714,15 @@ MCAsmBackend *llvm::createARMAsmBackend(const Target &T, StringRef TT, StringRef else if (TheTriple.getArchName() == "armv6" || TheTriple.getArchName() == "thumbv6") return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V6); + else if (TheTriple.getArchName() == "armv7f" || + TheTriple.getArchName() == "thumbv7f") + return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7F); + else if (TheTriple.getArchName() == "armv7k" || + TheTriple.getArchName() == "thumbv7k") + return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7K); + else if (TheTriple.getArchName() == "armv7s" || + TheTriple.getArchName() == "thumbv7s") + return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7S); return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index e581cc82fa..406317cee4 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -71,6 +71,14 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) { else // Use CPU to figure out the exact features. ARMArchFeature = "+v7"; + } else if (Len >= Idx+2 && TT[Idx+1] == 's') { + if (NoCPU) + // v7s: FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureT2XtPk + // Swift + ARMArchFeature = "+v7,+swift,+neon,+db,+t2dsp,+t2xtpk"; + else + // Use CPU to figure out the exact features. + ARMArchFeature = "+v7"; } else { // v7 CPUs have lots of different feature sets. If no CPU is specified, // then assume v7a (e.g. cortex-a8) feature set. Otherwise, return diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index 95640f7df9..2154c93176 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -41,6 +41,12 @@ class ARMMachObjectWriter : public MCMachObjectTargetWriter { const MCFixup &Fixup, MCValue Target, uint64_t &FixedValue); + bool requiresExternRelocation(MachObjectWriter *Writer, + const MCAssembler &Asm, + const MCFragment &Fragment, + unsigned RelocType, const MCSymbolData *SD, + uint64_t FixedValue); + public: ARMMachObjectWriter(bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype) @@ -305,6 +311,46 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer, Writer->addRelocation(Fragment->getParent(), MRE); } +bool ARMMachObjectWriter::requiresExternRelocation(MachObjectWriter *Writer, + const MCAssembler &Asm, + const MCFragment &Fragment, + unsigned RelocType, + const MCSymbolData *SD, + uint64_t FixedValue) { + // Most cases can be identified purely from the symbol. + if (Writer->doesSymbolRequireExternRelocation(SD)) + return true; + int64_t Value = (int64_t)FixedValue; // The displacement is signed. + int64_t Range; + switch (RelocType) { + default: + return false; + case macho::RIT_ARM_Branch24Bit: + // PC pre-adjustment of 8 for these instructions. + Value -= 8; + // ARM BL/BLX has a 25-bit offset. + Range = 0x1ffffff; + break; + case macho::RIT_ARM_ThumbBranch22Bit: + // PC pre-adjustment of 4 for these instructions. + Value -= 4; + // Thumb BL/BLX has a 24-bit offset. 
+ Range = 0xffffff; + } + // BL/BLX also use external relocations when an internal relocation + // would result in the target being out of range. This gives the linker + // enough information to generate a branch island. + const MCSectionData &SymSD = Asm.getSectionData( + SD->getSymbol().getSection()); + Value += Writer->getSectionAddress(&SymSD); + Value -= Writer->getSectionAddress(Fragment.getParent()); + // If the resultant value would be out of range for an internal relocation, + // use an external instead. + if (Value > Range || Value < -(Range + 1)) + return true; + return false; +} + void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout, @@ -373,7 +419,8 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, } // Check whether we need an external or internal relocation. - if (Writer->doesSymbolRequireExternRelocation(SD)) { + if (requiresExternRelocation(Writer, Asm, *Fragment, RelocType, SD, + FixedValue)) { IsExtern = 1; Index = SD->getIndex(); diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp index 4ebba0e4d3..70643bcda3 100644 --- a/lib/Target/ARM/MLxExpansionPass.cpp +++ b/lib/Target/ARM/MLxExpansionPass.cpp @@ -52,6 +52,7 @@ namespace { MachineRegisterInfo *MRI; bool isLikeA9; + bool isSwift; unsigned MIIdx; MachineInstr* LastMIs[4]; SmallPtrSet<MachineInstr*, 4> IgnoreStall; @@ -60,6 +61,7 @@ namespace { void pushStack(MachineInstr *MI); MachineInstr *getAccDefMI(MachineInstr *MI) const; unsigned getDefReg(MachineInstr *MI) const; + bool hasLoopHazard(MachineInstr *MI) const; bool hasRAWHazard(unsigned Reg, MachineInstr *MI) const; bool FindMLxHazard(MachineInstr *MI); void ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI, @@ -135,6 +137,50 @@ unsigned MLxExpansion::getDefReg(MachineInstr *MI) const { return Reg; } +/// hasLoopHazard - Check whether an MLx instruction is chained to itself across +/// a single-MBB loop. +bool MLxExpansion::hasLoopHazard(MachineInstr *MI) const { + unsigned Reg = MI->getOperand(1).getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + return false; + + MachineBasicBlock *MBB = MI->getParent(); + MachineInstr *DefMI = MRI->getVRegDef(Reg); + while (true) { +outer_continue: + if (DefMI->getParent() != MBB) + break; + + if (DefMI->isPHI()) { + for (unsigned i = 1, e = DefMI->getNumOperands(); i < e; i += 2) { + if (DefMI->getOperand(i + 1).getMBB() == MBB) { + unsigned SrcReg = DefMI->getOperand(i).getReg(); + if (TargetRegisterInfo::isVirtualRegister(SrcReg)) { + DefMI = MRI->getVRegDef(SrcReg); + goto outer_continue; + } + } + } + } else if (DefMI->isCopyLike()) { + Reg = DefMI->getOperand(1).getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + DefMI = MRI->getVRegDef(Reg); + continue; + } + } else if (DefMI->isInsertSubreg()) { + Reg = DefMI->getOperand(2).getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + DefMI = MRI->getVRegDef(Reg); + continue; + } + } + + break; + } + + return DefMI == MI; +} + bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const { // FIXME: Detect integer instructions properly. 
const MCInstrDesc &MCID = MI->getDesc(); @@ -149,6 +195,19 @@ bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const { return false; } +static bool isFpMulInstruction(unsigned Opcode) { + switch (Opcode) { + case ARM::VMULS: + case ARM::VMULfd: + case ARM::VMULfq: + case ARM::VMULD: + case ARM::VMULslfd: + case ARM::VMULslfq: + return true; + default: + return false; + } +} bool MLxExpansion::FindMLxHazard(MachineInstr *MI) { if (NumExpand >= ExpandLimit) @@ -171,6 +230,12 @@ bool MLxExpansion::FindMLxHazard(MachineInstr *MI) { return true; } + // On Swift, we mostly care about hazards from multiplication instructions + // writing the accumulator and the pipelining of loop iterations by out-of- + // order execution. + if (isSwift) + return isFpMulInstruction(DefMI->getOpcode()) || hasLoopHazard(MI); + if (IgnoreStall.count(MI)) return false; @@ -316,7 +381,8 @@ bool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) { TRI = Fn.getTarget().getRegisterInfo(); MRI = &Fn.getRegInfo(); const ARMSubtarget *STI = &Fn.getTarget().getSubtarget<ARMSubtarget>(); - isLikeA9 = STI->isLikeA9(); + isLikeA9 = STI->isLikeA9() || STI->isSwift(); + isSwift = STI->isSwift(); bool Modified = false; for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index 703a128ee0..1c891f14d8 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -1350,6 +1350,8 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine } else { setOperationAction(ISD::BR_JT, MVT::Other, Expand); } + // Increase jump tables cutover to 5, was 4. + setMinimumJumpTableEntries(5); setOperationAction(ISD::BR_CC, MVT::i32, Expand); diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp index daceb88076..9e22fd06d1 100644 --- a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp +++ b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp @@ -44,10 +44,6 @@ class MBlazeAsmParser : public MCTargetAsmParser { bool ParseDirectiveWord(unsigned Size, SMLoc L); - bool mnemonicIsValid(StringRef Mnemonic) { - return mnemonicIsValidImpl(Mnemonic); - } - bool MatchAndEmitInstruction(SMLoc IDLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCStreamer &Out); @@ -60,13 +56,6 @@ class MBlazeAsmParser : public MCTargetAsmParser { /// } - unsigned getMCInstOperandNum(unsigned Kind, MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands, - unsigned OperandNum, unsigned &NumMCOperands) { - return getMCInstOperandNumImpl(Kind, Inst, Operands, OperandNum, - NumMCOperands); - } - public: MBlazeAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser) : MCTargetAsmParser(), Parser(_Parser) {} diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 4cbd4c8e12..b1ada100f4 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -41,10 +41,6 @@ class MipsAsmParser : public MCTargetAsmParser { #define GET_ASSEMBLER_HEADER #include "MipsGenAsmMatcher.inc" - bool mnemonicIsValid(StringRef Mnemonic) { - return mnemonicIsValidImpl(Mnemonic); - } - bool MatchAndEmitInstruction(SMLoc IDLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCStreamer &Out); @@ -62,11 +58,6 @@ class MipsAsmParser : public MCTargetAsmParser { MipsAsmParser::OperandMatchResultTy parseMemOperand(SmallVectorImpl<MCParsedAsmOperand*>&); - 
unsigned - getMCInstOperandNum(unsigned Kind, MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands, - unsigned OperandNum, unsigned &NumMCOperands); - bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &, StringRef Mnemonic); @@ -265,18 +256,6 @@ public: }; } -unsigned MipsAsmParser:: -getMCInstOperandNum(unsigned Kind, MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands, - unsigned OperandNum, unsigned &NumMCOperands) { - assert (0 && "getMCInstOperandNum() not supported by the Mips target."); - // The Mips backend doesn't currently include the matcher implementation, so - // the getMCInstOperandNumImpl() is undefined. This is a temporary - // work around. - NumMCOperands = 0; - return 0; -} - bool MipsAsmParser:: MatchAndEmitInstruction(SMLoc IDLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands, diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt index 0f84358e26..7dec066fb6 100644 --- a/lib/Target/Mips/CMakeLists.txt +++ b/lib/Target/Mips/CMakeLists.txt @@ -11,6 +11,7 @@ tablegen(LLVM MipsGenCallingConv.inc -gen-callingconv) tablegen(LLVM MipsGenSubtargetInfo.inc -gen-subtarget) tablegen(LLVM MipsGenEDInfo.inc -gen-enhanced-disassembly-info) tablegen(LLVM MipsGenAsmMatcher.inc -gen-asm-matcher) +tablegen(LLVM MipsGenMCPseudoLowering.inc -gen-pseudo-lowering) add_public_tablegen_target(MipsCommonTableGen) add_llvm_target(MipsCodeGen diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp index aa5747209b..82dbcc5bcf 100644 --- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp +++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp @@ -108,6 +108,11 @@ static DecodeStatus DecodeCPURegsRegisterClass(MCInst &Inst, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeDSPRegsRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); + static DecodeStatus DecodeFGR64RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, @@ -138,6 +143,11 @@ static DecodeStatus DecodeHWRegs64RegisterClass(MCInst &Inst, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeACRegsRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); + static DecodeStatus DecodeBranchTarget(MCInst &Inst, unsigned Offset, uint64_t Address, @@ -346,6 +356,13 @@ static DecodeStatus DecodeCPURegsRegisterClass(MCInst &Inst, return MCDisassembler::Success; } +static DecodeStatus DecodeDSPRegsRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { + return DecodeCPURegsRegisterClass(Inst, RegNo, Address, Decoder); +} + static DecodeStatus DecodeFGR64RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, @@ -463,6 +480,18 @@ static DecodeStatus DecodeHWRegs64RegisterClass(MCInst &Inst, return MCDisassembler::Success; } +static DecodeStatus DecodeACRegsRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo >= 4) + return MCDisassembler::Fail; + + unsigned Reg = getReg(Decoder, Mips::ACRegsRegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Reg)); + return MCDisassembler::Success; +} + static DecodeStatus DecodeBranchTarget(MCInst &Inst, unsigned Offset, uint64_t Address, diff --git a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h index 96033276d2..233214b461 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h +++ b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h @@ -122,14 +122,16 
@@ inline static unsigned getMipsRegisterNumbering(unsigned RegEnum) { switch (RegEnum) { case Mips::ZERO: case Mips::ZERO_64: case Mips::F0: case Mips::D0_64: - case Mips::D0: case Mips::FCC0: + case Mips::D0: case Mips::FCC0: case Mips::AC0: return 0; case Mips::AT: case Mips::AT_64: case Mips::F1: case Mips::D1_64: + case Mips::AC1: return 1; case Mips::V0: case Mips::V0_64: case Mips::F2: case Mips::D2_64: - case Mips::D1: + case Mips::D1: case Mips::AC2: return 2; case Mips::V1: case Mips::V1_64: case Mips::F3: case Mips::D3_64: + case Mips::AC3: return 3; case Mips::A0: case Mips::A0_64: case Mips::F4: case Mips::D4_64: case Mips::D2: diff --git a/lib/Target/Mips/Makefile b/lib/Target/Mips/Makefile index 93de517316..bd8c517345 100644 --- a/lib/Target/Mips/Makefile +++ b/lib/Target/Mips/Makefile @@ -17,7 +17,7 @@ BUILT_SOURCES = MipsGenRegisterInfo.inc MipsGenInstrInfo.inc \ MipsGenDAGISel.inc MipsGenCallingConv.inc \ MipsGenSubtargetInfo.inc MipsGenMCCodeEmitter.inc \ MipsGenEDInfo.inc MipsGenDisassemblerTables.inc \ - MipsGenAsmMatcher.inc + MipsGenMCPseudoLowering.inc MipsGenAsmMatcher.inc DIRS = InstPrinter Disassembler AsmParser TargetInfo MCTargetDesc diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp index 9248032340..127c5b89e8 100644 --- a/lib/Target/Mips/Mips16InstrInfo.cpp +++ b/lib/Target/Mips/Mips16InstrInfo.cpp @@ -84,7 +84,15 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { - assert(false && "Implement this function."); + DebugLoc DL; + if (I != MBB.end()) DL = I->getDebugLoc(); + MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOStore); + unsigned Opc = 0; + if (Mips::CPU16RegsRegClass.hasSubClassEq(RC)) + Opc = Mips::SwRxSpImmX16; + assert(Opc && "Register class not handled!"); + BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI).addImm(0).addMemOperand(MMO); } void Mips16InstrInfo:: @@ -92,7 +100,16 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { - assert(false && "Implement this function."); + DebugLoc DL; + if (I != MBB.end()) DL = I->getDebugLoc(); + MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad); + unsigned Opc = 0; + + if (Mips::CPU16RegsRegClass.hasSubClassEq(RC)) + Opc = Mips::LwRxSpImmX16; + assert(Opc && "Register class not handled!"); + BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(0) + .addMemOperand(MMO); } bool Mips16InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td index b0ab464a68..b866a5d225 100644 --- a/lib/Target/Mips/Mips16InstrInfo.td +++ b/lib/Target/Mips/Mips16InstrInfo.td @@ -29,10 +29,35 @@ class FI8_MOVR3216_ins<string asmstr, InstrItinClass itin>: // // I8_MOV32R instruction format (used only by MOV32R instruction) // + class FI8_MOV32R16_ins<string asmstr, InstrItinClass itin>: FI8_MOV32R16<(outs CPURegs:$r32), (ins CPU16Regs:$rz), !strconcat(asmstr, "\t$r32, $rz"), [], itin>; + +// +// RR-type instruction format +// + +class FRR16_ins<bits<5> f, string asmstr, InstrItinClass itin> : + FRR16<f, (outs CPU16Regs:$rx), (ins CPU16Regs:$ry), + !strconcat(asmstr, "\t$rx, $ry"), [], itin> { +} + +class FRxRxRy16_ins<bits<5> f, string asmstr, + 
InstrItinClass itin> : + FRR16<f, (outs CPU16Regs:$rz), (ins CPU16Regs:$rx, CPU16Regs:$ry), + !strconcat(asmstr, "\t$rz, $ry"), + [], itin> { + let Constraints = "$rx = $rz"; +} + +let rx=0 in +class FRR16_JALRC_RA_only_ins<bits<1> nd_, bits<1> l_, + string asmstr, InstrItinClass itin>: + FRR16_JALRC<nd_, l_, 1, (outs), (ins), !strconcat(asmstr, "\t $$ra"), + [], itin> ; + // // EXT-RI instruction format // @@ -56,30 +81,14 @@ class FEXT_2RI16_ins<bits<5> _op, string asmstr, !strconcat(asmstr, "\t$rx, $imm"), [], itin> { let Constraints = "$rx_ = $rx"; } - - -// -// RR-type instruction format -// - -class FRR16_ins<bits<5> f, string asmstr, InstrItinClass itin> : - FRR16<f, (outs CPU16Regs:$rx), (ins CPU16Regs:$ry), - !strconcat(asmstr, "\t$rx, $ry"), [], itin> { -} - -class FRxRxRy16_ins<bits<5> f, string asmstr, - InstrItinClass itin> : - FRR16<f, (outs CPU16Regs:$rz), (ins CPU16Regs:$rx, CPU16Regs:$ry), - !strconcat(asmstr, "\t$rz, $ry"), - [], itin> { - let Constraints = "$rx = $rz"; +// this has an explicit sp argument that we ignore to work around a problem +// in the compiler +class FEXT_RI16_SP_explicit_ins<bits<5> _op, string asmstr, + InstrItinClass itin>: + FEXT_RI16<_op, (outs CPU16Regs:$rx), (ins CPUSPReg:$ry, simm16:$imm), + !strconcat(asmstr, "\t$rx, $imm ( $ry ); "), [], itin> { } -let rx=0 in -class FRR16_JALRC_RA_only_ins<bits<1> nd_, bits<1> l_, - string asmstr, InstrItinClass itin>: - FRR16_JALRC<nd_, l_, 1, (outs), (ins), !strconcat(asmstr, "\t $$ra"), - [], itin> ; // // EXT-RRI instruction format @@ -122,6 +131,13 @@ class ArithLogic16Defs<bit isCom=0> { bit neverHasSideEffects = 1; } +class MayLoad { + bit mayLoad = 1; +} + +class MayStore { + bit mayStore = 1; +} // // Format: ADDIU rx, immediate MIPS16e @@ -169,28 +185,30 @@ def JrRa16: FRR16_JALRC_RA_only_ins<0, 0, "jr", IIAlu>; // Purpose: Load Byte (Extended) // To load a byte from memory as a signed value. // -def LbRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lb", mem16, IIAlu>; +def LbRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lb", mem16, IILoad>, MayLoad; // // Format: LBU ry, offset(rx) MIPS16e // Purpose: Load Byte Unsigned (Extended) // To load a byte from memory as a unsigned value. // -def LbuRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lbu", mem16, IIAlu>; +def LbuRxRyOffMemX16: + FEXT_RRI16_mem_ins<0b10100, "lbu", mem16, IILoad>, MayLoad; // // Format: LH ry, offset(rx) MIPS16e // Purpose: Load Halfword signed (Extended) // To load a halfword from memory as a signed value. // -def LhRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lh", mem16, IIAlu>; +def LhRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lh", mem16, IILoad>, MayLoad; // // Format: LHU ry, offset(rx) MIPS16e // Purpose: Load Halfword unsigned (Extended) // To load a halfword from memory as an unsigned value. // -def LhuRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lhu", mem16, IIAlu>; +def LhuRxRyOffMemX16: + FEXT_RRI16_mem_ins<0b10100, "lhu", mem16, IILoad>, MayLoad; // // Format: LI rx, immediate MIPS16e @@ -204,7 +222,13 @@ def LiRxImmX16: FEXT_RI16_ins<0b01101, "li", IIAlu>; // Purpose: Load Word (Extended) // To load a word from memory as a signed value. // -def LwRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lw", mem16, IIAlu>; +def LwRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lw", mem16, IILoad>, MayLoad; + +// Format: LW rx, offset(sp) MIPS16e +// Purpose: Load Word (SP-Relative, Extended) +// To load an SP-relative word from memory as a signed value. 
+// +def LwRxSpImmX16: FEXT_RI16_SP_explicit_ins<0b10110, "lw", IILoad>, MayLoad; // // Format: MOVE r32, rz MIPS16e @@ -257,7 +281,7 @@ def OrRxRxRy16: FRxRxRy16_ins<0b01101, "or", IIAlu>, ArithLogic16Defs<1>; let ra=1, s=0,s0=1,s1=1 in def RestoreRaF16: FI8_SVRS16<0b1, (outs), (ins uimm16:$frame_size), - "restore \t$$ra, $$s0, $$s1, $frame_size", [], IILoad > { + "restore \t$$ra, $$s0, $$s1, $frame_size", [], IILoad >, MayLoad { let isCodeGenOnly = 1; } @@ -271,7 +295,7 @@ def RestoreRaF16: let ra=1, s=1,s0=1,s1=1 in def SaveRaF16: FI8_SVRS16<0b1, (outs), (ins uimm16:$frame_size), - "save \t$$ra, $$s0, $$s1, $frame_size", [], IILoad > { + "save \t$$ra, $$s0, $$s1, $frame_size", [], IIStore >, MayStore { let isCodeGenOnly = 1; } // @@ -279,14 +303,16 @@ def SaveRaF16: // Purpose: Store Byte (Extended) // To store a byte to memory. // -def SbRxRyOffMemX16: FEXT_RRI16_mem2_ins<0b11000, "sb", mem16, IIAlu>; +def SbRxRyOffMemX16: + FEXT_RRI16_mem2_ins<0b11000, "sb", mem16, IIStore>, MayStore; // // Format: SH ry, offset(rx) MIPS16e // Purpose: Store Halfword (Extended) // To store a halfword to memory. // -def ShRxRyOffMemX16: FEXT_RRI16_mem2_ins<0b11001, "sh", mem16, IIAlu>; +def ShRxRyOffMemX16: + FEXT_RRI16_mem2_ins<0b11001, "sh", mem16, IIStore>, MayStore; // // Format: SLL rx, ry, sa MIPS16e @@ -350,9 +376,18 @@ def SubuRxRyRz16: FRRR16_ins<0b11, "subu", IIAlu>, ArithLogic16Defs<0>; // Purpose: Store Word (Extended) // To store a word to memory. // -def SwRxRyOffMemX16: FEXT_RRI16_mem2_ins<0b11011, "sw", mem16, IIAlu>; +def SwRxRyOffMemX16: + FEXT_RRI16_mem2_ins<0b11011, "sw", mem16, IIStore>, MayStore; // +// Format: SW rx, offset(sp) MIPS16e +// Purpose: Store Word rx (SP-Relative) +// To store an SP-relative word to memory. +// +def SwRxSpImmX16: FEXT_RI16_SP_explicit_ins<0b11010, "sw", IIStore>, MayStore; + +// +// // Format: XOR rx, ry MIPS16e // Purpose: Xor // To do a bitwise logical XOR. diff --git a/lib/Target/Mips/Mips16RegisterInfo.cpp b/lib/Target/Mips/Mips16RegisterInfo.cpp index 106e82fd38..bfc6b6cabf 100644 --- a/lib/Target/Mips/Mips16RegisterInfo.cpp +++ b/lib/Target/Mips/Mips16RegisterInfo.cpp @@ -57,7 +57,6 @@ void Mips16RegisterInfo::eliminateFI(MachineBasicBlock::iterator II, MachineInstr &MI = *II; MachineFunction &MF = *MI.getParent()->getParent(); MachineFrameInfo *MFI = MF.getFrameInfo(); - MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); int MinCSFI = 0; @@ -77,8 +76,7 @@ void Mips16RegisterInfo::eliminateFI(MachineBasicBlock::iterator II, // getFrameRegister() returns. unsigned FrameReg; - if (MipsFI->isOutArgFI(FrameIndex) || - (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI)) + if (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI) FrameReg = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP; else FrameReg = getFrameRegister(MF); @@ -94,12 +92,8 @@ void Mips16RegisterInfo::eliminateFI(MachineBasicBlock::iterator II, // incoming argument, callee-saved register location or local variable. 
int64_t Offset; - if (MipsFI->isOutArgFI(FrameIndex)) - Offset = SPOffset; - else - Offset = SPOffset + (int64_t)StackSize; - - Offset += MI.getOperand(OpNo + 1).getImm(); + Offset = SPOffset + (int64_t)StackSize; + Offset += MI.getOperand(OpNo + 1).getImm(); DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n"); diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index 729b7921b4..1bf4a542d8 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -50,6 +50,13 @@ bool MipsAsmPrinter::runOnMachineFunction(MachineFunction &MF) { return true; } +bool MipsAsmPrinter::lowerOperand(const MachineOperand &MO, MCOperand &MCOp) { + MCOp = MCInstLowering.LowerOperand(MO); + return MCOp.isValid(); +} + +#include "MipsGenMCPseudoLowering.inc" + void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) { if (MI->isDebugValue()) { SmallString<128> Str; @@ -59,6 +66,10 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } + // Do any auto-generated pseudo lowerings. + if (emitPseudoExpansionLowering(OutStreamer, MI)) + return; + MachineBasicBlock::const_instr_iterator I = MI; MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); diff --git a/lib/Target/Mips/MipsAsmPrinter.h b/lib/Target/Mips/MipsAsmPrinter.h index a426f55ba7..efed6357a4 100644 --- a/lib/Target/Mips/MipsAsmPrinter.h +++ b/lib/Target/Mips/MipsAsmPrinter.h @@ -32,6 +32,14 @@ class LLVM_LIBRARY_VISIBILITY MipsAsmPrinter : public AsmPrinter { void EmitInstrWithMacroNoAT(const MachineInstr *MI); +private: + // tblgen'erated function. + bool emitPseudoExpansionLowering(MCStreamer &OutStreamer, + const MachineInstr *MI); + + // lowerOperand - Convert a MachineOperand into the equivalent MCOperand. + bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp); + public: const MipsSubtarget *Subtarget; diff --git a/lib/Target/Mips/MipsDSPInstrFormats.td b/lib/Target/Mips/MipsDSPInstrFormats.td index d9bcccc617..8e01d06596 100644 --- a/lib/Target/Mips/MipsDSPInstrFormats.td +++ b/lib/Target/Mips/MipsDSPInstrFormats.td @@ -23,3 +23,287 @@ def REGIMM_OPCODE : Field6<0b000001>; class DSPInst : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther> { let Predicates = [HasDSP]; } + +class PseudoDSP<dag outs, dag ins, list<dag> pattern>: + MipsPseudo<outs, ins, "", pattern> { + let Predicates = [HasDSP]; +} + +// ADDU.QB sub-class format. +class ADDU_QB_FMT<bits<5> op> : DSPInst { + bits<5> rd; + bits<5> rs; + bits<5> rt; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-11} = rd; + let Inst{10-6} = op; + let Inst{5-0} = 0b010000; +} + +class RADDU_W_QB_FMT<bits<5> op> : DSPInst { + bits<5> rd; + bits<5> rs; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-21} = rs; + let Inst{20-16} = 0; + let Inst{15-11} = rd; + let Inst{10-6} = op; + let Inst{5-0} = 0b010000; +} + +// CMPU.EQ.QB sub-class format. 
+class CMP_EQ_QB_R2_FMT<bits<5> op> : DSPInst { + bits<5> rs; + bits<5> rt; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-11} = 0; + let Inst{10-6} = op; + let Inst{5-0} = 0b010001; +} + +class CMP_EQ_QB_R3_FMT<bits<5> op> : DSPInst { + bits<5> rs; + bits<5> rt; + bits<5> rd; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-11} = rd; + let Inst{10-6} = op; + let Inst{5-0} = 0b010001; +} + +class PRECR_SRA_PH_W_FMT<bits<5> op> : DSPInst { + bits<5> rs; + bits<5> rt; + bits<5> sa; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-11} = sa; + let Inst{10-6} = op; + let Inst{5-0} = 0b010001; +} + +// ABSQ_S.PH sub-class format. +class ABSQ_S_PH_R2_FMT<bits<5> op> : DSPInst { + bits<5> rd; + bits<5> rt; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-21} = 0; + let Inst{20-16} = rt; + let Inst{15-11} = rd; + let Inst{10-6} = op; + let Inst{5-0} = 0b010010; +} + + +class REPL_FMT<bits<5> op> : DSPInst { + bits<5> rd; + bits<10> imm; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-16} = imm; + let Inst{15-11} = rd; + let Inst{10-6} = op; + let Inst{5-0} = 0b010010; +} + +// SHLL.QB sub-class format. +class SHLL_QB_FMT<bits<5> op> : DSPInst { + bits<5> rd; + bits<5> rt; + bits<5> rs_sa; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-21} = rs_sa; + let Inst{20-16} = rt; + let Inst{15-11} = rd; + let Inst{10-6} = op; + let Inst{5-0} = 0b010011; +} + +// LX sub-class format. +class LX_FMT<bits<5> op> : DSPInst { + bits<5> rd; + bits<5> base; + bits<5> index; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-21} = base; + let Inst{20-16} = index; + let Inst{15-11} = rd; + let Inst{10-6} = op; + let Inst{5-0} = 0b001010; +} + +// ADDUH.QB sub-class format. +class ADDUH_QB_FMT<bits<5> op> : DSPInst { + bits<5> rd; + bits<5> rs; + bits<5> rt; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-11} = rd; + let Inst{10-6} = op; + let Inst{5-0} = 0b011000; +} + +// APPEND sub-class format. +class APPEND_FMT<bits<5> op> : DSPInst { + bits<5> rt; + bits<5> rs; + bits<5> sa; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-11} = sa; + let Inst{10-6} = op; + let Inst{5-0} = 0b110001; +} + +// DPA.W.PH sub-class format. +class DPA_W_PH_FMT<bits<5> op> : DSPInst { + bits<2> ac; + bits<5> rs; + bits<5> rt; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-13} = 0; + let Inst{12-11} = ac; + let Inst{10-6} = op; + let Inst{5-0} = 0b110000; +} + +// MULT sub-class format. +class MULT_FMT<bits<6> opcode, bits<6> funct> : DSPInst { + bits<2> ac; + bits<5> rs; + bits<5> rt; + + let Opcode = opcode; + + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-13} = 0; + let Inst{12-11} = ac; + let Inst{10-6} = 0; + let Inst{5-0} = funct; +} + +// EXTR.W sub-class format (type 1). +class EXTR_W_TY1_FMT<bits<5> op> : DSPInst { + bits<5> rt; + bits<2> ac; + bits<5> shift_rs; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-21} = shift_rs; + let Inst{20-16} = rt; + let Inst{15-13} = 0; + let Inst{12-11} = ac; + let Inst{10-6} = op; + let Inst{5-0} = 0b111000; +} + +// SHILO sub-class format. 
+class SHILO_R1_FMT<bits<5> op> : DSPInst { + bits<2> ac; + bits<6> shift; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-20} = shift; + let Inst{19-13} = 0; + let Inst{12-11} = ac; + let Inst{10-6} = op; + let Inst{5-0} = 0b111000; +} + +class SHILO_R2_FMT<bits<5> op> : DSPInst { + bits<2> ac; + bits<5> rs; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-21} = rs; + let Inst{20-13} = 0; + let Inst{12-11} = ac; + let Inst{10-6} = op; + let Inst{5-0} = 0b111000; +} + +class RDDSP_FMT<bits<5> op> : DSPInst { + bits<5> rd; + bits<10> mask; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-16} = mask; + let Inst{15-11} = rd; + let Inst{10-6} = op; + let Inst{5-0} = 0b111000; +} + +class WRDSP_FMT<bits<5> op> : DSPInst { + bits<5> rs; + bits<10> mask; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-21} = rs; + let Inst{20-11} = mask; + let Inst{10-6} = op; + let Inst{5-0} = 0b111000; +} + +class BPOSGE32_FMT<bits<5> op> : DSPInst { + bits<16> offset; + + let Opcode = REGIMM_OPCODE.V; + + let Inst{25-21} = 0; + let Inst{20-16} = op; + let Inst{15-0} = offset; +} + +// INSV sub-class format. +class INSV_FMT<bits<6> op> : DSPInst { + bits<5> rt; + bits<5> rs; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-6} = 0; + let Inst{5-0} = op; +} diff --git a/lib/Target/Mips/MipsDSPInstrInfo.td b/lib/Target/Mips/MipsDSPInstrInfo.td index 1a4fd8733a..ef9402865b 100644 --- a/lib/Target/Mips/MipsDSPInstrInfo.td +++ b/lib/Target/Mips/MipsDSPInstrInfo.td @@ -18,3 +18,1302 @@ def immZExt4 : ImmLeaf<i32, [{return isUInt<4>(Imm);}]>; def immZExt8 : ImmLeaf<i32, [{return isUInt<8>(Imm);}]>; def immZExt10 : ImmLeaf<i32, [{return isUInt<10>(Imm);}]>; def immSExt6 : ImmLeaf<i32, [{return isInt<6>(Imm);}]>; + +// Mips-specific dsp nodes +def SDT_MipsExtr : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>]>; +def SDT_MipsShilo : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; +def SDT_MipsDPA : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>]>; + +class MipsDSPBase<string Opc, SDTypeProfile Prof> : + SDNode<!strconcat("MipsISD::", Opc), Prof, + [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>; + +class MipsDSPSideEffectBase<string Opc, SDTypeProfile Prof> : + SDNode<!strconcat("MipsISD::", Opc), Prof, + [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPSideEffect]>; + +def MipsEXTP : MipsDSPSideEffectBase<"EXTP", SDT_MipsExtr>; +def MipsEXTPDP : MipsDSPSideEffectBase<"EXTPDP", SDT_MipsExtr>; +def MipsEXTR_S_H : MipsDSPSideEffectBase<"EXTR_S_H", SDT_MipsExtr>; +def MipsEXTR_W : MipsDSPSideEffectBase<"EXTR_W", SDT_MipsExtr>; +def MipsEXTR_R_W : MipsDSPSideEffectBase<"EXTR_R_W", SDT_MipsExtr>; +def MipsEXTR_RS_W : MipsDSPSideEffectBase<"EXTR_RS_W", SDT_MipsExtr>; + +def MipsSHILO : MipsDSPBase<"SHILO", SDT_MipsShilo>; +def MipsMTHLIP : MipsDSPBase<"MTHLIP", SDT_MipsShilo>; + +def MipsMULSAQ_S_W_PH : MipsDSPSideEffectBase<"MULSAQ_S_W_PH", SDT_MipsDPA>; +def MipsMAQ_S_W_PHL : MipsDSPSideEffectBase<"MAQ_S_W_PHL", SDT_MipsDPA>; +def MipsMAQ_S_W_PHR : MipsDSPSideEffectBase<"MAQ_S_W_PHR", SDT_MipsDPA>; +def MipsMAQ_SA_W_PHL : MipsDSPSideEffectBase<"MAQ_SA_W_PHL", SDT_MipsDPA>; +def MipsMAQ_SA_W_PHR : MipsDSPSideEffectBase<"MAQ_SA_W_PHR", SDT_MipsDPA>; + +def MipsDPAU_H_QBL : MipsDSPBase<"DPAU_H_QBL", SDT_MipsDPA>; +def MipsDPAU_H_QBR : MipsDSPBase<"DPAU_H_QBR", SDT_MipsDPA>; +def MipsDPSU_H_QBL : MipsDSPBase<"DPSU_H_QBL", SDT_MipsDPA>; +def MipsDPSU_H_QBR : MipsDSPBase<"DPSU_H_QBR", SDT_MipsDPA>; +def MipsDPAQ_S_W_PH : MipsDSPSideEffectBase<"DPAQ_S_W_PH", 
SDT_MipsDPA>; +def MipsDPSQ_S_W_PH : MipsDSPSideEffectBase<"DPSQ_S_W_PH", SDT_MipsDPA>; +def MipsDPAQ_SA_L_W : MipsDSPSideEffectBase<"DPAQ_SA_L_W", SDT_MipsDPA>; +def MipsDPSQ_SA_L_W : MipsDSPSideEffectBase<"DPSQ_SA_L_W", SDT_MipsDPA>; + +def MipsDPA_W_PH : MipsDSPBase<"DPA_W_PH", SDT_MipsDPA>; +def MipsDPS_W_PH : MipsDSPBase<"DPS_W_PH", SDT_MipsDPA>; +def MipsDPAQX_S_W_PH : MipsDSPSideEffectBase<"DPAQX_S_W_PH", SDT_MipsDPA>; +def MipsDPAQX_SA_W_PH : MipsDSPSideEffectBase<"DPAQX_SA_W_PH", SDT_MipsDPA>; +def MipsDPAX_W_PH : MipsDSPBase<"DPAX_W_PH", SDT_MipsDPA>; +def MipsDPSX_W_PH : MipsDSPBase<"DPSX_W_PH", SDT_MipsDPA>; +def MipsDPSQX_S_W_PH : MipsDSPSideEffectBase<"DPSQX_S_W_PH", SDT_MipsDPA>; +def MipsDPSQX_SA_W_PH : MipsDSPSideEffectBase<"DPSQX_SA_W_PH", SDT_MipsDPA>; +def MipsMULSA_W_PH : MipsDSPBase<"MULSA_W_PH", SDT_MipsDPA>; + +def MipsMULT : MipsDSPBase<"MULT", SDT_MipsDPA>; +def MipsMULTU : MipsDSPBase<"MULTU", SDT_MipsDPA>; +def MipsMADD_DSP : MipsDSPBase<"MADD_DSP", SDT_MipsDPA>; +def MipsMADDU_DSP : MipsDSPBase<"MADDU_DSP", SDT_MipsDPA>; +def MipsMSUB_DSP : MipsDSPBase<"MSUB_DSP", SDT_MipsDPA>; +def MipsMSUBU_DSP : MipsDSPBase<"MSUBU_DSP", SDT_MipsDPA>; + +// Flags. +class IsCommutable { + bit isCommutable = 1; +} + +class UseAC { + list<Register> Uses = [AC0]; +} + +class UseDSPCtrl { + list<Register> Uses = [DSPCtrl]; +} + +class ClearDefs { + list<Register> Defs = []; +} + +// Instruction encoding. +class ADDU_QB_ENC : ADDU_QB_FMT<0b00000>; +class ADDU_S_QB_ENC : ADDU_QB_FMT<0b00100>; +class SUBU_QB_ENC : ADDU_QB_FMT<0b00001>; +class SUBU_S_QB_ENC : ADDU_QB_FMT<0b00101>; +class ADDQ_PH_ENC : ADDU_QB_FMT<0b01010>; +class ADDQ_S_PH_ENC : ADDU_QB_FMT<0b01110>; +class SUBQ_PH_ENC : ADDU_QB_FMT<0b01011>; +class SUBQ_S_PH_ENC : ADDU_QB_FMT<0b01111>; +class ADDQ_S_W_ENC : ADDU_QB_FMT<0b10110>; +class SUBQ_S_W_ENC : ADDU_QB_FMT<0b10111>; +class ADDSC_ENC : ADDU_QB_FMT<0b10000>; +class ADDWC_ENC : ADDU_QB_FMT<0b10001>; +class MODSUB_ENC : ADDU_QB_FMT<0b10010>; +class RADDU_W_QB_ENC : RADDU_W_QB_FMT<0b10100>; +class ABSQ_S_PH_ENC : ABSQ_S_PH_R2_FMT<0b01001>; +class ABSQ_S_W_ENC : ABSQ_S_PH_R2_FMT<0b10001>; +class PRECRQ_QB_PH_ENC : CMP_EQ_QB_R3_FMT<0b01100>; +class PRECRQ_PH_W_ENC : CMP_EQ_QB_R3_FMT<0b10100>; +class PRECRQ_RS_PH_W_ENC : CMP_EQ_QB_R3_FMT<0b10101>; +class PRECRQU_S_QB_PH_ENC : CMP_EQ_QB_R3_FMT<0b01111>; +class PRECEQ_W_PHL_ENC : ABSQ_S_PH_R2_FMT<0b01100>; +class PRECEQ_W_PHR_ENC : ABSQ_S_PH_R2_FMT<0b01101>; +class PRECEQU_PH_QBL_ENC : ABSQ_S_PH_R2_FMT<0b00100>; +class PRECEQU_PH_QBR_ENC : ABSQ_S_PH_R2_FMT<0b00101>; +class PRECEQU_PH_QBLA_ENC : ABSQ_S_PH_R2_FMT<0b00110>; +class PRECEQU_PH_QBRA_ENC : ABSQ_S_PH_R2_FMT<0b00111>; +class PRECEU_PH_QBL_ENC : ABSQ_S_PH_R2_FMT<0b11100>; +class PRECEU_PH_QBR_ENC : ABSQ_S_PH_R2_FMT<0b11101>; +class PRECEU_PH_QBLA_ENC : ABSQ_S_PH_R2_FMT<0b11110>; +class PRECEU_PH_QBRA_ENC : ABSQ_S_PH_R2_FMT<0b11111>; +class SHLL_QB_ENC : SHLL_QB_FMT<0b00000>; +class SHLLV_QB_ENC : SHLL_QB_FMT<0b00010>; +class SHRL_QB_ENC : SHLL_QB_FMT<0b00001>; +class SHRLV_QB_ENC : SHLL_QB_FMT<0b00011>; +class SHLL_PH_ENC : SHLL_QB_FMT<0b01000>; +class SHLLV_PH_ENC : SHLL_QB_FMT<0b01010>; +class SHLL_S_PH_ENC : SHLL_QB_FMT<0b01100>; +class SHLLV_S_PH_ENC : SHLL_QB_FMT<0b01110>; +class SHRA_PH_ENC : SHLL_QB_FMT<0b01001>; +class SHRAV_PH_ENC : SHLL_QB_FMT<0b01011>; +class SHRA_R_PH_ENC : SHLL_QB_FMT<0b01101>; +class SHRAV_R_PH_ENC : SHLL_QB_FMT<0b01111>; +class SHLL_S_W_ENC : SHLL_QB_FMT<0b10100>; +class SHLLV_S_W_ENC : SHLL_QB_FMT<0b10110>; +class SHRA_R_W_ENC : 
SHLL_QB_FMT<0b10101>; +class SHRAV_R_W_ENC : SHLL_QB_FMT<0b10111>; +class MULEU_S_PH_QBL_ENC : ADDU_QB_FMT<0b00110>; +class MULEU_S_PH_QBR_ENC : ADDU_QB_FMT<0b00111>; +class MULEQ_S_W_PHL_ENC : ADDU_QB_FMT<0b11100>; +class MULEQ_S_W_PHR_ENC : ADDU_QB_FMT<0b11101>; +class MULQ_RS_PH_ENC : ADDU_QB_FMT<0b11111>; +class MULSAQ_S_W_PH_ENC : DPA_W_PH_FMT<0b00110>; +class MAQ_S_W_PHL_ENC : DPA_W_PH_FMT<0b10100>; +class MAQ_S_W_PHR_ENC : DPA_W_PH_FMT<0b10110>; +class MAQ_SA_W_PHL_ENC : DPA_W_PH_FMT<0b10000>; +class MAQ_SA_W_PHR_ENC : DPA_W_PH_FMT<0b10010>; +class DPAU_H_QBL_ENC : DPA_W_PH_FMT<0b00011>; +class DPAU_H_QBR_ENC : DPA_W_PH_FMT<0b00111>; +class DPSU_H_QBL_ENC : DPA_W_PH_FMT<0b01011>; +class DPSU_H_QBR_ENC : DPA_W_PH_FMT<0b01111>; +class DPAQ_S_W_PH_ENC : DPA_W_PH_FMT<0b00100>; +class DPSQ_S_W_PH_ENC : DPA_W_PH_FMT<0b00101>; +class DPAQ_SA_L_W_ENC : DPA_W_PH_FMT<0b01100>; +class DPSQ_SA_L_W_ENC : DPA_W_PH_FMT<0b01101>; +class MULT_DSP_ENC : MULT_FMT<0b000000, 0b011000>; +class MULTU_DSP_ENC : MULT_FMT<0b000000, 0b011001>; +class MADD_DSP_ENC : MULT_FMT<0b011100, 0b000000>; +class MADDU_DSP_ENC : MULT_FMT<0b011100, 0b000001>; +class MSUB_DSP_ENC : MULT_FMT<0b011100, 0b000100>; +class MSUBU_DSP_ENC : MULT_FMT<0b011100, 0b000101>; +class CMPU_EQ_QB_ENC : CMP_EQ_QB_R2_FMT<0b00000>; +class CMPU_LT_QB_ENC : CMP_EQ_QB_R2_FMT<0b00001>; +class CMPU_LE_QB_ENC : CMP_EQ_QB_R2_FMT<0b00010>; +class CMPGU_EQ_QB_ENC : CMP_EQ_QB_R3_FMT<0b00100>; +class CMPGU_LT_QB_ENC : CMP_EQ_QB_R3_FMT<0b00101>; +class CMPGU_LE_QB_ENC : CMP_EQ_QB_R3_FMT<0b00110>; +class CMP_EQ_PH_ENC : CMP_EQ_QB_R2_FMT<0b01000>; +class CMP_LT_PH_ENC : CMP_EQ_QB_R2_FMT<0b01001>; +class CMP_LE_PH_ENC : CMP_EQ_QB_R2_FMT<0b01010>; +class BITREV_ENC : ABSQ_S_PH_R2_FMT<0b11011>; +class PACKRL_PH_ENC : CMP_EQ_QB_R3_FMT<0b01110>; +class REPL_QB_ENC : REPL_FMT<0b00010>; +class REPL_PH_ENC : REPL_FMT<0b01010>; +class REPLV_QB_ENC : ABSQ_S_PH_R2_FMT<0b00011>; +class REPLV_PH_ENC : ABSQ_S_PH_R2_FMT<0b01011>; +class PICK_QB_ENC : CMP_EQ_QB_R3_FMT<0b00011>; +class PICK_PH_ENC : CMP_EQ_QB_R3_FMT<0b01011>; +class LWX_ENC : LX_FMT<0b00000>; +class LHX_ENC : LX_FMT<0b00100>; +class LBUX_ENC : LX_FMT<0b00110>; +class BPOSGE32_ENC : BPOSGE32_FMT<0b11100>; +class INSV_ENC : INSV_FMT<0b001100>; + +class EXTP_ENC : EXTR_W_TY1_FMT<0b00010>; +class EXTPV_ENC : EXTR_W_TY1_FMT<0b00011>; +class EXTPDP_ENC : EXTR_W_TY1_FMT<0b01010>; +class EXTPDPV_ENC : EXTR_W_TY1_FMT<0b01011>; +class EXTR_W_ENC : EXTR_W_TY1_FMT<0b00000>; +class EXTRV_W_ENC : EXTR_W_TY1_FMT<0b00001>; +class EXTR_R_W_ENC : EXTR_W_TY1_FMT<0b00100>; +class EXTRV_R_W_ENC : EXTR_W_TY1_FMT<0b00101>; +class EXTR_RS_W_ENC : EXTR_W_TY1_FMT<0b00110>; +class EXTRV_RS_W_ENC : EXTR_W_TY1_FMT<0b00111>; +class EXTR_S_H_ENC : EXTR_W_TY1_FMT<0b01110>; +class EXTRV_S_H_ENC : EXTR_W_TY1_FMT<0b01111>; +class SHILO_ENC : SHILO_R1_FMT<0b11010>; +class SHILOV_ENC : SHILO_R2_FMT<0b11011>; +class MTHLIP_ENC : SHILO_R2_FMT<0b11111>; + +class RDDSP_ENC : RDDSP_FMT<0b10010>; +class WRDSP_ENC : WRDSP_FMT<0b10011>; +class ADDU_PH_ENC : ADDU_QB_FMT<0b01000>; +class ADDU_S_PH_ENC : ADDU_QB_FMT<0b01100>; +class SUBU_PH_ENC : ADDU_QB_FMT<0b01001>; +class SUBU_S_PH_ENC : ADDU_QB_FMT<0b01101>; +class CMPGDU_EQ_QB_ENC : CMP_EQ_QB_R3_FMT<0b11000>; +class CMPGDU_LT_QB_ENC : CMP_EQ_QB_R3_FMT<0b11001>; +class CMPGDU_LE_QB_ENC : CMP_EQ_QB_R3_FMT<0b11010>; +class ABSQ_S_QB_ENC : ABSQ_S_PH_R2_FMT<0b00001>; +class ADDUH_QB_ENC : ADDUH_QB_FMT<0b00000>; +class ADDUH_R_QB_ENC : ADDUH_QB_FMT<0b00010>; +class SUBUH_QB_ENC : ADDUH_QB_FMT<0b00001>; 
+class SUBUH_R_QB_ENC : ADDUH_QB_FMT<0b00011>; +class ADDQH_PH_ENC : ADDUH_QB_FMT<0b01000>; +class ADDQH_R_PH_ENC : ADDUH_QB_FMT<0b01010>; +class SUBQH_PH_ENC : ADDUH_QB_FMT<0b01001>; +class SUBQH_R_PH_ENC : ADDUH_QB_FMT<0b01011>; +class ADDQH_W_ENC : ADDUH_QB_FMT<0b10000>; +class ADDQH_R_W_ENC : ADDUH_QB_FMT<0b10010>; +class SUBQH_W_ENC : ADDUH_QB_FMT<0b10001>; +class SUBQH_R_W_ENC : ADDUH_QB_FMT<0b10011>; +class MUL_PH_ENC : ADDUH_QB_FMT<0b01100>; +class MUL_S_PH_ENC : ADDUH_QB_FMT<0b01110>; +class MULQ_S_W_ENC : ADDUH_QB_FMT<0b10110>; +class MULQ_RS_W_ENC : ADDUH_QB_FMT<0b10111>; +class MULQ_S_PH_ENC : ADDU_QB_FMT<0b11110>; +class DPA_W_PH_ENC : DPA_W_PH_FMT<0b00000>; +class DPS_W_PH_ENC : DPA_W_PH_FMT<0b00001>; +class DPAQX_S_W_PH_ENC : DPA_W_PH_FMT<0b11000>; +class DPAQX_SA_W_PH_ENC : DPA_W_PH_FMT<0b11010>; +class DPAX_W_PH_ENC : DPA_W_PH_FMT<0b01000>; +class DPSX_W_PH_ENC : DPA_W_PH_FMT<0b01001>; +class DPSQX_S_W_PH_ENC : DPA_W_PH_FMT<0b11001>; +class DPSQX_SA_W_PH_ENC : DPA_W_PH_FMT<0b11011>; +class MULSA_W_PH_ENC : DPA_W_PH_FMT<0b00010>; +class PRECR_QB_PH_ENC : CMP_EQ_QB_R3_FMT<0b01101>; +class PRECR_SRA_PH_W_ENC : PRECR_SRA_PH_W_FMT<0b11110>; +class PRECR_SRA_R_PH_W_ENC : PRECR_SRA_PH_W_FMT<0b11111>; +class SHRA_QB_ENC : SHLL_QB_FMT<0b00100>; +class SHRAV_QB_ENC : SHLL_QB_FMT<0b00110>; +class SHRA_R_QB_ENC : SHLL_QB_FMT<0b00101>; +class SHRAV_R_QB_ENC : SHLL_QB_FMT<0b00111>; +class SHRL_PH_ENC : SHLL_QB_FMT<0b11001>; +class SHRLV_PH_ENC : SHLL_QB_FMT<0b11011>; +class APPEND_ENC : APPEND_FMT<0b00000>; +class BALIGN_ENC : APPEND_FMT<0b10000>; +class PREPEND_ENC : APPEND_FMT<0b00001>; + +// Instruction desc. +class ADDU_QB_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + InstrItinClass itin, RegisterClass RCD, + RegisterClass RCS, RegisterClass RCT = RCS> { + dag OutOperandList = (outs RCD:$rd); + dag InOperandList = (ins RCS:$rs, RCT:$rt); + string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt"); + list<dag> Pattern = [(set RCD:$rd, (OpNode RCS:$rs, RCT:$rt))]; + InstrItinClass Itinerary = itin; + list<Register> Defs = [DSPCtrl]; +} + +class RADDU_W_QB_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + InstrItinClass itin, RegisterClass RCD, + RegisterClass RCS = RCD> { + dag OutOperandList = (outs RCD:$rd); + dag InOperandList = (ins RCS:$rs); + string AsmString = !strconcat(instr_asm, "\t$rd, $rs"); + list<dag> Pattern = [(set RCD:$rd, (OpNode RCS:$rs))]; + InstrItinClass Itinerary = itin; + list<Register> Defs = [DSPCtrl]; +} + +class CMP_EQ_QB_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + InstrItinClass itin, RegisterClass RCS, + RegisterClass RCT = RCS> { + dag OutOperandList = (outs); + dag InOperandList = (ins RCS:$rs, RCT:$rt); + string AsmString = !strconcat(instr_asm, "\t$rs, $rt"); + list<dag> Pattern = [(OpNode RCS:$rs, RCT:$rt)]; + InstrItinClass Itinerary = itin; + list<Register> Defs = [DSPCtrl]; +} + +class CMP_EQ_QB_R3_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + InstrItinClass itin, RegisterClass RCD, + RegisterClass RCS, RegisterClass RCT = RCS> { + dag OutOperandList = (outs RCD:$rd); + dag InOperandList = (ins RCS:$rs, RCT:$rt); + string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt"); + list<dag> Pattern = [(set RCD:$rd, (OpNode RCS:$rs, RCT:$rt))]; + InstrItinClass Itinerary = itin; + list<Register> Defs = [DSPCtrl]; +} + +class PRECR_SRA_PH_W_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + InstrItinClass itin, RegisterClass RCT, + RegisterClass RCS = RCT> { + dag OutOperandList = (outs RCT:$rt); + dag 
InOperandList = (ins RCS:$rs, shamt:$sa, RCS:$src); + string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $sa"); + list<dag> Pattern = [(set RCT:$rt, (OpNode RCS:$src, RCS:$rs, immZExt5:$sa))]; + InstrItinClass Itinerary = itin; + list<Register> Defs = [DSPCtrl]; + string Constraints = "$src = $rt"; +} + +class ABSQ_S_PH_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + InstrItinClass itin, RegisterClass RCD, + RegisterClass RCT = RCD> { + dag OutOperandList = (outs RCD:$rd); + dag InOperandList = (ins RCT:$rt); + string AsmString = !strconcat(instr_asm, "\t$rd, $rt"); + list<dag> Pattern = [(set RCD:$rd, (OpNode RCT:$rt))]; + InstrItinClass Itinerary = itin; + list<Register> Defs = [DSPCtrl]; +} + +class REPL_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + ImmLeaf immPat, InstrItinClass itin, RegisterClass RC> { + dag OutOperandList = (outs RC:$rd); + dag InOperandList = (ins uimm16:$imm); + string AsmString = !strconcat(instr_asm, "\t$rd, $imm"); + list<dag> Pattern = [(set RC:$rd, (OpNode immPat:$imm))]; + InstrItinClass Itinerary = itin; + list<Register> Defs = [DSPCtrl]; +} + +class SHLL_QB_R3_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + InstrItinClass itin, RegisterClass RC> { + dag OutOperandList = (outs RC:$rd); + dag InOperandList = (ins RC:$rt, CPURegs:$rs_sa); + string AsmString = !strconcat(instr_asm, "\t$rd, $rt, $rs_sa"); + list<dag> Pattern = [(set RC:$rd, (OpNode RC:$rt, CPURegs:$rs_sa))]; + InstrItinClass Itinerary = itin; + list<Register> Defs = [DSPCtrl]; +} + +class SHLL_QB_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + SDPatternOperator ImmPat, InstrItinClass itin, + RegisterClass RC> { + dag OutOperandList = (outs RC:$rd); + dag InOperandList = (ins RC:$rt, uimm16:$rs_sa); + string AsmString = !strconcat(instr_asm, "\t$rd, $rt, $rs_sa"); + list<dag> Pattern = [(set RC:$rd, (OpNode RC:$rt, ImmPat:$rs_sa))]; + InstrItinClass Itinerary = itin; + list<Register> Defs = [DSPCtrl]; +} + +class LX_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + InstrItinClass itin> { + dag OutOperandList = (outs CPURegs:$rd); + dag InOperandList = (ins CPURegs:$base, CPURegs:$index); + string AsmString = !strconcat(instr_asm, "\t$rd, ${index}(${base})"); + list<dag> Pattern = [(set CPURegs:$rd, + (OpNode CPURegs:$base, CPURegs:$index))]; + InstrItinClass Itinerary = itin; + list<Register> Defs = [DSPCtrl]; + bit mayLoad = 1; +} + +class ADDUH_QB_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + InstrItinClass itin, RegisterClass RCD, + RegisterClass RCS = RCD, RegisterClass RCT = RCD> { + dag OutOperandList = (outs RCD:$rd); + dag InOperandList = (ins RCS:$rs, RCT:$rt); + string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt"); + list<dag> Pattern = [(set RCD:$rd, (OpNode RCS:$rs, RCT:$rt))]; + InstrItinClass Itinerary = itin; + list<Register> Defs = [DSPCtrl]; +} + +class APPEND_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + SDPatternOperator ImmOp, InstrItinClass itin> { + dag OutOperandList = (outs CPURegs:$rt); + dag InOperandList = (ins CPURegs:$rs, shamt:$sa, CPURegs:$src); + string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $sa"); + list<dag> Pattern = [(set CPURegs:$rt, + (OpNode CPURegs:$src, CPURegs:$rs, ImmOp:$sa))]; + InstrItinClass Itinerary = itin; + list<Register> Defs = [DSPCtrl]; + string Constraints = "$src = $rt"; +} + +class EXTR_W_TY1_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + InstrItinClass itin> { + dag OutOperandList = (outs CPURegs:$rt); + dag InOperandList = (ins ACRegs:$ac, 
CPURegs:$shift_rs); + string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $shift_rs"); + InstrItinClass Itinerary = itin; + list<Register> Defs = [DSPCtrl]; +} + +class EXTR_W_TY1_R1_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + InstrItinClass itin> { + dag OutOperandList = (outs CPURegs:$rt); + dag InOperandList = (ins ACRegs:$ac, uimm16:$shift_rs); + string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $shift_rs"); + InstrItinClass Itinerary = itin; + list<Register> Defs = [DSPCtrl]; +} + +class SHILO_R1_PSEUDO_BASE<SDPatternOperator OpNode, InstrItinClass itin, + Instruction realinst> : + PseudoDSP<(outs), (ins simm16:$shift), [(OpNode immSExt6:$shift)]>, + PseudoInstExpansion<(realinst AC0, simm16:$shift)> { + list<Register> Defs = [DSPCtrl, AC0]; + list<Register> Uses = [AC0]; + InstrItinClass Itinerary = itin; +} + +class SHILO_R1_DESC_BASE<string instr_asm> { + dag OutOperandList = (outs ACRegs:$ac); + dag InOperandList = (ins simm16:$shift); + string AsmString = !strconcat(instr_asm, "\t$ac, $shift"); +} + +class SHILO_R2_PSEUDO_BASE<SDPatternOperator OpNode, InstrItinClass itin, + Instruction realinst> : + PseudoDSP<(outs), (ins CPURegs:$rs), [(OpNode CPURegs:$rs)]>, + PseudoInstExpansion<(realinst AC0, CPURegs:$rs)> { + list<Register> Defs = [DSPCtrl, AC0]; + list<Register> Uses = [AC0]; + InstrItinClass Itinerary = itin; +} + +class SHILO_R2_DESC_BASE<string instr_asm> { + dag OutOperandList = (outs ACRegs:$ac); + dag InOperandList = (ins CPURegs:$rs); + string AsmString = !strconcat(instr_asm, "\t$ac, $rs"); +} + +class MTHLIP_DESC_BASE<string instr_asm> { + dag OutOperandList = (outs ACRegs:$ac); + dag InOperandList = (ins CPURegs:$rs); + string AsmString = !strconcat(instr_asm, "\t$rs, $ac"); +} + +class RDDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + InstrItinClass itin> { + dag OutOperandList = (outs CPURegs:$rd); + dag InOperandList = (ins uimm16:$mask); + string AsmString = !strconcat(instr_asm, "\t$rd, $mask"); + list<dag> Pattern = [(set CPURegs:$rd, (OpNode immZExt10:$mask))]; + InstrItinClass Itinerary = itin; + list<Register> Uses = [DSPCtrl]; +} + +class WRDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + InstrItinClass itin> { + dag OutOperandList = (outs); + dag InOperandList = (ins CPURegs:$rs, uimm16:$mask); + string AsmString = !strconcat(instr_asm, "\t$rs, $mask"); + list<dag> Pattern = [(OpNode CPURegs:$rs, immZExt10:$mask)]; + InstrItinClass Itinerary = itin; + list<Register> Defs = [DSPCtrl]; +} + +class DPA_W_PH_PSEUDO_BASE<SDPatternOperator OpNode, InstrItinClass itin, + Instruction realinst> : + PseudoDSP<(outs), (ins CPURegs:$rs, CPURegs:$rt), + [(OpNode CPURegs:$rs, CPURegs:$rt)]>, + PseudoInstExpansion<(realinst AC0, CPURegs:$rs, CPURegs:$rt)> { + list<Register> Defs = [DSPCtrl, AC0]; + list<Register> Uses = [AC0]; + InstrItinClass Itinerary = itin; +} + +class DPA_W_PH_DESC_BASE<string instr_asm> { + dag OutOperandList = (outs ACRegs:$ac); + dag InOperandList = (ins CPURegs:$rs, CPURegs:$rt); + string AsmString = !strconcat(instr_asm, "\t$ac, $rs, $rt"); +} + +class MULT_PSEUDO_BASE<SDPatternOperator OpNode, InstrItinClass itin, + Instruction realinst> : + PseudoDSP<(outs), (ins CPURegs:$rs, CPURegs:$rt), + [(OpNode CPURegs:$rs, CPURegs:$rt)]>, + PseudoInstExpansion<(realinst AC0, CPURegs:$rs, CPURegs:$rt)> { + list<Register> Defs = [DSPCtrl, AC0]; + InstrItinClass Itinerary = itin; +} + +class MULT_DESC_BASE<string instr_asm> { + dag OutOperandList = (outs ACRegs:$ac); + dag InOperandList = (ins CPURegs:$rs, 
CPURegs:$rt); + string AsmString = !strconcat(instr_asm, "\t$ac, $rs, $rt"); +} + +class BPOSGE32_PSEUDO_DESC_BASE<SDPatternOperator OpNode, InstrItinClass itin> : + MipsPseudo<(outs CPURegs:$dst), (ins), "", [(set CPURegs:$dst, (OpNode))]> { + list<Register> Uses = [DSPCtrl]; + bit usesCustomInserter = 1; +} + +class BPOSGE32_DESC_BASE<string instr_asm, InstrItinClass itin> { + dag OutOperandList = (outs); + dag InOperandList = (ins brtarget:$offset); + string AsmString = !strconcat(instr_asm, "\t$offset"); + InstrItinClass Itinerary = itin; + list<Register> Uses = [DSPCtrl]; + bit isBranch = 1; + bit isTerminator = 1; + bit hasDelaySlot = 1; +} + +class INSV_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + InstrItinClass itin> { + dag OutOperandList = (outs CPURegs:$rt); + dag InOperandList = (ins CPURegs:$src, CPURegs:$rs); + string AsmString = !strconcat(instr_asm, "\t$rt, $rs"); + list<dag> Pattern = [(set CPURegs:$rt, (OpNode CPURegs:$src, CPURegs:$rs))]; + InstrItinClass Itinerary = itin; + list<Register> Uses = [DSPCtrl]; + string Constraints = "$src = $rt"; +} + +//===----------------------------------------------------------------------===// +// MIPS DSP Rev 1 +//===----------------------------------------------------------------------===// + +// Addition/subtraction +class ADDU_QB_DESC : ADDU_QB_DESC_BASE<"addu.qb", int_mips_addu_qb, NoItinerary, + DSPRegs, DSPRegs>, IsCommutable; + +class ADDU_S_QB_DESC : ADDU_QB_DESC_BASE<"addu_s.qb", int_mips_addu_s_qb, + NoItinerary, DSPRegs, DSPRegs>, + IsCommutable; + +class SUBU_QB_DESC : ADDU_QB_DESC_BASE<"subu.qb", int_mips_subu_qb, NoItinerary, + DSPRegs, DSPRegs>; + +class SUBU_S_QB_DESC : ADDU_QB_DESC_BASE<"subu_s.qb", int_mips_subu_s_qb, + NoItinerary, DSPRegs, DSPRegs>; + +class ADDQ_PH_DESC : ADDU_QB_DESC_BASE<"addq.ph", int_mips_addq_ph, NoItinerary, + DSPRegs, DSPRegs>, IsCommutable; + +class ADDQ_S_PH_DESC : ADDU_QB_DESC_BASE<"addq_s.ph", int_mips_addq_s_ph, + NoItinerary, DSPRegs, DSPRegs>, + IsCommutable; + +class SUBQ_PH_DESC : ADDU_QB_DESC_BASE<"subq.ph", int_mips_subq_ph, NoItinerary, + DSPRegs, DSPRegs>; + +class SUBQ_S_PH_DESC : ADDU_QB_DESC_BASE<"subq_s.ph", int_mips_subq_s_ph, + NoItinerary, DSPRegs, DSPRegs>; + +class ADDQ_S_W_DESC : ADDU_QB_DESC_BASE<"addq_s.w", int_mips_addq_s_w, + NoItinerary, CPURegs, CPURegs>, + IsCommutable; + +class SUBQ_S_W_DESC : ADDU_QB_DESC_BASE<"subq_s.w", int_mips_subq_s_w, + NoItinerary, CPURegs, CPURegs>; + +class ADDSC_DESC : ADDU_QB_DESC_BASE<"addsc", int_mips_addsc, NoItinerary, + CPURegs, CPURegs>, IsCommutable; + +class ADDWC_DESC : ADDU_QB_DESC_BASE<"addwc", int_mips_addwc, NoItinerary, + CPURegs, CPURegs>, + IsCommutable, UseDSPCtrl; + +class MODSUB_DESC : ADDU_QB_DESC_BASE<"modsub", int_mips_modsub, NoItinerary, + CPURegs, CPURegs>, ClearDefs; + +class RADDU_W_QB_DESC : RADDU_W_QB_DESC_BASE<"raddu.w.qb", int_mips_raddu_w_qb, + NoItinerary, CPURegs, DSPRegs>, + ClearDefs; + +// Absolute value +class ABSQ_S_PH_DESC : ABSQ_S_PH_R2_DESC_BASE<"absq_s.ph", int_mips_absq_s_ph, + NoItinerary, DSPRegs>; + +class ABSQ_S_W_DESC : ABSQ_S_PH_R2_DESC_BASE<"absq_s.w", int_mips_absq_s_w, + NoItinerary, CPURegs>; + +// Precision reduce/expand +class PRECRQ_QB_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrq.qb.ph", + int_mips_precrq_qb_ph, + NoItinerary, DSPRegs, DSPRegs>, + ClearDefs; + +class PRECRQ_PH_W_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrq.ph.w", + int_mips_precrq_ph_w, + NoItinerary, DSPRegs, CPURegs>, + ClearDefs; + +class PRECRQ_RS_PH_W_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrq_rs.ph.w", + 
int_mips_precrq_rs_ph_w, + NoItinerary, DSPRegs, + CPURegs>; + +class PRECRQU_S_QB_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrqu_s.qb.ph", + int_mips_precrqu_s_qb_ph, + NoItinerary, DSPRegs, + DSPRegs>; + +class PRECEQ_W_PHL_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceq.w.phl", + int_mips_preceq_w_phl, + NoItinerary, CPURegs, DSPRegs>, + ClearDefs; + +class PRECEQ_W_PHR_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceq.w.phr", + int_mips_preceq_w_phr, + NoItinerary, CPURegs, DSPRegs>, + ClearDefs; + +class PRECEQU_PH_QBL_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbl", + int_mips_precequ_ph_qbl, + NoItinerary, DSPRegs>, + ClearDefs; + +class PRECEQU_PH_QBR_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbr", + int_mips_precequ_ph_qbr, + NoItinerary, DSPRegs>, + ClearDefs; + +class PRECEQU_PH_QBLA_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbla", + int_mips_precequ_ph_qbla, + NoItinerary, DSPRegs>, + ClearDefs; + +class PRECEQU_PH_QBRA_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbra", + int_mips_precequ_ph_qbra, + NoItinerary, DSPRegs>, + ClearDefs; + +class PRECEU_PH_QBL_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbl", + int_mips_preceu_ph_qbl, + NoItinerary, DSPRegs>, + ClearDefs; + +class PRECEU_PH_QBR_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbr", + int_mips_preceu_ph_qbr, + NoItinerary, DSPRegs>, + ClearDefs; + +class PRECEU_PH_QBLA_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbla", + int_mips_preceu_ph_qbla, + NoItinerary, DSPRegs>, + ClearDefs; + +class PRECEU_PH_QBRA_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbra", + int_mips_preceu_ph_qbra, + NoItinerary, DSPRegs>, + ClearDefs; + +// Shift +class SHLL_QB_DESC : SHLL_QB_R2_DESC_BASE<"shll.qb", int_mips_shll_qb, immZExt3, + NoItinerary, DSPRegs>; + +class SHLLV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shllv.qb", int_mips_shll_qb, + NoItinerary, DSPRegs>; + +class SHRL_QB_DESC : SHLL_QB_R2_DESC_BASE<"shrl.qb", int_mips_shrl_qb, immZExt3, + NoItinerary, DSPRegs>, ClearDefs; + +class SHRLV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrlv.qb", int_mips_shrl_qb, + NoItinerary, DSPRegs>, ClearDefs; + +class SHLL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shll.ph", int_mips_shll_ph, immZExt4, + NoItinerary, DSPRegs>; + +class SHLLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shllv.ph", int_mips_shll_ph, + NoItinerary, DSPRegs>; + +class SHLL_S_PH_DESC : SHLL_QB_R2_DESC_BASE<"shll_s.ph", int_mips_shll_s_ph, + immZExt4, NoItinerary, DSPRegs>; + +class SHLLV_S_PH_DESC : SHLL_QB_R3_DESC_BASE<"shllv_s.ph", int_mips_shll_s_ph, + NoItinerary, DSPRegs>; + +class SHRA_PH_DESC : SHLL_QB_R2_DESC_BASE<"shra.ph", int_mips_shra_ph, immZExt4, + NoItinerary, DSPRegs>, ClearDefs; + +class SHRAV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrav.ph", int_mips_shra_ph, + NoItinerary, DSPRegs>, ClearDefs; + +class SHRA_R_PH_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.ph", int_mips_shra_r_ph, + immZExt4, NoItinerary, DSPRegs>, + ClearDefs; + +class SHRAV_R_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.ph", int_mips_shra_r_ph, + NoItinerary, DSPRegs>, ClearDefs; + +class SHLL_S_W_DESC : SHLL_QB_R2_DESC_BASE<"shll_s.w", int_mips_shll_s_w, + immZExt5, NoItinerary, CPURegs>; + +class SHLLV_S_W_DESC : SHLL_QB_R3_DESC_BASE<"shllv_s.w", int_mips_shll_s_w, + NoItinerary, CPURegs>; + +class SHRA_R_W_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.w", int_mips_shra_r_w, + immZExt5, NoItinerary, CPURegs>, + ClearDefs; + +class SHRAV_R_W_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.w", int_mips_shra_r_w, + NoItinerary, CPURegs>; + +// Multiplication +class MULEU_S_PH_QBL_DESC : ADDU_QB_DESC_BASE<"muleu_s.ph.qbl", + int_mips_muleu_s_ph_qbl, + NoItinerary, DSPRegs, DSPRegs>; + +class 
MULEU_S_PH_QBR_DESC : ADDU_QB_DESC_BASE<"muleu_s.ph.qbr", + int_mips_muleu_s_ph_qbr, + NoItinerary, DSPRegs, DSPRegs>; + +class MULEQ_S_W_PHL_DESC : ADDU_QB_DESC_BASE<"muleq_s.w.phl", + int_mips_muleq_s_w_phl, + NoItinerary, CPURegs, DSPRegs>, + IsCommutable; + +class MULEQ_S_W_PHR_DESC : ADDU_QB_DESC_BASE<"muleq_s.w.phr", + int_mips_muleq_s_w_phr, + NoItinerary, CPURegs, DSPRegs>, + IsCommutable; + +class MULQ_RS_PH_DESC : ADDU_QB_DESC_BASE<"mulq_rs.ph", int_mips_mulq_rs_ph, + NoItinerary, DSPRegs, DSPRegs>, + IsCommutable; + +class MULSAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsaq_s.w.ph">; + +class MAQ_S_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phl">; + +class MAQ_S_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phr">; + +class MAQ_SA_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phl">; + +class MAQ_SA_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phr">; + +// Dot product with accumulate/subtract +class DPAU_H_QBL_DESC : DPA_W_PH_DESC_BASE<"dpau.h.qbl">; + +class DPAU_H_QBR_DESC : DPA_W_PH_DESC_BASE<"dpau.h.qbr">; + +class DPSU_H_QBL_DESC : DPA_W_PH_DESC_BASE<"dpsu.h.qbl">; + +class DPSU_H_QBR_DESC : DPA_W_PH_DESC_BASE<"dpsu.h.qbr">; + +class DPAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaq_s.w.ph">; + +class DPSQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsq_s.w.ph">; + +class DPAQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpaq_sa.l.w">; + +class DPSQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpsq_sa.l.w">; + +class MULT_DSP_DESC : MULT_DESC_BASE<"mult">; + +class MULTU_DSP_DESC : MULT_DESC_BASE<"multu">; + +class MADD_DSP_DESC : MULT_DESC_BASE<"madd">; + +class MADDU_DSP_DESC : MULT_DESC_BASE<"maddu">; + +class MSUB_DSP_DESC : MULT_DESC_BASE<"msub">; + +class MSUBU_DSP_DESC : MULT_DESC_BASE<"msubu">; + +// Comparison +class CMPU_EQ_QB_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmpu.eq.qb", + int_mips_cmpu_eq_qb, NoItinerary, + DSPRegs>, IsCommutable; + +class CMPU_LT_QB_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmpu.lt.qb", + int_mips_cmpu_lt_qb, NoItinerary, + DSPRegs>, IsCommutable; + +class CMPU_LE_QB_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmpu.le.qb", + int_mips_cmpu_le_qb, NoItinerary, + DSPRegs>, IsCommutable; + +class CMPGU_EQ_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgu.eq.qb", + int_mips_cmpgu_eq_qb, + NoItinerary, CPURegs, DSPRegs>, + IsCommutable; + +class CMPGU_LT_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgu.lt.qb", + int_mips_cmpgu_lt_qb, + NoItinerary, CPURegs, DSPRegs>, + IsCommutable; + +class CMPGU_LE_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgu.le.qb", + int_mips_cmpgu_le_qb, + NoItinerary, CPURegs, DSPRegs>, + IsCommutable; + +class CMP_EQ_PH_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmp.eq.ph", int_mips_cmp_eq_ph, + NoItinerary, DSPRegs>, + IsCommutable; + +class CMP_LT_PH_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmp.lt.ph", int_mips_cmp_lt_ph, + NoItinerary, DSPRegs>, + IsCommutable; + +class CMP_LE_PH_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmp.le.ph", int_mips_cmp_le_ph, + NoItinerary, DSPRegs>, + IsCommutable; + +// Misc +class BITREV_DESC : ABSQ_S_PH_R2_DESC_BASE<"bitrev", int_mips_bitrev, + NoItinerary, CPURegs>, ClearDefs; + +class PACKRL_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"packrl.ph", int_mips_packrl_ph, + NoItinerary, DSPRegs, DSPRegs>, + ClearDefs; + +class REPL_QB_DESC : REPL_DESC_BASE<"repl.qb", int_mips_repl_qb, immZExt8, + NoItinerary, DSPRegs>, ClearDefs; + +class REPL_PH_DESC : REPL_DESC_BASE<"repl.ph", int_mips_repl_ph, immZExt10, + NoItinerary, DSPRegs>, ClearDefs; + +class REPLV_QB_DESC : ABSQ_S_PH_R2_DESC_BASE<"replv.qb", int_mips_repl_qb, + NoItinerary, DSPRegs, CPURegs>, + ClearDefs; + +class REPLV_PH_DESC : ABSQ_S_PH_R2_DESC_BASE<"replv.ph", int_mips_repl_ph, + 
NoItinerary, DSPRegs, CPURegs>, + ClearDefs; + +class PICK_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"pick.qb", int_mips_pick_qb, + NoItinerary, DSPRegs, DSPRegs>, + ClearDefs, UseDSPCtrl; + +class PICK_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"pick.ph", int_mips_pick_ph, + NoItinerary, DSPRegs, DSPRegs>, + ClearDefs, UseDSPCtrl; + +class LWX_DESC : LX_DESC_BASE<"lwx", int_mips_lwx, NoItinerary>, ClearDefs; + +class LHX_DESC : LX_DESC_BASE<"lhx", int_mips_lhx, NoItinerary>, ClearDefs; + +class LBUX_DESC : LX_DESC_BASE<"lbux", int_mips_lbux, NoItinerary>, ClearDefs; + +class BPOSGE32_DESC : BPOSGE32_DESC_BASE<"bposge32", NoItinerary>; + +// Extr +class EXTP_DESC : EXTR_W_TY1_R1_DESC_BASE<"extp", MipsEXTP, NoItinerary>; + +class EXTPV_DESC : EXTR_W_TY1_R2_DESC_BASE<"extpv", MipsEXTP, NoItinerary>; + +class EXTPDP_DESC : EXTR_W_TY1_R1_DESC_BASE<"extpdp", MipsEXTPDP, NoItinerary>; + +class EXTPDPV_DESC : EXTR_W_TY1_R2_DESC_BASE<"extpdpv", MipsEXTPDP, + NoItinerary>; + +class EXTR_W_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr.w", MipsEXTR_W, NoItinerary>; + +class EXTRV_W_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv.w", MipsEXTR_W, + NoItinerary>; + +class EXTR_R_W_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr_r.w", MipsEXTR_R_W, + NoItinerary>; + +class EXTRV_R_W_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv_r.w", MipsEXTR_R_W, + NoItinerary>; + +class EXTR_RS_W_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr_rs.w", MipsEXTR_RS_W, + NoItinerary>; + +class EXTRV_RS_W_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv_rs.w", MipsEXTR_RS_W, + NoItinerary>; + +class EXTR_S_H_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr_s.h", MipsEXTR_S_H, + NoItinerary>; + +class EXTRV_S_H_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv_s.h", MipsEXTR_S_H, + NoItinerary>; + +class SHILO_DESC : SHILO_R1_DESC_BASE<"shilo">; + +class SHILOV_DESC : SHILO_R2_DESC_BASE<"shilov">; + +class MTHLIP_DESC : MTHLIP_DESC_BASE<"mthlip">; + +class RDDSP_DESC : RDDSP_DESC_BASE<"rddsp", int_mips_rddsp, NoItinerary>; + +class WRDSP_DESC : WRDSP_DESC_BASE<"wrdsp", int_mips_wrdsp, NoItinerary>; + +class INSV_DESC : INSV_DESC_BASE<"insv", int_mips_insv, NoItinerary>; + +//===----------------------------------------------------------------------===// +// MIPS DSP Rev 2 +// Addition/subtraction +class ADDU_PH_DESC : ADDU_QB_DESC_BASE<"addu.ph", int_mips_addu_ph, NoItinerary, + DSPRegs, DSPRegs>, IsCommutable; + +class ADDU_S_PH_DESC : ADDU_QB_DESC_BASE<"addu_s.ph", int_mips_addu_s_ph, + NoItinerary, DSPRegs, DSPRegs>, + IsCommutable; + +class SUBU_PH_DESC : ADDU_QB_DESC_BASE<"subu.ph", int_mips_subu_ph, NoItinerary, + DSPRegs, DSPRegs>; + +class SUBU_S_PH_DESC : ADDU_QB_DESC_BASE<"subu_s.ph", int_mips_subu_s_ph, + NoItinerary, DSPRegs, DSPRegs>; + +class ADDUH_QB_DESC : ADDUH_QB_DESC_BASE<"adduh.qb", int_mips_adduh_qb, + NoItinerary, DSPRegs>, + ClearDefs, IsCommutable; + +class ADDUH_R_QB_DESC : ADDUH_QB_DESC_BASE<"adduh_r.qb", int_mips_adduh_r_qb, + NoItinerary, DSPRegs>, + ClearDefs, IsCommutable; + +class SUBUH_QB_DESC : ADDUH_QB_DESC_BASE<"subuh.qb", int_mips_subuh_qb, + NoItinerary, DSPRegs>, ClearDefs; + +class SUBUH_R_QB_DESC : ADDUH_QB_DESC_BASE<"subuh_r.qb", int_mips_subuh_r_qb, + NoItinerary, DSPRegs>, ClearDefs; + +class ADDQH_PH_DESC : ADDUH_QB_DESC_BASE<"addqh.ph", int_mips_addqh_ph, + NoItinerary, DSPRegs>, + ClearDefs, IsCommutable; + +class ADDQH_R_PH_DESC : ADDUH_QB_DESC_BASE<"addqh_r.ph", int_mips_addqh_r_ph, + NoItinerary, DSPRegs>, + ClearDefs, IsCommutable; + +class SUBQH_PH_DESC : ADDUH_QB_DESC_BASE<"subqh.ph", int_mips_subqh_ph, + NoItinerary, DSPRegs>, ClearDefs; + +class SUBQH_R_PH_DESC : 
ADDUH_QB_DESC_BASE<"subqh_r.ph", int_mips_subqh_r_ph, + NoItinerary, DSPRegs>, ClearDefs; + +class ADDQH_W_DESC : ADDUH_QB_DESC_BASE<"addqh.w", int_mips_addqh_w, + NoItinerary, CPURegs>, + ClearDefs, IsCommutable; + +class ADDQH_R_W_DESC : ADDUH_QB_DESC_BASE<"addqh_r.w", int_mips_addqh_r_w, + NoItinerary, CPURegs>, + ClearDefs, IsCommutable; + +class SUBQH_W_DESC : ADDUH_QB_DESC_BASE<"subqh.w", int_mips_subqh_w, + NoItinerary, CPURegs>, ClearDefs; + +class SUBQH_R_W_DESC : ADDUH_QB_DESC_BASE<"subqh_r.w", int_mips_subqh_r_w, + NoItinerary, CPURegs>, ClearDefs; + +// Comparison +class CMPGDU_EQ_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgdu.eq.qb", + int_mips_cmpgdu_eq_qb, + NoItinerary, CPURegs, DSPRegs>, + IsCommutable; + +class CMPGDU_LT_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgdu.lt.qb", + int_mips_cmpgdu_lt_qb, + NoItinerary, CPURegs, DSPRegs>, + IsCommutable; + +class CMPGDU_LE_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgdu.le.qb", + int_mips_cmpgdu_le_qb, + NoItinerary, CPURegs, DSPRegs>, + IsCommutable; + +// Absolute +class ABSQ_S_QB_DESC : ABSQ_S_PH_R2_DESC_BASE<"absq_s.qb", int_mips_absq_s_qb, + NoItinerary, DSPRegs>; + +// Multiplication +class MUL_PH_DESC : ADDUH_QB_DESC_BASE<"mul.ph", int_mips_mul_ph, NoItinerary, + DSPRegs>, IsCommutable; + +class MUL_S_PH_DESC : ADDUH_QB_DESC_BASE<"mul_s.ph", int_mips_mul_s_ph, + NoItinerary, DSPRegs>, IsCommutable; + +class MULQ_S_W_DESC : ADDUH_QB_DESC_BASE<"mulq_s.w", int_mips_mulq_s_w, + NoItinerary, CPURegs>, IsCommutable; + +class MULQ_RS_W_DESC : ADDUH_QB_DESC_BASE<"mulq_rs.w", int_mips_mulq_rs_w, + NoItinerary, CPURegs>, IsCommutable; + +class MULQ_S_PH_DESC : ADDU_QB_DESC_BASE<"mulq_s.ph", int_mips_mulq_s_ph, + NoItinerary, DSPRegs, DSPRegs>, + IsCommutable; + +// Dot product with accumulate/subtract +class DPA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpa.w.ph">; + +class DPS_W_PH_DESC : DPA_W_PH_DESC_BASE<"dps.w.ph">; + +class DPAQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaqx_s.w.ph">; + +class DPAQX_SA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaqx_sa.w.ph">; + +class DPAX_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpax.w.ph">; + +class DPSX_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsx.w.ph">; + +class DPSQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsqx_s.w.ph">; + +class DPSQX_SA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsqx_sa.w.ph">; + +class MULSA_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsa.w.ph">; + +// Precision reduce/expand +class PRECR_QB_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"precr.qb.ph", + int_mips_precr_qb_ph, + NoItinerary, DSPRegs, DSPRegs>; + +class PRECR_SRA_PH_W_DESC : PRECR_SRA_PH_W_DESC_BASE<"precr_sra.ph.w", + int_mips_precr_sra_ph_w, + NoItinerary, DSPRegs, + CPURegs>, ClearDefs; + +class PRECR_SRA_R_PH_W_DESC : PRECR_SRA_PH_W_DESC_BASE<"precr_sra_r.ph.w", + int_mips_precr_sra_r_ph_w, + NoItinerary, DSPRegs, + CPURegs>, ClearDefs; + +// Shift +class SHRA_QB_DESC : SHLL_QB_R2_DESC_BASE<"shra.qb", int_mips_shra_qb, immZExt3, + NoItinerary, DSPRegs>, ClearDefs; + +class SHRAV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrav.qb", int_mips_shra_qb, + NoItinerary, DSPRegs>, ClearDefs; + +class SHRA_R_QB_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.qb", int_mips_shra_r_qb, + immZExt3, NoItinerary, DSPRegs>, + ClearDefs; + +class SHRAV_R_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.qb", int_mips_shra_r_qb, + NoItinerary, DSPRegs>, ClearDefs; + +class SHRL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shrl.ph", int_mips_shrl_ph, immZExt4, + NoItinerary, DSPRegs>, ClearDefs; + +class SHRLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrlv.ph", int_mips_shrl_ph, + NoItinerary, DSPRegs>, ClearDefs; + +// Misc +class APPEND_DESC : 
APPEND_DESC_BASE<"append", int_mips_append, immZExt5, + NoItinerary>, ClearDefs; + +class BALIGN_DESC : APPEND_DESC_BASE<"balign", int_mips_balign, immZExt2, + NoItinerary>, ClearDefs; + +class PREPEND_DESC : APPEND_DESC_BASE<"prepend", int_mips_prepend, immZExt5, + NoItinerary>, ClearDefs; + +// Pseudos. +def BPOSGE32_PSEUDO : BPOSGE32_PSEUDO_DESC_BASE<int_mips_bposge32, NoItinerary>; + +// Instruction defs. +// MIPS DSP Rev 1 +def ADDU_QB : ADDU_QB_ENC, ADDU_QB_DESC; +def ADDU_S_QB : ADDU_S_QB_ENC, ADDU_S_QB_DESC; +def SUBU_QB : SUBU_QB_ENC, SUBU_QB_DESC; +def SUBU_S_QB : SUBU_S_QB_ENC, SUBU_S_QB_DESC; +def ADDQ_PH : ADDQ_PH_ENC, ADDQ_PH_DESC; +def ADDQ_S_PH : ADDQ_S_PH_ENC, ADDQ_S_PH_DESC; +def SUBQ_PH : SUBQ_PH_ENC, SUBQ_PH_DESC; +def SUBQ_S_PH : SUBQ_S_PH_ENC, SUBQ_S_PH_DESC; +def ADDQ_S_W : ADDQ_S_W_ENC, ADDQ_S_W_DESC; +def SUBQ_S_W : SUBQ_S_W_ENC, SUBQ_S_W_DESC; +def ADDSC : ADDSC_ENC, ADDSC_DESC; +def ADDWC : ADDWC_ENC, ADDWC_DESC; +def MODSUB : MODSUB_ENC, MODSUB_DESC; +def RADDU_W_QB : RADDU_W_QB_ENC, RADDU_W_QB_DESC; +def ABSQ_S_PH : ABSQ_S_PH_ENC, ABSQ_S_PH_DESC; +def ABSQ_S_W : ABSQ_S_W_ENC, ABSQ_S_W_DESC; +def PRECRQ_QB_PH : PRECRQ_QB_PH_ENC, PRECRQ_QB_PH_DESC; +def PRECRQ_PH_W : PRECRQ_PH_W_ENC, PRECRQ_PH_W_DESC; +def PRECRQ_RS_PH_W : PRECRQ_RS_PH_W_ENC, PRECRQ_RS_PH_W_DESC; +def PRECRQU_S_QB_PH : PRECRQU_S_QB_PH_ENC, PRECRQU_S_QB_PH_DESC; +def PRECEQ_W_PHL : PRECEQ_W_PHL_ENC, PRECEQ_W_PHL_DESC; +def PRECEQ_W_PHR : PRECEQ_W_PHR_ENC, PRECEQ_W_PHR_DESC; +def PRECEQU_PH_QBL : PRECEQU_PH_QBL_ENC, PRECEQU_PH_QBL_DESC; +def PRECEQU_PH_QBR : PRECEQU_PH_QBR_ENC, PRECEQU_PH_QBR_DESC; +def PRECEQU_PH_QBLA : PRECEQU_PH_QBLA_ENC, PRECEQU_PH_QBLA_DESC; +def PRECEQU_PH_QBRA : PRECEQU_PH_QBRA_ENC, PRECEQU_PH_QBRA_DESC; +def PRECEU_PH_QBL : PRECEU_PH_QBL_ENC, PRECEU_PH_QBL_DESC; +def PRECEU_PH_QBR : PRECEU_PH_QBR_ENC, PRECEU_PH_QBR_DESC; +def PRECEU_PH_QBLA : PRECEU_PH_QBLA_ENC, PRECEU_PH_QBLA_DESC; +def PRECEU_PH_QBRA : PRECEU_PH_QBRA_ENC, PRECEU_PH_QBRA_DESC; +def SHLL_QB : SHLL_QB_ENC, SHLL_QB_DESC; +def SHLLV_QB : SHLLV_QB_ENC, SHLLV_QB_DESC; +def SHRL_QB : SHRL_QB_ENC, SHRL_QB_DESC; +def SHRLV_QB : SHRLV_QB_ENC, SHRLV_QB_DESC; +def SHLL_PH : SHLL_PH_ENC, SHLL_PH_DESC; +def SHLLV_PH : SHLLV_PH_ENC, SHLLV_PH_DESC; +def SHLL_S_PH : SHLL_S_PH_ENC, SHLL_S_PH_DESC; +def SHLLV_S_PH : SHLLV_S_PH_ENC, SHLLV_S_PH_DESC; +def SHRA_PH : SHRA_PH_ENC, SHRA_PH_DESC; +def SHRAV_PH : SHRAV_PH_ENC, SHRAV_PH_DESC; +def SHRA_R_PH : SHRA_R_PH_ENC, SHRA_R_PH_DESC; +def SHRAV_R_PH : SHRAV_R_PH_ENC, SHRAV_R_PH_DESC; +def SHLL_S_W : SHLL_S_W_ENC, SHLL_S_W_DESC; +def SHLLV_S_W : SHLLV_S_W_ENC, SHLLV_S_W_DESC; +def SHRA_R_W : SHRA_R_W_ENC, SHRA_R_W_DESC; +def SHRAV_R_W : SHRAV_R_W_ENC, SHRAV_R_W_DESC; +def MULEU_S_PH_QBL : MULEU_S_PH_QBL_ENC, MULEU_S_PH_QBL_DESC; +def MULEU_S_PH_QBR : MULEU_S_PH_QBR_ENC, MULEU_S_PH_QBR_DESC; +def MULEQ_S_W_PHL : MULEQ_S_W_PHL_ENC, MULEQ_S_W_PHL_DESC; +def MULEQ_S_W_PHR : MULEQ_S_W_PHR_ENC, MULEQ_S_W_PHR_DESC; +def MULQ_RS_PH : MULQ_RS_PH_ENC, MULQ_RS_PH_DESC; +def MULSAQ_S_W_PH : MULSAQ_S_W_PH_ENC, MULSAQ_S_W_PH_DESC; +def MAQ_S_W_PHL : MAQ_S_W_PHL_ENC, MAQ_S_W_PHL_DESC; +def MAQ_S_W_PHR : MAQ_S_W_PHR_ENC, MAQ_S_W_PHR_DESC; +def MAQ_SA_W_PHL : MAQ_SA_W_PHL_ENC, MAQ_SA_W_PHL_DESC; +def MAQ_SA_W_PHR : MAQ_SA_W_PHR_ENC, MAQ_SA_W_PHR_DESC; +def DPAU_H_QBL : DPAU_H_QBL_ENC, DPAU_H_QBL_DESC; +def DPAU_H_QBR : DPAU_H_QBR_ENC, DPAU_H_QBR_DESC; +def DPSU_H_QBL : DPSU_H_QBL_ENC, DPSU_H_QBL_DESC; +def DPSU_H_QBR : DPSU_H_QBR_ENC, DPSU_H_QBR_DESC; +def DPAQ_S_W_PH : DPAQ_S_W_PH_ENC, 
DPAQ_S_W_PH_DESC; +def DPSQ_S_W_PH : DPSQ_S_W_PH_ENC, DPSQ_S_W_PH_DESC; +def DPAQ_SA_L_W : DPAQ_SA_L_W_ENC, DPAQ_SA_L_W_DESC; +def DPSQ_SA_L_W : DPSQ_SA_L_W_ENC, DPSQ_SA_L_W_DESC; +def MULT_DSP : MULT_DSP_ENC, MULT_DSP_DESC; +def MULTU_DSP : MULTU_DSP_ENC, MULTU_DSP_DESC; +def MADD_DSP : MADD_DSP_ENC, MADD_DSP_DESC; +def MADDU_DSP : MADDU_DSP_ENC, MADDU_DSP_DESC; +def MSUB_DSP : MSUB_DSP_ENC, MSUB_DSP_DESC; +def MSUBU_DSP : MSUBU_DSP_ENC, MSUBU_DSP_DESC; +def CMPU_EQ_QB : CMPU_EQ_QB_ENC, CMPU_EQ_QB_DESC; +def CMPU_LT_QB : CMPU_LT_QB_ENC, CMPU_LT_QB_DESC; +def CMPU_LE_QB : CMPU_LE_QB_ENC, CMPU_LE_QB_DESC; +def CMPGU_EQ_QB : CMPGU_EQ_QB_ENC, CMPGU_EQ_QB_DESC; +def CMPGU_LT_QB : CMPGU_LT_QB_ENC, CMPGU_LT_QB_DESC; +def CMPGU_LE_QB : CMPGU_LE_QB_ENC, CMPGU_LE_QB_DESC; +def CMP_EQ_PH : CMP_EQ_PH_ENC, CMP_EQ_PH_DESC; +def CMP_LT_PH : CMP_LT_PH_ENC, CMP_LT_PH_DESC; +def CMP_LE_PH : CMP_LE_PH_ENC, CMP_LE_PH_DESC; +def BITREV : BITREV_ENC, BITREV_DESC; +def PACKRL_PH : PACKRL_PH_ENC, PACKRL_PH_DESC; +def REPL_QB : REPL_QB_ENC, REPL_QB_DESC; +def REPL_PH : REPL_PH_ENC, REPL_PH_DESC; +def REPLV_QB : REPLV_QB_ENC, REPLV_QB_DESC; +def REPLV_PH : REPLV_PH_ENC, REPLV_PH_DESC; +def PICK_QB : PICK_QB_ENC, PICK_QB_DESC; +def PICK_PH : PICK_PH_ENC, PICK_PH_DESC; +def LWX : LWX_ENC, LWX_DESC; +def LHX : LHX_ENC, LHX_DESC; +def LBUX : LBUX_ENC, LBUX_DESC; +def BPOSGE32 : BPOSGE32_ENC, BPOSGE32_DESC; +def INSV : INSV_ENC, INSV_DESC; +def EXTP : EXTP_ENC, EXTP_DESC; +def EXTPV : EXTPV_ENC, EXTPV_DESC; +def EXTPDP : EXTPDP_ENC, EXTPDP_DESC; +def EXTPDPV : EXTPDPV_ENC, EXTPDPV_DESC; +def EXTR_W : EXTR_W_ENC, EXTR_W_DESC; +def EXTRV_W : EXTRV_W_ENC, EXTRV_W_DESC; +def EXTR_R_W : EXTR_R_W_ENC, EXTR_R_W_DESC; +def EXTRV_R_W : EXTRV_R_W_ENC, EXTRV_R_W_DESC; +def EXTR_RS_W : EXTR_RS_W_ENC, EXTR_RS_W_DESC; +def EXTRV_RS_W : EXTRV_RS_W_ENC, EXTRV_RS_W_DESC; +def EXTR_S_H : EXTR_S_H_ENC, EXTR_S_H_DESC; +def EXTRV_S_H : EXTRV_S_H_ENC, EXTRV_S_H_DESC; +def SHILO : SHILO_ENC, SHILO_DESC; +def SHILOV : SHILOV_ENC, SHILOV_DESC; +def MTHLIP : MTHLIP_ENC, MTHLIP_DESC; +def RDDSP : RDDSP_ENC, RDDSP_DESC; +def WRDSP : WRDSP_ENC, WRDSP_DESC; + +// MIPS DSP Rev 2 +let Predicates = [HasDSPR2] in { + +def ADDU_PH : ADDU_PH_ENC, ADDU_PH_DESC; +def ADDU_S_PH : ADDU_S_PH_ENC, ADDU_S_PH_DESC; +def SUBU_PH : SUBU_PH_ENC, SUBU_PH_DESC; +def SUBU_S_PH : SUBU_S_PH_ENC, SUBU_S_PH_DESC; +def CMPGDU_EQ_QB : CMPGDU_EQ_QB_ENC, CMPGDU_EQ_QB_DESC; +def CMPGDU_LT_QB : CMPGDU_LT_QB_ENC, CMPGDU_LT_QB_DESC; +def CMPGDU_LE_QB : CMPGDU_LE_QB_ENC, CMPGDU_LE_QB_DESC; +def ABSQ_S_QB : ABSQ_S_QB_ENC, ABSQ_S_QB_DESC; +def ADDUH_QB : ADDUH_QB_ENC, ADDUH_QB_DESC; +def ADDUH_R_QB : ADDUH_R_QB_ENC, ADDUH_R_QB_DESC; +def SUBUH_QB : SUBUH_QB_ENC, SUBUH_QB_DESC; +def SUBUH_R_QB : SUBUH_R_QB_ENC, SUBUH_R_QB_DESC; +def ADDQH_PH : ADDQH_PH_ENC, ADDQH_PH_DESC; +def ADDQH_R_PH : ADDQH_R_PH_ENC, ADDQH_R_PH_DESC; +def SUBQH_PH : SUBQH_PH_ENC, SUBQH_PH_DESC; +def SUBQH_R_PH : SUBQH_R_PH_ENC, SUBQH_R_PH_DESC; +def ADDQH_W : ADDQH_W_ENC, ADDQH_W_DESC; +def ADDQH_R_W : ADDQH_R_W_ENC, ADDQH_R_W_DESC; +def SUBQH_W : SUBQH_W_ENC, SUBQH_W_DESC; +def SUBQH_R_W : SUBQH_R_W_ENC, SUBQH_R_W_DESC; +def MUL_PH : MUL_PH_ENC, MUL_PH_DESC; +def MUL_S_PH : MUL_S_PH_ENC, MUL_S_PH_DESC; +def MULQ_S_W : MULQ_S_W_ENC, MULQ_S_W_DESC; +def MULQ_RS_W : MULQ_RS_W_ENC, MULQ_RS_W_DESC; +def MULQ_S_PH : MULQ_S_PH_ENC, MULQ_S_PH_DESC; +def DPA_W_PH : DPA_W_PH_ENC, DPA_W_PH_DESC; +def DPS_W_PH : DPS_W_PH_ENC, DPS_W_PH_DESC; +def DPAQX_S_W_PH : DPAQX_S_W_PH_ENC, DPAQX_S_W_PH_DESC; +def DPAQX_SA_W_PH : 
DPAQX_SA_W_PH_ENC, DPAQX_SA_W_PH_DESC; +def DPAX_W_PH : DPAX_W_PH_ENC, DPAX_W_PH_DESC; +def DPSX_W_PH : DPSX_W_PH_ENC, DPSX_W_PH_DESC; +def DPSQX_S_W_PH : DPSQX_S_W_PH_ENC, DPSQX_S_W_PH_DESC; +def DPSQX_SA_W_PH : DPSQX_SA_W_PH_ENC, DPSQX_SA_W_PH_DESC; +def MULSA_W_PH : MULSA_W_PH_ENC, MULSA_W_PH_DESC; +def PRECR_QB_PH : PRECR_QB_PH_ENC, PRECR_QB_PH_DESC; +def PRECR_SRA_PH_W : PRECR_SRA_PH_W_ENC, PRECR_SRA_PH_W_DESC; +def PRECR_SRA_R_PH_W : PRECR_SRA_R_PH_W_ENC, PRECR_SRA_R_PH_W_DESC; +def SHRA_QB : SHRA_QB_ENC, SHRA_QB_DESC; +def SHRAV_QB : SHRAV_QB_ENC, SHRAV_QB_DESC; +def SHRA_R_QB : SHRA_R_QB_ENC, SHRA_R_QB_DESC; +def SHRAV_R_QB : SHRAV_R_QB_ENC, SHRAV_R_QB_DESC; +def SHRL_PH : SHRL_PH_ENC, SHRL_PH_DESC; +def SHRLV_PH : SHRLV_PH_ENC, SHRLV_PH_DESC; +def APPEND : APPEND_ENC, APPEND_DESC; +def BALIGN : BALIGN_ENC, BALIGN_DESC; +def PREPEND : PREPEND_ENC, PREPEND_DESC; + +} + +// Pseudos. +def MULSAQ_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMULSAQ_S_W_PH, NoItinerary, + MULSAQ_S_W_PH>; +def MAQ_S_W_PHL_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMAQ_S_W_PHL, NoItinerary, + MAQ_S_W_PHL>; +def MAQ_S_W_PHR_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMAQ_S_W_PHR, NoItinerary, + MAQ_S_W_PHR>; +def MAQ_SA_W_PHL_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMAQ_SA_W_PHL, NoItinerary, + MAQ_SA_W_PHL>; +def MAQ_SA_W_PHR_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMAQ_SA_W_PHR, NoItinerary, + MAQ_SA_W_PHR>; +def DPAU_H_QBL_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAU_H_QBL, NoItinerary, + DPAU_H_QBL>; +def DPAU_H_QBR_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAU_H_QBR, NoItinerary, + DPAU_H_QBR>; +def DPSU_H_QBL_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSU_H_QBL, NoItinerary, + DPSU_H_QBL>; +def DPSU_H_QBR_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSU_H_QBR, NoItinerary, + DPSU_H_QBR>; +def DPAQ_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAQ_S_W_PH, NoItinerary, + DPAQ_S_W_PH>; +def DPSQ_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSQ_S_W_PH, NoItinerary, + DPSQ_S_W_PH>; +def DPAQ_SA_L_W_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAQ_SA_L_W, NoItinerary, + DPAQ_SA_L_W>; +def DPSQ_SA_L_W_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSQ_SA_L_W, NoItinerary, + DPSQ_SA_L_W>; + +def MULT_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMULT, NoItinerary, MULT_DSP>, + IsCommutable; +def MULTU_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMULTU, NoItinerary, MULTU_DSP>, + IsCommutable; +def MADD_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMADD_DSP, NoItinerary, MADD_DSP>, + IsCommutable, UseAC; +def MADDU_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMADDU_DSP, NoItinerary, MADDU_DSP>, + IsCommutable, UseAC; +def MSUB_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMSUB_DSP, NoItinerary, MSUB_DSP>, + UseAC; +def MSUBU_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMSUBU_DSP, NoItinerary, MSUBU_DSP>, + UseAC; + +def SHILO_PSEUDO : SHILO_R1_PSEUDO_BASE<MipsSHILO, NoItinerary, SHILO>; +def SHILOV_PSEUDO : SHILO_R2_PSEUDO_BASE<MipsSHILO, NoItinerary, SHILOV>; +def MTHLIP_PSEUDO : SHILO_R2_PSEUDO_BASE<MipsMTHLIP, NoItinerary, MTHLIP>; + +let Predicates = [HasDSPR2] in { + +def DPA_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPA_W_PH, NoItinerary, DPA_W_PH>; +def DPS_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPS_W_PH, NoItinerary, DPS_W_PH>; +def DPAQX_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAQX_S_W_PH, NoItinerary, + DPAQX_S_W_PH>; +def DPAQX_SA_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAQX_SA_W_PH, NoItinerary, + DPAQX_SA_W_PH>; +def DPAX_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAX_W_PH, NoItinerary, + DPAX_W_PH>; +def DPSX_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSX_W_PH, NoItinerary, + DPSX_W_PH>; +def DPSQX_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSQX_S_W_PH, NoItinerary, 
+ DPSQX_S_W_PH>; +def DPSQX_SA_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSQX_SA_W_PH, NoItinerary, + DPSQX_SA_W_PH>; +def MULSA_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMULSA_W_PH, NoItinerary, + MULSA_W_PH>; + +} + +// Patterns. +class DSPPat<dag pattern, dag result, Predicate pred = HasDSP> : + Pat<pattern, result>, Requires<[pred]>; + +class BitconvertPat<ValueType DstVT, ValueType SrcVT, RegisterClass DstRC, + RegisterClass SrcRC> : + DSPPat<(DstVT (bitconvert (SrcVT SrcRC:$src))), + (COPY_TO_REGCLASS SrcRC:$src, DstRC)>; + +def : BitconvertPat<i32, v2i16, CPURegs, DSPRegs>; +def : BitconvertPat<i32, v4i8, CPURegs, DSPRegs>; +def : BitconvertPat<v2i16, i32, DSPRegs, CPURegs>; +def : BitconvertPat<v4i8, i32, DSPRegs, CPURegs>; + +def : DSPPat<(v2i16 (load addr:$a)), + (v2i16 (COPY_TO_REGCLASS (LW addr:$a), DSPRegs))>; +def : DSPPat<(v4i8 (load addr:$a)), + (v4i8 (COPY_TO_REGCLASS (LW addr:$a), DSPRegs))>; +def : DSPPat<(store (v2i16 DSPRegs:$val), addr:$a), + (SW (COPY_TO_REGCLASS DSPRegs:$val, CPURegs), addr:$a)>; +def : DSPPat<(store (v4i8 DSPRegs:$val), addr:$a), + (SW (COPY_TO_REGCLASS DSPRegs:$val, CPURegs), addr:$a)>; + +// Extr patterns. +class EXTR_W_TY1_R2_Pat<SDPatternOperator OpNode, Instruction Instr> : + DSPPat<(i32 (OpNode CPURegs:$rs)), (Instr AC0, CPURegs:$rs)>; + +class EXTR_W_TY1_R1_Pat<SDPatternOperator OpNode, Instruction Instr> : + DSPPat<(i32 (OpNode immZExt5:$shift)), (Instr AC0, immZExt5:$shift)>; + +def : EXTR_W_TY1_R1_Pat<MipsEXTP, EXTP>; +def : EXTR_W_TY1_R2_Pat<MipsEXTP, EXTPV>; +def : EXTR_W_TY1_R1_Pat<MipsEXTPDP, EXTPDP>; +def : EXTR_W_TY1_R2_Pat<MipsEXTPDP, EXTPDPV>; +def : EXTR_W_TY1_R1_Pat<MipsEXTR_W, EXTR_W>; +def : EXTR_W_TY1_R2_Pat<MipsEXTR_W, EXTRV_W>; +def : EXTR_W_TY1_R1_Pat<MipsEXTR_R_W, EXTR_R_W>; +def : EXTR_W_TY1_R2_Pat<MipsEXTR_R_W, EXTRV_R_W>; +def : EXTR_W_TY1_R1_Pat<MipsEXTR_RS_W, EXTR_RS_W>; +def : EXTR_W_TY1_R2_Pat<MipsEXTR_RS_W, EXTRV_RS_W>; +def : EXTR_W_TY1_R1_Pat<MipsEXTR_S_H, EXTR_S_H>; +def : EXTR_W_TY1_R2_Pat<MipsEXTR_S_H, EXTRV_S_H>; diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index b1220d6250..e9f330ffc1 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -875,6 +875,8 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const case ISD::SRL_PARTS: return LowerShiftRightParts(Op, DAG, false); case ISD::LOAD: return LowerLOAD(Op, DAG); case ISD::STORE: return LowerSTORE(Op, DAG); + case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); + case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, DAG); // @LOCALMOD-BEGIN case ISD::NACL_TP_TLS_OFFSET: return LowerNaClTpTlsOffset(Op, DAG); @@ -988,6 +990,70 @@ static MachineBasicBlock* ExpandCondMov(MachineInstr *MI, MachineBasicBlock *BB, return BB; } */ + +MachineBasicBlock * +MipsTargetLowering::EmitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const{ + // $bb: + // bposge32_pseudo $vr0 + // => + // $bb: + // bposge32 $tbb + // $fbb: + // li $vr2, 0 + // b $sink + // $tbb: + // li $vr1, 1 + // $sink: + // $vr0 = phi($vr2, $fbb, $vr1, $tbb) + + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetRegisterClass *RC = &Mips::CPURegsRegClass; + DebugLoc DL = MI->getDebugLoc(); + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = llvm::next(MachineFunction::iterator(BB)); + MachineFunction *F = BB->getParent(); + MachineBasicBlock *FBB = 
F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, FBB); + F->insert(It, TBB); + F->insert(It, Sink); + + // Transfer the remainder of BB and its successor edges to Sink. + Sink->splice(Sink->begin(), BB, llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + Sink->transferSuccessorsAndUpdatePHIs(BB); + + // Add successors. + BB->addSuccessor(FBB); + BB->addSuccessor(TBB); + FBB->addSuccessor(Sink); + TBB->addSuccessor(Sink); + + // Insert the real bposge32 instruction to $BB. + BuildMI(BB, DL, TII->get(Mips::BPOSGE32)).addMBB(TBB); + + // Fill $FBB. + unsigned VR2 = RegInfo.createVirtualRegister(RC); + BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2) + .addReg(Mips::ZERO).addImm(0); + BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink); + + // Fill $TBB. + unsigned VR1 = RegInfo.createVirtualRegister(RC); + BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1) + .addReg(Mips::ZERO).addImm(1); + + // Insert phi function to $Sink. + BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI), + MI->getOperand(0).getReg()) + .addReg(VR2).addMBB(FBB).addReg(VR1).addMBB(TBB); + + MI->eraseFromParent(); // The pseudo instruction is gone now. + return Sink; +} + MachineBasicBlock * MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { @@ -1096,6 +1162,8 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case Mips::ATOMIC_CMP_SWAP_I64: case Mips::ATOMIC_CMP_SWAP_I64_P8: return EmitAtomicCmpSwap(MI, BB, 8); + case Mips::BPOSGE32_PSEUDO: + return EmitBPOSGE32(MI, BB); } } @@ -2340,6 +2408,151 @@ SDValue MipsTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { return CreateStoreLR(MipsISD::SDR, DAG, SD, SDL, IsLittle ? 0 : 7); } +// This function expands mips intrinsic nodes which have 64-bit input operands +// or output values. +// +// out64 = intrinsic-node in64 +// => +// lo = copy (extract-element (in64, 0)) +// hi = copy (extract-element (in64, 1)) +// mips-specific-node +// v0 = copy lo +// v1 = copy hi +// out64 = merge-values (v0, v1) +// +static SDValue LowerDSPIntr(SDValue Op, SelectionDAG &DAG, + unsigned Opc, bool HasI64In, bool HasI64Out) { + DebugLoc DL = Op.getDebugLoc(); + bool HasChainIn = Op->getOperand(0).getValueType() == MVT::Other; + SDValue Chain = HasChainIn ? 
Op->getOperand(0) : DAG.getEntryNode(); + SmallVector<SDValue, 3> Ops; + + if (HasI64In) { + SDValue InLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, + Op->getOperand(1 + HasChainIn), + DAG.getConstant(0, MVT::i32)); + SDValue InHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, + Op->getOperand(1 + HasChainIn), + DAG.getConstant(1, MVT::i32)); + + Chain = DAG.getCopyToReg(Chain, DL, Mips::LO, InLo, SDValue()); + Chain = DAG.getCopyToReg(Chain, DL, Mips::HI, InHi, Chain.getValue(1)); + + Ops.push_back(Chain); + Ops.append(Op->op_begin() + HasChainIn + 2, Op->op_end()); + Ops.push_back(Chain.getValue(1)); + } else { + Ops.push_back(Chain); + Ops.append(Op->op_begin() + HasChainIn + 1, Op->op_end()); + } + + if (!HasI64Out) + return DAG.getNode(Opc, DL, Op->value_begin(), Op->getNumValues(), + Ops.begin(), Ops.size()); + + SDValue Intr = DAG.getNode(Opc, DL, DAG.getVTList(MVT::Other, MVT::Glue), + Ops.begin(), Ops.size()); + SDValue OutLo = DAG.getCopyFromReg(Intr.getValue(0), DL, Mips::LO, MVT::i32, + Intr.getValue(1)); + SDValue OutHi = DAG.getCopyFromReg(OutLo.getValue(1), DL, Mips::HI, MVT::i32, + OutLo.getValue(2)); + SDValue Out = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, OutLo, OutHi); + + if (!HasChainIn) + return Out; + + SDValue Vals[] = { Out, OutHi.getValue(1) }; + return DAG.getMergeValues(Vals, 2, DL); +} + +SDValue MipsTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, + SelectionDAG &DAG) const { + switch (cast<ConstantSDNode>(Op->getOperand(0))->getZExtValue()) { + default: + return SDValue(); + case Intrinsic::mips_shilo: + return LowerDSPIntr(Op, DAG, MipsISD::SHILO, true, true); + case Intrinsic::mips_dpau_h_qbl: + return LowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBL, true, true); + case Intrinsic::mips_dpau_h_qbr: + return LowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBR, true, true); + case Intrinsic::mips_dpsu_h_qbl: + return LowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBL, true, true); + case Intrinsic::mips_dpsu_h_qbr: + return LowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBR, true, true); + case Intrinsic::mips_dpa_w_ph: + return LowerDSPIntr(Op, DAG, MipsISD::DPA_W_PH, true, true); + case Intrinsic::mips_dps_w_ph: + return LowerDSPIntr(Op, DAG, MipsISD::DPS_W_PH, true, true); + case Intrinsic::mips_dpax_w_ph: + return LowerDSPIntr(Op, DAG, MipsISD::DPAX_W_PH, true, true); + case Intrinsic::mips_dpsx_w_ph: + return LowerDSPIntr(Op, DAG, MipsISD::DPSX_W_PH, true, true); + case Intrinsic::mips_mulsa_w_ph: + return LowerDSPIntr(Op, DAG, MipsISD::MULSA_W_PH, true, true); + case Intrinsic::mips_mult: + return LowerDSPIntr(Op, DAG, MipsISD::MULT, false, true); + case Intrinsic::mips_multu: + return LowerDSPIntr(Op, DAG, MipsISD::MULTU, false, true); + case Intrinsic::mips_madd: + return LowerDSPIntr(Op, DAG, MipsISD::MADD_DSP, true, true); + case Intrinsic::mips_maddu: + return LowerDSPIntr(Op, DAG, MipsISD::MADDU_DSP, true, true); + case Intrinsic::mips_msub: + return LowerDSPIntr(Op, DAG, MipsISD::MSUB_DSP, true, true); + case Intrinsic::mips_msubu: + return LowerDSPIntr(Op, DAG, MipsISD::MSUBU_DSP, true, true); + } +} + +SDValue MipsTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, + SelectionDAG &DAG) const { + switch (cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue()) { + default: + return SDValue(); + case Intrinsic::mips_extp: + return LowerDSPIntr(Op, DAG, MipsISD::EXTP, true, false); + case Intrinsic::mips_extpdp: + return LowerDSPIntr(Op, DAG, MipsISD::EXTPDP, true, false); + case Intrinsic::mips_extr_w: + return LowerDSPIntr(Op, DAG, MipsISD::EXTR_W, true, false); + 
case Intrinsic::mips_extr_r_w: + return LowerDSPIntr(Op, DAG, MipsISD::EXTR_R_W, true, false); + case Intrinsic::mips_extr_rs_w: + return LowerDSPIntr(Op, DAG, MipsISD::EXTR_RS_W, true, false); + case Intrinsic::mips_extr_s_h: + return LowerDSPIntr(Op, DAG, MipsISD::EXTR_S_H, true, false); + case Intrinsic::mips_mthlip: + return LowerDSPIntr(Op, DAG, MipsISD::MTHLIP, true, true); + case Intrinsic::mips_mulsaq_s_w_ph: + return LowerDSPIntr(Op, DAG, MipsISD::MULSAQ_S_W_PH, true, true); + case Intrinsic::mips_maq_s_w_phl: + return LowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHL, true, true); + case Intrinsic::mips_maq_s_w_phr: + return LowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHR, true, true); + case Intrinsic::mips_maq_sa_w_phl: + return LowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHL, true, true); + case Intrinsic::mips_maq_sa_w_phr: + return LowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHR, true, true); + case Intrinsic::mips_dpaq_s_w_ph: + return LowerDSPIntr(Op, DAG, MipsISD::DPAQ_S_W_PH, true, true); + case Intrinsic::mips_dpsq_s_w_ph: + return LowerDSPIntr(Op, DAG, MipsISD::DPSQ_S_W_PH, true, true); + case Intrinsic::mips_dpaq_sa_l_w: + return LowerDSPIntr(Op, DAG, MipsISD::DPAQ_SA_L_W, true, true); + case Intrinsic::mips_dpsq_sa_l_w: + return LowerDSPIntr(Op, DAG, MipsISD::DPSQ_SA_L_W, true, true); + case Intrinsic::mips_dpaqx_s_w_ph: + return LowerDSPIntr(Op, DAG, MipsISD::DPAQX_S_W_PH, true, true); + case Intrinsic::mips_dpaqx_sa_w_ph: + return LowerDSPIntr(Op, DAG, MipsISD::DPAQX_SA_W_PH, true, true); + case Intrinsic::mips_dpsqx_s_w_ph: + return LowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH, true, true); + case Intrinsic::mips_dpsqx_sa_w_ph: + return LowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH, true, true); + } +} + //===----------------------------------------------------------------------===// // Calling Convention Implementation //===----------------------------------------------------------------------===// diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 4e9398430b..2dce449765 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -202,6 +202,8 @@ namespace llvm { bool IsSRA) const; SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; // @LOCALMOD-BEGIN SDValue LowerNaClTpTlsOffset(SDValue Op, SelectionDAG &DAG) const; @@ -265,6 +267,8 @@ namespace llvm { virtual unsigned getJumpTableEncoding() const; + MachineBasicBlock *EmitBPOSGE32(MachineInstr *MI, + MachineBasicBlock *BB) const; MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, unsigned Size, unsigned BinOpcode, bool Nand = false) const; MachineBasicBlock *EmitAtomicBinaryPartword(MachineInstr *MI, diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 3f98ae857b..6fa94a96e5 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -1266,3 +1266,8 @@ include "MipsCondMov.td" include "Mips16InstrFormats.td" include "Mips16InstrInfo.td" + +// DSP +include "MipsDSPInstrFormats.td" +include "MipsDSPInstrInfo.td" + diff --git a/lib/Target/Mips/MipsMCInstLower.h b/lib/Target/Mips/MipsMCInstLower.h index 3eab5a452e..c4a6016105 100644 --- a/lib/Target/Mips/MipsMCInstLower.h +++ b/lib/Target/Mips/MipsMCInstLower.h @@ -33,11 +33,11 @@ public: MipsMCInstLower(MipsAsmPrinter 
&asmprinter); void Initialize(Mangler *mang, MCContext *C); void Lower(const MachineInstr *MI, MCInst &OutMI) const; + MCOperand LowerOperand(const MachineOperand& MO, unsigned offset = 0) const; private: MCOperand LowerSymbolOperand(const MachineOperand &MO, MachineOperandType MOTy, unsigned Offset) const; - MCOperand LowerOperand(const MachineOperand& MO, unsigned offset = 0) const; }; } diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h index df3c4c0de0..93ce94803a 100644 --- a/lib/Target/Mips/MipsMachineFunction.h +++ b/lib/Target/Mips/MipsMachineFunction.h @@ -45,9 +45,7 @@ class MipsFunctionInfo : public MachineFunctionInfo { // Range of frame object indices. // InArgFIRange: Range of indices of all frame objects created during call to // LowerFormalArguments. - // OutArgFIRange: Range of indices of all frame objects created during call to - // LowerCall except for the frame object for restoring $gp. - std::pair<int, int> InArgFIRange, OutArgFIRange; + std::pair<int, int> InArgFIRange; unsigned MaxCallFrameSize; bool EmitNOAT; @@ -56,7 +54,7 @@ public: MipsFunctionInfo(MachineFunction& MF) : MF(MF), SRetReturnReg(0), GlobalBaseReg(0), VarArgsFrameIndex(0), InArgFIRange(std::make_pair(-1, 0)), - OutArgFIRange(std::make_pair(-1, 0)), MaxCallFrameSize(0), EmitNOAT(false) + MaxCallFrameSize(0), EmitNOAT(false) {} bool isInArgFI(int FI) const { @@ -64,16 +62,6 @@ public: } void setLastInArgFI(int FI) { InArgFIRange.second = FI; } - bool isOutArgFI(int FI) const { - return FI <= OutArgFIRange.first && FI >= OutArgFIRange.second; - } - void extendOutArgFIRange(int FirstFI, int LastFI) { - if (!OutArgFIRange.second) - // this must be the first time this function was called. - OutArgFIRange.first = FirstFI; - OutArgFIRange.second = LastFI; - } - unsigned getSRetReturnReg() const { return SRetReturnReg; } void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; } diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index ae4813e128..a72e3b857f 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -14,6 +14,8 @@ let Namespace = "Mips" in { def sub_fpeven : SubRegIndex; def sub_fpodd : SubRegIndex; def sub_32 : SubRegIndex; +def sub_lo : SubRegIndex; +def sub_hi : SubRegIndex; } // We have banks of 32 registers each. 
@@ -247,33 +249,11 @@ let Namespace = "Mips" in { def HWR29_64 : Register<"29">; // Accum registers - def LO0 : Register<"ac0"> { - let Aliases = [LO]; - } - def HI0 : Register<"hi0"> { - let Aliases = [HI]; - } - def LO1 : Register<"ac1">; - def HI1 : Register<"hi1">; - def LO2 : Register<"ac2">; - def HI2 : Register<"hi2">; - def LO3 : Register<"ac3">; - def HI3 : Register<"hi3">; - - let SubRegIndices = [sub_32] in { - def LO0_64 : RegisterWithSubRegs<"ac0", [LO0]> { - let Aliases = [LO64]; - } - def HI0_64 : RegisterWithSubRegs<"hi0", [HI0]> { - let Aliases = [HI64]; - } - def LO1_64 : RegisterWithSubRegs<"ac1", [LO1]>; - def HI1_64 : RegisterWithSubRegs<"hi1", [HI1]>; - def LO2_64 : RegisterWithSubRegs<"ac2", [LO2]>; - def HI2_64 : RegisterWithSubRegs<"hi2", [HI2]>; - def LO3_64 : RegisterWithSubRegs<"ac3", [LO3]>; - def HI3_64 : RegisterWithSubRegs<"hi3", [HI3]>; - } + let SubRegIndices = [sub_lo, sub_hi] in + def AC0 : RegisterWithSubRegs<"ac0", [LO, HI]>; + def AC1 : Register<"ac1">; + def AC2 : Register<"ac2">; + def AC3 : Register<"ac3">; def DSPCtrl : Register<"dspctrl">; } @@ -322,6 +302,7 @@ def CPU16Regs : RegisterClass<"Mips", [i32], 32, (add def CPURAReg : RegisterClass<"Mips", [i32], 32, (add RA)>; +def CPUSPReg : RegisterClass<"Mips", [i32], 32, (add SP)>; // 64bit fp: // * FGR64 - 32 64-bit registers @@ -357,9 +338,5 @@ def HILO64 : RegisterClass<"Mips", [i64], 64, (add HI64, LO64)>; def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>; def HWRegs64 : RegisterClass<"Mips", [i64], 32, (add HWR29_64)>; -// Accum Registers -def HIRegs : RegisterClass<"Mips", [i32], 32, (sequence "HI%u", 0, 3)>; -def LORegs : RegisterClass<"Mips", [i32], 32, (sequence "LO%u", 0, 3)>; - -def HI64Regs : RegisterClass<"Mips", [i64], 64, (sequence "HI%u_64", 0, 3)>; -def LO64Regs : RegisterClass<"Mips", [i64], 64, (sequence "LO%u_64", 0, 3)>; +// Accumulator Registers +def ACRegs : RegisterClass<"Mips", [i64], 64, (sequence "AC%u", 0, 3)>; diff --git a/lib/Target/Mips/MipsSERegisterInfo.cpp b/lib/Target/Mips/MipsSERegisterInfo.cpp index d868f73758..8e2c2c5174 100644 --- a/lib/Target/Mips/MipsSERegisterInfo.cpp +++ b/lib/Target/Mips/MipsSERegisterInfo.cpp @@ -91,8 +91,7 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II, // getFrameRegister() returns. unsigned FrameReg; - if (MipsFI->isOutArgFI(FrameIndex) || - (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI)) + if (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI) FrameReg = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP; else FrameReg = getFrameRegister(MF); @@ -106,12 +105,8 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II, // incoming argument, callee-saved register location or local variable. 
int64_t Offset; - if (MipsFI->isOutArgFI(FrameIndex)) - Offset = SPOffset; - else - Offset = SPOffset + (int64_t)StackSize; - - Offset += MI.getOperand(OpNo + 1).getImm(); + Offset = SPOffset + (int64_t)StackSize; + Offset += MI.getOperand(OpNo + 1).getImm(); DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n"); diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp index 7f5927d8ed..1ff41ca358 100644 --- a/lib/Target/Mips/MipsSubtarget.cpp +++ b/lib/Target/Mips/MipsSubtarget.cpp @@ -31,7 +31,8 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU, MipsArchVersion(Mips32), MipsABI(UnknownABI), IsLittle(little), IsSingleFloat(false), IsFP64bit(false), IsGP64bit(false), HasVFPU(false), IsLinux(true), HasSEInReg(false), HasCondMov(false), HasMulDivAdd(false), - HasMinMax(false), HasSwap(false), HasBitCount(false), InMips16Mode(false) + HasMinMax(false), HasSwap(false), HasBitCount(false), InMips16Mode(false), + HasDSP(false), HasDSPR2(false), IsAndroid(false) // @LOCALMOD-START , TargetTriple(TT) // @LOCALMOD-END diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index 97d3600b1b..d8851a04eb 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -193,7 +193,7 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const { // to adjust the stack pointer (we fit in the Red Zone). For 64-bit // SVR4, we also require a stack frame if we need to spill the CR, // since this spill area is addressed relative to the stack pointer. - bool DisableRedZone = MF.getFunction()->hasFnAttr(Attribute::NoRedZone); + bool DisableRedZone = MF.getFunction()->getFnAttributes().hasNoRedZoneAttr(); // FIXME SVR4 The 32-bit SVR4 ABI has no red zone. However, it can // still generate stackless code if all local vars are reg-allocated. // Try: (FrameSize <= 224 @@ -255,7 +255,7 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const { // Naked functions have no stack frame pushed, so we don't have a frame // pointer. - if (MF.getFunction()->hasFnAttr(Attribute::Naked)) + if (MF.getFunction()->getFnAttributes().hasNakedAttr()) return false; return MF.getTarget().Options.DisableFramePointerElim(MF) || diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 2e8fa1842a..27f26cd5fd 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -2048,7 +2048,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin_Or_64SVR4( SmallVector<SDValue, 8> MemOps; unsigned nAltivecParamsAtEnd = 0; - for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { + Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); + for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo, ++FuncArg) { SDValue ArgVal; bool needsLoad = false; EVT ObjectVT = Ins[ArgNo].VT; @@ -2103,7 +2104,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin_Or_64SVR4( EVT ObjType = (ObjSize == 1 ? MVT::i8 : (ObjSize == 2 ? 
MVT::i16 : MVT::i32)); SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo(), + MachinePointerInfo(FuncArg, + CurArgOffset), ObjType, false, false, 0); MemOps.push_back(Store); ++GPR_idx; @@ -2136,7 +2138,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin_Or_64SVR4( } SDValue Store = DAG.getStore(Val.getValue(1), dl, Shifted, FIN, - MachinePointerInfo(), + MachinePointerInfo(FuncArg, ArgOffset), false, false, 0); MemOps.push_back(Store); ++GPR_idx; @@ -6000,7 +6002,7 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, bool is31 = (getTargetMachine().Options.DisableFramePointerElim(MF) || MFI->hasVarSizedObjects()) && MFI->getStackSize() && - !MF.getFunction()->hasFnAttr(Attribute::Naked); + !MF.getFunction()->getFnAttributes().hasNakedAttr(); unsigned FrameReg = isPPC64 ? (is31 ? PPC::X31 : PPC::X1) : (is31 ? PPC::R31 : PPC::R1); SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 285e74a4c2..1665d7313c 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -596,7 +596,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // to Offset to get the correct offset. // Naked functions have stack size 0, although getStackSize may not reflect that // because we didn't call all the pieces that compute it for naked functions. - if (!MF.getFunction()->hasFnAttr(Attribute::Naked)) + if (!MF.getFunction()->getFnAttributes().hasNakedAttr()) Offset += MFI->getStackSize(); // If we can, encode the offset directly into the instruction. If this is a diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp index cc6dc1e259..0040147022 100644 --- a/lib/Target/TargetData.cpp +++ b/lib/Target/TargetData.cpp @@ -314,6 +314,8 @@ void TargetData::setAlignment(AlignTypeEnum align_type, unsigned abi_align, unsigned pref_align, uint32_t bit_width) { assert(abi_align <= pref_align && "Preferred alignment worse than ABI!"); + assert(pref_align < (1 << 16) && "Alignment doesn't fit in bitfield"); + assert(bit_width < (1 << 24) && "Bit width doesn't fit in bitfield"); for (unsigned i = 0, e = Alignments.size(); i != e; ++i) { if (Alignments[i].AlignType == align_type && Alignments[i].TypeBitWidth == bit_width) { diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 77961e53ae..9263bdde20 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -60,10 +60,6 @@ private: bool ParseDirectiveWord(unsigned Size, SMLoc L); bool ParseDirectiveCode(StringRef IDVal, SMLoc L); - bool mnemonicIsValid(StringRef Mnemonic) { - return mnemonicIsValidImpl(Mnemonic); - } - bool processInstruction(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Ops); @@ -77,13 +73,6 @@ private: unsigned &OrigErrorInfo, bool matchingInlineAsm = false); - unsigned getMCInstOperandNum(unsigned Kind, MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands, - unsigned OperandNum, unsigned &NumMCOperands) { - return getMCInstOperandNumImpl(Kind, Inst, Operands, OperandNum, - NumMCOperands); - } - /// isSrcOp - Returns true if operand is either (%rsi) or %ds:%(rsi) /// in 64bit mode or (%esi) or %es:(%esi) in 32bit mode. 
bool isSrcOp(X86Operand &Op); @@ -1636,16 +1625,20 @@ MatchInstruction(SMLoc IDLoc, unsigned &Kind, unsigned Match1, Match2, Match3, Match4; unsigned tKind; - Match1 = MatchInstructionImpl(Operands, tKind, Inst, ErrorInfoIgnore); + Match1 = MatchInstructionImpl(Operands, tKind, Inst, ErrorInfoIgnore, + isParsingIntelSyntax()); if (Match1 == Match_Success) Kind = tKind; Tmp[Base.size()] = Suffixes[1]; - Match2 = MatchInstructionImpl(Operands, tKind, Inst, ErrorInfoIgnore); + Match2 = MatchInstructionImpl(Operands, tKind, Inst, ErrorInfoIgnore, + isParsingIntelSyntax()); if (Match2 == Match_Success) Kind = tKind; Tmp[Base.size()] = Suffixes[2]; - Match3 = MatchInstructionImpl(Operands, tKind, Inst, ErrorInfoIgnore); + Match3 = MatchInstructionImpl(Operands, tKind, Inst, ErrorInfoIgnore, + isParsingIntelSyntax()); if (Match3 == Match_Success) Kind = tKind; Tmp[Base.size()] = Suffixes[3]; - Match4 = MatchInstructionImpl(Operands, tKind, Inst, ErrorInfoIgnore); + Match4 = MatchInstructionImpl(Operands, tKind, Inst, ErrorInfoIgnore, + isParsingIntelSyntax()); if (Match4 == Match_Success) Kind = tKind; // Restore the old token. diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp index 46e72f9f60..b123afa001 100644 --- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp @@ -15,6 +15,7 @@ #define DEBUG_TYPE "asm-printer" #include "X86ATTInstPrinter.h" #include "X86InstComments.h" +#include "MCTargetDesc/X86BaseInfo.h" #include "MCTargetDesc/X86MCTargetDesc.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCAsmInfo.h" @@ -38,6 +39,12 @@ void X86ATTInstPrinter::printRegName(raw_ostream &OS, void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot) { + const MCInstrDesc &Desc = MII.get(MI->getOpcode()); + uint64_t TSFlags = Desc.TSFlags; + + if (TSFlags & X86II::LOCK) + OS << "\tlock\n"; + // Try to print any aliases first. if (!printAliasInstr(MI, OS)) printInstruction(MI, OS); diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp index ad14e34707..f9bb3be9d7 100644 --- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp @@ -15,6 +15,7 @@ #define DEBUG_TYPE "asm-printer" #include "X86IntelInstPrinter.h" #include "X86InstComments.h" +#include "MCTargetDesc/X86BaseInfo.h" #include "MCTargetDesc/X86MCTargetDesc.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCExpr.h" @@ -32,6 +33,12 @@ void X86IntelInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot) { + const MCInstrDesc &Desc = MII.get(MI->getOpcode()); + uint64_t TSFlags = Desc.TSFlags; + + if (TSFlags & X86II::LOCK) + OS << "\tlock\n"; + printInstruction(MI, OS); // Next always print the annotation. 
diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp index 3c0e3e6f2d..7706b9308e 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp @@ -34,6 +34,10 @@ AsmWriterFlavor("x86-asm-syntax", cl::init(ATT), clEnumValN(Intel, "intel", "Emit Intel-style assembly"), clEnumValEnd)); +static cl::opt<bool> +MarkedJTDataRegions("mark-data-regions", cl::init(false), + cl::desc("Mark code section jump table data regions."), + cl::Hidden); void X86MCAsmInfoDarwin::anchor() { } @@ -59,6 +63,7 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) { SupportsDebugInformation = true; DwarfUsesInlineInfoSection = true; + UseDataRegionDirectives = MarkedJTDataRegions; // Exceptions handling ExceptionsType = ExceptionHandling::DwarfCFI; diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp index f0f1982d57..7ff058edbc 100644 --- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp @@ -11,11 +11,13 @@ #include "MCTargetDesc/X86MCTargetDesc.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCAsmLayout.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCMachObjectWriter.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCValue.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" #include "llvm/Object/MachOFormat.h" using namespace llvm; @@ -23,7 +25,7 @@ using namespace llvm::object; namespace { class X86MachObjectWriter : public MCMachObjectTargetWriter { - void RecordScatteredRelocation(MachObjectWriter *Writer, + bool RecordScatteredRelocation(MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, @@ -335,7 +337,7 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer, Writer->addRelocation(Fragment->getParent(), MRE); } -void X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer, +bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, @@ -381,6 +383,19 @@ void X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer, // Relocations are written out in reverse order, so the PAIR comes first. if (Type == macho::RIT_Difference || Type == macho::RIT_Generic_LocalDifference) { + // If the offset is too large to fit in a scattered relocation, + // we're hosed. It's an unfortunate limitation of the MachO format. + if (FixupOffset > 0xffffff) { + char Buffer[32]; + format("0x%x", FixupOffset).print(Buffer, sizeof(Buffer)); + Asm.getContext().FatalError(Fixup.getLoc(), + Twine("Section too large, can't encode " + "r_address (") + Buffer + + ") into 24 bits of scattered " + "relocation entry."); + llvm_unreachable("fatal error returned?!"); + } + macho::RelocationEntry MRE; MRE.Word0 = ((0 << 0) | (macho::RIT_Pair << 24) | @@ -389,6 +404,16 @@ void X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer, macho::RF_Scattered); MRE.Word1 = Value2; Writer->addRelocation(Fragment->getParent(), MRE); + } else { + // If the offset is more than 24-bits, it won't fit in a scattered + // relocation offset field, so we fall back to using a non-scattered + // relocation. 
This is a bit risky, as if the offset reaches out of + // the block and the linker is doing scattered loading on this + // symbol, things can go badly. + // + // Required for 'as' compatibility. + if (FixupOffset > 0xffffff) + return false; } macho::RelocationEntry MRE; @@ -399,6 +424,7 @@ void X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer, macho::RF_Scattered); MRE.Word1 = Value; Writer->addRelocation(Fragment->getParent(), MRE); + return true; } void X86MachObjectWriter::RecordTLVPRelocation(MachObjectWriter *Writer, @@ -469,9 +495,11 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer, // If this is a difference or a defined symbol plus an offset, then we need a // scattered relocation entry. Differences always require scattered // relocations. - if (Target.getSymB()) - return RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, - Target, Log2Size, FixedValue); + if (Target.getSymB()) { + RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, + Target, Log2Size, FixedValue); + return; + } // Get the symbol data, if any. MCSymbolData *SD = 0; @@ -483,9 +511,13 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer, uint32_t Offset = Target.getConstant(); if (IsPCRel) Offset += 1 << Log2Size; - if (Offset && SD && !Writer->doesSymbolRequireExternRelocation(SD)) - return RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, - Target, Log2Size, FixedValue); + // Try to record the scattered relocation if needed. Fall back to non + // scattered if necessary (see comments in RecordScatteredRelocation() + // for details). + if (Offset && SD && !Writer->doesSymbolRequireExternRelocation(SD) && + RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, + Target, Log2Size, FixedValue)) + return; // See <reloc.h>. uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 0d8def0e47..85922f1277 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -676,7 +676,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { // function, and use up to 128 bytes of stack space, don't have a frame // pointer, calls, or dynamic alloca then we do not need to adjust the // stack pointer (we fit in the Red Zone). - if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) && + if (Is64Bit && !Fn->getFnAttributes().hasNoRedZoneAttr() && !RegInfo->needsStackRealignment(MF) && !MFI->hasVarSizedObjects() && // No dynamic alloca. !MFI->adjustsStack() && // No calls. diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index b409e88148..767e261a82 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -443,7 +443,7 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) { void X86DAGToDAGISel::PreprocessISelDAG() { // OptForSize is used in pattern predicates that isel is matching. 
- OptForSize = MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize); + OptForSize = MF->getFunction()->getFnAttributes().hasOptimizeForSizeAttr(); for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), E = CurDAG->allnodes_end(); I != E; ) { @@ -2253,6 +2253,10 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { case X86ISD::ATOMSUB64_DAG: case X86ISD::ATOMNAND64_DAG: case X86ISD::ATOMAND64_DAG: + case X86ISD::ATOMMAX64_DAG: + case X86ISD::ATOMMIN64_DAG: + case X86ISD::ATOMUMAX64_DAG: + case X86ISD::ATOMUMIN64_DAG: case X86ISD::ATOMSWAP64_DAG: { unsigned Opc; switch (Opcode) { @@ -2263,6 +2267,10 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { case X86ISD::ATOMSUB64_DAG: Opc = X86::ATOMSUB6432; break; case X86ISD::ATOMNAND64_DAG: Opc = X86::ATOMNAND6432; break; case X86ISD::ATOMAND64_DAG: Opc = X86::ATOMAND6432; break; + case X86ISD::ATOMMAX64_DAG: Opc = X86::ATOMMAX6432; break; + case X86ISD::ATOMMIN64_DAG: Opc = X86::ATOMMIN6432; break; + case X86ISD::ATOMUMAX64_DAG: Opc = X86::ATOMUMAX6432; break; + case X86ISD::ATOMUMIN64_DAG: Opc = X86::ATOMUMIN6432; break; case X86ISD::ATOMSWAP64_DAG: Opc = X86::ATOMSWAP6432; break; } SDNode *RetVal = SelectAtomic64(Node, Opc); @@ -2389,13 +2397,16 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue N1 = Node->getOperand(1); bool isSigned = Opcode == ISD::SMUL_LOHI; + bool hasBMI2 = Subtarget->hasBMI2(); if (!isSigned) { switch (NVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unsupported VT!"); case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break; case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break; - case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break; - case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break; + case MVT::i32: Opc = hasBMI2 ? X86::MULX32rr : X86::MUL32r; + MOpc = hasBMI2 ? X86::MULX32rm : X86::MUL32m; break; + case MVT::i64: Opc = hasBMI2 ? X86::MULX64rr : X86::MUL64r; + MOpc = hasBMI2 ? 
X86::MULX64rm : X86::MUL64m; break; } } else { switch (NVT.getSimpleVT().SimpleTy) { @@ -2407,13 +2418,31 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { } } - unsigned LoReg, HiReg; - switch (NVT.getSimpleVT().SimpleTy) { - default: llvm_unreachable("Unsupported VT!"); - case MVT::i8: LoReg = X86::AL; HiReg = X86::AH; break; - case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; break; - case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break; - case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break; + unsigned SrcReg, LoReg, HiReg; + switch (Opc) { + default: llvm_unreachable("Unknown MUL opcode!"); + case X86::IMUL8r: + case X86::MUL8r: + SrcReg = LoReg = X86::AL; HiReg = X86::AH; + break; + case X86::IMUL16r: + case X86::MUL16r: + SrcReg = LoReg = X86::AX; HiReg = X86::DX; + break; + case X86::IMUL32r: + case X86::MUL32r: + SrcReg = LoReg = X86::EAX; HiReg = X86::EDX; + break; + case X86::IMUL64r: + case X86::MUL64r: + SrcReg = LoReg = X86::RAX; HiReg = X86::RDX; + break; + case X86::MULX32rr: + SrcReg = X86::EDX; LoReg = HiReg = 0; + break; + case X86::MULX64rr: + SrcReg = X86::RDX; LoReg = HiReg = 0; + break; } SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; @@ -2425,22 +2454,47 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { std::swap(N0, N1); } - SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg, + SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SrcReg, N0, SDValue()).getValue(1); + SDValue ResHi, ResLo; if (foldedLoad) { + SDValue Chain; SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), InFlag }; - SDNode *CNode = - CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops, - array_lengthof(Ops)); - InFlag = SDValue(CNode, 1); + if (MOpc == X86::MULX32rm || MOpc == X86::MULX64rm) { + SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other, MVT::Glue); + SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops, + array_lengthof(Ops)); + ResHi = SDValue(CNode, 0); + ResLo = SDValue(CNode, 1); + Chain = SDValue(CNode, 2); + InFlag = SDValue(CNode, 3); + } else { + SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue); + SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops, + array_lengthof(Ops)); + Chain = SDValue(CNode, 0); + InFlag = SDValue(CNode, 1); + } // Update the chain. - ReplaceUses(N1.getValue(1), SDValue(CNode, 0)); + ReplaceUses(N1.getValue(1), Chain); } else { - SDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag); - InFlag = SDValue(CNode, 0); + SDValue Ops[] = { N1, InFlag }; + if (Opc == X86::MULX32rr || Opc == X86::MULX64rr) { + SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Glue); + SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops, + array_lengthof(Ops)); + ResHi = SDValue(CNode, 0); + ResLo = SDValue(CNode, 1); + InFlag = SDValue(CNode, 2); + } else { + SDVTList VTs = CurDAG->getVTList(MVT::Glue); + SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops, + array_lengthof(Ops)); + InFlag = SDValue(CNode, 0); + } } // Prevent use of AH in a REX instruction by referencing AX instead. @@ -2465,19 +2519,25 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { } // Copy the low half of the result, if it is needed. 
if (!SDValue(Node, 0).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - LoReg, NVT, InFlag); - InFlag = Result.getValue(2); - ReplaceUses(SDValue(Node, 0), Result); - DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); + if (ResLo.getNode() == 0) { + assert(LoReg && "Register for low half is not defined!"); + ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, NVT, + InFlag); + InFlag = ResLo.getValue(2); + } + ReplaceUses(SDValue(Node, 0), ResLo); + DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG); dbgs() << '\n'); } // Copy the high half of the result, if it is needed. if (!SDValue(Node, 1).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - HiReg, NVT, InFlag); - InFlag = Result.getValue(2); - ReplaceUses(SDValue(Node, 1), Result); - DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); + if (ResHi.getNode() == 0) { + assert(HiReg && "Register for high half is not defined!"); + ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg, NVT, + InFlag); + InFlag = ResHi.getValue(2); + } + ReplaceUses(SDValue(Node, 1), ResHi); + DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG); dbgs() << '\n'); } return NULL; @@ -2678,7 +2738,13 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { MVT::i8, Reg); // Emit a testb. - return CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32, Subreg, Imm); + SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32, + Subreg, Imm); + // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has + // one, do not call ReplaceAllUsesWith. + ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), + SDValue(NewNode, 0)); + return NULL; } // For example, "testl %eax, $2048" to "testb %ah, $8". @@ -2709,8 +2775,13 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { // Emit a testb. The EXTRACT_SUBREG becomes a COPY that can only // target GR8_NOREX registers, so make sure the register class is // forced. - return CurDAG->getMachineNode(X86::TEST8ri_NOREX, dl, MVT::i32, - Subreg, ShiftedImm); + SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri_NOREX, dl, + MVT::i32, Subreg, ShiftedImm); + // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has + // one, do not call ReplaceAllUsesWith. + ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), + SDValue(NewNode, 0)); + return NULL; } // For example, "testl %eax, $32776" to "testw %ax, $32776". @@ -2726,7 +2797,13 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { MVT::i16, Reg); // Emit a testw. - return CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32, Subreg, Imm); + SDNode *NewNode = CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32, + Subreg, Imm); + // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has + // one, do not call ReplaceAllUsesWith. + ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), + SDValue(NewNode, 0)); + return NULL; } // For example, "testq %rax, $268468232" to "testl %eax, $268468232". @@ -2742,7 +2819,13 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { MVT::i32, Reg); // Emit a testl. - return CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32, Subreg, Imm); + SDNode *NewNode = CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32, + Subreg, Imm); + // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has + // one, do not call ReplaceAllUsesWith. + ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 
1 : 0)), + SDValue(NewNode, 0)); + return NULL; } } break; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index bdfe245027..ffaf04cea7 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -522,6 +522,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom); setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Custom); setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i64, Custom); } if (Subtarget->hasCmpxchg16b()) { @@ -1357,7 +1361,7 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size, // cases like PR2962. This should be removed when PR2962 is fixed. const Function *F = MF.getFunction(); if (IsZeroVal && - !F->hasFnAttr(Attribute::NoImplicitFloat)) { + !F->getFnAttributes().hasNoImplicitFloatAttr()) { if (Size >= 16 && (Subtarget->isUnalignedMemAccessFast() || ((DstAlign == 0 || DstAlign >= 16) && @@ -2048,7 +2052,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs, TotalNumIntRegs); - bool NoImplicitFloatOps = Fn->hasFnAttr(Attribute::NoImplicitFloat); + bool NoImplicitFloatOps = Fn->getFnAttributes().hasNoImplicitFloatAttr(); assert(!(NumXMMRegs && !Subtarget->hasSSE1()) && "SSE register cannot be used when SSE is disabled!"); assert(!(NumXMMRegs && MF.getTarget().Options.UseSoftFloat && @@ -2240,7 +2244,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Check if it's really possible to do a tail call. isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, SR != NotStructReturn, - MF.getFunction()->hasStructRetAttr(), + MF.getFunction()->hasStructRetAttr(), CLI.RetTy, Outs, OutVals, Ins, DAG); // Sibcalls are automatically detected tailcalls which do not require @@ -2524,7 +2528,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, OpFlags = X86II::MO_DARWIN_STUB; } else if (Subtarget->isPICStyleRIPRel() && isa<Function>(GV) && - cast<Function>(GV)->hasFnAttr(Attribute::NonLazyBind)) { + cast<Function>(GV)->getFnAttributes().hasNonLazyBindAttr()) { // If the function is marked as non-lazy, generate an indirect call // which loads from the GOT directly. This avoids runtime overhead // at the cost of eager binding (and one extra byte of encoding). @@ -2761,6 +2765,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, bool isVarArg, bool isCalleeStructRet, bool isCallerStructRet, + Type *RetTy, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, @@ -2772,6 +2777,13 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, // If -tailcallopt is specified, make fastcc functions tail-callable. const MachineFunction &MF = DAG.getMachineFunction(); const Function *CallerF = DAG.getMachineFunction().getFunction(); + + // If the function return type is x86_fp80 and the callee return type is not, + // then the FP_EXTEND of the call result is not a nop. It's not safe to + // perform a tailcall optimization here. 
+ if (CallerF->getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty()) + return false; + CallingConv::ID CallerCC = CallerF->getCallingConv(); bool CCMatch = CallerCC == CalleeCC; @@ -6661,7 +6673,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { bool HasAVX = Subtarget->hasAVX(); bool HasAVX2 = Subtarget->hasAVX2(); MachineFunction &MF = DAG.getMachineFunction(); - bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize); + bool OptForSize = MF.getFunction()->getFnAttributes().hasOptimizeForSizeAttr(); assert(VT.getSizeInBits() != 64 && "Can't lower MMX shuffles"); @@ -9783,7 +9795,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { // Sanity Check: Make sure using fp_offset makes sense. assert(!getTargetMachine().Options.UseSoftFloat && !(DAG.getMachineFunction() - .getFunction()->hasFnAttr(Attribute::NoImplicitFloat)) && + .getFunction()->getFnAttributes().hasNoImplicitFloatAttr()) && Subtarget->hasSSE1()); } @@ -11769,6 +11781,10 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, case ISD::ATOMIC_LOAD_OR: case ISD::ATOMIC_LOAD_SUB: case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_UMAX: + case ISD::ATOMIC_LOAD_UMIN: case ISD::ATOMIC_SWAP: { unsigned Opc; switch (N->getOpcode()) { @@ -11791,6 +11807,18 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, case ISD::ATOMIC_LOAD_XOR: Opc = X86ISD::ATOMXOR64_DAG; break; + case ISD::ATOMIC_LOAD_MAX: + Opc = X86ISD::ATOMMAX64_DAG; + break; + case ISD::ATOMIC_LOAD_MIN: + Opc = X86ISD::ATOMMIN64_DAG; + break; + case ISD::ATOMIC_LOAD_UMAX: + Opc = X86ISD::ATOMUMAX64_DAG; + break; + case ISD::ATOMIC_LOAD_UMIN: + Opc = X86ISD::ATOMUMIN64_DAG; + break; case ISD::ATOMIC_SWAP: Opc = X86ISD::ATOMSWAP64_DAG; break; @@ -12182,6 +12210,10 @@ static unsigned getNonAtomic6432Opcode(unsigned Opc, unsigned &HiOpc) { case X86::ATOMADD6432: HiOpc = X86::ADC32rr; return X86::ADD32rr; case X86::ATOMSUB6432: HiOpc = X86::SBB32rr; return X86::SUB32rr; case X86::ATOMSWAP6432: HiOpc = X86::MOV32rr; return X86::MOV32rr; + case X86::ATOMMAX6432: HiOpc = X86::SETLr; return X86::SETLr; + case X86::ATOMMIN6432: HiOpc = X86::SETGr; return X86::SETGr; + case X86::ATOMUMAX6432: HiOpc = X86::SETBr; return X86::SETBr; + case X86::ATOMUMIN6432: HiOpc = X86::SETAr; return X86::SETAr; } llvm_unreachable("Unhandled atomic-load-op opcode!"); } @@ -12499,6 +12531,7 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI, SrcHiReg = MI->getOperand(CurOp++).getReg(); const TargetRegisterClass *RC = &X86::GR32RegClass; + const TargetRegisterClass *RC8 = &X86::GR8RegClass; unsigned LCMPXCHGOpc = X86::LCMPXCHG8B; unsigned LOADOpc = X86::MOV32rm; @@ -12586,6 +12619,55 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI, BuildMI(mainMBB, DL, TII->get(NOTOpc), t1H).addReg(t2H); break; } + case X86::ATOMMAX6432: + case X86::ATOMMIN6432: + case X86::ATOMUMAX6432: + case X86::ATOMUMIN6432: { + unsigned HiOpc; + unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc); + unsigned cL = MRI.createVirtualRegister(RC8); + unsigned cH = MRI.createVirtualRegister(RC8); + unsigned cL32 = MRI.createVirtualRegister(RC); + unsigned cH32 = MRI.createVirtualRegister(RC); + unsigned cc = MRI.createVirtualRegister(RC); + // cl := cmp src_lo, lo + BuildMI(mainMBB, DL, TII->get(X86::CMP32rr)) + .addReg(SrcLoReg).addReg(LoReg); + BuildMI(mainMBB, DL, TII->get(LoOpc), cL); + BuildMI(mainMBB, DL, TII->get(X86::MOVZX32rr8), cL32).addReg(cL); + // ch 
:= cmp src_hi, hi + BuildMI(mainMBB, DL, TII->get(X86::CMP32rr)) + .addReg(SrcHiReg).addReg(HiReg); + BuildMI(mainMBB, DL, TII->get(HiOpc), cH); + BuildMI(mainMBB, DL, TII->get(X86::MOVZX32rr8), cH32).addReg(cH); + // cc := if (src_hi == hi) ? cl : ch; + if (Subtarget->hasCMov()) { + BuildMI(mainMBB, DL, TII->get(X86::CMOVE32rr), cc) + .addReg(cH32).addReg(cL32); + } else { + MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), cc) + .addReg(cH32).addReg(cL32) + .addImm(X86::COND_E); + mainMBB = EmitLoweredSelect(MIB, mainMBB); + } + BuildMI(mainMBB, DL, TII->get(X86::TEST32rr)).addReg(cc).addReg(cc); + if (Subtarget->hasCMov()) { + BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t1L) + .addReg(SrcLoReg).addReg(LoReg); + BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t1H) + .addReg(SrcHiReg).addReg(HiReg); + } else { + MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t1L) + .addReg(SrcLoReg).addReg(LoReg) + .addImm(X86::COND_NE); + mainMBB = EmitLoweredSelect(MIB, mainMBB); + MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t1H) + .addReg(SrcHiReg).addReg(HiReg) + .addImm(X86::COND_NE); + mainMBB = EmitLoweredSelect(MIB, mainMBB); + } + break; + } case X86::ATOMSWAP6432: { unsigned HiOpc; unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc); @@ -13576,6 +13658,10 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case X86::ATOMNAND6432: case X86::ATOMADD6432: case X86::ATOMSUB6432: + case X86::ATOMMAX6432: + case X86::ATOMMIN6432: + case X86::ATOMUMAX6432: + case X86::ATOMUMIN6432: case X86::ATOMSWAP6432: return EmitAtomicLoadArith6432(MI, BB); @@ -15562,7 +15648,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, return SDValue(); const Function *F = DAG.getMachineFunction().getFunction(); - bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat); + bool NoImplicitFloatOps = F->getFnAttributes().hasNoImplicitFloatAttr(); bool F64IsLegal = !DAG.getTarget().Options.UseSoftFloat && !NoImplicitFloatOps && Subtarget->hasSSE2(); if ((VT.isVector() || diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index d3545b0e9f..a53909b7a0 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -355,6 +355,10 @@ namespace llvm { ATOMXOR64_DAG, ATOMAND64_DAG, ATOMNAND64_DAG, + ATOMMAX64_DAG, + ATOMMIN64_DAG, + ATOMUMAX64_DAG, + ATOMUMIN64_DAG, ATOMSWAP64_DAG, // LCMPXCHG_DAG, LCMPXCHG8_DAG, LCMPXCHG16_DAG - Compare and swap. @@ -752,6 +756,7 @@ namespace llvm { bool isVarArg, bool isCalleeStructRet, bool isCallerStructRet, + Type *RetTy, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index 1296bcbe89..3a1ac11f9c 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -561,7 +561,6 @@ defm ATOMSWAP : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMSWAP">; // TODO: Get this to fold the constant into the instruction. 
let isCodeGenOnly = 1, Defs = [EFLAGS] in def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero), - "lock\n\t" "or{l}\t{$zero, $dst|$dst, $zero}", [], IIC_ALU_MEM>, Requires<[In32BitMode]>, LOCK; @@ -581,72 +580,72 @@ let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in { def #NAME#8mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, RegOpc{3}, RegOpc{2}, RegOpc{1}, 0 }, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2), - !strconcat("lock\n\t", mnemonic, "{b}\t", + !strconcat(mnemonic, "{b}\t", "{$src2, $dst|$dst, $src2}"), [], IIC_ALU_NONMEM>, LOCK; def #NAME#16mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 }, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), - !strconcat("lock\n\t", mnemonic, "{w}\t", + !strconcat(mnemonic, "{w}\t", "{$src2, $dst|$dst, $src2}"), [], IIC_ALU_NONMEM>, OpSize, LOCK; def #NAME#32mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 }, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), - !strconcat("lock\n\t", mnemonic, "{l}\t", + !strconcat(mnemonic, "{l}\t", "{$src2, $dst|$dst, $src2}"), [], IIC_ALU_NONMEM>, LOCK; def #NAME#64mr : RI<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 }, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), - !strconcat("lock\n\t", mnemonic, "{q}\t", + !strconcat(mnemonic, "{q}\t", "{$src2, $dst|$dst, $src2}"), [], IIC_ALU_NONMEM>, LOCK; def #NAME#8mi : Ii8<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 0 }, ImmMod, (outs), (ins i8mem :$dst, i8imm :$src2), - !strconcat("lock\n\t", mnemonic, "{b}\t", + !strconcat(mnemonic, "{b}\t", "{$src2, $dst|$dst, $src2}"), [], IIC_ALU_MEM>, LOCK; def #NAME#16mi : Ii16<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 }, ImmMod, (outs), (ins i16mem :$dst, i16imm :$src2), - !strconcat("lock\n\t", mnemonic, "{w}\t", + !strconcat(mnemonic, "{w}\t", "{$src2, $dst|$dst, $src2}"), [], IIC_ALU_MEM>, OpSize, LOCK; def #NAME#32mi : Ii32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 }, ImmMod, (outs), (ins i32mem :$dst, i32imm :$src2), - !strconcat("lock\n\t", mnemonic, "{l}\t", + !strconcat(mnemonic, "{l}\t", "{$src2, $dst|$dst, $src2}"), [], IIC_ALU_MEM>, LOCK; def #NAME#64mi32 : RIi32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 }, ImmMod, (outs), (ins i64mem :$dst, i64i32imm :$src2), - !strconcat("lock\n\t", mnemonic, "{q}\t", + !strconcat(mnemonic, "{q}\t", "{$src2, $dst|$dst, $src2}"), [], IIC_ALU_MEM>, LOCK; def #NAME#16mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4}, ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 }, ImmMod, (outs), (ins i16mem :$dst, i16i8imm :$src2), - !strconcat("lock\n\t", mnemonic, "{w}\t", + !strconcat(mnemonic, "{w}\t", "{$src2, $dst|$dst, $src2}"), [], IIC_ALU_MEM>, OpSize, LOCK; def #NAME#32mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4}, ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 }, ImmMod, (outs), (ins i32mem :$dst, i32i8imm :$src2), - !strconcat("lock\n\t", mnemonic, "{l}\t", + !strconcat(mnemonic, "{l}\t", "{$src2, $dst|$dst, $src2}"), [], IIC_ALU_MEM>, LOCK; def #NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4}, ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 }, ImmMod, (outs), (ins i64mem :$dst, i64i8imm :$src2), - !strconcat("lock\n\t", mnemonic, "{q}\t", + !strconcat(mnemonic, "{q}\t", "{$src2, $dst|$dst, $src2}"), [], IIC_ALU_MEM>, LOCK; @@ -666,16 +665,16 @@ multiclass 
LOCK_ArithUnOp<bits<8> Opc8, bits<8> Opc, Format Form, let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in { def #NAME#8m : I<Opc8, Form, (outs), (ins i8mem :$dst), - !strconcat("lock\n\t", mnemonic, "{b}\t$dst"), + !strconcat(mnemonic, "{b}\t$dst"), [], IIC_UNARY_MEM>, LOCK; def #NAME#16m : I<Opc, Form, (outs), (ins i16mem:$dst), - !strconcat("lock\n\t", mnemonic, "{w}\t$dst"), + !strconcat(mnemonic, "{w}\t$dst"), [], IIC_UNARY_MEM>, OpSize, LOCK; def #NAME#32m : I<Opc, Form, (outs), (ins i32mem:$dst), - !strconcat("lock\n\t", mnemonic, "{l}\t$dst"), + !strconcat(mnemonic, "{l}\t$dst"), [], IIC_UNARY_MEM>, LOCK; def #NAME#64m : RI<Opc, Form, (outs), (ins i64mem:$dst), - !strconcat("lock\n\t", mnemonic, "{q}\t$dst"), + !strconcat(mnemonic, "{q}\t$dst"), [], IIC_UNARY_MEM>, LOCK; } } @@ -689,7 +688,7 @@ multiclass LCMPXCHG_UnOp<bits<8> Opc, Format Form, string mnemonic, InstrItinClass itin> { let isCodeGenOnly = 1 in { def #NAME# : I<Opc, Form, (outs), (ins x86memop:$ptr), - !strconcat("lock\n\t", mnemonic, "\t$ptr"), + !strconcat(mnemonic, "\t$ptr"), [(frag addr:$ptr)], itin>, TB, LOCK; } } @@ -700,23 +699,19 @@ multiclass LCMPXCHG_BinOp<bits<8> Opc8, bits<8> Opc, Format Form, let isCodeGenOnly = 1 in { let Defs = [AL, EFLAGS], Uses = [AL] in def #NAME#8 : I<Opc8, Form, (outs), (ins i8mem:$ptr, GR8:$swap), - !strconcat("lock\n\t", mnemonic, - "{b}\t{$swap, $ptr|$ptr, $swap}"), + !strconcat(mnemonic, "{b}\t{$swap, $ptr|$ptr, $swap}"), [(frag addr:$ptr, GR8:$swap, 1)], itin8>, TB, LOCK; let Defs = [AX, EFLAGS], Uses = [AX] in def #NAME#16 : I<Opc, Form, (outs), (ins i16mem:$ptr, GR16:$swap), - !strconcat("lock\n\t", mnemonic, - "{w}\t{$swap, $ptr|$ptr, $swap}"), + !strconcat(mnemonic, "{w}\t{$swap, $ptr|$ptr, $swap}"), [(frag addr:$ptr, GR16:$swap, 2)], itin>, TB, OpSize, LOCK; let Defs = [EAX, EFLAGS], Uses = [EAX] in def #NAME#32 : I<Opc, Form, (outs), (ins i32mem:$ptr, GR32:$swap), - !strconcat("lock\n\t", mnemonic, - "{l}\t{$swap, $ptr|$ptr, $swap}"), + !strconcat(mnemonic, "{l}\t{$swap, $ptr|$ptr, $swap}"), [(frag addr:$ptr, GR32:$swap, 4)], itin>, TB, LOCK; let Defs = [RAX, EFLAGS], Uses = [RAX] in def #NAME#64 : RI<Opc, Form, (outs), (ins i64mem:$ptr, GR64:$swap), - !strconcat("lock\n\t", mnemonic, - "{q}\t{$swap, $ptr|$ptr, $swap}"), + !strconcat(mnemonic, "{q}\t{$swap, $ptr|$ptr, $swap}"), [(frag addr:$ptr, GR64:$swap, 8)], itin>, TB, LOCK; } } @@ -744,31 +739,27 @@ multiclass ATOMIC_LOAD_BINOP<bits<8> opc8, bits<8> opc, string mnemonic, let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1 in { def #NAME#8 : I<opc8, MRMSrcMem, (outs GR8:$dst), (ins GR8:$val, i8mem:$ptr), - !strconcat("lock\n\t", mnemonic, - "{b}\t{$val, $ptr|$ptr, $val}"), + !strconcat(mnemonic, "{b}\t{$val, $ptr|$ptr, $val}"), [(set GR8:$dst, (!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val))], itin8>; def #NAME#16 : I<opc, MRMSrcMem, (outs GR16:$dst), (ins GR16:$val, i16mem:$ptr), - !strconcat("lock\n\t", mnemonic, - "{w}\t{$val, $ptr|$ptr, $val}"), + !strconcat(mnemonic, "{w}\t{$val, $ptr|$ptr, $val}"), [(set GR16:$dst, (!cast<PatFrag>(frag # "_16") addr:$ptr, GR16:$val))], itin>, OpSize; def #NAME#32 : I<opc, MRMSrcMem, (outs GR32:$dst), (ins GR32:$val, i32mem:$ptr), - !strconcat("lock\n\t", mnemonic, - "{l}\t{$val, $ptr|$ptr, $val}"), + !strconcat(mnemonic, "{l}\t{$val, $ptr|$ptr, $val}"), [(set GR32:$dst, (!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val))], itin>; def #NAME#64 : RI<opc, MRMSrcMem, (outs GR64:$dst), (ins GR64:$val, i64mem:$ptr), - !strconcat("lock\n\t", 
mnemonic, - "{q}\t{$val, $ptr|$ptr, $val}"), + !strconcat(mnemonic, "{q}\t{$val, $ptr|$ptr, $val}"), [(set GR64:$dst, (!cast<PatFrag>(frag # "_64") addr:$ptr, GR64:$val))], diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index e595876dcf..af570adb79 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -566,6 +566,16 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VSQRTPSYr_Int, X86::VSQRTPSYm_Int, TB_ALIGN_32 }, { X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrm, TB_NO_REVERSE }, { X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrm, TB_NO_REVERSE }, + + // BMI/BMI2 foldable instructions + { X86::RORX32ri, X86::RORX32mi, 0 }, + { X86::RORX64ri, X86::RORX64mi, 0 }, + { X86::SARX32rr, X86::SARX32rm, 0 }, + { X86::SARX64rr, X86::SARX64rm, 0 }, + { X86::SHRX32rr, X86::SHRX32rm, 0 }, + { X86::SHRX64rr, X86::SHRX64rm, 0 }, + { X86::SHLX32rr, X86::SHLX32rm, 0 }, + { X86::SHLX64rr, X86::SHLX64rm, 0 }, }; for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) { @@ -1145,6 +1155,10 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4mr, TB_ALIGN_16 }, { X86::VFMSUBADDPS4rrY, X86::VFMSUBADDPS4mrY, TB_ALIGN_32 }, { X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4mrY, TB_ALIGN_32 }, + + // BMI/BMI2 foldable instructions + { X86::MULX32rr, X86::MULX32rm, 0 }, + { X86::MULX64rr, X86::MULX64rm, 0 }, }; for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) { @@ -3812,7 +3826,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, // Unless optimizing for size, don't fold to avoid partial // register update stalls - if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize) && + if (!MF.getFunction()->getFnAttributes().hasOptimizeForSizeAttr() && hasPartialRegUpdate(MI->getOpcode())) return 0; @@ -3853,7 +3867,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, // Unless optimizing for size, don't fold to avoid partial // register update stalls - if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize) && + if (!MF.getFunction()->getFnAttributes().hasOptimizeForSizeAttr() && hasPartialRegUpdate(MI->getOpcode())) return 0; diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 4fce5acc23..5074724fb8 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -568,17 +568,17 @@ def HasMMX : Predicate<"Subtarget->hasMMX()">; def Has3DNow : Predicate<"Subtarget->has3DNow()">; def Has3DNowA : Predicate<"Subtarget->has3DNowA()">; def HasSSE1 : Predicate<"Subtarget->hasSSE1()">; -def UseSSE1 : Predicate<"Subtarget->hasSSE1() && Subtarget->hasNoAVX()">; +def UseSSE1 : Predicate<"Subtarget->hasSSE1() && !Subtarget->hasAVX()">; def HasSSE2 : Predicate<"Subtarget->hasSSE2()">; -def UseSSE2 : Predicate<"Subtarget->hasSSE2() && Subtarget->hasNoAVX()">; +def UseSSE2 : Predicate<"Subtarget->hasSSE2() && !Subtarget->hasAVX()">; def HasSSE3 : Predicate<"Subtarget->hasSSE3()">; -def UseSSE3 : Predicate<"Subtarget->hasSSE3() && Subtarget->hasNoAVX()">; +def UseSSE3 : Predicate<"Subtarget->hasSSE3() && !Subtarget->hasAVX()">; def HasSSSE3 : Predicate<"Subtarget->hasSSSE3()">; -def UseSSSE3 : Predicate<"Subtarget->hasSSSE3() && Subtarget->hasNoAVX()">; +def UseSSSE3 : Predicate<"Subtarget->hasSSSE3() && !Subtarget->hasAVX()">; def HasSSE41 : Predicate<"Subtarget->hasSSE41()">; -def UseSSE41 : Predicate<"Subtarget->hasSSE41() && Subtarget->hasNoAVX()">; +def UseSSE41 : Predicate<"Subtarget->hasSSE41() && 
!Subtarget->hasAVX()">; def HasSSE42 : Predicate<"Subtarget->hasSSE42()">; -def UseSSE42 : Predicate<"Subtarget->hasSSE42() && Subtarget->hasNoAVX()">; +def UseSSE42 : Predicate<"Subtarget->hasSSE42() && !Subtarget->hasAVX()">; def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">; def HasAVX : Predicate<"Subtarget->hasAVX()">; def HasAVX2 : Predicate<"Subtarget->hasAVX2()">; diff --git a/lib/Target/X86/X86InstrShiftRotate.td b/lib/Target/X86/X86InstrShiftRotate.td index bdeb63ffbd..893488c159 100644 --- a/lib/Target/X86/X86InstrShiftRotate.td +++ b/lib/Target/X86/X86InstrShiftRotate.td @@ -839,6 +839,16 @@ def SHRD64mri8 : RIi8<0xAC, MRMDestMem, } // Defs = [EFLAGS] +def ROT32L2R_imm8 : SDNodeXForm<imm, [{ + // Convert a ROTL shamt to a ROTR shamt on 32-bit integer. + return getI8Imm(32 - N->getZExtValue()); +}]>; + +def ROT64L2R_imm8 : SDNodeXForm<imm, [{ + // Convert a ROTL shamt to a ROTR shamt on 64-bit integer. + return getI8Imm(64 - N->getZExtValue()); +}]>; + multiclass bmi_rotate<string asm, RegisterClass RC, X86MemOperand x86memop> { let neverHasSideEffects = 1 in { def ri : Ii8<0xF0, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, i8imm:$src2), @@ -873,4 +883,72 @@ let Predicates = [HasBMI2] in { defm SHRX64 : bmi_shift<"shrx{q}", GR64, i64mem>, T8XD, VEX_W; defm SHLX32 : bmi_shift<"shlx{l}", GR32, i32mem>, T8, OpSize; defm SHLX64 : bmi_shift<"shlx{q}", GR64, i64mem>, T8, OpSize, VEX_W; + + // Prefer RORX which is non-destructive and doesn't update EFLAGS. + let AddedComplexity = 10 in { + def : Pat<(rotl GR32:$src, (i8 imm:$shamt)), + (RORX32ri GR32:$src, (ROT32L2R_imm8 imm:$shamt))>; + def : Pat<(rotl GR64:$src, (i8 imm:$shamt)), + (RORX64ri GR64:$src, (ROT64L2R_imm8 imm:$shamt))>; + } + + def : Pat<(rotl (loadi32 addr:$src), (i8 imm:$shamt)), + (RORX32mi addr:$src, (ROT32L2R_imm8 imm:$shamt))>; + def : Pat<(rotl (loadi64 addr:$src), (i8 imm:$shamt)), + (RORX64mi addr:$src, (ROT64L2R_imm8 imm:$shamt))>; + + // Prefer SARX/SHRX/SHLX over SAR/SHR/SHL with variable shift BUT not + // immediate shift, i.e. the following code is considered better + // + // mov %edi, %esi + // shl $imm, %esi + // ... %edi, ... + // + // than + // + // movb $imm, %sil + // shlx %sil, %edi, %esi + // ... %edi, ... + // + let AddedComplexity = 1 in { + def : Pat<(sra GR32:$src1, GR8:$src2), + (SARX32rr GR32:$src1, + (INSERT_SUBREG + (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; + def : Pat<(sra GR64:$src1, GR8:$src2), + (SARX64rr GR64:$src1, + (INSERT_SUBREG + (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; + + def : Pat<(srl GR32:$src1, GR8:$src2), + (SHRX32rr GR32:$src1, + (INSERT_SUBREG + (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; + def : Pat<(srl GR64:$src1, GR8:$src2), + (SHRX64rr GR64:$src1, + (INSERT_SUBREG + (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; + + def : Pat<(shl GR32:$src1, GR8:$src2), + (SHLX32rr GR32:$src1, + (INSERT_SUBREG + (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; + def : Pat<(shl GR64:$src1, GR8:$src2), + (SHLX64rr GR64:$src1, + (INSERT_SUBREG + (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; + } + + // Patterns on SARXrm/SHRXrm/SHLXrm are explicitly omitted to favor + // + // mov (%ecx), %esi + // shl $imm, $esi + // + // over + // + // movb $imm %al + // shlx %al, (%ecx), %esi + // + // As SARXrr/SHRXrr/SHLXrr is favored on variable shift, the peephole + // optimization will fold them into SARXrm/SHRXrm/SHLXrm if possible.
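As an aside (an illustration, not part of the patch): the ROT32L2R_imm8 and ROT64L2R_imm8 transforms above rely on the identity that a left rotate by n bits equals a right rotate by (width - n) bits, which is what lets an immediate rotl be selected as RORX, a three-operand form that does not clobber its source and, per the comment above, does not update EFLAGS. A minimal standalone C++ check of that identity, assuming n stays in 1..31 (a rotate by 0 is assumed to be folded away before instruction selection):

#include <cassert>
#include <cstdint>

// Rotate a 32-bit value left/right by n bits, n in 1..31.
static uint32_t rotl32(uint32_t x, unsigned n) { return (x << n) | (x >> (32 - n)); }
static uint32_t rotr32(uint32_t x, unsigned n) { return (x >> n) | (x << (32 - n)); }

int main() {
  // rotl by n is the same as rotr by 32 - n, which is the rewrite ROT32L2R_imm8 performs.
  for (unsigned n = 1; n < 32; ++n)
    assert(rotl32(0xDEADBEEFu, n) == rotr32(0xDEADBEEFu, 32 - n));
  return 0;
}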
} diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 582f5e99ff..262d32e4e6 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -423,7 +423,7 @@ bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const { const Function *F = MF.getFunction(); unsigned StackAlign = TM.getFrameLowering()->getStackAlignment(); bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) || - F->hasFnAttr(Attribute::StackAlignment)); + F->getFnAttributes().hasStackAlignmentAttr()); // If we've requested that we force align the stack do so now. if (ForceStackAlign) diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 4c7b8fc4de..921ded8f2d 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -205,7 +205,6 @@ public: bool hasSSE42() const { return X86SSELevel >= SSE42; } bool hasAVX() const { return X86SSELevel >= AVX; } bool hasAVX2() const { return X86SSELevel >= AVX2; } - bool hasNoAVX() const { return X86SSELevel < AVX; } bool hasSSE4A() const { return HasSSE4A; } bool has3DNow() const { return X863DNowLevel >= ThreeDNow; } bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; } diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index b94dd69deb..10f5b6e658 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -592,14 +592,6 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, Type *RetTy = FTy->getReturnType(); - // Work around LLVM bug PR56: the CWriter cannot emit varargs functions which - // have zero fixed arguments. - bool ExtraArgHack = false; - if (Params.empty() && FTy->isVarArg()) { - ExtraArgHack = true; - Params.push_back(Type::getInt32Ty(F->getContext())); - } - // Construct the new function type using the new arguments. FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg()); @@ -711,9 +703,6 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, } } - if (ExtraArgHack) - Args.push_back(Constant::getNullValue(Type::getInt32Ty(F->getContext()))); - // Push any varargs arguments on the list. for (; AI != CS.arg_end(); ++AI, ++ArgIndex) { Args.push_back(*AI); @@ -870,16 +859,9 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, } // Increment I2 past all of the arguments added for this promoted pointer. - for (unsigned i = 0, e = ArgIndices.size(); i != e; ++i) - ++I2; + std::advance(I2, ArgIndices.size()); } - // Notify the alias analysis implementation that we inserted a new argument. - if (ExtraArgHack) - AA.copyValue(Constant::getNullValue(Type::getInt32Ty(F->getContext())), - NF->arg_begin()); - - // Tell the alias analysis that the old function is about to disappear. AA.replaceWithNewValue(F, NF); diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index fd23a935b9..c7429c5954 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -717,9 +717,9 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { // here. Currently, this should not be possible, but special handling might be // required when new return value attributes are added. 
if (NRetTy->isVoidTy()) - RAttrs &= ~Attribute::typeIncompatible(NRetTy); + RAttrs &= ~Attributes::typeIncompatible(NRetTy); else - assert((RAttrs & Attribute::typeIncompatible(NRetTy)) == 0 + assert((RAttrs & Attributes::typeIncompatible(NRetTy)) == 0 && "Return attributes no longer compatible?"); if (RAttrs) @@ -786,7 +786,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { Attributes RAttrs = CallPAL.getRetAttributes(); Attributes FnAttrs = CallPAL.getFnAttributes(); // Adjust in case the function was changed to return void. - RAttrs &= ~Attribute::typeIncompatible(NF->getReturnType()); + RAttrs &= ~Attributes::typeIncompatible(NF->getReturnType()); if (RAttrs) AttributesVec.push_back(AttributeWithIndex::get(0, RAttrs)); diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index b888e95982..b1ba6be5ff 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -962,7 +962,9 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, // If we get here we could have other crazy uses that are transitively // loaded. assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) || - isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser)) && + isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser) || + isa<BitCastInst>(GlobalUser) || + isa<GetElementPtrInst>(GlobalUser)) && "Only expect load and stores!"); } } diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp index 664ddf6f7a..42f0991360 100644 --- a/lib/Transforms/IPO/InlineAlways.cpp +++ b/lib/Transforms/IPO/InlineAlways.cpp @@ -65,7 +65,7 @@ Pass *llvm::createAlwaysInlinerPass(bool InsertLifetime) { /// \brief Minimal filter to detect invalid constructs for inlining. static bool isInlineViable(Function &F) { - bool ReturnsTwice = F.hasFnAttr(Attribute::ReturnsTwice); + bool ReturnsTwice = F.getFnAttributes().hasReturnsTwiceAttr(); for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) { // Disallow inlining of functions which contain an indirect branch. if (isa<IndirectBrInst>(BI->getTerminator())) @@ -114,7 +114,7 @@ InlineCost AlwaysInliner::getInlineCost(CallSite CS) { if (Callee->isDeclaration()) return InlineCost::getNever(); // Return never for anything not marked as always inline. - if (!Callee->hasFnAttr(Attribute::AlwaysInline)) + if (!Callee->getFnAttributes().hasAlwaysInlineAttr()) return InlineCost::getNever(); // Do some minimal analysis to preclude non-viable functions. diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index a9263baa44..7932b40bdc 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -93,10 +93,10 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, // If the inlined function had a higher stack protection level than the // calling function, then bump up the caller's stack protection level. - if (Callee->hasFnAttr(Attribute::StackProtectReq)) + if (Callee->getFnAttributes().hasStackProtectReqAttr()) Caller->addFnAttr(Attribute::StackProtectReq); - else if (Callee->hasFnAttr(Attribute::StackProtect) && - !Caller->hasFnAttr(Attribute::StackProtectReq)) + else if (Callee->getFnAttributes().hasStackProtectAttr() && + !Caller->getFnAttributes().hasStackProtectReqAttr()) Caller->addFnAttr(Attribute::StackProtect); // Look at all of the allocas that we inlined through this call site. If we @@ -209,7 +209,7 @@ unsigned Inliner::getInlineThreshold(CallSite CS) const { // would decrease the threshold. 
Function *Caller = CS.getCaller(); bool OptSize = Caller && !Caller->isDeclaration() && - Caller->hasFnAttr(Attribute::OptimizeForSize); + Caller->getFnAttributes().hasOptimizeForSizeAttr(); if (!(InlineLimit.getNumOccurrences() > 0) && OptSize && OptSizeThreshold < thres) thres = OptSizeThreshold; @@ -217,7 +217,7 @@ unsigned Inliner::getInlineThreshold(CallSite CS) const { // Listen to the inlinehint attribute when it would increase the threshold. Function *Callee = CS.getCalledFunction(); bool InlineHint = Callee && !Callee->isDeclaration() && - Callee->hasFnAttr(Attribute::InlineHint); + Callee->getFnAttributes().hasInlineHintAttr(); if (InlineHint && HintThreshold > thres) thres = HintThreshold; @@ -533,7 +533,7 @@ bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) { // Handle the case when this function is called and we only want to care // about always-inline functions. This is a bit of a hack to share code // between here and the InlineAlways pass. - if (AlwaysInlineOnly && !F->hasFnAttr(Attribute::AlwaysInline)) + if (AlwaysInlineOnly && !F->getFnAttributes().hasAlwaysInlineAttr()) continue; // If the only remaining users of the function are dead constants, remove diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index c81b333813..9e328b9ac9 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -211,13 +211,12 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { // FIXME: We shouldn't bother with this anymore. MPM.add(createStripDeadPrototypesPass()); // Get rid of dead prototypes - // GlobalOpt already deletes dead functions and globals, at -O3 try a + // GlobalOpt already deletes dead functions and globals, at -O2 try a // late pass of GlobalDCE. It is capable of deleting dead cycles. - if (OptLevel > 2) + if (OptLevel > 1) { MPM.add(createGlobalDCEPass()); // Remove dead fns and globals. - - if (OptLevel > 1) MPM.add(createConstantMergePass()); // Merge dup global constants + } } addExtensionsToPM(EP_OptimizerLast, MPM); } diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 23c08699ff..ac30dcdcbf 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1037,7 +1037,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { if (!CallerPAL.isEmpty() && !Caller->use_empty()) { Attributes RAttrs = CallerPAL.getRetAttributes(); - if (RAttrs & Attribute::typeIncompatible(NewRetTy)) + if (RAttrs & Attributes::typeIncompatible(NewRetTy)) return false; // Attribute not compatible with transformed value. } @@ -1067,7 +1067,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { return false; // Cannot transform this parameter value. Attributes Attrs = CallerPAL.getParamAttributes(i + 1); - if (Attrs & Attribute::typeIncompatible(ParamTy)) + if (Attrs & Attributes::typeIncompatible(ParamTy)) return false; // Attribute not compatible with transformed value. // If the parameter is passed as a byval argument, then we have to have a @@ -1141,7 +1141,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { // If the return value is not being used, the type may not be compatible // with the existing attributes. Wipe out any problematic attributes. - RAttrs &= ~Attribute::typeIncompatible(NewRetTy); + RAttrs &= ~Attributes::typeIncompatible(NewRetTy); // Add the new return attributes. 
if (RAttrs) diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 5b6cf4a4a8..a446e427e5 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -264,26 +264,28 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { } } - // Check to see if this allocation is only modified by a memcpy/memmove from - // a constant global whose alignment is equal to or exceeds that of the - // allocation. If this is the case, we can change all users to use - // the constant global instead. This is commonly produced by the CFE by - // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A' - // is only subsequently read. - SmallVector<Instruction *, 4> ToDelete; - if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) { - if (AI.getAlignment() <= getPointeeAlignment(Copy->getSource(), *TD)) { - DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n'); - DEBUG(dbgs() << " memcpy = " << *Copy << '\n'); - for (unsigned i = 0, e = ToDelete.size(); i != e; ++i) - EraseInstFromFunction(*ToDelete[i]); - Constant *TheSrc = cast<Constant>(Copy->getSource()); - Instruction *NewI - = ReplaceInstUsesWith(AI, ConstantExpr::getBitCast(TheSrc, - AI.getType())); - EraseInstFromFunction(*Copy); - ++NumGlobalCopies; - return NewI; + if (TD) { + // Check to see if this allocation is only modified by a memcpy/memmove from + // a constant global whose alignment is equal to or exceeds that of the + // allocation. If this is the case, we can change all users to use + // the constant global instead. This is commonly produced by the CFE by + // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A' + // is only subsequently read. + SmallVector<Instruction *, 4> ToDelete; + if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) { + if (AI.getAlignment() <= getPointeeAlignment(Copy->getSource(), *TD)) { + DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n'); + DEBUG(dbgs() << " memcpy = " << *Copy << '\n'); + for (unsigned i = 0, e = ToDelete.size(); i != e; ++i) + EraseInstFromFunction(*ToDelete[i]); + Constant *TheSrc = cast<Constant>(Copy->getSource()); + Instruction *NewI + = ReplaceInstUsesWith(AI, ConstantExpr::getBitCast(TheSrc, + AI.getType())); + EraseInstFromFunction(*Copy); + ++NumGlobalCopies; + return NewI; + } } } diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index 291e80019e..0ba7340e64 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -903,7 +903,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { return &SI; } - if (VectorType* VecTy = dyn_cast<VectorType>(SI.getType())) { + if (VectorType *VecTy = dyn_cast<VectorType>(SI.getType())) { unsigned VWidth = VecTy->getNumElements(); APInt UndefElts(VWidth, 0); APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); @@ -912,6 +912,28 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { return ReplaceInstUsesWith(SI, V); return &SI; } + + if (ConstantVector *CV = dyn_cast<ConstantVector>(CondVal)) { + // Form a shufflevector instruction. 
+ SmallVector<Constant *, 8> Mask(VWidth); + Type *Int32Ty = Type::getInt32Ty(CV->getContext()); + for (unsigned i = 0; i != VWidth; ++i) { + Constant *Elem = cast<Constant>(CV->getOperand(i)); + if (ConstantInt *E = dyn_cast<ConstantInt>(Elem)) + Mask[i] = ConstantInt::get(Int32Ty, i + (E->isZero() ? VWidth : 0)); + else if (isa<UndefValue>(Elem)) + Mask[i] = UndefValue::get(Int32Ty); + else + return 0; + } + Constant *MaskVal = ConstantVector::get(Mask); + Value *V = Builder->CreateShuffleVector(TrueVal, FalseVal, MaskVal); + return ReplaceInstUsesWith(SI, V); + } + + if (isa<ConstantAggregateZero>(CondVal)) { + return ReplaceInstUsesWith(SI, FalseVal); + } } return 0; diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index afa6a4b5e6..1b102bd243 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -854,7 +854,7 @@ bool AddressSanitizer::handleFunction(Module &M, Function &F) { // If needed, insert __asan_init before checking for AddressSafety attr. maybeInsertAsanInitAtFunctionEntry(F); - if (!F.hasFnAttr(Attribute::AddressSafety)) return false; + if (!F.getFnAttributes().hasAddressSafetyAttr()) return false; if (!ClDebugFunc.empty() && ClDebugFunc != F.getName()) return false; diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index 495cdc6321..305d70f27b 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -149,7 +149,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) { TLInfo = &getAnalysis<TargetLibraryInfo>(); DT = getAnalysisIfAvailable<DominatorTree>(); PFI = getAnalysisIfAvailable<ProfileInfo>(); - OptSize = F.hasFnAttr(Attribute::OptimizeForSize); + OptSize = F.getFnAttributes().hasOptimizeForSizeAttr(); /// This optimization identifies DIV instructions that can be /// profitably bypassed and carried out with a shorter, faster divide. @@ -226,7 +226,8 @@ bool CodeGenPrepare::EliminateFallThrough(Function &F) { // edge, just collapse it. BasicBlock *SinglePred = BB->getSinglePredecessor(); - if (!SinglePred || SinglePred == BB) continue; + // Don't merge if BB's address is taken. + if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue; BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator()); if (Term && !Term->isConditional()) { @@ -788,7 +789,7 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) { } // If we eliminated all predecessors of the block, delete the block now. - if (Changed && pred_begin(BB) == pred_end(BB)) + if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB)) BB->eraseFromParent(); return Changed; diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index 9b0aadb0b5..3ec6f3dcc3 100644 --- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -235,6 +235,11 @@ bool CorrelatedValuePropagation::processSwitch(SwitchInst *SI) { // This case never fires - remove it. CI.getCaseSuccessor()->removePredecessor(BB); SI->removeCase(CI); // Does not invalidate the iterator. + + // The condition can be modified by removePredecessor's PHI simplification + // logic. 
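Stepping back to the InstCombineSelect.cpp hunk above (illustration only, not part of the patch): a select whose condition is a constant vector becomes a shufflevector whose mask element i is i when the condition element is non-zero (take lane i of the true operand) and VWidth + i when it is zero (take lane i of the false operand, the second shuffle operand). A small standalone C++ sketch of that index computation, using plain containers instead of LLVM types:

#include <cassert>
#include <cstddef>
#include <vector>

// Build the shuffle mask from a constant boolean condition vector, mirroring
// Mask[i] = i + (E->isZero() ? VWidth : 0) from the patch above.
static std::vector<std::size_t> buildMask(const std::vector<bool> &Cond) {
  std::vector<std::size_t> Mask(Cond.size());
  for (std::size_t i = 0; i != Cond.size(); ++i)
    Mask[i] = i + (Cond[i] ? 0 : Cond.size());
  return Mask;
}

// Apply the mask to the concatenation of the two operands, mimicking
// shufflevector semantics (indices below VWidth pick from T, the rest from F).
static std::vector<int> shuffle(const std::vector<int> &T,
                                const std::vector<int> &F,
                                const std::vector<std::size_t> &Mask) {
  std::vector<int> Out;
  for (std::size_t Idx : Mask)
    Out.push_back(Idx < T.size() ? T[Idx] : F[Idx - T.size()]);
  return Out;
}

int main() {
  // select <i1 1, i1 0, i1 1, i1 0>, T, F  ==  shufflevector T, F, <0, 5, 2, 7>
  std::vector<bool> Cond = {true, false, true, false};
  std::vector<int> T = {1, 2, 3, 4};
  std::vector<int> F = {10, 20, 30, 40};
  std::vector<int> Expected = {1, 20, 3, 40};
  assert(shuffle(T, F, buildMask(Cond)) == Expected);
  return 0;
}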
+ Cond = SI->getCondition(); + ++NumDeadCases; Changed = true; } else if (State == LazyValueInfo::True) { diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index 1ff4329c84..301ee2f663 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -30,6 +30,7 @@ #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Support/Debug.h" #include "llvm/ADT/SetVector.h" @@ -45,6 +46,7 @@ namespace { AliasAnalysis *AA; MemoryDependenceAnalysis *MD; DominatorTree *DT; + const TargetLibraryInfo *TLI; static char ID; // Pass identification, replacement for typeid DSE() : FunctionPass(ID), AA(0), MD(0), DT(0) { @@ -55,6 +57,7 @@ namespace { AA = &getAnalysis<AliasAnalysis>(); MD = &getAnalysis<MemoryDependenceAnalysis>(); DT = &getAnalysis<DominatorTree>(); + TLI = AA->getTargetLibraryInfo(); bool Changed = false; for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) @@ -144,7 +147,7 @@ static void DeleteDeadInstruction(Instruction *I, /// hasMemoryWrite - Does this instruction write some memory? This only returns /// true for things that we can analyze with other helpers below. -static bool hasMemoryWrite(Instruction *I) { +static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo *TLI) { if (isa<StoreInst>(I)) return true; if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { @@ -159,6 +162,26 @@ static bool hasMemoryWrite(Instruction *I) { return true; } } + if (CallSite CS = I) { + if (Function *F = CS.getCalledFunction()) { + if (TLI && TLI->has(LibFunc::strcpy) && + F->getName() == TLI->getName(LibFunc::strcpy)) { + return true; + } + if (TLI && TLI->has(LibFunc::strncpy) && + F->getName() == TLI->getName(LibFunc::strncpy)) { + return true; + } + if (TLI && TLI->has(LibFunc::strcat) && + F->getName() == TLI->getName(LibFunc::strcat)) { + return true; + } + if (TLI && TLI->has(LibFunc::strncat) && + F->getName() == TLI->getName(LibFunc::strncat)) { + return true; + } + } + } return false; } @@ -206,7 +229,8 @@ getLocForWrite(Instruction *Inst, AliasAnalysis &AA) { /// instruction if any. static AliasAnalysis::Location getLocForRead(Instruction *Inst, AliasAnalysis &AA) { - assert(hasMemoryWrite(Inst) && "Unknown instruction case"); + assert(hasMemoryWrite(Inst, AA.getTargetLibraryInfo()) && + "Unknown instruction case"); // The only instructions that both read and write are the mem transfer // instructions (memcpy/memmove). @@ -223,23 +247,29 @@ static bool isRemovable(Instruction *I) { if (StoreInst *SI = dyn_cast<StoreInst>(I)) return SI->isUnordered(); - IntrinsicInst *II = cast<IntrinsicInst>(I); - switch (II->getIntrinsicID()) { - default: llvm_unreachable("doesn't pass 'hasMemoryWrite' predicate"); - case Intrinsic::lifetime_end: - // Never remove dead lifetime_end's, e.g. because it is followed by a - // free. - return false; - case Intrinsic::init_trampoline: - // Always safe to remove init_trampoline. - return true; + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + switch (II->getIntrinsicID()) { + default: llvm_unreachable("doesn't pass 'hasMemoryWrite' predicate"); + case Intrinsic::lifetime_end: + // Never remove dead lifetime_end's, e.g. because it is followed by a + // free. + return false; + case Intrinsic::init_trampoline: + // Always safe to remove init_trampoline. 
+ return true; - case Intrinsic::memset: - case Intrinsic::memmove: - case Intrinsic::memcpy: - // Don't remove volatile memory intrinsics. - return !cast<MemIntrinsic>(II)->isVolatile(); + case Intrinsic::memset: + case Intrinsic::memmove: + case Intrinsic::memcpy: + // Don't remove volatile memory intrinsics. + return !cast<MemIntrinsic>(II)->isVolatile(); + } } + + if (CallSite CS = I) + return CS.getInstruction()->use_empty(); + + return false; } @@ -250,14 +280,19 @@ static bool isShortenable(Instruction *I) { if (isa<StoreInst>(I)) return false; - IntrinsicInst *II = cast<IntrinsicInst>(I); - switch (II->getIntrinsicID()) { - default: return false; - case Intrinsic::memset: - case Intrinsic::memcpy: - // Do shorten memory intrinsics. - return true; + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + switch (II->getIntrinsicID()) { + default: return false; + case Intrinsic::memset: + case Intrinsic::memcpy: + // Do shorten memory intrinsics. + return true; + } } + + // Don't shorten libcalls calls for now. + + return false; } /// getStoredPointerOperand - Return the pointer that is being written to. @@ -267,12 +302,18 @@ static Value *getStoredPointerOperand(Instruction *I) { if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) return MI->getDest(); - IntrinsicInst *II = cast<IntrinsicInst>(I); - switch (II->getIntrinsicID()) { - default: llvm_unreachable("Unexpected intrinsic!"); - case Intrinsic::init_trampoline: - return II->getArgOperand(0); + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + switch (II->getIntrinsicID()) { + default: llvm_unreachable("Unexpected intrinsic!"); + case Intrinsic::init_trampoline: + return II->getArgOperand(0); + } } + + CallSite CS = I; + // All the supported functions so far happen to have dest as their first + // argument. + return CS.getArgument(0); } static uint64_t getPointerSize(const Value *V, AliasAnalysis &AA) { @@ -455,13 +496,13 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { Instruction *Inst = BBI++; // Handle 'free' calls specially. - if (CallInst *F = isFreeCall(Inst, AA->getTargetLibraryInfo())) { + if (CallInst *F = isFreeCall(Inst, TLI)) { MadeChange |= HandleFree(F); continue; } // If we find something that writes memory, get its memory dependence. - if (!hasMemoryWrite(Inst)) + if (!hasMemoryWrite(Inst, TLI)) continue; MemDepResult InstDep = MD->getDependency(Inst); @@ -484,7 +525,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { // in case we need it. WeakVH NextInst(BBI); - DeleteDeadInstruction(SI, *MD, AA->getTargetLibraryInfo()); + DeleteDeadInstruction(SI, *MD, TLI); if (NextInst == 0) // Next instruction deleted. BBI = BB.begin(); @@ -531,7 +572,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { << *DepWrite << "\n KILLER: " << *Inst << '\n'); // Delete the store and now-dead instructions that feed it. 
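A note on the DeadStoreElimination changes above (illustrative, not taken from the patch): once strcpy, strncpy, strcat and strncat are recognized as memory writes, a call whose return value is unused and whose destination is a dead stack object can be deleted like any other dead store. A hypothetical example of the kind of code that becomes eligible (the function and buffer names are made up):

#include <cstring>

// 'buf' never escapes and is never read back, so the bytes strcpy writes into
// it are dead; with libcall awareness DSE can now remove the call (its return
// value is ignored and the destination is dead at the end of the function).
static void initUnused(const char *s) {
  char buf[32];
  std::strcpy(buf, s);
}

int main() {
  initUnused("hello");
  return 0;
}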
- DeleteDeadInstruction(DepWrite, *MD, AA->getTargetLibraryInfo()); + DeleteDeadInstruction(DepWrite, *MD, TLI); ++NumFastStores; MadeChange = true; @@ -628,7 +669,7 @@ bool DSE::HandleFree(CallInst *F) { MemDepResult Dep = MD->getPointerDependencyFrom(Loc, false, InstPt, BB); while (Dep.isDef() || Dep.isClobber()) { Instruction *Dependency = Dep.getInst(); - if (!hasMemoryWrite(Dependency) || !isRemovable(Dependency)) + if (!hasMemoryWrite(Dependency, TLI) || !isRemovable(Dependency)) break; Value *DepPointer = @@ -641,7 +682,7 @@ bool DSE::HandleFree(CallInst *F) { Instruction *Next = llvm::next(BasicBlock::iterator(Dependency)); // DCE instructions only used to calculate that store - DeleteDeadInstruction(Dependency, *MD, AA->getTargetLibraryInfo()); + DeleteDeadInstruction(Dependency, *MD, TLI); ++NumFastStores; MadeChange = true; @@ -681,8 +722,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) { // Okay, so these are dead heap objects, but if the pointer never escapes // then it's leaked by this function anyways. - else if (isAllocLikeFn(I, AA->getTargetLibraryInfo()) && - !PointerMayBeCaptured(I, true, true)) + else if (isAllocLikeFn(I, TLI) && !PointerMayBeCaptured(I, true, true)) DeadStackObjects.insert(I); } @@ -698,7 +738,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) { --BBI; // If we find a store, check to see if it points into a dead stack value. - if (hasMemoryWrite(BBI) && isRemovable(BBI)) { + if (hasMemoryWrite(BBI, TLI) && isRemovable(BBI)) { // See through pointer-to-pointer bitcasts SmallVector<Value *, 4> Pointers; GetUnderlyingObjects(getStoredPointerOperand(BBI), Pointers); @@ -726,8 +766,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) { dbgs() << '\n'); // DCE instructions only used to calculate that store. - DeleteDeadInstruction(Dead, *MD, AA->getTargetLibraryInfo(), - &DeadStackObjects); + DeleteDeadInstruction(Dead, *MD, TLI, &DeadStackObjects); ++NumFastStores; MadeChange = true; continue; @@ -735,10 +774,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) { } // Remove any dead non-memory-mutating instructions. - if (isInstructionTriviallyDead(BBI, AA->getTargetLibraryInfo())) { + if (isInstructionTriviallyDead(BBI, TLI)) { Instruction *Inst = BBI++; - DeleteDeadInstruction(Inst, *MD, AA->getTargetLibraryInfo(), - &DeadStackObjects); + DeleteDeadInstruction(Inst, *MD, TLI, &DeadStackObjects); ++NumFastOther; MadeChange = true; continue; @@ -754,7 +792,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) { if (CallSite CS = cast<Value>(BBI)) { // Remove allocation function calls from the list of dead stack objects; // there can't be any references before the definition. - if (isAllocLikeFn(BBI, AA->getTargetLibraryInfo())) + if (isAllocLikeFn(BBI, TLI)) DeadStackObjects.remove(BBI); // If this call does not access memory, it can't be loading any of our diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index 09a186f7f9..f8709a537f 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -145,7 +145,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { // not user specified. 
unsigned Threshold = CurrentThreshold; if (!UserThreshold && - Header->getParent()->hasFnAttr(Attribute::OptimizeForSize)) + Header->getParent()->getFnAttributes().hasOptimizeForSizeAttr()) Threshold = OptSizeUnrollThreshold; // Find trip count and trip multiple if count is not available diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index 58f7739888..74c8f43ec2 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -638,7 +638,7 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) { // Check to see if it would be profitable to unswitch current loop. // Do not do non-trivial unswitch while optimizing for size. - if (OptimizeForSize || F->hasFnAttr(Attribute::OptimizeForSize)) + if (OptimizeForSize || F->getFnAttributes().hasOptimizeForSizeAttr()) return false; UnswitchNontrivialCondition(LoopCond, Val, currentLoop); diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index e3182d319c..a8dc0533bf 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -202,11 +202,11 @@ public: use_iterator use_begin(const_iterator I) { return Uses[I - begin()].begin(); } use_iterator use_end(unsigned Idx) { return Uses[Idx].end(); } use_iterator use_end(const_iterator I) { return Uses[I - begin()].end(); } - void use_insert(unsigned Idx, use_iterator UI, const PartitionUse &U) { - Uses[Idx].insert(UI, U); + void use_push_back(unsigned Idx, const PartitionUse &U) { + Uses[Idx].push_back(U); } - void use_insert(const_iterator I, use_iterator UI, const PartitionUse &U) { - Uses[I - begin()].insert(UI, U); + void use_push_back(const_iterator I, const PartitionUse &U) { + Uses[I - begin()].push_back(U); } void use_erase(unsigned Idx, use_iterator UI) { Uses[Idx].erase(UI); } void use_erase(const_iterator I, use_iterator UI) { @@ -522,8 +522,10 @@ private: void insertUse(Instruction &I, int64_t Offset, uint64_t Size, bool IsSplittable = false) { - // Completely skip uses which don't overlap the allocation. - if ((Offset >= 0 && (uint64_t)Offset >= AllocSize) || + // Completely skip uses which have a zero size or don't overlap the + // allocation. + if (Size == 0 || + (Offset >= 0 && (uint64_t)Offset >= AllocSize) || (Offset < 0 && (uint64_t)-Offset >= Size)) { DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte use @" << Offset << " which starts past the end of the " << AllocSize @@ -660,11 +662,14 @@ private: bool Inserted = false; llvm::tie(PMI, Inserted) = MemTransferPartitionMap.insert(std::make_pair(&II, NewIdx)); - if (!Inserted && Offsets.IsSplittable) { + if (Offsets.IsSplittable && + (!Inserted || II.getRawSource() == II.getRawDest())) { // We've found a memory transfer intrinsic which refers to the alloca as - // both a source and dest. We refuse to split these to simplify splitting - // logic. If possible, SROA will still split them into separate allocas - // and then re-analyze. + // both a source and dest. This is detected either by direct equality of + // the operand values, or when we visit the intrinsic twice due to two + // different chains of values leading to it. We refuse to split these to + // simplify splitting logic. If possible, SROA will still split them into + // separate allocas and then re-analyze. 
Offsets.IsSplittable = false; P.Partitions[PMI->second].IsSplittable = false; P.Partitions[NewIdx].IsSplittable = false; @@ -697,6 +702,9 @@ private: SmallVector<std::pair<Instruction *, Instruction *>, 4> Uses; Visited.insert(Root); Uses.push_back(std::make_pair(cast<Instruction>(*U), Root)); + // If there are no loads or stores, the access is dead. We mark that as + // a size zero access. + Size = 0; do { Instruction *I, *UsedI; llvm::tie(UsedI, I) = Uses.pop_back_val(); @@ -824,9 +832,9 @@ private: } void insertUse(Instruction &User, int64_t Offset, uint64_t Size) { - // If the use extends outside of the allocation, record it as a dead use - // for elimination later. - if ((uint64_t)Offset >= AllocSize || + // If the use has a zero size or extends outside of the allocation, record + // it as a dead use for elimination later. + if (Size == 0 || (uint64_t)Offset >= AllocSize || (Offset < 0 && (uint64_t)-Offset >= Size)) return markAsDead(User); @@ -853,7 +861,7 @@ private: PartitionUse NewUse(std::max(I->BeginOffset, BeginOffset), std::min(I->EndOffset, EndOffset), &User, cast<Instruction>(*U)); - P.Uses[I - P.begin()].push_back(NewUse); + P.use_push_back(I, NewUse); if (isa<PHINode>(U->getUser()) || isa<SelectInst>(U->getUser())) P.PHIOrSelectOpMap[std::make_pair(&User, U->get())] = std::make_pair(I - P.begin(), P.Uses[I - P.begin()].size() - 1); @@ -1102,8 +1110,6 @@ AllocaPartitioning::AllocaPartitioning(const TargetData &TD, AllocaInst &AI) Uses.resize(Partitions.size()); UseBuilder UB(TD, AI, *this); UB(); - for (iterator I = Partitions.begin(), E = Partitions.end(); I != E; ++I) - std::stable_sort(use_begin(I), use_end(I)); } Type *AllocaPartitioning::getCommonType(iterator I) const { @@ -1890,7 +1896,8 @@ private: Value *extractInteger(IRBuilder<> &IRB, IntegerType *TargetTy, uint64_t Offset) { assert(IntPromotionTy && "Alloca is not an integer we can extract from"); - Value *V = IRB.CreateLoad(&NewAI, getName(".load")); + Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), + getName(".load")); assert(Offset >= NewAllocaBeginOffset && "Out of bounds offset"); uint64_t RelOffset = Offset - NewAllocaBeginOffset; if (RelOffset) @@ -1906,7 +1913,7 @@ private: StoreInst *insertInteger(IRBuilder<> &IRB, Value *V, uint64_t Offset) { IntegerType *Ty = cast<IntegerType>(V->getType()); if (Ty == IntPromotionTy) - return IRB.CreateStore(V, &NewAI); + return IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment()); assert(Ty->getBitWidth() < IntPromotionTy->getBitWidth() && "Cannot insert a larger integer!"); @@ -1918,10 +1925,12 @@ private: APInt Mask = ~Ty->getMask().zext(IntPromotionTy->getBitWidth()) .shl(RelOffset*8); - Value *Old = IRB.CreateAnd(IRB.CreateLoad(&NewAI, getName(".oldload")), + Value *Old = IRB.CreateAnd(IRB.CreateAlignedLoad(&NewAI, + NewAI.getAlignment(), + getName(".oldload")), Mask, getName(".mask")); - return IRB.CreateStore(IRB.CreateOr(Old, V, getName(".insert")), - &NewAI); + return IRB.CreateAlignedStore(IRB.CreateOr(Old, V, getName(".insert")), + &NewAI, NewAI.getAlignment()); } void deleteIfTriviallyDead(Value *V) { @@ -1943,12 +1952,12 @@ private: Value *Result; if (LI.getType() == VecTy->getElementType() || BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) { - Result - = IRB.CreateExtractElement(IRB.CreateLoad(&NewAI, getName(".load")), - getIndex(IRB, BeginOffset), - getName(".extract")); + Result = IRB.CreateExtractElement( + IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")), + getIndex(IRB, BeginOffset), 
getName(".extract")); } else { - Result = IRB.CreateLoad(&NewAI, getName(".load")); + Result = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), + getName(".load")); } if (Result->getType() != LI.getType()) Result = getValueCast(IRB, Result, LI.getType()); @@ -1983,6 +1992,9 @@ private: Value *NewPtr = getAdjustedAllocaPtr(IRB, LI.getPointerOperand()->getType()); LI.setOperand(0, NewPtr); + if (LI.getAlignment()) + LI.setAlignment(MinAlign(NewAI.getAlignment(), + BeginOffset - NewAllocaBeginOffset)); DEBUG(dbgs() << " to: " << LI << "\n"); deleteIfTriviallyDead(OldOp); @@ -1996,13 +2008,14 @@ private: BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) { if (V->getType() != ElementTy) V = getValueCast(IRB, V, ElementTy); - V = IRB.CreateInsertElement(IRB.CreateLoad(&NewAI, getName(".load")), V, - getIndex(IRB, BeginOffset), + LoadInst *LI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), + getName(".load")); + V = IRB.CreateInsertElement(LI, V, getIndex(IRB, BeginOffset), getName(".insert")); } else if (V->getType() != VecTy) { V = getValueCast(IRB, V, VecTy); } - StoreInst *Store = IRB.CreateStore(V, &NewAI); + StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment()); Pass.DeadInsts.push_back(&SI); (void)Store; @@ -2033,6 +2046,9 @@ private: Value *NewPtr = getAdjustedAllocaPtr(IRB, SI.getPointerOperand()->getType()); SI.setOperand(1, NewPtr); + if (SI.getAlignment()) + SI.setAlignment(MinAlign(NewAI.getAlignment(), + BeginOffset - NewAllocaBeginOffset)); DEBUG(dbgs() << " to: " << SI << "\n"); deleteIfTriviallyDead(OldOp); @@ -2048,6 +2064,15 @@ private: // pointer to the new alloca. if (!isa<Constant>(II.getLength())) { II.setDest(getAdjustedAllocaPtr(IRB, II.getRawDest()->getType())); + + Type *CstTy = II.getAlignmentCst()->getType(); + if (!NewAI.getAlignment()) + II.setAlignment(ConstantInt::get(CstTy, 0)); + else + II.setAlignment( + ConstantInt::get(CstTy, MinAlign(NewAI.getAlignment(), + BeginOffset - NewAllocaBeginOffset))); + deleteIfTriviallyDead(OldPtr); return false; } @@ -2067,11 +2092,15 @@ private: !TD.isLegalInteger(TD.getTypeSizeInBits(ScalarTy)))) { Type *SizeTy = II.getLength()->getType(); Constant *Size = ConstantInt::get(SizeTy, EndOffset - BeginOffset); + unsigned Align = 1; + if (NewAI.getAlignment()) + Align = MinAlign(NewAI.getAlignment(), + BeginOffset - NewAllocaBeginOffset); CallInst *New = IRB.CreateMemSet(getAdjustedAllocaPtr(IRB, II.getRawDest()->getType()), - II.getValue(), Size, II.getAlignment(), + II.getValue(), Size, Align, II.isVolatile()); (void)New; DEBUG(dbgs() << " to: " << *New << "\n"); @@ -2109,11 +2138,13 @@ private: // If this is an element-wide memset of a vectorizable alloca, insert it. 
if (VecTy && (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset)) { - StoreInst *Store = IRB.CreateStore( - IRB.CreateInsertElement(IRB.CreateLoad(&NewAI, getName(".load")), V, - getIndex(IRB, BeginOffset), + StoreInst *Store = IRB.CreateAlignedStore( + IRB.CreateInsertElement(IRB.CreateAlignedLoad(&NewAI, + NewAI.getAlignment(), + getName(".load")), + V, getIndex(IRB, BeginOffset), getName(".insert")), - &NewAI); + &NewAI, NewAI.getAlignment()); (void)Store; DEBUG(dbgs() << " to: " << *Store << "\n"); return true; @@ -2131,7 +2162,8 @@ private: assert(V->getType() == VecTy); } - Value *New = IRB.CreateStore(V, &NewAI, II.isVolatile()); + Value *New = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(), + II.isVolatile()); (void)New; DEBUG(dbgs() << " to: " << *New << "\n"); return !II.isVolatile(); @@ -2164,6 +2196,13 @@ private: else II.setSource(getAdjustedAllocaPtr(IRB, II.getRawSource()->getType())); + Type *CstTy = II.getAlignmentCst()->getType(); + if (II.getAlignment() > 1) + II.setAlignment(ConstantInt::get( + CstTy, MinAlign(II.getAlignment(), + MinAlign(NewAI.getAlignment(), + BeginOffset - NewAllocaBeginOffset)))); + DEBUG(dbgs() << " to: " << II << "\n"); deleteIfTriviallyDead(OldOp); return false; @@ -2221,6 +2260,11 @@ private: OtherPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy, getName("." + OtherPtr->getName())); + unsigned Align = II.getAlignment(); + if (Align > 1) + Align = MinAlign(RelOffset.zextOrTrunc(64).getZExtValue(), + MinAlign(II.getAlignment(), NewAI.getAlignment())); + // Strip all inbounds GEPs and pointer casts to try to dig out any root // alloca that should be re-examined after rewriting this instruction. if (AllocaInst *AI @@ -2236,8 +2280,7 @@ private: CallInst *New = IRB.CreateMemCpy(IsDest ? OurPtr : OtherPtr, IsDest ? OtherPtr : OurPtr, - Size, II.getAlignment(), - II.isVolatile()); + Size, Align, II.isVolatile()); (void)New; DEBUG(dbgs() << " to: " << *New << "\n"); return false; @@ -2251,22 +2294,25 @@ private: Value *Src; if (IsVectorElement && !IsDest) { // We have to extract rather than load. - Src = IRB.CreateExtractElement(IRB.CreateLoad(SrcPtr, - getName(".copyload")), - getIndex(IRB, BeginOffset), - getName(".copyextract")); + Src = IRB.CreateExtractElement( + IRB.CreateAlignedLoad(SrcPtr, Align, getName(".copyload")), + getIndex(IRB, BeginOffset), + getName(".copyextract")); } else { - Src = IRB.CreateLoad(SrcPtr, II.isVolatile(), getName(".copyload")); + Src = IRB.CreateAlignedLoad(SrcPtr, Align, II.isVolatile(), + getName(".copyload")); } if (IsVectorElement && IsDest) { // We have to insert into a loaded copy before storing. - Src = IRB.CreateInsertElement(IRB.CreateLoad(&NewAI, getName(".load")), - Src, getIndex(IRB, BeginOffset), - getName(".insert")); + Src = IRB.CreateInsertElement( + IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")), + Src, getIndex(IRB, BeginOffset), + getName(".insert")); } - Value *Store = IRB.CreateStore(Src, DstPtr, II.isVolatile()); + StoreInst *Store = cast<StoreInst>( + IRB.CreateAlignedStore(Src, DstPtr, Align, II.isVolatile())); (void)Store; DEBUG(dbgs() << " to: " << *Store << "\n"); return !II.isVolatile(); @@ -2460,8 +2506,7 @@ private: else { AllocaPartitioning::PartitionUse OtherUse = *UI; OtherUse.User = Load; - P.use_insert(PI, std::upper_bound(UI, P.use_end(PI), OtherUse), - OtherUse); + P.use_push_back(PI, OtherUse); } } } @@ -2559,7 +2604,7 @@ private: LoadInst *OtherLoad = IsTrueVal ? 
FL : TL; assert(OtherUse.Ptr == OtherLoad->getOperand(0)); OtherUse.User = OtherLoad; - P.use_insert(PI, P.use_end(PI), OtherUse); + P.use_push_back(PI, OtherUse); } // Transfer alignment and TBAA info if present. @@ -2576,8 +2621,6 @@ private: LI->replaceAllUsesWith(V); Pass.DeadInsts.push_back(LI); } - if (PI != P.end()) - std::stable_sort(P.use_begin(PI), P.use_end(PI)); deleteIfTriviallyDead(OldPtr); return NewPtr == &NewAI; @@ -2959,9 +3002,19 @@ bool SROA::rewriteAllocaPartition(AllocaInst &AI, assert(PI == P.begin() && "Begin offset is zero on later partition"); NewAI = &AI; } else { - // FIXME: The alignment here is overly conservative -- we could in many - // cases get away with much weaker alignment constraints. - NewAI = new AllocaInst(AllocaTy, 0, AI.getAlignment(), + unsigned Alignment = AI.getAlignment(); + if (!Alignment) { + // The minimum alignment which users can rely on when the explicit + // alignment is omitted or zero is that required by the ABI for this + // type. + Alignment = TD->getABITypeAlignment(AI.getAllocatedType()); + } + Alignment = MinAlign(Alignment, PI->BeginOffset); + // If we will get at least this much alignment from the type alone, leave + // the alloca's alignment unconstrained. + if (Alignment <= TD->getABITypeAlignment(AllocaTy)) + Alignment = 0; + NewAI = new AllocaInst(AllocaTy, 0, Alignment, AI.getName() + ".sroa." + Twine(PI - P.begin()), &AI); ++NumNewAllocas; diff --git a/lib/Transforms/Utils/IntegerDivision.cpp b/lib/Transforms/Utils/IntegerDivision.cpp index 9d630349ab..55227e2714 100644 --- a/lib/Transforms/Utils/IntegerDivision.cpp +++ b/lib/Transforms/Utils/IntegerDivision.cpp @@ -23,11 +23,69 @@ using namespace llvm; +/// Generate code to compute the remainder of two signed integers. Returns the +/// remainder, which will have the sign of the dividend. Builder's insert point +/// should be pointing where the caller wants code generated, e.g. at the srem +/// instruction. This will generate a urem in the process, and Builder's insert +/// point will be pointing at the urem (if present, i.e. not folded), ready to +/// be expanded if the user wishes. +static Value *generateSignedRemainderCode(Value *Dividend, Value *Divisor, + IRBuilder<> &Builder) { + ConstantInt *ThirtyOne = Builder.getInt32(31); + + // ; %dividend_sgn = ashr i32 %dividend, 31 + // ; %divisor_sgn = ashr i32 %divisor, 31 + // ; %dvd_xor = xor i32 %dividend, %dividend_sgn + // ; %dvs_xor = xor i32 %divisor, %divisor_sgn + // ; %u_dividend = sub i32 %dvd_xor, %dividend_sgn + // ; %u_divisor = sub i32 %dvs_xor, %divisor_sgn + // ; %urem = urem i32 %dividend, %divisor + // ; %xored = xor i32 %urem, %dividend_sgn + // ; %srem = sub i32 %xored, %dividend_sgn + Value *DividendSign = Builder.CreateAShr(Dividend, ThirtyOne); + Value *DivisorSign = Builder.CreateAShr(Divisor, ThirtyOne); + Value *DvdXor = Builder.CreateXor(Dividend, DividendSign); + Value *DvsXor = Builder.CreateXor(Divisor, DivisorSign); + Value *UDividend = Builder.CreateSub(DvdXor, DividendSign); + Value *UDivisor = Builder.CreateSub(DvsXor, DivisorSign); + Value *URem = Builder.CreateURem(UDividend, UDivisor); + Value *Xored = Builder.CreateXor(URem, DividendSign); + Value *SRem = Builder.CreateSub(Xored, DividendSign); + + if (Instruction *URemInst = dyn_cast<Instruction>(URem)) + Builder.SetInsertPoint(URemInst); + + return SRem; +} + + +/// Generate code to compute the remainder of two unsigned integers. Returns the +/// remainder.
Builder's insert point should be pointing where the caller wants +/// code generated, e.g. at the urem instruction. This will generate a udiv in +/// the process, and Builder's insert point will be pointing at the udiv (if +/// present, i.e. not folded), ready to be expanded if the user wishes +static Value *generatedUnsignedRemainderCode(Value *Dividend, Value *Divisor, + IRBuilder<> &Builder) { + // Remainder = Dividend - Quotient*Divisor + + // ; %quotient = udiv i32 %dividend, %divisor + // ; %product = mul i32 %divisor, %quotient + // ; %remainder = sub i32 %dividend, %product + Value *Quotient = Builder.CreateUDiv(Dividend, Divisor); + Value *Product = Builder.CreateMul(Divisor, Quotient); + Value *Remainder = Builder.CreateSub(Dividend, Product); + + if (Instruction *UDiv = dyn_cast<Instruction>(Quotient)) + Builder.SetInsertPoint(UDiv); + + return Remainder; +} + /// Generate code to divide two signed integers. Returns the quotient, rounded -/// towards 0. Builder's insert point should be pointing at the sdiv -/// instruction. This will generate a udiv in the process, and Builder's insert -/// point will be pointing at the udiv (if present, i.e. not folded), ready to -/// be expanded if the user wishes. +/// towards 0. Builder's insert point should be pointing where the caller wants +/// code generated, e.g. at the sdiv instruction. This will generate a udiv in +/// the process, and Builder's insert point will be pointing at the udiv (if +/// present, i.e. not folded), ready to be expanded if the user wishes. static Value *generateSignedDivisionCode(Value *Dividend, Value *Divisor, IRBuilder<> &Builder) { // Implementation taken from compiler-rt's __divsi3 @@ -62,8 +120,8 @@ static Value *generateSignedDivisionCode(Value *Dividend, Value *Divisor, } /// Generates code to divide two unsigned scalar 32-bit integers. Returns the -/// quotient, rounded towards 0. Builder's insert point should be pointing at -/// the udiv instruction. +/// quotient, rounded towards 0. Builder's insert point should be pointing where +/// the caller wants code generated, e.g. at the udiv instruction. static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor, IRBuilder<> &Builder) { // The basic algorithm can be found in the compiler-rt project's @@ -265,6 +323,56 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor, return Q_5; } +/// Generate code to calculate the remainder of two integers, replacing Rem with +/// the generated code. This currently generates code using the udiv expansion, +/// but future work includes generating more specialized code, e.g. when more +/// information about the operands are known. Currently only implements 32bit +/// scalar division (due to udiv's limitation), but future work is removing this +/// limitation. +/// +/// @brief Replace Rem with generated code. 
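The function that follows emits exactly the IR spelled out in the comments above. As a plain C++ cross-check of the arithmetic (an illustration of the expansion, not the generated IR): the unsigned remainder is dividend - (dividend / divisor) * divisor, and the signed remainder strips both signs, takes the unsigned remainder, then restores the dividend's sign with the xor/subtract trick.

#include <cassert>
#include <cstdint>

// Unsigned expansion: Remainder = Dividend - Quotient * Divisor.
static uint32_t uremExpanded(uint32_t a, uint32_t b) {
  uint32_t q = a / b;  // the udiv that expandRemainder will expand further
  return a - q * b;
}

// Signed expansion, mirroring the ashr/xor/sub sequence above: (x ^ s) - s
// negates x when s is the all-ones sign mask and is the identity when s is 0.
// (a >> 31 is an arithmetic shift on the usual two's-complement targets,
// matching the ashr in the IR.)
static int32_t sremExpanded(int32_t a, int32_t b) {
  uint32_t sa = static_cast<uint32_t>(a >> 31);        // 0 or 0xFFFFFFFF
  uint32_t sb = static_cast<uint32_t>(b >> 31);
  uint32_t ua = (static_cast<uint32_t>(a) ^ sa) - sa;  // |a|
  uint32_t ub = (static_cast<uint32_t>(b) ^ sb) - sb;  // |b|
  uint32_t r = ua % ub;                                // the urem in the IR
  return static_cast<int32_t>((r ^ sa) - sa);          // give r the dividend's sign
}

int main() {
  assert(uremExpanded(17u, 5u) == 17u % 5u);
  assert(sremExpanded(-17, 5) == -17 % 5);  // both are -2
  assert(sremExpanded(17, -5) == 17 % -5);  // both are 2
  return 0;
}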
+bool llvm::expandRemainder(BinaryOperator *Rem) { + assert((Rem->getOpcode() == Instruction::SRem || + Rem->getOpcode() == Instruction::URem) && + "Trying to expand remainder from a non-remainder function"); + + IRBuilder<> Builder(Rem); + + // First prepare the sign if it's a signed remainder + if (Rem->getOpcode() == Instruction::SRem) { + Value *Remainder = generateSignedRemainderCode(Rem->getOperand(0), + Rem->getOperand(1), Builder); + + Rem->replaceAllUsesWith(Remainder); + Rem->dropAllReferences(); + Rem->eraseFromParent(); + + // If we didn't actually generate a udiv instruction, we're done + BinaryOperator *BO = dyn_cast<BinaryOperator>(Builder.GetInsertPoint()); + if (!BO || BO->getOpcode() != Instruction::URem) + return true; + + Rem = BO; + } + + Value *Remainder = generatedUnsignedRemainderCode(Rem->getOperand(0), + Rem->getOperand(1), + Builder); + + Rem->replaceAllUsesWith(Remainder); + Rem->dropAllReferences(); + Rem->eraseFromParent(); + + // Expand the udiv + if (BinaryOperator *UDiv = dyn_cast<BinaryOperator>(Builder.GetInsertPoint())) { + assert(UDiv->getOpcode() == Instruction::UDiv && "Non-udiv in expansion?"); + expandDivision(UDiv); + } + + return true; +} + + /// Generate code to divide two integers, replacing Div with the generated /// code. This currently generates code similarly to compiler-rt's /// implementations, but future work includes generating more specialized code @@ -287,7 +395,7 @@ bool llvm::expandDivision(BinaryOperator *Div) { if (Div->getOpcode() == Instruction::SDiv) { // Lower the code to unsigned division, and reset Div to point to the udiv. Value *Quotient = generateSignedDivisionCode(Div->getOperand(0), - Div->getOperand(1), Builder); + Div->getOperand(1), Builder); Div->replaceAllUsesWith(Quotient); Div->dropAllReferences(); Div->eraseFromParent(); diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 876ff2c337..065325b7c2 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -58,9 +58,10 @@ static cl::opt<bool> SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), cl::desc("Sink common instructions down to the end block")); -STATISTIC(NumSpeculations, "Number of speculative executed instructions"); +STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps"); STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables"); STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block"); +STATISTIC(NumSpeculations, "Number of speculative executed instructions"); namespace { /// ValueEqualityComparisonCase - Represents a case of a switch. @@ -3240,83 +3241,227 @@ static bool GetCaseResults(SwitchInst *SI, return true; } -/// BuildLookupTable - Build a lookup table with the contents of Results, using -/// DefaultResult to fill the holes in the table. If the table ends up -/// containing the same result in each element, set *SingleResult to that value -/// and return NULL. -static GlobalVariable *BuildLookupTable(Module &M, - uint64_t TableSize, - ConstantInt *Offset, - const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Results, - Constant *DefaultResult, - Constant **SingleResult) { - assert(Results.size() && "Need values to build lookup table"); - assert(TableSize >= Results.size() && "Table needs to hold all values"); +namespace { + /// SwitchLookupTable - This class represents a lookup table that can be used + /// to replace a switch. 
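Before the class itself, the shape of the transformation it supports (illustration only; the values are made up): when every case of a dense switch just selects a constant for a PHI in the common successor, the switch can be replaced by a single range check plus an index into a constant table keyed by the case value minus the smallest case value. A hand-written C++ analogue of the ArrayKind form:

#include <cassert>

// Original: a dense switch that only picks a constant per case.
static int viaSwitch(unsigned x) {
  switch (x) {
  case 2: return 10;
  case 3: return 11;
  case 4: return 13;
  case 5: return 10;
  default: return -1;
  }
}

// Lookup-table form: one range check replaces all case comparisons, and the
// result is loaded from a constant array indexed by x - MinCaseVal.
static int viaTable(unsigned x) {
  static const int Table[] = {10, 11, 13, 10};  // results for cases 2..5
  unsigned Index = x - 2;                       // x - MinCaseVal
  return Index < 4 ? Table[Index] : -1;         // default result outside the range
}

int main() {
  for (unsigned x = 0; x < 8; ++x)
    assert(viaSwitch(x) == viaTable(x));
  return 0;
}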
+ class SwitchLookupTable { + public: + /// SwitchLookupTable - Create a lookup table to use as a switch replacement + /// with the contents of Values, using DefaultValue to fill any holes in the + /// table. + SwitchLookupTable(Module &M, + uint64_t TableSize, + ConstantInt *Offset, + const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Values, + Constant *DefaultValue, + const TargetData *TD); + + /// BuildLookup - Build instructions with Builder to retrieve the value at + /// the position given by Index in the lookup table. + Value *BuildLookup(Value *Index, IRBuilder<> &Builder); + + /// WouldFitInRegister - Return true if a table with TableSize elements of + /// type ElementType would fit in a target-legal register. + static bool WouldFitInRegister(const TargetData *TD, + uint64_t TableSize, + const Type *ElementType); + + private: + // Depending on the contents of the table, it can be represented in + // different ways. + enum { + // For tables where each element contains the same value, we just have to + // store that single value and return it for each lookup. + SingleValueKind, + + // For small tables with integer elements, we can pack them into a bitmap + // that fits into a target-legal register. Values are retrieved by + // shift and mask operations. + BitMapKind, + + // The table is stored as an array of values. Values are retrieved by load + // instructions from the table. + ArrayKind + } Kind; + + // For SingleValueKind, this is the single value. + Constant *SingleValue; + + // For BitMapKind, this is the bitmap. + ConstantInt *BitMap; + IntegerType *BitMapElementTy; + + // For ArrayKind, this is the array. + GlobalVariable *Array; + }; +} + +SwitchLookupTable::SwitchLookupTable(Module &M, + uint64_t TableSize, + ConstantInt *Offset, + const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Values, + Constant *DefaultValue, + const TargetData *TD) { + assert(Values.size() && "Can't build lookup table without values!"); + assert(TableSize >= Values.size() && "Can't fit values in table!"); // If all values in the table are equal, this is that value. - Constant *SameResult = Results.begin()->second; + SingleValue = Values.begin()->second; // Build up the table contents. - std::vector<Constant*> TableContents(TableSize); - for (size_t I = 0, E = Results.size(); I != E; ++I) { - ConstantInt *CaseVal = Results[I].first; - Constant *CaseRes = Results[I].second; - - uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue(); + SmallVector<Constant*, 64> TableContents(TableSize); + for (size_t I = 0, E = Values.size(); I != E; ++I) { + ConstantInt *CaseVal = Values[I].first; + Constant *CaseRes = Values[I].second; + assert(CaseRes->getType() == DefaultValue->getType()); + + uint64_t Idx = (CaseVal->getValue() - Offset->getValue()) + .getLimitedValue(); TableContents[Idx] = CaseRes; - if (CaseRes != SameResult) - SameResult = NULL; + if (CaseRes != SingleValue) + SingleValue = NULL; } // Fill in any holes in the table with the default result. - if (Results.size() < TableSize) { - for (unsigned i = 0; i < TableSize; ++i) { - if (!TableContents[i]) - TableContents[i] = DefaultResult; + if (Values.size() < TableSize) { + for (uint64_t I = 0; I < TableSize; ++I) { + if (!TableContents[I]) + TableContents[I] = DefaultValue; } - if (DefaultResult != SameResult) - SameResult = NULL; + if (DefaultValue != SingleValue) + SingleValue = NULL; + } + + // If each element in the table contains the same value, we only need to store + // that single value. 
+ if (SingleValue) { + Kind = SingleValueKind; + return; + } - // Same result was used in the entire table; just return that. - if (SameResult) { - *SingleResult = SameResult; - return NULL; + // If the type is integer and the table fits in a register, build a bitmap. + if (WouldFitInRegister(TD, TableSize, DefaultValue->getType())) { + IntegerType *IT = cast<IntegerType>(DefaultValue->getType()); + APInt TableInt(TableSize * IT->getBitWidth(), 0); + for (uint64_t I = TableSize; I > 0; --I) { + TableInt <<= IT->getBitWidth(); + ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]); + TableInt |= Val->getValue().zext(TableInt.getBitWidth()); + } + BitMap = ConstantInt::get(M.getContext(), TableInt); + BitMapElementTy = IT; + Kind = BitMapKind; + ++NumBitMaps; + return; } - ArrayType *ArrayTy = ArrayType::get(DefaultResult->getType(), TableSize); + // Store the table in an array. + ArrayType *ArrayTy = ArrayType::get(DefaultValue->getType(), TableSize); Constant *Initializer = ConstantArray::get(ArrayTy, TableContents); - GlobalVariable *GV = new GlobalVariable(M, ArrayTy, /*constant=*/ true, - GlobalVariable::PrivateLinkage, - Initializer, - "switch.table"); - GV->setUnnamedAddr(true); - return GV; + Array = new GlobalVariable(M, ArrayTy, /*constant=*/ true, + GlobalVariable::PrivateLinkage, + Initializer, + "switch.table"); + Array->setUnnamedAddr(true); + Kind = ArrayKind; +} + +Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) { + switch (Kind) { + case SingleValueKind: + return SingleValue; + case BitMapKind: { + // Type of the bitmap (e.g. i59). + IntegerType *MapTy = BitMap->getType(); + + // Cast Index to the same type as the bitmap. + // Note: The Index is <= the number of elements in the table, so + // truncating it to the width of the bitmask is safe. + Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast"); + + // Multiply the shift amount by the element width. + ShiftAmt = Builder.CreateMul(ShiftAmt, + ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()), + "switch.shiftamt"); + + // Shift down. + Value *DownShifted = Builder.CreateLShr(BitMap, ShiftAmt, + "switch.downshift"); + // Mask off. + return Builder.CreateTrunc(DownShifted, BitMapElementTy, + "switch.masked"); + } + case ArrayKind: { + Value *GEPIndices[] = { Builder.getInt32(0), Index }; + Value *GEP = Builder.CreateInBoundsGEP(Array, GEPIndices, + "switch.gep"); + return Builder.CreateLoad(GEP, "switch.load"); + } + } + llvm_unreachable("Unknown lookup table kind!"); +} + +bool SwitchLookupTable::WouldFitInRegister(const TargetData *TD, + uint64_t TableSize, + const Type *ElementType) { + if (!TD) + return false; + const IntegerType *IT = dyn_cast<IntegerType>(ElementType); + if (!IT) + return false; + // FIXME: If the type is wider than it needs to be, e.g. i8 but all values + // are <= 15, we could try to narrow the type. + + // Avoid overflow, fitsInLegalInteger uses unsigned int for the width. + if (TableSize >= UINT_MAX/IT->getBitWidth()) + return false; + return TD->fitsInLegalInteger(TableSize * IT->getBitWidth()); +} + +/// ShouldBuildLookupTable - Determine whether a lookup table should be built +/// for this switch, based on the number of cases, size of the table and the +/// types of the results. +static bool ShouldBuildLookupTable(SwitchInst *SI, + uint64_t TableSize, + const TargetData *TD, + const SmallDenseMap<PHINode*, Type*>& ResultTypes) { + // The table density should be at least 40%.
This is the same criterion as for + // jump tables, see SelectionDAGBuilder::handleJTSwitchCase. + // FIXME: Find the best cut-off. + if (SI->getNumCases() > TableSize || TableSize >= UINT64_MAX / 10) + return false; // TableSize overflowed, or mul below might overflow. + if (SI->getNumCases() * 10 >= TableSize * 4) + return true; + + // If each table would fit in a register, we should build it anyway. + for (SmallDenseMap<PHINode*, Type*>::const_iterator I = ResultTypes.begin(), + E = ResultTypes.end(); I != E; ++I) { + if (!SwitchLookupTable::WouldFitInRegister(TD, TableSize, I->second)) + return false; + } + return true; } /// SwitchToLookupTable - If the switch is only used to initialize one or more /// phi nodes in a common successor block with different constant values, /// replace the switch with lookup tables. static bool SwitchToLookupTable(SwitchInst *SI, - IRBuilder<> &Builder) { + IRBuilder<> &Builder, + const TargetData* TD) { assert(SI->getNumCases() > 1 && "Degenerate switch?"); // FIXME: Handle unreachable cases. // FIXME: If the switch is too sparse for a lookup table, perhaps we could // split off a dense part and build a lookup table for that. - // FIXME: If the results are all integers and the lookup table would fit in a - // target-legal register, we should store them as a bitmap and use shift/mask - // to look up the result. - // FIXME: This creates arrays of GEPs to constant strings, which means each // GEP needs a runtime relocation in PIC code. We should just build one big // string and lookup indices into that. - // Ignore the switch if the number of cases are too small. + // Ignore the switch if the number of cases is too small. // This is similar to the check when building jump tables in // SelectionDAGBuilder::handleJTSwitchCase. // FIXME: Determine the best cut-off. @@ -3370,33 +3515,12 @@ static bool SwitchToLookupTable(SwitchInst *SI, } APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue(); - // The table density should be at lest 40%. This is the same criterion as for - // jump tables, see SelectionDAGBuilder::handleJTSwitchCase. - // FIXME: Find the best cut-off. - // Be careful to avoid overlow in the density computation. - if (RangeSpread.zextOrSelf(64).ugt(UINT64_MAX / 4 - 1)) - return false; uint64_t TableSize = RangeSpread.getLimitedValue() + 1; - if (SI->getNumCases() * 10 < TableSize * 4) + if (!ShouldBuildLookupTable(SI, TableSize, TD, ResultTypes)) return false; - // Build the lookup tables. - SmallDenseMap<PHINode*, GlobalVariable*> LookupTables; - SmallDenseMap<PHINode*, Constant*> SingleResults; - - Module &Mod = *CommonDest->getParent()->getParent(); - for (SmallVector<PHINode*, 4>::iterator I = PHIs.begin(), E = PHIs.end(); - I != E; ++I) { - PHINode *PHI = *I; - - Constant *SingleResult = NULL; - LookupTables[PHI] = BuildLookupTable(Mod, TableSize, MinCaseVal, - ResultLists[PHI], DefaultResults[PHI], - &SingleResult); - SingleResults[PHI] = SingleResult; - } - // Create the BB that does the lookups. + Module &Mod = *CommonDest->getParent()->getParent(); BasicBlock *LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup", CommonDest->getParent(), @@ -3414,19 +3538,13 @@ static bool SwitchToLookupTable(SwitchInst *SI, // Populate the BB that does the lookups. Builder.SetInsertPoint(LookupBB); bool ReturnedEarly = false; - for (SmallVector<PHINode*, 4>::iterator I = PHIs.begin(), E = PHIs.end(); - I != E; ++I) { - PHINode *PHI = *I; - // There was a single result for this phi; just use that. 
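(Editor's aside, not part of the patch; the removed BuildLookupTable hunk continues below. The bitmap representation and the 40% density rule introduced above are easier to see on plain integers. This is a minimal standalone sketch under stated assumptions: packTable, lookupTable and isDenseEnough are hypothetical names, and uint64_t stands in for the APInt bitmap and the IRBuilder-generated shift/truncate sequence.)

#include <cassert>
#include <cstdint>
#include <vector>

// Pack small table elements into one register-sized word, element 0 in the
// lowest bits, mirroring how the constructor builds the APInt bitmap.
static uint64_t packTable(const std::vector<uint64_t> &Elems, unsigned ElemBits) {
  assert(ElemBits < 64 && Elems.size() * ElemBits <= 64 && "table too wide");
  uint64_t Bits = 0;
  for (size_t I = Elems.size(); I > 0; --I) {
    Bits <<= ElemBits;
    Bits |= Elems[I - 1] & ((1ULL << ElemBits) - 1);
  }
  return Bits;
}

// Retrieve element Index by shifting down and masking, as BuildLookup does
// with CreateLShr followed by CreateTrunc.
static uint64_t lookupTable(uint64_t Bits, unsigned ElemBits, uint64_t Index) {
  return (Bits >> (Index * ElemBits)) & ((1ULL << ElemBits) - 1);
}

// The 40% density rule from ShouldBuildLookupTable, written out on integers.
static bool isDenseEnough(uint64_t NumCases, uint64_t TableSize) {
  return NumCases * 10 >= TableSize * 4;
}

int main() {
  // A switch whose results for case indices 0..3 are {7, 0, 3, 3}, i8 elements:
  // the packed word is 0x03030007.
  uint64_t Bits = packTable({7, 0, 3, 3}, 8);
  assert(lookupTable(Bits, 8, 2) == 3);
  // 5 cases spread over values 10..20 give TableSize = 11; 5*10 >= 11*4 holds,
  // so the table would be built.
  assert(isDenseEnough(5, 11));
  return 0;
}

Dividing the packed word back out with a shift and a mask is exactly the switch.downshift / switch.masked sequence that BuildLookup emits.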
- if (Constant *SingleResult = SingleResults[PHI]) { - PHI->addIncoming(SingleResult, LookupBB); - continue; - } + for (size_t I = 0, E = PHIs.size(); I != E; ++I) { + PHINode *PHI = PHIs[I]; + + SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultLists[PHI], + DefaultResults[PHI], TD); - Value *GEPIndices[] = { Builder.getInt32(0), TableIndex }; - Value *GEP = Builder.CreateInBoundsGEP(LookupTables[PHI], GEPIndices, - "switch.gep"); - Value *Result = Builder.CreateLoad(GEP, "switch.load"); + Value *Result = Table.BuildLookup(TableIndex, Builder); // If the result is used to return immediately from the function, we want to // do that right here. @@ -3494,7 +3612,7 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { if (ForwardSwitchConditionToPHI(SI)) return SimplifyCFG(BB) | true; - if (SwitchToLookupTable(SI, Builder)) + if (SwitchToLookupTable(SI, Builder, TD)) return SimplifyCFG(BB) | true; return false; diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp index fc2538db64..a30b09321b 100644 --- a/lib/Transforms/Utils/ValueMapper.cpp +++ b/lib/Transforms/Utils/ValueMapper.cpp @@ -21,7 +21,7 @@ using namespace llvm; // Out of line method to get vtable etc for class. -void ValueMapTypeRemapper::Anchor() {} +void ValueMapTypeRemapper::anchor() {} Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, ValueMapTypeRemapper *TypeMapper) { diff --git a/lib/VMCore/Attributes.cpp b/lib/VMCore/Attributes.cpp index af8163fd40..7d3197cb0d 100644 --- a/lib/VMCore/Attributes.cpp +++ b/lib/VMCore/Attributes.cpp @@ -12,6 +12,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Attributes.h" +#include "AttributesImpl.h" +#include "LLVMContextImpl.h" #include "llvm/Type.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/FoldingSet.h" @@ -94,21 +96,52 @@ std::string Attributes::getAsString() const { return Result; } -Attributes Attribute::typeIncompatible(Type *Ty) { - Attributes Incompatible = None; +Attributes Attributes::typeIncompatible(Type *Ty) { + Attributes Incompatible = Attribute::None; if (!Ty->isIntegerTy()) // Attributes that only apply to integers. - Incompatible |= SExt | ZExt; + Incompatible |= Attribute::SExt | Attribute::ZExt; if (!Ty->isPointerTy()) // Attributes that only apply to pointers. - Incompatible |= ByVal | Nest | NoAlias | StructRet | NoCapture; + Incompatible |= Attribute::ByVal | Attribute::Nest | Attribute::NoAlias | + Attribute::StructRet | Attribute::NoCapture; return Incompatible; } //===----------------------------------------------------------------------===// +// AttributeImpl Definition +//===----------------------------------------------------------------------===// + +Attributes::Attributes(AttributesImpl *A) : Bits(0) {} + +Attributes Attributes::get(LLVMContext &Context, Attributes::Builder &B) { + // If there are no attributes, return an empty Attributes class. + if (B.Bits == 0) + return Attributes(); + + // Otherwise, build a key to look up the existing attributes. + LLVMContextImpl *pImpl = Context.pImpl; + FoldingSetNodeID ID; + ID.AddInteger(B.Bits); + + void *InsertPoint; + AttributesImpl *PA = pImpl->AttrsSet.FindNodeOrInsertPos(ID, InsertPoint); + + if (!PA) { + // If we didn't find any existing attributes of the same shape then create a + // new one and insert it. + PA = new AttributesImpl(B.Bits); + pImpl->AttrsSet.InsertNode(PA, InsertPoint); + } + + // Return the AttributesList that we found or created. 
+ return Attributes(PA); +} + +//===----------------------------------------------------------------------===// // AttributeListImpl Definition //===----------------------------------------------------------------------===// diff --git a/lib/VMCore/AttributesImpl.h b/lib/VMCore/AttributesImpl.h new file mode 100644 index 0000000000..90890a14c3 --- /dev/null +++ b/lib/VMCore/AttributesImpl.h @@ -0,0 +1,40 @@ +//===-- AttributesImpl.h - Attributes Internals -----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines various helper methods and classes used by LLVMContextImpl +// for creating and managing attributes. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ATTRIBUTESIMPL_H +#define LLVM_ATTRIBUTESIMPL_H + +#include "llvm/ADT/FoldingSet.h" + +namespace llvm { + +class AttributesImpl : public FoldingSetNode { + uint64_t Bits; // FIXME: We will be expanding this. + + void operator=(const AttributesImpl &) LLVM_DELETED_FUNCTION; + AttributesImpl(const AttributesImpl &) LLVM_DELETED_FUNCTION; +public: + AttributesImpl(uint64_t bits) : Bits(bits) {} + + void Profile(FoldingSetNodeID &ID) const { + Profile(ID, Bits); + } + static void Profile(FoldingSetNodeID &ID, uint64_t Bits) { + ID.AddInteger(Bits); + } +}; + +} // end llvm namespace + +#endif diff --git a/lib/VMCore/Function.cpp b/lib/VMCore/Function.cpp index 2e0b3168c9..012d27603a 100644 --- a/lib/VMCore/Function.cpp +++ b/lib/VMCore/Function.cpp @@ -78,7 +78,7 @@ unsigned Argument::getArgNo() const { /// in its containing function. bool Argument::hasByValAttr() const { if (!getType()->isPointerTy()) return false; - return getParent()->paramHasAttr(getArgNo()+1, Attribute::ByVal); + return getParent()->getParamAttributes(getArgNo()+1).hasByValAttr(); } unsigned Argument::getParamAlignment() const { @@ -91,21 +91,21 @@ unsigned Argument::getParamAlignment() const { /// it in its containing function. bool Argument::hasNestAttr() const { if (!getType()->isPointerTy()) return false; - return getParent()->paramHasAttr(getArgNo()+1, Attribute::Nest); + return getParent()->getParamAttributes(getArgNo()+1).hasNestAttr(); } /// hasNoAliasAttr - Return true if this argument has the noalias attribute on /// it in its containing function. bool Argument::hasNoAliasAttr() const { if (!getType()->isPointerTy()) return false; - return getParent()->paramHasAttr(getArgNo()+1, Attribute::NoAlias); + return getParent()->getParamAttributes(getArgNo()+1).hasNoAliasAttr(); } /// hasNoCaptureAttr - Return true if this argument has the nocapture attribute /// on it in its containing function. 
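(Editor's aside, not part of the patch: Attributes::get above follows the usual LLVM uniquing pattern — profile a key, look it up in a set owned by the context, and create the node only on a miss. Below is a minimal standalone sketch of that pattern with std::unordered_map standing in for FoldingSet; UniquedAttrs, AttrsContext and getOrCreate are hypothetical names.)

#include <cstdint>
#include <memory>
#include <unordered_map>

// Stand-in for AttributesImpl: one canonical node per distinct bit pattern.
struct UniquedAttrs {
  uint64_t Bits;
  explicit UniquedAttrs(uint64_t B) : Bits(B) {}
};

// Stand-in for the AttrsSet owned by LLVMContextImpl.
class AttrsContext {
  std::unordered_map<uint64_t, std::unique_ptr<UniquedAttrs>> Set;

public:
  // FindNodeOrInsertPos/InsertNode collapsed into a single map lookup.
  UniquedAttrs *getOrCreate(uint64_t Bits) {
    auto &Slot = Set[Bits];
    if (!Slot)
      Slot.reset(new UniquedAttrs(Bits)); // first request: create and keep it
    return Slot.get();                    // later requests: reuse the same node
  }
  // All nodes die with the context, as in ~LLVMContextImpl above.
};

Because every request for the same bit pattern hands back the same pointer, attribute equality can be a pointer comparison, and the context frees every node in one sweep on destruction.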
bool Argument::hasNoCaptureAttr() const { if (!getType()->isPointerTy()) return false; - return getParent()->paramHasAttr(getArgNo()+1, Attribute::NoCapture); + return getParent()->getParamAttributes(getArgNo()+1).hasNoCaptureAttr(); } /// hasSRetAttr - Return true if this argument has the sret attribute on @@ -114,7 +114,7 @@ bool Argument::hasStructRetAttr() const { if (!getType()->isPointerTy()) return false; if (this != getParent()->arg_begin()) return false; // StructRet param must be first param - return getParent()->paramHasAttr(1, Attribute::StructRet); + return getParent()->getParamAttributes(1).hasStructRetAttr(); } /// addAttr - Add a Attribute to an argument diff --git a/lib/VMCore/IRBuilder.cpp b/lib/VMCore/IRBuilder.cpp index 5c4e6d9642..04f08fe28e 100644 --- a/lib/VMCore/IRBuilder.cpp +++ b/lib/VMCore/IRBuilder.cpp @@ -80,7 +80,7 @@ CreateMemSet(Value *Ptr, Value *Val, Value *Size, unsigned Align, CallInst *IRBuilderBase:: CreateMemCpy(Value *Dst, Value *Src, Value *Size, unsigned Align, - bool isVolatile, MDNode *TBAATag) { + bool isVolatile, MDNode *TBAATag, MDNode *TBAAStructTag) { Dst = getCastedInt8PtrValue(Dst); Src = getCastedInt8PtrValue(Src); @@ -94,6 +94,10 @@ CreateMemCpy(Value *Dst, Value *Src, Value *Size, unsigned Align, // Set the TBAA info if present. if (TBAATag) CI->setMetadata(LLVMContext::MD_tbaa, TBAATag); + + // Set the TBAA Struct info if present. + if (TBAAStructTag) + CI->setMetadata(LLVMContext::MD_tbaa_struct, TBAAStructTag); return CI; } diff --git a/lib/VMCore/LLVMContextImpl.cpp b/lib/VMCore/LLVMContextImpl.cpp index 6279bb823d..a86363b632 100644 --- a/lib/VMCore/LLVMContextImpl.cpp +++ b/lib/VMCore/LLVMContextImpl.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "LLVMContextImpl.h" +#include "llvm/Attributes.h" #include "llvm/Module.h" #include "llvm/ADT/STLExtras.h" #include <algorithm> @@ -93,6 +94,11 @@ LLVMContextImpl::~LLVMContextImpl() { E = CDSConstants.end(); I != E; ++I) delete I->second; CDSConstants.clear(); + + // Destroy attributes. + for (FoldingSetIterator<AttributesImpl> I = AttrsSet.begin(), + E = AttrsSet.end(); I != E; ++I) + delete &*I; // Destroy MDNodes. ~MDNode can move and remove nodes between the MDNodeSet // and the NonUniquedMDNodes sets, so copy the values out first. @@ -107,6 +113,7 @@ LLVMContextImpl::~LLVMContextImpl() { (*I)->destroy(); assert(MDNodeSet.empty() && NonUniquedMDNodes.empty() && "Destroying all MDNodes didn't empty the Context's sets."); + // Destroy MDStrings. DeleteContainerSeconds(MDStringCache); } diff --git a/lib/VMCore/LLVMContextImpl.h b/lib/VMCore/LLVMContextImpl.h index 2252028b15..ee31814c05 100644 --- a/lib/VMCore/LLVMContextImpl.h +++ b/lib/VMCore/LLVMContextImpl.h @@ -16,6 +16,7 @@ #define LLVM_LLVMCONTEXT_IMPL_H #include "llvm/LLVMContext.h" +#include "AttributesImpl.h" #include "ConstantsContext.h" #include "LeaksContext.h" #include "llvm/Constants.h" @@ -253,10 +254,13 @@ public: typedef DenseMap<DenseMapAPFloatKeyInfo::KeyTy, ConstantFP*, DenseMapAPFloatKeyInfo> FPMapTy; FPMapTy FPConstants; + + FoldingSet<AttributesImpl> AttrsSet; StringMap<Value*> MDStringCache; - + FoldingSet<MDNode> MDNodeSet; + // MDNodes may be uniqued or not uniqued. When they're not uniqued, they // aren't in the MDNodeSet, but they're still shared between objects, so no // one object can destroy them. 
This set allows us to at least destroy them diff --git a/lib/VMCore/ValueTypes.cpp b/lib/VMCore/ValueTypes.cpp index e9370f62e6..2ee9f0f4c9 100644 --- a/lib/VMCore/ValueTypes.cpp +++ b/lib/VMCore/ValueTypes.cpp @@ -56,31 +56,31 @@ bool EVT::isExtendedVector() const { } bool EVT::isExtended16BitVector() const { - return isExtendedVector() && getSizeInBits() == 16; + return isExtendedVector() && getExtendedSizeInBits() == 16; } bool EVT::isExtended32BitVector() const { - return isExtendedVector() && getSizeInBits() == 32; + return isExtendedVector() && getExtendedSizeInBits() == 32; } bool EVT::isExtended64BitVector() const { - return isExtendedVector() && getSizeInBits() == 64; + return isExtendedVector() && getExtendedSizeInBits() == 64; } bool EVT::isExtended128BitVector() const { - return isExtendedVector() && getSizeInBits() == 128; + return isExtendedVector() && getExtendedSizeInBits() == 128; } bool EVT::isExtended256BitVector() const { - return isExtendedVector() && getSizeInBits() == 256; + return isExtendedVector() && getExtendedSizeInBits() == 256; } bool EVT::isExtended512BitVector() const { - return isExtendedVector() && getSizeInBits() == 512; + return isExtendedVector() && getExtendedSizeInBits() == 512; } bool EVT::isExtended1024BitVector() const { - return isExtendedVector() && getSizeInBits() == 1024; + return isExtendedVector() && getExtendedSizeInBits() == 1024; } EVT EVT::getExtendedVectorElementType() const { diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp index 647a52fbdd..292456ab63 100644 --- a/lib/VMCore/Verifier.cpp +++ b/lib/VMCore/Verifier.cpp @@ -546,7 +546,7 @@ void Verifier::VerifyParameterAttrs(Attributes Attrs, Type *Ty, MutI.getAsString() + " are incompatible!", V); } - Attributes TypeI = Attrs & Attribute::typeIncompatible(Ty); + Attributes TypeI = Attrs & Attributes::typeIncompatible(Ty); Assert1(!TypeI, "Wrong type for attribute " + TypeI.getAsString(), V); diff --git a/test/Analysis/CallGraph/do-nothing-intrinsic.ll b/test/Analysis/CallGraph/do-nothing-intrinsic.ll new file mode 100644 index 0000000000..f28ad10f57 --- /dev/null +++ b/test/Analysis/CallGraph/do-nothing-intrinsic.ll @@ -0,0 +1,13 @@ +; RUN: opt < %s -basiccg +; PR13903 + +define void @main() { + invoke void @llvm.donothing() + to label %ret unwind label %unw +unw: + %tmp = landingpad i8 personality i8 0 cleanup + br label %ret +ret: + ret void +} +declare void @llvm.donothing() nounwind readnone diff --git a/test/CodeGen/ARM/2010-12-07-PEIBug.ll b/test/CodeGen/ARM/2010-12-07-PEIBug.ll index 770ad4466a..4879f4e10b 100644 --- a/test/CodeGen/ARM/2010-12-07-PEIBug.ll +++ b/test/CodeGen/ARM/2010-12-07-PEIBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a9 | FileCheck %s ; rdar://8728956 define hidden void @foo() nounwind ssp { diff --git a/test/CodeGen/ARM/2012-05-04-vmov.ll b/test/CodeGen/ARM/2012-05-04-vmov.ll new file mode 100644 index 0000000000..d52ef2cc5a --- /dev/null +++ b/test/CodeGen/ARM/2012-05-04-vmov.ll @@ -0,0 +1,11 @@ +; RUN: llc -O1 -march=arm -mcpu=cortex-a9 < %s | FileCheck -check-prefix=A9-CHECK %s +; RUN: llc -O1 -march=arm -mcpu=swift < %s | FileCheck -check-prefix=SWIFT-CHECK %s +; Check that swift doesn't use vmov.32. <rdar://problem/10453003>. 
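(Editor's aside before the remaining ARM tests, not part of the patch: the Verifier hunk above masks a parameter's attributes with Attributes::typeIncompatible(Ty) and requires the result to be empty. A small hypothetical model of that check with plain bit flags; the real code uses the Attribute::* values and Type queries.)

#include <cstdint>

// Toy attribute flags standing in for Attribute::SExt, Attribute::ByVal, etc.
enum : uint64_t { SExt = 1, ZExt = 2, ByVal = 4, Nest = 8, NoAlias = 16 };

// Which attributes make no sense for a parameter of the given kind.
static uint64_t typeIncompatible(bool IsInteger, bool IsPointer) {
  uint64_t Incompatible = 0;
  if (!IsInteger)
    Incompatible |= SExt | ZExt;            // integer-only attributes
  if (!IsPointer)
    Incompatible |= ByVal | Nest | NoAlias; // pointer-only attributes
  return Incompatible;
}

// The Verifier-style assertion: nothing incompatible may be set.
static bool attrsValidFor(uint64_t Attrs, bool IsInteger, bool IsPointer) {
  return (Attrs & typeIncompatible(IsInteger, IsPointer)) == 0;
}

For example, sext on a float parameter fails the check (the type is neither integer nor pointer), while byval on a pointer parameter passes.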
+ +define <2 x i32> @testuvec(<2 x i32> %A, <2 x i32> %B) nounwind { +entry: + %div = udiv <2 x i32> %A, %B + ret <2 x i32> %div +; A9-CHECK: vmov.32 +; SWIFT-CHECK-NOT: vmov.32 +} diff --git a/test/CodeGen/ARM/2012-05-10-PreferVMOVtoVDUP32.ll b/test/CodeGen/ARM/2012-05-10-PreferVMOVtoVDUP32.ll new file mode 100644 index 0000000000..dd678436c0 --- /dev/null +++ b/test/CodeGen/ARM/2012-05-10-PreferVMOVtoVDUP32.ll @@ -0,0 +1,14 @@ +; RUN: llc -march=arm -mcpu=swift < %s | FileCheck %s +; <rdar://problem/10451892> + +define void @f(i32 %x, i32* %p) nounwind ssp { +entry: +; CHECK-NOT: vdup.32 + %vecinit.i = insertelement <2 x i32> undef, i32 %x, i32 0 + %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %x, i32 1 + %0 = bitcast i32* %p to i8* + tail call void @llvm.arm.neon.vst1.v2i32(i8* %0, <2 x i32> %vecinit1.i, i32 4) + ret void +} + +declare void @llvm.arm.neon.vst1.v2i32(i8*, <2 x i32>, i32) nounwind diff --git a/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv.ll b/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv.ll new file mode 100644 index 0000000000..75766099a2 --- /dev/null +++ b/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv.ll @@ -0,0 +1,11 @@ +; RUN: llc < %s -march=arm -mcpu=cortex-a8 2>&1 | FileCheck %s + +; Check for error message: +; CHECK: non-trivial scalar-to-vector conversion, possible invalid constraint for vector type + +define void @f() nounwind ssp { + %1 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } asm "vldm $4, { ${0:q}, ${1:q}, ${2:q}, ${3:q} }", "=r,=r,=r,=r,r"(i64* undef) nounwind, !srcloc !0 + ret void +} + +!0 = metadata !{i32 318437} diff --git a/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv2.ll b/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv2.ll new file mode 100644 index 0000000000..6fa1391474 --- /dev/null +++ b/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv2.ll @@ -0,0 +1,11 @@ +; RUN: llc < %s -march=arm -mcpu=cortex-a8 2>&1 | FileCheck %s + +; Check for error message: +; CHECK: scalar-to-vector conversion failed, possible invalid constraint for vector type + +define hidden void @f(i32* %corr, i32 %order) nounwind ssp { + tail call void asm sideeffect "vst1.s32 { ${1:q}, ${2:q} }, [$0]", "r,{q0},{q1}"(i32* %corr, <2 x i64>* undef, <2 x i64>* undef) nounwind, !srcloc !0 + ret void +} + +!0 = metadata !{i32 257} diff --git a/test/CodeGen/ARM/atomicrmw_minmax.ll b/test/CodeGen/ARM/atomicrmw_minmax.ll new file mode 100644 index 0000000000..69f1384e12 --- /dev/null +++ b/test/CodeGen/ARM/atomicrmw_minmax.ll @@ -0,0 +1,21 @@ +; RUN: llc -march=arm -mcpu=cortex-a9 < %s | FileCheck %s + +; CHECK: max: +define i32 @max(i8 %ctx, i32* %ptr, i32 %val) +{ +; CHECK: ldrex +; CHECK: cmp [[old:r[0-9]*]], [[val:r[0-9]*]] +; CHECK: movhi {{r[0-9]*}}, [[old]] + %old = atomicrmw umax i32* %ptr, i32 %val monotonic + ret i32 %old +} + +; CHECK: min: +define i32 @min(i8 %ctx, i32* %ptr, i32 %val) +{ +; CHECK: ldrex +; CHECK: cmp [[old:r[0-9]*]], [[val:r[0-9]*]] +; CHECK: movlo {{r[0-9]*}}, [[old]] + %old = atomicrmw umin i32* %ptr, i32 %val monotonic + ret i32 %old +} diff --git a/test/CodeGen/ARM/avoid-cpsr-rmw.ll b/test/CodeGen/ARM/avoid-cpsr-rmw.ll index 1b385ab79c..96e83dd88e 100644 --- a/test/CodeGen/ARM/avoid-cpsr-rmw.ll +++ b/test/CodeGen/ARM/avoid-cpsr-rmw.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a9 | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=swift | FileCheck %s ; Avoid some 's' 16-bit instruction which partially update CPSR (and add false ; 
dependency) when it isn't dependent on last CPSR defining instruction. ; rdar://8928208 diff --git a/test/CodeGen/ARM/call-noret.ll b/test/CodeGen/ARM/call-noret.ll new file mode 100644 index 0000000000..d294f2cf1a --- /dev/null +++ b/test/CodeGen/ARM/call-noret.ll @@ -0,0 +1,39 @@ +; RUN: llc < %s -mtriple=armv7-apple-ios -mcpu=cortex-a8 | FileCheck %s -check-prefix=ARM +; RUN: llc < %s -mtriple=armv7-apple-ios -mcpu=swift | FileCheck %s -check-prefix=SWIFT +; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 | FileCheck %s -check-prefix=T2 +; rdar://8979299 + +define void @t1() noreturn nounwind ssp { +entry: +; ARM: t1: +; ARM: mov lr, pc +; ARM: b _bar + +; SWIFT: t1: +; SWIFT: mov lr, pc +; SWIFT: b _bar + +; T2: t1: +; T2: blx _bar + tail call void @bar() noreturn nounwind + unreachable +} + +define void @t2() noreturn nounwind ssp { +entry: +; ARM: t2: +; ARM: mov lr, pc +; ARM: b _t1 + +; SWIFT: t2: +; SWIFT: mov lr, pc +; SWIFT: b _t1 + +; T2: t2: +; T2: mov lr, pc +; T2: b.w _t1 + tail call void @t1() noreturn nounwind + unreachable +} + +declare void @bar() noreturn diff --git a/test/CodeGen/ARM/div.ll b/test/CodeGen/ARM/div.ll index 3d29e05a0c..82cfca182b 100644 --- a/test/CodeGen/ARM/div.ll +++ b/test/CodeGen/ARM/div.ll @@ -1,9 +1,13 @@ -; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s -check-prefix=CHECK-ARM +; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-a8 | FileCheck %s -check-prefix=CHECK-ARM +; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=swift | FileCheck %s -check-prefix=CHECK-SWIFT define i32 @f1(i32 %a, i32 %b) { entry: ; CHECK-ARM: f1 ; CHECK-ARM: __divsi3 + +; CHECK-SWIFT: f1 +; CHECK-SWIFT: sdiv %tmp1 = sdiv i32 %a, %b ; <i32> [#uses=1] ret i32 %tmp1 } @@ -12,6 +16,9 @@ define i32 @f2(i32 %a, i32 %b) { entry: ; CHECK-ARM: f2 ; CHECK-ARM: __udivsi3 + +; CHECK-SWIFT: f2 +; CHECK-SWIFT: udiv %tmp1 = udiv i32 %a, %b ; <i32> [#uses=1] ret i32 %tmp1 } @@ -20,6 +27,10 @@ define i32 @f3(i32 %a, i32 %b) { entry: ; CHECK-ARM: f3 ; CHECK-ARM: __modsi3 + +; CHECK-SWIFT: f3 +; CHECK-SWIFT: sdiv +; CHECK-SWIFT: mls %tmp1 = srem i32 %a, %b ; <i32> [#uses=1] ret i32 %tmp1 } @@ -28,6 +39,10 @@ define i32 @f4(i32 %a, i32 %b) { entry: ; CHECK-ARM: f4 ; CHECK-ARM: __umodsi3 + +; CHECK-SWIFT: f4 +; CHECK-SWIFT: udiv +; CHECK-SWIFT: mls %tmp1 = urem i32 %a, %b ; <i32> [#uses=1] ret i32 %tmp1 } diff --git a/test/CodeGen/ARM/domain-conv-vmovs.ll b/test/CodeGen/ARM/domain-conv-vmovs.ll index 18e169357b..a5c4114458 100644 --- a/test/CodeGen/ARM/domain-conv-vmovs.ll +++ b/test/CodeGen/ARM/domain-conv-vmovs.ll @@ -79,8 +79,8 @@ define float @test_ineligible(float, float %in) { ; internal fault). 
call void @bar() ; CHECL: bl bar -; CHECK: vext.32 -; CHECK: vext.32 +; CHECK: vext.32 +; CHECK: vext.32 ret float %val } diff --git a/test/CodeGen/ARM/fabss.ll b/test/CodeGen/ARM/fabss.ll index bcb4ee7452..46c2f1c65f 100644 --- a/test/CodeGen/ARM/fabss.ll +++ b/test/CodeGen/ARM/fabss.ll @@ -14,12 +14,12 @@ entry: declare float @fabsf(float) ; VFP2: test: -; VFP2: vabs.f32 s1, s1 +; VFP2: vabs.f32 s2, s2 ; NFP1: test: ; NFP1: vabs.f32 d1, d1 ; NFP0: test: -; NFP0: vabs.f32 s1, s1 +; NFP0: vabs.f32 s2, s2 ; CORTEXA8: test: ; CORTEXA8: vadd.f32 [[D1:d[0-9]+]] diff --git a/test/CodeGen/ARM/fadds.ll b/test/CodeGen/ARM/fadds.ll index e35103c045..48ef5ed88f 100644 --- a/test/CodeGen/ARM/fadds.ll +++ b/test/CodeGen/ARM/fadds.ll @@ -10,14 +10,14 @@ entry: } ; VFP2: test: -; VFP2: vadd.f32 s0, s1, s0 +; VFP2: vadd.f32 s ; NFP1: test: -; NFP1: vadd.f32 d0, d1, d0 +; NFP1: vadd.f32 d ; NFP0: test: -; NFP0: vadd.f32 s0, s1, s0 +; NFP0: vadd.f32 s ; CORTEXA8: test: -; CORTEXA8: vadd.f32 d0, d1, d0 +; CORTEXA8: vadd.f32 d ; CORTEXA9: test: ; CORTEXA9: vadd.f32 s{{.}}, s{{.}}, s{{.}} diff --git a/test/CodeGen/ARM/fast-isel-pic.ll b/test/CodeGen/ARM/fast-isel-pic.ll index 392a845d2c..867d53f973 100644 --- a/test/CodeGen/ARM/fast-isel-pic.ll +++ b/test/CodeGen/ARM/fast-isel-pic.ll @@ -1,6 +1,8 @@ ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=pic -mtriple=arm-apple-ios | FileCheck %s --check-prefix=ARM ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARMv7 +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=pic -mtriple=thumbv7-none-linux-gnueabi | FileCheck %s --check-prefix=THUMB-ELF +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=pic -mtriple=armv7-none-linux-gnueabi | FileCheck %s --check-prefix=ARMv7-ELF @g = global i32 0, align 4 @@ -10,6 +12,10 @@ entry: ; THUMB: movw [[reg0:r[0-9]+]], ; THUMB: movt [[reg0]], ; THUMB: add [[reg0]], pc +; THUMB-ELF: LoadGV +; THUMB-ELF: ldr.n r[[reg0:[0-9]+]], +; THUMB-ELF: ldr.n r[[reg1:[0-9]+]], +; THUMB-ELF: ldr r[[reg0]], [r[[reg1]], r[[reg0]]] ; ARM: LoadGV ; ARM: ldr [[reg1:r[0-9]+]], ; ARM: add [[reg1]], pc, [[reg1]] @@ -17,6 +23,10 @@ entry: ; ARMv7: movw [[reg2:r[0-9]+]], ; ARMv7: movt [[reg2]], ; ARMv7: add [[reg2]], pc, [[reg2]] +; ARMv7-ELF: LoadGV +; ARMv7-ELF: ldr r[[reg2:[0-9]+]], +; ARMv7-ELF: ldr r[[reg3:[0-9]+]], +; ARMv7-ELF: ldr r[[reg2]], [r[[reg3]], r[[reg2]]] %tmp = load i32* @g ret i32 %tmp } @@ -30,6 +40,10 @@ entry: ; THUMB: movt r[[reg3]], ; THUMB: add r[[reg3]], pc ; THUMB: ldr r[[reg3]], [r[[reg3]]] +; THUMB-ELF: LoadIndirectSymbol +; THUMB-ELF: ldr.n r[[reg3:[0-9]+]], +; THUMB-ELF: ldr.n r[[reg4:[0-9]+]], +; THUMB-ELF: ldr r[[reg3]], [r[[reg4]], r[[reg3]]] ; ARM: LoadIndirectSymbol ; ARM: ldr [[reg4:r[0-9]+]], ; ARM: ldr [[reg4]], [pc, [[reg4]]] @@ -38,6 +52,10 @@ entry: ; ARMv7: movt r[[reg5]], ; ARMv7: add r[[reg5]], pc, r[[reg5]] ; ARMv7: ldr r[[reg5]], [r[[reg5]]] +; ARMv7-ELF: LoadIndirectSymbol +; ARMv7-ELF: ldr r[[reg5:[0-9]+]], +; ARMv7-ELF: ldr r[[reg6:[0-9]+]], +; ARMv7-ELF: ldr r[[reg5]], [r[[reg6]], r[[reg5]]] %tmp = load i32* @i ret i32 %tmp } diff --git a/test/CodeGen/ARM/fdivs.ll b/test/CodeGen/ARM/fdivs.ll index 31c1ca9405..8fab002135 100644 --- a/test/CodeGen/ARM/fdivs.ll +++ b/test/CodeGen/ARM/fdivs.ll @@ -10,14 +10,14 @@ entry: } ; VFP2: test: -; VFP2: vdiv.f32 s0, s1, s0 +; VFP2: vdiv.f32 s0, s2, s0 ; NFP1: test: 
-; NFP1: vdiv.f32 s0, s1, s0 +; NFP1: vdiv.f32 s0, s2, s0 ; NFP0: test: -; NFP0: vdiv.f32 s0, s1, s0 +; NFP0: vdiv.f32 s0, s2, s0 ; CORTEXA8: test: -; CORTEXA8: vdiv.f32 s0, s1, s0 +; CORTEXA8: vdiv.f32 s0, s2, s0 ; CORTEXA9: test: ; CORTEXA9: vdiv.f32 s{{.}}, s{{.}}, s{{.}} diff --git a/test/CodeGen/ARM/fmuls.ll b/test/CodeGen/ARM/fmuls.ll index 3c3182bc63..1566a9272d 100644 --- a/test/CodeGen/ARM/fmuls.ll +++ b/test/CodeGen/ARM/fmuls.ll @@ -10,15 +10,15 @@ entry: } ; VFP2: test: -; VFP2: vmul.f32 s0, s1, s0 +; VFP2: vmul.f32 s ; NFP1: test: -; NFP1: vmul.f32 d0, d1, d0 +; NFP1: vmul.f32 d ; NFP0: test: -; NFP0: vmul.f32 s0, s1, s0 +; NFP0: vmul.f32 s ; CORTEXA8: test: -; CORTEXA8: vmul.f32 d0, d1, d0 +; CORTEXA8: vmul.f32 d ; CORTEXA9: test: ; CORTEXA9: vmul.f32 s{{.}}, s{{.}}, s{{.}} diff --git a/test/CodeGen/ARM/fp_convert.ll b/test/CodeGen/ARM/fp_convert.ll index 7002cecf36..44298b9c5d 100644 --- a/test/CodeGen/ARM/fp_convert.ll +++ b/test/CodeGen/ARM/fp_convert.ll @@ -31,7 +31,7 @@ define float @test3(i32 %a, i32 %b) { ; VFP2: test3: ; VFP2: vcvt.f32.u32 s{{.}}, s{{.}} ; NEON: test3: -; NEON: vcvt.f32.u32 d0, d0 +; NEON: vcvt.f32.u32 d entry: %0 = add i32 %a, %b %1 = uitofp i32 %0 to float @@ -42,7 +42,7 @@ define float @test4(i32 %a, i32 %b) { ; VFP2: test4: ; VFP2: vcvt.f32.s32 s{{.}}, s{{.}} ; NEON: test4: -; NEON: vcvt.f32.s32 d0, d0 +; NEON: vcvt.f32.s32 d entry: %0 = add i32 %a, %b %1 = sitofp i32 %0 to float diff --git a/test/CodeGen/ARM/fsubs.ll b/test/CodeGen/ARM/fsubs.ll index bea8d5f4f3..f039e74c8e 100644 --- a/test/CodeGen/ARM/fsubs.ll +++ b/test/CodeGen/ARM/fsubs.ll @@ -8,6 +8,6 @@ entry: ret float %0 } -; VFP2: vsub.f32 s0, s1, s0 -; NFP1: vsub.f32 d0, d1, d0 -; NFP0: vsub.f32 s0, s1, s0 +; VFP2: vsub.f32 s +; NFP1: vsub.f32 d +; NFP0: vsub.f32 s diff --git a/test/CodeGen/ARM/ifcvt1.ll b/test/CodeGen/ARM/ifcvt1.ll index cd870bb5d4..fd831442c1 100644 --- a/test/CodeGen/ARM/ifcvt1.ll +++ b/test/CodeGen/ARM/ifcvt1.ll @@ -1,17 +1,21 @@ -; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8 +; RUN: llc < %s -march=arm -mcpu=swift | FileCheck %s -check-prefix=SWIFT define i32 @t1(i32 %a, i32 %b) { -; CHECK: t1: +; A8: t1: +; SWIFT: t1: %tmp2 = icmp eq i32 %a, 0 br i1 %tmp2, label %cond_false, label %cond_true cond_true: -; CHECK: subeq r0, r1, #1 +; A8: subeq r0, r1, #1 +; SWIFT: sub r0, r1, #1 %tmp5 = add i32 %b, 1 ret i32 %tmp5 cond_false: -; CHECK: addne r0, r1, #1 +; A8: addne r0, r1, #1 +; SWIFT: addne r0, r1, #1 %tmp7 = add i32 %b, -1 ret i32 %tmp7 } diff --git a/test/CodeGen/ARM/ifcvt12.ll b/test/CodeGen/ARM/ifcvt12.ll new file mode 100644 index 0000000000..77bdca57e5 --- /dev/null +++ b/test/CodeGen/ARM/ifcvt12.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -mtriple=arm-apple-darwin -mcpu=cortex-a8 | FileCheck %s +define i32 @f1(i32 %a, i32 %b, i32 %c) { +; CHECK: f1: +; CHECK: mlsne r0, r0, r1, r2 + %tmp1 = icmp eq i32 %a, 0 + br i1 %tmp1, label %cond_false, label %cond_true + +cond_true: + %tmp2 = mul i32 %a, %b + %tmp3 = sub i32 %c, %tmp2 + ret i32 %tmp3 + +cond_false: + ret i32 %a +} diff --git a/test/CodeGen/ARM/ifcvt5.ll b/test/CodeGen/ARM/ifcvt5.ll index 95f5c97f2a..5081791bc2 100644 --- a/test/CodeGen/ARM/ifcvt5.ll +++ b/test/CodeGen/ARM/ifcvt5.ll @@ -1,4 +1,6 @@ -; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s +; RUN: llc < %s -mtriple=armv7-apple-ios -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8 +; RUN: llc < %s -mtriple=armv7-apple-ios -mcpu=swift | FileCheck %s 
-check-prefix=SWIFT +; rdar://8402126 @x = external global i32* ; <i32**> [#uses=1] @@ -10,8 +12,12 @@ entry: } define i32 @t1(i32 %a, i32 %b) { -; CHECK: t1: -; CHECK: poplt {r7, pc} +; A8: t1: +; A8: poplt {r7, pc} + +; SWIFT: t1: +; SWIFT: pop {r7, pc} +; SWIFT: pop {r7, pc} entry: %tmp1 = icmp sgt i32 %a, 10 ; <i1> [#uses=1] br i1 %tmp1, label %cond_true, label %UnifiedReturnBlock diff --git a/test/CodeGen/ARM/ldr_post.ll b/test/CodeGen/ARM/ldr_post.ll index 8ddf025dbf..a6ca434483 100644 --- a/test/CodeGen/ARM/ldr_post.ll +++ b/test/CodeGen/ARM/ldr_post.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -march=arm | FileCheck %s +; RUN: llc < %s -march=arm -mcpu=swift | FileCheck %s ; CHECK: test1: ; CHECK: ldr {{.*, \[.*]}}, -r2 diff --git a/test/CodeGen/ARM/ldr_pre.ll b/test/CodeGen/ARM/ldr_pre.ll index e904e5fd2c..6c40ad7326 100644 --- a/test/CodeGen/ARM/ldr_pre.ll +++ b/test/CodeGen/ARM/ldr_pre.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -march=arm | FileCheck %s +; RUN: llc < %s -march=arm -mcpu=swift | FileCheck %s ; CHECK: test1: ; CHECK: ldr {{.*!}} diff --git a/test/CodeGen/ARM/mls.ll b/test/CodeGen/ARM/mls.ll index a6cdba4454..066bf98de6 100644 --- a/test/CodeGen/ARM/mls.ll +++ b/test/CodeGen/ARM/mls.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -march=arm -mattr=+v6t2 | FileCheck %s +; RUN: llc < %s -march=arm -mattr=+v6t2 -arm-use-mulops=false | FileCheck %s -check-prefix=NO_MULOPS define i32 @f1(i32 %a, i32 %b, i32 %c) { %tmp1 = mul i32 %a, %b @@ -13,4 +14,15 @@ define i32 @f2(i32 %a, i32 %b, i32 %c) { ret i32 %tmp2 } +; CHECK: f1: ; CHECK: mls r0, r0, r1, r2 +; NO_MULOPS: f1: +; NO_MULOPS: mul r0, r0, r1 +; NO_MULOPS-NEXT: sub r0, r2, r0 + +; CHECK: f2: +; CHECK: mul r0, r0, r1 +; CHECK-NEXT: sub r0, r0, r2 +; NO_MULOPS: f2: +; NO_MULOPS: mul r0, r0, r1 +; NO_MULOPS-NEXT: sub r0, r0, r2 diff --git a/test/CodeGen/ARM/neon-fma.ll b/test/CodeGen/ARM/neon-fma.ll new file mode 100644 index 0000000000..d2cca5009d --- /dev/null +++ b/test/CodeGen/ARM/neon-fma.ll @@ -0,0 +1,22 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -mcpu=swift | FileCheck %s + +; CHECK: test_v2f32 +; CHECK: vfma.f32 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + +define <2 x float> @test_v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind readnone ssp { +entry: + %call = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind readnone + ret <2 x float> %call +} + +; CHECK: test_v4f32 +; CHECK: vfma.f32 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}} + +define <4 x float> @test_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) nounwind readnone ssp { +entry: + %call = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) nounwind readnone + ret <4 x float> %call +} + +declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone +declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) nounwind readnone diff --git a/test/CodeGen/ARM/neon_ld2.ll b/test/CodeGen/ARM/neon_ld2.ll index 630db93035..497619ed74 100644 --- a/test/CodeGen/ARM/neon_ld2.ll +++ b/test/CodeGen/ARM/neon_ld2.ll @@ -1,10 +1,16 @@ ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc < %s -march=arm -mcpu=swift | FileCheck %s --check-prefix=SWIFT ; CHECK: t1 ; CHECK: vld1.64 ; CHECK: vld1.64 ; CHECK: vadd.i64 q ; CHECK: vst1.64 +; SWIFT: t1 +; SWIFT: vld1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+, :128\]}} +; SWIFT: vld1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+, :128\]}} +; SWIFT: vadd.i64 q +; SWIFT: vst1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+, :128\]}} 
define void @t1(<4 x i32>* %r, <2 x i64>* %a, <2 x i64>* %b) nounwind { entry: %0 = load <2 x i64>* %a, align 16 ; <<2 x i64>> [#uses=1] @@ -21,6 +27,12 @@ entry: ; CHECK: vsub.i64 q ; CHECK: vmov r0, r1, d ; CHECK: vmov r2, r3, d +; SWIFT: t2 +; SWIFT: vld1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+, :128\]}} +; SWIFT: vld1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+, :128\]}} +; SWIFT: vsub.i64 q +; SWIFT: vmov r0, r1, d +; SWIFT: vmov r2, r3, d define <4 x i32> @t2(<2 x i64>* %a, <2 x i64>* %b) nounwind readonly { entry: %0 = load <2 x i64>* %a, align 16 ; <<2 x i64>> [#uses=1] @@ -30,3 +42,18 @@ entry: ret <4 x i32> %3 } +; Limited alignment. +; SWIFT: t3 +; SWIFT: vld1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+}} +; SWIFT: vld1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+}} +; SWIFT: vadd.i64 q +; SWIFT: vst1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+}} +define void @t3(<4 x i32>* %r, <2 x i64>* %a, <2 x i64>* %b) nounwind { +entry: + %0 = load <2 x i64>* %a, align 8 + %1 = load <2 x i64>* %b, align 8 + %2 = add <2 x i64> %0, %1 + %3 = bitcast <2 x i64> %2 to <4 x i32> + store <4 x i32> %3, <4 x i32>* %r, align 8 + ret void +} diff --git a/test/CodeGen/ARM/opt-shuff-tstore.ll b/test/CodeGen/ARM/opt-shuff-tstore.ll index df98e231cc..74c9a21355 100644 --- a/test/CodeGen/ARM/opt-shuff-tstore.ll +++ b/test/CodeGen/ARM/opt-shuff-tstore.ll @@ -2,7 +2,7 @@ ; CHECK: func_4_8 ; CHECK: vst1.32 -; CHECK-NEXT: bx lr +; CHECK: bx lr define void @func_4_8(<4 x i8> %param, <4 x i8>* %p) { %r = add <4 x i8> %param, <i8 1, i8 2, i8 3, i8 4> store <4 x i8> %r, <4 x i8>* %p @@ -11,7 +11,7 @@ define void @func_4_8(<4 x i8> %param, <4 x i8>* %p) { ; CHECK: func_2_16 ; CHECK: vst1.32 -; CHECK-NEXT: bx lr +; CHECK: bx lr define void @func_2_16(<2 x i16> %param, <2 x i16>* %p) { %r = add <2 x i16> %param, <i16 1, i16 2> store <2 x i16> %r, <2 x i16>* %p diff --git a/test/CodeGen/ARM/reg_sequence.ll b/test/CodeGen/ARM/reg_sequence.ll index 206b96cd07..6d6586e4f2 100644 --- a/test/CodeGen/ARM/reg_sequence.ll +++ b/test/CodeGen/ARM/reg_sequence.ll @@ -124,7 +124,6 @@ return1: return2: ; CHECK: %return2 ; CHECK: vadd.i32 -; CHECK: vorr {{q[0-9]+}}, {{q[0-9]+}} ; CHECK-NOT: vmov ; CHECK: vst2.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}} %tmp100 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0 ; <<4 x i32>> [#uses=1] diff --git a/test/CodeGen/ARM/subreg-remat.ll b/test/CodeGen/ARM/subreg-remat.ll index 03ae12c6de..455bfce0f2 100644 --- a/test/CodeGen/ARM/subreg-remat.ll +++ b/test/CodeGen/ARM/subreg-remat.ll @@ -4,14 +4,14 @@ target triple = "thumbv7-apple-ios" ; ; The vector %v2 is built like this: ; -; %vreg6:ssub_1<def> = VMOVSR %vreg0<kill>, pred:14, pred:%noreg, %vreg6<imp-def>; DPR_VFP2:%vreg6 GPR:%vreg0 +; %vreg6:ssub_1<def> = ... ; %vreg6:ssub_0<def> = VLDRS <cp#0>, 0, pred:14, pred:%noreg; mem:LD4[ConstantPool] DPR_VFP2:%vreg6 ; ; When %vreg6 spills, the VLDRS constant pool load cannot be rematerialized ; since it implicitly reads the ssub_1 sub-register. 
; ; CHECK: f1 -; CHECK: vmov s1, r0 +; CHECK: vmov d0, r0, r0 ; CHECK: vldr s0, LCPI ; The vector must be spilled: ; CHECK: vstr d0, diff --git a/test/CodeGen/Mips/dsp-r1.ll b/test/CodeGen/Mips/dsp-r1.ll new file mode 100644 index 0000000000..c9dc8cfd0b --- /dev/null +++ b/test/CodeGen/Mips/dsp-r1.ll @@ -0,0 +1,1241 @@ +; RUN: llc -march=mipsel -mattr=+dsp < %s | FileCheck %s + +define i32 @test__builtin_mips_extr_w1(i32 %i0, i32, i64 %a0) nounwind { +entry: +; CHECK: extr.w + + %1 = tail call i32 @llvm.mips.extr.w(i64 %a0, i32 15) + ret i32 %1 +} + +declare i32 @llvm.mips.extr.w(i64, i32) nounwind + +define i32 @test__builtin_mips_extr_w2(i32 %i0, i32, i64 %a0, i32 %a1) nounwind { +entry: +; CHECK: extrv.w + + %1 = tail call i32 @llvm.mips.extr.w(i64 %a0, i32 %a1) + ret i32 %1 +} + +define i32 @test__builtin_mips_extr_r_w1(i32 %i0, i32, i64 %a0) nounwind { +entry: +; CHECK: extr_r.w + + %1 = tail call i32 @llvm.mips.extr.r.w(i64 %a0, i32 15) + ret i32 %1 +} + +declare i32 @llvm.mips.extr.r.w(i64, i32) nounwind + +define i32 @test__builtin_mips_extr_s_h1(i32 %i0, i32, i64 %a0, i32 %a1) nounwind { +entry: +; CHECK: extrv_s.h + + %1 = tail call i32 @llvm.mips.extr.s.h(i64 %a0, i32 %a1) + ret i32 %1 +} + +declare i32 @llvm.mips.extr.s.h(i64, i32) nounwind + +define i32 @test__builtin_mips_extr_rs_w1(i32 %i0, i32, i64 %a0) nounwind { +entry: +; CHECK: extr_rs.w + + %1 = tail call i32 @llvm.mips.extr.rs.w(i64 %a0, i32 15) + ret i32 %1 +} + +declare i32 @llvm.mips.extr.rs.w(i64, i32) nounwind + +define i32 @test__builtin_mips_extr_rs_w2(i32 %i0, i32, i64 %a0, i32 %a1) nounwind { +entry: +; CHECK: extrv_rs.w + + %1 = tail call i32 @llvm.mips.extr.rs.w(i64 %a0, i32 %a1) + ret i32 %1 +} + +define i32 @test__builtin_mips_extr_s_h2(i32 %i0, i32, i64 %a0) nounwind { +entry: +; CHECK: extr_s.h + + %1 = tail call i32 @llvm.mips.extr.s.h(i64 %a0, i32 15) + ret i32 %1 +} + +define i32 @test__builtin_mips_extr_r_w2(i32 %i0, i32, i64 %a0, i32 %a1) nounwind { +entry: +; CHECK: extrv_r.w + + %1 = tail call i32 @llvm.mips.extr.r.w(i64 %a0, i32 %a1) + ret i32 %1 +} + +define i32 @test__builtin_mips_extp1(i32 %i0, i32, i64 %a0) nounwind { +entry: +; CHECK: extp ${{[0-9]+}} + + %1 = tail call i32 @llvm.mips.extp(i64 %a0, i32 15) + ret i32 %1 +} + +declare i32 @llvm.mips.extp(i64, i32) nounwind + +define i32 @test__builtin_mips_extp2(i32 %i0, i32, i64 %a0, i32 %a1) nounwind { +entry: +; CHECK: extpv + + %1 = tail call i32 @llvm.mips.extp(i64 %a0, i32 %a1) + ret i32 %1 +} + +define i32 @test__builtin_mips_extpdp1(i32 %i0, i32, i64 %a0) nounwind { +entry: +; CHECK: extpdp ${{[0-9]+}} + + %1 = tail call i32 @llvm.mips.extpdp(i64 %a0, i32 15) + ret i32 %1 +} + +declare i32 @llvm.mips.extpdp(i64, i32) nounwind + +define i32 @test__builtin_mips_extpdp2(i32 %i0, i32, i64 %a0, i32 %a1) nounwind { +entry: +; CHECK: extpdpv + + %1 = tail call i32 @llvm.mips.extpdp(i64 %a0, i32 %a1) + ret i32 %1 +} + +define i64 @test__builtin_mips_dpau_h_qbl1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind readnone { +entry: +; CHECK: dpau.h.qbl + + %1 = bitcast i32 %a1.coerce to <4 x i8> + %2 = bitcast i32 %a2.coerce to <4 x i8> + %3 = tail call i64 @llvm.mips.dpau.h.qbl(i64 %a0, <4 x i8> %1, <4 x i8> %2) + ret i64 %3 +} + +declare i64 @llvm.mips.dpau.h.qbl(i64, <4 x i8>, <4 x i8>) nounwind readnone + +define i64 @test__builtin_mips_dpau_h_qbr1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind readnone { +entry: +; CHECK: dpau.h.qbr + + %1 = bitcast i32 %a1.coerce to <4 x i8> + %2 = bitcast i32 %a2.coerce to 
<4 x i8> + %3 = tail call i64 @llvm.mips.dpau.h.qbr(i64 %a0, <4 x i8> %1, <4 x i8> %2) + ret i64 %3 +} + +declare i64 @llvm.mips.dpau.h.qbr(i64, <4 x i8>, <4 x i8>) nounwind readnone + +define i64 @test__builtin_mips_dpsu_h_qbl1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind readnone { +entry: +; CHECK: dpsu.h.qbl + + %1 = bitcast i32 %a1.coerce to <4 x i8> + %2 = bitcast i32 %a2.coerce to <4 x i8> + %3 = tail call i64 @llvm.mips.dpsu.h.qbl(i64 %a0, <4 x i8> %1, <4 x i8> %2) + ret i64 %3 +} + +declare i64 @llvm.mips.dpsu.h.qbl(i64, <4 x i8>, <4 x i8>) nounwind readnone + +define i64 @test__builtin_mips_dpsu_h_qbr1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind readnone { +entry: +; CHECK: dpsu.h.qbr + + %1 = bitcast i32 %a1.coerce to <4 x i8> + %2 = bitcast i32 %a2.coerce to <4 x i8> + %3 = tail call i64 @llvm.mips.dpsu.h.qbr(i64 %a0, <4 x i8> %1, <4 x i8> %2) + ret i64 %3 +} + +declare i64 @llvm.mips.dpsu.h.qbr(i64, <4 x i8>, <4 x i8>) nounwind readnone + +define i64 @test__builtin_mips_dpaq_s_w_ph1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind { +entry: +; CHECK: dpaq_s.w.ph + + %1 = bitcast i32 %a1.coerce to <2 x i16> + %2 = bitcast i32 %a2.coerce to <2 x i16> + %3 = tail call i64 @llvm.mips.dpaq.s.w.ph(i64 %a0, <2 x i16> %1, <2 x i16> %2) + ret i64 %3 +} + +declare i64 @llvm.mips.dpaq.s.w.ph(i64, <2 x i16>, <2 x i16>) nounwind + +define i64 @test__builtin_mips_dpaq_sa_l_w1(i32 %i0, i32, i64 %a0, i32 %a1, i32 %a2) nounwind { +entry: +; CHECK: dpaq_sa.l.w + + %1 = tail call i64 @llvm.mips.dpaq.sa.l.w(i64 %a0, i32 %a1, i32 %a2) + ret i64 %1 +} + +declare i64 @llvm.mips.dpaq.sa.l.w(i64, i32, i32) nounwind + +define i64 @test__builtin_mips_dpsq_s_w_ph1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind { +entry: +; CHECK: dpsq_s.w.ph + + %1 = bitcast i32 %a1.coerce to <2 x i16> + %2 = bitcast i32 %a2.coerce to <2 x i16> + %3 = tail call i64 @llvm.mips.dpsq.s.w.ph(i64 %a0, <2 x i16> %1, <2 x i16> %2) + ret i64 %3 +} + +declare i64 @llvm.mips.dpsq.s.w.ph(i64, <2 x i16>, <2 x i16>) nounwind + +define i64 @test__builtin_mips_dpsq_sa_l_w1(i32 %i0, i32, i64 %a0, i32 %a1, i32 %a2) nounwind { +entry: +; CHECK: dpsq_sa.l.w + + %1 = tail call i64 @llvm.mips.dpsq.sa.l.w(i64 %a0, i32 %a1, i32 %a2) + ret i64 %1 +} + +declare i64 @llvm.mips.dpsq.sa.l.w(i64, i32, i32) nounwind + +define i64 @test__builtin_mips_mulsaq_s_w_ph1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind { +entry: +; CHECK: mulsaq_s.w.ph + + %1 = bitcast i32 %a1.coerce to <2 x i16> + %2 = bitcast i32 %a2.coerce to <2 x i16> + %3 = tail call i64 @llvm.mips.mulsaq.s.w.ph(i64 %a0, <2 x i16> %1, <2 x i16> %2) + ret i64 %3 +} + +declare i64 @llvm.mips.mulsaq.s.w.ph(i64, <2 x i16>, <2 x i16>) nounwind + +define i64 @test__builtin_mips_maq_s_w_phl1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind { +entry: +; CHECK: maq_s.w.phl + + %1 = bitcast i32 %a1.coerce to <2 x i16> + %2 = bitcast i32 %a2.coerce to <2 x i16> + %3 = tail call i64 @llvm.mips.maq.s.w.phl(i64 %a0, <2 x i16> %1, <2 x i16> %2) + ret i64 %3 +} + +declare i64 @llvm.mips.maq.s.w.phl(i64, <2 x i16>, <2 x i16>) nounwind + +define i64 @test__builtin_mips_maq_s_w_phr1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind { +entry: +; CHECK: maq_s.w.phr + + %1 = bitcast i32 %a1.coerce to <2 x i16> + %2 = bitcast i32 %a2.coerce to <2 x i16> + %3 = tail call i64 @llvm.mips.maq.s.w.phr(i64 %a0, <2 x i16> %1, <2 x i16> %2) + ret i64 %3 +} + +declare i64 @llvm.mips.maq.s.w.phr(i64, <2 x 
i16>, <2 x i16>) nounwind + +define i64 @test__builtin_mips_maq_sa_w_phl1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind { +entry: +; CHECK: maq_sa.w.phl + + %1 = bitcast i32 %a1.coerce to <2 x i16> + %2 = bitcast i32 %a2.coerce to <2 x i16> + %3 = tail call i64 @llvm.mips.maq.sa.w.phl(i64 %a0, <2 x i16> %1, <2 x i16> %2) + ret i64 %3 +} + +declare i64 @llvm.mips.maq.sa.w.phl(i64, <2 x i16>, <2 x i16>) nounwind + +define i64 @test__builtin_mips_maq_sa_w_phr1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind { +entry: +; CHECK: maq_sa.w.phr + + %1 = bitcast i32 %a1.coerce to <2 x i16> + %2 = bitcast i32 %a2.coerce to <2 x i16> + %3 = tail call i64 @llvm.mips.maq.sa.w.phr(i64 %a0, <2 x i16> %1, <2 x i16> %2) + ret i64 %3 +} + +declare i64 @llvm.mips.maq.sa.w.phr(i64, <2 x i16>, <2 x i16>) nounwind + +define i64 @test__builtin_mips_shilo1(i32 %i0, i32, i64 %a0) nounwind readnone { +entry: +; CHECK: shilo $ac{{[0-9]}} + + %1 = tail call i64 @llvm.mips.shilo(i64 %a0, i32 0) + ret i64 %1 +} + +declare i64 @llvm.mips.shilo(i64, i32) nounwind readnone + +define i64 @test__builtin_mips_shilo2(i32 %i0, i32, i64 %a0, i32 %a1) nounwind readnone { +entry: +; CHECK: shilov + + %1 = tail call i64 @llvm.mips.shilo(i64 %a0, i32 %a1) + ret i64 %1 +} + +define i64 @test__builtin_mips_mthlip1(i32 %i0, i32, i64 %a0, i32 %a1) nounwind { +entry: +; CHECK: mthlip ${{[0-9]+}} + + %1 = tail call i64 @llvm.mips.mthlip(i64 %a0, i32 %a1) + ret i64 %1 +} + +declare i64 @llvm.mips.mthlip(i64, i32) nounwind + +define i32 @test__builtin_mips_bposge321(i32 %i0) nounwind readonly { +entry: +; CHECK: bposge32 $BB{{[0-9]+}} + + %0 = tail call i32 @llvm.mips.bposge32() + ret i32 %0 +} + +declare i32 @llvm.mips.bposge32() nounwind readonly + +define i64 @test__builtin_mips_madd1(i32 %i0, i32, i64 %a0, i32 %a1, i32 %a2) nounwind readnone { +entry: +; CHECK: madd $ac{{[0-9]}} + + %1 = tail call i64 @llvm.mips.madd(i64 %a0, i32 %a1, i32 %a2) + ret i64 %1 +} + +declare i64 @llvm.mips.madd(i64, i32, i32) nounwind readnone + +define i64 @test__builtin_mips_maddu1(i32 %i0, i32, i64 %a0, i32 %a1, i32 %a2) nounwind readnone { +entry: +; CHECK: maddu $ac{{[0-9]}} + + %1 = tail call i64 @llvm.mips.maddu(i64 %a0, i32 %a1, i32 %a2) + ret i64 %1 +} + +declare i64 @llvm.mips.maddu(i64, i32, i32) nounwind readnone + +define i64 @test__builtin_mips_msub1(i32 %i0, i32, i64 %a0, i32 %a1, i32 %a2) nounwind readnone { +entry: +; CHECK: msub $ac{{[0-9]}} + + %1 = tail call i64 @llvm.mips.msub(i64 %a0, i32 %a1, i32 %a2) + ret i64 %1 +} + +declare i64 @llvm.mips.msub(i64, i32, i32) nounwind readnone + +define i64 @test__builtin_mips_msubu1(i32 %i0, i32, i64 %a0, i32 %a1, i32 %a2) nounwind readnone { +entry: +; CHECK: msubu $ac{{[0-9]}} + + %1 = tail call i64 @llvm.mips.msubu(i64 %a0, i32 %a1, i32 %a2) + ret i64 %1 +} + +declare i64 @llvm.mips.msubu(i64, i32, i32) nounwind readnone + +define i64 @test__builtin_mips_mult1(i32 %i0, i32 %a0, i32 %a1) nounwind readnone { +entry: +; CHECK: mult $ac{{[0-9]}} + + %0 = tail call i64 @llvm.mips.mult(i32 %a0, i32 %a1) + ret i64 %0 +} + +declare i64 @llvm.mips.mult(i32, i32) nounwind readnone + +define i64 @test__builtin_mips_multu1(i32 %i0, i32 %a0, i32 %a1) nounwind readnone { +entry: +; CHECK: multu $ac{{[0-9]}} + + %0 = tail call i64 @llvm.mips.multu(i32 %a0, i32 %a1) + ret i64 %0 +} + +declare i64 @llvm.mips.multu(i32, i32) nounwind readnone + +define { i32 } @test__builtin_mips_addq_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind { +entry: +; CHECK: addq.ph + + %0 = 
bitcast i32 %a0.coerce to <2 x i16> + %1 = bitcast i32 %a1.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.mips.addq.ph(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0 + ret { i32 } %.fca.0.insert +} + +declare <2 x i16> @llvm.mips.addq.ph(<2 x i16>, <2 x i16>) nounwind + +define { i32 } @test__builtin_mips_addq_s_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind { +entry: +; CHECK: addq_s.ph + + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = bitcast i32 %a1.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.mips.addq.s.ph(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0 + ret { i32 } %.fca.0.insert +} + +declare <2 x i16> @llvm.mips.addq.s.ph(<2 x i16>, <2 x i16>) nounwind + +define i32 @test__builtin_mips_addq_s_w1(i32 %i0, i32 %a0, i32 %a1) nounwind { +entry: +; CHECK: addq_s.w + + %0 = tail call i32 @llvm.mips.addq.s.w(i32 %a0, i32 %a1) + ret i32 %0 +} + +declare i32 @llvm.mips.addq.s.w(i32, i32) nounwind + +define { i32 } @test__builtin_mips_addu_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind { +entry: +; CHECK: addu.qb + + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = bitcast i32 %a1.coerce to <4 x i8> + %2 = tail call <4 x i8> @llvm.mips.addu.qb(<4 x i8> %0, <4 x i8> %1) + %3 = bitcast <4 x i8> %2 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0 + ret { i32 } %.fca.0.insert +} + +declare <4 x i8> @llvm.mips.addu.qb(<4 x i8>, <4 x i8>) nounwind + +define { i32 } @test__builtin_mips_addu_s_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind { +entry: +; CHECK: addu_s.qb + + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = bitcast i32 %a1.coerce to <4 x i8> + %2 = tail call <4 x i8> @llvm.mips.addu.s.qb(<4 x i8> %0, <4 x i8> %1) + %3 = bitcast <4 x i8> %2 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0 + ret { i32 } %.fca.0.insert +} + +declare <4 x i8> @llvm.mips.addu.s.qb(<4 x i8>, <4 x i8>) nounwind + +define { i32 } @test__builtin_mips_subq_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind { +entry: +; CHECK: subq.ph + + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = bitcast i32 %a1.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.mips.subq.ph(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0 + ret { i32 } %.fca.0.insert +} + +declare <2 x i16> @llvm.mips.subq.ph(<2 x i16>, <2 x i16>) nounwind + +define { i32 } @test__builtin_mips_subq_s_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind { +entry: +; CHECK: subq_s.ph + + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = bitcast i32 %a1.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.mips.subq.s.ph(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0 + ret { i32 } %.fca.0.insert +} + +declare <2 x i16> @llvm.mips.subq.s.ph(<2 x i16>, <2 x i16>) nounwind + +define i32 @test__builtin_mips_subq_s_w1(i32 %i0, i32 %a0, i32 %a1) nounwind { +entry: +; CHECK: subq_s.w + + %0 = tail call i32 @llvm.mips.subq.s.w(i32 %a0, i32 %a1) + ret i32 %0 +} + +declare i32 @llvm.mips.subq.s.w(i32, i32) nounwind + +define { i32 } @test__builtin_mips_subu_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind { +entry: +; CHECK: subu.qb + + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = bitcast i32 %a1.coerce to <4 x i8> + %2 = tail call <4 x i8> @llvm.mips.subu.qb(<4 x i8> %0, <4 x i8> %1) + %3 = bitcast <4 x i8> %2 to i32 
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0 + ret { i32 } %.fca.0.insert +} + +declare <4 x i8> @llvm.mips.subu.qb(<4 x i8>, <4 x i8>) nounwind + +define { i32 } @test__builtin_mips_subu_s_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind { +entry: +; CHECK: subu_s.qb + + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = bitcast i32 %a1.coerce to <4 x i8> + %2 = tail call <4 x i8> @llvm.mips.subu.s.qb(<4 x i8> %0, <4 x i8> %1) + %3 = bitcast <4 x i8> %2 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0 + ret { i32 } %.fca.0.insert +} + +declare <4 x i8> @llvm.mips.subu.s.qb(<4 x i8>, <4 x i8>) nounwind + +define i32 @test__builtin_mips_addsc1(i32 %i0, i32 %a0, i32 %a1) nounwind { +entry: +; CHECK: addsc ${{[0-9]+}} + + %0 = tail call i32 @llvm.mips.addsc(i32 %a0, i32 %a1) + ret i32 %0 +} + +declare i32 @llvm.mips.addsc(i32, i32) nounwind + +define i32 @test__builtin_mips_addwc1(i32 %i0, i32 %a0, i32 %a1) nounwind { +entry: +; CHECK: addwc ${{[0-9]+}} + + %0 = tail call i32 @llvm.mips.addwc(i32 %a0, i32 %a1) + ret i32 %0 +} + +declare i32 @llvm.mips.addwc(i32, i32) nounwind + +define i32 @test__builtin_mips_modsub1(i32 %i0, i32 %a0, i32 %a1) nounwind readnone { +entry: +; CHECK: modsub ${{[0-9]+}} + + %0 = tail call i32 @llvm.mips.modsub(i32 %a0, i32 %a1) + ret i32 %0 +} + +declare i32 @llvm.mips.modsub(i32, i32) nounwind readnone + +define i32 @test__builtin_mips_raddu_w_qb1(i32 %i0, i32 %a0.coerce) nounwind readnone { +entry: +; CHECK: raddu.w.qb + + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = tail call i32 @llvm.mips.raddu.w.qb(<4 x i8> %0) + ret i32 %1 +} + +declare i32 @llvm.mips.raddu.w.qb(<4 x i8>) nounwind readnone + +define { i32 } @test__builtin_mips_muleu_s_ph_qbl1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind { +entry: +; CHECK: muleu_s.ph.qbl + + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = bitcast i32 %a1.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.mips.muleu.s.ph.qbl(<4 x i8> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0 + ret { i32 } %.fca.0.insert +} + +declare <2 x i16> @llvm.mips.muleu.s.ph.qbl(<4 x i8>, <2 x i16>) nounwind + +define { i32 } @test__builtin_mips_muleu_s_ph_qbr1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind { +entry: +; CHECK: muleu_s.ph.qbr + + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = bitcast i32 %a1.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.mips.muleu.s.ph.qbr(<4 x i8> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0 + ret { i32 } %.fca.0.insert +} + +declare <2 x i16> @llvm.mips.muleu.s.ph.qbr(<4 x i8>, <2 x i16>) nounwind + +define { i32 } @test__builtin_mips_mulq_rs_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind { +entry: +; CHECK: mulq_rs.ph + + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = bitcast i32 %a1.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.mips.mulq.rs.ph(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0 + ret { i32 } %.fca.0.insert +} + +declare <2 x i16> @llvm.mips.mulq.rs.ph(<2 x i16>, <2 x i16>) nounwind + +define i32 @test__builtin_mips_muleq_s_w_phl1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind { +entry: +; CHECK: muleq_s.w.phl + + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = bitcast i32 %a1.coerce to <2 x i16> + %2 = tail call i32 @llvm.mips.muleq.s.w.phl(<2 x i16> %0, <2 x i16> %1) + ret i32 %2 +} + +declare i32 @llvm.mips.muleq.s.w.phl(<2 x i16>, <2 x 
i16>) nounwind + +define i32 @test__builtin_mips_muleq_s_w_phr1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind { +entry: +; CHECK: muleq_s.w.phr + + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = bitcast i32 %a1.coerce to <2 x i16> + %2 = tail call i32 @llvm.mips.muleq.s.w.phr(<2 x i16> %0, <2 x i16> %1) + ret i32 %2 +} + +declare i32 @llvm.mips.muleq.s.w.phr(<2 x i16>, <2 x i16>) nounwind + +define { i32 } @test__builtin_mips_precrq_qb_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind readnone { +entry: +; CHECK: precrq.qb.ph + + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = bitcast i32 %a1.coerce to <2 x i16> + %2 = tail call <4 x i8> @llvm.mips.precrq.qb.ph(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <4 x i8> %2 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0 + ret { i32 } %.fca.0.insert +} + +declare <4 x i8> @llvm.mips.precrq.qb.ph(<2 x i16>, <2 x i16>) nounwind readnone + +define { i32 } @test__builtin_mips_precrq_ph_w1(i32 %i0, i32 %a0, i32 %a1) nounwind readnone { +entry: +; CHECK: precrq.ph.w + + %0 = tail call <2 x i16> @llvm.mips.precrq.ph.w(i32 %a0, i32 %a1) + %1 = bitcast <2 x i16> %0 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0 + ret { i32 } %.fca.0.insert +} + +declare <2 x i16> @llvm.mips.precrq.ph.w(i32, i32) nounwind readnone + +define { i32 } @test__builtin_mips_precrq_rs_ph_w1(i32 %i0, i32 %a0, i32 %a1) nounwind { +entry: +; CHECK: precrq_rs.ph.w + + %0 = tail call <2 x i16> @llvm.mips.precrq.rs.ph.w(i32 %a0, i32 %a1) + %1 = bitcast <2 x i16> %0 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0 + ret { i32 } %.fca.0.insert +} + +declare <2 x i16> @llvm.mips.precrq.rs.ph.w(i32, i32) nounwind + +define { i32 } @test__builtin_mips_precrqu_s_qb_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind { +entry: +; CHECK: precrqu_s.qb.ph + + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = bitcast i32 %a1.coerce to <2 x i16> + %2 = tail call <4 x i8> @llvm.mips.precrqu.s.qb.ph(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <4 x i8> %2 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0 + ret { i32 } %.fca.0.insert +} + +declare <4 x i8> @llvm.mips.precrqu.s.qb.ph(<2 x i16>, <2 x i16>) nounwind + + +define i32 @test__builtin_mips_cmpu_eq_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind { +entry: +; CHECK: cmpu.eq.qb + + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = bitcast i32 %a1.coerce to <4 x i8> + tail call void @llvm.mips.cmpu.eq.qb(<4 x i8> %0, <4 x i8> %1) + %2 = tail call i32 @llvm.mips.rddsp(i32 31) + ret i32 %2 +} + +declare void @llvm.mips.cmpu.eq.qb(<4 x i8>, <4 x i8>) nounwind + +declare i32 @llvm.mips.rddsp(i32) nounwind readonly + +define i32 @test__builtin_mips_cmpu_lt_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind { +entry: +; CHECK: cmpu.lt.qb + + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = bitcast i32 %a1.coerce to <4 x i8> + tail call void @llvm.mips.cmpu.lt.qb(<4 x i8> %0, <4 x i8> %1) + %2 = tail call i32 @llvm.mips.rddsp(i32 31) + ret i32 %2 +} + +declare void @llvm.mips.cmpu.lt.qb(<4 x i8>, <4 x i8>) nounwind + +define i32 @test__builtin_mips_cmpu_le_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind { +entry: +; CHECK: cmpu.le.qb + + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = bitcast i32 %a1.coerce to <4 x i8> + tail call void @llvm.mips.cmpu.le.qb(<4 x i8> %0, <4 x i8> %1) + %2 = tail call i32 @llvm.mips.rddsp(i32 31) + ret i32 %2 +} + +declare void @llvm.mips.cmpu.le.qb(<4 x i8>, <4 x i8>) nounwind + +define i32 @test__builtin_mips_cmpgu_eq_qb1(i32 %i0, i32 %a0.coerce, 
i32 %a1.coerce) nounwind { +entry: +; CHECK: cmpgu.eq.qb + + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = bitcast i32 %a1.coerce to <4 x i8> + %2 = tail call i32 @llvm.mips.cmpgu.eq.qb(<4 x i8> %0, <4 x i8> %1) + ret i32 %2 +} + +declare i32 @llvm.mips.cmpgu.eq.qb(<4 x i8>, <4 x i8>) nounwind + +define i32 @test__builtin_mips_cmpgu_lt_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind { +entry: +; CHECK: cmpgu.lt.qb + + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = bitcast i32 %a1.coerce to <4 x i8> + %2 = tail call i32 @llvm.mips.cmpgu.lt.qb(<4 x i8> %0, <4 x i8> %1) + ret i32 %2 +} + +declare i32 @llvm.mips.cmpgu.lt.qb(<4 x i8>, <4 x i8>) nounwind + +define i32 @test__builtin_mips_cmpgu_le_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind { +entry: +; CHECK: cmpgu.le.qb + + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = bitcast i32 %a1.coerce to <4 x i8> + %2 = tail call i32 @llvm.mips.cmpgu.le.qb(<4 x i8> %0, <4 x i8> %1) + ret i32 %2 +} + +declare i32 @llvm.mips.cmpgu.le.qb(<4 x i8>, <4 x i8>) nounwind + +define i32 @test__builtin_mips_cmp_eq_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind { +entry: +; CHECK: cmp.eq.ph + + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = bitcast i32 %a1.coerce to <2 x i16> + tail call void @llvm.mips.cmp.eq.ph(<2 x i16> %0, <2 x i16> %1) + %2 = tail call i32 @llvm.mips.rddsp(i32 31) + ret i32 %2 +} + +declare void @llvm.mips.cmp.eq.ph(<2 x i16>, <2 x i16>) nounwind + +define i32 @test__builtin_mips_cmp_lt_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind { +entry: +; CHECK: cmp.lt.ph + + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = bitcast i32 %a1.coerce to <2 x i16> + tail call void @llvm.mips.cmp.lt.ph(<2 x i16> %0, <2 x i16> %1) + %2 = tail call i32 @llvm.mips.rddsp(i32 31) + ret i32 %2 +} + +declare void @llvm.mips.cmp.lt.ph(<2 x i16>, <2 x i16>) nounwind + +define i32 @test__builtin_mips_cmp_le_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind { +entry: +; CHECK: cmp.le.ph + + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = bitcast i32 %a1.coerce to <2 x i16> + tail call void @llvm.mips.cmp.le.ph(<2 x i16> %0, <2 x i16> %1) + %2 = tail call i32 @llvm.mips.rddsp(i32 31) + ret i32 %2 +} + +declare void @llvm.mips.cmp.le.ph(<2 x i16>, <2 x i16>) nounwind + +define { i32 } @test__builtin_mips_pick_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind readonly { +entry: +; CHECK: pick.qb + + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = bitcast i32 %a1.coerce to <4 x i8> + %2 = tail call <4 x i8> @llvm.mips.pick.qb(<4 x i8> %0, <4 x i8> %1) + %3 = bitcast <4 x i8> %2 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0 + ret { i32 } %.fca.0.insert +} + +declare <4 x i8> @llvm.mips.pick.qb(<4 x i8>, <4 x i8>) nounwind readonly + +define { i32 } @test__builtin_mips_pick_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind readonly { +entry: +; CHECK: pick.ph + + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = bitcast i32 %a1.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.mips.pick.ph(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0 + ret { i32 } %.fca.0.insert +} + +declare <2 x i16> @llvm.mips.pick.ph(<2 x i16>, <2 x i16>) nounwind readonly + +define { i32 } @test__builtin_mips_packrl_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind readnone { +entry: +; CHECK: packrl.ph + + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = bitcast i32 %a1.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.mips.packrl.ph(<2 x i16> %0, <2 x i16> %1) + %3 
= bitcast <2 x i16> %2 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0 + ret { i32 } %.fca.0.insert +} + +declare <2 x i16> @llvm.mips.packrl.ph(<2 x i16>, <2 x i16>) nounwind readnone + +define i32 @test__builtin_mips_rddsp1(i32 %i0) nounwind readonly { +entry: +; CHECK: rddsp ${{[0-9]+}} + + %0 = tail call i32 @llvm.mips.rddsp(i32 31) + ret i32 %0 +} + +define { i32 } @test__builtin_mips_shll_qb1(i32 %i0, i32 %a0.coerce) nounwind { +entry: +; CHECK: shll.qb + + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = tail call <4 x i8> @llvm.mips.shll.qb(<4 x i8> %0, i32 3) + %2 = bitcast <4 x i8> %1 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0 + ret { i32 } %.fca.0.insert +} + +declare <4 x i8> @llvm.mips.shll.qb(<4 x i8>, i32) nounwind + +define { i32 } @test__builtin_mips_shll_qb2(i32 %i0, i32 %a0.coerce, i32 %a1) nounwind { +entry: +; CHECK: shllv.qb + + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = tail call <4 x i8> @llvm.mips.shll.qb(<4 x i8> %0, i32 %a1) + %2 = bitcast <4 x i8> %1 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0 + ret { i32 } %.fca.0.insert +} + +define { i32 } @test__builtin_mips_shll_ph1(i32 %i0, i32 %a0.coerce) nounwind { +entry: +; CHECK: shll.ph + + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = tail call <2 x i16> @llvm.mips.shll.ph(<2 x i16> %0, i32 7) + %2 = bitcast <2 x i16> %1 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0 + ret { i32 } %.fca.0.insert +} + +declare <2 x i16> @llvm.mips.shll.ph(<2 x i16>, i32) nounwind + +define { i32 } @test__builtin_mips_shll_ph2(i32 %i0, i32 %a0.coerce, i32 %a1) nounwind { +entry: +; CHECK: shllv.ph + + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = tail call <2 x i16> @llvm.mips.shll.ph(<2 x i16> %0, i32 %a1) + %2 = bitcast <2 x i16> %1 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0 + ret { i32 } %.fca.0.insert +} + +define { i32 } @test__builtin_mips_shll_s_ph1(i32 %i0, i32 %a0.coerce) nounwind { +entry: +; CHECK: shll_s.ph + + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = tail call <2 x i16> @llvm.mips.shll.s.ph(<2 x i16> %0, i32 7) + %2 = bitcast <2 x i16> %1 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0 + ret { i32 } %.fca.0.insert +} + +declare <2 x i16> @llvm.mips.shll.s.ph(<2 x i16>, i32) nounwind + +define { i32 } @test__builtin_mips_shll_s_ph2(i32 %i0, i32 %a0.coerce, i32 %a1) nounwind { +entry: +; CHECK: shllv_s.ph + + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = tail call <2 x i16> @llvm.mips.shll.s.ph(<2 x i16> %0, i32 %a1) + %2 = bitcast <2 x i16> %1 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0 + ret { i32 } %.fca.0.insert +} + +define i32 @test__builtin_mips_shll_s_w1(i32 %i0, i32 %a0) nounwind { +entry: +; CHECK: shll_s.w + + %0 = tail call i32 @llvm.mips.shll.s.w(i32 %a0, i32 15) + ret i32 %0 +} + +declare i32 @llvm.mips.shll.s.w(i32, i32) nounwind + +define i32 @test__builtin_mips_shll_s_w2(i32 %i0, i32 %a0, i32 %a1) nounwind { +entry: +; CHECK: shllv_s.w + + %0 = tail call i32 @llvm.mips.shll.s.w(i32 %a0, i32 %a1) + ret i32 %0 +} + +define { i32 } @test__builtin_mips_shrl_qb1(i32 %i0, i32 %a0.coerce) nounwind readnone { +entry: +; CHECK: shrl.qb + + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = tail call <4 x i8> @llvm.mips.shrl.qb(<4 x i8> %0, i32 3) + %2 = bitcast <4 x i8> %1 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0 + ret { i32 } %.fca.0.insert +} + +declare <4 x i8> @llvm.mips.shrl.qb(<4 x i8>, i32) nounwind readnone + +define { i32 } 
@test__builtin_mips_shrl_qb2(i32 %i0, i32 %a0.coerce, i32 %a1) nounwind readnone { +entry: +; CHECK: shrlv.qb + + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = tail call <4 x i8> @llvm.mips.shrl.qb(<4 x i8> %0, i32 %a1) + %2 = bitcast <4 x i8> %1 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0 + ret { i32 } %.fca.0.insert +} + +define { i32 } @test__builtin_mips_shra_ph1(i32 %i0, i32 %a0.coerce) nounwind readnone { +entry: +; CHECK: shra.ph + + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = tail call <2 x i16> @llvm.mips.shra.ph(<2 x i16> %0, i32 7) + %2 = bitcast <2 x i16> %1 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0 + ret { i32 } %.fca.0.insert +} + +declare <2 x i16> @llvm.mips.shra.ph(<2 x i16>, i32) nounwind readnone + +define { i32 } @test__builtin_mips_shra_ph2(i32 %i0, i32 %a0.coerce, i32 %a1) nounwind readnone { +entry: +; CHECK: shrav.ph + + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = tail call <2 x i16> @llvm.mips.shra.ph(<2 x i16> %0, i32 %a1) + %2 = bitcast <2 x i16> %1 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0 + ret { i32 } %.fca.0.insert +} + +define { i32 } @test__builtin_mips_shra_r_ph1(i32 %i0, i32 %a0.coerce) nounwind readnone { +entry: +; CHECK: shra_r.ph + + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = tail call <2 x i16> @llvm.mips.shra.r.ph(<2 x i16> %0, i32 7) + %2 = bitcast <2 x i16> %1 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0 + ret { i32 } %.fca.0.insert +} + +declare <2 x i16> @llvm.mips.shra.r.ph(<2 x i16>, i32) nounwind readnone + +define { i32 } @test__builtin_mips_shra_r_ph2(i32 %i0, i32 %a0.coerce, i32 %a1) nounwind readnone { +entry: +; CHECK: shrav_r.ph + + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = tail call <2 x i16> @llvm.mips.shra.r.ph(<2 x i16> %0, i32 %a1) + %2 = bitcast <2 x i16> %1 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0 + ret { i32 } %.fca.0.insert +} + +define i32 @test__builtin_mips_shra_r_w1(i32 %i0, i32 %a0) nounwind readnone { +entry: +; CHECK: shra_r.w + + %0 = tail call i32 @llvm.mips.shra.r.w(i32 %a0, i32 15) + ret i32 %0 +} + +declare i32 @llvm.mips.shra.r.w(i32, i32) nounwind readnone + +define i32 @test__builtin_mips_shra_r_w2(i32 %i0, i32 %a0, i32 %a1) nounwind readnone { +entry: +; CHECK: shrav_r.w + + %0 = tail call i32 @llvm.mips.shra.r.w(i32 %a0, i32 %a1) + ret i32 %0 +} + +define { i32 } @test__builtin_mips_absq_s_ph1(i32 %i0, i32 %a0.coerce) nounwind { +entry: +; CHECK: absq_s.ph + + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = tail call <2 x i16> @llvm.mips.absq.s.ph(<2 x i16> %0) + %2 = bitcast <2 x i16> %1 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0 + ret { i32 } %.fca.0.insert +} + +declare <2 x i16> @llvm.mips.absq.s.ph(<2 x i16>) nounwind + +define i32 @test__builtin_mips_absq_s_w1(i32 %i0, i32 %a0) nounwind { +entry: +; CHECK: absq_s.w + + %0 = tail call i32 @llvm.mips.absq.s.w(i32 %a0) + ret i32 %0 +} + +declare i32 @llvm.mips.absq.s.w(i32) nounwind + +define i32 @test__builtin_mips_preceq_w_phl1(i32 %i0, i32 %a0.coerce) nounwind readnone { +entry: +; CHECK: preceq.w.phl + + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = tail call i32 @llvm.mips.preceq.w.phl(<2 x i16> %0) + ret i32 %1 +} + +declare i32 @llvm.mips.preceq.w.phl(<2 x i16>) nounwind readnone + +define i32 @test__builtin_mips_preceq_w_phr1(i32 %i0, i32 %a0.coerce) nounwind readnone { +entry: +; CHECK: preceq.w.phr + + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = tail call i32 @llvm.mips.preceq.w.phr(<2 x i16> %0) + ret 
i32 %1 +} + +declare i32 @llvm.mips.preceq.w.phr(<2 x i16>) nounwind readnone + +define { i32 } @test__builtin_mips_precequ_ph_qbl1(i32 %i0, i32 %a0.coerce) nounwind readnone { +entry: +; CHECK: precequ.ph.qbl + + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = tail call <2 x i16> @llvm.mips.precequ.ph.qbl(<4 x i8> %0) + %2 = bitcast <2 x i16> %1 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0 + ret { i32 } %.fca.0.insert +} + +declare <2 x i16> @llvm.mips.precequ.ph.qbl(<4 x i8>) nounwind readnone + +define { i32 } @test__builtin_mips_precequ_ph_qbr1(i32 %i0, i32 %a0.coerce) nounwind readnone { +entry: +; CHECK: precequ.ph.qbr + + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = tail call <2 x i16> @llvm.mips.precequ.ph.qbr(<4 x i8> %0) + %2 = bitcast <2 x i16> %1 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0 + ret { i32 } %.fca.0.insert +} + +declare <2 x i16> @llvm.mips.precequ.ph.qbr(<4 x i8>) nounwind readnone + +define { i32 } @test__builtin_mips_precequ_ph_qbla1(i32 %i0, i32 %a0.coerce) nounwind readnone { +entry: +; CHECK: precequ.ph.qbla + + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = tail call <2 x i16> @llvm.mips.precequ.ph.qbla(<4 x i8> %0) + %2 = bitcast <2 x i16> %1 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0 + ret { i32 } %.fca.0.insert +} + +declare <2 x i16> @llvm.mips.precequ.ph.qbla(<4 x i8>) nounwind readnone + +define { i32 } @test__builtin_mips_precequ_ph_qbra1(i32 %i0, i32 %a0.coerce) nounwind readnone { +entry: +; CHECK: precequ.ph.qbra + + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = tail call <2 x i16> @llvm.mips.precequ.ph.qbra(<4 x i8> %0) + %2 = bitcast <2 x i16> %1 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0 + ret { i32 } %.fca.0.insert +} + +declare <2 x i16> @llvm.mips.precequ.ph.qbra(<4 x i8>) nounwind readnone + +define { i32 } @test__builtin_mips_preceu_ph_qbl1(i32 %i0, i32 %a0.coerce) nounwind readnone { +entry: +; CHECK: preceu.ph.qbl + + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = tail call <2 x i16> @llvm.mips.preceu.ph.qbl(<4 x i8> %0) + %2 = bitcast <2 x i16> %1 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0 + ret { i32 } %.fca.0.insert +} + +declare <2 x i16> @llvm.mips.preceu.ph.qbl(<4 x i8>) nounwind readnone + +define { i32 } @test__builtin_mips_preceu_ph_qbr1(i32 %i0, i32 %a0.coerce) nounwind readnone { +entry: +; CHECK: preceu.ph.qbr + + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = tail call <2 x i16> @llvm.mips.preceu.ph.qbr(<4 x i8> %0) + %2 = bitcast <2 x i16> %1 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0 + ret { i32 } %.fca.0.insert +} + +declare <2 x i16> @llvm.mips.preceu.ph.qbr(<4 x i8>) nounwind readnone + +define { i32 } @test__builtin_mips_preceu_ph_qbla1(i32 %i0, i32 %a0.coerce) nounwind readnone { +entry: +; CHECK: preceu.ph.qbla + + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = tail call <2 x i16> @llvm.mips.preceu.ph.qbla(<4 x i8> %0) + %2 = bitcast <2 x i16> %1 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0 + ret { i32 } %.fca.0.insert +} + +declare <2 x i16> @llvm.mips.preceu.ph.qbla(<4 x i8>) nounwind readnone + +define { i32 } @test__builtin_mips_preceu_ph_qbra1(i32 %i0, i32 %a0.coerce) nounwind readnone { +entry: +; CHECK: preceu.ph.qbra + + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = tail call <2 x i16> @llvm.mips.preceu.ph.qbra(<4 x i8> %0) + %2 = bitcast <2 x i16> %1 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0 + ret { i32 } %.fca.0.insert +} + +declare <2 x i16> 
@llvm.mips.preceu.ph.qbra(<4 x i8>) nounwind readnone + +define { i32 } @test__builtin_mips_repl_qb1(i32 %i0) nounwind readnone { +entry: +; CHECK: repl.qb + + %0 = tail call <4 x i8> @llvm.mips.repl.qb(i32 127) + %1 = bitcast <4 x i8> %0 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0 + ret { i32 } %.fca.0.insert +} + +declare <4 x i8> @llvm.mips.repl.qb(i32) nounwind readnone + +define { i32 } @test__builtin_mips_repl_qb2(i32 %i0, i32 %a0) nounwind readnone { +entry: +; CHECK: replv.qb + + %0 = tail call <4 x i8> @llvm.mips.repl.qb(i32 %a0) + %1 = bitcast <4 x i8> %0 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0 + ret { i32 } %.fca.0.insert +} + +define { i32 } @test__builtin_mips_repl_ph1(i32 %i0) nounwind readnone { +entry: +; CHECK: repl.ph + + %0 = tail call <2 x i16> @llvm.mips.repl.ph(i32 0) + %1 = bitcast <2 x i16> %0 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0 + ret { i32 } %.fca.0.insert +} + +declare <2 x i16> @llvm.mips.repl.ph(i32) nounwind readnone + +define { i32 } @test__builtin_mips_repl_ph2(i32 %i0, i32 %a0) nounwind readnone { +entry: +; CHECK: replv.ph + + %0 = tail call <2 x i16> @llvm.mips.repl.ph(i32 %a0) + %1 = bitcast <2 x i16> %0 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0 + ret { i32 } %.fca.0.insert +} + +define i32 @test__builtin_mips_bitrev1(i32 %i0, i32 %a0) nounwind readnone { +entry: +; CHECK: bitrev ${{[0-9]+}} + + %0 = tail call i32 @llvm.mips.bitrev(i32 %a0) + ret i32 %0 +} + +declare i32 @llvm.mips.bitrev(i32) nounwind readnone + +define i32 @test__builtin_mips_lbux1(i32 %i0, i8* %a0, i32 %a1) nounwind readonly { +entry: +; CHECK: lbux ${{[0-9]+}} + + %0 = tail call i32 @llvm.mips.lbux(i8* %a0, i32 %a1) + ret i32 %0 +} + +declare i32 @llvm.mips.lbux(i8*, i32) nounwind readonly + +define i32 @test__builtin_mips_lhx1(i32 %i0, i8* %a0, i32 %a1) nounwind readonly { +entry: +; CHECK: lhx ${{[0-9]+}} + + %0 = tail call i32 @llvm.mips.lhx(i8* %a0, i32 %a1) + ret i32 %0 +} + +declare i32 @llvm.mips.lhx(i8*, i32) nounwind readonly + +define i32 @test__builtin_mips_lwx1(i32 %i0, i8* %a0, i32 %a1) nounwind readonly { +entry: +; CHECK: lwx ${{[0-9]+}} + + %0 = tail call i32 @llvm.mips.lwx(i8* %a0, i32 %a1) + ret i32 %0 +} + +declare i32 @llvm.mips.lwx(i8*, i32) nounwind readonly + +define i32 @test__builtin_mips_wrdsp1(i32 %i0, i32 %a0) nounwind { +entry: +; CHECK: wrdsp ${{[0-9]+}} + + tail call void @llvm.mips.wrdsp(i32 %a0, i32 31) + %0 = tail call i32 @llvm.mips.rddsp(i32 31) + ret i32 %0 +} + +declare void @llvm.mips.wrdsp(i32, i32) nounwind diff --git a/test/CodeGen/Mips/dsp-r2.ll b/test/CodeGen/Mips/dsp-r2.ll new file mode 100644 index 0000000000..631f9e43c2 --- /dev/null +++ b/test/CodeGen/Mips/dsp-r2.ll @@ -0,0 +1,568 @@ +; RUN: llc -march=mipsel -mattr=+dspr2 < %s | FileCheck %s + +define i64 @test__builtin_mips_dpa_w_ph1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind readnone { +entry: +; CHECK: dpa.w.ph + + %1 = bitcast i32 %a1.coerce to <2 x i16> + %2 = bitcast i32 %a2.coerce to <2 x i16> + %3 = tail call i64 @llvm.mips.dpa.w.ph(i64 %a0, <2 x i16> %1, <2 x i16> %2) + ret i64 %3 +} + +declare i64 @llvm.mips.dpa.w.ph(i64, <2 x i16>, <2 x i16>) nounwind readnone + +define i64 @test__builtin_mips_dps_w_ph1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind readnone { +entry: +; CHECK: dps.w.ph + + %1 = bitcast i32 %a1.coerce to <2 x i16> + %2 = bitcast i32 %a2.coerce to <2 x i16> + %3 = tail call i64 @llvm.mips.dps.w.ph(i64 %a0, <2 x i16> %1, <2 x i16> %2) 
+ ret i64 %3 +} + +declare i64 @llvm.mips.dps.w.ph(i64, <2 x i16>, <2 x i16>) nounwind readnone + +define i64 @test__builtin_mips_mulsa_w_ph1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind readnone { +entry: +; CHECK: mulsa.w.ph + + %1 = bitcast i32 %a1.coerce to <2 x i16> + %2 = bitcast i32 %a2.coerce to <2 x i16> + %3 = tail call i64 @llvm.mips.mulsa.w.ph(i64 %a0, <2 x i16> %1, <2 x i16> %2) + ret i64 %3 +} + +declare i64 @llvm.mips.mulsa.w.ph(i64, <2 x i16>, <2 x i16>) nounwind readnone + +define i64 @test__builtin_mips_dpax_w_ph1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind readnone { +entry: +; CHECK: dpax.w.ph + + %1 = bitcast i32 %a1.coerce to <2 x i16> + %2 = bitcast i32 %a2.coerce to <2 x i16> + %3 = tail call i64 @llvm.mips.dpax.w.ph(i64 %a0, <2 x i16> %1, <2 x i16> %2) + ret i64 %3 +} + +declare i64 @llvm.mips.dpax.w.ph(i64, <2 x i16>, <2 x i16>) nounwind readnone + +define i64 @test__builtin_mips_dpsx_w_ph1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind readnone { +entry: +; CHECK: dpsx.w.ph + + %1 = bitcast i32 %a1.coerce to <2 x i16> + %2 = bitcast i32 %a2.coerce to <2 x i16> + %3 = tail call i64 @llvm.mips.dpsx.w.ph(i64 %a0, <2 x i16> %1, <2 x i16> %2) + ret i64 %3 +} + +declare i64 @llvm.mips.dpsx.w.ph(i64, <2 x i16>, <2 x i16>) nounwind readnone + +define i64 @test__builtin_mips_dpaqx_s_w_ph1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind { +entry: +; CHECK: dpaqx_s.w.ph + + %1 = bitcast i32 %a1.coerce to <2 x i16> + %2 = bitcast i32 %a2.coerce to <2 x i16> + %3 = tail call i64 @llvm.mips.dpaqx.s.w.ph(i64 %a0, <2 x i16> %1, <2 x i16> %2) + ret i64 %3 +} + +declare i64 @llvm.mips.dpaqx.s.w.ph(i64, <2 x i16>, <2 x i16>) nounwind + +define i64 @test__builtin_mips_dpaqx_sa_w_ph1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind { +entry: +; CHECK: dpaqx_sa.w.ph + + %1 = bitcast i32 %a1.coerce to <2 x i16> + %2 = bitcast i32 %a2.coerce to <2 x i16> + %3 = tail call i64 @llvm.mips.dpaqx.sa.w.ph(i64 %a0, <2 x i16> %1, <2 x i16> %2) + ret i64 %3 +} + +declare i64 @llvm.mips.dpaqx.sa.w.ph(i64, <2 x i16>, <2 x i16>) nounwind + +define i64 @test__builtin_mips_dpsqx_s_w_ph1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind { +entry: +; CHECK: dpsqx_s.w.ph + + %1 = bitcast i32 %a1.coerce to <2 x i16> + %2 = bitcast i32 %a2.coerce to <2 x i16> + %3 = tail call i64 @llvm.mips.dpsqx.s.w.ph(i64 %a0, <2 x i16> %1, <2 x i16> %2) + ret i64 %3 +} + +declare i64 @llvm.mips.dpsqx.s.w.ph(i64, <2 x i16>, <2 x i16>) nounwind + +define i64 @test__builtin_mips_dpsqx_sa_w_ph1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind { +entry: +; CHECK: dpsqx_sa.w.ph + + %1 = bitcast i32 %a1.coerce to <2 x i16> + %2 = bitcast i32 %a2.coerce to <2 x i16> + %3 = tail call i64 @llvm.mips.dpsqx.sa.w.ph(i64 %a0, <2 x i16> %1, <2 x i16> %2) + ret i64 %3 +} + +declare i64 @llvm.mips.dpsqx.sa.w.ph(i64, <2 x i16>, <2 x i16>) nounwind + +define { i32 } @test__builtin_mips_addu_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind { +entry: +; CHECK: addu.ph + + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = bitcast i32 %a1.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.mips.addu.ph(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0 + ret { i32 } %.fca.0.insert +} + +declare <2 x i16> @llvm.mips.addu.ph(<2 x i16>, <2 x i16>) nounwind + +define { i32 } @test__builtin_mips_addu_s_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind { 
+entry: +; CHECK: addu_s.ph + + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = bitcast i32 %a1.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.mips.addu.s.ph(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0 + ret { i32 } %.fca.0.insert +} + +declare <2 x i16> @llvm.mips.addu.s.ph(<2 x i16>, <2 x i16>) nounwind + +define { i32 } @test__builtin_mips_mulq_s_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind { +entry: +; CHECK: mulq_s.ph + + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = bitcast i32 %a1.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.mips.mulq.s.ph(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0 + ret { i32 } %.fca.0.insert +} + +declare <2 x i16> @llvm.mips.mulq.s.ph(<2 x i16>, <2 x i16>) nounwind + +define { i32 } @test__builtin_mips_subu_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind { +entry: +; CHECK: subu.ph + + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = bitcast i32 %a1.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.mips.subu.ph(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0 + ret { i32 } %.fca.0.insert +} + +declare <2 x i16> @llvm.mips.subu.ph(<2 x i16>, <2 x i16>) nounwind + +define { i32 } @test__builtin_mips_subu_s_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind { +entry: +; CHECK: subu_s.ph + + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = bitcast i32 %a1.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.mips.subu.s.ph(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0 + ret { i32 } %.fca.0.insert +} + +declare <2 x i16> @llvm.mips.subu.s.ph(<2 x i16>, <2 x i16>) nounwind + +define i32 @test__builtin_mips_cmpgdu_eq_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind { +entry: +; CHECK: cmpgdu.eq.qb + + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = bitcast i32 %a1.coerce to <4 x i8> + %2 = tail call i32 @llvm.mips.cmpgdu.eq.qb(<4 x i8> %0, <4 x i8> %1) + ret i32 %2 +} + +declare i32 @llvm.mips.cmpgdu.eq.qb(<4 x i8>, <4 x i8>) nounwind + +define i32 @test__builtin_mips_cmpgdu_lt_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind { +entry: +; CHECK: cmpgdu.lt.qb + + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = bitcast i32 %a1.coerce to <4 x i8> + %2 = tail call i32 @llvm.mips.cmpgdu.lt.qb(<4 x i8> %0, <4 x i8> %1) + ret i32 %2 +} + +declare i32 @llvm.mips.cmpgdu.lt.qb(<4 x i8>, <4 x i8>) nounwind + +define i32 @test__builtin_mips_cmpgdu_le_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind { +entry: +; CHECK: cmpgdu.le.qb + + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = bitcast i32 %a1.coerce to <4 x i8> + %2 = tail call i32 @llvm.mips.cmpgdu.le.qb(<4 x i8> %0, <4 x i8> %1) + ret i32 %2 +} + +declare i32 @llvm.mips.cmpgdu.le.qb(<4 x i8>, <4 x i8>) nounwind + +define { i32 } @test__builtin_mips_precr_qb_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind { +entry: +; CHECK: precr.qb.ph + + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = bitcast i32 %a1.coerce to <2 x i16> + %2 = tail call <4 x i8> @llvm.mips.precr.qb.ph(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <4 x i8> %2 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0 + ret { i32 } %.fca.0.insert +} + +declare <4 x i8> @llvm.mips.precr.qb.ph(<2 x i16>, <2 x i16>) nounwind + +define { i32 } @test__builtin_mips_precr_sra_ph_w1(i32 %i0, i32 %a0, i32 %a1) nounwind 
readnone { +entry: +; CHECK: precr_sra.ph.w + + %0 = tail call <2 x i16> @llvm.mips.precr.sra.ph.w(i32 %a0, i32 %a1, i32 15) + %1 = bitcast <2 x i16> %0 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0 + ret { i32 } %.fca.0.insert +} + +declare <2 x i16> @llvm.mips.precr.sra.ph.w(i32, i32, i32) nounwind readnone + +define { i32 } @test__builtin_mips_precr_sra_r_ph_w1(i32 %i0, i32 %a0, i32 %a1) nounwind readnone { +entry: +; CHECK: precr_sra_r.ph.w + + %0 = tail call <2 x i16> @llvm.mips.precr.sra.r.ph.w(i32 %a0, i32 %a1, i32 15) + %1 = bitcast <2 x i16> %0 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0 + ret { i32 } %.fca.0.insert +} + +declare <2 x i16> @llvm.mips.precr.sra.r.ph.w(i32, i32, i32) nounwind readnone + +define { i32 } @test__builtin_mips_shra_qb1(i32 %i0, i32 %a0.coerce) nounwind readnone { +entry: +; CHECK: shra.qb + + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = tail call <4 x i8> @llvm.mips.shra.qb(<4 x i8> %0, i32 3) + %2 = bitcast <4 x i8> %1 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0 + ret { i32 } %.fca.0.insert +} + +declare <4 x i8> @llvm.mips.shra.qb(<4 x i8>, i32) nounwind readnone + +define { i32 } @test__builtin_mips_shra_r_qb1(i32 %i0, i32 %a0.coerce) nounwind readnone { +entry: +; CHECK: shra_r.qb + + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = tail call <4 x i8> @llvm.mips.shra.r.qb(<4 x i8> %0, i32 3) + %2 = bitcast <4 x i8> %1 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0 + ret { i32 } %.fca.0.insert +} + +declare <4 x i8> @llvm.mips.shra.r.qb(<4 x i8>, i32) nounwind readnone + +define { i32 } @test__builtin_mips_shra_qb2(i32 %i0, i32 %a0.coerce, i32 %a1) nounwind readnone { +entry: +; CHECK: shrav.qb + + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = tail call <4 x i8> @llvm.mips.shra.qb(<4 x i8> %0, i32 %a1) + %2 = bitcast <4 x i8> %1 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0 + ret { i32 } %.fca.0.insert +} + +define { i32 } @test__builtin_mips_shra_r_qb2(i32 %i0, i32 %a0.coerce, i32 %a1) nounwind readnone { +entry: +; CHECK: shrav_r.qb + + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = tail call <4 x i8> @llvm.mips.shra.r.qb(<4 x i8> %0, i32 %a1) + %2 = bitcast <4 x i8> %1 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0 + ret { i32 } %.fca.0.insert +} + +define { i32 } @test__builtin_mips_shrl_ph1(i32 %i0, i32 %a0.coerce) nounwind readnone { +entry: +; CHECK: shrl.ph + + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = tail call <2 x i16> @llvm.mips.shrl.ph(<2 x i16> %0, i32 7) + %2 = bitcast <2 x i16> %1 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0 + ret { i32 } %.fca.0.insert +} + +declare <2 x i16> @llvm.mips.shrl.ph(<2 x i16>, i32) nounwind readnone + +define { i32 } @test__builtin_mips_shrl_ph2(i32 %i0, i32 %a0.coerce, i32 %a1) nounwind readnone { +entry: +; CHECK: shrlv.ph + + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = tail call <2 x i16> @llvm.mips.shrl.ph(<2 x i16> %0, i32 %a1) + %2 = bitcast <2 x i16> %1 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0 + ret { i32 } %.fca.0.insert +} + +define { i32 } @test__builtin_mips_absq_s_qb1(i32 %i0, i32 %a0.coerce) nounwind { +entry: +; CHECK: absq_s.qb + + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = tail call <4 x i8> @llvm.mips.absq.s.qb(<4 x i8> %0) + %2 = bitcast <4 x i8> %1 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0 + ret { i32 } %.fca.0.insert +} + +declare <4 x i8> @llvm.mips.absq.s.qb(<4 x i8>) nounwind + +define { i32 } 
@test__builtin_mips_mul_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind { +entry: +; CHECK: mul.ph + + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = bitcast i32 %a1.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.mips.mul.ph(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0 + ret { i32 } %.fca.0.insert +} + +declare <2 x i16> @llvm.mips.mul.ph(<2 x i16>, <2 x i16>) nounwind + +define { i32 } @test__builtin_mips_mul_s_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind { +entry: +; CHECK: mul_s.ph + + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = bitcast i32 %a1.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.mips.mul.s.ph(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0 + ret { i32 } %.fca.0.insert +} + +declare <2 x i16> @llvm.mips.mul.s.ph(<2 x i16>, <2 x i16>) nounwind + +define i32 @test__builtin_mips_mulq_rs_w1(i32 %i0, i32 %a0, i32 %a1) nounwind { +entry: +; CHECK: mulq_rs.w + + %0 = tail call i32 @llvm.mips.mulq.rs.w(i32 %a0, i32 %a1) + ret i32 %0 +} + +declare i32 @llvm.mips.mulq.rs.w(i32, i32) nounwind + +define i32 @test__builtin_mips_mulq_s_w1(i32 %i0, i32 %a0, i32 %a1) nounwind { +entry: +; CHECK: mulq_s.w + + %0 = tail call i32 @llvm.mips.mulq.s.w(i32 %a0, i32 %a1) + ret i32 %0 +} + +declare i32 @llvm.mips.mulq.s.w(i32, i32) nounwind + +define { i32 } @test__builtin_mips_adduh_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind readnone { +entry: +; CHECK: adduh.qb + + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = bitcast i32 %a1.coerce to <4 x i8> + %2 = tail call <4 x i8> @llvm.mips.adduh.qb(<4 x i8> %0, <4 x i8> %1) + %3 = bitcast <4 x i8> %2 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0 + ret { i32 } %.fca.0.insert +} + +declare <4 x i8> @llvm.mips.adduh.qb(<4 x i8>, <4 x i8>) nounwind readnone + +define { i32 } @test__builtin_mips_adduh_r_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind readnone { +entry: +; CHECK: adduh_r.qb + + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = bitcast i32 %a1.coerce to <4 x i8> + %2 = tail call <4 x i8> @llvm.mips.adduh.r.qb(<4 x i8> %0, <4 x i8> %1) + %3 = bitcast <4 x i8> %2 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0 + ret { i32 } %.fca.0.insert +} + +declare <4 x i8> @llvm.mips.adduh.r.qb(<4 x i8>, <4 x i8>) nounwind readnone + +define { i32 } @test__builtin_mips_subuh_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind readnone { +entry: +; CHECK: subuh.qb + + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = bitcast i32 %a1.coerce to <4 x i8> + %2 = tail call <4 x i8> @llvm.mips.subuh.qb(<4 x i8> %0, <4 x i8> %1) + %3 = bitcast <4 x i8> %2 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0 + ret { i32 } %.fca.0.insert +} + +declare <4 x i8> @llvm.mips.subuh.qb(<4 x i8>, <4 x i8>) nounwind readnone + +define { i32 } @test__builtin_mips_subuh_r_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind readnone { +entry: +; CHECK: subuh_r.qb + + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = bitcast i32 %a1.coerce to <4 x i8> + %2 = tail call <4 x i8> @llvm.mips.subuh.r.qb(<4 x i8> %0, <4 x i8> %1) + %3 = bitcast <4 x i8> %2 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0 + ret { i32 } %.fca.0.insert +} + +declare <4 x i8> @llvm.mips.subuh.r.qb(<4 x i8>, <4 x i8>) nounwind readnone + +define { i32 } @test__builtin_mips_addqh_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind readnone { +entry: +; CHECK: 
addqh.ph + + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = bitcast i32 %a1.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.mips.addqh.ph(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0 + ret { i32 } %.fca.0.insert +} + +declare <2 x i16> @llvm.mips.addqh.ph(<2 x i16>, <2 x i16>) nounwind readnone + +define { i32 } @test__builtin_mips_addqh_r_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind readnone { +entry: +; CHECK: addqh_r.ph + + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = bitcast i32 %a1.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.mips.addqh.r.ph(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0 + ret { i32 } %.fca.0.insert +} + +declare <2 x i16> @llvm.mips.addqh.r.ph(<2 x i16>, <2 x i16>) nounwind readnone + +define i32 @test__builtin_mips_addqh_w1(i32 %i0, i32 %a0, i32 %a1) nounwind readnone { +entry: +; CHECK: addqh.w + + %0 = tail call i32 @llvm.mips.addqh.w(i32 %a0, i32 %a1) + ret i32 %0 +} + +declare i32 @llvm.mips.addqh.w(i32, i32) nounwind readnone + +define i32 @test__builtin_mips_addqh_r_w1(i32 %i0, i32 %a0, i32 %a1) nounwind readnone { +entry: +; CHECK: addqh_r.w + + %0 = tail call i32 @llvm.mips.addqh.r.w(i32 %a0, i32 %a1) + ret i32 %0 +} + +declare i32 @llvm.mips.addqh.r.w(i32, i32) nounwind readnone + +define { i32 } @test__builtin_mips_subqh_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind readnone { +entry: +; CHECK: subqh.ph + + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = bitcast i32 %a1.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.mips.subqh.ph(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0 + ret { i32 } %.fca.0.insert +} + +declare <2 x i16> @llvm.mips.subqh.ph(<2 x i16>, <2 x i16>) nounwind readnone + +define { i32 } @test__builtin_mips_subqh_r_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind readnone { +entry: +; CHECK: subqh_r.ph + + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = bitcast i32 %a1.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.mips.subqh.r.ph(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0 + ret { i32 } %.fca.0.insert +} + +declare <2 x i16> @llvm.mips.subqh.r.ph(<2 x i16>, <2 x i16>) nounwind readnone + +define i32 @test__builtin_mips_subqh_w1(i32 %i0, i32 %a0, i32 %a1) nounwind readnone { +entry: +; CHECK: subqh.w + + %0 = tail call i32 @llvm.mips.subqh.w(i32 %a0, i32 %a1) + ret i32 %0 +} + +declare i32 @llvm.mips.subqh.w(i32, i32) nounwind readnone + +define i32 @test__builtin_mips_subqh_r_w1(i32 %i0, i32 %a0, i32 %a1) nounwind readnone { +entry: +; CHECK: subqh_r.w + + %0 = tail call i32 @llvm.mips.subqh.r.w(i32 %a0, i32 %a1) + ret i32 %0 +} + +declare i32 @llvm.mips.subqh.r.w(i32, i32) nounwind readnone + +define i32 @test__builtin_mips_append1(i32 %i0, i32 %a0, i32 %a1) nounwind readnone { +entry: +; CHECK: append ${{[0-9]+}} + + %0 = tail call i32 @llvm.mips.append(i32 %a0, i32 %a1, i32 15) + ret i32 %0 +} + +declare i32 @llvm.mips.append(i32, i32, i32) nounwind readnone + +define i32 @test__builtin_mips_balign1(i32 %i0, i32 %a0, i32 %a1) nounwind readnone { +entry: +; CHECK: balign ${{[0-9]+}} + + %0 = tail call i32 @llvm.mips.balign(i32 %a0, i32 %a1, i32 1) + ret i32 %0 +} + +declare i32 @llvm.mips.balign(i32, i32, i32) nounwind readnone + +define i32 @test__builtin_mips_prepend1(i32 %i0, i32 %a0, i32 %a1) 
nounwind readnone {
+entry:
+; CHECK: prepend ${{[0-9]+}}
+
+ %0 = tail call i32 @llvm.mips.prepend(i32 %a0, i32 %a1, i32 15)
+ ret i32 %0
+}
+
+declare i32 @llvm.mips.prepend(i32, i32, i32) nounwind readnone
diff --git a/test/CodeGen/Mips/vector-load-store.ll b/test/CodeGen/Mips/vector-load-store.ll
new file mode 100644
index 0000000000..d889963099
--- /dev/null
+++ b/test/CodeGen/Mips/vector-load-store.ll
@@ -0,0 +1,27 @@
+; RUN: llc -march=mipsel -mattr=+dsp < %s | FileCheck %s
+
+@g1 = common global <2 x i16> zeroinitializer, align 4
+@g0 = common global <2 x i16> zeroinitializer, align 4
+@g3 = common global <4 x i8> zeroinitializer, align 4
+@g2 = common global <4 x i8> zeroinitializer, align 4
+
+define void @func_v2i16() nounwind {
+entry:
+; CHECK: lw
+; CHECK: sw
+
+ %0 = load <2 x i16>* @g1, align 4
+ store <2 x i16> %0, <2 x i16>* @g0, align 4
+ ret void
+}
+
+define void @func_v4i8() nounwind {
+entry:
+; CHECK: lw
+; CHECK: sw
+
+ %0 = load <4 x i8>* @g3, align 4
+ store <4 x i8> %0, <4 x i8>* @g2, align 4
+ ret void
+}
+
diff --git a/test/CodeGen/PowerPC/pr13891.ll b/test/CodeGen/PowerPC/pr13891.ll
new file mode 100644
index 0000000000..3ae73850a3
--- /dev/null
+++ b/test/CodeGen/PowerPC/pr13891.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%struct.foo = type { i8, i8 }
+
+define void @_Z5check3foos(%struct.foo* nocapture byval %f, i16 signext %i) noinline {
+; CHECK: _Z5check3foos:
+; CHECK: sth 3, {{[0-9]+}}(1)
+; CHECK: lha {{[0-9]+}}, {{[0-9]+}}(1)
+entry:
+ %0 = bitcast %struct.foo* %f to i16*
+ %1 = load i16* %0, align 2
+ %bf.val.sext = ashr i16 %1, 8
+ %cmp = icmp eq i16 %bf.val.sext, %i
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ %conv = sext i16 %bf.val.sext to i32
+ tail call void @exit(i32 %conv)
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
+declare void @exit(i32)
diff --git a/test/CodeGen/Thumb2/cortex-fp.ll b/test/CodeGen/Thumb2/cortex-fp.ll
index d06f8a7bee..b7df2fbf54 100644
--- a/test/CodeGen/Thumb2/cortex-fp.ll
+++ b/test/CodeGen/Thumb2/cortex-fp.ll
@@ -7,8 +7,8 @@ define float @foo(float %a, float %b) {
 entry:
 ; CHECK: foo
 ; CORTEXM3: blx ___mulsf3
-; CORTEXM4: vmul.f32 s0, s1, s0
-; CORTEXA8: vmul.f32 d0, d1, d0
+; CORTEXM4: vmul.f32 s0, s2, s0
+; CORTEXA8: vmul.f32 d
 %0 = fmul float %a, %b
 ret float %0
 }
@@ -19,6 +19,6 @@ entry:
 %0 = fmul double %a, %b
 ; CORTEXM3: blx ___muldf3
 ; CORTEXM4: blx ___muldf3
-; CORTEXA8: vmul.f64 d16, d17, d16
+; CORTEXA8: vmul.f64 d
 ret double %0
 }
diff --git a/test/CodeGen/Thumb2/div.ll b/test/CodeGen/Thumb2/div.ll
index 2c00c70c0d..f89746a303 100644
--- a/test/CodeGen/Thumb2/div.ll
+++ b/test/CodeGen/Thumb2/div.ll
@@ -2,6 +2,8 @@
 ; RUN: | FileCheck %s -check-prefix=CHECK-THUMB
 ; RUN: llc < %s -march=thumb -mcpu=cortex-m3 -mattr=+thumb2 \
 ; RUN: | FileCheck %s -check-prefix=CHECK-THUMBV7M
+; RUN: llc < %s -march=thumb -mcpu=swift \
+; RUN: | FileCheck %s -check-prefix=CHECK-SWIFT-T2
 
 define i32 @f1(i32 %a, i32 %b) {
 entry:
@@ -9,6 +11,8 @@ entry:
 ; CHECK-THUMB: __divsi3
 ; CHECK-THUMBV7M: f1
 ; CHECK-THUMBV7M: sdiv
+; CHECK-SWIFT-T2: f1
+; CHECK-SWIFT-T2: sdiv
 %tmp1 = sdiv i32 %a, %b ; <i32> [#uses=1]
 ret i32 %tmp1
 }
@@ -19,6 +23,8 @@ entry:
 ; CHECK-THUMB: __udivsi3
 ; CHECK-THUMBV7M: f2
 ; CHECK-THUMBV7M: udiv
+; CHECK-SWIFT-T2: f2
+; CHECK-SWIFT-T2: udiv
 %tmp1 = udiv i32 %a, %b ; <i32> [#uses=1]
ret i32 %tmp1 } @@ -29,6 +35,8 @@ entry: ; CHECK-THUMB: __modsi3 ; CHECK-THUMBV7M: f3 ; CHECK-THUMBV7M: sdiv +; CHECK-SWIFT-T2: f3 +; CHECK-SWIFT-T2: sdiv %tmp1 = srem i32 %a, %b ; <i32> [#uses=1] ret i32 %tmp1 } @@ -39,6 +47,8 @@ entry: ; CHECK-THUMB: __umodsi3 ; CHECK-THUMBV7M: f4 ; CHECK-THUMBV7M: udiv +; CHECK-SWIFT-T2: f4 +; CHECK-SWIFT-T2: udiv %tmp1 = urem i32 %a, %b ; <i32> [#uses=1] ret i32 %tmp1 } diff --git a/test/CodeGen/Thumb2/thumb2-mla.ll b/test/CodeGen/Thumb2/thumb2-mla.ll index c4cc749ea5..594d9742b0 100644 --- a/test/CodeGen/Thumb2/thumb2-mla.ll +++ b/test/CodeGen/Thumb2/thumb2-mla.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s +; RUN: llc < %s -march=thumb -mattr=+thumb2 -arm-use-mulops=false | FileCheck %s -check-prefix=NO_MULOPS define i32 @f1(i32 %a, i32 %b, i32 %c) { %tmp1 = mul i32 %a, %b @@ -7,6 +8,9 @@ define i32 @f1(i32 %a, i32 %b, i32 %c) { } ; CHECK: f1: ; CHECK: mla r0, r0, r1, r2 +; NO_MULOPS: f1: +; NO_MULOPS: muls r0, r1, r0 +; NO_MULOPS-NEXT: add r0, r2 define i32 @f2(i32 %a, i32 %b, i32 %c) { %tmp1 = mul i32 %a, %b @@ -15,3 +19,6 @@ define i32 @f2(i32 %a, i32 %b, i32 %c) { } ; CHECK: f2: ; CHECK: mla r0, r0, r1, r2 +; NO_MULOPS: f2: +; NO_MULOPS: muls r0, r1, r0 +; NO_MULOPS-NEXT: add r0, r2 diff --git a/test/CodeGen/Thumb2/thumb2-smla.ll b/test/CodeGen/Thumb2/thumb2-smla.ll index c128eccd66..aaaedfa42e 100644 --- a/test/CodeGen/Thumb2/thumb2-smla.ll +++ b/test/CodeGen/Thumb2/thumb2-smla.ll @@ -1,8 +1,12 @@ ; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk,+t2dsp | FileCheck %s +; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk,+t2dsp -arm-use-mulops=false | FileCheck %s -check-prefix=NO_MULOPS define i32 @f3(i32 %a, i16 %x, i32 %y) { ; CHECK: f3 ; CHECK: smlabt r0, r1, r2, r0 +; NO_MULOPS: f3 +; NO_MULOPS: smultb r1, r2, r1 +; NO_MULOPS-NEXT: add r0, r1 %tmp = sext i16 %x to i32 ; <i32> [#uses=1] %tmp2 = ashr i32 %y, 16 ; <i32> [#uses=1] %tmp3 = mul i32 %tmp2, %tmp ; <i32> [#uses=1] diff --git a/test/CodeGen/X86/2012-09-28-CGPBug.ll b/test/CodeGen/X86/2012-09-28-CGPBug.ll new file mode 100644 index 0000000000..32d7d012dd --- /dev/null +++ b/test/CodeGen/X86/2012-09-28-CGPBug.ll @@ -0,0 +1,53 @@ +; RUN: llc -mtriple=i386-apple-macosx < %s | FileCheck %s +; rdar://12396696 + +@JT = global [4 x i32] [i32 sub (i32 ptrtoint (i8* blockaddress(@h, %18) to i32), i32 ptrtoint (i8* blockaddress(@h, %11) to i32)), i32 sub (i32 ptrtoint (i8* blockaddress(@h, %17) to i32), i32 ptrtoint (i8* blockaddress(@h, %11) to i32)), i32 sub (i32 ptrtoint (i8* blockaddress(@h, %22) to i32), i32 ptrtoint (i8* blockaddress(@h, %18) to i32)), i32 sub (i32 ptrtoint (i8* blockaddress(@h, %22) to i32), i32 ptrtoint (i8* blockaddress(@h, %17) to i32))] +@gGlobalLock = external global i8* +@.str40 = external global [35 x i8] + +; CHECK: _JT: +; CHECK-NOT: .long Ltmp{{[0-9]+}}-1 +; CHECK-NOT: .long 1-Ltmp{{[0-9]+}} +; CHECK: .long Ltmp{{[0-9]+}}-Ltmp{{[0-9]+}} +; CHECK: .long Ltmp{{[0-9]+}}-Ltmp{{[0-9]+}} +; CHECK: .long Ltmp{{[0-9]+}}-Ltmp{{[0-9]+}} +; CHECK: .long Ltmp{{[0-9]+}}-Ltmp{{[0-9]+}} + +define void @h(i8*) nounwind ssp { + %2 = alloca i8* + store i8* %0, i8** %2 + %3 = load i8** %2 + %4 = bitcast i8* %3 to { i32, i32 }* + %5 = getelementptr { i32, i32 }* %4, i32 0, i32 0 + %6 = load i32* %5 + %7 = srem i32 %6, 2 + %8 = icmp slt i32 %6, 2 + %9 = select i1 %8, i32 %6, i32 %7 + %10 = icmp eq i32 %9, 0 + br label %11 + +; <label>:11 ; preds = %1 + %12 = zext i1 %10 to i32 + %13 = getelementptr [4 x i32]* @JT, i32 0, i32 %12 + %14 = load i32* %13 
+ %15 = add i32 %14, ptrtoint (i8* blockaddress(@h, %11) to i32) + %16 = inttoptr i32 %15 to i8* + indirectbr i8* %16, [label %17, label %18] + +; <label>:17 ; preds = %11 + tail call void (i8*, ...)* @g(i8* getelementptr inbounds ([35 x i8]* @.str40, i32 0, i32 0)) + br label %22 + +; <label>:18 ; preds = %11 + %19 = call i32 @f(i32 -1037694186) nounwind + %20 = inttoptr i32 %19 to i32 (i8**)* + %21 = tail call i32 %20(i8** @gGlobalLock) + br label %22 + +; <label>:22 ; preds = %18, %17 + ret void +} + +declare i32 @f(i32) + +declare void @g(i8*, ...) diff --git a/test/CodeGen/X86/atomic-minmax-i6432.ll b/test/CodeGen/X86/atomic-minmax-i6432.ll new file mode 100644 index 0000000000..01a926489b --- /dev/null +++ b/test/CodeGen/X86/atomic-minmax-i6432.ll @@ -0,0 +1,51 @@ +; RUN: llc -march=x86 -mattr=+cmov -mtriple=i386-pc-linux < %s | FileCheck %s +@sc64 = external global i64 + +define void @atomic_maxmin_i6432() { +; CHECK: atomic_maxmin_i6432 + %1 = atomicrmw max i64* @sc64, i64 5 acquire +; CHECK: [[LABEL:.LBB[0-9]+_[0-9]+]] +; CHECK: cmpl +; CHECK: setl +; CHECK: cmpl +; CHECK: setl +; CHECK: cmovne +; CHECK: cmovne +; CHECK: lock +; CHECK-NEXT: cmpxchg8b +; CHECK: jne [[LABEL]] + %2 = atomicrmw min i64* @sc64, i64 6 acquire +; CHECK: [[LABEL:.LBB[0-9]+_[0-9]+]] +; CHECK: cmpl +; CHECK: setg +; CHECK: cmpl +; CHECK: setg +; CHECK: cmovne +; CHECK: cmovne +; CHECK: lock +; CHECK-NEXT: cmpxchg8b +; CHECK: jne [[LABEL]] + %3 = atomicrmw umax i64* @sc64, i64 7 acquire +; CHECK: [[LABEL:.LBB[0-9]+_[0-9]+]] +; CHECK: cmpl +; CHECK: setb +; CHECK: cmpl +; CHECK: setb +; CHECK: cmovne +; CHECK: cmovne +; CHECK: lock +; CHECK-NEXT: cmpxchg8b +; CHECK: jne [[LABEL]] + %4 = atomicrmw umin i64* @sc64, i64 8 acquire +; CHECK: [[LABEL:.LBB[0-9]+_[0-9]+]] +; CHECK: cmpl +; CHECK: seta +; CHECK: cmpl +; CHECK: seta +; CHECK: cmovne +; CHECK: cmovne +; CHECK: lock +; CHECK-NEXT: cmpxchg8b +; CHECK: jne [[LABEL]] + ret void +} diff --git a/test/CodeGen/X86/atomic6432.ll b/test/CodeGen/X86/atomic6432.ll index 556c36ebfd..f9b21c5bc7 100644 --- a/test/CodeGen/X86/atomic6432.ll +++ b/test/CodeGen/X86/atomic6432.ll @@ -1,5 +1,4 @@ ; RUN: llc < %s -O0 -march=x86 -mcpu=corei7 | FileCheck %s --check-prefix X32 -; XFAIL: * @sc64 = external global i64 diff --git a/test/CodeGen/X86/crash.ll b/test/CodeGen/X86/crash.ll index 9badfc82e9..ae5804195c 100644 --- a/test/CodeGen/X86/crash.ll +++ b/test/CodeGen/X86/crash.ll @@ -442,3 +442,38 @@ entry: ret void } declare void @_Z6PrintFz(...) 
+ +@a = external global i32, align 4 +@fn1.g = private unnamed_addr constant [9 x i32*] [i32* null, i32* @a, i32* null, i32* null, i32* null, i32* null, i32* null, i32* null, i32* null], align 16 +@e = external global i32, align 4 + +define void @pr13943() nounwind uwtable ssp { +entry: + %srcval = load i576* bitcast ([9 x i32*]* @fn1.g to i576*), align 16 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %g.0 = phi i576 [ %srcval, %entry ], [ %ins, %for.inc ] + %0 = load i32* @e, align 4 + %1 = lshr i576 %g.0, 64 + %2 = trunc i576 %1 to i64 + %3 = inttoptr i64 %2 to i32* + %cmp = icmp eq i32* undef, %3 + %conv2 = zext i1 %cmp to i32 + %and = and i32 %conv2, %0 + tail call void (...)* @fn3(i32 %and) nounwind + %tobool = icmp eq i32 undef, 0 + br i1 %tobool, label %for.inc, label %if.then + +if.then: ; preds = %for.cond + ret void + +for.inc: ; preds = %for.cond + %4 = shl i576 %1, 384 + %mask = and i576 %g.0, -726838724295606890509921801691610055141362320587174446476410459910173841445449629921945328942266354949348255351381262292727973638307841 + %5 = and i576 %4, 726838724295606890509921801691610055141362320587174446476410459910173841445449629921945328942266354949348255351381262292727973638307840 + %ins = or i576 %5, %mask + br label %for.cond +} + +declare void @fn3(...) diff --git a/test/CodeGen/X86/jump_sign.ll b/test/CodeGen/X86/jump_sign.ll index 0cf397d4af..78d9e06f59 100644 --- a/test/CodeGen/X86/jump_sign.ll +++ b/test/CodeGen/X86/jump_sign.ll @@ -278,3 +278,31 @@ entry: %cond = select i1 %cmp, i32 %add, i32 0 ret i32 %cond } + +; PR13966 +@b = common global i32 0, align 4 +@a = common global i32 0, align 4 +define i32 @test1(i32 %p1) nounwind uwtable { +entry: +; CHECK: test1: +; CHECK: testb +; CHECK: j +; CHECK: ret + %0 = load i32* @b, align 4 + %cmp = icmp ult i32 %0, %p1 + %conv = zext i1 %cmp to i32 + %1 = load i32* @a, align 4 + %and = and i32 %conv, %1 + %conv1 = trunc i32 %and to i8 + %2 = urem i8 %conv1, 3 + %tobool = icmp eq i8 %2, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: + %dec = add nsw i32 %1, -1 + store i32 %dec, i32* @a, align 4 + br label %if.end + +if.end: + ret i32 undef +} diff --git a/test/CodeGen/X86/mulx32.ll b/test/CodeGen/X86/mulx32.ll new file mode 100644 index 0000000000..b75ac009e7 --- /dev/null +++ b/test/CodeGen/X86/mulx32.ll @@ -0,0 +1,22 @@ +; RUN: llc -mcpu=core-avx2 -march=x86 < %s | FileCheck %s + +define i64 @f1(i32 %a, i32 %b) { + %x = zext i32 %a to i64 + %y = zext i32 %b to i64 + %r = mul i64 %x, %y +; CHECK: f1 +; CHECK: mulxl +; CHECK: ret + ret i64 %r +} + +define i64 @f2(i32 %a, i32* %p) { + %b = load i32* %p + %x = zext i32 %a to i64 + %y = zext i32 %b to i64 + %r = mul i64 %x, %y +; CHECK: f2 +; CHECK: mulxl ({{.+}}), %{{.+}}, %{{.+}} +; CHECK: ret + ret i64 %r +} diff --git a/test/CodeGen/X86/mulx64.ll b/test/CodeGen/X86/mulx64.ll new file mode 100644 index 0000000000..d5730282a1 --- /dev/null +++ b/test/CodeGen/X86/mulx64.ll @@ -0,0 +1,22 @@ +; RUN: llc -mcpu=core-avx2 -march=x86-64 < %s | FileCheck %s + +define i128 @f1(i64 %a, i64 %b) { + %x = zext i64 %a to i128 + %y = zext i64 %b to i128 + %r = mul i128 %x, %y +; CHECK: f1 +; CHECK: mulxq +; CHECK: ret + ret i128 %r +} + +define i128 @f2(i64 %a, i64* %p) { + %b = load i64* %p + %x = zext i64 %a to i128 + %y = zext i64 %b to i128 + %r = mul i128 %x, %y +; CHECK: f2 +; CHECK: mulxq ({{.+}}), %{{.+}}, %{{.+}} +; CHECK: ret + ret i128 %r +} diff --git a/test/CodeGen/X86/phys_subreg_coalesce-3.ll b/test/CodeGen/X86/phys_subreg_coalesce-3.ll index 
51320dd6d0..2a20e7ad6f 100644 --- a/test/CodeGen/X86/phys_subreg_coalesce-3.ll +++ b/test/CodeGen/X86/phys_subreg_coalesce-3.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s +; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=corei7 | FileCheck %s ; rdar://5571034 ; This requires physreg joining, %vreg13 is live everywhere: diff --git a/test/CodeGen/X86/pic_jumptable.ll b/test/CodeGen/X86/pic_jumptable.ll index 8c16dc68b2..bdd8859358 100644 --- a/test/CodeGen/X86/pic_jumptable.ll +++ b/test/CodeGen/X86/pic_jumptable.ll @@ -1,5 +1,7 @@ ; RUN: llc < %s -relocation-model=pic -mtriple=i386-linux-gnu -asm-verbose=false \ ; RUN: | FileCheck %s --check-prefix=CHECK-LINUX +; RUN: llc < %s -relocation-model=pic -mark-data-regions -mtriple=i686-apple-darwin -asm-verbose=false \ +; RUN: | FileCheck %s --check-prefix=CHECK-DATA ; RUN: llc < %s -relocation-model=pic -mtriple=i686-apple-darwin -asm-verbose=false \ ; RUN: | FileCheck %s ; RUN: llc < %s -mtriple=x86_64-apple-darwin | not grep 'lJTI' @@ -16,6 +18,16 @@ entry: ; CHECK: Ltmp0 = LJTI0_0-L0$pb ; CHECK-NEXT: addl Ltmp0(%eax,%ecx,4) ; CHECK-NEXT: jmpl *%eax + +;; When data-in-code markers are enabled, we should see them around the jump +;; table. +; CHECK-DATA: .data_region jt32 +; CHECK-DATA: LJTI0_0 +; CHECK-DATA: .end_data_region + +;; When they're not enabled, make sure we don't see them at all. +; CHECK-NOT: .data_region +; CHECK-LINUX-NOT: .data_region %Y_addr = alloca i32 ; <i32*> [#uses=2] %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] store i32 %Y, i32* %Y_addr diff --git a/test/CodeGen/X86/ptr-rotate.ll b/test/CodeGen/X86/ptr-rotate.ll index 6debd16ba5..fbd13b5036 100644 --- a/test/CodeGen/X86/ptr-rotate.ll +++ b/test/CodeGen/X86/ptr-rotate.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=i386-apple-darwin -o - < %s | FileCheck %s +; RUN: llc -mtriple=i386-apple-darwin -mcpu=corei7 -o - < %s | FileCheck %s define i32 @func(i8* %A) nounwind readnone { entry: diff --git a/test/CodeGen/X86/rot32.ll b/test/CodeGen/X86/rot32.ll index 99602fd64f..e95a734e04 100644 --- a/test/CodeGen/X86/rot32.ll +++ b/test/CodeGen/X86/rot32.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -march=x86 | FileCheck %s +; RUN: llc < %s -march=x86 -mcpu=corei7 | FileCheck %s +; RUN: llc < %s -march=x86 -mcpu=core-avx2 | FileCheck %s --check-prefix=BMI2 define i32 @foo(i32 %x, i32 %y, i32 %z) nounwind readnone { entry: @@ -48,12 +49,25 @@ define i32 @xfoo(i32 %x, i32 %y, i32 %z) nounwind readnone { entry: ; CHECK: xfoo: ; CHECK: roll $7 +; BMI2: xfoo: +; BMI2: rorxl $25 %0 = lshr i32 %x, 25 %1 = shl i32 %x, 7 %2 = or i32 %0, %1 ret i32 %2 } +define i32 @xfoop(i32* %p) nounwind readnone { +entry: +; BMI2: xfoop: +; BMI2: rorxl $25, ({{.+}}), %{{.+}} + %x = load i32* %p + %a = lshr i32 %x, 25 + %b = shl i32 %x, 7 + %c = or i32 %a, %b + ret i32 %c +} + define i32 @xbar(i32 %x, i32 %y, i32 %z) nounwind readnone { entry: ; CHECK: xbar: @@ -68,12 +82,25 @@ define i32 @xun(i32 %x, i32 %y, i32 %z) nounwind readnone { entry: ; CHECK: xun: ; CHECK: roll $25 +; BMI2: xun: +; BMI2: rorxl $7 %0 = lshr i32 %x, 7 %1 = shl i32 %x, 25 %2 = or i32 %0, %1 ret i32 %2 } +define i32 @xunp(i32* %p) nounwind readnone { +entry: +; BMI2: xunp: +; BMI2: rorxl $7, ({{.+}}), %{{.+}} + %x = load i32* %p + %a = lshr i32 %x, 7 + %b = shl i32 %x, 25 + %c = or i32 %a, %b + ret i32 %c +} + define i32 @xbu(i32 %x, i32 %y, i32 %z) nounwind readnone { entry: ; CHECK: xbu: diff --git a/test/CodeGen/X86/rot64.ll b/test/CodeGen/X86/rot64.ll index 4e082bb860..7fa982d83b 100644 --- 
a/test/CodeGen/X86/rot64.ll +++ b/test/CodeGen/X86/rot64.ll @@ -1,8 +1,9 @@ -; RUN: llc < %s -march=x86-64 > %t -; RUN: grep rol %t | count 3 +; RUN: llc < %s -march=x86-64 -mcpu=corei7 > %t +; RUN: grep rol %t | count 5 ; RUN: grep ror %t | count 1 ; RUN: grep shld %t | count 2 ; RUN: grep shrd %t | count 2 +; RUN: llc < %s -march=x86-64 -mcpu=core-avx2 | FileCheck %s --check-prefix=BMI2 define i64 @foo(i64 %x, i64 %y, i64 %z) nounwind readnone { entry: @@ -42,12 +43,25 @@ entry: define i64 @xfoo(i64 %x, i64 %y, i64 %z) nounwind readnone { entry: +; BMI2: xfoo: +; BMI2: rorxq $57 %0 = lshr i64 %x, 57 %1 = shl i64 %x, 7 %2 = or i64 %0, %1 ret i64 %2 } +define i64 @xfoop(i64* %p) nounwind readnone { +entry: +; BMI2: xfoop: +; BMI2: rorxq $57, ({{.+}}), %{{.+}} + %x = load i64* %p + %a = lshr i64 %x, 57 + %b = shl i64 %x, 7 + %c = or i64 %a, %b + ret i64 %c +} + define i64 @xbar(i64 %x, i64 %y, i64 %z) nounwind readnone { entry: %0 = shl i64 %y, 7 @@ -58,12 +72,25 @@ entry: define i64 @xun(i64 %x, i64 %y, i64 %z) nounwind readnone { entry: +; BMI2: xun: +; BMI2: rorxq $7 %0 = lshr i64 %x, 7 %1 = shl i64 %x, 57 %2 = or i64 %0, %1 ret i64 %2 } +define i64 @xunp(i64* %p) nounwind readnone { +entry: +; BMI2: xunp: +; BMI2: rorxq $7, ({{.+}}), %{{.+}} + %x = load i64* %p + %a = lshr i64 %x, 7 + %b = shl i64 %x, 57 + %c = or i64 %a, %b + ret i64 %c +} + define i64 @xbu(i64 %x, i64 %y, i64 %z) nounwind readnone { entry: %0 = lshr i64 %y, 7 diff --git a/test/CodeGen/X86/rotate2.ll b/test/CodeGen/X86/rotate2.ll index 2eea3999e7..2316c70850 100644 --- a/test/CodeGen/X86/rotate2.ll +++ b/test/CodeGen/X86/rotate2.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 | grep rol | count 2 +; RUN: llc < %s -march=x86-64 -mcpu=corei7 | grep rol | count 2 define i64 @test1(i64 %x) nounwind { entry: diff --git a/test/CodeGen/X86/shift-bmi2.ll b/test/CodeGen/X86/shift-bmi2.ll new file mode 100644 index 0000000000..d1f321f177 --- /dev/null +++ b/test/CodeGen/X86/shift-bmi2.ll @@ -0,0 +1,178 @@ +; RUN: llc -mtriple=i386-unknown-unknown -mcpu=core-avx2 < %s | FileCheck --check-prefix=BMI2 %s +; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=core-avx2 < %s | FileCheck --check-prefix=BMI264 %s + +define i32 @shl32(i32 %x, i32 %shamt) nounwind uwtable readnone { +entry: + %shl = shl i32 %x, %shamt +; BMI2: shl32 +; BMI2: shlxl +; BMI2: ret +; BMI264: shl32 +; BMI264: shlxl +; BMI264: ret + ret i32 %shl +} + +define i32 @shl32i(i32 %x) nounwind uwtable readnone { +entry: + %shl = shl i32 %x, 5 +; BMI2: shl32i +; BMI2-NOT: shlxl +; BMI2: ret +; BMI264: shl32i +; BMI264-NOT: shlxl +; BMI264: ret + ret i32 %shl +} + +define i32 @shl32p(i32* %p, i32 %shamt) nounwind uwtable readnone { +entry: + %x = load i32* %p + %shl = shl i32 %x, %shamt +; BMI2: shl32p +; BMI2: shlxl %{{.+}}, ({{.+}}), %{{.+}} +; BMI2: ret +; BMI264: shl32p +; BMI264: shlxl %{{.+}}, ({{.+}}), %{{.+}} +; BMI264: ret + ret i32 %shl +} + +define i32 @shl32pi(i32* %p) nounwind uwtable readnone { +entry: + %x = load i32* %p + %shl = shl i32 %x, 5 +; BMI2: shl32pi +; BMI2-NOT: shlxl +; BMI2: ret +; BMI264: shl32pi +; BMI264-NOT: shlxl +; BMI264: ret + ret i32 %shl +} + +define i64 @shl64(i64 %x, i64 %shamt) nounwind uwtable readnone { +entry: + %shl = shl i64 %x, %shamt +; BMI264: shl64 +; BMI264: shlxq +; BMI264: ret + ret i64 %shl +} + +define i64 @shl64i(i64 %x) nounwind uwtable readnone { +entry: + %shl = shl i64 %x, 7 +; BMI264: shl64i +; BMI264-NOT: shlxq +; BMI264: ret + ret i64 %shl +} + +define i64 @shl64p(i64* %p, i64 %shamt) nounwind uwtable readnone 
{ +entry: + %x = load i64* %p + %shl = shl i64 %x, %shamt +; BMI264: shl64p +; BMI264: shlxq %{{.+}}, ({{.+}}), %{{.+}} +; BMI264: ret + ret i64 %shl +} + +define i64 @shl64pi(i64* %p) nounwind uwtable readnone { +entry: + %x = load i64* %p + %shl = shl i64 %x, 7 +; BMI264: shl64p +; BMI264-NOT: shlxq +; BMI264: ret + ret i64 %shl +} + +define i32 @lshr32(i32 %x, i32 %shamt) nounwind uwtable readnone { +entry: + %shl = lshr i32 %x, %shamt +; BMI2: lshr32 +; BMI2: shrxl +; BMI2: ret +; BMI264: lshr32 +; BMI264: shrxl +; BMI264: ret + ret i32 %shl +} + +define i32 @lshr32p(i32* %p, i32 %shamt) nounwind uwtable readnone { +entry: + %x = load i32* %p + %shl = lshr i32 %x, %shamt +; BMI2: lshr32p +; BMI2: shrxl %{{.+}}, ({{.+}}), %{{.+}} +; BMI2: ret +; BMI264: lshr32 +; BMI264: shrxl %{{.+}}, ({{.+}}), %{{.+}} +; BMI264: ret + ret i32 %shl +} + +define i64 @lshr64(i64 %x, i64 %shamt) nounwind uwtable readnone { +entry: + %shl = lshr i64 %x, %shamt +; BMI264: lshr64 +; BMI264: shrxq +; BMI264: ret + ret i64 %shl +} + +define i64 @lshr64p(i64* %p, i64 %shamt) nounwind uwtable readnone { +entry: + %x = load i64* %p + %shl = lshr i64 %x, %shamt +; BMI264: lshr64p +; BMI264: shrxq %{{.+}}, ({{.+}}), %{{.+}} +; BMI264: ret + ret i64 %shl +} + +define i32 @ashr32(i32 %x, i32 %shamt) nounwind uwtable readnone { +entry: + %shl = ashr i32 %x, %shamt +; BMI2: ashr32 +; BMI2: sarxl +; BMI2: ret +; BMI264: ashr32 +; BMI264: sarxl +; BMI264: ret + ret i32 %shl +} + +define i32 @ashr32p(i32* %p, i32 %shamt) nounwind uwtable readnone { +entry: + %x = load i32* %p + %shl = ashr i32 %x, %shamt +; BMI2: ashr32p +; BMI2: sarxl %{{.+}}, ({{.+}}), %{{.+}} +; BMI2: ret +; BMI264: ashr32 +; BMI264: sarxl %{{.+}}, ({{.+}}), %{{.+}} +; BMI264: ret + ret i32 %shl +} + +define i64 @ashr64(i64 %x, i64 %shamt) nounwind uwtable readnone { +entry: + %shl = ashr i64 %x, %shamt +; BMI264: ashr64 +; BMI264: sarxq +; BMI264: ret + ret i64 %shl +} + +define i64 @ashr64p(i64* %p, i64 %shamt) nounwind uwtable readnone { +entry: + %x = load i64* %p + %shl = ashr i64 %x, %shamt +; BMI264: ashr64p +; BMI264: sarxq %{{.+}}, ({{.+}}), %{{.+}} +; BMI264: ret + ret i64 %shl +} diff --git a/test/CodeGen/X86/tailcall-64.ll b/test/CodeGen/X86/tailcall-64.ll index adc8620060..ecc253ba58 100644 --- a/test/CodeGen/X86/tailcall-64.ll +++ b/test/CodeGen/X86/tailcall-64.ll @@ -1,6 +1,4 @@ -; RUN: llc < %s | FileCheck %s -target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" -target triple = "x86_64-apple-darwin11.4.0" +; RUN: llc -mtriple=x86_64-apple-macosx -mcpu=core2 < %s | FileCheck %s declare i64 @testi() @@ -132,3 +130,28 @@ entry: %call = tail call i32 (i8*, ...)* %0(i8* null, i32 0, i32 0, i32 0, i32 0, i32 0) nounwind ret i32 %call } + +define x86_fp80 @fp80_call(x86_fp80 %x) nounwind { +entry: +; CHECK: fp80_call: +; CHECK: jmp _fp80_callee + %call = tail call x86_fp80 @fp80_callee(x86_fp80 %x) nounwind + ret x86_fp80 %call +} + +declare x86_fp80 @fp80_callee(x86_fp80) + +; rdar://12229511 +define x86_fp80 @trunc_fp80(x86_fp80 %x) nounwind { +entry: +; CHECK: trunc_fp80 +; CHECK: callq _trunc +; CHECK-NOT: jmp _trunc +; CHECK: ret + %conv = fptrunc x86_fp80 %x to double + %call = tail call double @trunc(double %conv) nounwind readnone + %conv1 = fpext double %call to x86_fp80 + ret x86_fp80 %conv1 +} + +declare double @trunc(double) nounwind readnone diff --git 
a/test/CodeGen/X86/targetLoweringGeneric.ll b/test/CodeGen/X86/targetLoweringGeneric.ll index ba5f8f8361..a773e9daef 100644 --- a/test/CodeGen/X86/targetLoweringGeneric.ll +++ b/test/CodeGen/X86/targetLoweringGeneric.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=i386-apple-darwin9 -fast-isel=false -O0 < %s | FileCheck %s +; RUN: llc -mtriple=i386-apple-darwin9 -mcpu=corei7 -fast-isel=false -O0 < %s | FileCheck %s ; Gather non-machine specific tests for the transformations in ; CodeGen/SelectionDAG/TargetLowering. Currently, these diff --git a/test/DebugInfo/bug_null_debuginfo.ll b/test/DebugInfo/bug_null_debuginfo.ll index a7fdf70d71..b17affed89 100644 --- a/test/DebugInfo/bug_null_debuginfo.ll +++ b/test/DebugInfo/bug_null_debuginfo.ll @@ -1,5 +1,4 @@ -; RUN: llc - +; RUN: llc < %s !llvm.dbg.cu = !{!0} diff --git a/test/MC/ARM/arm-arithmetic-aliases.s b/test/MC/ARM/arm-arithmetic-aliases.s index 9895cfc02b..3ed4448581 100644 --- a/test/MC/ARM/arm-arithmetic-aliases.s +++ b/test/MC/ARM/arm-arithmetic-aliases.s @@ -124,3 +124,7 @@ bicseq r2, r3 @ CHECK: bicseq r2, r2, #6 @ encoding: [0x06,0x20,0xd2,0x03] @ CHECK: bicseq r2, r2, r3 @ encoding: [0x03,0x20,0xd2,0x01] @ CHECK: bicseq r2, r2, r3 @ encoding: [0x03,0x20,0xd2,0x01] + +add r0, pc, #123 + +@ CHECK: adr r0, #123 @ encoding: [0x7b,0x00,0x8f,0xe2] diff --git a/test/MC/MachO/ARM/long-call-branch-island-relocation.s b/test/MC/MachO/ARM/long-call-branch-island-relocation.s new file mode 100644 index 0000000000..8ee7da54b5 --- /dev/null +++ b/test/MC/MachO/ARM/long-call-branch-island-relocation.s @@ -0,0 +1,43 @@ +@ RUN: llvm-mc -n -triple armv7-apple-darwin10 %s -filetype=obj -o %t.o +@ RUN: macho-dump --dump-section-data < %t.o | FileCheck %s + +@ rdar://12359919 + + .syntax unified + .text + + .globl _bar + .align 2 + .code 16 + .thumb_func _bar +_bar: + push {r7, lr} + mov r7, sp + bl _foo + pop {r7, pc} + + +_junk: +@ Make the _foo symbol sufficiently far away to force the 'bl' relocation +@ above to be out of range. On Darwin, the assembler deals with this by +@ generating an external relocation so the linker can create a branch +@ island. + + .space 20000000 + + .section __TEXT,initcode,regular,pure_instructions + + .globl _foo + .align 2 + .code 16 +_foo: + push {r7, lr} + mov r7, sp + pop {r7, pc} + + +@ CHECK: ('_relocations', [ +@ CHECK: # Relocation 0 +@ CHECK: (('word-0', 0x4), +@ CHECK: ('word-1', 0x6d000002)), +@ CHECK: ]) diff --git a/test/MC/MachO/i386-large-relocations.s b/test/MC/MachO/i386-large-relocations.s new file mode 100644 index 0000000000..e5a1cfb2c5 --- /dev/null +++ b/test/MC/MachO/i386-large-relocations.s @@ -0,0 +1,36 @@ +// RUN: llvm-mc -triple i386-apple-darwin10 %s -filetype=obj -o - | macho-dump | FileCheck %s + +.space 0x1ed280 + .section __DATA,__const + .align 4 +.space 0x5181020 +_foo: + .long _bar + .long 0 + .long _bar+8 + .long _bar+24 + .long 0 + .long _bar+16 + +.zerofill __DATA,__bss,__dummy,0x5d780 +.zerofill __DATA,__bss,_bar,48,4 + +// Normally scattered relocations are used for sym+offset expressions. When +// the value exceeds 24-bits, however, it's outside what MachO can encode, +// so the assembler falls back to non-scattered relocations. 
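+// (Worked numbers, for reference: a scattered relocation only has a 24-bit
+// address field, so it tops out at 0xFFFFFF. The relocation addresses in the
+// CHECK lines below, 0x5181020 and up, are well past that limit, which is
+// why plain, non-scattered relocations are expected here.)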
+// rdar://12358909 + +// CHECK: ('_relocations', [ +// CHECK: # Relocation 0 +// CHECK: (('word-0', 0x5181034), +// CHECK: ('word-1', 0x4000003)), +// CHECK: # Relocation 1 +// CHECK: (('word-0', 0x518102c), +// CHECK: ('word-1', 0x4000003)), +// CHECK: # Relocation 2 +// CHECK: (('word-0', 0x5181028), +// CHECK: ('word-1', 0x4000003)), +// CHECK: # Relocation 3 +// CHECK: (('word-0', 0x5181020), +// CHECK: ('word-1', 0x4000003)), +// CHECK: ]) diff --git a/test/Other/lint.ll b/test/Other/lint.ll index c84f56f8f6..78bbbe9e6f 100644 --- a/test/Other/lint.ll +++ b/test/Other/lint.ll @@ -9,8 +9,11 @@ declare void @has_noaliases(i32* noalias %p, i32* %q) declare void @one_arg(i32) @CG = constant i32 7 +@E = external global i8 define i32 @foo() noreturn { + %buf = alloca i8 + %buf2 = alloca {i8, i8}, align 2 ; CHECK: Caller and callee calling convention differ call void @bar() ; CHECK: Null pointer dereference @@ -26,8 +29,10 @@ define i32 @foo() noreturn { ; CHECK: Address one pointer dereference store i32 0, i32* inttoptr (i64 1 to i32*) ; CHECK: Memory reference address is misaligned - %x = inttoptr i32 1 to i32* - load i32* %x, align 4 + store i8 0, i8* %buf, align 2 +; CHECK: Memory reference address is misaligned + %gep = getelementptr {i8, i8}* %buf2, i32 0, i32 1 + store i8 0, i8* %gep, align 2 ; CHECK: Division by zero %sd = sdiv i32 2, 0 ; CHECK: Division by zero @@ -75,6 +80,18 @@ define i32 @foo() noreturn { ; CHECK: Write to read-only memory call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (i32* @CG to i8*), i8* bitcast (i32* @CG to i8*), i64 1, i32 1, i1 0) +; CHECK: Undefined behavior: Buffer overflow + %wider = bitcast i8* %buf to i16* + store i16 0, i16* %wider +; CHECK: Undefined behavior: Buffer overflow + %inner = getelementptr {i8, i8}* %buf2, i32 0, i32 1 + %wider2 = bitcast i8* %inner to i16* + store i16 0, i16* %wider2 +; CHECK: Undefined behavior: Buffer overflow + %before = getelementptr i8* %buf, i32 -1 + %wider3 = bitcast i8* %before to i16* + store i16 0, i16* %wider3 + br label %next next: @@ -84,6 +101,10 @@ next: ret i32 0 foo: +; CHECK-NOT: Undefined behavior: Buffer overflow +; CHECK-NOT: Memory reference address is misaligned + %e = bitcast i8* @E to i64* + store i64 0, i64* %e %z = add i32 0, 0 ; CHECK: unreachable immediately preceded by instruction without side effects unreachable diff --git a/test/Transforms/CorrelatedValuePropagation/crash.ll b/test/Transforms/CorrelatedValuePropagation/crash.ll index 80c43d0f1d..9723d18252 100644 --- a/test/Transforms/CorrelatedValuePropagation/crash.ll +++ b/test/Transforms/CorrelatedValuePropagation/crash.ll @@ -35,3 +35,28 @@ srf.exit.i: func_29.exit: ret void } + +; PR13972 +define void @test3() nounwind { +for.body: + br label %return + +for.cond.i: ; preds = %if.else.i, %for.body.i + %e.2.i = phi i32 [ %e.2.i, %if.else.i ], [ -8, %for.body.i ] + br i1 undef, label %return, label %for.body.i + +for.body.i: ; preds = %for.cond.i + switch i32 %e.2.i, label %for.cond3.i [ + i32 -3, label %if.else.i + i32 0, label %for.cond.i + ] + +for.cond3.i: ; preds = %for.cond3.i, %for.body.i + br label %for.cond3.i + +if.else.i: ; preds = %for.body.i + br label %for.cond.i + +return: ; preds = %for.cond.i, %for.body + ret void +} diff --git a/test/Transforms/DeadStoreElimination/libcalls.ll b/test/Transforms/DeadStoreElimination/libcalls.ll new file mode 100644 index 0000000000..4639c0bc96 --- /dev/null +++ b/test/Transforms/DeadStoreElimination/libcalls.ll @@ -0,0 +1,70 @@ +; RUN: opt -S -basicaa -dse < %s | FileCheck %s + 
+declare i8* @strcpy(i8* %dest, i8* %src) nounwind +define void @test1(i8* %src) { +; CHECK: @test1 + %B = alloca [16 x i8] + %dest = getelementptr inbounds [16 x i8]* %B, i64 0, i64 0 +; CHECK-NOT: @strcpy + %call = call i8* @strcpy(i8* %dest, i8* %src) +; CHECK: ret void + ret void +} + +declare i8* @strncpy(i8* %dest, i8* %src, i32 %n) nounwind +define void @test2(i8* %src) { +; CHECK: @test2 + %B = alloca [16 x i8] + %dest = getelementptr inbounds [16 x i8]* %B, i64 0, i64 0 +; CHECK-NOT: @strncpy + %call = call i8* @strncpy(i8* %dest, i8* %src, i32 12) +; CHECK: ret void + ret void +} + +declare i8* @strcat(i8* %dest, i8* %src) nounwind +define void @test3(i8* %src) { +; CHECK: @test3 + %B = alloca [16 x i8] + %dest = getelementptr inbounds [16 x i8]* %B, i64 0, i64 0 +; CHECK-NOT: @strcat + %call = call i8* @strcat(i8* %dest, i8* %src) +; CHECK: ret void + ret void +} + +declare i8* @strncat(i8* %dest, i8* %src, i32 %n) nounwind +define void @test4(i8* %src) { +; CHECK: @test4 + %B = alloca [16 x i8] + %dest = getelementptr inbounds [16 x i8]* %B, i64 0, i64 0 +; CHECK-NOT: @strncat + %call = call i8* @strncat(i8* %dest, i8* %src, i32 12) +; CHECK: ret void + ret void +} + +define void @test5(i8* nocapture %src) { +; CHECK: @test5 + %dest = alloca [100 x i8], align 16 + %arraydecay = getelementptr inbounds [100 x i8]* %dest, i64 0, i64 0 + %call = call i8* @strcpy(i8* %arraydecay, i8* %src) +; CHECK: %call = call i8* @strcpy + %arrayidx = getelementptr inbounds i8* %call, i64 10 + store i8 97, i8* %arrayidx, align 1 + ret void +} + +declare void @user(i8* %p) +define void @test6(i8* %src) { +; CHECK: @test6 + %B = alloca [16 x i8] + %dest = getelementptr inbounds [16 x i8]* %B, i64 0, i64 0 +; CHECK: @strcpy + %call = call i8* @strcpy(i8* %dest, i8* %src) +; CHECK: @user + call void @user(i8* %dest) +; CHECK: ret void + ret void +} + diff --git a/test/Transforms/GlobalOpt/load-store-global.ll b/test/Transforms/GlobalOpt/load-store-global.ll index f824b2c11c..25a53370fa 100644 --- a/test/Transforms/GlobalOpt/load-store-global.ll +++ b/test/Transforms/GlobalOpt/load-store-global.ll @@ -1,15 +1,38 @@ -; RUN: opt < %s -globalopt -S | not grep G +; RUN: opt < %s -globalopt -S | FileCheck %s @G = internal global i32 17 ; <i32*> [#uses=3] +; CHECK-NOT: @G define void @foo() { %V = load i32* @G ; <i32> [#uses=1] store i32 %V, i32* @G ret void +; CHECK: @foo +; CHECK-NEXT: ret void } define i32 @bar() { %X = load i32* @G ; <i32> [#uses=1] ret i32 %X +; CHECK: @bar +; CHECK-NEXT: ret i32 17 +} + +@a = internal global i64* null, align 8 +; CHECK-NOT: @a + +; PR13968 +define void @qux() nounwind { + %b = bitcast i64** @a to i8* + %g = getelementptr i64** @a, i32 1 + %cmp = icmp ne i8* null, %b + %cmp2 = icmp eq i8* null, %b + %cmp3 = icmp eq i64** null, %g + store i64* inttoptr (i64 1 to i64*), i64** @a, align 8 + %l = load i64** @a, align 8 + ret void +; CHECK: @qux +; CHECK-NOT: store +; CHECK-NOT: load } diff --git a/test/Transforms/InstCombine/2012-09-24-MemcpyFromGlobalCrash.ll b/test/Transforms/InstCombine/2012-09-24-MemcpyFromGlobalCrash.ll new file mode 100644 index 0000000000..4cd60b42fb --- /dev/null +++ b/test/Transforms/InstCombine/2012-09-24-MemcpyFromGlobalCrash.ll @@ -0,0 +1,19 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s + +; Check we don't crash due to lack of target data. 
+ +@G = constant [100 x i8] zeroinitializer + +declare void @bar(i8*) +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind + +define void @test() { +; CHECK: @test +; CHECK: llvm.memcpy +; CHECK: ret void + %A = alloca [100 x i8] + %a = getelementptr inbounds [100 x i8]* %A, i64 0, i64 0 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* getelementptr inbounds ([100 x i8]* @G, i64 0, i32 0), i64 100, i32 4, i1 false) + call void @bar(i8* %a) readonly + ret void +} diff --git a/test/Transforms/InstCombine/memcpy_chk-1.ll b/test/Transforms/InstCombine/memcpy_chk-1.ll new file mode 100644 index 0000000000..7c7d91808a --- /dev/null +++ b/test/Transforms/InstCombine/memcpy_chk-1.ll @@ -0,0 +1,60 @@ +; Test lib call simplification of __memcpy_chk calls with various values +; for dstlen and len. +; +; RUN: opt < %s -instcombine -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" + +%struct.T1 = type { [100 x i32], [100 x i32], [1024 x i8] } +%struct.T2 = type { [100 x i32], [100 x i32], [1024 x i8] } +%struct.T3 = type { [100 x i32], [100 x i32], [2048 x i8] } + +@t1 = common global %struct.T1 zeroinitializer +@t2 = common global %struct.T2 zeroinitializer +@t3 = common global %struct.T3 zeroinitializer + +; Check cases where dstlen >= len. + +define void @test_simplify1() { +; CHECK: @test_simplify1 + %dst = bitcast %struct.T1* @t1 to i8* + %src = bitcast %struct.T2* @t2 to i8* + +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64 + call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 1824, i64 1824) + ret void +} + +define void @test_simplify2() { +; CHECK: @test_simplify2 + %dst = bitcast %struct.T1* @t1 to i8* + %src = bitcast %struct.T3* @t3 to i8* + +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64 + call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 1824, i64 2848) + ret void +} + +; Check cases where dstlen < len. + +define void @test_no_simplify1() { +; CHECK: @test_no_simplify1 + %dst = bitcast %struct.T3* @t3 to i8* + %src = bitcast %struct.T1* @t1 to i8* + +; CHECK-NEXT: call i8* @__memcpy_chk + call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 2848, i64 1824) + ret void +} + +define void @test_no_simplify2() { +; CHECK: @test_no_simplify2 + %dst = bitcast %struct.T1* @t1 to i8* + %src = bitcast %struct.T2* @t2 to i8* + +; CHECK-NEXT: call i8* @__memcpy_chk + call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 1024, i64 0) + ret void +} + +declare i8* @__memcpy_chk(i8*, i8*, i64, i64) diff --git a/test/Transforms/InstCombine/memcpy_chk-2.ll b/test/Transforms/InstCombine/memcpy_chk-2.ll new file mode 100644 index 0000000000..aa43029d47 --- /dev/null +++ b/test/Transforms/InstCombine/memcpy_chk-2.ll @@ -0,0 +1,24 @@ +; Test that lib call simplification doesn't simplify __memcpy_chk calls +; with the wrong prototype. 
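+; (For contrast, the form the simplifier does handle is the four-argument
+; fortified prototype, as declared in memcpy_chk-1.ll:
+;   declare i8* @__memcpy_chk(i8*, i8*, i64, i64)
+; the three-argument declaration below drops the trailing object-size
+; parameter, so the call must be left untouched.)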
+; +; RUN: opt < %s -instcombine -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" + +%struct.T1 = type { [100 x i32], [100 x i32], [1024 x i8] } +%struct.T2 = type { [100 x i32], [100 x i32], [1024 x i8] } + +@t1 = common global %struct.T1 zeroinitializer +@t2 = common global %struct.T2 zeroinitializer + +define void @test_no_simplify() { +; CHECK: @test_no_simplify + %dst = bitcast %struct.T1* @t1 to i8* + %src = bitcast %struct.T2* @t2 to i8* + +; CHECK-NEXT: call i8* @__memcpy_chk + call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 1824) + ret void +} + +declare i8* @__memcpy_chk(i8*, i8*, i64) diff --git a/test/Transforms/InstCombine/memmove_chk-1.ll b/test/Transforms/InstCombine/memmove_chk-1.ll new file mode 100644 index 0000000000..f9ff9a103a --- /dev/null +++ b/test/Transforms/InstCombine/memmove_chk-1.ll @@ -0,0 +1,60 @@ +; Test lib call simplification of __memmove_chk calls with various values +; for dstlen and len. +; +; RUN: opt < %s -instcombine -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" + +%struct.T1 = type { [100 x i32], [100 x i32], [1024 x i8] } +%struct.T2 = type { [100 x i32], [100 x i32], [1024 x i8] } +%struct.T3 = type { [100 x i32], [100 x i32], [2048 x i8] } + +@t1 = common global %struct.T1 zeroinitializer +@t2 = common global %struct.T2 zeroinitializer +@t3 = common global %struct.T3 zeroinitializer + +; Check cases where dstlen >= len. + +define void @test_simplify1() { +; CHECK: @test_simplify1 + %dst = bitcast %struct.T1* @t1 to i8* + %src = bitcast %struct.T2* @t2 to i8* + +; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64 + call i8* @__memmove_chk(i8* %dst, i8* %src, i64 1824, i64 1824) + ret void +} + +define void @test_simplify2() { +; CHECK: @test_simplify2 + %dst = bitcast %struct.T1* @t1 to i8* + %src = bitcast %struct.T3* @t3 to i8* + +; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64 + call i8* @__memmove_chk(i8* %dst, i8* %src, i64 1824, i64 2848) + ret void +} + +; Check cases where dstlen < len. + +define void @test_no_simplify1() { +; CHECK: @test_no_simplify1 + %dst = bitcast %struct.T3* @t3 to i8* + %src = bitcast %struct.T1* @t1 to i8* + +; CHECK-NEXT: call i8* @__memmove_chk + call i8* @__memmove_chk(i8* %dst, i8* %src, i64 2848, i64 1824) + ret void +} + +define void @test_no_simplify2() { +; CHECK: @test_no_simplify2 + %dst = bitcast %struct.T1* @t1 to i8* + %src = bitcast %struct.T2* @t2 to i8* + +; CHECK-NEXT: call i8* @__memmove_chk + call i8* @__memmove_chk(i8* %dst, i8* %src, i64 1024, i64 0) + ret void +} + +declare i8* @__memmove_chk(i8*, i8*, i64, i64) diff --git a/test/Transforms/InstCombine/memmove_chk-2.ll b/test/Transforms/InstCombine/memmove_chk-2.ll new file mode 100644 index 0000000000..f0a915fde2 --- /dev/null +++ b/test/Transforms/InstCombine/memmove_chk-2.ll @@ -0,0 +1,24 @@ +; Test that lib call simplification doesn't simplify __memmove_chk calls +; with the wrong prototype. 
+; +; RUN: opt < %s -instcombine -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" + +%struct.T1 = type { [100 x i32], [100 x i32], [1024 x i8] } +%struct.T2 = type { [100 x i32], [100 x i32], [1024 x i8] } + +@t1 = common global %struct.T1 zeroinitializer +@t2 = common global %struct.T2 zeroinitializer + +define void @test_no_simplify() { +; CHECK: @test_no_simplify + %dst = bitcast %struct.T1* @t1 to i8* + %src = bitcast %struct.T2* @t2 to i8* + +; CHECK-NEXT: call i8* @__memmove_chk + call i8* @__memmove_chk(i8* %dst, i8* %src, i64 1824) + ret void +} + +declare i8* @__memmove_chk(i8*, i8*, i64) diff --git a/test/Transforms/InstCombine/memset_chk-1.ll b/test/Transforms/InstCombine/memset_chk-1.ll new file mode 100644 index 0000000000..be4c1cfccd --- /dev/null +++ b/test/Transforms/InstCombine/memset_chk-1.ll @@ -0,0 +1,61 @@ +; Test lib call simplification of __memset_chk calls with various values +; for dstlen and len. +; +; RUN: opt < %s -instcombine -S | FileCheck %s +; rdar://7719085 + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" + +%struct.T = type { [100 x i32], [100 x i32], [1024 x i8] } +@t = common global %struct.T zeroinitializer + +; Check cases where dstlen >= len. + +define void @test_simplify1() { +; CHECK: @test_simplify1 + %dst = bitcast %struct.T* @t to i8* + +; CHECK-NEXT: call void @llvm.memset.p0i8.i64 + call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 1824) + ret void +} + +define void @test_simplify2() { +; CHECK: @test_simplify2 + %dst = bitcast %struct.T* @t to i8* + +; CHECK-NEXT: call void @llvm.memset.p0i8.i64 + call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 3648) + ret void +} + +define void @test_simplify3() { +; CHECK: @test_simplify3 + %dst = bitcast %struct.T* @t to i8* + +; CHECK-NEXT: call void @llvm.memset.p0i8.i64 + call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 -1) + ret void +} + +; Check cases where dstlen < len. + +define void @test_no_simplify1() { +; CHECK: @test_no_simplify1 + %dst = bitcast %struct.T* @t to i8* + +; CHECK-NEXT: call i8* @__memset_chk + call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 400) + ret void +} + +define void @test_no_simplify2() { +; CHECK: @test_no_simplify2 + %dst = bitcast %struct.T* @t to i8* + +; CHECK-NEXT: call i8* @__memset_chk + call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 0) + ret void +} + +declare i8* @__memset_chk(i8*, i32, i64, i64) diff --git a/test/Transforms/InstCombine/memset_chk-2.ll b/test/Transforms/InstCombine/memset_chk-2.ll new file mode 100644 index 0000000000..60fbf163c2 --- /dev/null +++ b/test/Transforms/InstCombine/memset_chk-2.ll @@ -0,0 +1,20 @@ +; Test that lib call simplification doesn't simplify __memset_chk calls +; with the wrong prototype. 
+; +; RUN: opt < %s -instcombine -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" + +%struct.T = type { [100 x i32], [100 x i32], [1024 x i8] } +@t = common global %struct.T zeroinitializer + +define void @test_no_simplify() { +; CHECK: @test_no_simplify + %dst = bitcast %struct.T* @t to i8* + +; CHECK-NEXT: call i8* @__memset_chk + call i8* @__memset_chk(i8* %dst, i32 0, i64 1824) + ret void +} + +declare i8* @__memset_chk(i8*, i32, i64) diff --git a/test/Transforms/InstCombine/memset_chk.ll b/test/Transforms/InstCombine/memset_chk.ll deleted file mode 100644 index 58ecda582f..0000000000 --- a/test/Transforms/InstCombine/memset_chk.ll +++ /dev/null @@ -1,18 +0,0 @@ -; RUN: opt < %s -instcombine -S | FileCheck %s -; rdar://7719085 - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" - -%struct.data = type { [100 x i32], [100 x i32], [1024 x i8] } - -define i32 @t() nounwind ssp { -; CHECK: @t -; CHECK: @llvm.memset.p0i8.i64 -entry: - %0 = alloca %struct.data, align 8 ; <%struct.data*> [#uses=1] - %1 = bitcast %struct.data* %0 to i8* ; <i8*> [#uses=1] - %2 = call i8* @__memset_chk(i8* %1, i32 0, i64 1824, i64 1824) nounwind ; <i8*> [#uses=0] - ret i32 0 -} - -declare i8* @__memset_chk(i8*, i32, i64, i64) nounwind diff --git a/test/Transforms/InstCombine/strcpy_chk-1.ll b/test/Transforms/InstCombine/strcpy_chk-1.ll new file mode 100644 index 0000000000..c03e8a348b --- /dev/null +++ b/test/Transforms/InstCombine/strcpy_chk-1.ll @@ -0,0 +1,88 @@ +; Test lib call simplification of __strcpy_chk calls with various values +; for src, dst, and slen. +; +; RUN: opt < %s -instcombine -S | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" + +@a = common global [60 x i8] zeroinitializer, align 1 +@b = common global [60 x i8] zeroinitializer, align 1 +@.str = private constant [8 x i8] c"abcdefg\00" + +; Check cases where slen >= strlen (src). + +define void @test_simplify1() { +; CHECK: @test_simplify1 + %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0 + %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0 + +; CHECK-NEXT: call i8* @strcpy + call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 60) + ret void +} + +define void @test_simplify2() { +; CHECK: @test_simplify2 + %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0 + %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0 + +; CHECK-NEXT: call i8* @strcpy + call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 8) + ret void +} + +define void @test_simplify3() { +; CHECK: @test_simplify3 + %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0 + %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0 + +; CHECK-NEXT: call i8* @strcpy + call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 -1) + ret void +} + +; Check cases where there are no string constants. 
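+; (With no constant string the length of %src is unknown, so the only case
+; that can still be folded is an object size of -1, which stands for "size
+; unknown"; a concrete bound such as 8 cannot be proven safe and the checked
+; call is kept, as the pair of tests below shows.)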
+ +define void @test_simplify4() { +; CHECK: @test_simplify4 + %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0 + %src = getelementptr inbounds [60 x i8]* @b, i32 0, i32 0 + +; CHECK-NEXT: call i8* @strcpy + call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 -1) + ret void +} + +define void @test_no_simplify1() { +; CHECK: @test_no_simplify1 + %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0 + %src = getelementptr inbounds [60 x i8]* @b, i32 0, i32 0 + +; CHECK-NEXT: call i8* @__strcpy_chk + call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 8) + ret void +} + +; Check case were slen < strlen (src). + +define void @test_no_simplify2() { +; CHECK: @test_no_simplify2 + %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0 + %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0 + +; CHECK-NEXT: call i8* @__strcpy_chk + call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 3) + ret void +} + +define void @test_no_simplify3() { +; CHECK: @test_no_simplify3 + %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0 + %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0 + +; CHECK-NEXT: call i8* @__strcpy_chk + call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 0) + ret void +} + +declare i8* @__strcpy_chk(i8*, i8*, i32) nounwind diff --git a/test/Transforms/InstCombine/strcpy_chk-2.ll b/test/Transforms/InstCombine/strcpy_chk-2.ll new file mode 100644 index 0000000000..d76ea5d068 --- /dev/null +++ b/test/Transforms/InstCombine/strcpy_chk-2.ll @@ -0,0 +1,21 @@ +; Test that lib call simplification doesn't simplify __strcpy_chk calls +; with the wrong prototype. +; +; RUN: opt < %s -instcombine -S | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" + +@a = common global [60 x i16] zeroinitializer, align 1 +@.str = private constant [8 x i8] c"abcdefg\00" + +define void @test_no_simplify() { +; CHECK: @test_no_simplify + %dst = getelementptr inbounds [60 x i16]* @a, i32 0, i32 0 + %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0 + +; CHECK-NEXT: call i16* @__strcpy_chk + call i16* @__strcpy_chk(i16* %dst, i8* %src, i32 8) + ret void +} + +declare i16* @__strcpy_chk(i16*, i8*, i32) diff --git a/test/Transforms/InstCombine/strcpy_chk.ll b/test/Transforms/InstCombine/strcpy_chk.ll deleted file mode 100644 index 8835a0ba46..0000000000 --- a/test/Transforms/InstCombine/strcpy_chk.ll +++ /dev/null @@ -1,13 +0,0 @@ -; RUN: opt < %s -instcombine -S | FileCheck %s -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" -@a = common global [60 x i8] zeroinitializer, align 1 ; <[60 x i8]*> [#uses=1] -@.str = private constant [8 x i8] c"abcdefg\00" ; <[8 x i8]*> [#uses=1] - -define i8* @foo() nounwind { -; CHECK: @foo -; CHECK-NEXT: call i8* @strcpy - %call = call i8* @__strcpy_chk(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([8 x i8]* @.str, i32 0, i32 0), i32 60) ; <i8*> [#uses=1] - ret i8* %call -} - -declare i8* @__strcpy_chk(i8*, i8*, i32) nounwind diff --git a/test/Transforms/InstCombine/strncpy_chk-1.ll b/test/Transforms/InstCombine/strncpy_chk-1.ll new file mode 100644 index 0000000000..ae7e2fb5f1 --- /dev/null +++ b/test/Transforms/InstCombine/strncpy_chk-1.ll @@ -0,0 +1,66 @@ +; Test lib call simplification of __strncpy_chk calls with various values +; for len and dstlen. 
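+; (The fold being exercised: __strncpy_chk(dst, src, len, dstlen) becomes a
+; plain strncpy(dst, src, len) whenever dstlen is known to be >= len, e.g.
+;   call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 8, i32 60)
+; is rewritten to strncpy with length 8; when dstlen < len the checked call
+; stays.)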
+; +; RUN: opt < %s -instcombine -S | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" + +@a = common global [60 x i8] zeroinitializer, align 1 +@b = common global [60 x i8] zeroinitializer, align 1 +@.str = private constant [8 x i8] c"abcdefg\00" + +; Check cases where dstlen >= len + +define void @test_simplify1() { +; CHECK: @test_simplify1 + %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0 + %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0 + +; CHECK-NEXT: call i8* @strncpy + call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 8, i32 60) + ret void +} + +define void @test_simplify2() { +; CHECK: @test_simplify2 + %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0 + %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0 + +; CHECK-NEXT: call i8* @strncpy + call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 8, i32 8) + ret void +} + +define void @test_simplify3() { +; CHECK: @test_simplify3 + %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0 + %src = getelementptr inbounds [60 x i8]* @b, i32 0, i32 0 + +; CHECK-NEXT: call i8* @strncpy + call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 8, i32 60) + ret void +} + +; Check cases where dstlen < len + +define void @test_no_simplify1() { +; CHECK: @test_no_simplify1 + %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0 + %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0 + +; CHECK-NEXT: call i8* @__strncpy_chk + call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 8, i32 4) + ret void +} + +define void @test_no_simplify2() { +; CHECK: @test_no_simplify2 + %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0 + %src = getelementptr inbounds [60 x i8]* @b, i32 0, i32 0 + +; CHECK-NEXT: call i8* @__strncpy_chk + call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 8, i32 0) + ret void +} + +declare i8* @__strncpy_chk(i8*, i8*, i32, i32) diff --git a/test/Transforms/InstCombine/strncpy_chk-2.ll b/test/Transforms/InstCombine/strncpy_chk-2.ll new file mode 100644 index 0000000000..a0f132ebf6 --- /dev/null +++ b/test/Transforms/InstCombine/strncpy_chk-2.ll @@ -0,0 +1,21 @@ +; Test that lib call simplification doesn't simplify __strncpy_chk calls +; with the wrong prototype. 
+; +; RUN: opt < %s -instcombine -S | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" + +@a = common global [60 x i16] zeroinitializer, align 1 +@b = common global [60 x i16] zeroinitializer, align 1 + +define void @test_no_simplify() { +; CHECK: @test_no_simplify + %dst = getelementptr inbounds [60 x i16]* @a, i32 0, i32 0 + %src = getelementptr inbounds [60 x i16]* @b, i32 0, i32 0 + +; CHECK-NEXT: call i16* @__strncpy_chk + call i16* @__strncpy_chk(i16* %dst, i16* %src, i32 60, i32 60) + ret void +} + +declare i16* @__strncpy_chk(i16*, i16*, i32, i32) diff --git a/test/Transforms/InstCombine/vec_demanded_elts.ll b/test/Transforms/InstCombine/vec_demanded_elts.ll index 0019a57627..2d90750a2f 100644 --- a/test/Transforms/InstCombine/vec_demanded_elts.ll +++ b/test/Transforms/InstCombine/vec_demanded_elts.ll @@ -196,7 +196,7 @@ define <4 x float> @test_select(float %f, float %g) { ; CHECK-NOT: insertelement ; CHECK: %a3 = insertelement <4 x float> %a0, float 3.000000e+00, i32 3 ; CHECK-NOT: insertelement -; CHECK: %ret = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x float> %a3, <4 x float> <float undef, float 4.000000e+00, float 5.000000e+00, float undef> +; CHECK: shufflevector <4 x float> %a3, <4 x float> <float undef, float 4.000000e+00, float 5.000000e+00, float undef>, <4 x i32> <i32 0, i32 5, i32 6, i32 3> %a0 = insertelement <4 x float> undef, float %f, i32 0 %a1 = insertelement <4 x float> %a0, float 1.000000e+00, i32 1 %a2 = insertelement <4 x float> %a1, float 2.000000e+00, i32 2 diff --git a/test/Transforms/InstCombine/vec_shuffle.ll b/test/Transforms/InstCombine/vec_shuffle.ll index 8f78c2e6bd..14f532195d 100644 --- a/test/Transforms/InstCombine/vec_shuffle.ll +++ b/test/Transforms/InstCombine/vec_shuffle.ll @@ -153,3 +153,46 @@ define <8 x i8> @test12a(<8 x i8> %tmp6, <8 x i8> %tmp2) nounwind { ret <8 x i8> %tmp3 } +; We should form a shuffle out of a select with constant condition. 
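+; (Lane i of the result takes element i of %lhs when the mask bit is true and
+; element 4+i of %rhs when it is false, so a constant condition of
+; <true, false, true, false> on <4 x i16> operands becomes the shuffle mask
+; <i32 0, i32 5, i32 2, i32 7> checked for in @test13a below.)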
+define <4 x i16> @test13a(<4 x i16> %lhs, <4 x i16> %rhs) { +; CHECK: @test13a +; CHECK-NEXT: shufflevector <4 x i16> %lhs, <4 x i16> %rhs, <4 x i32> <i32 0, i32 5, i32 2, i32 7> +; CHECK-NEXT: ret + %A = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, + <4 x i16> %lhs, <4 x i16> %rhs + ret <4 x i16> %A +} + +define <4 x i16> @test13b(<4 x i16> %lhs, <4 x i16> %rhs) { +; CHECK: @test13b +; CHECK-NEXT: ret <4 x i16> %lhs + %A = select <4 x i1> <i1 true, i1 undef, i1 true, i1 true>, + <4 x i16> %lhs, <4 x i16> %rhs + ret <4 x i16> %A +} + +define <4 x i16> @test13c(<4 x i16> %lhs, <4 x i16> %rhs) { +; CHECK: @test13c +; CHECK-NEXT: shufflevector <4 x i16> %lhs, <4 x i16> %rhs, <4 x i32> <i32 0, i32 undef, i32 2, i32 7> +; CHECK-NEXT: ret + %A = select <4 x i1> <i1 true, i1 undef, i1 true, i1 false>, + <4 x i16> %lhs, <4 x i16> %rhs + ret <4 x i16> %A +} + +define <4 x i16> @test13d(<4 x i16> %lhs, <4 x i16> %rhs) { +; CHECK: @test13d +; CHECK: select +; CHECK-NEXT: ret + %A = select <4 x i1> <i1 true, i1 icmp ugt (<4 x i16>(<4 x i16>, <4 x i16>)* @test13a, <4 x i16>(<4 x i16>, <4 x i16>)* @test13b), i1 true, i1 false>, + <4 x i16> %lhs, <4 x i16> %rhs + ret <4 x i16> %A +} + +define <4 x i16> @test13e(<4 x i16> %lhs, <4 x i16> %rhs) { +; CHECK: @test13e +; CHECK-NEXT: ret <4 x i16> %rhs + %A = select <4 x i1> <i1 false, i1 false, i1 false, i1 false>, + <4 x i16> %lhs, <4 x i16> %rhs + ret <4 x i16> %A +} diff --git a/test/Transforms/LoopUnroll/pr11361.ll b/test/Transforms/LoopUnroll/pr11361.ll index 7ce7f5fe46..62de2f728d 100644 --- a/test/Transforms/LoopUnroll/pr11361.ll +++ b/test/Transforms/LoopUnroll/pr11361.ll @@ -1,4 +1,4 @@ -; RUN: opt -loop-unroll -disable-output +; RUN: opt -loop-unroll -disable-output < %s ; PR11361 ; This tests for an iterator invalidation issue. diff --git a/test/Transforms/LoopUnswitch/2011-06-02-CritSwitch.ll b/test/Transforms/LoopUnswitch/2011-06-02-CritSwitch.ll index 61c54ddb15..609520064a 100644 --- a/test/Transforms/LoopUnswitch/2011-06-02-CritSwitch.ll +++ b/test/Transforms/LoopUnswitch/2011-06-02-CritSwitch.ll @@ -1,4 +1,4 @@ -; RUN: opt -loop-unswitch -disable-output +; RUN: opt -loop-unswitch -disable-output < %s ; PR10031 define i32 @test(i32 %command) { diff --git a/test/Transforms/PhaseOrdering/gdce.ll b/test/Transforms/PhaseOrdering/gdce.ll new file mode 100644 index 0000000000..273e47e97c --- /dev/null +++ b/test/Transforms/PhaseOrdering/gdce.ll @@ -0,0 +1,106 @@ +; RUN: opt -O2 -S %s | FileCheck %s + +; Run global DCE to eliminate unused ctor and dtor. 
+; rdar://9142819 + +; CHECK: main +; CHECK-NOT: _ZN4BaseC1Ev +; CHECK-NOT: _ZN4BaseD1Ev +; CHECK-NOT: _ZN4BaseD2Ev +; CHECK-NOT: _ZN4BaseC2Ev +; CHECK-NOT: _ZN4BaseD0Ev + +%class.Base = type { i32 (...)** } + +@_ZTV4Base = linkonce_odr unnamed_addr constant [4 x i8*] [i8* null, i8* bitcast ({ i8*, i8* }* @_ZTI4Base to i8*), i8* bitcast (void (%class.Base*)* @_ZN4BaseD1Ev to i8*), i8* bitcast (void (%class.Base*)* @_ZN4BaseD0Ev to i8*)] +@_ZTVN10__cxxabiv117__class_type_infoE = external global i8* +@_ZTS4Base = linkonce_odr constant [6 x i8] c"4Base\00" +@_ZTI4Base = linkonce_odr unnamed_addr constant { i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8** @_ZTVN10__cxxabiv117__class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([6 x i8]* @_ZTS4Base, i32 0, i32 0) } + +define i32 @main() uwtable ssp { +entry: + %retval = alloca i32, align 4 + %b = alloca %class.Base, align 8 + %cleanup.dest.slot = alloca i32 + store i32 0, i32* %retval + call void @_ZN4BaseC1Ev(%class.Base* %b) + store i32 0, i32* %retval + store i32 1, i32* %cleanup.dest.slot + call void @_ZN4BaseD1Ev(%class.Base* %b) + %0 = load i32* %retval + ret i32 %0 +} + +define linkonce_odr void @_ZN4BaseC1Ev(%class.Base* %this) unnamed_addr uwtable ssp align 2 { +entry: + %this.addr = alloca %class.Base*, align 8 + store %class.Base* %this, %class.Base** %this.addr, align 8 + %this1 = load %class.Base** %this.addr + call void @_ZN4BaseC2Ev(%class.Base* %this1) + ret void +} + +define linkonce_odr void @_ZN4BaseD1Ev(%class.Base* %this) unnamed_addr uwtable ssp align 2 { +entry: + %this.addr = alloca %class.Base*, align 8 + store %class.Base* %this, %class.Base** %this.addr, align 8 + %this1 = load %class.Base** %this.addr + call void @_ZN4BaseD2Ev(%class.Base* %this1) + ret void +} + +define linkonce_odr void @_ZN4BaseD2Ev(%class.Base* %this) unnamed_addr nounwind uwtable ssp align 2 { +entry: + %this.addr = alloca %class.Base*, align 8 + store %class.Base* %this, %class.Base** %this.addr, align 8 + %this1 = load %class.Base** %this.addr + ret void +} + +define linkonce_odr void @_ZN4BaseC2Ev(%class.Base* %this) unnamed_addr nounwind uwtable ssp align 2 { +entry: + %this.addr = alloca %class.Base*, align 8 + store %class.Base* %this, %class.Base** %this.addr, align 8 + %this1 = load %class.Base** %this.addr + %0 = bitcast %class.Base* %this1 to i8*** + store i8** getelementptr inbounds ([4 x i8*]* @_ZTV4Base, i64 0, i64 2), i8*** %0 + ret void +} + +define linkonce_odr void @_ZN4BaseD0Ev(%class.Base* %this) unnamed_addr uwtable ssp align 2 { +entry: + %this.addr = alloca %class.Base*, align 8 + %exn.slot = alloca i8* + %ehselector.slot = alloca i32 + store %class.Base* %this, %class.Base** %this.addr, align 8 + %this1 = load %class.Base** %this.addr + invoke void @_ZN4BaseD1Ev(%class.Base* %this1) + to label %invoke.cont unwind label %lpad + +invoke.cont: ; preds = %entry + %0 = bitcast %class.Base* %this1 to i8* + call void @_ZdlPv(i8* %0) nounwind + ret void + +lpad: ; preds = %entry + %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + cleanup + %2 = extractvalue { i8*, i32 } %1, 0 + store i8* %2, i8** %exn.slot + %3 = extractvalue { i8*, i32 } %1, 1 + store i32 %3, i32* %ehselector.slot + %4 = bitcast %class.Base* %this1 to i8* + call void @_ZdlPv(i8* %4) nounwind + br label %eh.resume + +eh.resume: ; preds = %lpad + %exn = load i8** %exn.slot + %sel = load i32* %ehselector.slot + %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0 + %lpad.val2 = insertvalue { 
i8*, i32 } %lpad.val, i32 %sel, 1 + resume { i8*, i32 } %lpad.val2 +} + +declare i32 @__gxx_personality_v0(...) + +declare void @_ZdlPv(i8*) nounwind diff --git a/test/Transforms/SROA/alignment.ll b/test/Transforms/SROA/alignment.ll new file mode 100644 index 0000000000..02a67551a3 --- /dev/null +++ b/test/Transforms/SROA/alignment.ll @@ -0,0 +1,85 @@ +; RUN: opt < %s -sroa -S | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) + +define void @test1({ i8, i8 }* %a, { i8, i8 }* %b) { +; CHECK: @test1 +; CHECK: %[[gep_a0:.*]] = getelementptr inbounds { i8, i8 }* %a, i64 0, i32 0 +; CHECK: %[[a0:.*]] = load i8* %[[gep_a0]], align 16 +; CHECK: %[[gep_a1:.*]] = getelementptr inbounds { i8, i8 }* %a, i64 0, i32 1 +; CHECK: %[[a1:.*]] = load i8* %[[gep_a1]], align 1 +; CHECK: %[[gep_b0:.*]] = getelementptr inbounds { i8, i8 }* %b, i64 0, i32 0 +; CHECK: store i8 %[[a0]], i8* %[[gep_b0]], align 16 +; CHECK: %[[gep_b1:.*]] = getelementptr inbounds { i8, i8 }* %b, i64 0, i32 1 +; CHECK: store i8 %[[a1]], i8* %[[gep_b1]], align 1 +; CHECK: ret void + +entry: + %alloca = alloca { i8, i8 }, align 16 + %gep_a = getelementptr { i8, i8 }* %a, i32 0, i32 0 + %gep_alloca = getelementptr { i8, i8 }* %alloca, i32 0, i32 0 + %gep_b = getelementptr { i8, i8 }* %b, i32 0, i32 0 + + store i8 420, i8* %gep_alloca, align 16 + + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %gep_alloca, i8* %gep_a, i32 2, i32 16, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %gep_b, i8* %gep_alloca, i32 2, i32 16, i1 false) + ret void +} + +define void @test2() { +; CHECK: @test2 +; CHECK: alloca i16 +; CHECK: load i8* %{{.*}}, align 1 +; CHECK: store i8 42, i8* %{{.*}}, align 1 +; CHECK: ret void + +entry: + %a = alloca { i8, i8, i8, i8 }, align 2 + %gep1 = getelementptr { i8, i8, i8, i8 }* %a, i32 0, i32 1 + %cast1 = bitcast i8* %gep1 to i16* + store volatile i16 0, i16* %cast1 + %gep2 = getelementptr { i8, i8, i8, i8 }* %a, i32 0, i32 2 + %result = load i8* %gep2, align 2 + store i8 42, i8* %gep2, align 2 + ret void +} + +define void @PR13920(<2 x i64>* %a, i16* %b) { +; Test that alignments on memcpy intrinsics get propagated to loads and stores. +; CHECK: @PR13920 +; CHECK: load <2 x i64>* %a, align 2 +; CHECK: store <2 x i64> {{.*}}, <2 x i64>* {{.*}}, align 2 +; CHECK: ret void + +entry: + %aa = alloca <2 x i64>, align 16 + %aptr = bitcast <2 x i64>* %a to i8* + %aaptr = bitcast <2 x i64>* %aa to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %aaptr, i8* %aptr, i32 16, i32 2, i1 false) + %bptr = bitcast i16* %b to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %bptr, i8* %aaptr, i32 16, i32 2, i1 false) + ret void +} + +define void @test3(i8* %x) { +; Test that when we promote an alloca to a type with lower ABI alignment, we +; provide the needed explicit alignment that code using the alloca may be +; expecting. However, also check that any offset within an alloca can in turn +; reduce the alignment. 
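+; (In @test3, %a is filled by a 22-byte memcpy with align 8 at offset 0, so
+; its replacement [22 x i8] alloca keeps an explicit align 8 even though the
+; array type itself only needs align 1; %b is only written through a GEP at
+; offset 6 with an align-2 memcpy, so its [18 x i8] replacement is only given
+; align 2.)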
+; CHECK: @test3 +; CHECK: alloca [22 x i8], align 8 +; CHECK: alloca [18 x i8], align 2 +; CHECK: ret void + +entry: + %a = alloca { i8*, i8*, i8* } + %b = alloca { i8*, i8*, i8* } + %a_raw = bitcast { i8*, i8*, i8* }* %a to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a_raw, i8* %x, i32 22, i32 8, i1 false) + %b_raw = bitcast { i8*, i8*, i8* }* %b to i8* + %b_gep = getelementptr i8* %b_raw, i32 6 + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b_gep, i8* %x, i32 18, i32 2, i1 false) + ret void +} diff --git a/test/Transforms/SROA/basictest.ll b/test/Transforms/SROA/basictest.ll index a61de05f45..e58cef63ba 100644 --- a/test/Transforms/SROA/basictest.ll +++ b/test/Transforms/SROA/basictest.ll @@ -855,3 +855,45 @@ entry: %result = or i8 %load, %load2 ret i8 %result } + +%PR13916.struct = type { i8 } + +define void @PR13916.1() { +; Ensure that we handle overlapping memcpy intrinsics correctly, especially in +; the case where there is a directly identical value for both source and dest. +; CHECK: @PR13916.1 +; FIXME: We shouldn't leave this alloca around. +; CHECK: alloca +; CHECK: ret void + +entry: + %a = alloca i8 + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %a, i32 1, i32 1, i1 false) + %tmp2 = load i8* %a + ret void +} + +define void @PR13916.2() { +; Check whether we continue to handle them correctly when they start off with +; different pointer value chains, but during rewriting we coalesce them into the +; same value. +; CHECK: @PR13916.2 +; FIXME: We shouldn't leave this alloca around. +; CHECK: alloca +; CHECK: ret void + +entry: + %a = alloca %PR13916.struct, align 1 + br i1 undef, label %if.then, label %if.end + +if.then: + %tmp0 = bitcast %PR13916.struct* %a to i8* + %tmp1 = bitcast %PR13916.struct* %a to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp0, i8* %tmp1, i32 1, i32 1, i1 false) + br label %if.end + +if.end: + %gep = getelementptr %PR13916.struct* %a, i32 0, i32 0 + %tmp2 = load i8* %gep + ret void +} diff --git a/test/Transforms/SROA/phi-and-select.ll b/test/Transforms/SROA/phi-and-select.ll index ad0c55748d..b55d917f72 100644 --- a/test/Transforms/SROA/phi-and-select.ll +++ b/test/Transforms/SROA/phi-and-select.ll @@ -327,3 +327,48 @@ exit: %load = load i32* %a ret i32 %load } + +define i32 @PR13905() { +; Check a pattern where we have a chain of dead phi nodes to ensure they are +; deleted and promotion can proceed. +; CHECK: @PR13905 +; CHECK-NOT: alloca i32 +; CHECK: ret i32 undef + +entry: + %h = alloca i32 + store i32 0, i32* %h + br i1 undef, label %loop1, label %exit + +loop1: + %phi1 = phi i32* [ null, %entry ], [ %h, %loop1 ], [ %h, %loop2 ] + br i1 undef, label %loop1, label %loop2 + +loop2: + br i1 undef, label %loop1, label %exit + +exit: + %phi2 = phi i32* [ %phi1, %loop2 ], [ null, %entry ] + ret i32 undef +} + +define i32 @PR13906() { +; Another pattern which can lead to crashes due to failing to clear out dead +; PHI nodes or select nodes. This triggers subtly differently from the above +; cases because the PHI node is (recursively) alive, but the select is dead. 
+; CHECK: @PR13906 +; CHECK-NOT: alloca + +entry: + %c = alloca i32 + store i32 0, i32* %c + br label %for.cond + +for.cond: + %d.0 = phi i32* [ undef, %entry ], [ %c, %if.then ], [ %d.0, %for.cond ] + br i1 undef, label %if.then, label %for.cond + +if.then: + %tmpcast.d.0 = select i1 undef, i32* %c, i32* %d.0 + br label %for.cond +} diff --git a/test/Transforms/SimplifyCFG/switch_to_lookup_table.ll b/test/Transforms/SimplifyCFG/switch_to_lookup_table.ll index f0bd688050..134ac4eeb1 100644 --- a/test/Transforms/SimplifyCFG/switch_to_lookup_table.ll +++ b/test/Transforms/SimplifyCFG/switch_to_lookup_table.ll @@ -6,17 +6,14 @@ target triple = "x86_64-unknown-linux-gnu" ; The table for @f ; CHECK: @switch.table = private unnamed_addr constant [7 x i32] [i32 55, i32 123, i32 0, i32 -1, i32 27, i32 62, i32 1] -; The int table for @h -; CHECK: @switch.table1 = private unnamed_addr constant [4 x i8] c"*\09X\05" - ; The float table for @h -; CHECK: @switch.table2 = private unnamed_addr constant [4 x float] [float 0x40091EB860000000, float 0x3FF3BE76C0000000, float 0x4012449BA0000000, float 0x4001AE1480000000] +; CHECK: @switch.table1 = private unnamed_addr constant [4 x float] [float 0x40091EB860000000, float 0x3FF3BE76C0000000, float 0x4012449BA0000000, float 0x4001AE1480000000] ; The table for @foostring -; CHECK: @switch.table3 = private unnamed_addr constant [4 x i8*] [i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str1, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str2, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str3, i64 0, i64 0)] +; CHECK: @switch.table2 = private unnamed_addr constant [4 x i8*] [i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str1, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str2, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str3, i64 0, i64 0)] ; The table for @earlyreturncrash -; CHECK: @switch.table4 = private unnamed_addr constant [4 x i32] [i32 42, i32 9, i32 88, i32 5] +; CHECK: @switch.table3 = private unnamed_addr constant [4 x i32] [i32 42, i32 9, i32 88, i32 5] ; A simple int-to-int selection switch. ; It is dense enough to be replaced by table lookup. 
@@ -88,14 +85,15 @@ sw.epilog: ; CHECK-NEXT: %0 = icmp ult i32 %switch.tableidx, 4 ; CHECK-NEXT: br i1 %0, label %switch.lookup, label %sw.epilog ; CHECK: switch.lookup: -; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i8]* @switch.table1, i32 0, i32 %switch.tableidx -; CHECK-NEXT: %switch.load = load i8* %switch.gep -; CHECK-NEXT: %switch.gep1 = getelementptr inbounds [4 x float]* @switch.table2, i32 0, i32 %switch.tableidx -; CHECK-NEXT: %switch.load2 = load float* %switch.gep1 +; CHECK-NEXT: %switch.shiftamt = mul i32 %switch.tableidx, 8 +; CHECK-NEXT: %switch.downshift = lshr i32 89655594, %switch.shiftamt +; CHECK-NEXT: %switch.masked = trunc i32 %switch.downshift to i8 +; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x float]* @switch.table1, i32 0, i32 %switch.tableidx +; CHECK-NEXT: %switch.load = load float* %switch.gep ; CHECK-NEXT: br label %sw.epilog ; CHECK: sw.epilog: -; CHECK-NEXT: %a.0 = phi i8 [ %switch.load, %switch.lookup ], [ 7, %entry ] -; CHECK-NEXT: %b.0 = phi float [ %switch.load2, %switch.lookup ], [ 0x4023FAE140000000, %entry ] +; CHECK-NEXT: %a.0 = phi i8 [ %switch.masked, %switch.lookup ], [ 7, %entry ] +; CHECK-NEXT: %b.0 = phi float [ %switch.load, %switch.lookup ], [ 0x4023FAE140000000, %entry ] ; CHECK-NEXT: call void @dummy(i8 signext %a.0, float %b.0) ; CHECK-NEXT: ret void } @@ -137,7 +135,7 @@ return: ; CHECK-NEXT: %0 = icmp ult i32 %switch.tableidx, 4 ; CHECK-NEXT: br i1 %0, label %switch.lookup, label %return ; CHECK: switch.lookup: -; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i8*]* @switch.table3, i32 0, i32 %switch.tableidx +; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i8*]* @switch.table2, i32 0, i32 %switch.tableidx ; CHECK-NEXT: %switch.load = load i8** %switch.gep ; CHECK-NEXT: ret i8* %switch.load } @@ -166,9 +164,108 @@ sw.epilog: ; CHECK: @earlyreturncrash ; CHECK: switch.lookup: -; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i32]* @switch.table4, i32 0, i32 %switch.tableidx +; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i32]* @switch.table3, i32 0, i32 %switch.tableidx ; CHECK-NEXT: %switch.load = load i32* %switch.gep ; CHECK-NEXT: ret i32 %switch.load ; CHECK: sw.epilog: ; CHECK-NEXT: ret i32 7 } + + +; Example 7 from http://blog.regehr.org/archives/320 +; It is not dense enough for a regular table, but the results +; can be packed into a bitmap. 
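+; (How the packing works: in @h above, the four i8 results {42, 9, 88, 5} are
+; packed with entry 0 in the low byte of the i32 constant 89655594 = 0x0558092A
+; and pulled back out with a shift of 8*index plus a trunc. @crud below does
+; the same with one bit per entry: indices 0, 5, 10, 12, 24, 25, 26, 28 and 58
+; of the 59-entry table are true, and that bit pattern read as a signed i59 is
+; the -288230375765830623 the CHECK lines expect.)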
+ +define i32 @crud(i8 zeroext %c) { +entry: + %cmp = icmp ult i8 %c, 33 + br i1 %cmp, label %lor.end, label %switch.early.test + +switch.early.test: + switch i8 %c, label %lor.rhs [ + i8 92, label %lor.end + i8 62, label %lor.end + i8 60, label %lor.end + i8 59, label %lor.end + i8 58, label %lor.end + i8 46, label %lor.end + i8 44, label %lor.end + i8 34, label %lor.end + i8 39, label %switch.edge + ] + +switch.edge: br label %lor.end +lor.rhs: br label %lor.end + +lor.end: + %0 = phi i1 [ true, %switch.early.test ], + [ false, %lor.rhs ], + [ true, %entry ], + [ true, %switch.early.test ], + [ true, %switch.early.test ], + [ true, %switch.early.test ], + [ true, %switch.early.test ], + [ true, %switch.early.test ], + [ true, %switch.early.test ], + [ true, %switch.early.test ], + [ true, %switch.edge ] + %lor.ext = zext i1 %0 to i32 + ret i32 %lor.ext + +; CHECK: @crud +; CHECK: entry: +; CHECK-NEXT: %cmp = icmp ult i8 %c, 33 +; CHECK-NEXT: br i1 %cmp, label %lor.end, label %switch.early.test +; CHECK: switch.early.test: +; CHECK-NEXT: %switch.tableidx = sub i8 %c, 34 +; CHECK-NEXT: %0 = icmp ult i8 %switch.tableidx, 59 +; CHECK-NEXT: br i1 %0, label %switch.lookup, label %lor.end +; CHECK: switch.lookup: +; CHECK-NEXT: %switch.cast = zext i8 %switch.tableidx to i59 +; CHECK-NEXT: %switch.shiftamt = mul i59 %switch.cast, 1 +; CHECK-NEXT: %switch.downshift = lshr i59 -288230375765830623, %switch.shiftamt +; CHECK-NEXT: %switch.masked = trunc i59 %switch.downshift to i1 +; CHECK-NEXT: br label %lor.end +; CHECK: lor.end: +; CHECK-NEXT: %1 = phi i1 [ true, %entry ], [ %switch.masked, %switch.lookup ], [ false, %switch.early.test ] +; CHECK-NEXT: %lor.ext = zext i1 %1 to i32 +; CHECK-NEXT: ret i32 %lor.ext +} + +; PR13946 +define i32 @overflow(i32 %type) nounwind { +entry: + switch i32 %type, label %sw.default [ + i32 -2147483648, label %sw.bb + i32 0, label %sw.bb + i32 1, label %sw.bb1 + i32 2, label %sw.bb2 + i32 -2147483645, label %sw.bb3 + i32 3, label %sw.bb3 + ] + +sw.bb: + br label %if.end + +sw.bb1: + br label %if.end + +sw.bb2: + br label %if.end + +sw.bb3: + br label %if.end + +sw.default: + br label %if.end + +if.else: + br label %if.end + +if.end: + %dirent_type.0 = phi i32 [ 3, %sw.default ], [ 6, %sw.bb3 ], [ 5, %sw.bb2 ], [ 0, %sw.bb1 ], [ 3, %sw.bb ], [ 0, %if.else ] + ret i32 %dirent_type.0 +; CHECK: define i32 @overflow +; CHECK: switch +; CHECK: phi +} diff --git a/tools/lli/CMakeLists.txt b/tools/lli/CMakeLists.txt index 68cb921028..a9c7adf978 100644 --- a/tools/lli/CMakeLists.txt +++ b/tools/lli/CMakeLists.txt @@ -1,6 +1,4 @@ -link_directories( ${LLVM_INTEL_JITEVENTS_LIBDIR} ) - set(LLVM_LINK_COMPONENTS mcjit jit interpreter nativecodegen bitreader asmparser selectiondag) if( LLVM_USE_OPROFILE ) diff --git a/unittests/ADT/APFloatTest.cpp b/unittests/ADT/APFloatTest.cpp index 00b62feaeb..c8d7177d86 100644 --- a/unittests/ADT/APFloatTest.cpp +++ b/unittests/ADT/APFloatTest.cpp @@ -689,6 +689,23 @@ TEST(APFloatTest, roundToIntegral) { P = R; P.roundToIntegral(APFloat::rmNearestTiesToEven); EXPECT_EQ(R.convertToDouble(), P.convertToDouble()); + + P = APFloat::getZero(APFloat::IEEEdouble); + P.roundToIntegral(APFloat::rmTowardZero); + EXPECT_EQ(0.0, P.convertToDouble()); + P = APFloat::getZero(APFloat::IEEEdouble, true); + P.roundToIntegral(APFloat::rmTowardZero); + EXPECT_EQ(-0.0, P.convertToDouble()); + P = APFloat::getNaN(APFloat::IEEEdouble); + P.roundToIntegral(APFloat::rmTowardZero); + EXPECT_TRUE(IsNAN(P.convertToDouble())); + P = 
APFloat::getInf(APFloat::IEEEdouble); + P.roundToIntegral(APFloat::rmTowardZero); + EXPECT_TRUE(IsInf(P.convertToDouble()) && P.convertToDouble() > 0.0); + P = APFloat::getInf(APFloat::IEEEdouble, true); + P.roundToIntegral(APFloat::rmTowardZero); + EXPECT_TRUE(IsInf(P.convertToDouble()) && P.convertToDouble() < 0.0); + } TEST(APFloatTest, getLargest) { diff --git a/unittests/ExecutionEngine/JIT/CMakeLists.txt b/unittests/ExecutionEngine/JIT/CMakeLists.txt index d43d72de40..11cf784e1e 100644 --- a/unittests/ExecutionEngine/JIT/CMakeLists.txt +++ b/unittests/ExecutionEngine/JIT/CMakeLists.txt @@ -14,8 +14,6 @@ set(LLVM_OPTIONAL_SOURCES ) if( LLVM_USE_INTEL_JITEVENTS ) - include_directories( ${LLVM_INTEL_JITEVENTS_INCDIR} ) - link_directories( ${LLVM_INTEL_JITEVENTS_LIBDIR} ) set(ProfileTestSources IntelJITEventListenerTest.cpp ) diff --git a/unittests/ExecutionEngine/JIT/IntelJITEventListenerTest.cpp b/unittests/ExecutionEngine/JIT/IntelJITEventListenerTest.cpp index 8ed7a15be3..d3f66a27e9 100644 --- a/unittests/ExecutionEngine/JIT/IntelJITEventListenerTest.cpp +++ b/unittests/ExecutionEngine/JIT/IntelJITEventListenerTest.cpp @@ -11,7 +11,10 @@ using namespace llvm; -#include "llvm/ExecutionEngine/IntelJITEventsWrapper.h" +// Because we want to keep the implementation details of the Intel API used to +// communicate with Amplifier out of the public header files, the header below +// is included from the source tree instead. +#include "../../../lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h" #include <map> #include <list> @@ -80,7 +83,7 @@ public: EXPECT_TRUE(0 != MockWrapper); Listener.reset(JITEventListener::createIntelJITEventListener( - MockWrapper.get())); + MockWrapper.take())); EXPECT_TRUE(0 != Listener); EE->RegisterJITEventListener(Listener.get()); } diff --git a/unittests/Transforms/Utils/CMakeLists.txt b/unittests/Transforms/Utils/CMakeLists.txt index 365bfbb0bf..730d83b838 100644 --- a/unittests/Transforms/Utils/CMakeLists.txt +++ b/unittests/Transforms/Utils/CMakeLists.txt @@ -4,5 +4,6 @@ set(LLVM_LINK_COMPONENTS add_llvm_unittest(UtilsTests Cloning.cpp + IntegerDivision.cpp Local.cpp ) diff --git a/unittests/Transforms/Utils/IntegerDivision.cpp b/unittests/Transforms/Utils/IntegerDivision.cpp new file mode 100644 index 0000000000..a3211391d6 --- /dev/null +++ b/unittests/Transforms/Utils/IntegerDivision.cpp @@ -0,0 +1,142 @@ +//===- IntegerDivision.cpp - Unit tests for the integer division code -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "gtest/gtest.h" +#include "llvm/BasicBlock.h" +#include "llvm/GlobalValue.h" +#include "llvm/Function.h" +#include "llvm/IRBuilder.h" +#include "llvm/Module.h" +#include "llvm/Transforms/Utils/IntegerDivision.h" + +using namespace llvm; + +namespace { + +TEST(IntegerDivision, SDiv) { + LLVMContext &C(getGlobalContext()); + Module M("test division", C); + IRBuilder<> Builder(C); + + SmallVector<Type*, 2> ArgTys(2, Builder.getInt32Ty()); + Function *F = Function::Create(FunctionType::get(Builder.getInt32Ty(), + ArgTys, false), + GlobalValue::ExternalLinkage, "F", &M); + assert(F->getArgumentList().size() == 2); + + BasicBlock *BB = BasicBlock::Create(C, "", F); + Builder.SetInsertPoint(BB); + + Function::arg_iterator AI = F->arg_begin(); + Value *A = AI++; + Value *B = AI++; + + Value *Div = Builder.CreateSDiv(A, B); + EXPECT_TRUE(BB->front().getOpcode() == Instruction::SDiv); + + Value *Ret = Builder.CreateRet(Div); + + expandDivision(cast<BinaryOperator>(Div)); + EXPECT_TRUE(BB->front().getOpcode() == Instruction::AShr); + + Instruction* Quotient = dyn_cast<Instruction>(cast<User>(Ret)->getOperand(0)); + EXPECT_TRUE(Quotient && Quotient->getOpcode() == Instruction::Sub); +} + +TEST(IntegerDivision, UDiv) { + LLVMContext &C(getGlobalContext()); + Module M("test division", C); + IRBuilder<> Builder(C); + + SmallVector<Type*, 2> ArgTys(2, Builder.getInt32Ty()); + Function *F = Function::Create(FunctionType::get(Builder.getInt32Ty(), + ArgTys, false), + GlobalValue::ExternalLinkage, "F", &M); + assert(F->getArgumentList().size() == 2); + + BasicBlock *BB = BasicBlock::Create(C, "", F); + Builder.SetInsertPoint(BB); + + Function::arg_iterator AI = F->arg_begin(); + Value *A = AI++; + Value *B = AI++; + + Value *Div = Builder.CreateUDiv(A, B); + EXPECT_TRUE(BB->front().getOpcode() == Instruction::UDiv); + + Value *Ret = Builder.CreateRet(Div); + + expandDivision(cast<BinaryOperator>(Div)); + EXPECT_TRUE(BB->front().getOpcode() == Instruction::ICmp); + + Instruction* Quotient = dyn_cast<Instruction>(cast<User>(Ret)->getOperand(0)); + EXPECT_TRUE(Quotient && Quotient->getOpcode() == Instruction::PHI); +} + +TEST(IntegerDivision, SRem) { + LLVMContext &C(getGlobalContext()); + Module M("test remainder", C); + IRBuilder<> Builder(C); + + SmallVector<Type*, 2> ArgTys(2, Builder.getInt32Ty()); + Function *F = Function::Create(FunctionType::get(Builder.getInt32Ty(), + ArgTys, false), + GlobalValue::ExternalLinkage, "F", &M); + assert(F->getArgumentList().size() == 2); + + BasicBlock *BB = BasicBlock::Create(C, "", F); + Builder.SetInsertPoint(BB); + + Function::arg_iterator AI = F->arg_begin(); + Value *A = AI++; + Value *B = AI++; + + Value *Rem = Builder.CreateSRem(A, B); + EXPECT_TRUE(BB->front().getOpcode() == Instruction::SRem); + + Value *Ret = Builder.CreateRet(Rem); + + expandRemainder(cast<BinaryOperator>(Rem)); + EXPECT_TRUE(BB->front().getOpcode() == Instruction::AShr); + + Instruction* Remainder = dyn_cast<Instruction>(cast<User>(Ret)->getOperand(0)); + EXPECT_TRUE(Remainder && Remainder->getOpcode() == Instruction::Sub); +} + +TEST(IntegerDivision, URem) { + LLVMContext &C(getGlobalContext()); + Module M("test remainder", C); + IRBuilder<> Builder(C); + + SmallVector<Type*, 2> ArgTys(2, Builder.getInt32Ty()); + Function *F = Function::Create(FunctionType::get(Builder.getInt32Ty(), + ArgTys, false), + GlobalValue::ExternalLinkage, "F", &M); + assert(F->getArgumentList().size() == 2); + + 
BasicBlock *BB = BasicBlock::Create(C, "", F); + Builder.SetInsertPoint(BB); + + Function::arg_iterator AI = F->arg_begin(); + Value *A = AI++; + Value *B = AI++; + + Value *Rem = Builder.CreateURem(A, B); + EXPECT_TRUE(BB->front().getOpcode() == Instruction::URem); + + Value *Ret = Builder.CreateRet(Rem); + + expandRemainder(cast<BinaryOperator>(Rem)); + EXPECT_TRUE(BB->front().getOpcode() == Instruction::ICmp); + + Instruction* Remainder = dyn_cast<Instruction>(cast<User>(Ret)->getOperand(0)); + EXPECT_TRUE(Remainder && Remainder->getOpcode() == Instruction::Sub); +} + +} diff --git a/utils/TableGen/AsmMatcherEmitter.cpp b/utils/TableGen/AsmMatcherEmitter.cpp index 593de698a9..7b49723d21 100644 --- a/utils/TableGen/AsmMatcherEmitter.cpp +++ b/utils/TableGen/AsmMatcherEmitter.cpp @@ -1714,9 +1714,9 @@ static void emitConvertToMCInst(CodeGenTarget &Target, StringRef ClassName, raw_string_ostream OpOS(OperandFnBody); // Start the operand number lookup function. OpOS << "unsigned " << Target.getName() << ClassName << "::\n" - << "getMCInstOperandNumImpl(unsigned Kind, MCInst &Inst,\n" - << " const SmallVectorImpl<MCParsedAsmOperand*> " - << "&Operands,\n unsigned OperandNum, unsigned " + << "getMCInstOperandNum(unsigned Kind,\n" + << " const SmallVectorImpl<MCParsedAsmOperand*> " + << "&Operands,\n unsigned OperandNum, unsigned " << "&NumMCOperands) {\n" << " assert(Kind < CVT_NUM_SIGNATURES && \"Invalid signature!\");\n" << " NumMCOperands = 0;\n" @@ -2617,11 +2617,11 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { << "unsigned Opcode,\n" << " const SmallVectorImpl<MCParsedAsmOperand*> " << "&Operands);\n"; - OS << " unsigned getMCInstOperandNumImpl(unsigned Kind, MCInst &Inst,\n" - << " const " + OS << " unsigned getMCInstOperandNum(unsigned Kind,\n" + << " const " << "SmallVectorImpl<MCParsedAsmOperand*> &Operands,\n " - << " unsigned OperandNum, unsigned &NumMCOperands);\n"; - OS << " bool mnemonicIsValidImpl(StringRef Mnemonic);\n"; + << " unsigned OperandNum, unsigned &NumMCOperands);\n"; + OS << " bool mnemonicIsValid(StringRef Mnemonic);\n"; OS << " unsigned MatchInstructionImpl(\n" << " const SmallVectorImpl<MCParsedAsmOperand*> &Operands,\n" << " unsigned &Kind, MCInst &Inst, " @@ -2800,7 +2800,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { // A method to determine if a mnemonic is in the list. OS << "bool " << Target.getName() << ClassName << "::\n" - << "mnemonicIsValidImpl(StringRef Mnemonic) {\n"; + << "mnemonicIsValid(StringRef Mnemonic) {\n"; OS << " // Search the table.\n"; OS << " std::pair<const MatchEntry*, const MatchEntry*> MnemonicRange =\n"; OS << " std::equal_range(MatchTable, MatchTable+" |